From 2d8ae8c417db284f598dffb178cc01e7db0f1821 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 2 May 2023 15:36:02 +0200 Subject: nfsd: use vfs setgid helper We've aligned setgid behavior over multiple kernel releases. The details can be found in commit cf619f891971 ("Merge tag 'fs.ovl.setgid.v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping") and commit 426b4ca2d6a5 ("Merge tag 'fs.setgid.v6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux"). Consistent setgid stripping behavior is now encapsulated in the setattr_should_drop_sgid() helper which is used by all filesystems that strip setgid bits outside of vfs proper. Usually ATTR_KILL_SGID is raised in e.g., chown_common() and is subject to the setattr_should_drop_sgid() check to determine whether the setgid bit can be retained. Since nfsd is raising ATTR_KILL_SGID unconditionally it will cause notify_change() to strip it even if the caller had the necessary privileges to retain it. Ensure that nfsd only raises ATR_KILL_SGID if the caller lacks the necessary privileges to retain the setgid bit. Without this patch the setgid stripping tests in LTP will fail: > As you can see, the problem is S_ISGID (0002000) was dropped on a > non-group-executable file while chown was invoked by super-user, while [...] > fchown02.c:66: TFAIL: testfile2: wrong mode permissions 0100700, expected 0102700 [...] > chown02.c:57: TFAIL: testfile2: wrong mode permissions 0100700, expected 0102700 With this patch all tests pass. Reported-by: Sherry Yang Signed-off-by: Christian Brauner Reviewed-by: Jeff Layton Cc: Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index db67f8e19344..0016bcc04a59 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -388,7 +388,9 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) iap->ia_mode &= ~S_ISGID; } else { /* set ATTR_KILL_* bits and let VFS handle it */ - iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); + iap->ia_valid |= ATTR_KILL_SUID; + iap->ia_valid |= + setattr_should_drop_sgid(&nop_mnt_idmap, inode); } } } -- cgit v1.2.3 From 442a629009818de2f8d9731cafb96e111ca8e4e6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:35:38 -0400 Subject: NFSD: Clean up nfsctl white-space damage Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b4fd7a7062d5..2ecea3b318eb 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -324,7 +324,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) len = qword_get(&mesg, dname, size); if (len <= 0) return -EINVAL; - + path = dname+len+1; len = qword_get(&mesg, path, size); if (len <= 0) @@ -338,7 +338,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) return -EINVAL; maxsize = min(maxsize, NFS3_FHSIZE); - if (qword_get(&mesg, mesg, size)>0) + if (qword_get(&mesg, mesg, size) > 0) return -EINVAL; /* we have all the words, they are in buf.. */ @@ -346,7 +346,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (!dom) return -ENOMEM; - len = exp_rootfh(netns(file), dom, path, &fh, maxsize); + len = exp_rootfh(netns(file), dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; @@ -418,8 +418,8 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) * OR * * Input: - * buf: C string containing whitespace- - * separated unsigned integer values + * buf: C string containing whitespace- + * separated unsigned integer values * representing the number of NFSD * threads to start in each pool * size: non-zero length of C string in @buf @@ -526,7 +526,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) char *sep; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); - if (size>0) { + if (size > 0) { if (nn->nfsd_serv) /* Cannot change versions without updating * nn->nfsd_serv->sv_xdrsize, and reallocing @@ -637,11 +637,11 @@ out: * OR * * Input: - * buf: C string containing whitespace- - * separated positive or negative - * integer values representing NFS - * protocol versions to enable ("+n") - * or disable ("-n") + * buf: C string containing whitespace- + * separated positive or negative + * integer values representing NFS + * protocol versions to enable ("+n") + * or disable ("-n") * size: non-zero length of C string in @buf * Output: * On success: status of zero or more protocol versions has @@ -705,7 +705,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred } /* - * A transport listener is added by writing it's transport name and + * A transport listener is added by writing its transport name and * a port number. */ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred) @@ -832,9 +832,9 @@ int nfsd_max_blksize; * OR * * Input: - * buf: C string containing an unsigned - * integer value representing the new - * NFS blksize + * buf: C string containing an unsigned + * integer value representing the new + * NFS blksize * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C string @@ -881,9 +881,9 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) * OR * * Input: - * buf: C string containing an unsigned - * integer value representing the new - * number of max connections + * buf: C string containing an unsigned + * integer value representing the new + * number of max connections * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C string @@ -1065,7 +1065,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) * OR * * Input: - * buf: any value + * buf: any value * size: non-zero length of C string in @buf * Output: * passed-in buffer filled with "Y" or "N" with a newline -- cgit v1.2.3 From 3434d7aa77d24c5c4b3d4385084cfdb607f05dec Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:35:44 -0400 Subject: NFSD: Clean up nfsctl_transaction_write() For easier readability, follow the common convention: if (error) handle_error; continue_normally; No behavior change is expected. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 2ecea3b318eb..df0aae211a49 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -109,12 +109,12 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu if (IS_ERR(data)) return PTR_ERR(data); - rv = write_op[ino](file, data, size); - if (rv >= 0) { - simple_transaction_set(file, rv); - rv = size; - } - return rv; + rv = write_op[ino](file, data, size); + if (rv < 0) + return rv; + + simple_transaction_set(file, rv); + return size; } static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) -- cgit v1.2.3 From 39d432fc76301cf0a0c454022117601994ca9397 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:35:50 -0400 Subject: NFSD: trace nfsctl operations Add trace log eye-catchers that record the arguments used to configure NFSD. This helps when troubleshooting the NFSD administrative interfaces. These tracepoints can capture NFSD start-up and shutdown times and parameters, changes in lease time and thread count, and a request to end the namespace's NFSv4 grace period, in addition to the set of NFS versions that are enabled. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 33 +++++-- fs/nfsd/trace.h | 259 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 284 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index df0aae211a49..1489e0b703b4 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -25,6 +25,7 @@ #include "netns.h" #include "pnfs.h" #include "filecache.h" +#include "trace.h" /* * We have a single directory with several nodes in it. @@ -230,6 +231,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (rpc_pton(net, fo_path, size, sap, salen) == 0) return -EINVAL; + trace_nfsd_ctl_unlock_ip(net, buf); return nlmsvc_unlock_all_by_ip(sap); } @@ -263,7 +265,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size) fo_path = buf; if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - + trace_nfsd_ctl_unlock_fs(netns(file), fo_path); error = kern_path(fo_path, 0, &path); if (error) return error; @@ -341,6 +343,8 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (qword_get(&mesg, mesg, size) > 0) return -EINVAL; + trace_nfsd_ctl_filehandle(netns(file), dname, path, maxsize); + /* we have all the words, they are in buf.. */ dom = unix_domain_find(dname); if (!dom) @@ -399,6 +403,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return rv; if (newthreads < 0) return -EINVAL; + trace_nfsd_ctl_threads(net, newthreads); rv = nfsd_svc(newthreads, net, file->f_cred); if (rv < 0) return rv; @@ -471,6 +476,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) rv = -EINVAL; if (nthreads[i] < 0) goto out_free; + trace_nfsd_ctl_pool_threads(net, i, nthreads[i]); } rv = nfsd_set_nrthreads(i, nthreads, net); if (rv) @@ -536,6 +542,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) if (buf[size-1] != '\n') return -EINVAL; buf[size-1] = 0; + trace_nfsd_ctl_version(netns(file), buf); vers = mesg; len = qword_get(&mesg, vers, size); @@ -689,6 +696,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred err = get_int(&mesg, &fd); if (err != 0 || fd < 0) return -EINVAL; + trace_nfsd_ctl_ports_addfd(net, fd); err = nfsd_create_serv(net); if (err != 0) @@ -720,6 +728,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr if (port < 1 || port > USHRT_MAX) return -EINVAL; + trace_nfsd_ctl_ports_addxprt(net, transport, port); err = nfsd_create_serv(net); if (err != 0) @@ -853,6 +862,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) int rv = get_int(&mesg, &bsize); if (rv) return rv; + trace_nfsd_ctl_maxblksize(netns(file), bsize); + /* force bsize into allowed range and * required alignment. */ @@ -903,6 +914,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size) if (rv) return rv; + trace_nfsd_ctl_maxconn(netns(file), maxconn); nn->max_connections = maxconn; } @@ -913,6 +925,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size) static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time64_t *time, struct nfsd_net *nn) { + struct dentry *dentry = file_dentry(file); char *mesg = buf; int rv, i; @@ -922,6 +935,9 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, rv = get_int(&mesg, &i); if (rv) return rv; + trace_nfsd_ctl_time(netns(file), dentry->d_name.name, + dentry->d_name.len, i); + /* * Some sanity checking. We don't have a reason for * these particular numbers, but problems with the @@ -1014,6 +1030,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, len = qword_get(&mesg, recdir, size); if (len <= 0) return -EINVAL; + trace_nfsd_ctl_recoverydir(netns(file), recdir); status = nfs4_reset_recoverydir(recdir); if (status) @@ -1087,7 +1104,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) case '1': if (!nn->nfsd_serv) return -EBUSY; - nfsd4_end_grace(nn); + trace_nfsd_end_grace(netns(file)); break; default: return -EINVAL; @@ -1192,8 +1209,8 @@ static int __nfsd_symlink(struct inode *dir, struct dentry *dentry, * @content is assumed to be a NUL-terminated string that lives * longer than the symlink itself. */ -static void nfsd_symlink(struct dentry *parent, const char *name, - const char *content) +static void _nfsd_symlink(struct dentry *parent, const char *name, + const char *content) { struct inode *dir = parent->d_inode; struct dentry *dentry; @@ -1210,8 +1227,8 @@ out: inode_unlock(dir); } #else -static inline void nfsd_symlink(struct dentry *parent, const char *name, - const char *content) +static inline void _nfsd_symlink(struct dentry *parent, const char *name, + const char *content) { } @@ -1389,8 +1406,8 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) ret = simple_fill_super(sb, 0x6e667364, nfsd_files); if (ret) return ret; - nfsd_symlink(sb->s_root, "supported_krb5_enctypes", - "/proc/net/rpc/gss_krb5_enctypes"); + _nfsd_symlink(sb->s_root, "supported_krb5_enctypes", + "/proc/net/rpc/gss_krb5_enctypes"); dentry = nfsd_mkdir(sb->s_root, NULL, "clients"); if (IS_ERR(dentry)) return PTR_ERR(dentry); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 72a906a053dc..2af74983f146 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -1581,6 +1581,265 @@ TRACE_EVENT(nfsd_cb_recall_any_done, ) ); +TRACE_EVENT(nfsd_ctl_unlock_ip, + TP_PROTO( + const struct net *net, + const char *address + ), + TP_ARGS(net, address), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __string(address, address) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __assign_str(address, address); + ), + TP_printk("address=%s", + __get_str(address) + ) +); + +TRACE_EVENT(nfsd_ctl_unlock_fs, + TP_PROTO( + const struct net *net, + const char *path + ), + TP_ARGS(net, path), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __string(path, path) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __assign_str(path, path); + ), + TP_printk("path=%s", + __get_str(path) + ) +); + +TRACE_EVENT(nfsd_ctl_filehandle, + TP_PROTO( + const struct net *net, + const char *domain, + const char *path, + int maxsize + ), + TP_ARGS(net, domain, path, maxsize), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, maxsize) + __string(domain, domain) + __string(path, path) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->maxsize = maxsize; + __assign_str(domain, domain); + __assign_str(path, path); + ), + TP_printk("domain=%s path=%s maxsize=%d", + __get_str(domain), __get_str(path), __entry->maxsize + ) +); + +TRACE_EVENT(nfsd_ctl_threads, + TP_PROTO( + const struct net *net, + int newthreads + ), + TP_ARGS(net, newthreads), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, newthreads) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->newthreads = newthreads; + ), + TP_printk("newthreads=%d", + __entry->newthreads + ) +); + +TRACE_EVENT(nfsd_ctl_pool_threads, + TP_PROTO( + const struct net *net, + int pool, + int nrthreads + ), + TP_ARGS(net, pool, nrthreads), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, pool) + __field(int, nrthreads) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->pool = pool; + __entry->nrthreads = nrthreads; + ), + TP_printk("pool=%d nrthreads=%d", + __entry->pool, __entry->nrthreads + ) +); + +TRACE_EVENT(nfsd_ctl_version, + TP_PROTO( + const struct net *net, + const char *mesg + ), + TP_ARGS(net, mesg), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __string(mesg, mesg) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __assign_str(mesg, mesg); + ), + TP_printk("%s", + __get_str(mesg) + ) +); + +TRACE_EVENT(nfsd_ctl_ports_addfd, + TP_PROTO( + const struct net *net, + int fd + ), + TP_ARGS(net, fd), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, fd) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->fd = fd; + ), + TP_printk("fd=%d", + __entry->fd + ) +); + +TRACE_EVENT(nfsd_ctl_ports_addxprt, + TP_PROTO( + const struct net *net, + const char *transport, + int port + ), + TP_ARGS(net, transport, port), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, port) + __string(transport, transport) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->port = port; + __assign_str(transport, transport); + ), + TP_printk("transport=%s port=%d", + __get_str(transport), __entry->port + ) +); + +TRACE_EVENT(nfsd_ctl_maxblksize, + TP_PROTO( + const struct net *net, + int bsize + ), + TP_ARGS(net, bsize), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, bsize) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->bsize = bsize; + ), + TP_printk("bsize=%d", + __entry->bsize + ) +); + +TRACE_EVENT(nfsd_ctl_maxconn, + TP_PROTO( + const struct net *net, + int maxconn + ), + TP_ARGS(net, maxconn), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, maxconn) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->maxconn = maxconn; + ), + TP_printk("maxconn=%d", + __entry->maxconn + ) +); + +TRACE_EVENT(nfsd_ctl_time, + TP_PROTO( + const struct net *net, + const char *name, + size_t namelen, + int time + ), + TP_ARGS(net, name, namelen, time), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, time) + __string_len(name, name, namelen) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->time = time; + __assign_str_len(name, name, namelen); + ), + TP_printk("file=%s time=%d\n", + __get_str(name), __entry->time + ) +); + +TRACE_EVENT(nfsd_ctl_recoverydir, + TP_PROTO( + const struct net *net, + const char *recdir + ), + TP_ARGS(net, recdir), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __string(recdir, recdir) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __assign_str(recdir, recdir); + ), + TP_printk("recdir=%s", + __get_str(recdir) + ) +); + +TRACE_EVENT(nfsd_end_grace, + TP_PROTO( + const struct net *net + ), + TP_ARGS(net), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + ), + TP_printk("nn=%d", __entry->netns_ino + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From adaa7a50d027b91ee6d56ae8b0a3f6edf5a78776 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 16 May 2023 10:26:09 -0400 Subject: NFSD: Add encoders for NFSv4 clientids and verifiers Deduplicate some common code. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 107 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 55 insertions(+), 52 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 76db2fe29624..1488ce978f7c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3688,6 +3688,30 @@ fail: return -EINVAL; } +static __be32 +nfsd4_encode_verifier4(struct xdr_stream *xdr, const nfs4_verifier *verf) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; + memcpy(p, verf->data, sizeof(verf->data)); + return nfs_ok; +} + +static __be32 +nfsd4_encode_clientid4(struct xdr_stream *xdr, const clientid_t *clientid) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, sizeof(__be64)); + if (!p) + return nfserr_resource; + memcpy(p, clientid, sizeof(*clientid)); + return nfs_ok; +} + static __be32 nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) { @@ -3752,15 +3776,8 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { struct nfsd4_commit *commit = &u->commit; - struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, commit->co_verf.data, - NFS4_VERIFIER_SIZE); - return 0; + return nfsd4_encode_verifier4(resp->xdr, &commit->co_verf); } static __be32 @@ -4213,15 +4230,9 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, int starting_len = xdr->buf->len; __be32 *p; - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); - if (!p) - return nfserr_resource; - - /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(0); - xdr->buf->head[0].iov_len = (char *)xdr->p - - (char *)xdr->buf->head[0].iov_base; + nfserr = nfsd4_encode_verifier4(xdr, &readdir->rd_verf); + if (nfserr != nfs_ok) + return nfserr; /* * Number of bytes left for directory entries allowing for the @@ -4448,23 +4459,25 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_setclientid *scd = &u->setclientid; struct xdr_stream *xdr = resp->xdr; - __be32 *p; if (!nfserr) { - p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8); - p = xdr_encode_opaque_fixed(p, &scd->se_confirm, - NFS4_VERIFIER_SIZE); - } - else if (nfserr == nfserr_clid_inuse) { - p = xdr_reserve_space(xdr, 8); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(0); + nfserr = nfsd4_encode_clientid4(xdr, &scd->se_clientid); + if (nfserr != nfs_ok) + goto out; + nfserr = nfsd4_encode_verifier4(xdr, &scd->se_confirm); + } else if (nfserr == nfserr_clid_inuse) { + /* empty network id */ + if (xdr_stream_encode_u32(xdr, 0) < 0) { + nfserr = nfserr_resource; + goto out; + } + /* empty universal address */ + if (xdr_stream_encode_u32(xdr, 0) < 0) { + nfserr = nfserr_resource; + goto out; + } } +out: return nfserr; } @@ -4473,17 +4486,12 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { struct nfsd4_write *write = &u->write; - struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 16); - if (!p) + if (xdr_stream_encode_u32(resp->xdr, write->wr_bytes_written) < 0) return nfserr_resource; - *p++ = cpu_to_be32(write->wr_bytes_written); - *p++ = cpu_to_be32(write->wr_how_written); - p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, - NFS4_VERIFIER_SIZE); - return 0; + if (xdr_stream_encode_u32(resp->xdr, write->wr_how_written) < 0) + return nfserr_resource; + return nfsd4_encode_verifier4(resp->xdr, &write->wr_verifier); } static __be32 @@ -4505,20 +4513,15 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, server_scope = nn->nfsd_name; server_scope_sz = strlen(nn->nfsd_name); - p = xdr_reserve_space(xdr, - 8 /* eir_clientid */ + - 4 /* eir_sequenceid */ + - 4 /* eir_flags */ + - 4 /* spr_how */); - if (!p) + if (nfsd4_encode_clientid4(xdr, &exid->clientid) != nfs_ok) + return nfserr_resource; + if (xdr_stream_encode_u32(xdr, exid->seqid) < 0) + return nfserr_resource; + if (xdr_stream_encode_u32(xdr, exid->flags) < 0) return nfserr_resource; - p = xdr_encode_opaque_fixed(p, &exid->clientid, 8); - *p++ = cpu_to_be32(exid->seqid); - *p++ = cpu_to_be32(exid->flags); - - *p++ = cpu_to_be32(exid->spa_how); - + if (xdr_stream_encode_u32(xdr, exid->spa_how) < 0) + return nfserr_resource; switch (exid->spa_how) { case SP4_NONE: break; -- cgit v1.2.3 From 66a21db7db59f279a9fcb6445fa36dc6eb001b3c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 16 May 2023 10:26:15 -0400 Subject: NFSD: Replace encode_cinfo() De-duplicate "reserve_space; encode_cinfo". Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 72 +++++++++++++++++++------------------------------------ 1 file changed, 24 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1488ce978f7c..c5c6873b938d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2566,12 +2566,16 @@ static __be32 *encode_time_delta(__be32 *p, struct inode *inode) return p; } -static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) +static __be32 +nfsd4_encode_change_info4(struct xdr_stream *xdr, struct nfsd4_change_info *c) { - *p++ = cpu_to_be32(c->atomic); - p = xdr_encode_hyper(p, c->before_change); - p = xdr_encode_hyper(p, c->after_change); - return p; + if (xdr_stream_encode_bool(xdr, c->atomic) < 0) + return nfserr_resource; + if (xdr_stream_encode_u64(xdr, c->before_change) < 0) + return nfserr_resource; + if (xdr_stream_encode_u64(xdr, c->after_change) < 0) + return nfserr_resource; + return nfs_ok; } /* Encode as an array of strings the string given with components @@ -3786,12 +3790,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_create *create = &u->create; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - encode_cinfo(p, &create->cr_cinfo); + nfserr = nfsd4_encode_change_info4(xdr, &create->cr_cinfo); + if (nfserr) + return nfserr; return nfsd4_encode_bitmap(xdr, create->cr_bmval[0], create->cr_bmval[1], create->cr_bmval[2]); } @@ -3909,13 +3911,8 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_link *link = &u->link; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &link->li_cinfo); - return 0; + return nfsd4_encode_change_info4(xdr, &link->li_cinfo); } @@ -3930,11 +3927,11 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); if (nfserr) return nfserr; - p = xdr_reserve_space(xdr, 24); - if (!p) + nfserr = nfsd4_encode_change_info4(xdr, &open->op_cinfo); + if (nfserr) + return nfserr; + if (xdr_stream_encode_u32(xdr, open->op_rflags) < 0) return nfserr_resource; - p = encode_cinfo(p, &open->op_cinfo); - *p++ = cpu_to_be32(open->op_rflags); nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1], open->op_bmval[2]); @@ -4310,13 +4307,8 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_remove *remove = &u->remove; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &remove->rm_cinfo); - return 0; + return nfsd4_encode_change_info4(xdr, &remove->rm_cinfo); } static __be32 @@ -4325,14 +4317,11 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_rename *rename = &u->rename; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 40); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &rename->rn_sinfo); - p = encode_cinfo(p, &rename->rn_tinfo); - return 0; + nfserr = nfsd4_encode_change_info4(xdr, &rename->rn_sinfo); + if (nfserr) + return nfserr; + return nfsd4_encode_change_info4(xdr, &rename->rn_tinfo); } static __be32 @@ -5102,15 +5091,8 @@ nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_setxattr *setxattr = &u->setxattr; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - encode_cinfo(p, &setxattr->setxa_cinfo); - - return 0; + return nfsd4_encode_change_info4(xdr, &setxattr->setxa_cinfo); } /* @@ -5256,14 +5238,8 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_removexattr *removexattr = &u->removexattr; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - - p = encode_cinfo(p, &removexattr->rmxa_cinfo); - return 0; + return nfsd4_encode_change_info4(xdr, &removexattr->rmxa_cinfo); } typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u); -- cgit v1.2.3 From 82078b9895bd46ce69b4a73e2da40e7e2202fdb5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:45:36 -0400 Subject: NFSD: Ensure that xdr_write_pages updates rq_next_page All other NFSv[23] procedures manage to keep page_ptr and rq_next_page in lock step. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 11 +++++++---- fs/nfsd/nfsxdr.c | 11 +++++++---- include/linux/sunrpc/svc.h | 21 +++++++++++++++++++++ 3 files changed, 35 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 3308dd671ef0..f32128955ec8 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -828,7 +828,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) return false; if (xdr_stream_encode_u32(xdr, resp->len) < 0) return false; - xdr_write_pages(xdr, resp->pages, 0, resp->len); + svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, 0, + resp->len); if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) return false; break; @@ -859,8 +860,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) return false; - xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, - resp->count); + svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, + rqstp->rq_res.page_base, + resp->count); if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) return false; break; @@ -961,7 +963,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) return false; if (!svcxdr_encode_cookieverf3(xdr, resp->verf)) return false; - xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); + svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0, + dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) return false; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index caf6355b18fa..5777f40c7353 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -468,7 +468,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) case nfs_ok: if (xdr_stream_encode_u32(xdr, resp->len) < 0) return false; - xdr_write_pages(xdr, &resp->page, 0, resp->len); + svcxdr_encode_opaque_pages(rqstp, xdr, &resp->page, 0, + resp->len); if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) return false; break; @@ -491,8 +492,9 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) return false; - xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, - resp->count); + svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, + rqstp->rq_res.page_base, + resp->count); if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) return false; break; @@ -511,7 +513,8 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) return false; switch (resp->status) { case nfs_ok: - xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); + svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0, + dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) return false; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 762d7231e574..3b10636c51a9 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -508,6 +508,27 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp) xdr->rqst = NULL; } +/** + * svcxdr_encode_opaque_pages - Insert pages into an xdr_stream + * @xdr: xdr_stream to be updated + * @pages: array of pages to insert + * @base: starting offset of first data byte in @pages + * @len: number of data bytes in @pages to insert + * + * After the @pages are added, the tail iovec is instantiated pointing + * to end of the head buffer, and the stream is set up to encode + * subsequent items into the tail. + */ +static inline void svcxdr_encode_opaque_pages(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct page **pages, + unsigned int base, + unsigned int len) +{ + xdr_write_pages(xdr, pages, base, len); + xdr->page_ptr = rqstp->rq_next_page - 1; +} + /** * svcxdr_set_auth_slack - * @rqstp: RPC transaction -- cgit v1.2.3 From ba21e20b309564c64761f4953db4456ec8c4e49c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:45:43 -0400 Subject: NFSD: Use svcxdr_encode_opaque_pages() in nfsd4_encode_splice_read() Commit 15b23ef5d348 ("nfsd4: fix corruption of NFSv4 read data") encountered exactly the same issue: after a splice read, a filesystem-owned page is left in rq_pages[]; the symptoms are the same as described there. If the computed number of pages in nfsd4_encode_splice_read() is not exactly the same as the actual number of pages that were consumed by nfsd_splice_actor() (say, because of a bug) then hilarity ensues. Instead of recomputing the page offset based on the size of the payload, use rq_next_page, which is already properly updated by nfsd_splice_actor(), to cause svc_rqst_release_pages() to operate correctly in every instance. This is a defensive change since we believe that after commit 27c934dd8832 ("nfsd: don't replace page in rq_pages if it's a continuation of last page") has been applied, there are no known opportunities for nfsd_splice_actor() to screw up. So I'm not marking it for stable backport. Reported-by: Andy Zlotek Suggested-by: Calum Mackay Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c5c6873b938d..b78c2391a2a1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4032,6 +4032,11 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, return nfsd4_encode_stateid(xdr, &od->od_stateid); } +/* + * The operation of this function assumes that this is the only + * READ operation in the COMPOUND. If there are multiple READs, + * we use nfsd4_encode_readv(). + */ static __be32 nfsd4_encode_splice_read( struct nfsd4_compoundres *resp, struct nfsd4_read *read, @@ -4042,8 +4047,12 @@ static __be32 nfsd4_encode_splice_read( int status, space_left; __be32 nfserr; - /* Make sure there will be room for padding if needed */ - if (xdr->end - xdr->p < 1) + /* + * Make sure there is room at the end of buf->head for + * svcxdr_encode_opaque_pages() to create a tail buffer + * to XDR-pad the payload. + */ + if (xdr->iov != xdr->buf->head || xdr->end - xdr->p < 1) return nfserr_resource; nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, @@ -4052,6 +4061,8 @@ static __be32 nfsd4_encode_splice_read( read->rd_length = maxcount; if (nfserr) goto out_err; + svcxdr_encode_opaque_pages(read->rd_rqstp, xdr, buf->pages, + buf->page_base, maxcount); status = svc_encode_result_payload(read->rd_rqstp, buf->head[0].iov_len, maxcount); if (status) { @@ -4059,31 +4070,19 @@ static __be32 nfsd4_encode_splice_read( goto out_err; } - buf->page_len = maxcount; - buf->len += maxcount; - xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1) - / PAGE_SIZE; - - /* Use rest of head for padding and remaining ops: */ - buf->tail[0].iov_base = xdr->p; - buf->tail[0].iov_len = 0; - xdr->iov = buf->tail; - if (maxcount&3) { - int pad = 4 - (maxcount&3); - - *(xdr->p++) = 0; - - buf->tail[0].iov_base += maxcount&3; - buf->tail[0].iov_len = pad; - buf->len += pad; - } - + /* + * Prepare to encode subsequent operations. + * + * xdr_truncate_encode() is not safe to use after a successful + * splice read has been done, so the following stream + * manipulations are open-coded. + */ space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, buf->buflen - buf->len); buf->buflen = buf->len + space_left; xdr->end = (__be32 *)((void *)xdr->end + space_left); - return 0; + return nfs_ok; out_err: /* -- cgit v1.2.3 From ed4a567a179ec15c15f78fa60ca6de9cc4f34897 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:45:50 -0400 Subject: NFSD: Update rq_next_page between COMPOUND operations A GETATTR with a large result can advance xdr->page_ptr without updating rq_next_page. If a splice READ follows that GETATTR in the COMPOUND, nfsd_splice_actor can start splicing at the wrong page. I've also seen READLINK and READDIR leave rq_next_page in an unmodified state. There are potentially a myriad of combinations like this, so play it safe: move the rq_next_page update to nfsd4_encode_operation. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b78c2391a2a1..3887da048232 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5438,6 +5438,12 @@ status: release: if (opdesc && opdesc->op_release) opdesc->op_release(&op->u); + + /* + * Account for pages consumed while encoding this operation. + * The xdr_stream primitives don't manage rq_next_page. + */ + rqstp->rq_next_page = xdr->page_ptr + 1; } /* @@ -5506,9 +5512,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr) p = resp->statusp; *p++ = resp->cstate.status; - - rqstp->rq_next_page = xdr->page_ptr + 1; - *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); -- cgit v1.2.3 From 507df40ebf31655203dd05e6e31db5849a83347a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:45:56 -0400 Subject: NFSD: Hoist rq_vec preparation into nfsd_read() Accrue the following benefits: a) Deduplicate this common bit of code. b) Don't prepare rq_vec for NFSv2 and NFSv3 spliced reads, which don't use rq_vec. This is already the case for nfsd4_encode_read(). c) Eventually, converting NFSD's read path to use a bvec iterator will be simpler. In the next patch, nfsd_iter_read() will replace nfsd_readv() for all NFS versions. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 14 +---------- fs/nfsd/nfsproc.c | 14 +---------- fs/nfsd/vfs.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++------- fs/nfsd/vfs.h | 8 +++++-- 4 files changed, 68 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index e6bb8eeb5bc2..fc8d5b7db9f8 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -151,8 +151,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp) { struct nfsd3_readargs *argp = rqstp->rq_argp; struct nfsd3_readres *resp = rqstp->rq_resp; - unsigned int len; - int v; dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", SVCFH_fmt(&argp->fh), @@ -166,17 +164,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp) if (argp->offset + argp->count > (u64)OFFSET_MAX) argp->count = (u64)OFFSET_MAX - argp->offset; - v = 0; - len = argp->count; resp->pages = rqstp->rq_next_page; - while (len > 0) { - struct page *page = *(rqstp->rq_next_page++); - - rqstp->rq_vec[v].iov_base = page_address(page); - rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); - len -= rqstp->rq_vec[v].iov_len; - v++; - } /* Obtain buffer pointer for payload. * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) @@ -187,7 +175,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, v, &resp->count, &resp->eof); + &resp->count, &resp->eof); return rpc_success; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index c37195572fd0..a7315928a760 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -176,9 +176,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) { struct nfsd_readargs *argp = rqstp->rq_argp; struct nfsd_readres *resp = rqstp->rq_resp; - unsigned int len; u32 eof; - int v; dprintk("nfsd: READ %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), @@ -187,17 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); - v = 0; - len = argp->count; resp->pages = rqstp->rq_next_page; - while (len > 0) { - struct page *page = *(rqstp->rq_next_page++); - - rqstp->rq_vec[v].iov_base = page_address(page); - rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); - len -= rqstp->rq_vec[v].iov_len; - v++; - } /* Obtain buffer pointer for payload. 19 is 1 word for * status, 17 words for fattr, and 1 word for the byte count. @@ -207,7 +195,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) resp->count = argp->count; fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, v, &resp->count, &eof); + &resp->count, &eof); if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0016bcc04a59..d9055b5c496b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1036,6 +1036,50 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } +/** + * nfsd_iter_read - Perform a VFS read using an iterator + * @rqstp: RPC transaction context + * @fhp: file handle of file to be read + * @file: opened struct file of file to be read + * @offset: starting byte offset + * @count: IN: requested number of bytes; OUT: number of bytes read + * @base: offset in first page of read buffer + * @eof: OUT: set non-zero if operation reached the end of the file + * + * Some filesystems or situations cannot use nfsd_splice_read. This + * function is the slightly less-performant fallback for those cases. + * + * Returns nfs_ok on success, otherwise an nfserr stat value is + * returned. + */ +__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, unsigned long *count, + unsigned int base, u32 *eof) +{ + unsigned long v, total; + struct iov_iter iter; + loff_t ppos = offset; + struct page *page; + ssize_t host_err; + + v = 0; + total = *count; + while (total) { + page = *(rqstp->rq_next_page++); + rqstp->rq_vec[v].iov_base = page_address(page) + base; + rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base); + total -= rqstp->rq_vec[v].iov_len; + ++v; + base = 0; + } + WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec)); + + trace_nfsd_read_vector(rqstp, fhp, offset, *count); + iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count); + host_err = vfs_iter_read(file, &iter, &ppos, 0); + return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); +} + /* * Gathered writes: If another process is currently writing to the file, * there's a high chance this is another nfsd (triggered by a bulk write @@ -1161,14 +1205,24 @@ out_nfserr: return nfserr; } -/* - * Read data from a file. count must contain the requested read count - * on entry. On return, *count contains the number of bytes actually read. +/** + * nfsd_read - Read data from a file + * @rqstp: RPC transaction context + * @fhp: file handle of file to be read + * @offset: starting byte offset + * @count: IN: requested number of bytes; OUT: number of bytes read + * @eof: OUT: set non-zero if operation reached the end of the file + * + * The caller must verify that there is enough space in @rqstp.rq_res + * to perform this operation. + * * N.B. After this call fhp needs an fh_put + * + * Returns nfs_ok on success, otherwise an nfserr stat value is + * returned. */ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count, - u32 *eof) + loff_t offset, unsigned long *count, u32 *eof) { struct nfsd_file *nf; struct file *file; @@ -1183,12 +1237,10 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags)) err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof); else - err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof); + err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof); nfsd_file_put(nf); - trace_nfsd_read_done(rqstp, fhp, offset, *count); - return err; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 43fb57a301d3..6381a2890b0b 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -115,8 +115,12 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kvec *vec, int vlen, unsigned long *count, u32 *eof); -__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, - loff_t, struct kvec *, int, unsigned long *, +__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, + unsigned long *count, unsigned int base, + u32 *eof); +__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, unsigned long *count, u32 *eof); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *, -- cgit v1.2.3 From 703d7521555504b3a316b105b4806d641b7ebc76 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:46:03 -0400 Subject: NFSD: Hoist rq_vec preparation into nfsd_read() [step two] Now that the preparation of an rq_vec has been removed from the generic read path, nfsd_splice_read() no longer needs to reset rq_next_page. nfsd4_encode_read() calls nfsd_splice_read() directly. As far as I can ascertain, resetting rq_next_page for NFSv4 splice reads is unnecessary because rq_next_page is already set correctly. Moreover, resetting it might even be incorrect if previous operations in the COMPOUND have already consumed at least a page of the send buffer. I would expect that the result would be encoding the READ payload over previously-encoded results. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 10 +++++----- fs/nfsd/vfs.c | 13 ++++++++++++- include/linux/sunrpc/xdr.h | 3 +-- net/sunrpc/xdr.c | 26 ++++++++++++-------------- 4 files changed, 30 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3887da048232..c67bc904c7b7 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4103,13 +4103,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, __be32 zero = xdr_zero; __be32 nfserr; - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount); - if (read->rd_vlen < 0) + if (xdr_reserve_space_vec(xdr, maxcount) < 0) return nfserr_resource; - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, - resp->rqstp->rq_vec, read->rd_vlen, &maxcount, - &read->rd_eof); + nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, file, + read->rd_offset, &maxcount, + xdr->buf->page_len & ~PAGE_MASK, + &read->rd_eof); read->rd_length = maxcount; if (nfserr) return nfserr; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d9055b5c496b..37febe1ff039 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1003,6 +1003,18 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp, } } +/** + * nfsd_splice_read - Perform a VFS read using a splice pipe + * @rqstp: RPC transaction context + * @fhp: file handle of file to be read + * @file: opened struct file of file to be read + * @offset: starting byte offset + * @count: IN: requested number of bytes; OUT: number of bytes read + * @eof: OUT: set non-zero if operation reached the end of the file + * + * Returns nfs_ok on success, otherwise an nfserr stat value is + * returned. + */ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, u32 *eof) @@ -1016,7 +1028,6 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, ssize_t host_err; trace_nfsd_read_splice(rqstp, fhp, offset, *count); - rqstp->rq_next_page = rqstp->rq_respages + 1; host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 72014c9216fc..f89ec4b5ea16 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -242,8 +242,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); -extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, - size_t nbytes); +extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes); extern void __xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern void xdr_truncate_decode(struct xdr_stream *xdr, size_t len); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 36835b2f5446..2a22e78af116 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1070,22 +1070,22 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) } EXPORT_SYMBOL_GPL(xdr_reserve_space); - /** * xdr_reserve_space_vec - Reserves a large amount of buffer space for sending * @xdr: pointer to xdr_stream - * @vec: pointer to a kvec array * @nbytes: number of bytes to reserve * - * Reserves enough buffer space to encode 'nbytes' of data and stores the - * pointers in 'vec'. The size argument passed to xdr_reserve_space() is - * determined based on the number of bytes remaining in the current page to - * avoid invalidating iov_base pointers when xdr_commit_encode() is called. + * The size argument passed to xdr_reserve_space() is determined based + * on the number of bytes remaining in the current page to avoid + * invalidating iov_base pointers when xdr_commit_encode() is called. + * + * Return values: + * %0: success + * %-EMSGSIZE: not enough space is available in @xdr */ -int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes) +int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes) { - int thislen; - int v = 0; + size_t thislen; __be32 *p; /* @@ -1097,21 +1097,19 @@ int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbyte xdr->end = xdr->p; } + /* XXX: Let's find a way to make this more efficient */ while (nbytes) { thislen = xdr->buf->page_len % PAGE_SIZE; thislen = min_t(size_t, nbytes, PAGE_SIZE - thislen); p = xdr_reserve_space(xdr, thislen); if (!p) - return -EIO; + return -EMSGSIZE; - vec[v].iov_base = p; - vec[v].iov_len = thislen; - v++; nbytes -= thislen; } - return v; + return 0; } EXPORT_SYMBOL_GPL(xdr_reserve_space_vec); -- cgit v1.2.3 From df56b384de521236c261bf34856dd0d3ba772850 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:46:09 -0400 Subject: NFSD: Remove nfsd_readv() nfsd_readv()'s consumers now use nfsd_iter_read(). Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 15 --------------- fs/nfsd/vfs.h | 5 ----- 2 files changed, 20 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 37febe1ff039..59b7d60ae33e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1032,21 +1032,6 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } -__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct file *file, loff_t offset, - struct kvec *vec, int vlen, unsigned long *count, - u32 *eof) -{ - struct iov_iter iter; - loff_t ppos = offset; - ssize_t host_err; - - trace_nfsd_read_vector(rqstp, fhp, offset, *count); - iov_iter_kvec(&iter, ITER_DEST, vec, vlen, *count); - host_err = vfs_iter_read(file, &iter, &ppos, 0); - return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); -} - /** * nfsd_iter_read - Perform a VFS read using an iterator * @rqstp: RPC transaction context diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 6381a2890b0b..a6890ea7b765 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -110,11 +110,6 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, u32 *eof); -__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct file *file, loff_t offset, - struct kvec *vec, int vlen, - unsigned long *count, - u32 *eof); __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, unsigned int base, -- cgit v1.2.3 From 518f375c15af724cd89a4ec888dea942bb27f77f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 19 May 2023 07:17:23 -0400 Subject: nfsd: don't provide pre/post-op attrs if fh_getattr fails nfsd calls fh_getattr to get the latest inode attrs for pre/post-op info. In the event that fh_getattr fails, it resorts to scraping cached values out of the inode directly. Since these attributes are optional, we can just skip providing them altogether when this happens. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Reviewed-by: Neil Brown --- fs/nfsd/nfsfh.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ccd8485fee04..e8e13ae72e3c 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -623,16 +623,9 @@ void fh_fill_pre_attrs(struct svc_fh *fhp) inode = d_inode(fhp->fh_dentry); err = fh_getattr(fhp, &stat); - if (err) { - /* Grab the times from inode anyway */ - stat.mtime = inode->i_mtime; - stat.ctime = inode->i_ctime; - stat.size = inode->i_size; - if (v4 && IS_I_VERSION(inode)) { - stat.change_cookie = inode_query_iversion(inode); - stat.result_mask |= STATX_CHANGE_COOKIE; - } - } + if (err) + return; + if (v4) fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); @@ -660,15 +653,10 @@ void fh_fill_post_attrs(struct svc_fh *fhp) printk("nfsd: inode locked twice during operation.\n"); err = fh_getattr(fhp, &fhp->fh_post_attr); - if (err) { - fhp->fh_post_saved = false; - fhp->fh_post_attr.ctime = inode->i_ctime; - if (v4 && IS_I_VERSION(inode)) { - fhp->fh_post_attr.change_cookie = inode_query_iversion(inode); - fhp->fh_post_attr.result_mask |= STATX_CHANGE_COOKIE; - } - } else - fhp->fh_post_saved = true; + if (err) + return; + + fhp->fh_post_saved = true; if (v4) fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, inode); -- cgit v1.2.3 From 665e89ab7c5af1f2d260834c861a74b01a30f95f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 3 Jun 2023 07:14:14 +1000 Subject: lockd: drop inappropriate svc_get() from locked_get() The below-mentioned patch was intended to simplify refcounting on the svc_serv used by locked. The goal was to only ever have a single reference from the single thread. To that end we dropped a call to lockd_start_svc() (except when creating thread) which would take a reference, and dropped the svc_put(serv) that would drop that reference. Unfortunately we didn't also remove the svc_get() from lockd_create_svc() in the case where the svc_serv already existed. So after the patch: - on the first call the svc_serv was allocated and the one reference was given to the thread, so there are no extra references - on subsequent calls svc_get() was called so there is now an extra reference. This is clearly not consistent. The inconsistency is also clear in the current code in lockd_get() takes *two* references, one on nlmsvc_serv and one by incrementing nlmsvc_users. This clearly does not match lockd_put(). So: drop that svc_get() from lockd_get() (which used to be in lockd_create_svc(). Reported-by: Ido Schimmel Closes: https://lore.kernel.org/linux-nfs/ZHsI%2FH16VX9kJQX1@shredder/T/#u Fixes: b73a2972041b ("lockd: move lockd_start_svc() call into lockd_create_svc()") Signed-off-by: NeilBrown Tested-by: Ido Schimmel Signed-off-by: Chuck Lever --- fs/lockd/svc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 04ba95b83d16..22d3ff3818f5 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -355,7 +355,6 @@ static int lockd_get(void) int error; if (nlmsvc_serv) { - svc_get(nlmsvc_serv); nlmsvc_users++; return 0; } -- cgit v1.2.3 From 58f5d894006d82ed7335e1c37182fbc5f08c2f51 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Tue, 6 Jun 2023 16:41:02 -0700 Subject: NFSD: add encoding of op_recall flag for write delegation Modified nfsd4_encode_open to encode the op_recall flag properly for OPEN result with write delegation granted. Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c67bc904c7b7..a615fe665381 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3970,7 +3970,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, 32); if (!p) return nfserr_resource; - *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(open->op_recall); /* * TODO: space_limit's in delegations -- cgit v1.2.3 From 262176798b18b12fd8ab84c94cfece0a6a652476 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:13:39 -0400 Subject: NFSD: Add an nfsd4_encode_nfstime4() helper Clean up: de-duplicate some common code. Reviewed-by: Jeff Layton Acked-by: Tom Talpey Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a615fe665381..26b1343c8035 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2541,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, return p; } +static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr, + struct timespec64 *tv) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 3); + if (!p) + return nfserr_resource; + + p = xdr_encode_hyper(p, (s64)tv->tv_sec); + *p = cpu_to_be32(tv->tv_nsec); + return nfs_ok; +} + /* * ctime (in NFSv4, time_metadata) is not writeable, and the client * doesn't really care what resolution could theoretically be stored by @@ -3352,11 +3366,9 @@ out_acl: p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); - *p++ = cpu_to_be32(stat.atime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.atime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { p = xdr_reserve_space(xdr, 12); @@ -3365,25 +3377,19 @@ out_acl: p = encode_time_delta(p, d_inode(dentry)); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); - *p++ = cpu_to_be32(stat.ctime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.ctime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); - *p++ = cpu_to_be32(stat.mtime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.mtime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_CREATE) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec); - *p++ = cpu_to_be32(stat.btime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.btime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { u64 ino = stat.ino; -- cgit v1.2.3 From ed9ab7346e908496816cffdecd46932035f66e2e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jun 2023 17:51:34 -0400 Subject: nfsd: move init of percpu reply_cache_stats counters back to nfsd_init_net Commit f5f9d4a314da ("nfsd: move reply cache initialization into nfsd startup") moved the initialization of the reply cache into nfsd startup, but didn't account for the stats counters, which can be accessed before nfsd is ever started. The result can be a NULL pointer dereference when someone accesses /proc/fs/nfsd/reply_cache_stats while nfsd is still shut down. This is a regression and a user-triggerable oops in the right situation: - non-x86_64 arch - /proc/fs/nfsd is mounted in the namespace - nfsd is not started in the namespace - unprivileged user calls "cat /proc/fs/nfsd/reply_cache_stats" Although this is easy to trigger on some arches (like aarch64), on x86_64, calling this_cpu_ptr(NULL) evidently returns a pointer to the fixed_percpu_data. That struct looks just enough like a newly initialized percpu var to allow nfsd_reply_cache_stats_show to access it without Oopsing. Move the initialization of the per-net+per-cpu reply-cache counters back into nfsd_init_net, while leaving the rest of the reply cache allocations to be done at nfsd startup time. Kudos to Eirik who did most of the legwork to track this down. Cc: stable@vger.kernel.org # v6.3+ Fixes: f5f9d4a314da ("nfsd: move reply cache initialization into nfsd startup") Reported-and-tested-by: Eirik Fuller Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2215429 Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/cache.h | 2 ++ fs/nfsd/nfscache.c | 25 ++++++++++++++----------- fs/nfsd/nfsctl.c | 10 +++++++++- 3 files changed, 25 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index f21259ead64b..4c9b87850ab1 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -80,6 +80,8 @@ enum { int nfsd_drc_slab_create(void); void nfsd_drc_slab_free(void); +int nfsd_net_reply_cache_init(struct nfsd_net *nn); +void nfsd_net_reply_cache_destroy(struct nfsd_net *nn); int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 041faa13b852..a8eda1c85829 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -148,12 +148,23 @@ void nfsd_drc_slab_free(void) kmem_cache_destroy(drc_slab); } -static int nfsd_reply_cache_stats_init(struct nfsd_net *nn) +/** + * nfsd_net_reply_cache_init - per net namespace reply cache set-up + * @nn: nfsd_net being initialized + * + * Returns zero on succes; otherwise a negative errno is returned. + */ +int nfsd_net_reply_cache_init(struct nfsd_net *nn) { return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM); } -static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn) +/** + * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down + * @nn: nfsd_net being freed + * + */ +void nfsd_net_reply_cache_destroy(struct nfsd_net *nn) { nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM); } @@ -169,17 +180,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) hashsize = nfsd_hashsize(nn->max_drc_entries); nn->maskbits = ilog2(hashsize); - status = nfsd_reply_cache_stats_init(nn); - if (status) - goto out_nomem; - nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; nn->nfsd_reply_cache_shrinker.seeks = 1; status = register_shrinker(&nn->nfsd_reply_cache_shrinker, "nfsd-reply:%s", nn->nfsd_name); if (status) - goto out_stats_destroy; + return status; nn->drc_hashtbl = kvzalloc(array_size(hashsize, sizeof(*nn->drc_hashtbl)), GFP_KERNEL); @@ -195,9 +202,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) return 0; out_shrinker: unregister_shrinker(&nn->nfsd_reply_cache_shrinker); -out_stats_destroy: - nfsd_reply_cache_stats_destroy(nn); -out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); return -ENOMEM; } @@ -217,7 +221,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) rp, nn); } } - nfsd_reply_cache_stats_destroy(nn); kvfree(nn->drc_hashtbl); nn->drc_hashtbl = NULL; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1489e0b703b4..1a892a0b35de 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1505,6 +1505,9 @@ static __net_init int nfsd_init_net(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; + retval = nfsd_net_reply_cache_init(nn); + if (retval) + goto out_repcache_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; nfsd4_init_leases_net(nn); @@ -1513,6 +1516,8 @@ static __net_init int nfsd_init_net(struct net *net) return 0; +out_repcache_error: + nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); out_export_error: @@ -1521,9 +1526,12 @@ out_export_error: static __net_exit void nfsd_exit_net(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfsd_net_reply_cache_destroy(nn); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); - nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); + nfsd_netns_free_versions(nn); } static struct pernet_operations nfsd_net_ops = { -- cgit v1.2.3 From 5e092be7418fdf0e1e288529bd7e657cb9d7954c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 16 Jun 2023 23:10:53 -0400 Subject: NFSD: Distinguish per-net namespace initialization I find the naming of nfsd_init_net() and nfsd_startup_net() to be confusingly similar. Rename the namespace initialization and tear- down ops and add comments to distinguish their separate purposes. Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 23 +++++++++++++++++++---- fs/nfsd/nfssvc.c | 5 +++++ 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1a892a0b35de..1b8b1aab9a15 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1494,7 +1494,17 @@ static int create_proc_exports_entry(void) unsigned int nfsd_net_id; -static __net_init int nfsd_init_net(struct net *net) +/** + * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace + * @net: a freshly-created network namespace + * + * This information stays around as long as the network namespace is + * alive whether or not there is an NFSD instance running in the + * namespace. + * + * Returns zero on success, or a negative errno otherwise. + */ +static __net_init int nfsd_net_init(struct net *net) { int retval; struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -1524,7 +1534,12 @@ out_export_error: return retval; } -static __net_exit void nfsd_exit_net(struct net *net) +/** + * nfsd_net_exit - Release the nfsd_net portion of a net namespace + * @net: a network namespace that is about to be destroyed + * + */ +static __net_exit void nfsd_net_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -1535,8 +1550,8 @@ static __net_exit void nfsd_exit_net(struct net *net) } static struct pernet_operations nfsd_net_ops = { - .init = nfsd_init_net, - .exit = nfsd_exit_net, + .init = nfsd_net_init, + .exit = nfsd_net_exit, .id = &nfsd_net_id, .size = sizeof(struct nfsd_net), }; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9c7b1ef5be40..2154fa63c5f2 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -402,6 +402,11 @@ void nfsd_reset_write_verifier(struct nfsd_net *nn) write_sequnlock(&nn->writeverf_lock); } +/* + * Crank up a set of per-namespace resources for a new NFSD instance, + * including lockd, a duplicate reply cache, an open file cache + * instance, and a cache of NFSv4 state objects. + */ static int nfsd_startup_net(struct net *net, const struct cred *cred) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); -- cgit v1.2.3 From 75bfb70457a4c4c9f0095e39885382fc5049c5ce Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Jun 2023 15:52:05 +0100 Subject: nfsd: remove redundant assignments to variable len There are a few assignments to variable len where the value is not being read and so the assignments are redundant and can be removed. In one case, the variable len can be removed completely. Cleans up 4 clang scan warnings of the form: fs/nfsd/export.c:100:7: warning: Although the value stored to 'len' is used in the enclosing expression, the value is never actually read from 'len' [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/export.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index ae85257b4238..11a0eaa2f914 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -97,7 +97,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) goto out; err = -EINVAL; - if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + if (qword_get(&mesg, buf, PAGE_SIZE) <= 0) goto out; err = -ENOENT; @@ -107,7 +107,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) dprintk("found domain %s\n", buf); err = -EINVAL; - if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + if (qword_get(&mesg, buf, PAGE_SIZE) <= 0) goto out; fsidtype = simple_strtoul(buf, &ep, 10); if (*ep) @@ -593,7 +593,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client path expiry [flags anonuid anongid fsid] */ char *buf; - int len; int err; struct auth_domain *dom = NULL; struct svc_export exp = {}, *expp; @@ -609,8 +608,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) /* client */ err = -EINVAL; - len = qword_get(&mesg, buf, PAGE_SIZE); - if (len <= 0) + if (qword_get(&mesg, buf, PAGE_SIZE) <= 0) goto out; err = -ENOENT; @@ -620,7 +618,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) /* path */ err = -EINVAL; - if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + if (qword_get(&mesg, buf, PAGE_SIZE) <= 0) goto out1; err = kern_path(buf, 0, &exp.ex_path); @@ -665,7 +663,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) goto out3; exp.ex_fsid = an_int; - while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { + while (qword_get(&mesg, buf, PAGE_SIZE) > 0) { if (strcmp(buf, "fsloc") == 0) err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); else if (strcmp(buf, "uuid") == 0) -- cgit v1.2.3