From dcb779fcd4ed5984ad15991d574943d12a8693d1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 15 Feb 2023 06:53:54 -0500 Subject: nfsd: allow reaping files still under writeback On most filesystems, there is no reason to delay reaping an nfsd_file just because its underlying inode is still under writeback. nfsd just relies on client activity or the local flusher threads to do writeback. The main exception is NFS, which flushes all of its dirty data on last close. Add a new EXPORT_OP_FLUSH_ON_CLOSE flag to allow filesystems to signal that they do this, and only skip closing files under writeback on such filesystems. Also, remove a redundant NULL file pointer check in nfsd_file_check_writeback, and clean up nfs's export op flag definitions. Signed-off-by: Jeff Layton Acked-by: Anna Schumaker Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 601700fedc91..9edb29101ec8 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -220,6 +220,7 @@ struct export_operations { #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply atomic attribute updates */ +#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ unsigned long flags; }; -- cgit v1.2.3 From c88c680c6de5077292667fae4a147fd5a6523479 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Mar 2023 07:15:58 -0500 Subject: lockd: remove 2 unused helper functions Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/lockd/lockd.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0168ac9fdda8..26c2aed31a0c 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -99,21 +99,11 @@ struct nsm_handle { /* * Rigorous type checking on sockaddr type conversions */ -static inline struct sockaddr_in *nlm_addr_in(const struct nlm_host *host) -{ - return (struct sockaddr_in *)&host->h_addr; -} - static inline struct sockaddr *nlm_addr(const struct nlm_host *host) { return (struct sockaddr *)&host->h_addr; } -static inline struct sockaddr_in *nlm_srcaddr_in(const struct nlm_host *host) -{ - return (struct sockaddr_in *)&host->h_srcaddr; -} - static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host) { return (struct sockaddr *)&host->h_srcaddr; -- cgit v1.2.3 From f0aa4852e63f9c1cfd4322c770e69d7e6817e906 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Mar 2023 07:15:59 -0500 Subject: lockd: move struct nlm_wait to lockd.h The next patch needs struct nlm_wait in fs/lockd/clntproc.c, so move the definition to a shared header file. As an added clean-up, drop the unused b_reclaim field. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/clntlock.c | 12 ------------ include/linux/lockd/lockd.h | 11 ++++++++++- 2 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 82b19a30e0f0..464cb15c1a06 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -29,18 +29,6 @@ static int reclaimer(void *ptr); * client perspective. */ -/* - * This is the representation of a blocked client lock. - */ -struct nlm_wait { - struct list_head b_list; /* linked list */ - wait_queue_head_t b_wait; /* where to wait on */ - struct nlm_host * b_host; - struct file_lock * b_lock; /* local file lock */ - unsigned short b_reclaim; /* got to reclaim lock */ - __be32 b_status; /* grant callback status */ -}; - static LIST_HEAD(nlm_blocked); static DEFINE_SPINLOCK(nlm_blocked_lock); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 26c2aed31a0c..de5ad2c2179a 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -121,7 +121,16 @@ struct nlm_lockowner { uint32_t pid; }; -struct nlm_wait; +/* + * This is the representation of a blocked client lock. + */ +struct nlm_wait { + struct list_head b_list; /* linked list */ + wait_queue_head_t b_wait; /* where to wait on */ + struct nlm_host *b_host; + struct file_lock *b_lock; /* local file lock */ + __be32 b_status; /* grant callback status */ +}; /* * Memory chunk for NLM client RPC request. -- cgit v1.2.3 From 2005f5b9c35bd736c81e9f24f5c5051967c022ee Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Mar 2023 07:16:00 -0500 Subject: lockd: fix races in client GRANTED_MSG wait logic After the wait for a grant is done (for whatever reason), nlmclnt_block updates the status of the nlm_rqst with the status of the block. At the point it does this, however, the block is still queued its status could change at any time. This is particularly a problem when the waiting task is signaled during the wait. We can end up giving up on the lock just before the GRANTED_MSG callback comes in, and accept it even though the lock request gets back an error, leaving a dangling lock on the server. Since the nlm_wait never lives beyond the end of nlmclnt_lock, put it on the stack and add functions to allow us to enqueue and dequeue the block. Enqueue it just before the lock/wait loop, and dequeue it just after we exit the loop instead of waiting until the end of the function. Also, scrape the status at the time that we dequeue it to ensure that it's final. Reported-by: Yongcheng Yang Link: https://bugzilla.redhat.com/show_bug.cgi?id=2063818 Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/clntlock.c | 42 +++++++++++++++++++++--------------------- fs/lockd/clntproc.c | 28 ++++++++++++++++++---------- include/linux/lockd/lockd.h | 8 +++++--- 3 files changed, 44 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 464cb15c1a06..c374ee072db3 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -82,41 +82,42 @@ void nlmclnt_done(struct nlm_host *host) } EXPORT_SYMBOL_GPL(nlmclnt_done); +void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, struct file_lock *fl) +{ + block->b_host = host; + block->b_lock = fl; + init_waitqueue_head(&block->b_wait); + block->b_status = nlm_lck_blocked; +} + /* * Queue up a lock for blocking so that the GRANTED request can see it */ -struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl) +void nlmclnt_queue_block(struct nlm_wait *block) { - struct nlm_wait *block; - - block = kmalloc(sizeof(*block), GFP_KERNEL); - if (block != NULL) { - block->b_host = host; - block->b_lock = fl; - init_waitqueue_head(&block->b_wait); - block->b_status = nlm_lck_blocked; - - spin_lock(&nlm_blocked_lock); - list_add(&block->b_list, &nlm_blocked); - spin_unlock(&nlm_blocked_lock); - } - return block; + spin_lock(&nlm_blocked_lock); + list_add(&block->b_list, &nlm_blocked); + spin_unlock(&nlm_blocked_lock); } -void nlmclnt_finish_block(struct nlm_wait *block) +/* + * Dequeue the block and return its final status + */ +__be32 nlmclnt_dequeue_block(struct nlm_wait *block) { - if (block == NULL) - return; + __be32 status; + spin_lock(&nlm_blocked_lock); list_del(&block->b_list); + status = block->b_status; spin_unlock(&nlm_blocked_lock); - kfree(block); + return status; } /* * Block on a lock */ -int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) +int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout) { long ret; @@ -142,7 +143,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) /* Reset the lock status after a server reboot so we resend */ if (block->b_status == nlm_lck_denied_grace_period) block->b_status = nlm_lck_blocked; - req->a_res.status = block->b_status; return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 16b4de868cd2..a14c9110719c 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -516,9 +516,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) const struct cred *cred = nfs_file_cred(fl->fl_file); struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; - struct nlm_wait *block = NULL; + struct nlm_wait block; unsigned char fl_flags = fl->fl_flags; unsigned char fl_type; + __be32 b_status; int status = -ENOLCK; if (nsm_monitor(host) < 0) @@ -531,31 +532,41 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) if (status < 0) goto out; - block = nlmclnt_prepare_block(host, fl); + nlmclnt_prepare_block(&block, host, fl); again: /* * Initialise resp->status to a valid non-zero value, * since 0 == nlm_lck_granted */ resp->status = nlm_lck_blocked; - for(;;) { + + /* + * A GRANTED callback can come at any time -- even before the reply + * to the LOCK request arrives, so we queue the wait before + * requesting the lock. + */ + nlmclnt_queue_block(&block); + for (;;) { /* Reboot protection */ fl->fl_u.nfs_fl.state = host->h_state; status = nlmclnt_call(cred, req, NLMPROC_LOCK); if (status < 0) break; /* Did a reclaimer thread notify us of a server reboot? */ - if (resp->status == nlm_lck_denied_grace_period) + if (resp->status == nlm_lck_denied_grace_period) continue; if (resp->status != nlm_lck_blocked) break; /* Wait on an NLM blocking lock */ - status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); + status = nlmclnt_wait(&block, req, NLMCLNT_POLL_TIMEOUT); if (status < 0) break; - if (resp->status != nlm_lck_blocked) + if (block.b_status != nlm_lck_blocked) break; } + b_status = nlmclnt_dequeue_block(&block); + if (resp->status == nlm_lck_blocked) + resp->status = b_status; /* if we were interrupted while blocking, then cancel the lock request * and exit @@ -564,7 +575,7 @@ again: if (!req->a_args.block) goto out_unlock; if (nlmclnt_cancel(host, req->a_args.block, fl) == 0) - goto out_unblock; + goto out; } if (resp->status == nlm_granted) { @@ -593,8 +604,6 @@ again: status = -ENOLCK; else status = nlm_stat_to_errno(resp->status); -out_unblock: - nlmclnt_finish_block(block); out: nlmclnt_release_call(req); return status; @@ -602,7 +611,6 @@ out_unlock: /* Fatal error: ensure that we remove the lock altogether */ dprintk("lockd: lock attempt ended in fatal error.\n" " Attempting to unlock.\n"); - nlmclnt_finish_block(block); fl_type = fl->fl_type; fl->fl_type = F_UNLCK; down_read(&host->h_rwsem); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index de5ad2c2179a..f42594a9efe0 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -211,9 +211,11 @@ struct nlm_rqst * nlm_alloc_call(struct nlm_host *host); int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *); void nlmclnt_release_call(struct nlm_rqst *); -struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); -void nlmclnt_finish_block(struct nlm_wait *block); -int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); +void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, + struct file_lock *fl); +void nlmclnt_queue_block(struct nlm_wait *block); +__be32 nlmclnt_dequeue_block(struct nlm_wait *block); +int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout); __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock); void nlmclnt_recovery(struct nlm_host *); -- cgit v1.2.3 From e59fb6749ed833deee5b3cfd7e89925296d41f49 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Mar 2023 07:16:02 -0500 Subject: nfs: move nfs_fhandle_hash to common include file lockd needs to be able to hash filehandles for tracepoints. Move the nfs_fhandle_hash() helper to a common nfs include file. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfs/internal.h | 15 --------------- include/linux/nfs.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2a65fe2a63ab..10fb5e7573eb 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -846,27 +846,12 @@ u64 nfs_timespec_to_change_attr(const struct timespec64 *ts) } #ifdef CONFIG_CRC32 -/** - * nfs_fhandle_hash - calculate the crc32 hash for the filehandle - * @fh - pointer to filehandle - * - * returns a crc32 hash for the filehandle that is compatible with - * the one displayed by "wireshark". - */ -static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) -{ - return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); -} static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid) { return ~crc32_le(0xFFFFFFFF, &stateid->other[0], NFS4_STATEID_OTHER_SIZE); } #else -static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) -{ - return 0; -} static inline u32 nfs_stateid_hash(nfs4_stateid *stateid) { return 0; diff --git a/include/linux/nfs.h b/include/linux/nfs.h index b06375e88e58..ceb70a926b95 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -10,6 +10,7 @@ #include #include +#include #include /* @@ -44,4 +45,23 @@ enum nfs3_stable_how { /* used by direct.c to mark verf as invalid */ NFS_INVALID_STABLE_HOW = -1 }; + +#ifdef CONFIG_CRC32 +/** + * nfs_fhandle_hash - calculate the crc32 hash for the filehandle + * @fh - pointer to filehandle + * + * returns a crc32 hash for the filehandle that is compatible with + * the one displayed by "wireshark". + */ +static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) +{ + return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); +} +#else /* CONFIG_CRC32 */ +static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) +{ + return 0; +} +#endif /* CONFIG_CRC32 */ #endif /* _LINUX_NFS_H */ -- cgit v1.2.3 From cf64b9bce95095b80f4589e4f54572cc5d8c1538 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 8 Mar 2023 17:51:00 +1100 Subject: SUNRPC: return proper error from get_expiry() The get_expiry() function currently returns a timestamp, and uses the special return value of 0 to indicate an error. Unfortunately this causes a problem when 0 is the correct return value. On a system with no RTC it is possible that the boot time will be seen to be "3". When exportfs probes to see if a particular filesystem supports NFS export it tries to cache information with an expiry time of "3". The intention is for this to be "long in the past". Even with no RTC it will not be far in the future (at most a second or two) so this is harmless. But if the boot time happens to have been calculated to be "3", then get_expiry will fail incorrectly as it converts the number to "seconds since bootime" - 0. To avoid this problem we change get_expiry() to report the error quite separately from the expiry time. The error is now the return value. The expiry time is reported through a by-reference parameter. Reported-by: Jerry Zhang Tested-by: Jerry Zhang Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/export.c | 13 ++++++------- fs/nfsd/nfs4idmap.c | 8 ++++---- include/linux/sunrpc/cache.h | 15 ++++++++------- net/sunrpc/auth_gss/svcauth_gss.c | 12 ++++++------ net/sunrpc/svcauth_unix.c | 12 ++++++------ 5 files changed, 30 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 668c7527b17e..6da74aebe1fb 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -123,11 +123,11 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) /* OK, we seem to have a valid key */ key.h.flags = 0; - key.h.expiry_time = get_expiry(&mesg); - if (key.h.expiry_time == 0) + err = get_expiry(&mesg, &key.h.expiry_time); + if (err) goto out; - key.ek_client = dom; + key.ek_client = dom; key.ek_fsidtype = fsidtype; memcpy(key.ek_fsid, buf, len); @@ -610,9 +610,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) exp.ex_devid_map = NULL; /* expiry */ - err = -EINVAL; - exp.h.expiry_time = get_expiry(&mesg); - if (exp.h.expiry_time == 0) + err = get_expiry(&mesg, &exp.h.expiry_time); + if (err) goto out3; /* flags */ @@ -624,7 +623,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (err || an_int < 0) goto out3; exp.ex_flags= an_int; - + /* anon uid */ err = get_int(&mesg, &an_int); if (err) diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 5e9809aff37e..7a806ac13e31 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -240,8 +240,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) goto out; /* expiry */ - ent.h.expiry_time = get_expiry(&buf); - if (ent.h.expiry_time == 0) + error = get_expiry(&buf, &ent.h.expiry_time); + if (error) goto out; error = -ENOMEM; @@ -408,8 +408,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) memcpy(ent.name, buf1, sizeof(ent.name)); /* expiry */ - ent.h.expiry_time = get_expiry(&buf); - if (ent.h.expiry_time == 0) + error = get_expiry(&buf, &ent.h.expiry_time); + if (error) goto out; /* ID */ diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index ec5a555df96f..518bd28f5ab8 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -300,17 +300,18 @@ static inline int get_time(char **bpp, time64_t *time) return 0; } -static inline time64_t get_expiry(char **bpp) +static inline int get_expiry(char **bpp, time64_t *rvp) { - time64_t rv; + int error; struct timespec64 boot; - if (get_time(bpp, &rv)) - return 0; - if (rv < 0) - return 0; + error = get_time(bpp, rvp); + if (error) + return error; + getboottime64(&boot); - return rv - boot.tv_sec; + (*rvp) -= boot.tv_sec; + return 0; } #endif /* _LINUX_SUNRPC_CACHE_H_ */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 9c843974bb48..c4a566737085 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -257,11 +257,11 @@ static int rsi_parse(struct cache_detail *cd, rsii.h.flags = 0; /* expiry */ - expiry = get_expiry(&mesg); - status = -EINVAL; - if (expiry == 0) + status = get_expiry(&mesg, &expiry); + if (status) goto out; + status = -EINVAL; /* major/minor */ len = qword_get(&mesg, buf, mlen); if (len <= 0) @@ -483,11 +483,11 @@ static int rsc_parse(struct cache_detail *cd, rsci.h.flags = 0; /* expiry */ - expiry = get_expiry(&mesg); - status = -EINVAL; - if (expiry == 0) + status = get_expiry(&mesg, &expiry); + if (status) goto out; + status = -EINVAL; rscp = rsc_lookup(cd, &rsci); if (!rscp) goto out; diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 4246363cb095..4485088ce27b 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -225,9 +225,9 @@ static int ip_map_parse(struct cache_detail *cd, return -EINVAL; } - expiry = get_expiry(&mesg); - if (expiry ==0) - return -EINVAL; + err = get_expiry(&mesg, &expiry); + if (err) + return err; /* domainname, or empty for NEGATIVE */ len = qword_get(&mesg, buf, mlen); @@ -506,9 +506,9 @@ static int unix_gid_parse(struct cache_detail *cd, uid = make_kuid(current_user_ns(), id); ug.uid = uid; - expiry = get_expiry(&mesg); - if (expiry == 0) - return -EINVAL; + err = get_expiry(&mesg, &expiry); + if (err) + return err; rv = get_int(&mesg, &gids); if (rv || gids < 0 || gids > 8192) -- cgit v1.2.3 From 0f5162480bd25bd97b91c9153db7afbd89698804 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 17 Mar 2023 17:09:20 -0400 Subject: NFSD: Watch for rq_pages bounds checking errors in nfsd_splice_actor() There have been several bugs over the years where the NFSD splice actor has attempted to write outside the rq_pages array. This is a "should never happen" condition, but if for some reason the pipe splice actor should attempt to walk past the end of rq_pages, it needs to terminate the READ operation to prevent corruption of the pointer addresses in the fields just beyond the array. A server crash is thus prevented. Since the code is not behaving, the READ operation returns -EIO to the client. None of the READ payload data can be trusted if the splice actor isn't operating as expected. Suggested-by: Jeff Layton Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton --- fs/nfsd/vfs.c | 6 +++++- include/linux/sunrpc/svc.h | 2 +- include/trace/events/sunrpc.h | 25 +++++++++++++++++++++++++ net/sunrpc/svc.c | 15 ++++++++++++++- 4 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 5783209f17fc..10aa68ca82ef 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -930,6 +930,9 @@ nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, * Grab and keep cached pages associated with a file in the svc_rqst * so that they can be passed to the network sendmsg/sendpage routines * directly. They will be released after the sending has completed. + * + * Return values: Number of bytes consumed, or -EIO if there are no + * remaining pages in rqstp->rq_pages. */ static int nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, @@ -948,7 +951,8 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, */ if (page == *(rqstp->rq_next_page - 1)) continue; - svc_rqst_replace_page(rqstp, page); + if (unlikely(!svc_rqst_replace_page(rqstp, page))) + return -EIO; } if (rqstp->rq_res.page_len == 0) // first call rqstp->rq_res.page_base = offset % PAGE_SIZE; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 877891536c2f..f5af055280ff 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -422,7 +422,7 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int, int (*threadfn)(void *data)); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); -void svc_rqst_replace_page(struct svc_rqst *rqstp, +bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 3ca54536f8f7..5a3bb42e1f50 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1790,6 +1790,31 @@ DEFINE_EVENT(svc_rqst_status, svc_send, TP_PROTO(const struct svc_rqst *rqst, int status), TP_ARGS(rqst, status)); +TRACE_EVENT(svc_replace_page_err, + TP_PROTO(const struct svc_rqst *rqst), + + TP_ARGS(rqst), + TP_STRUCT__entry( + SVC_RQST_ENDPOINT_FIELDS(rqst) + + __field(const void *, begin) + __field(const void *, respages) + __field(const void *, nextpage) + ), + + TP_fast_assign( + SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst); + + __entry->begin = rqst->rq_pages; + __entry->respages = rqst->rq_respages; + __entry->nextpage = rqst->rq_next_page; + ), + + TP_printk(SVC_RQST_ENDPOINT_FORMAT " begin=%p respages=%p nextpage=%p", + SVC_RQST_ENDPOINT_VARARGS, + __entry->begin, __entry->respages, __entry->nextpage) +); + TRACE_EVENT(svc_stats_latency, TP_PROTO( const struct svc_rqst *rqst diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index fea7ce8fba14..633aa1eb476b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -842,9 +842,21 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads); * * When replacing a page in rq_pages, batch the release of the * replaced pages to avoid hammering the page allocator. + * + * Return values: + * %true: page replaced + * %false: array bounds checking failed */ -void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) +bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) { + struct page **begin = rqstp->rq_pages; + struct page **end = &rqstp->rq_pages[RPCSVC_MAXPAGES]; + + if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) { + trace_svc_replace_page_err(rqstp); + return false; + } + if (*rqstp->rq_next_page) { if (!pagevec_space(&rqstp->rq_pvec)) __pagevec_release(&rqstp->rq_pvec); @@ -853,6 +865,7 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) get_page(page); *(rqstp->rq_next_page++) = page; + return true; } EXPORT_SYMBOL_GPL(svc_rqst_replace_page); -- cgit v1.2.3 From 55fcc7d9159de886296626e47db2c81f8578c7e1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 3 Apr 2023 13:53:07 -0400 Subject: SUNRPC: Ignore return value of ->xpo_sendto Clean up: All callers of svc_process() ignore its return value, so svc_process() can safely be converted to return void. Ditto for svc_send(). The return value of ->xpo_sendto() is now used only as part of a trace event. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 2 +- include/linux/sunrpc/svcsock.h | 2 +- net/sunrpc/svc.c | 13 +++++++------ net/sunrpc/svc_xprt.c | 21 +++++++++------------ 4 files changed, 18 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f5af055280ff..2d31121fc2e6 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -430,7 +430,7 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, int (*threadfn)(void *data)); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); -int svc_process(struct svc_rqst *); +void svc_process(struct svc_rqst *rqstp); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, struct svc_rqst *); int svc_register(const struct svc_serv *, struct net *, const int, diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index bcc555c7ae9c..dd73fa174af5 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -56,7 +56,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk) */ void svc_close_net(struct svc_serv *, struct net *); int svc_recv(struct svc_rqst *, long); -int svc_send(struct svc_rqst *); +void svc_send(struct svc_rqst *rqstp); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); bool svc_alien_sock(struct net *net, int fd); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 633aa1eb476b..0aa8892fad63 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1444,11 +1444,12 @@ err_system_err: goto sendit; } -/* - * Process the RPC request. +/** + * svc_process - Execute one RPC transaction + * @rqstp: RPC transaction context + * */ -int -svc_process(struct svc_rqst *rqstp) +void svc_process(struct svc_rqst *rqstp) { struct kvec *resv = &rqstp->rq_res.head[0]; __be32 *p; @@ -1484,7 +1485,8 @@ svc_process(struct svc_rqst *rqstp) if (!svc_process_common(rqstp)) goto out_drop; - return svc_send(rqstp); + svc_send(rqstp); + return; out_baddir: svc_printk(rqstp, "bad direction 0x%08x, dropping request\n", @@ -1492,7 +1494,6 @@ out_baddir: rqstp->rq_server->sv_stats->rpcbadfmt++; out_drop: svc_drop(rqstp); - return 0; } EXPORT_SYMBOL_GPL(svc_process); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index ba629297da4e..36c79b718323 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -909,18 +909,20 @@ void svc_drop(struct svc_rqst *rqstp) } EXPORT_SYMBOL_GPL(svc_drop); -/* - * Return reply to client. +/** + * svc_send - Return reply to client + * @rqstp: RPC transaction context + * */ -int svc_send(struct svc_rqst *rqstp) +void svc_send(struct svc_rqst *rqstp) { struct svc_xprt *xprt; - int len = -EFAULT; struct xdr_buf *xb; + int status; xprt = rqstp->rq_xprt; if (!xprt) - goto out; + return; /* calculate over-all length */ xb = &rqstp->rq_res; @@ -930,15 +932,10 @@ int svc_send(struct svc_rqst *rqstp) trace_svc_xdr_sendto(rqstp->rq_xid, xb); trace_svc_stats_latency(rqstp); - len = xprt->xpt_ops->xpo_sendto(rqstp); + status = xprt->xpt_ops->xpo_sendto(rqstp); - trace_svc_send(rqstp, len); + trace_svc_send(rqstp, status); svc_xprt_release(rqstp); - - if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) - len = 0; -out: - return len; } /* -- cgit v1.2.3 From b20cb39def085723868972182fb58fa906839a4f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Apr 2023 20:17:56 -0400 Subject: SUNRPC: Relocate svc_free_res_pages() Clean-up: There doesn't seem to be a reason why this function is stuck in a header. One thing it prevents is the convenient addition of tracing. Moving it to a source file also makes the rq_respages clean-up logic easier to find. Reviewed-by: Calum Mackay Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 12 +----------- net/sunrpc/svc.c | 19 +++++++++++++++++++ net/sunrpc/svc_xprt.c | 2 +- 3 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 2d31121fc2e6..762d7231e574 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -309,17 +309,6 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst) return (struct sockaddr *) &rqst->rq_daddr; } -static inline void svc_free_res_pages(struct svc_rqst *rqstp) -{ - while (rqstp->rq_next_page != rqstp->rq_respages) { - struct page **pp = --rqstp->rq_next_page; - if (*pp) { - put_page(*pp); - *pp = NULL; - } - } -} - struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ struct svc_xprt *xprt; @@ -424,6 +413,7 @@ struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page); +void svc_rqst_release_pages(struct svc_rqst *rqstp); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 0aa8892fad63..0fc70cc405b2 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -869,6 +869,25 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) } EXPORT_SYMBOL_GPL(svc_rqst_replace_page); +/** + * svc_rqst_release_pages - Release Reply buffer pages + * @rqstp: RPC transaction context + * + * Release response pages that might still be in flight after + * svc_send, and any spliced filesystem-owned pages. + */ +void svc_rqst_release_pages(struct svc_rqst *rqstp) +{ + while (rqstp->rq_next_page != rqstp->rq_respages) { + struct page **pp = --rqstp->rq_next_page; + + if (*pp) { + put_page(*pp); + *pp = NULL; + } + } +} + /* * Called from a server thread as it's exiting. Caller must hold the "service * mutex" for the service. diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 36c79b718323..533e08c4f319 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -542,7 +542,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp) rqstp->rq_deferred = NULL; pagevec_release(&rqstp->rq_pvec); - svc_free_res_pages(rqstp); + svc_rqst_release_pages(rqstp); rqstp->rq_res.page_len = 0; rqstp->rq_res.page_base = 0; -- cgit v1.2.3 From 5e052dda121e2870dd87181783da4a95d7d2927b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Apr 2023 09:42:14 -0400 Subject: SUNRPC: Recognize control messages in server-side TCP socket code To support kTLS, the server-side TCP socket receive path needs to watch for CMSGs. Acked-by: Jakub Kicinski Signed-off-by: Chuck Lever --- include/net/tls.h | 2 ++ net/sunrpc/svcsock.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 154949c7b0c8..6056ce5a2aa5 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -69,6 +69,8 @@ extern const struct tls_cipher_size_desc tls_cipher_size_desc[]; #define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type) +#define TLS_RECORD_TYPE_ALERT 0x15 +#define TLS_RECORD_TYPE_HANDSHAKE 0x16 #define TLS_RECORD_TYPE_DATA 0x17 #define TLS_AAD_SPACE_SIZE 13 diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 302a14dd7882..c5b74f523fc4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -216,6 +217,49 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) return len; } +static int +svc_tcp_sock_process_cmsg(struct svc_sock *svsk, struct msghdr *msg, + struct cmsghdr *cmsg, int ret) +{ + if (cmsg->cmsg_level == SOL_TLS && + cmsg->cmsg_type == TLS_GET_RECORD_TYPE) { + u8 content_type = *((u8 *)CMSG_DATA(cmsg)); + + switch (content_type) { + case TLS_RECORD_TYPE_DATA: + /* TLS sets EOR at the end of each application data + * record, even though there might be more frames + * waiting to be decrypted. + */ + msg->msg_flags &= ~MSG_EOR; + break; + case TLS_RECORD_TYPE_ALERT: + ret = -ENOTCONN; + break; + default: + ret = -EAGAIN; + } + } + return ret; +} + +static int +svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg) +{ + union { + struct cmsghdr cmsg; + u8 buf[CMSG_SPACE(sizeof(u8))]; + } u; + int ret; + + msg->msg_control = &u; + msg->msg_controllen = sizeof(u); + ret = sock_recvmsg(svsk->sk_sock, msg, MSG_DONTWAIT); + if (unlikely(msg->msg_controllen != sizeof(u))) + ret = svc_tcp_sock_process_cmsg(svsk, msg, &u.cmsg, ret); + return ret; +} + #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek) { @@ -263,7 +307,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, iov_iter_advance(&msg.msg_iter, seek); buflen -= seek; } - len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); + len = svc_tcp_sock_recv_cmsg(svsk, &msg); if (len > 0) svc_flush_bvec(bvec, len, seek); @@ -877,7 +921,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want); - len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); + len = svc_tcp_sock_recv_cmsg(svsk, &msg); if (len < 0) return len; svsk->sk_tcplen += len; -- cgit v1.2.3 From b3cbf98e2fdf3cb147a95161560cd25987284330 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 20 Apr 2023 13:56:24 -0400 Subject: SUNRPC: Support TLS handshake in the server-side TCP socket code This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel NFS server. If the client requests RPC-with-TLS and the user space handshake agent is running, the server will set up a TLS session. There are no policy settings yet. For example, the server cannot yet require the use of RPC-with-TLS to access its data. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 5 +- include/linux/sunrpc/svcsock.h | 2 + include/trace/events/sunrpc.h | 16 ++++++- net/sunrpc/svc_xprt.c | 5 +- net/sunrpc/svcauth_unix.c | 11 ++++- net/sunrpc/svcsock.c | 101 ++++++++++++++++++++++++++++++++++++++-- 6 files changed, 132 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 775368802762..867479204840 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -27,7 +27,7 @@ struct svc_xprt_ops { void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); void (*xpo_kill_temp_xprt)(struct svc_xprt *); - void (*xpo_start_tls)(struct svc_xprt *); + void (*xpo_handshake)(struct svc_xprt *xprt); }; struct svc_xprt_class { @@ -70,6 +70,9 @@ struct svc_xprt { #define XPT_LOCAL 12 /* connection from loopback interface */ #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ #define XPT_CONG_CTRL 14 /* has congestion control */ +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ +#define XPT_TLS_SESSION 16 /* transport-layer security established */ +#define XPT_PEER_AUTH 17 /* peer has been authenticated */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index dd73fa174af5..d16ae621782c 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -38,6 +38,8 @@ struct svc_sock { /* Number of queued send requests */ atomic_t sk_sendqlen; + struct completion sk_handshake_done; + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5a3bb42e1f50..31bc7025cb44 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1857,7 +1857,10 @@ TRACE_EVENT(svc_stats_latency, { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \ { BIT(XPT_LOCAL), "LOCAL" }, \ { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \ - { BIT(XPT_CONG_CTRL), "CONG_CTRL" }) + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \ + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \ + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \ + { BIT(XPT_PEER_AUTH), "PEER_AUTH" }) TRACE_EVENT(svc_xprt_create_err, TP_PROTO( @@ -1990,6 +1993,17 @@ DEFINE_SVC_XPRT_EVENT(close); DEFINE_SVC_XPRT_EVENT(detach); DEFINE_SVC_XPRT_EVENT(free); +#define DEFINE_SVC_TLS_EVENT(name) \ + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \ + TP_PROTO(const struct svc_xprt *xprt), \ + TP_ARGS(xprt)) + +DEFINE_SVC_TLS_EVENT(start); +DEFINE_SVC_TLS_EVENT(upcall); +DEFINE_SVC_TLS_EVENT(unavailable); +DEFINE_SVC_TLS_EVENT(not_started); +DEFINE_SVC_TLS_EVENT(timed_out); + TRACE_EVENT(svc_xprt_accept, TP_PROTO( const struct svc_xprt *xprt, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 3b9708b39e35..84e5d7d31481 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) if (xpt_flags & BIT(XPT_BUSY)) return false; - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) return true; if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { if (xprt->xpt_ops->xpo_has_wspace(xprt) && @@ -828,6 +828,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) module_put(xprt->xpt_class->xcl_owner); } svc_xprt_received(xprt); + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) { + xprt->xpt_ops->xpo_handshake(xprt); + svc_xprt_received(xprt); } else if (svc_xprt_reserve_slot(rqstp, xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 4485088ce27b..174783f804fa 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -17,8 +17,9 @@ #include #include #include -#define RPCDBG_FACILITY RPCDBG_AUTH +#include +#define RPCDBG_FACILITY RPCDBG_AUTH #include "netns.h" @@ -832,6 +833,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct svc_cred *cred = &rqstp->rq_cred; + struct svc_xprt *xprt = rqstp->rq_xprt; u32 flavor, len; void *body; __be32 *p; @@ -865,14 +867,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp) if (cred->cr_group_info == NULL) return SVC_CLOSE; - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { + if (xprt->xpt_ops->xpo_handshake) { p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8); if (!p) return SVC_CLOSE; + trace_svc_tls_start(xprt); *p++ = rpc_auth_null; *p++ = cpu_to_be32(8); memcpy(p, "STARTTLS", 8); + + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); } else { + trace_svc_tls_unavailable(xprt); if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, RPC_AUTH_NULL, NULL, 0) < 0) return SVC_CLOSE; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c5b74f523fc4..a51c9b989d58 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -44,9 +44,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -64,6 +66,12 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +/* To-do: to avoid tying up an nfsd thread while waiting for a + * handshake request, the request could instead be deferred. + */ +enum { + SVC_HANDSHAKE_TO = 5U * HZ +}; static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, int flags); @@ -359,6 +367,8 @@ static void svc_data_ready(struct sock *sk) rmb(); svsk->sk_odata(sk); trace_svcsock_data_ready(&svsk->sk_xprt, 0); + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags)) + return; if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); } @@ -396,6 +406,88 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) sock_no_linger(svsk->sk_sock->sk); } +/** + * svc_tcp_handshake_done - Handshake completion handler + * @data: address of xprt to wake + * @status: status of handshake + * @peerid: serial number of key containing the remote peer's identity + * + * If a security policy is specified as an export option, we don't + * have a specific export here to check. So we set a "TLS session + * is present" flag on the xprt and let an upper layer enforce local + * security policy. + */ +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid) +{ + struct svc_xprt *xprt = data; + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + + if (!status) { + if (peerid != TLS_NO_PEERID) + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags); + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags); + } + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); + complete_all(&svsk->sk_handshake_done); +} + +/** + * svc_tcp_handshake - Perform a transport-layer security handshake + * @xprt: connected transport endpoint + * + */ +static void svc_tcp_handshake(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + struct sock *sk = svsk->sk_sock->sk; + struct tls_handshake_args args = { + .ta_sock = svsk->sk_sock, + .ta_done = svc_tcp_handshake_done, + .ta_data = xprt, + }; + int ret; + + trace_svc_tls_upcall(xprt); + + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags); + init_completion(&svsk->sk_handshake_done); + + ret = tls_server_hello_x509(&args, GFP_KERNEL); + if (ret) { + trace_svc_tls_not_started(xprt); + goto out_failed; + } + + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done, + SVC_HANDSHAKE_TO); + if (ret <= 0) { + if (tls_handshake_cancel(sk)) { + trace_svc_tls_timed_out(xprt); + goto out_close; + } + } + + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) { + trace_svc_tls_unavailable(xprt); + goto out_close; + } + + /* Mark the transport ready in case the remote sent RPC + * traffic before the kernel received the handshake + * completion downcall. + */ + set_bit(XPT_DATA, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); + return; + +out_close: + set_bit(XPT_CLOSE, &xprt->xpt_flags); +out_failed: + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); + set_bit(XPT_DATA, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); +} + /* * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo */ @@ -1257,6 +1349,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, + .xpo_handshake = svc_tcp_handshake, }; static struct svc_xprt_class svc_tcp_class = { @@ -1580,10 +1673,12 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) static void svc_sock_free(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + struct socket *sock = svsk->sk_sock; - if (svsk->sk_sock->file) - sockfd_put(svsk->sk_sock); + tls_handshake_cancel(sock->sk); + if (sock->file) + sockfd_put(sock); else - sock_release(svsk->sk_sock); + sock_release(sock); kfree(svsk); } -- cgit v1.2.3 From 9280c577431401544e63dfb489a830a42bee25eb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 20 Apr 2023 13:56:31 -0400 Subject: NFSD: Handle new xprtsec= export option Enable administrators to require clients to use transport layer security when accessing particular exports. Signed-off-by: Chuck Lever --- fs/nfsd/export.c | 51 +++++++++++++++++++++++++++++++++++++--- fs/nfsd/export.h | 1 + include/uapi/linux/nfsd/export.h | 13 ++++++++++ 3 files changed, 62 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 6da74aebe1fb..ae85257b4238 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -439,7 +439,6 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid) return -EINVAL; } return 0; - } #ifdef CONFIG_NFSD_V4 @@ -546,6 +545,29 @@ static inline int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } #endif +static int xprtsec_parse(char **mesg, char *buf, struct svc_export *exp) +{ + unsigned int i, mode, listsize; + int err; + + err = get_uint(mesg, &listsize); + if (err) + return err; + if (listsize > NFSEXP_XPRTSEC_NUM) + return -EINVAL; + + exp->ex_xprtsec_modes = 0; + for (i = 0; i < listsize; i++) { + err = get_uint(mesg, &mode); + if (err) + return err; + if (mode > NFSEXP_XPRTSEC_MTLS) + return -EINVAL; + exp->ex_xprtsec_modes |= mode; + } + return 0; +} + static inline int nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid) { @@ -608,6 +630,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) exp.ex_client = dom; exp.cd = cd; exp.ex_devid_map = NULL; + exp.ex_xprtsec_modes = NFSEXP_XPRTSEC_ALL; /* expiry */ err = get_expiry(&mesg, &exp.h.expiry_time); @@ -649,6 +672,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid); else if (strcmp(buf, "secinfo") == 0) err = secinfo_parse(&mesg, buf, &exp); + else if (strcmp(buf, "xprtsec") == 0) + err = xprtsec_parse(&mesg, buf, &exp); else /* quietly ignore unknown words and anything * following. Newer user-space can try to set @@ -662,6 +687,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) err = check_export(&exp.ex_path, &exp.ex_flags, exp.ex_uuid); if (err) goto out4; + /* * No point caching this if it would immediately expire. * Also, this protects exportfs's dummy export from the @@ -823,6 +849,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) for (i = 0; i < MAX_SECINFO_LIST; i++) { new->ex_flavors[i] = item->ex_flavors[i]; } + new->ex_xprtsec_modes = item->ex_xprtsec_modes; } static struct cache_head *svc_export_alloc(void) @@ -1034,9 +1061,26 @@ static struct svc_export *exp_find(struct cache_detail *cd, __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) { - struct exp_flavor_info *f; - struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; + struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors; + struct svc_xprt *xprt = rqstp->rq_xprt; + + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) { + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) + goto ok; + } + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_TLS) { + if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) && + !test_bit(XPT_PEER_AUTH, &xprt->xpt_flags)) + goto ok; + } + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_MTLS) { + if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) && + test_bit(XPT_PEER_AUTH, &xprt->xpt_flags)) + goto ok; + } + goto denied; +ok: /* legacy gss-only clients are always OK: */ if (exp->ex_client == rqstp->rq_gssclient) return 0; @@ -1061,6 +1105,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) if (nfsd4_spo_must_allow(rqstp)) return 0; +denied: return rqstp->rq_vers < 4 ? nfserr_acces : nfserr_wrongsec; } diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index d03f7f6a8642..2df8ae25aad3 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -77,6 +77,7 @@ struct svc_export { struct cache_detail *cd; struct rcu_head ex_rcu; struct export_stats ex_stats; + unsigned long ex_xprtsec_modes; }; /* an "export key" (expkey) maps a filehandlefragement to an diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h index 2124ba904779..a73ca3703abb 100644 --- a/include/uapi/linux/nfsd/export.h +++ b/include/uapi/linux/nfsd/export.h @@ -62,5 +62,18 @@ | NFSEXP_ALLSQUASH \ | NFSEXP_INSECURE_PORT) +/* + * Transport layer security policies that are permitted to access + * an export + */ +#define NFSEXP_XPRTSEC_NONE 0x0001 +#define NFSEXP_XPRTSEC_TLS 0x0002 +#define NFSEXP_XPRTSEC_MTLS 0x0004 + +#define NFSEXP_XPRTSEC_NUM (3) + +#define NFSEXP_XPRTSEC_ALL (NFSEXP_XPRTSEC_NONE | \ + NFSEXP_XPRTSEC_TLS | \ + NFSEXP_XPRTSEC_MTLS) #endif /* _UAPINFSD_EXPORT_H */ -- cgit v1.2.3