From 251a658bbfceafb4d58c76b77682c8bf7bcfad65 Mon Sep 17 00:00:00 2001 From: Jorge Mora Date: Thu, 25 Jan 2024 07:56:12 -0700 Subject: NFSv4.2: fix nfs4_listxattr kernel BUG at mm/usercopy.c:102 A call to listxattr() with a buffer size = 0 returns the actual size of the buffer needed for a subsequent call. When size > 0, nfs4_listxattr() does not return an error because either generic_listxattr() or nfs4_listxattr_nfs4_label() consumes exactly all the bytes then size is 0 when calling nfs4_listxattr_nfs4_user() which then triggers the following kernel BUG: [ 99.403778] kernel BUG at mm/usercopy.c:102! [ 99.404063] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP [ 99.408463] CPU: 0 PID: 3310 Comm: python3 Not tainted 6.6.0-61.fc40.aarch64 #1 [ 99.415827] Call trace: [ 99.415985] usercopy_abort+0x70/0xa0 [ 99.416227] __check_heap_object+0x134/0x158 [ 99.416505] check_heap_object+0x150/0x188 [ 99.416696] __check_object_size.part.0+0x78/0x168 [ 99.416886] __check_object_size+0x28/0x40 [ 99.417078] listxattr+0x8c/0x120 [ 99.417252] path_listxattr+0x78/0xe0 [ 99.417476] __arm64_sys_listxattr+0x28/0x40 [ 99.417723] invoke_syscall+0x78/0x100 [ 99.417929] el0_svc_common.constprop.0+0x48/0xf0 [ 99.418186] do_el0_svc+0x24/0x38 [ 99.418376] el0_svc+0x3c/0x110 [ 99.418554] el0t_64_sync_handler+0x120/0x130 [ 99.418788] el0t_64_sync+0x194/0x198 [ 99.418994] Code: aa0003e3 d000a3e0 91310000 97f49bdb (d4210000) Issue is reproduced when generic_listxattr() returns 'system.nfs4_acl', thus calling lisxattr() with size = 16 will trigger the bug. Add check on nfs4_listxattr() to return ERANGE error when it is called with size > 0 and the return value is greater than size. Fixes: 012a211abd5d ("NFSv4.2: hook in the user extended attribute handlers") Signed-off-by: Jorge Mora Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 23819a756508..b2ff8c7a2149 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10615,29 +10615,33 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) { ssize_t error, error2, error3; + size_t left = size; - error = generic_listxattr(dentry, list, size); + error = generic_listxattr(dentry, list, left); if (error < 0) return error; if (list) { list += error; - size -= error; + left -= error; } - error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size); + error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, left); if (error2 < 0) return error2; if (list) { list += error2; - size -= error2; + left -= error2; } - error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size); + error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left); if (error3 < 0) return error3; - return error + error2 + error3; + error += error2 + error3; + if (size && error > size) + return -ERANGE; + return error; } static void nfs4_enable_swap(struct inode *inode) -- cgit v1.2.3 From 7e5ae43b2d0eb89560bf7da7c9c745d31bf72ffe Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 2 Feb 2024 15:21:13 -0500 Subject: NFSv4.1: add tracepoint to trunked nfs4_exchange_id calls Add a tracepoint to track when the client sends EXCHANGE_ID to test a new transport for session trunking. nfs4_detect_session_trunking() tests for trunking and returns EINVAL if trunking can't be done, add EINVAL mapping to show_nfs4_status() in tracepoints. Signed-off-by: Olga Kornievskaia Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 ++++-- fs/nfs/nfs4trace.h | 30 ++++++++++++++++++++++++++++++ include/trace/misc/nfs.h | 1 + 3 files changed, 35 insertions(+), 2 deletions(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2ff8c7a2149..206b4607b29a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8970,10 +8970,12 @@ try_again: return; status = task->tk_status; - if (status == 0) + if (status == 0) { status = nfs4_detect_session_trunking(adata->clp, task->tk_msg.rpc_resp, xprt); - + trace_nfs4_trunked_exchange_id(adata->clp, + xprt->address_strings[RPC_DISPLAY_ADDR], status); + } if (status == 0) rpc_clnt_xprt_switch_add_xprt(clnt, xprt); else if (status != -NFS4ERR_DELAY && rpc_clnt_xprt_switch_has_addr(clnt, diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 713d080fd268..7d9cb980865d 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -77,6 +77,36 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session); DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence); DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete); +TRACE_EVENT(nfs4_trunked_exchange_id, + TP_PROTO( + const struct nfs_client *clp, + const char *addr, + int error + ), + + TP_ARGS(clp, addr, error), + + TP_STRUCT__entry( + __string(main_addr, clp->cl_hostname) + __string(trunk_addr, addr) + __field(unsigned long, error) + ), + + TP_fast_assign( + __entry->error = error < 0 ? -error : 0; + __assign_str(main_addr, clp->cl_hostname); + __assign_str(trunk_addr, addr); + ), + + TP_printk( + "error=%ld (%s) main_addr=%s trunk_addr=%s", + -__entry->error, + show_nfs4_status(__entry->error), + __get_str(main_addr), + __get_str(trunk_addr) + ) +); + TRACE_EVENT(nfs4_sequence_done, TP_PROTO( const struct nfs4_session *session, diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h index 0d9d48dca38a..5387eb0a6a08 100644 --- a/include/trace/misc/nfs.h +++ b/include/trace/misc/nfs.h @@ -239,6 +239,7 @@ TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); { EHOSTDOWN, "EHOSTDOWN" }, \ { EPIPE, "EPIPE" }, \ { EPFNOSUPPORT, "EPFNOSUPPORT" }, \ + { EINVAL, "EINVAL" }, \ { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ { NFS4ERR_ACCESS, "ACCESS" }, \ { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ -- cgit v1.2.3 From 0460253913e50a2aec911fe83090d60397f17664 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 24 Feb 2024 15:59:28 -0500 Subject: NFSv4: nfs4_do_open() is incorrectly triggering state recovery We're seeing spurious calls to nfs4_schedule_stateid_recovery() from nfs4_do_open() in situations where there is no trigger coming from the server. In theory the code path being triggered is supposed to notice that state recovery happened while we were processing the open call result from the server, before the open stateid is published. However in the years since that code was added, we've also added the 'session draining' mechanism, which ensures that the state recovery will wait until all the session slots have been returned. In nfs4_do_open() the session slot is only returned on exit of the function, so we don't need the legacy mechanism. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 4 ---- fs/nfs/nfs4_fs.h | 1 - fs/nfs/nfs4proc.c | 7 +------ fs/nfs/nfs4state.c | 12 +++++++----- 4 files changed, 8 insertions(+), 16 deletions(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index fa1a14def45c..4ba612e78da8 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -181,7 +181,6 @@ static int nfs_delegation_claim_opens(struct inode *inode, struct nfs_open_context *ctx; struct nfs4_state_owner *sp; struct nfs4_state *state; - unsigned int seq; int err; again: @@ -202,12 +201,9 @@ again: sp = state->owner; /* Block nfs4_proc_unlck */ mutex_lock(&sp->so_delegreturn_mutex); - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); err = nfs4_open_delegation_recall(ctx, state, stateid); if (!err) err = nfs_delegation_claim_locks(state, stateid); - if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) - err = -EAGAIN; mutex_unlock(&sp->so_delegreturn_mutex); put_nfs_open_context(ctx); if (err != 0) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 581698f1b7b2..cf17f4bc9815 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -120,7 +120,6 @@ struct nfs4_state_owner { unsigned long so_flags; struct list_head so_states; struct nfs_seqid_counter so_seqid; - seqcount_spinlock_t so_reclaim_seqcount; struct mutex so_delegreturn_mutex; }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 206b4607b29a..e9c4f7b9d44e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3069,10 +3069,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, fmode_t acc_mode = _nfs4_ctx_to_accessmode(ctx); struct inode *dir = d_inode(opendata->dir); unsigned long dir_verifier; - unsigned int seq; int ret; - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); dir_verifier = nfs_save_change_attribute(dir); ret = _nfs4_proc_open(opendata, ctx); @@ -3125,11 +3123,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (ret != 0) goto out; - if (d_inode(dentry) == state->inode) { + if (d_inode(dentry) == state->inode) nfs_inode_attach_open_context(ctx); - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) - nfs4_schedule_stateid_recovery(server, state); - } out: if (!opendata->cancelled) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9a5d911a7edc..8486230d99e1 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -513,7 +513,6 @@ nfs4_alloc_state_owner(struct nfs_server *server, nfs4_init_seqid_counter(&sp->so_seqid); atomic_set(&sp->so_count, 1); INIT_LIST_HEAD(&sp->so_lru); - seqcount_spinlock_init(&sp->so_reclaim_seqcount, &sp->so_lock); mutex_init(&sp->so_delegreturn_mutex); return sp; } @@ -1667,7 +1666,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, * server that doesn't support a grace period. */ spin_lock(&sp->so_lock); - raw_write_seqcount_begin(&sp->so_reclaim_seqcount); restart: list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) @@ -1735,7 +1733,6 @@ restart: spin_lock(&sp->so_lock); goto restart; } - raw_write_seqcount_end(&sp->so_reclaim_seqcount); spin_unlock(&sp->so_lock); #ifdef CONFIG_NFS_V4_2 if (found_ssc_copy_state) @@ -1745,7 +1742,6 @@ restart: out_err: nfs4_put_open_state(state); spin_lock(&sp->so_lock); - raw_write_seqcount_end(&sp->so_reclaim_seqcount); spin_unlock(&sp->so_lock); return status; } @@ -1928,9 +1924,12 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov struct nfs_server *server; struct rb_node *pos; LIST_HEAD(freeme); - int status = 0; int lost_locks = 0; + int status; + status = nfs4_begin_drain_session(clp); + if (status < 0) + return status; restart: rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { @@ -2694,6 +2693,9 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Detect expired delegations... */ if (test_and_clear_bit(NFS4CLNT_DELEGATION_EXPIRED, &clp->cl_state)) { section = "detect expired delegations"; + status = nfs4_begin_drain_session(clp); + if (status < 0) + goto out_error; nfs_reap_expired_delegations(clp); continue; } -- cgit v1.2.3