diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-19 00:32:33 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-19 00:32:33 +0300 |
commit | 6860c981b9672324cb53b883cfda8d2ea1445ff1 (patch) | |
tree | d97a47712479bc2f886012d7f2da8b00ac504f3a /net/sunrpc/xprtsock.c | |
parent | 0570bc8b7c9b41deba6f61ac218922e7168ad648 (diff) | |
parent | d5b9216fd5114be4ed98ca9c1ecc5f164cd8cf5e (diff) | |
download | linux-6860c981b9672324cb53b883cfda8d2ea1445ff1.tar.xz |
Merge tag 'nfs-for-5.3-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
Stable fixes:
- SUNRPC: Ensure bvecs are re-synced when we re-encode the RPC
request
- Fix an Oops in ff_layout_track_ds_error due to a PTR_ERR()
dereference
- Revert buggy NFS readdirplus optimisation
- NFSv4: Handle the special Linux file open access mode
- pnfs: Fix a problem where we gratuitously start doing I/O through
the MDS
Features:
- Allow NFS client to set up multiple TCP connections to the server
using a new 'nconnect=X' mount option. Queue length is used to
balance load.
- Enhance statistics reporting to report on all transports when using
multiple connections.
- Speed up SUNRPC by removing bh-safe spinlocks
- Add a mechanism to allow NFSv4 to request that containers set a
unique per-host identifier for when the hostname is not set.
- Ensure NFSv4 updates the lease_time after a clientid update
Bugfixes and cleanup:
- Fix use-after-free in rpcrdma_post_recvs
- Fix a memory leak when nfs_match_client() is interrupted
- Fix buggy file access checking in NFSv4 open for execute
- disable unsupported client side deduplication
- Fix spurious client disconnections
- Fix occasional RDMA transport deadlock
- Various RDMA cleanups
- Various tracepoint fixes
- Fix the TCP callback channel to guarantee the server can actually
send the number of callback requests that was negotiated at mount
time"
* tag 'nfs-for-5.3-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (68 commits)
pnfs/flexfiles: Add tracepoints for detecting pnfs fallback to MDS
pnfs: Fix a problem where we gratuitously start doing I/O through the MDS
SUNRPC: Optimise transport balancing code
SUNRPC: Ensure the bvecs are reset when we re-encode the RPC request
pnfs/flexfiles: Fix PTR_ERR() dereferences in ff_layout_track_ds_error
NFSv4: Don't use the zero stateid with layoutget
SUNRPC: Fix up backchannel slot table accounting
SUNRPC: Fix initialisation of struct rpc_xprt_switch
SUNRPC: Skip zero-refcount transports
SUNRPC: Replace division by multiplication in calculation of queue length
NFSv4: Validate the stateid before applying it to state recovery
nfs4.0: Refetch lease_time after clientid update
nfs4: Rename nfs41_setup_state_renewal
nfs4: Make nfs4_proc_get_lease_time available for nfs4.0
nfs: Fix copy-and-paste error in debug message
NFS: Replace 16 seq_printf() calls by seq_puts()
NFS: Use seq_putc() in nfs_show_stats()
Revert "NFS: readdirplus optimization by cache mechanism" (memleak)
SUNRPC: Fix transport accounting when caller specifies an rpc_xprt
NFS: Record task, client ID, and XID in xdr_status trace points
...
Diffstat (limited to 'net/sunrpc/xprtsock.c')
-rw-r--r-- | net/sunrpc/xprtsock.c | 126 |
1 files changed, 87 insertions, 39 deletions
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 36652352a38c..e2176c167a57 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -880,7 +880,7 @@ static int xs_nospace(struct rpc_rqst *req) req->rq_slen); /* Protect against races with write_space */ - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->transport_lock); /* Don't race with disconnect */ if (xprt_connected(xprt)) { @@ -890,7 +890,7 @@ static int xs_nospace(struct rpc_rqst *req) } else ret = -ENOTCONN; - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->transport_lock); /* Race breaker in case memory is freed before above code is called */ if (ret == -EAGAIN) { @@ -909,6 +909,7 @@ static int xs_nospace(struct rpc_rqst *req) static void xs_stream_prepare_request(struct rpc_rqst *req) { + xdr_free_bvec(&req->rq_rcv_buf); req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL); } @@ -1211,6 +1212,15 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); + clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); + clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); + clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state); +} + +static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) +{ + set_bit(nr, &transport->sock_state); + queue_work(xprtiod_workqueue, &transport->error_worker); } static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) @@ -1231,6 +1241,7 @@ static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) */ static void xs_error_report(struct sock *sk) { + struct sock_xprt *transport; struct rpc_xprt *xprt; int err; @@ -1238,13 +1249,14 @@ static void xs_error_report(struct sock *sk) if (!(xprt = xprt_from_sock(sk))) goto out; + transport = container_of(xprt, struct sock_xprt, xprt); err = -sk->sk_err; if (err == 0) goto out; dprintk("RPC: xs_error_report client %p, error=%d...\n", xprt, -err); trace_rpc_socket_error(xprt, sk->sk_socket, err); - xprt_wake_pending_tasks(xprt, err); + xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR); out: read_unlock_bh(&sk->sk_callback_lock); } @@ -1333,6 +1345,7 @@ static void xs_destroy(struct rpc_xprt *xprt) cancel_delayed_work_sync(&transport->connect_worker); xs_close(xprt); cancel_work_sync(&transport->recv_worker); + cancel_work_sync(&transport->error_worker); xs_xprt_free(xprt); module_put(THIS_MODULE); } @@ -1386,9 +1399,9 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, } - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->transport_lock); xprt_adjust_cwnd(xprt, task, copied); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->transport_lock); spin_lock(&xprt->queue_lock); xprt_complete_rqst(task, copied); __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); @@ -1498,7 +1511,6 @@ static void xs_tcp_state_change(struct sock *sk) trace_rpc_socket_state_change(xprt, sk->sk_socket); switch (sk->sk_state) { case TCP_ESTABLISHED: - spin_lock(&xprt->transport_lock); if (!xprt_test_and_set_connected(xprt)) { xprt->connect_cookie++; clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); @@ -1507,9 +1519,8 @@ static void xs_tcp_state_change(struct sock *sk) xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; - xprt_wake_pending_tasks(xprt, -EAGAIN); + xs_run_error_worker(transport, XPRT_SOCK_WAKE_PENDING); } - spin_unlock(&xprt->transport_lock); break; case TCP_FIN_WAIT1: /* The client initiated a shutdown of the socket */ @@ -1525,7 +1536,7 @@ static void xs_tcp_state_change(struct sock *sk) /* The server initiated a shutdown of the socket */ xprt->connect_cookie++; clear_bit(XPRT_CONNECTED, &xprt->state); - xs_tcp_force_close(xprt); + xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); /* fall through */ case TCP_CLOSING: /* @@ -1547,7 +1558,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt_clear_connecting(xprt); clear_bit(XPRT_CLOSING, &xprt->state); /* Trigger the socket release */ - xs_tcp_force_close(xprt); + xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); } out: read_unlock_bh(&sk->sk_callback_lock); @@ -1556,6 +1567,7 @@ static void xs_tcp_state_change(struct sock *sk) static void xs_write_space(struct sock *sk) { struct socket_wq *wq; + struct sock_xprt *transport; struct rpc_xprt *xprt; if (!sk->sk_socket) @@ -1564,13 +1576,14 @@ static void xs_write_space(struct sock *sk) if (unlikely(!(xprt = xprt_from_sock(sk)))) return; + transport = container_of(xprt, struct sock_xprt, xprt); rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0) goto out; - if (xprt_write_space(xprt)) - sk->sk_write_pending--; + xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE); + sk->sk_write_pending--; out: rcu_read_unlock(); } @@ -1664,9 +1677,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t */ static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) { - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->transport_lock); xprt_adjust_cwnd(xprt, task, -ETIMEDOUT); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->transport_lock); } static int xs_get_random_port(void) @@ -2201,13 +2214,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, unsigned int opt_on = 1; unsigned int timeo; - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->transport_lock); keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ); keepcnt = xprt->timeout->to_retries + 1; timeo = jiffies_to_msecs(xprt->timeout->to_initval) * (xprt->timeout->to_retries + 1); clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->transport_lock); /* TCP Keepalive options */ kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, @@ -2232,7 +2245,7 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, struct rpc_timeout to; unsigned long initval; - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->transport_lock); if (reconnect_timeout < xprt->max_reconnect_timeout) xprt->max_reconnect_timeout = reconnect_timeout; if (connect_timeout < xprt->connect_timeout) { @@ -2249,7 +2262,7 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, xprt->connect_timeout = connect_timeout; } set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->transport_lock); } static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) @@ -2402,25 +2415,6 @@ out: xprt_wake_pending_tasks(xprt, status); } -static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt) -{ - unsigned long start, now = jiffies; - - start = xprt->stat.connect_start + xprt->reestablish_timeout; - if (time_after(start, now)) - return start - now; - return 0; -} - -static void xs_reconnect_backoff(struct rpc_xprt *xprt) -{ - xprt->reestablish_timeout <<= 1; - if (xprt->reestablish_timeout > xprt->max_reconnect_timeout) - xprt->reestablish_timeout = xprt->max_reconnect_timeout; - if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) - xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; -} - /** * xs_connect - connect a socket to a remote endpoint * @xprt: pointer to transport structure @@ -2450,8 +2444,8 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) /* Start by resetting any existing state */ xs_reset_transport(transport); - delay = xs_reconnect_delay(xprt); - xs_reconnect_backoff(xprt); + delay = xprt_reconnect_delay(xprt); + xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); } else dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); @@ -2461,6 +2455,56 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) delay); } +static void xs_wake_disconnect(struct sock_xprt *transport) +{ + if (test_and_clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state)) + xs_tcp_force_close(&transport->xprt); +} + +static void xs_wake_write(struct sock_xprt *transport) +{ + if (test_and_clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state)) + xprt_write_space(&transport->xprt); +} + +static void xs_wake_error(struct sock_xprt *transport) +{ + int sockerr; + int sockerr_len = sizeof(sockerr); + + if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) + return; + mutex_lock(&transport->recv_mutex); + if (transport->sock == NULL) + goto out; + if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) + goto out; + if (kernel_getsockopt(transport->sock, SOL_SOCKET, SO_ERROR, + (char *)&sockerr, &sockerr_len) != 0) + goto out; + if (sockerr < 0) + xprt_wake_pending_tasks(&transport->xprt, sockerr); +out: + mutex_unlock(&transport->recv_mutex); +} + +static void xs_wake_pending(struct sock_xprt *transport) +{ + if (test_and_clear_bit(XPRT_SOCK_WAKE_PENDING, &transport->sock_state)) + xprt_wake_pending_tasks(&transport->xprt, -EAGAIN); +} + +static void xs_error_handle(struct work_struct *work) +{ + struct sock_xprt *transport = container_of(work, + struct sock_xprt, error_worker); + + xs_wake_disconnect(transport); + xs_wake_write(transport); + xs_wake_error(transport); + xs_wake_pending(transport); +} + /** * xs_local_print_stats - display AF_LOCAL socket-specifc stats * @xprt: rpc_xprt struct containing statistics @@ -2745,6 +2789,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = { #ifdef CONFIG_SUNRPC_BACKCHANNEL .bc_setup = xprt_setup_bc, .bc_maxpayload = xs_tcp_bc_maxpayload, + .bc_num_slots = xprt_bc_max_slots, .bc_free_rqst = xprt_free_bc_rqst, .bc_destroy = xprt_destroy_bc, #endif @@ -2873,6 +2918,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) xprt->timeout = &xs_local_default_timeout; INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); + INIT_WORK(&transport->error_worker, xs_error_handle); INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket); switch (sun->sun_family) { @@ -2943,6 +2989,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt->timeout = &xs_udp_default_timeout; INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn); + INIT_WORK(&transport->error_worker, xs_error_handle); INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket); switch (addr->sa_family) { @@ -3024,6 +3071,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) (xprt->timeout->to_retries + 1); INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); + INIT_WORK(&transport->error_worker, xs_error_handle); INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); switch (addr->sa_family) { |