Diffstat (limited to 'net/mptcp/protocol.c')
 net/mptcp/protocol.c | 173 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 115 insertions(+), 58 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2d26b9114373..08dc53f56bc2 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -49,18 +49,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
static struct net_device mptcp_napi_dev;
-/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
- * completed yet or has failed, return the subflow socket.
- * Otherwise return NULL.
- */
-struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
-{
- if (!msk->subflow || READ_ONCE(msk->can_ack))
- return NULL;
-
- return msk->subflow;
-}
-
/* Returns end sequence number of the receiver's advertised window */
static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
{
@@ -116,6 +104,31 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
return 0;
}
+/* If the MPC handshake is not started, returns the first subflow,
+ * allocating it if needed.
+ */
+struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
+{
+ struct sock *sk = (struct sock *)msk;
+ int ret;
+
+ if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ return ERR_PTR(-EINVAL);
+
+ if (!msk->subflow) {
+ if (msk->first)
+ return ERR_PTR(-EINVAL);
+
+ ret = __mptcp_socket_create(msk);
+ if (ret)
+ return ERR_PTR(ret);
+
+ mptcp_sockopt_sync(msk, msk->first);
+ }
+
+ return msk->subflow;
+}
+
static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
{
sk_drops_add(sk, skb);
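Note the changed contract: the helper now returns an ERR_PTR() instead of NULL, so every caller switches from a NULL check to IS_ERR()/PTR_ERR(), as the mptcp_connect(), mptcp_bind() and mptcp_listen() hunks further down do. A minimal sketch of the new calling pattern (the caller name is illustrative):

	static int example_nmpc_user(struct mptcp_sock *msk)
	{
		struct socket *ssock;

		ssock = __mptcp_nmpc_socket(msk);
		if (IS_ERR(ssock))
			/* -EINVAL outside TCPF_CLOSE/TCPF_LISTEN, or the
			 * __mptcp_socket_create() failure code
			 */
			return PTR_ERR(ssock);

		/* on success the first subflow exists and has been synced
		 * via mptcp_sockopt_sync()
		 */
		return 0;
	}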
@@ -1662,13 +1675,31 @@ static void mptcp_set_nospace(struct sock *sk)
static int mptcp_disconnect(struct sock *sk, int flags);
-static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
+static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
size_t len, int *copied_syn)
{
unsigned int saved_flags = msg->msg_flags;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct socket *ssock;
+ struct sock *ssk;
int ret;
+ /* On flags-based fastopen, MPTCP is supposed to create the first
+ * subflow right now. Otherwise we are in the defer_connect path, and
+ * the first subflow must be already present.
+ * Since the defer_connect flag is cleared after the first successful
+ * fastopen attempt, no need to check for additional subflow status.
+ */
+ if (msg->msg_flags & MSG_FASTOPEN) {
+ ssock = __mptcp_nmpc_socket(msk);
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
+ }
+ if (!msk->first)
+ return -EINVAL;
+
+ ssk = msk->first;
+
lock_sock(ssk);
msg->msg_flags |= MSG_DONTWAIT;
msk->connect_flags = O_NONBLOCK;
@@ -1691,6 +1722,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
} else if (ret && ret != -EINPROGRESS) {
mptcp_disconnect(sk, 0);
}
+ inet_sk(sk)->defer_connect = 0;
return ret;
}
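The two cases the comment distinguishes correspond to the two standard user-space TFO idioms. A hedged sketch of both, assuming a kernel with MPTCP fastopen support; function names are illustrative and error handling is trimmed:

	#include <errno.h>
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>
	#include <unistd.h>

	#ifndef IPPROTO_MPTCP
	#define IPPROTO_MPTCP 262
	#endif
	#ifndef TCP_FASTOPEN_CONNECT
	#define TCP_FASTOPEN_CONNECT 30
	#endif

	/* flags-based path: the first sendto() carries MSG_FASTOPEN, so the
	 * kernel must create the first subflow inside sendmsg()
	 */
	static int tfo_via_msg_fastopen(const struct sockaddr_in *daddr,
					const void *buf, size_t len)
	{
		int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

		if (fd < 0)
			return -1;
		if (sendto(fd, buf, len, MSG_FASTOPEN,
			   (const struct sockaddr *)daddr, sizeof(*daddr)) < 0 &&
		    errno != EINPROGRESS) {
			close(fd);
			return -1;
		}
		return fd;
	}

	/* defer_connect path: connect() returns at once and the SYN is
	 * deferred to the first write, so the first subflow already exists
	 * when sendmsg() runs
	 */
	static int tfo_via_defer_connect(const struct sockaddr_in *daddr,
					 const void *buf, size_t len)
	{
		int one = 1;
		int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

		if (fd < 0)
			return -1;
		setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &one, sizeof(one));
		if (connect(fd, (const struct sockaddr *)daddr, sizeof(*daddr)) < 0 ||
		    send(fd, buf, len, 0) < 0) {
			close(fd);
			return -1;
		}
		return fd;
	}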
@@ -1699,7 +1731,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct page_frag *pfrag;
- struct socket *ssock;
size_t copied = 0;
int ret = 0;
long timeo;
@@ -1709,12 +1740,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
- msg->msg_flags & MSG_FASTOPEN))) {
+ if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) {
int copied_syn = 0;
- ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
+ ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn);
copied += copied_syn;
if (ret == -EINPROGRESS && copied_syn > 0)
goto out;
@@ -2315,7 +2344,26 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
unsigned int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- bool need_push, dispose_it;
+ bool dispose_it, need_push = false;
+
+ /* If the first subflow moved to a close state before accept, e.g. due
+ * to an incoming reset, mptcp either:
+ * - if either the subflow or the msk is dead, destroys the context
+ * (the subflow socket is deleted by inet_child_forget) and the msk
+ * - otherwise does nothing for the moment and takes action at accept
+ * and/or listener shutdown - user-space must be able to accept() the
+ * closed socket.
+ */
+ if (msk->in_accept_queue && msk->first == ssk) {
+ if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
+ return;
+
+ /* ensure later check in mptcp_worker() will dispose the msk */
+ sock_set_flag(sk, SOCK_DEAD);
+ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ mptcp_subflow_drop_ctx(ssk);
+ goto out_release;
+ }
dispose_it = !msk->subflow || ssk != msk->subflow->sk;
if (dispose_it)
@@ -2351,28 +2399,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (!inet_csk(ssk)->icsk_ulp_ops) {
WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
kfree_rcu(subflow, rcu);
- } else if (msk->in_accept_queue && msk->first == ssk) {
- /* if the first subflow moved to a close state, e.g. due to
- * incoming reset and we reach here before inet_child_forget()
- * the TCP stack could later try to close it via
- * inet_csk_listen_stop(), or deliver it to the user space via
- * accept().
- * We can't delete the subflow - or risk a double free - nor let
- * the msk survive - or will be leaked in the non accept scenario:
- * fallback and let TCP cope with the subflow cleanup.
- */
- WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
- mptcp_subflow_drop_ctx(ssk);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
- if (ssk->sk_state == TCP_LISTEN)
+ if (ssk->sk_state == TCP_LISTEN) {
+ tcp_set_state(ssk, TCP_CLOSE);
+ mptcp_subflow_queue_clean(sk, ssk);
+ inet_csk_listen_stop(ssk);
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
+ }
__tcp_close(ssk, 0);
/* close acquired an extra ref */
__sock_put(ssk);
}
+
+out_release:
release_sock(ssk);
sock_put(ssk);
@@ -2427,21 +2469,14 @@ static void __mptcp_close_subflow(struct sock *sk)
mptcp_close_ssk(sk, ssk, subflow);
}
- /* if the MPC subflow has been closed before the msk is accepted,
- * msk will never be accept-ed, close it now
- */
- if (!msk->first && msk->in_accept_queue) {
- sock_set_flag(sk, SOCK_DEAD);
- inet_sk_state_store(sk, TCP_CLOSE);
- }
}
-static bool mptcp_check_close_timeout(const struct sock *sk)
+static bool mptcp_should_close(const struct sock *sk)
{
s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
struct mptcp_subflow_context *subflow;
- if (delta >= TCP_TIMEWAIT_LEN)
+ if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
return true;
/* if all subflows are in closed status don't bother with additional
@@ -2626,7 +2661,7 @@ static void mptcp_worker(struct work_struct *work)
lock_sock(sk);
state = sk->sk_state;
- if (unlikely(state == TCP_CLOSE))
+ if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto unlock;
mptcp_check_data_fin_ack(sk);
@@ -2649,7 +2684,7 @@ static void mptcp_worker(struct work_struct *work)
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_check_close_timeout(sk)) {
+ if (mptcp_should_close(sk)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
}
@@ -2728,10 +2763,6 @@ static int mptcp_init_sock(struct sock *sk)
if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net))
return -ENOMEM;
- ret = __mptcp_socket_create(mptcp_sk(sk));
- if (ret)
- return ret;
-
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
@@ -2895,6 +2926,14 @@ static void __mptcp_destroy_sock(struct sock *sk)
sock_put(sk);
}
+void __mptcp_unaccepted_force_close(struct sock *sk)
+{
+ sock_set_flag(sk, SOCK_DEAD);
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_do_fastclose(sk);
+ __mptcp_destroy_sock(sk);
+}
+
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
/* Concurrent splices from sk_receive_queue into receive_queue will
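The new helper is deliberately non-static: it is meant for the listener-teardown path outside this file, which must dispose of msks still parked in the accept queue. A hedged sketch of such a caller (the function name is illustrative), following the usual lock/close/unlock pattern:

	/* illustrative caller: force-close an msk queued on a listener that
	 * is going away before user space could accept() it
	 */
	static void example_flush_unaccepted_msk(struct sock *msk_sk)
	{
		lock_sock_nested(msk_sk, SINGLE_DEPTH_NESTING);
		/* marks the socket dead, moves it to TCP_CLOSE, performs the
		 * MPTCP fastclose and releases the msk resources
		 */
		__mptcp_unaccepted_force_close(msk_sk);
		release_sock(msk_sk);
	}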
@@ -2928,10 +2967,13 @@ bool __mptcp_close(struct sock *sk, long timeout)
goto cleanup;
}
- if (mptcp_check_readable(msk)) {
- /* the msk has read data, do the MPTCP equivalent of TCP reset */
+ if (mptcp_check_readable(msk) || timeout < 0) {
+ /* If the msk has read data, or the caller explicitly asks for it,
+ * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
+ */
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
+ timeout = 0;
} else if (mptcp_close_state(sk)) {
__mptcp_wr_shutdown(sk);
}
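A negative timeout is the new in-kernel convention for requesting fastclose explicitly. A hedged sketch of a caller, mirroring the lock/close/cancel-work sequence that mptcp_close() itself uses (the function name is illustrative):

	static void example_force_fastclose(struct sock *sk)
	{
		bool do_cancel_work;

		lock_sock(sk);
		do_cancel_work = __mptcp_close(sk, -1); /* timeout < 0 -> fastclose */
		release_sock(sk);
		if (do_cancel_work)
			mptcp_cancel_work(sk);
		sock_put(sk);
	}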
@@ -3143,7 +3185,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
struct socket *listener;
struct sock *newsk;
- listener = __mptcp_nmpc_socket(msk);
+ listener = msk->subflow;
if (WARN_ON_ONCE(!listener)) {
*err = -EINVAL;
return NULL;
@@ -3363,7 +3405,7 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum)
struct mptcp_sock *msk = mptcp_sk(sk);
struct socket *ssock;
- ssock = __mptcp_nmpc_socket(msk);
+ ssock = msk->subflow;
pr_debug("msk=%p, subflow=%p", msk, ssock);
if (WARN_ON_ONCE(!ssock))
return -EINVAL;
@@ -3551,8 +3593,8 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int err = -EINVAL;
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock)
- return -EINVAL;
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_SYN_SENT);
@@ -3640,8 +3682,8 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
lock_sock(sock->sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
- err = -EINVAL;
+ if (IS_ERR(ssock)) {
+ err = PTR_ERR(ssock);
goto unlock;
}
@@ -3677,8 +3719,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
- err = -EINVAL;
+ if (IS_ERR(ssock)) {
+ err = PTR_ERR(ssock);
goto unlock;
}
@@ -3709,7 +3751,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
pr_debug("msk=%p", msk);
- ssock = __mptcp_nmpc_socket(msk);
+ /* Buggy applications can call accept on socket states other than
+ * LISTEN, but there is no need to allocate the first subflow just
+ * to error out.
+ */
+ ssock = msk->subflow;
if (!ssock)
return -EINVAL;
@@ -3733,6 +3778,18 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk->sk_socket)
mptcp_sock_graft(ssk, newsock);
}
+
+ /* Do late cleanup for the first subflow as necessary. Also
+ * deal with bad peers not doing a complete shutdown.
+ */
+ if (msk->first &&
+ unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
+ __mptcp_close_ssk(newsk, msk->first,
+ mptcp_subflow_ctx(msk->first), 0);
+ if (unlikely(list_empty(&msk->conn_list)))
+ inet_sk_state_store(newsk, TCP_CLOSE);
+ }
+
release_sock(newsk);
}
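From user space, the net effect of this late cleanup is that accept() still succeeds even when the peer has already reset the queued connection; the failure only surfaces on the first I/O. A hedged sketch (the function name is illustrative):

	#include <sys/socket.h>
	#include <unistd.h>

	/* illustrative: accept() returns even if the queued connection is
	 * already closed; the first read reports EOF or ECONNRESET
	 */
	static void example_accept_closed(int listen_fd)
	{
		char buf[64];
		int cfd = accept(listen_fd, NULL, NULL);

		if (cfd < 0)
			return;
		if (recv(cfd, buf, sizeof(buf), 0) <= 0)
			; /* 0 -> clean EOF, -1 with ECONNRESET -> peer reset */
		close(cfd);
	}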