summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/mib.c3
-rw-r--r--net/mptcp/mib.h3
-rw-r--r--net/mptcp/options.c292
-rw-r--r--net/mptcp/pm.c64
-rw-r--r--net/mptcp/pm_netlink.c328
-rw-r--r--net/mptcp/protocol.c62
-rw-r--r--net/mptcp/protocol.h89
-rw-r--r--net/mptcp/subflow.c43
8 files changed, 540 insertions, 344 deletions
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 3780c29c321d..eb2dc6dbe212 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -10,9 +10,12 @@
static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE),
+ SNMP_MIB_ITEM("MPCapableSYNTX", MPTCP_MIB_MPCAPABLEACTIVE),
+ SNMP_MIB_ITEM("MPCapableSYNACKRX", MPTCP_MIB_MPCAPABLEACTIVEACK),
SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
+ SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 72afbc135f8e..f0da4f060fe1 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -3,9 +3,12 @@
enum linux_mptcp_mib_field {
MPTCP_MIB_NUM = 0,
MPTCP_MIB_MPCAPABLEPASSIVE, /* Received SYN with MP_CAPABLE */
+ MPTCP_MIB_MPCAPABLEACTIVE, /* Sent SYN with MP_CAPABLE */
+ MPTCP_MIB_MPCAPABLEACTIVEACK, /* Received SYN/ACK with MP_CAPABLE */
MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
+ MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 89a4225ed321..d51c3ad54d9a 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -26,6 +26,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
int expected_opsize;
u8 version;
u8 flags;
+ u8 i;
switch (subtype) {
case MPTCPOPT_MP_CAPABLE:
@@ -219,45 +220,45 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (!mp_opt->echo) {
if (opsize == TCPOLEN_MPTCP_ADD_ADDR ||
opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT)
- mp_opt->family = MPTCP_ADDR_IPVERSION_4;
+ mp_opt->addr.family = AF_INET;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 ||
opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT)
- mp_opt->family = MPTCP_ADDR_IPVERSION_6;
+ mp_opt->addr.family = AF_INET6;
#endif
else
break;
} else {
if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE ||
opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT)
- mp_opt->family = MPTCP_ADDR_IPVERSION_4;
+ mp_opt->addr.family = AF_INET;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE ||
opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT)
- mp_opt->family = MPTCP_ADDR_IPVERSION_6;
+ mp_opt->addr.family = AF_INET6;
#endif
else
break;
}
mp_opt->add_addr = 1;
- mp_opt->addr_id = *ptr++;
- if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
- memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4);
+ mp_opt->addr.id = *ptr++;
+ if (mp_opt->addr.family == AF_INET) {
+ memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4);
ptr += 4;
if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT ||
opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) {
- mp_opt->port = get_unaligned_be16(ptr);
+ mp_opt->addr.port = htons(get_unaligned_be16(ptr));
ptr += 2;
}
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
else {
- memcpy(mp_opt->addr6.s6_addr, (u8 *)ptr, 16);
+ memcpy(mp_opt->addr.addr6.s6_addr, (u8 *)ptr, 16);
ptr += 16;
if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT ||
opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) {
- mp_opt->port = get_unaligned_be16(ptr);
+ mp_opt->addr.port = htons(get_unaligned_be16(ptr));
ptr += 2;
}
}
@@ -267,19 +268,22 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 8;
}
pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d",
- (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "",
- mp_opt->addr_id, mp_opt->ahmac, mp_opt->echo, mp_opt->port);
+ (mp_opt->addr.family == AF_INET6) ? "6" : "",
+ mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port));
break;
case MPTCPOPT_RM_ADDR:
- if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE)
+ if (opsize < TCPOLEN_MPTCP_RM_ADDR_BASE + 1 ||
+ opsize > TCPOLEN_MPTCP_RM_ADDR_BASE + MPTCP_RM_IDS_MAX)
break;
ptr++;
mp_opt->rm_addr = 1;
- mp_opt->rm_id = *ptr++;
- pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
+ mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE;
+ for (i = 0; i < mp_opt->rm_list.nr; i++)
+ mp_opt->rm_list.ids[i] = *ptr++;
+ pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr);
break;
case MPTCPOPT_MP_PRIO:
@@ -301,6 +305,18 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->fastclose = 1;
break;
+ case MPTCPOPT_RST:
+ if (opsize != TCPOLEN_MPTCP_RST)
+ break;
+
+ if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
+ break;
+ mp_opt->reset = 1;
+ flags = *ptr++;
+ mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
+ mp_opt->reset_reason = *ptr;
+ break;
+
default:
break;
}
@@ -319,10 +335,11 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->add_addr = 0;
mp_opt->ahmac = 0;
mp_opt->fastclose = 0;
- mp_opt->port = 0;
+ mp_opt->addr.port = 0;
mp_opt->rm_addr = 0;
mp_opt->dss = 0;
mp_opt->mp_prio = 0;
+ mp_opt->reset = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -566,39 +583,32 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
return true;
}
-static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
- struct in_addr *addr, u16 port)
-{
- u8 hmac[SHA256_DIGEST_SIZE];
- u8 msg[7];
-
- msg[0] = addr_id;
- memcpy(&msg[1], &addr->s_addr, 4);
- msg[5] = port >> 8;
- msg[6] = port & 0xFF;
-
- mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
-
- return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
-}
-
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
- struct in6_addr *addr, u16 port)
+static u64 add_addr_generate_hmac(u64 key1, u64 key2,
+ struct mptcp_addr_info *addr)
{
+ u16 port = ntohs(addr->port);
u8 hmac[SHA256_DIGEST_SIZE];
u8 msg[19];
+ int i = 0;
- msg[0] = addr_id;
- memcpy(&msg[1], &addr->s6_addr, 16);
- msg[17] = port >> 8;
- msg[18] = port & 0xFF;
+ msg[i++] = addr->id;
+ if (addr->family == AF_INET) {
+ memcpy(&msg[i], &addr->addr.s_addr, 4);
+ i += 4;
+ }
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (addr->family == AF_INET6) {
+ memcpy(&msg[i], &addr->addr6.s6_addr, 16);
+ i += 16;
+ }
+#endif
+ msg[i++] = port >> 8;
+ msg[i++] = port & 0xFF;
- mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
+ mptcp_crypto_hmac_sha(key1, key2, msg, i, hmac);
return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
}
-#endif
static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb,
unsigned int *size,
@@ -609,13 +619,13 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
bool drop_other_suboptions = false;
unsigned int opt_size = *size;
- struct mptcp_addr_info saddr;
bool echo;
bool port;
int len;
if ((mptcp_pm_should_add_signal_ipv6(msk) ||
- mptcp_pm_should_add_signal_port(msk)) &&
+ mptcp_pm_should_add_signal_port(msk) ||
+ mptcp_pm_should_add_signal_echo(msk)) &&
skb && skb_is_tcp_pure_ack(skb)) {
pr_debug("drop other suboptions");
opts->suboptions = 0;
@@ -626,45 +636,24 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
}
if (!mptcp_pm_should_add_signal(msk) ||
- !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo, &port)))
+ !(mptcp_pm_add_addr_signal(msk, remaining, &opts->addr, &echo, &port)))
return false;
- len = mptcp_add_addr_len(saddr.family, echo, port);
+ len = mptcp_add_addr_len(opts->addr.family, echo, port);
if (remaining < len)
return false;
*size = len;
if (drop_other_suboptions)
*size -= opt_size;
- opts->addr_id = saddr.id;
- if (port)
- opts->port = ntohs(saddr.port);
- if (saddr.family == AF_INET) {
- opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
- opts->addr = saddr.addr;
- if (!echo) {
- opts->ahmac = add_addr_generate_hmac(msk->local_key,
- msk->remote_key,
- opts->addr_id,
- &opts->addr,
- opts->port);
- }
- }
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- else if (saddr.family == AF_INET6) {
- opts->suboptions |= OPTION_MPTCP_ADD_ADDR6;
- opts->addr6 = saddr.addr6;
- if (!echo) {
- opts->ahmac = add_addr6_generate_hmac(msk->local_key,
- msk->remote_key,
- opts->addr_id,
- &opts->addr6,
- opts->port);
- }
+ opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
+ if (!echo) {
+ opts->ahmac = add_addr_generate_hmac(msk->local_key,
+ msk->remote_key,
+ &opts->addr);
}
-#endif
pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
- opts->addr_id, opts->ahmac, echo, opts->port);
+ opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port));
return true;
}
@@ -676,20 +665,25 @@ static bool mptcp_established_options_rm_addr(struct sock *sk,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- u8 rm_id;
+ struct mptcp_rm_list rm_list;
+ int i, len;
if (!mptcp_pm_should_rm_signal(msk) ||
- !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_id)))
+ !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_list)))
return false;
- if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE)
+ len = mptcp_rm_addr_len(&rm_list);
+ if (len < 0)
+ return false;
+ if (remaining < len)
return false;
- *size = TCPOLEN_MPTCP_RM_ADDR_BASE;
+ *size = len;
opts->suboptions |= OPTION_MPTCP_RM_ADDR;
- opts->rm_id = rm_id;
+ opts->rm_list = rm_list;
- pr_debug("rm_id=%d", opts->rm_id);
+ for (i = 0; i < opts->rm_list.nr; i++)
+ pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]);
return true;
}
@@ -717,6 +711,22 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
return true;
}
+static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
+ unsigned int *size,
+ unsigned int remaining,
+ struct mptcp_out_options *opts)
+{
+ const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+ if (remaining < TCPOLEN_MPTCP_RST)
+ return;
+
+ *size = TCPOLEN_MPTCP_RST;
+ opts->suboptions |= OPTION_MPTCP_RST;
+ opts->reset_transient = subflow->reset_transient;
+ opts->reset_reason = subflow->reset_reason;
+}
+
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int remaining,
struct mptcp_out_options *opts)
@@ -732,11 +742,10 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
if (unlikely(__mptcp_check_fallback(msk)))
return false;
- /* prevent adding of any MPTCP related options on reset packet
- * until we support MP_TCPRST/MP_FASTCLOSE
- */
- if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
- return false;
+ if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
+ mptcp_established_options_rst(sk, skb, size, remaining, opts);
+ return true;
+ }
snd_data_fin = mptcp_data_fin_enabled(msk);
if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
@@ -873,7 +882,7 @@ fully_established:
subflow->pm_notified = 1;
if (subflow->mp_join) {
clear_3rdack_retransmission(ssk);
- mptcp_pm_subflow_established(msk, subflow);
+ mptcp_pm_subflow_established(msk);
} else {
mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
}
@@ -943,7 +952,7 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool us
* should match. If they mismatch, the peer is misbehaving and
* we will prefer the most recent information.
*/
- if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
+ if (READ_ONCE(msk->rcv_data_fin))
return false;
WRITE_ONCE(msk->rcv_data_fin_seq,
@@ -961,18 +970,9 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
if (mp_opt->echo)
return true;
- if (mp_opt->family == MPTCP_ADDR_IPVERSION_4)
- hmac = add_addr_generate_hmac(msk->remote_key,
- msk->local_key,
- mp_opt->addr_id, &mp_opt->addr,
- mp_opt->port);
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- else
- hmac = add_addr6_generate_hmac(msk->remote_key,
- msk->local_key,
- mp_opt->addr_id, &mp_opt->addr6,
- mp_opt->port);
-#endif
+ hmac = add_addr_generate_hmac(msk->remote_key,
+ msk->local_key,
+ &mp_opt->addr);
pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
msk, (unsigned long long)hmac,
@@ -1013,36 +1013,23 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
- struct mptcp_addr_info addr;
-
- addr.port = htons(mp_opt.port);
- addr.id = mp_opt.addr_id;
- if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) {
- addr.family = AF_INET;
- addr.addr = mp_opt.addr;
- }
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) {
- addr.family = AF_INET6;
- addr.addr6 = mp_opt.addr6;
- }
-#endif
if (!mp_opt.echo) {
- mptcp_pm_add_addr_received(msk, &addr);
+ mptcp_pm_add_addr_received(msk, &mp_opt.addr);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
} else {
- mptcp_pm_del_add_timer(msk, &addr);
+ mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
+ mptcp_pm_del_add_timer(msk, &mp_opt.addr);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
}
- if (mp_opt.port)
+ if (mp_opt.addr.port)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
mp_opt.add_addr = 0;
}
if (mp_opt.rm_addr) {
- mptcp_pm_rm_addr_received(msk, mp_opt.rm_id);
+ mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
mp_opt.rm_addr = 0;
}
@@ -1052,6 +1039,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mp_opt.mp_prio = 0;
}
+ if (mp_opt.reset) {
+ subflow->reset_seen = 1;
+ subflow->reset_reason = mp_opt.reset_reason;
+ subflow->reset_transient = mp_opt.reset_transient;
+ }
+
if (!mp_opt.dss)
return;
@@ -1160,20 +1153,16 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
}
mp_capable_done:
- if ((OPTION_MPTCP_ADD_ADDR
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- | OPTION_MPTCP_ADD_ADDR6
-#endif
- ) & opts->suboptions) {
+ if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
u8 echo = MPTCP_ADDR_ECHO;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions)
+ if (opts->addr.family == AF_INET6)
len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
#endif
- if (opts->port)
+ if (opts->addr.port)
len += TCPOLEN_MPTCP_PORT_LEN;
if (opts->ahmac) {
@@ -1182,28 +1171,30 @@ mp_capable_done:
}
*ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
- len, echo, opts->addr_id);
- if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
- memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4);
+ len, echo, opts->addr.id);
+ if (opts->addr.family == AF_INET) {
+ memcpy((u8 *)ptr, (u8 *)&opts->addr.addr.s_addr, 4);
ptr += 1;
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- else if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
- memcpy((u8 *)ptr, opts->addr6.s6_addr, 16);
+ else if (opts->addr.family == AF_INET6) {
+ memcpy((u8 *)ptr, opts->addr.addr6.s6_addr, 16);
ptr += 4;
}
#endif
- if (!opts->port) {
+ if (!opts->addr.port) {
if (opts->ahmac) {
put_unaligned_be64(opts->ahmac, ptr);
ptr += 2;
}
} else {
+ u16 port = ntohs(opts->addr.port);
+
if (opts->ahmac) {
u8 *bptr = (u8 *)ptr;
- put_unaligned_be16(opts->port, bptr);
+ put_unaligned_be16(port, bptr);
bptr += 2;
put_unaligned_be64(opts->ahmac, bptr);
bptr += 8;
@@ -1212,7 +1203,7 @@ mp_capable_done:
ptr += 3;
} else {
- put_unaligned_be32(opts->port << 16 |
+ put_unaligned_be32(port << 16 |
TCPOPT_NOP << 8 |
TCPOPT_NOP, ptr);
ptr += 1;
@@ -1221,9 +1212,23 @@ mp_capable_done:
}
if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
+ u8 i = 1;
+
*ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
- TCPOLEN_MPTCP_RM_ADDR_BASE,
- 0, opts->rm_id);
+ TCPOLEN_MPTCP_RM_ADDR_BASE + opts->rm_list.nr,
+ 0, opts->rm_list.ids[0]);
+
+ while (i < opts->rm_list.nr) {
+ u8 id1, id2, id3, id4;
+
+ id1 = opts->rm_list.ids[i];
+ id2 = i + 1 < opts->rm_list.nr ? opts->rm_list.ids[i + 1] : TCPOPT_NOP;
+ id3 = i + 2 < opts->rm_list.nr ? opts->rm_list.ids[i + 2] : TCPOPT_NOP;
+ id4 = i + 3 < opts->rm_list.nr ? opts->rm_list.ids[i + 3] : TCPOPT_NOP;
+ put_unaligned_be32(id1 << 24 | id2 << 16 | id3 << 8 | id4, ptr);
+ ptr += 1;
+ i += 4;
+ }
}
if (OPTION_MPTCP_PRIO & opts->suboptions) {
@@ -1265,6 +1270,12 @@ mp_capable_done:
ptr += 5;
}
+ if (OPTION_MPTCP_RST & opts->suboptions)
+ *ptr++ = mptcp_option(MPTCPOPT_RST,
+ TCPOLEN_MPTCP_RST,
+ opts->reset_transient,
+ opts->reset_reason);
+
if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
struct mptcp_ext *mpext = &opts->ext_copy;
u8 len = TCPOLEN_MPTCP_DSS_BASE;
@@ -1316,3 +1327,20 @@ mp_capable_done:
if (tp)
mptcp_set_rwin(tp);
}
+
+__be32 mptcp_get_reset_option(const struct sk_buff *skb)
+{
+ const struct mptcp_ext *ext = mptcp_get_ext(skb);
+ u8 flags, reason;
+
+ if (ext) {
+ flags = ext->reset_transient;
+ reason = ext->reset_reason;
+
+ return mptcp_option(MPTCPOPT_RST, TCPOLEN_MPTCP_RST,
+ flags, reason);
+ }
+
+ return htonl(0u);
+}
+EXPORT_SYMBOL_GPL(mptcp_get_reset_option);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 6fd4b2c1b076..9d00fa6d22e9 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -14,7 +14,7 @@
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
- bool echo, bool port)
+ bool echo)
{
u8 add_addr = READ_ONCE(msk->pm.addr_signal);
@@ -33,35 +33,36 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
if (addr->family == AF_INET6)
add_addr |= BIT(MPTCP_ADD_ADDR_IPV6);
- if (port)
+ if (addr->port)
add_addr |= BIT(MPTCP_ADD_ADDR_PORT);
WRITE_ONCE(msk->pm.addr_signal, add_addr);
return 0;
}
-int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id)
+int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
u8 rm_addr = READ_ONCE(msk->pm.addr_signal);
- pr_debug("msk=%p, local_id=%d", msk, local_id);
+ pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
if (rm_addr) {
pr_warn("addr_signal error, rm_addr=%d", rm_addr);
return -EINVAL;
}
- msk->pm.rm_id = local_id;
+ msk->pm.rm_list_tx = *rm_list;
rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
WRITE_ONCE(msk->pm.addr_signal, rm_addr);
+ mptcp_pm_nl_addr_send_ack(msk);
return 0;
}
-int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id)
+int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
- pr_debug("msk=%p, local_id=%d", msk, local_id);
+ pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
spin_lock_bh(&msk->pm.lock);
- mptcp_pm_nl_rm_subflow_received(msk, local_id);
+ mptcp_pm_nl_rm_subflow_received(msk, rm_list);
spin_unlock_bh(&msk->pm.lock);
return 0;
}
@@ -152,8 +153,7 @@ void mptcp_pm_connection_closed(struct mptcp_sock *msk)
pr_debug("msk=%p", msk);
}
-void mptcp_pm_subflow_established(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
+void mptcp_pm_subflow_established(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
@@ -188,7 +188,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
spin_lock_bh(&pm->lock);
if (!READ_ONCE(pm->accept_addr)) {
- mptcp_pm_announce_addr(msk, addr, true, addr->port);
+ mptcp_pm_announce_addr(msk, addr, true);
mptcp_pm_add_addr_send_ack(msk);
} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
pm->remote = *addr;
@@ -197,6 +197,21 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
spin_unlock_bh(&pm->lock);
}
+void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr)
+{
+ struct mptcp_pm_data *pm = &msk->pm;
+
+ pr_debug("msk=%p", msk);
+
+ spin_lock_bh(&pm->lock);
+
+ if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending))
+ mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);
+
+ spin_unlock_bh(&pm->lock);
+}
+
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
{
if (!mptcp_pm_should_add_signal(msk))
@@ -205,17 +220,20 @@ void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
}
-void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id)
+void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
+ const struct mptcp_rm_list *rm_list)
{
struct mptcp_pm_data *pm = &msk->pm;
+ u8 i;
- pr_debug("msk=%p remote_id=%d", msk, rm_id);
+ pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr);
- mptcp_event_addr_removed(msk, rm_id);
+ for (i = 0; i < rm_list->nr; i++)
+ mptcp_event_addr_removed(msk, rm_list->ids[i]);
spin_lock_bh(&pm->lock);
mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
- pm->rm_id = rm_id;
+ pm->rm_list_rx = *rm_list;
spin_unlock_bh(&pm->lock);
}
@@ -258,9 +276,9 @@ out_unlock:
}
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
- u8 *rm_id)
+ struct mptcp_rm_list *rm_list)
{
- int ret = false;
+ int ret = false, len;
spin_lock_bh(&msk->pm.lock);
@@ -268,10 +286,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
if (!mptcp_pm_should_rm_signal(msk))
goto out_unlock;
- if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE)
+ len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
+ if (len < 0) {
+ WRITE_ONCE(msk->pm.addr_signal, 0);
+ goto out_unlock;
+ }
+ if (remaining < len)
goto out_unlock;
- *rm_id = msk->pm.rm_id;
+ *rm_list = msk->pm.rm_list_tx;
WRITE_ONCE(msk->pm.addr_signal, 0);
ret = true;
@@ -291,7 +314,8 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
msk->pm.add_addr_accepted = 0;
msk->pm.local_addr_used = 0;
msk->pm.subflows = 0;
- msk->pm.rm_id = 0;
+ msk->pm.rm_list_tx.nr = 0;
+ msk->pm.rm_list_rx.nr = 0;
WRITE_ONCE(msk->pm.work_pending, false);
WRITE_ONCE(msk->pm.addr_signal, 0);
WRITE_ONCE(msk->pm.accept_addr, false);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 8e8e35fa4002..6ba040897738 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -25,6 +25,8 @@ static int pm_nl_pernet_id;
struct mptcp_pm_addr_entry {
struct list_head list;
struct mptcp_addr_info addr;
+ u8 flags;
+ int ifindex;
struct rcu_head rcu;
struct socket *lsk;
};
@@ -56,8 +58,6 @@ struct pm_nl_pernet {
#define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3
-static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
-
static bool addresses_equal(const struct mptcp_addr_info *a,
struct mptcp_addr_info *b, bool use_port)
{
@@ -140,6 +140,24 @@ static bool lookup_subflow_by_saddr(const struct list_head *list,
return false;
}
+static bool lookup_subflow_by_daddr(const struct list_head *list,
+ struct mptcp_addr_info *daddr)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_addr_info cur;
+ struct sock_common *skc;
+
+ list_for_each_entry(subflow, list, node) {
+ skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
+
+ remote_address(skc, &cur);
+ if (addresses_equal(&cur, daddr, daddr->port))
+ return true;
+ }
+
+ return false;
+}
+
static struct mptcp_pm_addr_entry *
select_local_address(const struct pm_nl_pernet *pernet,
struct mptcp_sock *msk)
@@ -152,7 +170,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
rcu_read_lock();
__mptcp_flush_join_list(msk);
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
continue;
if (entry->addr.family != sk->sk_family) {
@@ -190,7 +208,7 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
* can lead to additional addresses not being announced.
*/
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
continue;
if (i++ == pos) {
ret = entry;
@@ -245,9 +263,9 @@ static void check_work_pending(struct mptcp_sock *msk)
WRITE_ONCE(msk->pm.work_pending, false);
}
-static struct mptcp_pm_add_entry *
-lookup_anno_list_by_saddr(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr)
+struct mptcp_pm_add_entry *
+mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr)
{
struct mptcp_pm_add_entry *entry;
@@ -308,7 +326,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
if (!mptcp_pm_should_add_signal(msk)) {
pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id);
- mptcp_pm_announce_addr(msk, &entry->addr, false, entry->addr.port);
+ mptcp_pm_announce_addr(msk, &entry->addr, false);
mptcp_pm_add_addr_send_ack(msk);
entry->retrans_times++;
}
@@ -319,6 +337,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
spin_unlock_bh(&msk->pm.lock);
+ if (entry->retrans_times == ADD_ADDR_RETRANS_MAX)
+ mptcp_pm_subflow_established(msk);
+
out:
__sock_put(sk);
}
@@ -331,7 +352,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct sock *sk = (struct sock *)msk;
spin_lock_bh(&msk->pm.lock);
- entry = lookup_anno_list_by_saddr(msk, addr);
+ entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
if (entry)
entry->retrans_times = ADD_ADDR_RETRANS_MAX;
spin_unlock_bh(&msk->pm.lock);
@@ -351,7 +372,7 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
lockdep_assert_held(&msk->pm.lock);
- if (lookup_anno_list_by_saddr(msk, &entry->addr))
+ if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr))
return false;
add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC);
@@ -417,8 +438,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (local) {
if (mptcp_pm_alloc_anno_list(msk, local)) {
msk->pm.add_addr_signaled++;
- mptcp_pm_announce_addr(msk, &local->addr, false, local->addr.port);
- mptcp_pm_nl_add_addr_send_ack(msk);
+ mptcp_pm_announce_addr(msk, &local->addr, false);
+ mptcp_pm_nl_addr_send_ack(msk);
}
} else {
/* pick failed, avoid fourther attempts later */
@@ -440,7 +461,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
check_work_pending(msk);
remote_address((struct sock_common *)sk, &remote);
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect(sk, &local->addr, &remote);
+ __mptcp_subflow_connect(sk, &local->addr, &remote,
+ local->flags, local->ifindex);
spin_lock_bh(&msk->pm.lock);
return;
}
@@ -468,7 +490,6 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
struct mptcp_addr_info remote;
struct mptcp_addr_info local;
unsigned int subflows_max;
- bool use_port = false;
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
@@ -476,6 +497,10 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
pr_debug("accepted %d:%d remote family %d",
msk->pm.add_addr_accepted, add_addr_accept_max,
msk->pm.remote.family);
+
+ if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
+ goto add_addr_echo;
+
msk->pm.add_addr_accepted++;
msk->pm.subflows++;
if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
@@ -488,37 +513,37 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
remote = msk->pm.remote;
if (!remote.port)
remote.port = sk->sk_dport;
- else
- use_port = true;
memset(&local, 0, sizeof(local));
local.family = remote.family;
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect(sk, &local, &remote);
+ __mptcp_subflow_connect(sk, &local, &remote, 0, 0);
spin_lock_bh(&msk->pm.lock);
- mptcp_pm_announce_addr(msk, &remote, true, use_port);
- mptcp_pm_nl_add_addr_send_ack(msk);
+add_addr_echo:
+ mptcp_pm_announce_addr(msk, &msk->pm.remote, true);
+ mptcp_pm_nl_addr_send_ack(msk);
}
-static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
+void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
msk_owned_by_me(msk);
lockdep_assert_held(&msk->pm.lock);
- if (!mptcp_pm_should_add_signal(msk))
+ if (!mptcp_pm_should_add_signal(msk) &&
+ !mptcp_pm_should_rm_signal(msk))
return;
__mptcp_flush_join_list(msk);
subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
if (subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- u8 add_addr;
spin_unlock_bh(&msk->pm.lock);
- pr_debug("send ack for add_addr%s%s",
+ pr_debug("send ack for %s%s%s",
+ mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr",
mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");
@@ -526,13 +551,6 @@ static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
tcp_send_ack(ssk);
release_sock(ssk);
spin_lock_bh(&msk->pm.lock);
-
- add_addr = READ_ONCE(msk->pm.addr_signal);
- if (mptcp_pm_should_add_signal_ipv6(msk))
- add_addr &= ~BIT(MPTCP_ADD_ADDR_IPV6);
- if (mptcp_pm_should_add_signal_port(msk))
- add_addr &= ~BIT(MPTCP_ADD_ADDR_PORT);
- WRITE_ONCE(msk->pm.addr_signal, add_addr);
}
}
@@ -571,43 +589,68 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
return -EINVAL;
}
-static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
+static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
+ const struct mptcp_rm_list *rm_list,
+ enum linux_mptcp_mib_field rm_type)
{
struct mptcp_subflow_context *subflow, *tmp;
struct sock *sk = (struct sock *)msk;
+ u8 i;
- pr_debug("address rm_id %d", msk->pm.rm_id);
+ pr_debug("%s rm_list_nr %d",
+ rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr);
msk_owned_by_me(msk);
- if (!msk->pm.rm_id)
+ if (!rm_list->nr)
return;
if (list_empty(&msk->conn_list))
return;
- list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+ for (i = 0; i < rm_list->nr; i++) {
+ list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+ u8 id = subflow->local_id;
- if (msk->pm.rm_id != subflow->remote_id)
- continue;
+ if (rm_type == MPTCP_MIB_RMADDR)
+ id = subflow->remote_id;
- spin_unlock_bh(&msk->pm.lock);
- mptcp_subflow_shutdown(sk, ssk, how);
- mptcp_close_ssk(sk, ssk, subflow);
- spin_lock_bh(&msk->pm.lock);
-
- msk->pm.add_addr_accepted--;
- msk->pm.subflows--;
- WRITE_ONCE(msk->pm.accept_addr, true);
+ if (rm_list->ids[i] != id)
+ continue;
- __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR);
+ pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u",
+ rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
+ i, rm_list->ids[i], subflow->local_id, subflow->remote_id);
+ spin_unlock_bh(&msk->pm.lock);
+ mptcp_subflow_shutdown(sk, ssk, how);
+ mptcp_close_ssk(sk, ssk, subflow);
+ spin_lock_bh(&msk->pm.lock);
- break;
+ if (rm_type == MPTCP_MIB_RMADDR) {
+ msk->pm.add_addr_accepted--;
+ WRITE_ONCE(msk->pm.accept_addr, true);
+ } else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
+ msk->pm.local_addr_used--;
+ }
+ msk->pm.subflows--;
+ __MPTCP_INC_STATS(sock_net(sk), rm_type);
+ }
}
}
+static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
+{
+ mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR);
+}
+
+void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
+ const struct mptcp_rm_list *rm_list)
+{
+ mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW);
+}
+
void mptcp_pm_nl_work(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
@@ -623,7 +666,7 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
}
if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
- mptcp_pm_nl_add_addr_send_ack(msk);
+ mptcp_pm_nl_addr_send_ack(msk);
}
if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
@@ -641,45 +684,9 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
}
-void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
-{
- struct mptcp_subflow_context *subflow, *tmp;
- struct sock *sk = (struct sock *)msk;
-
- pr_debug("subflow rm_id %d", rm_id);
-
- msk_owned_by_me(msk);
-
- if (!rm_id)
- return;
-
- if (list_empty(&msk->conn_list))
- return;
-
- list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
-
- if (rm_id != subflow->local_id)
- continue;
-
- spin_unlock_bh(&msk->pm.lock);
- mptcp_subflow_shutdown(sk, ssk, how);
- mptcp_close_ssk(sk, ssk, subflow);
- spin_lock_bh(&msk->pm.lock);
-
- msk->pm.local_addr_used--;
- msk->pm.subflows--;
-
- __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
-
- break;
- }
-}
-
static bool address_use_port(struct mptcp_pm_addr_entry *entry)
{
- return (entry->addr.flags &
+ return (entry->flags &
(MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) ==
MPTCP_PM_ADDR_FLAG_SIGNAL;
}
@@ -731,11 +738,11 @@ find_next:
if (entry->addr.id > pernet->next_id)
pernet->next_id = entry->addr.id;
- if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
addr_max = pernet->add_addr_signal_max;
WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1);
}
- if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
+ if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
addr_max = pernet->local_addr_max;
WRITE_ONCE(pernet->local_addr_max, addr_max + 1);
}
@@ -837,10 +844,10 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
return -ENOMEM;
entry->addr = skc_local;
- entry->addr.ifindex = 0;
- entry->addr.flags = 0;
entry->addr.id = 0;
entry->addr.port = 0;
+ entry->ifindex = 0;
+ entry->flags = 0;
entry->lsk = NULL;
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
if (ret < 0)
@@ -955,14 +962,14 @@ skip_family:
if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) {
u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]);
- entry->addr.ifindex = val;
+ entry->ifindex = val;
}
if (tb[MPTCP_PM_ADDR_ATTR_ID])
entry->addr.id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
- entry->addr.flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
+ entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
if (tb[MPTCP_PM_ADDR_ATTR_PORT])
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
@@ -1071,12 +1078,15 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
struct mptcp_addr_info *addr,
bool force)
{
+ struct mptcp_rm_list list = { .nr = 0 };
bool ret;
+ list.ids[list.nr++] = addr->id;
+
ret = remove_anno_list_by_saddr(msk, addr);
if (ret || force) {
spin_lock_bh(&msk->pm.lock);
- mptcp_pm_remove_addr(msk, addr->id);
+ mptcp_pm_remove_addr(msk, &list);
spin_unlock_bh(&msk->pm.lock);
}
return ret;
@@ -1087,9 +1097,12 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
{
struct mptcp_sock *msk;
long s_slot = 0, s_num = 0;
+ struct mptcp_rm_list list = { .nr = 0 };
pr_debug("remove_id=%d", addr->id);
+ list.ids[list.nr++] = addr->id;
+
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
bool remove_subflow;
@@ -1103,7 +1116,7 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
mptcp_pm_remove_anno_addr(msk, addr, remove_subflow);
if (remove_subflow)
- mptcp_pm_remove_subflow(msk, addr->id);
+ mptcp_pm_remove_subflow(msk, &list);
release_sock(sk);
next:
@@ -1146,6 +1159,41 @@ static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry)
}
}
+static int mptcp_nl_remove_id_zero_address(struct net *net,
+ struct mptcp_addr_info *addr)
+{
+ struct mptcp_rm_list list = { .nr = 0 };
+ long s_slot = 0, s_num = 0;
+ struct mptcp_sock *msk;
+
+ list.ids[list.nr++] = 0;
+
+ while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
+ struct sock *sk = (struct sock *)msk;
+ struct mptcp_addr_info msk_local;
+
+ if (list_empty(&msk->conn_list))
+ goto next;
+
+ local_address((struct sock_common *)msk, &msk_local);
+ if (!addresses_equal(&msk_local, addr, addr->port))
+ goto next;
+
+ lock_sock(sk);
+ spin_lock_bh(&msk->pm.lock);
+ mptcp_pm_remove_addr(msk, &list);
+ mptcp_pm_nl_rm_subflow_received(msk, &list);
+ spin_unlock_bh(&msk->pm.lock);
+ release_sock(sk);
+
+next:
+ sock_put(sk);
+ cond_resched();
+ }
+
+ return 0;
+}
+
static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
@@ -1158,6 +1206,14 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
+ /* the zero id address is special: the first address used by the msk
+ * always gets such an id, so different subflows can have different zero
+ * id addresses. Additionally zero id is not accounted for in id_bitmap.
+ * Let's use an 'mptcp_rm_list' instead of the common remove code.
+ */
+ if (addr.addr.id == 0)
+ return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr);
+
spin_lock_bh(&pernet->lock);
entry = __lookup_addr_by_id(pernet, addr.addr.id);
if (!entry) {
@@ -1165,11 +1221,11 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
spin_unlock_bh(&pernet->lock);
return -EINVAL;
}
- if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
addr_max = pernet->add_addr_signal_max;
WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1);
}
- if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
+ if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
addr_max = pernet->local_addr_max;
WRITE_ONCE(pernet->local_addr_max, addr_max - 1);
}
@@ -1185,14 +1241,61 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
return ret;
}
-static void __flush_addrs(struct net *net, struct list_head *list)
+static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list)
+{
+ struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
+ struct mptcp_pm_addr_entry *entry;
+
+ list_for_each_entry(entry, rm_list, list) {
+ if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
+ alist.nr < MPTCP_RM_IDS_MAX &&
+ slist.nr < MPTCP_RM_IDS_MAX) {
+ alist.ids[alist.nr++] = entry->addr.id;
+ slist.ids[slist.nr++] = entry->addr.id;
+ } else if (remove_anno_list_by_saddr(msk, &entry->addr) &&
+ alist.nr < MPTCP_RM_IDS_MAX) {
+ alist.ids[alist.nr++] = entry->addr.id;
+ }
+ }
+
+ if (alist.nr) {
+ spin_lock_bh(&msk->pm.lock);
+ mptcp_pm_remove_addr(msk, &alist);
+ spin_unlock_bh(&msk->pm.lock);
+ }
+ if (slist.nr)
+ mptcp_pm_remove_subflow(msk, &slist);
+}
+
+static void mptcp_nl_remove_addrs_list(struct net *net,
+ struct list_head *rm_list)
+{
+ long s_slot = 0, s_num = 0;
+ struct mptcp_sock *msk;
+
+ if (list_empty(rm_list))
+ return;
+
+ while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
+ struct sock *sk = (struct sock *)msk;
+
+ lock_sock(sk);
+ mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
+ release_sock(sk);
+
+ sock_put(sk);
+ cond_resched();
+ }
+}
+
+static void __flush_addrs(struct list_head *list)
{
while (!list_empty(list)) {
struct mptcp_pm_addr_entry *cur;
cur = list_entry(list->next,
struct mptcp_pm_addr_entry, list);
- mptcp_nl_remove_subflow_and_signal_addr(net, &cur->addr);
list_del_rcu(&cur->list);
mptcp_pm_free_addr_entry(cur);
}
@@ -1217,7 +1320,8 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
pernet->next_id = 1;
bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
- __flush_addrs(sock_net(skb->sk), &free_list);
+ mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+ __flush_addrs(&free_list);
return 0;
}
@@ -1237,10 +1341,10 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb,
goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id))
goto nla_put_failure;
- if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->addr.flags))
+ if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->flags))
goto nla_put_failure;
- if (entry->addr.ifindex &&
- nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->addr.ifindex))
+ if (entry->ifindex &&
+ nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex))
goto nla_put_failure;
if (addr->family == AF_INET &&
@@ -1468,7 +1572,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
- if (addr.addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
list_for_each_entry(entry, &pernet->local_addr_list, list) {
@@ -1478,9 +1582,9 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
return ret;
if (bkup)
- entry->addr.flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
else
- entry->addr.flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
}
}
@@ -1586,9 +1690,21 @@ static int mptcp_event_sub_closed(struct sk_buff *skb,
const struct mptcp_sock *msk,
const struct sock *ssk)
{
+ const struct mptcp_subflow_context *sf;
+
if (mptcp_event_put_token_and_ssk(skb, msk, ssk))
return -EMSGSIZE;
+ sf = mptcp_subflow_ctx(ssk);
+ if (!sf->reset_seen)
+ return 0;
+
+ if (nla_put_u32(skb, MPTCP_ATTR_RESET_REASON, sf->reset_reason))
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, MPTCP_ATTR_RESET_FLAGS, sf->reset_transient))
+ return -EMSGSIZE;
+
return 0;
}
@@ -1814,7 +1930,7 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
/* net is removed from namespace list, can't race with
* other modifiers
*/
- __flush_addrs(net, &pernet->local_addr_list);
+ __flush_addrs(&pernet->local_addr_list);
}
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4bde960e19dc..8009b3f8e4c1 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -491,7 +491,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
u64 rcv_data_fin_seq;
bool ret = false;
- if (__mptcp_check_fallback(msk) || !msk->first)
+ if (__mptcp_check_fallback(msk))
return ret;
/* Need to ack a DATA_FIN received from a peer while this side
@@ -2045,28 +2045,21 @@ out_err:
return copied;
}
-static void mptcp_retransmit_handler(struct sock *sk)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- set_bit(MPTCP_WORK_RTX, &msk->flags);
- mptcp_schedule_work(sk);
-}
-
static void mptcp_retransmit_timer(struct timer_list *t)
{
struct inet_connection_sock *icsk = from_timer(icsk, t,
icsk_retransmit_timer);
struct sock *sk = &icsk->icsk_inet.sk;
+ struct mptcp_sock *msk = mptcp_sk(sk);
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
- mptcp_retransmit_handler(sk);
+ /* we need a process context to retransmit */
+ if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags))
+ mptcp_schedule_work(sk);
} else {
/* delegate our work to tcp_release_cb() */
- if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED,
- &sk->sk_tsq_flags))
- sock_hold(sk);
+ set_bit(MPTCP_RETRANSMIT, &msk->flags);
}
bh_unlock_sock(sk);
sock_put(sk);
@@ -3001,17 +2994,16 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
}
}
-#define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED)
-
/* processes deferred events and flush wmem */
static void mptcp_release_cb(struct sock *sk)
{
- unsigned long flags, nflags;
-
for (;;) {
- flags = 0;
+ unsigned long flags = 0;
+
if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
flags |= BIT(MPTCP_PUSH_PENDING);
+ if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags))
+ flags |= BIT(MPTCP_RETRANSMIT);
if (!flags)
break;
@@ -3026,6 +3018,8 @@ static void mptcp_release_cb(struct sock *sk)
spin_unlock_bh(&sk->sk_lock.slock);
if (flags & BIT(MPTCP_PUSH_PENDING))
__mptcp_push_pending(sk, 0);
+ if (flags & BIT(MPTCP_RETRANSMIT))
+ __mptcp_retrans(sk);
cond_resched();
spin_lock_bh(&sk->sk_lock.slock);
@@ -3041,20 +3035,6 @@ static void mptcp_release_cb(struct sock *sk)
*/
__mptcp_update_wmem(sk);
__mptcp_update_rmem(sk);
-
- do {
- flags = sk->sk_tsq_flags;
- if (!(flags & MPTCP_DEFERRED_ALL))
- return;
- nflags = flags & ~MPTCP_DEFERRED_ALL;
- } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
-
- sock_release_ownership(sk);
-
- if (flags & TCPF_WRITE_TIMER_DEFERRED) {
- mptcp_retransmit_handler(sk);
- __sock_put(sk);
- }
}
void mptcp_subflow_process_delegated(struct sock *ssk)
@@ -3153,14 +3133,18 @@ bool mptcp_finish_join(struct sock *ssk)
pr_debug("msk=%p, subflow=%p", msk, subflow);
/* mptcp socket already closing? */
- if (!mptcp_is_fully_established(parent))
+ if (!mptcp_is_fully_established(parent)) {
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
return false;
+ }
if (!msk->pm.server_side)
goto out;
- if (!mptcp_pm_allow_new_subflow(msk))
+ if (!mptcp_pm_allow_new_subflow(msk)) {
+ subflow->reset_reason = MPTCP_RST_EPROHIBIT;
return false;
+ }
/* active connections are already on conn_list, and we can't acquire
* msk lock here.
@@ -3174,8 +3158,10 @@ bool mptcp_finish_join(struct sock *ssk)
sock_hold(ssk);
}
spin_unlock_bh(&msk->join_list_lock);
- if (!ret)
+ if (!ret) {
+ subflow->reset_reason = MPTCP_RST_EPROHIBIT;
return false;
+ }
/* attach to msk socket only after we are sure he will deal with us
* at close time
@@ -3287,8 +3273,12 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk))
+ if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
+ MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
mptcp_subflow_early_fallback(msk, subflow);
+ }
+ if (likely(!__mptcp_check_fallback(msk)))
+ MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE);
do_connect:
err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index e21a5bc36cf0..d8de1e961ab0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -22,10 +22,10 @@
#define OPTION_MPTCP_MPJ_SYNACK BIT(4)
#define OPTION_MPTCP_MPJ_ACK BIT(5)
#define OPTION_MPTCP_ADD_ADDR BIT(6)
-#define OPTION_MPTCP_ADD_ADDR6 BIT(7)
-#define OPTION_MPTCP_RM_ADDR BIT(8)
-#define OPTION_MPTCP_FASTCLOSE BIT(9)
-#define OPTION_MPTCP_PRIO BIT(10)
+#define OPTION_MPTCP_RM_ADDR BIT(7)
+#define OPTION_MPTCP_FASTCLOSE BIT(8)
+#define OPTION_MPTCP_PRIO BIT(9)
+#define OPTION_MPTCP_RST BIT(10)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
@@ -36,6 +36,7 @@
#define MPTCPOPT_MP_PRIO 5
#define MPTCPOPT_MP_FAIL 6
#define MPTCPOPT_MP_FASTCLOSE 7
+#define MPTCPOPT_RST 8
/* MPTCP suboption lengths */
#define TCPOLEN_MPTCP_MPC_SYN 4
@@ -61,10 +62,11 @@
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22
#define TCPOLEN_MPTCP_PORT_LEN 2
#define TCPOLEN_MPTCP_PORT_ALIGN 2
-#define TCPOLEN_MPTCP_RM_ADDR_BASE 4
+#define TCPOLEN_MPTCP_RM_ADDR_BASE 3
#define TCPOLEN_MPTCP_PRIO 3
#define TCPOLEN_MPTCP_PRIO_ALIGN 4
#define TCPOLEN_MPTCP_FASTCLOSE 12
+#define TCPOLEN_MPTCP_RST 4
/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0)
@@ -88,12 +90,13 @@
/* MPTCP ADD_ADDR flags */
#define MPTCP_ADDR_ECHO BIT(0)
-#define MPTCP_ADDR_IPVERSION_4 4
-#define MPTCP_ADDR_IPVERSION_6 6
/* MPTCP MP_PRIO flags */
#define MPTCP_PRIO_BKUP BIT(0)
+/* MPTCP TCPRST flags */
+#define MPTCP_RST_TRANSIENT BIT(0)
+
/* MPTCP socket flags */
#define MPTCP_DATA_READY 0
#define MPTCP_NOSPACE 1
@@ -104,6 +107,7 @@
#define MPTCP_PUSH_PENDING 6
#define MPTCP_CLEAN_UNA 7
#define MPTCP_ERROR_REPORT 8
+#define MPTCP_RETRANSMIT 9
static inline bool before64(__u64 seq1, __u64 seq2)
{
@@ -122,11 +126,11 @@ struct mptcp_options_received {
u16 mp_capable : 1,
mp_join : 1,
fastclose : 1,
+ reset : 1,
dss : 1,
add_addr : 1,
rm_addr : 1,
mp_prio : 1,
- family : 4,
echo : 1,
backup : 1;
u32 token;
@@ -141,16 +145,11 @@ struct mptcp_options_received {
ack64:1,
mpc_map:1,
__unused:2;
- u8 addr_id;
- u8 rm_id;
- union {
- struct in_addr addr;
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- struct in6_addr addr6;
-#endif
- };
+ struct mptcp_addr_info addr;
+ struct mptcp_rm_list rm_list;
u64 ahmac;
- u16 port;
+ u8 reset_reason:4;
+ u8 reset_transient:1;
};
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -159,20 +158,6 @@ static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
((nib & 0xF) << 8) | field);
}
-struct mptcp_addr_info {
- sa_family_t family;
- __be16 port;
- u8 id;
- u8 flags;
- int ifindex;
- union {
- struct in_addr addr;
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- struct in6_addr addr6;
-#endif
- };
-};
-
enum mptcp_pm_status {
MPTCP_PM_ADD_ADDR_RECEIVED,
MPTCP_PM_ADD_ADDR_SEND_ACK,
@@ -207,7 +192,8 @@ struct mptcp_pm_data {
u8 local_addr_used;
u8 subflows;
u8 status;
- u8 rm_id;
+ struct mptcp_rm_list rm_list_tx;
+ struct mptcp_rm_list rm_list_rx;
};
struct mptcp_data_frag {
@@ -420,6 +406,9 @@ struct mptcp_subflow_context {
u8 hmac[MPTCPOPT_HMAC_LEN];
u8 local_id;
u8 remote_id;
+ u8 reset_seen:1;
+ u8 reset_transient:1;
+ u8 reset_reason:4;
long delegated_status;
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
@@ -543,7 +532,8 @@ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
- const struct mptcp_addr_info *remote);
+ const struct mptcp_addr_info *remote,
+ u8 flags, int ifindex);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
@@ -641,13 +631,16 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
-void mptcp_pm_subflow_established(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow);
+void mptcp_pm_subflow_established(struct mptcp_sock *msk);
void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
+void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
-void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id);
+void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk);
+void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
+ const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *addr,
@@ -657,12 +650,15 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_addr_info *addr);
+struct mptcp_pm_add_entry *
+mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
- bool echo, bool port);
-int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
-int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id);
+ bool echo);
+int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
+int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp);
@@ -709,23 +705,32 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
return len;
}
+static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
+{
+ if (rm_list->nr == 0 || rm_list->nr > MPTCP_RM_IDS_MAX)
+ return -EINVAL;
+
+ return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
+}
+
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_addr_info *saddr, bool *echo, bool *port);
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
- u8 *rm_id);
+ struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
-void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
+void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
+ const struct mptcp_rm_list *rm_list);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
-static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
+static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
{
return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d17d39ccdf34..3c19a5265a0f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -115,6 +115,16 @@ static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct soc
return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
}
+static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason)
+{
+ struct mptcp_ext *mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
+
+ if (mpext) {
+ memset(mpext, 0, sizeof(*mpext));
+ mpext->reset_reason = reason;
+ }
+}
+
/* Init mptcp request socket.
*
* Returns an error code if a JOIN has failed and a TCP reset
@@ -165,6 +175,7 @@ again:
if (mptcp_token_exists(subflow_req->token)) {
if (retries-- > 0)
goto again;
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);
} else {
subflow_req->mp_capable = 1;
}
@@ -176,6 +187,8 @@ again:
subflow_req->mp_capable = 1;
else if (retries-- > 0)
goto again;
+ else
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);
} else if (mp_opt.mp_join && listener->request_mptcp) {
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
@@ -187,8 +200,10 @@ again:
subflow_req->msk = subflow_token_join_request(req);
/* Can't fall back to TCP in this case. */
- if (!subflow_req->msk)
+ if (!subflow_req->msk) {
+ subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
return -EPERM;
+ }
if (subflow_use_different_sport(subflow_req->msk, sk_listener)) {
pr_debug("syn inet_sport=%d %d",
@@ -392,12 +407,15 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->remote_key = mp_opt.sndr_key;
pr_debug("subflow=%p, remote_key=%llu", subflow,
subflow->remote_key);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
- if (!mp_opt.mp_join)
+ if (!mp_opt.mp_join) {
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
goto do_reset;
+ }
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
@@ -406,6 +424,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (!subflow_thmac_valid(subflow)) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
goto do_reset;
}
@@ -434,6 +453,7 @@ fallback:
return;
do_reset:
+ subflow->reset_transient = 0;
mptcp_subflow_reset(sk);
}
@@ -650,8 +670,10 @@ create_child:
* to reset the context to non MPTCP status.
*/
if (!ctx || fallback) {
- if (fallback_is_fatal)
+ if (fallback_is_fatal) {
+ subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
goto dispose_child;
+ }
subflow_drop_ctx(child);
goto out;
@@ -686,8 +708,10 @@ create_child:
struct mptcp_sock *owner;
owner = subflow_req->msk;
- if (!owner)
+ if (!owner) {
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
goto dispose_child;
+ }
/* move the msk reference ownership to the subflow */
subflow_req->msk = NULL;
@@ -1052,6 +1076,8 @@ fatal:
smp_wmb();
ssk->sk_error_report(ssk);
tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
tcp_send_active_reset(ssk, GFP_ATOMIC);
subflow->data_avail = 0;
return false;
@@ -1081,7 +1107,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
* In mptcp, rwin is about the mptcp-level connection data.
*
* Data that is still on the ssk rx queue can thus be ignored,
- * as far as mptcp peer is concerened that data is still inflight.
+ * as far as mptcp peer is concerned that data is still inflight.
* DSS ACK is updated when skb is moved to the mptcp rx queue.
*/
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
@@ -1230,7 +1256,8 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
}
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
- const struct mptcp_addr_info *remote)
+ const struct mptcp_addr_info *remote,
+ u8 flags, int ifindex)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
@@ -1274,7 +1301,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- ssk->sk_bound_dev_if = loc->ifindex;
+ ssk->sk_bound_dev_if = ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
if (err)
goto failed;
@@ -1286,7 +1313,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->local_id = local_id;
subflow->remote_id = remote_id;
subflow->request_join = 1;
- subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
mptcp_add_pending_subflow(msk, subflow);