diff options
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/mib.c | 6 | ||||
-rw-r--r-- | net/mptcp/mib.h | 18 | ||||
-rw-r--r-- | net/mptcp/options.c | 19 | ||||
-rw-r--r-- | net/mptcp/pm.c | 47 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 142 | ||||
-rw-r--r-- | net/mptcp/pm_userspace.c | 5 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 65 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 20 | ||||
-rw-r--r-- | net/mptcp/sockopt.c | 153 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 2 |
10 files changed, 347 insertions, 130 deletions
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 0dac2863c6e1..a0990c365a2e 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -34,7 +34,11 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW), SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA), SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR), + SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX), + SNMP_MIB_ITEM("AddAddrTxDrop", MPTCP_MIB_ADDADDRTXDROP), SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), + SNMP_MIB_ITEM("EchoAddTx", MPTCP_MIB_ECHOADDTX), + SNMP_MIB_ITEM("EchoAddTxDrop", MPTCP_MIB_ECHOADDTXDROP), SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD), SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP), SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX), @@ -44,6 +48,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX), SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP), + SNMP_MIB_ITEM("RmAddrTx", MPTCP_MIB_RMADDRTX), + SNMP_MIB_ITEM("RmAddrTxDrop", MPTCP_MIB_RMADDRTXDROP), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 2be3596374f4..cae71d947252 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -27,7 +27,15 @@ enum linux_mptcp_mib_field { MPTCP_MIB_NODSSWINDOW, /* Segments not in MPTCP windows */ MPTCP_MIB_DUPDATA, /* Segments discarded due to duplicate DSS */ MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */ + MPTCP_MIB_ADDADDRTX, /* Sent ADD_ADDR with echo-flag=0 */ + MPTCP_MIB_ADDADDRTXDROP, /* ADD_ADDR with echo-flag=0 not send due to + * resource exhaustion + */ MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */ + MPTCP_MIB_ECHOADDTX, /* Send ADD_ADDR with echo-flag=1 */ + MPTCP_MIB_ECHOADDTXDROP, /* ADD_ADDR with echo-flag=1 not send due + * to resource exhaustion + */ MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */ MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */ MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */ @@ -37,6 +45,8 @@ enum linux_mptcp_mib_field { MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */ MPTCP_MIB_RMADDR, /* Received RM_ADDR */ MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */ + MPTCP_MIB_RMADDRTX, /* Sent RM_ADDR */ + MPTCP_MIB_RMADDRTXDROP, /* RM_ADDR not sent due to resource exhaustion */ MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ @@ -63,6 +73,14 @@ struct mptcp_mib { unsigned long mibs[LINUX_MIB_MPTCP_MAX]; }; +static inline void MPTCP_ADD_STATS(struct net *net, + enum linux_mptcp_mib_field field, + int val) +{ + if (likely(net->mib.mptcp_statistics)) + SNMP_ADD_STATS(net->mib.mptcp_statistics, field, val); +} + static inline void MPTCP_INC_STATS(struct net *net, enum linux_mptcp_mib_field field) { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 19a01b6566f1..c254accb14de 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -687,9 +687,12 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * } opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX); opts->ahmac = add_addr_generate_hmac(msk->local_key, msk->remote_key, &opts->addr); + } else { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX); } pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d", opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port)); @@ -723,7 +726,7 @@ static bool mptcp_established_options_rm_addr(struct sock *sk, for (i = 0; i < opts->rm_list.nr; i++) pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]); - + MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr); return true; } @@ -1023,6 +1026,12 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq) return cur_seq; } +static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una) +{ + msk->bytes_acked += new_snd_una - msk->snd_una; + msk->snd_una = new_snd_una; +} + static void ack_update_msk(struct mptcp_sock *msk, struct sock *ssk, struct mptcp_options_received *mp_opt) @@ -1054,7 +1063,7 @@ static void ack_update_msk(struct mptcp_sock *msk, __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { - msk->snd_una = new_snd_una; + __mptcp_snd_una_update(msk, new_snd_una); __mptcp_data_acked(sk); } mptcp_data_unlock(sk); @@ -1116,6 +1125,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mptcp_data_lock(subflow->conn); if (sk_stream_memory_free(sk)) __mptcp_check_push(subflow->conn, sk); + + /* on fallback we just need to ignore the msk-level snd_una, as + * this is really plain TCP + */ + __mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt)); + __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); return true; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 76612bca275a..7dbbad1e4f55 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -26,7 +26,8 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, if (add_addr & (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) { - pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo); + MPTCP_INC_STATS(sock_net((struct sock *)msk), + echo ? MPTCP_MIB_ECHOADDTXDROP : MPTCP_MIB_ADDADDRTXDROP); return -EINVAL; } @@ -48,7 +49,8 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); if (rm_addr) { - pr_warn("addr_signal error, rm_addr=%d", rm_addr); + MPTCP_ADD_STATS(sock_net((struct sock *)msk), + MPTCP_MIB_RMADDRTXDROP, rm_list->nr); return -EINVAL; } @@ -413,7 +415,46 @@ out_unlock: int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) { - return mptcp_pm_nl_get_local_id(msk, skc); + struct mptcp_addr_info skc_local; + struct mptcp_addr_info msk_local; + + if (WARN_ON_ONCE(!msk)) + return -1; + + /* The 0 ID mapping is defined by the first subflow, copied into the msk + * addr + */ + mptcp_local_address((struct sock_common *)msk, &msk_local); + mptcp_local_address((struct sock_common *)skc, &skc_local); + if (mptcp_addresses_equal(&msk_local, &skc_local, false)) + return 0; + + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_get_local_id(msk, &skc_local); + return mptcp_pm_nl_get_local_id(msk, &skc_local); +} + +int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, + u8 *flags, int *ifindex) +{ + *flags = 0; + *ifindex = 0; + + if (!id) + return 0; + + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); + return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); +} + +int mptcp_pm_set_flags(struct net *net, struct nlattr *token, + struct mptcp_pm_addr_entry *loc, + struct mptcp_pm_addr_entry *rem, u8 bkup) +{ + if (token) + return mptcp_userspace_pm_set_flags(net, token, loc, rem, bkup); + return mptcp_pm_nl_set_flags(net, loc, bkup); } void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 1224dfca5bf3..5692daf57a4d 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -25,9 +25,9 @@ static int pm_nl_pernet_id; struct mptcp_pm_add_entry { struct list_head list; struct mptcp_addr_info addr; + u8 retrans_times; struct timer_list add_timer; struct mptcp_sock *sock; - u8 retrans_times; }; struct pm_nl_pernet { @@ -86,8 +86,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a, return a->port == b->port; } -static void local_address(const struct sock_common *skc, - struct mptcp_addr_info *addr) +void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) { addr->family = skc->skc_family; addr->port = htons(skc->skc_num); @@ -122,7 +121,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list, list_for_each_entry(subflow, list, node) { skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); - local_address(skc, &cur); + mptcp_local_address(skc, &cur); if (mptcp_addresses_equal(&cur, saddr, saddr->port)) return true; } @@ -263,7 +262,7 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) struct mptcp_addr_info saddr; bool ret = false; - local_address((struct sock_common *)sk, &saddr); + mptcp_local_address((struct sock_common *)sk, &saddr); spin_lock_bh(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.anno_list, list) { @@ -342,7 +341,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, } bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, - const struct mptcp_pm_addr_entry *entry) + const struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *add_entry = NULL; struct sock *sk = (struct sock *)msk; @@ -350,7 +349,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, lockdep_assert_held(&msk->pm.lock); - add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr); + add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (add_entry) { if (mptcp_pm_is_kernel(msk)) @@ -367,7 +366,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, list_add(&add_entry->list, &msk->pm.anno_list); - add_entry->addr = entry->addr; + add_entry->addr = *addr; add_entry->sock = msk; add_entry->retrans_times = 0; @@ -541,7 +540,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) struct mptcp_addr_info mpc_addr; bool backup = false; - local_address((struct sock_common *)msk->first, &mpc_addr); + mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); rcu_read_lock(); entry = __lookup_addr(pernet, &mpc_addr, false); if (entry) { @@ -577,7 +576,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) return; if (local) { - if (mptcp_pm_alloc_anno_list(msk, local)) { + if (mptcp_pm_alloc_anno_list(msk, &local->addr)) { __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &local->addr, false); @@ -752,7 +751,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_addr_info local, remote; - local_address((struct sock_common *)ssk, &local); + mptcp_local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; @@ -1057,33 +1056,17 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, return 0; } -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { struct mptcp_pm_addr_entry *entry; - struct mptcp_addr_info skc_local; - struct mptcp_addr_info msk_local; struct pm_nl_pernet *pernet; int ret = -1; - if (WARN_ON_ONCE(!msk)) - return -1; - - /* The 0 ID mapping is defined by the first subflow, copied into the msk - * addr - */ - local_address((struct sock_common *)msk, &msk_local); - local_address((struct sock_common *)skc, &skc_local); - if (mptcp_addresses_equal(&msk_local, &skc_local, false)) - return 0; - - if (mptcp_pm_is_userspace(msk)) - return mptcp_userspace_pm_get_local_id(msk, &skc_local); - pernet = pm_nl_get_pernet_from_msk(msk); rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) { + if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { ret = entry->addr.id; break; } @@ -1097,7 +1080,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) if (!entry) return -ENOMEM; - entry->addr = skc_local; + entry->addr = *skc; entry->addr.id = 0; entry->addr.port = 0; entry->ifindex = 0; @@ -1374,31 +1357,20 @@ out_free: return ret; } -int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, - u8 *flags, int *ifindex) +int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, + u8 *flags, int *ifindex) { struct mptcp_pm_addr_entry *entry; struct sock *sk = (struct sock *)msk; struct net *net = sock_net(sk); - *flags = 0; - *ifindex = 0; - - if (id) { - if (mptcp_pm_is_userspace(msk)) - return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, - id, - flags, - ifindex); - - rcu_read_lock(); - entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); - if (entry) { - *flags = entry->flags; - *ifindex = entry->ifindex; - } - rcu_read_unlock(); + rcu_read_lock(); + entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); + if (entry) { + *flags = entry->flags; + *ifindex = entry->ifindex; } + rcu_read_unlock(); return 0; } @@ -1492,7 +1464,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net, if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) goto next; - local_address((struct sock_common *)msk, &msk_local); + mptcp_local_address((struct sock_common *)msk, &msk_local); if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) goto next; @@ -1911,18 +1883,50 @@ next: return ret; } +int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); + u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | + MPTCP_PM_ADDR_FLAG_FULLMESH; + struct mptcp_pm_addr_entry *entry; + u8 lookup_by_id = 0; + + if (addr->addr.family == AF_UNSPEC) { + lookup_by_id = 1; + if (!addr->addr.id) + return -EOPNOTSUPP; + } + + spin_lock_bh(&pernet->lock); + entry = __lookup_addr(pernet, &addr->addr, lookup_by_id); + if (!entry) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } + if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } + + changed = (addr->flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (addr->flags & mask); + *addr = *entry; + spin_unlock_bh(&pernet->lock); + + mptcp_nl_set_flags(net, &addr->addr, bkup, changed); + return 0; +} + static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) { - struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry; struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, }; + struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | - MPTCP_PM_ADDR_FLAG_FULLMESH; struct net *net = sock_net(skb->sk); - u8 bkup = 0, lookup_by_id = 0; + u8 bkup = 0; int ret; ret = mptcp_pm_parse_entry(attr, info, false, &addr); @@ -1937,34 +1941,8 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; - if (addr.addr.family == AF_UNSPEC) { - lookup_by_id = 1; - if (!addr.addr.id) - return -EOPNOTSUPP; - } - - if (token) - return mptcp_userspace_pm_set_flags(net, token, &addr, &remote, bkup); - spin_lock_bh(&pernet->lock); - entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); - if (!entry) { - spin_unlock_bh(&pernet->lock); - return -EINVAL; - } - if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && - (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - spin_unlock_bh(&pernet->lock); - return -EINVAL; - } - - changed = (addr.flags ^ entry->flags) & mask; - entry->flags = (entry->flags & ~mask) | (addr.flags & mask); - addr = *entry; - spin_unlock_bh(&pernet->lock); - - mptcp_nl_set_flags(net, &addr.addr, bkup, changed); - return 0; + return mptcp_pm_set_flags(net, token, &addr, &remote, bkup); } static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index b06aa58dfcf2..b5a8aa4c1ebd 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -111,9 +111,6 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, { struct mptcp_pm_addr_entry *entry, *match = NULL; - *flags = 0; - *ifindex = 0; - spin_lock_bh(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { if (id == entry->addr.id) { @@ -196,7 +193,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) lock_sock((struct sock *)msk); spin_lock_bh(&msk->pm.lock); - if (mptcp_pm_alloc_anno_list(msk, &addr_val)) { + if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) { msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &addr_val.addr, false); mptcp_pm_nl_addr_send_ack(msk); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a6c7f2d24909..e892673deb73 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -96,6 +96,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); subflow->request_mptcp = 1; + subflow->subflow_id = msk->subflow_id++; /* This is the first subflow, always with id 0 */ subflow->local_id_valid = 1; @@ -377,6 +378,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { /* in sequence */ + msk->bytes_received += copy_len; WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); tail = skb_peek_tail(&sk->sk_receive_queue); if (tail && mptcp_try_coalesce(sk, tail, skb)) @@ -757,6 +759,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) MPTCP_SKB_CB(skb)->map_seq += delta; __skb_queue_tail(&sk->sk_receive_queue, skb); } + msk->bytes_received += end_seq - msk->ack_seq; msk->ack_seq = end_seq; moved = true; } @@ -842,6 +845,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_socket && !ssk->sk_socket) mptcp_sock_graft(ssk, sk->sk_socket); + mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); mptcp_subflow_joined(msk, ssk); return true; @@ -958,12 +962,6 @@ static void __mptcp_clean_una(struct sock *sk) struct mptcp_data_frag *dtmp, *dfrag; u64 snd_una; - /* on fallback we just need to ignore snd_una, as this is really - * plain TCP - */ - if (__mptcp_check_fallback(msk)) - msk->snd_una = READ_ONCE(msk->snd_nxt); - snd_una = msk->snd_una; list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) { if (after64(dfrag->data_seq + dfrag->data_len, snd_una)) @@ -1491,8 +1489,10 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, * that has been handed to the subflow for transmission * and skip update in case it was old dfrag. */ - if (likely(after64(snd_nxt_new, msk->snd_nxt))) + if (likely(after64(snd_nxt_new, msk->snd_nxt))) { + msk->bytes_sent += snd_nxt_new - msk->snd_nxt; msk->snd_nxt = snd_nxt_new; + } } void mptcp_check_and_set_pending(struct sock *sk) @@ -2549,6 +2549,7 @@ static void __mptcp_retrans(struct sock *sk) } if (copied) { dfrag->already_sent = max(dfrag->already_sent, info.sent); + msk->bytes_retrans += copied; tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); WRITE_ONCE(msk->allow_infinite_fallback, false); @@ -2607,6 +2608,7 @@ static void mptcp_do_fastclose(struct sock *sk) struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); + inet_sk_state_store(sk, TCP_CLOSE); mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, MPTCP_CF_FASTCLOSE); @@ -2640,10 +2642,9 @@ static void mptcp_worker(struct work_struct *work) * even if it is orphaned and in FIN_WAIT2 state */ if (sock_flag(sk, SOCK_DEAD)) { - if (mptcp_should_close(sk)) { - inet_sk_state_store(sk, TCP_CLOSE); + if (mptcp_should_close(sk)) mptcp_do_fastclose(sk); - } + if (sk->sk_state == TCP_CLOSE) { __mptcp_destroy_sock(sk); goto unlock; @@ -2682,6 +2683,7 @@ static int __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); WRITE_ONCE(msk->allow_infinite_fallback, true); msk->recovery = false; + msk->subflow_id = 1; mptcp_pm_data_init(msk); @@ -2878,7 +2880,6 @@ static void __mptcp_destroy_sock(struct sock *sk) void __mptcp_unaccepted_force_close(struct sock *sk) { sock_set_flag(sk, SOCK_DEAD); - inet_sk_state_store(sk, TCP_CLOSE); mptcp_do_fastclose(sk); __mptcp_destroy_sock(sk); } @@ -2934,7 +2935,6 @@ bool __mptcp_close(struct sock *sk, long timeout) /* If the msk has read data, or the caller explicitly ask it, * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose */ - inet_sk_state_store(sk, TCP_CLOSE); mptcp_do_fastclose(sk); timeout = 0; } else if (mptcp_close_state(sk)) { @@ -3069,6 +3069,10 @@ static int mptcp_disconnect(struct sock *sk, int flags) WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); mptcp_pm_data_reset(msk); mptcp_ca_reset(sk); + msk->bytes_acked = 0; + msk->bytes_received = 0; + msk->bytes_sent = 0; + msk->bytes_retrans = 0; WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); @@ -3119,6 +3123,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; + /* passive msk is created after the first/MPC subflow */ + msk->subflow_id = 2; + sock_reset_flag(nsk, SOCK_RCU_FREE); security_inet_csk_clone(nsk, req); @@ -3538,11 +3545,10 @@ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) return (int)delta; } -static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int mptcp_ioctl(struct sock *sk, int cmd, int *karg) { struct mptcp_sock *msk = mptcp_sk(sk); bool slow; - int answ; switch (cmd) { case SIOCINQ: @@ -3551,24 +3557,24 @@ static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg) lock_sock(sk); __mptcp_move_skbs(msk); - answ = mptcp_inq_hint(sk); + *karg = mptcp_inq_hint(sk); release_sock(sk); break; case SIOCOUTQ: slow = lock_sock_fast(sk); - answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una)); + *karg = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una)); unlock_sock_fast(sk, slow); break; case SIOCOUTQNSD: slow = lock_sock_fast(sk); - answ = mptcp_ioctl_outq(msk, msk->snd_nxt); + *karg = mptcp_ioctl_outq(msk, msk->snd_nxt); unlock_sock_fast(sk, slow); break; default: return -ENOIOCTLCMD; } - return put_user(answ, (int __user *)arg); + return 0; } static void mptcp_subflow_early_fallback(struct mptcp_sock *msk, @@ -3726,6 +3732,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct socket *ssock; + struct sock *newsk; int err; pr_debug("msk=%p", msk); @@ -3737,17 +3744,20 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssock) return -EINVAL; - err = ssock->ops->accept(sock, newsock, flags, kern); - if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) { - struct mptcp_sock *msk = mptcp_sk(newsock->sk); + newsk = mptcp_accept(sock->sk, flags, &err, kern); + if (!newsk) + return err; + + lock_sock(newsk); + + __inet_accept(sock, newsock, newsk); + if (!mptcp_is_tcpsk(newsock->sk)) { + struct mptcp_sock *msk = mptcp_sk(newsk); struct mptcp_subflow_context *subflow; - struct sock *newsk = newsock->sk; set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); msk->in_accept_queue = 0; - lock_sock(newsk); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ @@ -3768,11 +3778,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (unlikely(list_empty(&msk->conn_list))) inet_sk_state_store(newsk, TCP_CLOSE); } - - release_sock(newsk); } + release_sock(newsk); - return err; + return 0; } static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) @@ -3857,7 +3866,6 @@ static const struct proto_ops mptcp_stream_ops = { .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, - .sendpage = inet_sendpage, }; static struct inet_protosw mptcp_protosw = { @@ -3952,7 +3960,6 @@ static const struct proto_ops mptcp_v6_stream_ops = { .sendmsg = inet6_sendmsg, .recvmsg = inet6_recvmsg, .mmap = sock_no_mmap, - .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d3783a7056e1..37fbe22e2433 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -261,10 +261,13 @@ struct mptcp_sock { u64 local_key; u64 remote_key; u64 write_seq; + u64 bytes_sent; u64 snd_nxt; + u64 bytes_received; u64 ack_seq; atomic64_t rcv_wnd_sent; u64 rcv_data_fin_seq; + u64 bytes_retrans; int rmem_fwd_alloc; struct sock *last_snd; int snd_burst; @@ -273,6 +276,7 @@ struct mptcp_sock { * recovery related fields are under data_lock * protection */ + u64 bytes_acked; u64 snd_una; u64 wnd_end; unsigned long timer_ival; @@ -318,7 +322,8 @@ struct mptcp_sock { u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; - u32 setsockopt_seq; + u32 subflow_id; + u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; struct mptcp_sock *dl_next; }; @@ -498,6 +503,8 @@ struct mptcp_subflow_context { u8 reset_reason:4; u8 stale_count; + u32 subflow_id; + long delegated_status; unsigned long fail_tout; @@ -636,6 +643,7 @@ void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); +void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, @@ -806,7 +814,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info *rem, u8 bkup); bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, - const struct mptcp_pm_addr_entry *entry); + const struct mptcp_addr_info *addr); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * @@ -818,9 +826,15 @@ mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex); +int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, + u8 *flags, int *ifindex); int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex); +int mptcp_pm_set_flags(struct net *net, struct nlattr *token, + struct mptcp_pm_addr_entry *loc, + struct mptcp_pm_addr_entry *rem, u8 bkup); +int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup); int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, struct mptcp_pm_addr_entry *loc, struct mptcp_pm_addr_entry *rem, u8 bkup); @@ -913,13 +927,13 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index d4258869ac48..63f7a09335c5 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -14,7 +14,8 @@ #include <net/mptcp.h> #include "protocol.h" -#define MIN_INFO_OPTLEN_SIZE 16 +#define MIN_INFO_OPTLEN_SIZE 16 +#define MIN_FULL_INFO_OPTLEN_SIZE 40 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) { @@ -355,6 +356,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, case SO_BROADCAST: case SO_BSDCOMPAT: case SO_PASSCRED: + case SO_PASSPIDFD: case SO_PASSSEC: case SO_RXQ_OVFL: case SO_WIFI_STATUS: @@ -888,7 +890,9 @@ out: void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) { + struct sock *sk = (struct sock *)msk; u32 flags = 0; + bool slow; memset(info, 0, sizeof(*info)); @@ -897,6 +901,9 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); + if (inet_sk_state_load(sk) == TCP_LISTEN) + return; + /* The following limits only make sense for the in-kernel PM */ if (mptcp_pm_is_kernel(msk)) { info->mptcpi_subflows_max = @@ -914,11 +921,21 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) if (READ_ONCE(msk->can_ack)) flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; info->mptcpi_flags = flags; - info->mptcpi_token = READ_ONCE(msk->token); - info->mptcpi_write_seq = READ_ONCE(msk->write_seq); - info->mptcpi_snd_una = READ_ONCE(msk->snd_una); - info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); - info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); + mptcp_data_lock(sk); + info->mptcpi_snd_una = msk->snd_una; + info->mptcpi_rcv_nxt = msk->ack_seq; + info->mptcpi_bytes_acked = msk->bytes_acked; + mptcp_data_unlock(sk); + + slow = lock_sock_fast(sk); + info->mptcpi_csum_enabled = msk->csum_enabled; + info->mptcpi_token = msk->token; + info->mptcpi_write_seq = msk->write_seq; + info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; + info->mptcpi_bytes_sent = msk->bytes_sent; + info->mptcpi_bytes_received = msk->bytes_received; + info->mptcpi_bytes_retrans = msk->bytes_retrans; + unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); @@ -965,7 +982,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, } static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, - char __user *optval, int __user *optlen) + char __user *optval, + int __user *optlen) { int len, copylen; @@ -1146,6 +1164,125 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o return 0; } +static int mptcp_get_full_info(struct mptcp_full_info *mfi, + char __user *optval, + int __user *optlen) +{ + int len; + + BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != + MIN_FULL_INFO_OPTLEN_SIZE); + + if (get_user(len, optlen)) + return -EFAULT; + + if (len < MIN_FULL_INFO_OPTLEN_SIZE) + return -EINVAL; + + memset(mfi, 0, sizeof(*mfi)); + if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) + return -EFAULT; + + if (mfi->size_tcpinfo_kernel || + mfi->size_sfinfo_kernel || + mfi->num_subflows) + return -EINVAL; + + if (mfi->size_sfinfo_user > INT_MAX || + mfi->size_tcpinfo_user > INT_MAX) + return -EINVAL; + + return len - MIN_FULL_INFO_OPTLEN_SIZE; +} + +static int mptcp_put_full_info(struct mptcp_full_info *mfi, + char __user *optval, + u32 copylen, + int __user *optlen) +{ + copylen += MIN_FULL_INFO_OPTLEN_SIZE; + if (put_user(copylen, optlen)) + return -EFAULT; + + if (copy_to_user(optval, mfi, copylen)) + return -EFAULT; + return 0; +} + +static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, + int __user *optlen) +{ + unsigned int sfcount = 0, copylen = 0; + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + void __user *tcpinfoptr, *sfinfoptr; + struct mptcp_full_info mfi; + int len; + + len = mptcp_get_full_info(&mfi, optval, optlen); + if (len < 0) + return len; + + /* don't bother filling the mptcp info if there is not enough + * user-space-provided storage + */ + if (len > 0) { + mptcp_diag_fill_info(msk, &mfi.mptcp_info); + copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); + } + + mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); + mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, + sizeof(struct tcp_info)); + sfinfoptr = u64_to_user_ptr(mfi.subflow_info); + mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); + mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, + sizeof(struct mptcp_subflow_info)); + tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + struct mptcp_subflow_info sfinfo; + struct tcp_info tcp_info; + + if (sfcount++ >= mfi.size_arrays_user) + continue; + + /* fetch addr/tcp_info only if the user space buffers + * are wide enough + */ + memset(&sfinfo, 0, sizeof(sfinfo)); + sfinfo.id = subflow->subflow_id; + if (mfi.size_sfinfo_user > + offsetof(struct mptcp_subflow_info, addrs)) + mptcp_get_sub_addrs(ssk, &sfinfo.addrs); + if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) + goto fail_release; + + if (mfi.size_tcpinfo_user) { + tcp_get_info(ssk, &tcp_info); + if (copy_to_user(tcpinfoptr, &tcp_info, + mfi.size_tcpinfo_user)) + goto fail_release; + } + + tcpinfoptr += mfi.size_tcpinfo_user; + sfinfoptr += mfi.size_sfinfo_user; + } + release_sock(sk); + + mfi.num_subflows = sfcount; + if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) + return -EFAULT; + + return 0; + +fail_release: + release_sock(sk); + return -EFAULT; +} + static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, int __user *optlen, int val) { @@ -1219,6 +1356,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, switch (optname) { case MPTCP_INFO: return mptcp_getsockopt_info(msk, optval, optlen); + case MPTCP_FULL_INFO: + return mptcp_getsockopt_full_info(msk, optval, optlen); case MPTCP_TCPINFO: return mptcp_getsockopt_tcpinfo(msk, optval, optlen); case MPTCP_SUBFLOW_ADDRS: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 8ff5c9fab55b..9ee3b7abbaf6 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -819,6 +819,7 @@ create_child: if (!ctx->conn) goto fallback; + ctx->subflow_id = 1; owner = mptcp_sk(ctx->conn); mptcp_pm_new_connection(owner, child, 1); @@ -1574,6 +1575,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->remote_id = remote_id; subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); + subflow->subflow_id = msk->subflow_id++; mptcp_info2sockaddr(remote, &addr, ssk->sk_family); sock_hold(ssk); |