author     David S. Miller <davem@davemloft.net>   2021-02-13 03:31:46 +0300
committer  David S. Miller <davem@davemloft.net>   2021-02-13 03:31:46 +0300
commit     0a2f6b32cc45e3918321779fe90c28f1ed27d2af (patch)
tree       53af04a955ad82a889b2495fe64e9b6a752c3251
parent     0a82c37e34fe5179a0e18b7a267bbe088fefdee8 (diff)
parent     b911c97c7dc771633c68ea9b8f15070f8af3d323 (diff)
Merge branch 'mptcp-genl-events'
Mat Martineau says:

====================
mptcp: Add genl events for connection info

This series from the MPTCP tree adds genl multicast events that are
important for implementing a userspace path manager. In MPTCP, a path
manager is responsible for adding or removing additional subflows on
each MPTCP connection. The in-kernel path manager (already part of the
kernel) is a better fit for many server use cases, but the additional
flexibility of userspace path managers is often useful for client
devices.

Patches 1, 2, 4, 5, and 6 do some refactoring to streamline the netlink
event implementation in the final patch.

Patch 3 improves the timeliness of subflow destruction to ensure the
'subflow closed' event will be sent soon enough.

Patch 7 allows use of the GENL_UNS_ADMIN_PERM flag on genl mcast groups
to mandate CAP_NET_ADMIN, which is important to protect token
information in the MPTCP events. This is a genetlink change.

Patch 8 adds the MPTCP netlink events.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
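For context, a userspace path manager would consume these events by joining
the new generic netlink multicast group. The following is a minimal sketch
using libnl-3 (link with -lnl-3 -lnl-genl-3); the family and group names come
from the UAPI header added below, while the callback and error handling are
illustrative assumptions, not part of this series. Once patch 7 honors
GENL_UNS_ADMIN_PERM for mcast groups, joining without CAP_NET_ADMIN fails
with EPERM.

/* Hedged sketch: subscribe to the MPTCP path-manager event group.
 * Assumes libnl-3 and the UAPI names from this series ("mptcp_pm",
 * "mptcp_pm_events"); requires CAP_NET_ADMIN after patch 7.
 */
#include <stdio.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

static int on_event(struct nl_msg *msg, void *arg)
{
	struct genlmsghdr *ghdr = nlmsg_data(nlmsg_hdr(msg));

	/* ghdr->cmd carries one of enum mptcp_event_type */
	printf("mptcp event cmd=%u\n", ghdr->cmd);
	return NL_OK;
}

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	int grp;

	if (!sk || genl_connect(sk))
		return 1;

	/* multicast events are unsolicited: disable sequence checking */
	nl_socket_disable_seq_check(sk);
	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, on_event, NULL);

	grp = genl_ctrl_resolve_grp(sk, "mptcp_pm", "mptcp_pm_events");
	if (grp < 0 || nl_socket_add_membership(sk, grp))
		return 1;

	for (;;)
		nl_recvmsgs_default(sk);
}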
-rw-r--r--  include/net/genetlink.h       1
-rw-r--r--  include/uapi/linux/mptcp.h   74
-rw-r--r--  net/mptcp/options.c           2
-rw-r--r--  net/mptcp/pm.c               24
-rw-r--r--  net/mptcp/pm_netlink.c      310
-rw-r--r--  net/mptcp/protocol.c         72
-rw-r--r--  net/mptcp/protocol.h         20
-rw-r--r--  net/mptcp/subflow.c          27
-rw-r--r--  net/netlink/genetlink.c      32
9 files changed, 491 insertions, 71 deletions
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index e55ec1597ce7..7cb3fa8310ed 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -14,6 +14,7 @@
*/
struct genl_multicast_group {
char name[GENL_NAMSIZ];
+ u8 flags;
};
struct genl_ops;
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 3674a451a18c..c91578aaab32 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -36,6 +36,7 @@ enum {
/* netlink interface */
#define MPTCP_PM_NAME "mptcp_pm"
#define MPTCP_PM_CMD_GRP_NAME "mptcp_pm_cmds"
+#define MPTCP_PM_EV_GRP_NAME "mptcp_pm_events"
#define MPTCP_PM_VER 0x1
/*
@@ -104,4 +105,77 @@ struct mptcp_info {
__u64 mptcpi_rcv_nxt;
};
+/*
+ * MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6,
+ * sport, dport
+ * A new MPTCP connection has been created. This is a good time to allocate
+ * memory and send ADD_ADDR if needed. Depending on the traffic pattern,
+ * it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent.
+ *
+ * MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6,
+ * sport, dport
+ * An MPTCP connection is established (new subflows can now be started).
+ *
+ * MPTCP_EVENT_CLOSED: token
+ * An MPTCP connection has stopped.
+ *
+ * MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport]
+ * A new address has been announced by the peer.
+ *
+ * MPTCP_EVENT_REMOVED: token, rem_id
+ * An address has been removed by the peer.
+ *
+ * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6,
+ * daddr4 | daddr6, sport, dport, backup,
+ * if_idx [, error]
+ * A new subflow has been established. 'error' should not be set.
+ *
+ * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6,
+ * sport, dport, backup, if_idx [, error]
+ * A subflow has been closed. An error (copy of sk_err) may be set if an
+ * error was detected on this subflow.
+ *
+ * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6,
+ * sport, dport, backup, if_idx [, error]
+ * The priority of a subflow has changed. 'error' should not be set.
+ */
+enum mptcp_event_type {
+ MPTCP_EVENT_UNSPEC = 0,
+ MPTCP_EVENT_CREATED = 1,
+ MPTCP_EVENT_ESTABLISHED = 2,
+ MPTCP_EVENT_CLOSED = 3,
+
+ MPTCP_EVENT_ANNOUNCED = 6,
+ MPTCP_EVENT_REMOVED = 7,
+
+ MPTCP_EVENT_SUB_ESTABLISHED = 10,
+ MPTCP_EVENT_SUB_CLOSED = 11,
+
+ MPTCP_EVENT_SUB_PRIORITY = 13,
+};
+
+enum mptcp_event_attr {
+ MPTCP_ATTR_UNSPEC = 0,
+
+ MPTCP_ATTR_TOKEN, /* u32 */
+ MPTCP_ATTR_FAMILY, /* u16 */
+ MPTCP_ATTR_LOC_ID, /* u8 */
+ MPTCP_ATTR_REM_ID, /* u8 */
+ MPTCP_ATTR_SADDR4, /* be32 */
+ MPTCP_ATTR_SADDR6, /* struct in6_addr */
+ MPTCP_ATTR_DADDR4, /* be32 */
+ MPTCP_ATTR_DADDR6, /* struct in6_addr */
+ MPTCP_ATTR_SPORT, /* be16 */
+ MPTCP_ATTR_DPORT, /* be16 */
+ MPTCP_ATTR_BACKUP, /* u8 */
+ MPTCP_ATTR_ERROR, /* u8 */
+ MPTCP_ATTR_FLAGS, /* u16 */
+ MPTCP_ATTR_TIMEOUT, /* u32 */
+ MPTCP_ATTR_IF_IDX, /* s32 */
+
+ __MPTCP_ATTR_AFTER_LAST
+};
+
+#define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1)
+
#endif /* _UAPI_MPTCP_H */
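Given the per-event attribute lists documented in the header above, decoding
a received event could look like the following hedged libnl-3 sketch. It
assumes the UAPI header from this series is installed as <linux/mptcp.h>,
and prints only a subset of fields, chosen to match the MPTCP_EVENT_ANNOUNCED
layout (token, daddr4, dport).

/* Hedged sketch: decode one MPTCP event message with libnl-3. */
#include <stdio.h>
#include <arpa/inet.h>
#include <linux/mptcp.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>

static int decode_event(struct nl_msg *msg, void *arg)
{
	struct nlattr *tb[MPTCP_ATTR_MAX + 1];
	struct genlmsghdr *ghdr = nlmsg_data(nlmsg_hdr(msg));

	/* the mptcp_pm family has no user header, hence hdrlen 0;
	 * no nla_policy validation is done in this sketch
	 */
	if (genlmsg_parse(nlmsg_hdr(msg), 0, tb, MPTCP_ATTR_MAX, NULL))
		return NL_SKIP;

	if (tb[MPTCP_ATTR_TOKEN])
		printf("token=%u ", nla_get_u32(tb[MPTCP_ATTR_TOKEN]));

	if (tb[MPTCP_ATTR_DADDR4]) {
		/* attribute is big-endian (be32), as inet_ntop expects */
		struct in_addr a = { .s_addr = nla_get_u32(tb[MPTCP_ATTR_DADDR4]) };
		char buf[INET_ADDRSTRLEN];

		printf("daddr4=%s ", inet_ntop(AF_INET, &a, buf, sizeof(buf)));
	}

	if (tb[MPTCP_ATTR_DPORT])
		printf("dport=%u ", ntohs(nla_get_u16(tb[MPTCP_ATTR_DPORT])));

	printf("cmd=%u\n", ghdr->cmd);
	return NL_OK;
}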
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 3b71d68b3863..bb874c5d663a 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -867,7 +867,7 @@ fully_established:
clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk, subflow);
} else {
- mptcp_pm_fully_established(msk);
+ mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
}
return true;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 1a25003fd8e3..6fd4b2c1b076 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -68,13 +68,14 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id)
/* path manager event handlers */
-void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side)
+void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);
WRITE_ONCE(pm->server_side, server_side);
+ mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
}
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
@@ -119,16 +120,13 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
return true;
}
-void mptcp_pm_fully_established(struct mptcp_sock *msk)
+void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
{
struct mptcp_pm_data *pm = &msk->pm;
+ bool announce = false;
pr_debug("msk=%p", msk);
- /* try to avoid acquiring the lock below */
- if (!READ_ONCE(pm->work_pending))
- return;
-
spin_lock_bh(&pm->lock);
/* mptcp_pm_fully_established() can be invoked by multiple
@@ -138,9 +136,15 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk)
if (READ_ONCE(pm->work_pending) &&
!(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
- msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
+ if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
+ announce = true;
+
+ msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
spin_unlock_bh(&pm->lock);
+
+ if (announce)
+ mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
}
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
@@ -179,6 +183,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
READ_ONCE(pm->accept_addr));
+ mptcp_event_addr_announced(msk, addr);
+
spin_lock_bh(&pm->lock);
if (!READ_ONCE(pm->accept_addr)) {
@@ -205,6 +211,8 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id)
pr_debug("msk=%p remote_id=%d", msk, rm_id);
+ mptcp_event_addr_removed(msk, rm_id);
+
spin_lock_bh(&pm->lock);
mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
pm->rm_id = rm_id;
@@ -217,6 +225,8 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
subflow->backup = bkup;
+
+ mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
}
/* path manager helpers */
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 23780a13b934..229fd1af2e29 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -56,6 +56,8 @@ struct pm_nl_pernet {
#define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3
+static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
+
static bool addresses_equal(const struct mptcp_addr_info *a,
struct mptcp_addr_info *b, bool use_port)
{
@@ -448,17 +450,17 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
}
}
-void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
+static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
{
mptcp_pm_create_subflow_or_signal_addr(msk);
}
-void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
+static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
{
mptcp_pm_create_subflow_or_signal_addr(msk);
}
-void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
+static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_accept_max;
@@ -498,7 +500,7 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
mptcp_pm_nl_add_addr_send_ack(msk);
}
-void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
+static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
@@ -568,7 +570,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
return -EINVAL;
}
-void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
+static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow, *tmp;
struct sock *sk = (struct sock *)msk;
@@ -592,7 +594,7 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
- __mptcp_close_ssk(sk, ssk, subflow);
+ mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
msk->pm.add_addr_accepted--;
@@ -605,6 +607,39 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
}
}
+void mptcp_pm_nl_work(struct mptcp_sock *msk)
+{
+ struct mptcp_pm_data *pm = &msk->pm;
+
+ msk_owned_by_me(msk);
+
+ spin_lock_bh(&msk->pm.lock);
+
+ pr_debug("msk=%p status=%x", msk, pm->status);
+ if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
+ pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
+ mptcp_pm_nl_add_addr_received(msk);
+ }
+ if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
+ pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
+ mptcp_pm_nl_add_addr_send_ack(msk);
+ }
+ if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
+ pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
+ mptcp_pm_nl_rm_addr_received(msk);
+ }
+ if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
+ pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
+ mptcp_pm_nl_fully_established(msk);
+ }
+ if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
+ pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
+ mptcp_pm_nl_subflow_established(msk);
+ }
+
+ spin_unlock_bh(&msk->pm.lock);
+}
+
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
{
struct mptcp_subflow_context *subflow, *tmp;
@@ -629,7 +664,7 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
- __mptcp_close_ssk(sk, ssk, subflow);
+ mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
msk->pm.local_addr_used--;
@@ -825,10 +860,14 @@ void mptcp_pm_nl_data_init(struct mptcp_sock *msk)
WRITE_ONCE(pm->accept_subflow, subflows);
}
-#define MPTCP_PM_CMD_GRP_OFFSET 0
+#define MPTCP_PM_CMD_GRP_OFFSET 0
+#define MPTCP_PM_EV_GRP_OFFSET 1
static const struct genl_multicast_group mptcp_pm_mcgrps[] = {
[MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, },
+ [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
};
static const struct nla_policy
@@ -1447,6 +1486,261 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
return 0;
}
+static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp)
+{
+ genlmsg_multicast_netns(&mptcp_genl_family, net,
+ nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp);
+}
+
+static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
+{
+ const struct inet_sock *issk = inet_sk(ssk);
+ const struct mptcp_subflow_context *sf;
+
+ if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family))
+ return -EMSGSIZE;
+
+ switch (ssk->sk_family) {
+ case AF_INET:
+ if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr))
+ return -EMSGSIZE;
+ if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr))
+ return -EMSGSIZE;
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case AF_INET6: {
+ const struct ipv6_pinfo *np = inet6_sk(ssk);
+
+ if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr))
+ return -EMSGSIZE;
+ if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr))
+ return -EMSGSIZE;
+ break;
+ }
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ return -EMSGSIZE;
+ }
+
+ if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport))
+ return -EMSGSIZE;
+ if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport))
+ return -EMSGSIZE;
+
+ sf = mptcp_subflow_ctx(ssk);
+ if (WARN_ON_ONCE(!sf))
+ return -EINVAL;
+
+ if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
+ return -EMSGSIZE;
+
+ if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
+ const struct mptcp_sock *msk,
+ const struct sock *ssk)
+{
+ const struct sock *sk = (const struct sock *)msk;
+ const struct mptcp_subflow_context *sf;
+ u8 sk_err;
+
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ return -EMSGSIZE;
+
+ if (mptcp_event_add_subflow(skb, ssk))
+ return -EMSGSIZE;
+
+ sf = mptcp_subflow_ctx(ssk);
+ if (WARN_ON_ONCE(!sf))
+ return -EINVAL;
+
+ if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup))
+ return -EMSGSIZE;
+
+ if (ssk->sk_bound_dev_if &&
+ nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if))
+ return -EMSGSIZE;
+
+ sk_err = ssk->sk_err;
+ if (sk_err && sk->sk_state == TCP_ESTABLISHED &&
+ nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int mptcp_event_sub_established(struct sk_buff *skb,
+ const struct mptcp_sock *msk,
+ const struct sock *ssk)
+{
+ return mptcp_event_put_token_and_ssk(skb, msk, ssk);
+}
+
+static int mptcp_event_sub_closed(struct sk_buff *skb,
+ const struct mptcp_sock *msk,
+ const struct sock *ssk)
+{
+ if (mptcp_event_put_token_and_ssk(skb, msk, ssk))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int mptcp_event_created(struct sk_buff *skb,
+ const struct mptcp_sock *msk,
+ const struct sock *ssk)
+{
+ int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token);
+
+ if (err)
+ return err;
+
+ return mptcp_event_add_subflow(skb, ssk);
+}
+
+void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
+{
+ struct net *net = sock_net((const struct sock *)msk);
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+
+ if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
+ return;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED);
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id))
+ goto nla_put_failure;
+
+ genlmsg_end(skb, nlh);
+ mptcp_nl_mcast_send(net, skb, GFP_ATOMIC);
+ return;
+
+nla_put_failure:
+ kfree_skb(skb);
+}
+
+void mptcp_event_addr_announced(const struct mptcp_sock *msk,
+ const struct mptcp_addr_info *info)
+{
+ struct net *net = sock_net((const struct sock *)msk);
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+
+ if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
+ return;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0,
+ MPTCP_EVENT_ANNOUNCED);
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
+ goto nla_put_failure;
+
+ if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port))
+ goto nla_put_failure;
+
+ switch (info->family) {
+ case AF_INET:
+ if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr))
+ goto nla_put_failure;
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case AF_INET6:
+ if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6))
+ goto nla_put_failure;
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(skb, nlh);
+ mptcp_nl_mcast_send(net, skb, GFP_ATOMIC);
+ return;
+
+nla_put_failure:
+ kfree_skb(skb);
+}
+
+void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
+ const struct sock *ssk, gfp_t gfp)
+{
+ struct net *net = sock_net((const struct sock *)msk);
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+
+ if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
+ return;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!skb)
+ return;
+
+ nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type);
+ if (!nlh)
+ goto nla_put_failure;
+
+ switch (type) {
+ case MPTCP_EVENT_UNSPEC:
+ WARN_ON_ONCE(1);
+ break;
+ case MPTCP_EVENT_CREATED:
+ case MPTCP_EVENT_ESTABLISHED:
+ if (mptcp_event_created(skb, msk, ssk) < 0)
+ goto nla_put_failure;
+ break;
+ case MPTCP_EVENT_CLOSED:
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0)
+ goto nla_put_failure;
+ break;
+ case MPTCP_EVENT_ANNOUNCED:
+ case MPTCP_EVENT_REMOVED:
+ /* call mptcp_event_addr_announced()/removed instead */
+ WARN_ON_ONCE(1);
+ break;
+ case MPTCP_EVENT_SUB_ESTABLISHED:
+ case MPTCP_EVENT_SUB_PRIORITY:
+ if (mptcp_event_sub_established(skb, msk, ssk) < 0)
+ goto nla_put_failure;
+ break;
+ case MPTCP_EVENT_SUB_CLOSED:
+ if (mptcp_event_sub_closed(skb, msk, ssk) < 0)
+ goto nla_put_failure;
+ break;
+ }
+
+ genlmsg_end(skb, nlh);
+ mptcp_nl_mcast_send(net, skb, gfp);
+ return;
+
+nla_put_failure:
+ kfree_skb(skb);
+}
+
static const struct genl_small_ops mptcp_pm_ops[] = {
{
.cmd = MPTCP_PM_CMD_ADD_ADDR,
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b9f16a1535d2..c2a8392254dc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2114,8 +2114,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
* so we need to use tcp_close() after detaching them from the mptcp
* parent socket.
*/
-void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
- struct mptcp_subflow_context *subflow)
+static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow)
{
list_del(&subflow->node);
@@ -2147,40 +2147,17 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
sock_put(ssk);
}
-static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
+void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow)
{
- return 0;
+ if (sk->sk_state == TCP_ESTABLISHED)
+ mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL);
+ __mptcp_close_ssk(sk, ssk, subflow);
}
-static void pm_work(struct mptcp_sock *msk)
+static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
{
- struct mptcp_pm_data *pm = &msk->pm;
-
- spin_lock_bh(&msk->pm.lock);
-
- pr_debug("msk=%p status=%x", msk, pm->status);
- if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
- pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
- mptcp_pm_nl_add_addr_received(msk);
- }
- if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
- pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
- mptcp_pm_nl_add_addr_send_ack(msk);
- }
- if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
- pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
- mptcp_pm_nl_rm_addr_received(msk);
- }
- if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
- pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
- mptcp_pm_nl_fully_established(msk);
- }
- if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
- pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
- mptcp_pm_nl_subflow_established(msk);
- }
-
- spin_unlock_bh(&msk->pm.lock);
+ return 0;
}
static void __mptcp_close_subflow(struct mptcp_sock *msk)
@@ -2195,7 +2172,11 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
if (inet_sk_state_load(ssk) != TCP_CLOSE)
continue;
- __mptcp_close_ssk((struct sock *)msk, ssk, subflow);
+ /* 'subflow_data_ready' will re-sched once rx queue is empty */
+ if (!skb_queue_empty_lockless(&ssk->sk_receive_queue))
+ continue;
+
+ mptcp_close_ssk((struct sock *)msk, ssk, subflow);
}
}
@@ -2267,11 +2248,8 @@ static void mptcp_worker(struct work_struct *work)
mptcp_check_fastclose(msk);
- if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
- __mptcp_close_subflow(msk);
-
if (msk->pm.status)
- pm_work(msk);
+ mptcp_pm_nl_work(msk);
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
mptcp_check_for_eof(msk);
@@ -2291,6 +2269,9 @@ static void mptcp_worker(struct work_struct *work)
goto unlock;
}
+ if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ __mptcp_close_subflow(msk);
+
if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
goto unlock;
@@ -2607,6 +2588,10 @@ cleanup:
release_sock(sk);
if (do_cancel_work)
mptcp_cancel_work(sk);
+
+ if (mptcp_sk(sk)->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+
sock_put(sk);
}
@@ -3049,7 +3034,7 @@ void mptcp_finish_connect(struct sock *ssk)
WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq);
- mptcp_pm_new_connection(msk, 0);
+ mptcp_pm_new_connection(msk, ssk, 0);
mptcp_rcv_space_init(msk, ssk);
}
@@ -3078,7 +3063,7 @@ bool mptcp_finish_join(struct sock *ssk)
return false;
if (!msk->pm.server_side)
- return true;
+ goto out;
if (!mptcp_pm_allow_new_subflow(msk))
return false;
@@ -3105,6 +3090,8 @@ bool mptcp_finish_join(struct sock *ssk)
if (parent_sock && !ssk->sk_socket)
mptcp_sock_graft(ssk, parent_sock);
subflow->map_seq = READ_ONCE(msk->ack_seq);
+out:
+ mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
return true;
}
@@ -3281,9 +3268,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct mptcp_sock *msk = mptcp_sk(newsock->sk);
struct mptcp_subflow_context *subflow;
struct sock *newsk = newsock->sk;
- bool slowpath;
- slowpath = lock_sock_fast(newsk);
+ lock_sock(newsk);
/* PM/worker can now acquire the first subflow socket
* lock without racing with listener queue cleanup,
@@ -3293,7 +3279,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
list_add(&subflow->node, &msk->conn_list);
sock_hold(msk->first);
if (mptcp_is_fully_established(newsk))
- mptcp_pm_fully_established(msk);
+ mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL);
mptcp_copy_inaddrs(newsk, msk->first);
mptcp_rcv_space_init(msk, msk->first);
@@ -3309,7 +3295,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk->sk_socket)
mptcp_sock_graft(ssk, newsock);
}
- unlock_sock_fast(newsk, slowpath);
+ release_sock(newsk);
}
if (inet_csk_listen_poll(ssock->sk))
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 73a923d02aad..d31edbae8da8 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -10,6 +10,7 @@
#include <linux/random.h>
#include <net/tcp.h>
#include <net/inet_connection_sock.h>
+#include <uapi/linux/mptcp.h>
#define MPTCP_SUPPORTED_VERSION 1
@@ -539,8 +540,8 @@ void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
-void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
- struct mptcp_subflow_context *subflow);
+void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
@@ -639,8 +640,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
-void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side);
-void mptcp_pm_fully_established(struct mptcp_sock *msk);
+void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
+void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk,
@@ -666,6 +667,11 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id);
+void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
+ const struct sock *ssk, gfp_t gfp);
+void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info);
+void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
+
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
@@ -713,11 +719,7 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
-void mptcp_pm_nl_fully_established(struct mptcp_sock *msk);
-void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk);
-void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk);
-void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
-void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk);
+void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 280da418d60b..ce2dea2a6e0a 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -675,7 +675,7 @@ create_child:
* created mptcp socket
*/
new_msk->sk_destruct = mptcp_sock_destruct;
- mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
+ mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
ctx->conn = new_msk;
new_msk = NULL;
@@ -953,6 +953,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
subflow->map_valid = 0;
}
+/* sched mptcp worker to remove the subflow if no more data is pending */
+static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct sock *sk = (struct sock *)msk;
+
+ if (likely(ssk->sk_state != TCP_CLOSE))
+ return;
+
+ if (skb_queue_empty(&ssk->sk_receive_queue) &&
+ !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
+ sock_hold(sk);
+ if (!schedule_work(&msk->work))
+ sock_put(sk);
+ }
+}
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -991,11 +1007,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
}
if (status != MAPPING_OK)
- return false;
+ goto no_data;
skb = skb_peek(&ssk->sk_receive_queue);
if (WARN_ON_ONCE(!skb))
- return false;
+ goto no_data;
/* if msk lacks the remote key, this subflow must provide an
* MP_CAPABLE-based mapping
@@ -1029,6 +1045,9 @@ static bool subflow_check_data_avail(struct sock *ssk)
}
return true;
+no_data:
+ subflow_sched_work_if_closed(msk, ssk);
+ return false;
fatal:
/* fatal protocol error, close the socket */
/* This barrier is coupled with smp_rmb() in tcp_poll() */
@@ -1413,6 +1432,8 @@ static void subflow_state_change(struct sock *sk)
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
+ subflow_sched_work_if_closed(mptcp_sk(parent), sk);
+
if (__mptcp_check_fallback(mptcp_sk(parent)) &&
!subflow->rx_eof && subflow_is_done(sk)) {
subflow->rx_eof = 1;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index c992424e4d63..2d6fdf40df66 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1360,11 +1360,43 @@ static struct genl_family genl_ctrl __ro_after_init = {
.netnsok = true,
};
+static int genl_bind(struct net *net, int group)
+{
+ const struct genl_family *family;
+ unsigned int id;
+ int ret = 0;
+
+ genl_lock_all();
+
+ idr_for_each_entry(&genl_fam_idr, family, id) {
+ const struct genl_multicast_group *grp;
+ int i;
+
+ if (family->n_mcgrps == 0)
+ continue;
+
+ i = group - family->mcgrp_offset;
+ if (i < 0 || i >= family->n_mcgrps)
+ continue;
+
+ grp = &family->mcgrps[i];
+ if ((grp->flags & GENL_UNS_ADMIN_PERM) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ ret = -EPERM;
+
+ break;
+ }
+
+ genl_unlock_all();
+ return ret;
+}
+
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
+ .bind = genl_bind,
};
/* we'll bump the group number right afterwards */