summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-12-13 02:33:39 +0300
committerJakub Kicinski <kuba@kernel.org>2022-12-13 02:33:39 +0300
commit8150f0cfb24f781aa61a332bf2426365462afff8 (patch)
tree31b8a89eec4f4886b40022c7b622a21e24ac2cfc /net
parent02abf84aa52da86586ec6323969afa158ec6e4aa (diff)
parentb6d00da08610e2b17412330d7cfc7c04a3b6570d (diff)
downloadlinux-8150f0cfb24f781aa61a332bf2426365462afff8.tar.xz
Merge branch 'bridge-mcast-extensions-for-evpn'
Ido Schimmel says: ==================== bridge: mcast: Extensions for EVPN tl;dr ===== This patchset creates feature parity between user space and the kernel and allows the former to install and replace MDB port group entries with a source list and associated filter mode. This is required for EVPN use cases where multicast state is not derived from snooped IGMP/MLD packets, but instead derived from EVPN routes exchanged by the control plane in user space. Background ========== IGMPv3 [1] and MLDv2 [2] differ from earlier versions of the protocols in that they add support for source-specific multicast. That is, hosts can advertise interest in listening to a particular multicast address only from specific source addresses or from all sources except for specific source addresses. In kernel 5.10 [3][4], the bridge driver gained the ability to snoop IGMPv3/MLDv2 packets and install corresponding MDB port group entries. For example, a snooped IGMPv3 Membership Report that contains a single MODE_IS_EXCLUDE record for group 239.10.10.10 with sources 192.0.2.1, 192.0.2.2, 192.0.2.20 and 192.0.2.21 would trigger the creation of these entries: # bridge -d mdb show dev br0 port veth1 grp 239.10.10.10 src 192.0.2.21 temp filter_mode include proto kernel blocked dev br0 port veth1 grp 239.10.10.10 src 192.0.2.20 temp filter_mode include proto kernel blocked dev br0 port veth1 grp 239.10.10.10 src 192.0.2.2 temp filter_mode include proto kernel blocked dev br0 port veth1 grp 239.10.10.10 src 192.0.2.1 temp filter_mode include proto kernel blocked dev br0 port veth1 grp 239.10.10.10 temp filter_mode exclude source_list 192.0.2.21/0.00,192.0.2.20/0.00,192.0.2.2/0.00,192.0.2.1/0.00 proto kernel While the kernel can install and replace entries with a filter mode and source list, user space cannot. It can only add EXCLUDE entries with an empty source list, which is sufficient for IGMPv2/MLDv1, but not for IGMPv3/MLDv2. Use cases where the multicast state is not derived from snooped packets, but instead derived from routes exchanged by the user space control plane require feature parity between user space and the kernel in terms of MDB configuration. Such a use case is detailed in the next section. Motivation ========== RFC 7432 [5] defines a "MAC/IP Advertisement route" (type 2) [6] that allows NVE switches in the EVPN network to advertise and learn reachability information for unicast MAC addresses. Traffic destined to a unicast MAC address can therefore be selectively forwarded to a single NVE switch behind which the MAC is located. The same is not true for IP multicast traffic. Such traffic is simply flooded as BUM to all NVE switches in the broadcast domain (BD), regardless if a switch has interested receivers for the multicast stream or not. This is especially problematic for overlay networks that make heavy use of multicast. The issue is addressed by RFC 9251 [7] that defines a "Selective Multicast Ethernet Tag Route" (type 6) [8] which allows NVE switches in the EVPN network to advertise multicast streams that they are interested in. This is done by having each switch suppress IGMP/MLD packets from being transmitted to the NVE network and instead communicate the information over BGP to other switches. As far as the bridge driver is concerned, the above means that the multicast state (i.e., {multicast address, group timer, filter-mode, (source records)}) for the VXLAN bridge port is not populated by the kernel from snooped IGMP/MLD packets (they are suppressed), but instead by user space. Specifically, by the routing daemon that is exchanging EVPN routes with other NVE switches. Changes are obviously also required in the VXLAN driver, but they are the subject of future patchsets. See the "Future work" section. Implementation ============== The user interface is extended to allow user space to specify the filter mode of the MDB port group entry and its source list. Replace support is also added so that user space would not need to remove an entry and re-add it only to edit its source list or filter mode, as that would result in packet loss. Example usage: # bridge mdb replace dev br0 port dummy10 grp 239.1.1.1 permanent \ source_list 192.0.2.1,192.0.2.3 filter_mode exclude proto zebra # bridge -d -s mdb show dev br0 port dummy10 grp 239.1.1.1 src 192.0.2.3 permanent filter_mode include proto zebra blocked 0.00 dev br0 port dummy10 grp 239.1.1.1 src 192.0.2.1 permanent filter_mode include proto zebra blocked 0.00 dev br0 port dummy10 grp 239.1.1.1 permanent filter_mode exclude source_list 192.0.2.3/0.00,192.0.2.1/0.00 proto zebra 0.00 The netlink interface is extended with a few new attributes in the RTM_NEWMDB request message: [ struct nlmsghdr ] [ struct br_port_msg ] [ MDBA_SET_ENTRY ] struct br_mdb_entry [ MDBA_SET_ENTRY_ATTRS ] [ MDBE_ATTR_SOURCE ] struct in_addr / struct in6_addr [ MDBE_ATTR_SRC_LIST ] // new [ MDBE_SRC_LIST_ENTRY ] [ MDBE_SRCATTR_ADDRESS ] struct in_addr / struct in6_addr [ ...] [ MDBE_ATTR_GROUP_MODE ] // new u8 [ MDBE_ATTR_RTPORT ] // new u8 No changes are required in RTM_NEWMDB responses and notifications, as all the information can already be dumped by the kernel today. Testing ======= Tested with existing bridge multicast selftests: bridge_igmp.sh, bridge_mdb_port_down.sh, bridge_mdb.sh, bridge_mld.sh, bridge_vlan_mcast.sh. In addition, added many new test cases for existing as well as for new MDB functionality. Patchset overview ================= Patches #1-#8 are non-functional preparations for the core changes in later patches. Patches #9-#10 allow user space to install (*, G) entries with a source list and associated filter mode. Specifically, patch #9 adds the necessary kernel plumbing and patch #10 exposes the new functionality to user space via a few new attributes. Patch #11 allows user space to specify the routing protocol of new MDB port group entries so that a routing daemon could differentiate between entries installed by it and those installed by an administrator. Patch #12 allows user space to replace MDB port group entries. This is useful, for example, when user space wants to add a new source to a source list. Instead of deleting a (*, G) entry and re-adding it with an extended source list (which would result in packet loss), user space can simply replace the current entry. Patches #13-#14 add tests for existing MDB functionality as well as for all new functionality added in this patchset. Future work =========== The VXLAN driver will need to be extended with an MDB so that it could selectively forward IP multicast traffic to NVE switches with interested receivers instead of simply flooding it to all switches as BUM. The idea is to reuse the existing MDB interface for the VXLAN driver in a similar way to how the FDB interface is shared between the bridge and VXLAN drivers. From command line perspective, configuration will look as follows: # bridge mdb add dev br0 port vxlan0 grp 239.1.1.1 permanent \ filter_mode exclude source_list 198.50.100.1,198.50.100.2 # bridge mdb add dev vxlan0 port vxlan0 grp 239.1.1.1 permanent \ filter_mode include source_list 198.50.100.3,198.50.100.4 \ dst 192.0.2.1 dst_port 4789 src_vni 2 # bridge mdb add dev vxlan0 port vxlan0 grp 239.1.1.1 permanent \ filter_mode exclude source_list 198.50.100.1,198.50.100.2 \ dst 192.0.2.2 dst_port 4789 src_vni 2 Where the first command is enabled by this set, but the next two will be the subject of future work. From netlink perspective, the existing PF_BRIDGE/RTM_*MDB messages will be extended to the VXLAN driver. This means that a few new attributes will be added (e.g., 'MDBE_ATTR_SRC_VNI') and that the handlers for these messages will need to move to net/core/rtnetlink.c. The rtnetlink code will call into the appropriate driver based on the ifindex specified in the ancillary header. iproute2 patches can be found here [9]. Changelog ========= Since v1 [10]: * Patch #12: Remove extack from br_mdb_replace_group_sg(). * Patch #12: Change 'nlflags' to u16 and move it after 'filter_mode' to pack the structure. Since RFC [11]: * Patch #6: New patch. * Patch #9: Use an array instead of a list to store source entries. * Patch #10: Use an array instead of list to store source entries. * Patch #10: Drop br_mdb_config_attrs_fini(). * Patch #11: Reject protocol for host entries. * Patch #13: New patch. * Patch #14: New patch. [1] https://datatracker.ietf.org/doc/html/rfc3376 [2] https://www.rfc-editor.org/rfc/rfc3810 [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6af52ae2ed14a6bc756d5606b29097dfd76740b8 [4] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=68d4fd30c83b1b208e08c954cd45e6474b148c87 [5] https://datatracker.ietf.org/doc/html/rfc7432 [6] https://datatracker.ietf.org/doc/html/rfc7432#section-7.2 [7] https://datatracker.ietf.org/doc/html/rfc9251 [8] https://datatracker.ietf.org/doc/html/rfc9251#section-9.1 [9] https://github.com/idosch/iproute2/commits/submit/mdb_v1 [10] https://lore.kernel.org/netdev/20221208152839.1016350-1-idosch@nvidia.com/ [11] https://lore.kernel.org/netdev/20221018120420.561846-1-idosch@nvidia.com/ ==================== Link: https://lore.kernel.org/r/20221210145633.1328511-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/bridge/br_mdb.c525
-rw-r--r--net/bridge/br_multicast.c16
-rw-r--r--net/bridge/br_private.h15
3 files changed, 489 insertions, 67 deletions
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index ae7d93c08880..00e5743647b0 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -663,6 +663,28 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_MDB, err);
}
+static const struct nla_policy
+br_mdbe_src_list_entry_pol[MDBE_SRCATTR_MAX + 1] = {
+ [MDBE_SRCATTR_ADDRESS] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+};
+
+static const struct nla_policy
+br_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = {
+ [MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(br_mdbe_src_list_entry_pol),
+};
+
+static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
+ [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+ [MDBE_ATTR_GROUP_MODE] = NLA_POLICY_RANGE(NLA_U8, MCAST_EXCLUDE,
+ MCAST_INCLUDE),
+ [MDBE_ATTR_SRC_LIST] = NLA_POLICY_NESTED(br_mdbe_src_list_pol),
+ [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
+};
+
static bool is_valid_mdb_entry(struct br_mdb_entry *entry,
struct netlink_ext_ack *extack)
{
@@ -748,12 +770,6 @@ static bool is_valid_mdb_source(struct nlattr *attr, __be16 proto,
return true;
}
-static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
- [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
- sizeof(struct in_addr),
- sizeof(struct in6_addr)),
-};
-
static struct net_bridge_mcast *
__br_mdb_choose_context(struct net_bridge *br,
const struct br_mdb_entry *entry,
@@ -786,21 +802,320 @@ out:
return brmctx;
}
+static int br_mdb_replace_group_sg(const struct br_mdb_config *cfg,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ struct net_bridge_mcast *brmctx,
+ unsigned char flags)
+{
+ unsigned long now = jiffies;
+
+ pg->flags = flags;
+ pg->rt_protocol = cfg->rt_protocol;
+ if (!(flags & MDB_PG_FLAGS_PERMANENT) && !cfg->src_entry)
+ mod_timer(&pg->timer,
+ now + brmctx->multicast_membership_interval);
+ else
+ del_timer(&pg->timer);
+
+ br_mdb_notify(cfg->br->dev, mp, pg, RTM_NEWMDB);
+
+ return 0;
+}
+
+static int br_mdb_add_group_sg(const struct br_mdb_config *cfg,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_mcast *brmctx,
+ unsigned char flags,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+ unsigned long now = jiffies;
+
+ for (pp = &mp->ports;
+ (p = mlock_dereference(*pp, cfg->br)) != NULL;
+ pp = &p->next) {
+ if (p->key.port == cfg->p) {
+ if (!(cfg->nlflags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG_MOD(extack, "(S, G) group is already joined by port");
+ return -EEXIST;
+ }
+ return br_mdb_replace_group_sg(cfg, mp, p, brmctx,
+ flags);
+ }
+ if ((unsigned long)p->key.port < (unsigned long)cfg->p)
+ break;
+ }
+
+ p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
+ MCAST_INCLUDE, cfg->rt_protocol);
+ if (unlikely(!p)) {
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (S, G) port group");
+ return -ENOMEM;
+ }
+ rcu_assign_pointer(*pp, p);
+ if (!(flags & MDB_PG_FLAGS_PERMANENT) && !cfg->src_entry)
+ mod_timer(&p->timer,
+ now + brmctx->multicast_membership_interval);
+ br_mdb_notify(cfg->br->dev, mp, p, RTM_NEWMDB);
+
+ /* All of (*, G) EXCLUDE ports need to be added to the new (S, G) for
+ * proper replication.
+ */
+ if (br_multicast_should_handle_mode(brmctx, cfg->group.proto)) {
+ struct net_bridge_mdb_entry *star_mp;
+ struct br_ip star_group;
+
+ star_group = p->key.addr;
+ memset(&star_group.src, 0, sizeof(star_group.src));
+ star_mp = br_mdb_ip_get(cfg->br, &star_group);
+ if (star_mp)
+ br_multicast_sg_add_exclude_ports(star_mp, p);
+ }
+
+ return 0;
+}
+
+static int br_mdb_add_group_src_fwd(const struct br_mdb_config *cfg,
+ struct br_ip *src_ip,
+ struct net_bridge_mcast *brmctx,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_mdb_entry *sgmp;
+ struct br_mdb_config sg_cfg;
+ struct br_ip sg_ip;
+ u8 flags = 0;
+
+ sg_ip = cfg->group;
+ sg_ip.src = src_ip->src;
+ sgmp = br_multicast_new_group(cfg->br, &sg_ip);
+ if (IS_ERR(sgmp)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to add (S, G) MDB entry");
+ return PTR_ERR(sgmp);
+ }
+
+ if (cfg->entry->state == MDB_PERMANENT)
+ flags |= MDB_PG_FLAGS_PERMANENT;
+ if (cfg->filter_mode == MCAST_EXCLUDE)
+ flags |= MDB_PG_FLAGS_BLOCKED;
+
+ memset(&sg_cfg, 0, sizeof(sg_cfg));
+ sg_cfg.br = cfg->br;
+ sg_cfg.p = cfg->p;
+ sg_cfg.entry = cfg->entry;
+ sg_cfg.group = sg_ip;
+ sg_cfg.src_entry = true;
+ sg_cfg.filter_mode = MCAST_INCLUDE;
+ sg_cfg.rt_protocol = cfg->rt_protocol;
+ sg_cfg.nlflags = cfg->nlflags;
+ return br_mdb_add_group_sg(&sg_cfg, sgmp, brmctx, flags, extack);
+}
+
+static int br_mdb_add_group_src(const struct br_mdb_config *cfg,
+ struct net_bridge_port_group *pg,
+ struct net_bridge_mcast *brmctx,
+ struct br_mdb_src_entry *src,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_group_src *ent;
+ unsigned long now = jiffies;
+ int err;
+
+ ent = br_multicast_find_group_src(pg, &src->addr);
+ if (!ent) {
+ ent = br_multicast_new_group_src(pg, &src->addr);
+ if (!ent) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to add new source entry");
+ return -ENOSPC;
+ }
+ } else if (!(cfg->nlflags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG_MOD(extack, "Source entry already exists");
+ return -EEXIST;
+ }
+
+ if (cfg->filter_mode == MCAST_INCLUDE &&
+ cfg->entry->state == MDB_TEMPORARY)
+ mod_timer(&ent->timer, now + br_multicast_gmi(brmctx));
+ else
+ del_timer(&ent->timer);
+
+ /* Install a (S, G) forwarding entry for the source. */
+ err = br_mdb_add_group_src_fwd(cfg, &src->addr, brmctx, extack);
+ if (err)
+ goto err_del_sg;
+
+ ent->flags = BR_SGRP_F_INSTALLED | BR_SGRP_F_USER_ADDED;
+
+ return 0;
+
+err_del_sg:
+ __br_multicast_del_group_src(ent);
+ return err;
+}
+
+static void br_mdb_del_group_src(struct net_bridge_port_group *pg,
+ struct br_mdb_src_entry *src)
+{
+ struct net_bridge_group_src *ent;
+
+ ent = br_multicast_find_group_src(pg, &src->addr);
+ if (WARN_ON_ONCE(!ent))
+ return;
+ br_multicast_del_group_src(ent, false);
+}
+
+static int br_mdb_add_group_srcs(const struct br_mdb_config *cfg,
+ struct net_bridge_port_group *pg,
+ struct net_bridge_mcast *brmctx,
+ struct netlink_ext_ack *extack)
+{
+ int i, err;
+
+ for (i = 0; i < cfg->num_src_entries; i++) {
+ err = br_mdb_add_group_src(cfg, pg, brmctx,
+ &cfg->src_entries[i], extack);
+ if (err)
+ goto err_del_group_srcs;
+ }
+
+ return 0;
+
+err_del_group_srcs:
+ for (i--; i >= 0; i--)
+ br_mdb_del_group_src(pg, &cfg->src_entries[i]);
+ return err;
+}
+
+static int br_mdb_replace_group_srcs(const struct br_mdb_config *cfg,
+ struct net_bridge_port_group *pg,
+ struct net_bridge_mcast *brmctx,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_group_src *ent;
+ struct hlist_node *tmp;
+ int err;
+
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ ent->flags |= BR_SGRP_F_DELETE;
+
+ err = br_mdb_add_group_srcs(cfg, pg, brmctx, extack);
+ if (err)
+ goto err_clear_delete;
+
+ hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) {
+ if (ent->flags & BR_SGRP_F_DELETE)
+ br_multicast_del_group_src(ent, false);
+ }
+
+ return 0;
+
+err_clear_delete:
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ ent->flags &= ~BR_SGRP_F_DELETE;
+ return err;
+}
+
+static int br_mdb_replace_group_star_g(const struct br_mdb_config *cfg,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ struct net_bridge_mcast *brmctx,
+ unsigned char flags,
+ struct netlink_ext_ack *extack)
+{
+ unsigned long now = jiffies;
+ int err;
+
+ err = br_mdb_replace_group_srcs(cfg, pg, brmctx, extack);
+ if (err)
+ return err;
+
+ pg->flags = flags;
+ pg->filter_mode = cfg->filter_mode;
+ pg->rt_protocol = cfg->rt_protocol;
+ if (!(flags & MDB_PG_FLAGS_PERMANENT) &&
+ cfg->filter_mode == MCAST_EXCLUDE)
+ mod_timer(&pg->timer,
+ now + brmctx->multicast_membership_interval);
+ else
+ del_timer(&pg->timer);
+
+ br_mdb_notify(cfg->br->dev, mp, pg, RTM_NEWMDB);
+
+ if (br_multicast_should_handle_mode(brmctx, cfg->group.proto))
+ br_multicast_star_g_handle_mode(pg, cfg->filter_mode);
+
+ return 0;
+}
+
+static int br_mdb_add_group_star_g(const struct br_mdb_config *cfg,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_mcast *brmctx,
+ unsigned char flags,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+ unsigned long now = jiffies;
+ int err;
+
+ for (pp = &mp->ports;
+ (p = mlock_dereference(*pp, cfg->br)) != NULL;
+ pp = &p->next) {
+ if (p->key.port == cfg->p) {
+ if (!(cfg->nlflags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG_MOD(extack, "(*, G) group is already joined by port");
+ return -EEXIST;
+ }
+ return br_mdb_replace_group_star_g(cfg, mp, p, brmctx,
+ flags, extack);
+ }
+ if ((unsigned long)p->key.port < (unsigned long)cfg->p)
+ break;
+ }
+
+ p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
+ cfg->filter_mode, cfg->rt_protocol);
+ if (unlikely(!p)) {
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (*, G) port group");
+ return -ENOMEM;
+ }
+
+ err = br_mdb_add_group_srcs(cfg, p, brmctx, extack);
+ if (err)
+ goto err_del_port_group;
+
+ rcu_assign_pointer(*pp, p);
+ if (!(flags & MDB_PG_FLAGS_PERMANENT) &&
+ cfg->filter_mode == MCAST_EXCLUDE)
+ mod_timer(&p->timer,
+ now + brmctx->multicast_membership_interval);
+ br_mdb_notify(cfg->br->dev, mp, p, RTM_NEWMDB);
+ /* If we are adding a new EXCLUDE port group (*, G), it needs to be
+ * also added to all (S, G) entries for proper replication.
+ */
+ if (br_multicast_should_handle_mode(brmctx, cfg->group.proto) &&
+ cfg->filter_mode == MCAST_EXCLUDE)
+ br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE);
+
+ return 0;
+
+err_del_port_group:
+ hlist_del_init(&p->mglist);
+ kfree(p);
+ return err;
+}
+
static int br_mdb_add_group(const struct br_mdb_config *cfg,
struct netlink_ext_ack *extack)
{
- struct net_bridge_mdb_entry *mp, *star_mp;
- struct net_bridge_port_group __rcu **pp;
struct br_mdb_entry *entry = cfg->entry;
struct net_bridge_port *port = cfg->p;
+ struct net_bridge_mdb_entry *mp;
struct net_bridge *br = cfg->br;
- struct net_bridge_port_group *p;
struct net_bridge_mcast *brmctx;
struct br_ip group = cfg->group;
- unsigned long now = jiffies;
unsigned char flags = 0;
- struct br_ip star_group;
- u8 filter_mode;
brmctx = __br_mdb_choose_context(br, entry, extack);
if (!brmctx)
@@ -823,55 +1138,13 @@ static int br_mdb_add_group(const struct br_mdb_config *cfg,
return 0;
}
- for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
- pp = &p->next) {
- if (p->key.port == port) {
- NL_SET_ERR_MSG_MOD(extack, "Group is already joined by port");
- return -EEXIST;
- }
- if ((unsigned long)p->key.port < (unsigned long)port)
- break;
- }
-
- filter_mode = br_multicast_is_star_g(&group) ? MCAST_EXCLUDE :
- MCAST_INCLUDE;
-
if (entry->state == MDB_PERMANENT)
flags |= MDB_PG_FLAGS_PERMANENT;
- p = br_multicast_new_port_group(port, &group, *pp, flags, NULL,
- filter_mode, RTPROT_STATIC);
- if (unlikely(!p)) {
- NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
- return -ENOMEM;
- }
- rcu_assign_pointer(*pp, p);
- if (entry->state == MDB_TEMPORARY)
- mod_timer(&p->timer,
- now + brmctx->multicast_membership_interval);
- br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
- /* if we are adding a new EXCLUDE port group (*,G) it needs to be also
- * added to all S,G entries for proper replication, if we are adding
- * a new INCLUDE port (S,G) then all of *,G EXCLUDE ports need to be
- * added to it for proper replication
- */
- if (br_multicast_should_handle_mode(brmctx, group.proto)) {
- switch (filter_mode) {
- case MCAST_EXCLUDE:
- br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE);
- break;
- case MCAST_INCLUDE:
- star_group = p->key.addr;
- memset(&star_group.src, 0, sizeof(star_group.src));
- star_mp = br_mdb_ip_get(br, &star_group);
- if (star_mp)
- br_multicast_sg_add_exclude_ports(star_mp, p);
- break;
- }
- }
-
- return 0;
+ if (br_multicast_is_star_g(&group))
+ return br_mdb_add_group_star_g(cfg, mp, brmctx, flags, extack);
+ else
+ return br_mdb_add_group_sg(cfg, mp, brmctx, flags, extack);
}
static int __br_mdb_add(const struct br_mdb_config *cfg,
@@ -886,6 +1159,76 @@ static int __br_mdb_add(const struct br_mdb_config *cfg,
return ret;
}
+static int br_mdb_config_src_entry_init(struct nlattr *src_entry,
+ struct br_mdb_src_entry *src,
+ __be16 proto,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[MDBE_SRCATTR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, MDBE_SRCATTR_MAX, src_entry,
+ br_mdbe_src_list_entry_pol, extack);
+ if (err)
+ return err;
+
+ if (NL_REQ_ATTR_CHECK(extack, src_entry, tb, MDBE_SRCATTR_ADDRESS))
+ return -EINVAL;
+
+ if (!is_valid_mdb_source(tb[MDBE_SRCATTR_ADDRESS], proto, extack))
+ return -EINVAL;
+
+ src->addr.proto = proto;
+ nla_memcpy(&src->addr.src, tb[MDBE_SRCATTR_ADDRESS],
+ nla_len(tb[MDBE_SRCATTR_ADDRESS]));
+
+ return 0;
+}
+
+static int br_mdb_config_src_list_init(struct nlattr *src_list,
+ struct br_mdb_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *src_entry;
+ int rem, err;
+ int i = 0;
+
+ nla_for_each_nested(src_entry, src_list, rem)
+ cfg->num_src_entries++;
+
+ if (cfg->num_src_entries >= PG_SRC_ENT_LIMIT) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Exceeded maximum number of source entries (%u)",
+ PG_SRC_ENT_LIMIT - 1);
+ return -EINVAL;
+ }
+
+ cfg->src_entries = kcalloc(cfg->num_src_entries,
+ sizeof(struct br_mdb_src_entry), GFP_KERNEL);
+ if (!cfg->src_entries)
+ return -ENOMEM;
+
+ nla_for_each_nested(src_entry, src_list, rem) {
+ err = br_mdb_config_src_entry_init(src_entry,
+ &cfg->src_entries[i],
+ cfg->entry->addr.proto,
+ extack);
+ if (err)
+ goto err_src_entry_init;
+ i++;
+ }
+
+ return 0;
+
+err_src_entry_init:
+ kfree(cfg->src_entries);
+ return err;
+}
+
+static void br_mdb_config_src_list_fini(struct br_mdb_config *cfg)
+{
+ kfree(cfg->src_entries);
+}
+
static int br_mdb_config_attrs_init(struct nlattr *set_attrs,
struct br_mdb_config *cfg,
struct netlink_ext_ack *extack)
@@ -905,6 +1248,52 @@ static int br_mdb_config_attrs_init(struct nlattr *set_attrs,
__mdb_entry_to_br_ip(cfg->entry, &cfg->group, mdb_attrs);
+ if (mdb_attrs[MDBE_ATTR_GROUP_MODE]) {
+ if (!cfg->p) {
+ NL_SET_ERR_MSG_MOD(extack, "Filter mode cannot be set for host groups");
+ return -EINVAL;
+ }
+ if (!br_multicast_is_star_g(&cfg->group)) {
+ NL_SET_ERR_MSG_MOD(extack, "Filter mode can only be set for (*, G) entries");
+ return -EINVAL;
+ }
+ cfg->filter_mode = nla_get_u8(mdb_attrs[MDBE_ATTR_GROUP_MODE]);
+ } else {
+ cfg->filter_mode = MCAST_EXCLUDE;
+ }
+
+ if (mdb_attrs[MDBE_ATTR_SRC_LIST]) {
+ if (!cfg->p) {
+ NL_SET_ERR_MSG_MOD(extack, "Source list cannot be set for host groups");
+ return -EINVAL;
+ }
+ if (!br_multicast_is_star_g(&cfg->group)) {
+ NL_SET_ERR_MSG_MOD(extack, "Source list can only be set for (*, G) entries");
+ return -EINVAL;
+ }
+ if (!mdb_attrs[MDBE_ATTR_GROUP_MODE]) {
+ NL_SET_ERR_MSG_MOD(extack, "Source list cannot be set without filter mode");
+ return -EINVAL;
+ }
+ err = br_mdb_config_src_list_init(mdb_attrs[MDBE_ATTR_SRC_LIST],
+ cfg, extack);
+ if (err)
+ return err;
+ }
+
+ if (!cfg->num_src_entries && cfg->filter_mode == MCAST_INCLUDE) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot add (*, G) INCLUDE with an empty source list");
+ return -EINVAL;
+ }
+
+ if (mdb_attrs[MDBE_ATTR_RTPROT]) {
+ if (!cfg->p) {
+ NL_SET_ERR_MSG_MOD(extack, "Protocol cannot be set for host groups");
+ return -EINVAL;
+ }
+ cfg->rt_protocol = nla_get_u8(mdb_attrs[MDBE_ATTR_RTPROT]);
+ }
+
return 0;
}
@@ -923,6 +1312,9 @@ static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh,
return err;
memset(cfg, 0, sizeof(*cfg));
+ cfg->filter_mode = MCAST_EXCLUDE;
+ cfg->rt_protocol = RTPROT_STATIC;
+ cfg->nlflags = nlh->nlmsg_flags;
bpm = nlmsg_data(nlh);
if (!bpm->ifindex) {
@@ -996,6 +1388,11 @@ static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh,
return 0;
}
+static void br_mdb_config_fini(struct br_mdb_config *cfg)
+{
+ br_mdb_config_src_list_fini(cfg);
+}
+
static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -1009,28 +1406,29 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
+ err = -EINVAL;
/* host join errors which can happen before creating the group */
if (!cfg.p && !br_group_is_l2(&cfg.group)) {
/* don't allow any flags for host-joined IP groups */
if (cfg.entry->state) {
NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
- return -EINVAL;
+ goto out;
}
if (!br_multicast_is_star_g(&cfg.group)) {
NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined");
- return -EINVAL;
+ goto out;
}
}
if (br_group_is_l2(&cfg.group) && cfg.entry->state != MDB_PERMANENT) {
NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed");
- return -EINVAL;
+ goto out;
}
if (cfg.p) {
if (cfg.p->state == BR_STATE_DISABLED && cfg.entry->state != MDB_PERMANENT) {
NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state and entry is not permanent");
- return -EINVAL;
+ goto out;
}
vg = nbp_vlan_group(cfg.p);
} else {
@@ -1052,6 +1450,8 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
err = __br_mdb_add(&cfg, extack);
}
+out:
+ br_mdb_config_fini(&cfg);
return err;
}
@@ -1127,6 +1527,7 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
err = __br_mdb_del(&cfg);
}
+ br_mdb_config_fini(&cfg);
return err;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index db4c3900ae95..48170bd3785e 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -552,7 +552,8 @@ static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src,
continue;
if (p->rt_protocol != RTPROT_KERNEL &&
- (p->flags & MDB_PG_FLAGS_PERMANENT))
+ (p->flags & MDB_PG_FLAGS_PERMANENT) &&
+ !(src->flags & BR_SGRP_F_USER_ADDED))
break;
if (fastleave)
@@ -650,18 +651,23 @@ static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc)
kfree_rcu(src, rcu);
}
-void br_multicast_del_group_src(struct net_bridge_group_src *src,
- bool fastleave)
+void __br_multicast_del_group_src(struct net_bridge_group_src *src)
{
struct net_bridge *br = src->pg->key.port->br;
- br_multicast_fwd_src_remove(src, fastleave);
hlist_del_init_rcu(&src->node);
src->pg->src_ents--;
hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list);
queue_work(system_long_wq, &br->mcast_gc_work);
}
+void br_multicast_del_group_src(struct net_bridge_group_src *src,
+ bool fastleave)
+{
+ br_multicast_fwd_src_remove(src, fastleave);
+ __br_multicast_del_group_src(src);
+}
+
static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc)
{
struct net_bridge_port_group *pg;
@@ -1232,7 +1238,7 @@ br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip)
return NULL;
}
-static struct net_bridge_group_src *
+struct net_bridge_group_src *
br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_ip)
{
struct net_bridge_group_src *grp_src;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3997e16c15fc..15ef7fd508ee 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -93,11 +93,21 @@ struct bridge_mcast_stats {
struct u64_stats_sync syncp;
};
+struct br_mdb_src_entry {
+ struct br_ip addr;
+};
+
struct br_mdb_config {
struct net_bridge *br;
struct net_bridge_port *p;
struct br_mdb_entry *entry;
struct br_ip group;
+ bool src_entry;
+ u8 filter_mode;
+ u16 nlflags;
+ struct br_mdb_src_entry *src_entries;
+ int num_src_entries;
+ u8 rt_protocol;
};
#endif
@@ -300,6 +310,7 @@ struct net_bridge_fdb_flush_desc {
#define BR_SGRP_F_DELETE BIT(0)
#define BR_SGRP_F_SEND BIT(1)
#define BR_SGRP_F_INSTALLED BIT(2)
+#define BR_SGRP_F_USER_ADDED BIT(3)
struct net_bridge_mcast_gc {
struct hlist_node gc_node;
@@ -974,6 +985,10 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
struct net_bridge_port_group *sg);
struct net_bridge_group_src *
br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip);
+struct net_bridge_group_src *
+br_multicast_new_group_src(struct net_bridge_port_group *pg,
+ struct br_ip *src_ip);
+void __br_multicast_del_group_src(struct net_bridge_group_src *src);
void br_multicast_del_group_src(struct net_bridge_group_src *src,
bool fastleave);
void br_multicast_ctx_init(struct net_bridge *br,