diff options
Diffstat (limited to 'net')
38 files changed, 959 insertions, 559 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ec8408d1638f..dc1a197792e6 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -510,9 +510,17 @@ static void vlan_dev_set_lockdep_class(struct net_device *dev) netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL); } +static __be16 vlan_parse_protocol(const struct sk_buff *skb) +{ + struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); + + return __vlan_get_protocol(skb, veth->h_vlan_proto, NULL); +} + static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, .parse = eth_header_parse, + .parse_protocol = vlan_parse_protocol, }; static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev, @@ -532,6 +540,7 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev static const struct header_ops vlan_passthru_header_ops = { .create = vlan_passthru_hard_header, .parse = eth_header_parse, + .parse_protocol = vlan_parse_protocol, }; static struct device_type vlan_type = { diff --git a/net/bridge/br.c b/net/bridge/br.c index 1b169f8e7491..ef743f94254d 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -122,7 +122,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v break; case NETDEV_PRE_TYPE_CHANGE: - /* Forbid underlaying device to change its type. */ + /* Forbid underlying device to change its type. */ return NOTIFY_BAD; case NETDEV_RESEND_IGMP: diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 32ac8343b0ba..b7490237f3fc 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -602,6 +602,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, /* fastpath: update of existing entry */ if (unlikely(source != fdb->dst && !test_bit(BR_FDB_STICKY, &fdb->flags))) { + br_switchdev_fdb_notify(fdb, RTM_DELNEIGH); fdb->dst = source; fdb_modified = true; /* Take over HW learned entry */ diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 8ca1f1bc6d12..222285d9dae2 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -40,7 +40,7 @@ static int br_pass_frame_up(struct sk_buff *skb) vg = br_vlan_group_rcu(br); /* Bridge is just like any other port. Make sure the - * packet is allowed except in promisc modue when someone + * packet is allowed except in promisc mode when someone * may be running packet capture. */ if (!(brdev->flags & IFF_PROMISC) && diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index cec2c4e4561d..fc0a98874bfc 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -825,7 +825,7 @@ int br_mrp_start_in_test(struct net_bridge *br, return 0; } -/* Determin if the frame type is a ring frame */ +/* Determine if the frame type is a ring frame */ static bool br_mrp_ring_frame(struct sk_buff *skb) { const struct br_mrp_tlv_hdr *hdr; @@ -845,7 +845,7 @@ static bool br_mrp_ring_frame(struct sk_buff *skb) return false; } -/* Determin if the frame type is an interconnect frame */ +/* Determine if the frame type is an interconnect frame */ static bool br_mrp_in_frame(struct sk_buff *skb) { const struct br_mrp_tlv_hdr *hdr; @@ -894,7 +894,7 @@ static void br_mrp_mrm_process(struct br_mrp *mrp, struct net_bridge_port *port, br_mrp_ring_port_open(port->dev, false); } -/* Determin if the test hdr has a better priority than the node */ +/* Determine if the test hdr has a better priority than the node */ static bool br_mrp_test_better_than_own(struct br_mrp *mrp, struct net_bridge *br, const struct br_mrp_ring_test_hdr *hdr) diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 3e88be7aa269..a3a5745660dd 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -601,8 +601,8 @@ int __set_ageing_time(struct net_device *dev, unsigned long t) /* Set time interval that dynamic forwarding entries live * For pure software bridge, allow values outside the 802.1 * standard specification for special cases: - * 0 - entry never ages (all permanant) - * 1 - entry disappears (no persistance) + * 0 - entry never ages (all permanent) + * 1 - entry disappears (no persistence) * * Offloaded switch entries maybe more restrictive */ diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 015209bf44aa..a9c23ef83443 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -153,8 +153,7 @@ int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .flags = flags, - .vid_begin = vid, - .vid_end = vid, + .vid = vid, }; return switchdev_port_obj_add(dev, &v.obj, extack); @@ -165,8 +164,7 @@ int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) struct switchdev_obj_port_vlan v = { .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - .vid_begin = vid, - .vid_end = vid, + .vid = vid, }; return switchdev_port_obj_del(dev, &v.obj); diff --git a/net/can/raw.c b/net/can/raw.c index 6ec8aa1d0da4..37b47a39a3ed 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -665,10 +665,18 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (ro->count > 0) { int fsize = ro->count * sizeof(struct can_filter); - if (len > fsize) - len = fsize; - if (copy_to_user(optval, ro->filter, len)) - err = -EFAULT; + /* user space buffer to small for filter list? */ + if (len < fsize) { + /* return -ERANGE and needed space in optlen */ + err = -ERANGE; + if (put_user(fsize, optlen)) + err = -EFAULT; + } else { + if (len > fsize) + len = fsize; + if (copy_to_user(optval, ro->filter, len)) + err = -EFAULT; + } } else { len = 0; } diff --git a/net/core/dev.c b/net/core/dev.c index c360bb5367e2..0a31d4ea6f4d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5151,8 +5151,7 @@ another_round: skb_reset_mac_len(skb); } - if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || - skb->protocol == cpu_to_be16(ETH_P_8021AD)) { + if (eth_type_vlan(skb->protocol)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) goto out; @@ -5236,8 +5235,7 @@ check_vlan_id: * find vlan device. */ skb->pkt_type = PACKET_OTHERHOST; - } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || - skb->protocol == cpu_to_be16(ETH_P_8021AD)) { + } else if (eth_type_vlan(skb->protocol)) { /* Outer header is 802.1P with vlan 0, inner header is * 802.1Q or 802.1AD and vlan_do_receive() above could * not find vlan dev for vlan id 0. @@ -6070,10 +6068,6 @@ static gro_result_t napi_skb_finish(struct napi_struct *napi, gro_normal_one(napi, skb); break; - case GRO_DROP: - kfree_skb(skb); - break; - case GRO_MERGED_FREE: if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) napi_skb_free_stolen_head(skb); @@ -6158,10 +6152,6 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, gro_normal_one(napi, skb); break; - case GRO_DROP: - napi_reuse_skb(napi, skb); - break; - case GRO_MERGED_FREE: if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) napi_skb_free_stolen_head(skb); @@ -6223,9 +6213,6 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) gro_result_t ret; struct sk_buff *skb = napi_frags_skb(napi); - if (!skb) - return GRO_DROP; - trace_napi_gro_frags_entry(skb); ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); @@ -10008,7 +9995,7 @@ int register_netdevice(struct net_device *dev) dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF); dev->features |= NETIF_F_SOFT_FEATURES; - if (dev->netdev_ops->ndo_udp_tunnel_add) { + if (dev->udp_tunnel_nic_info) { dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT; dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 6f1adba6695f..2d70ded389ae 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -23,6 +23,7 @@ #include <linux/if_ether.h> #include <linux/mpls.h> #include <linux/tcp.h> +#include <linux/ptp_classify.h> #include <net/flow_dissector.h> #include <scsi/fc/fc_fcoe.h> #include <uapi/linux/batadv_packet.h> @@ -1251,6 +1252,21 @@ proto_again: &proto, &nhoff, hlen, flags); break; + case htons(ETH_P_1588): { + struct ptp_header *hdr, _hdr; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, + hlen, &_hdr); + if (!hdr) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } + + nhoff += ntohs(hdr->message_length); + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; + } + default: fdret = FLOW_DISSECT_RET_OUT_BAD; break; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c1a6f262636a..60390696e184 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -610,13 +610,14 @@ static void skb_release_data(struct sk_buff *skb) &shinfo->dataref)) return; + skb_zcopy_clear(skb, true); + for (i = 0; i < shinfo->nr_frags; i++) __skb_frag_unref(&shinfo->frags[i]); if (shinfo->frag_list) kfree_skb_list(shinfo->frag_list); - skb_zcopy_clear(skb, true); skb_free_head(skb); } @@ -1098,7 +1099,7 @@ void mm_unaccount_pinned_pages(struct mmpin *mmp) } EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages); -struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size) +struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) { struct ubuf_info *uarg; struct sk_buff *skb; @@ -1118,25 +1119,26 @@ struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size) return NULL; } - uarg->callback = sock_zerocopy_callback; + uarg->callback = msg_zerocopy_callback; uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1; uarg->len = 1; uarg->bytelen = size; uarg->zerocopy = 1; + uarg->flags = SKBFL_ZEROCOPY_FRAG; refcount_set(&uarg->refcnt, 1); sock_hold(sk); return uarg; } -EXPORT_SYMBOL_GPL(sock_zerocopy_alloc); +EXPORT_SYMBOL_GPL(msg_zerocopy_alloc); static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg) { return container_of((void *)uarg, struct sk_buff, cb); } -struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, - struct ubuf_info *uarg) +struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, + struct ubuf_info *uarg) { if (uarg) { const u32 byte_limit = 1 << 19; /* limit to a few TSO */ @@ -1168,16 +1170,16 @@ struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, /* no extra ref when appending to datagram (MSG_MORE) */ if (sk->sk_type == SOCK_STREAM) - sock_zerocopy_get(uarg); + net_zcopy_get(uarg); return uarg; } } new_alloc: - return sock_zerocopy_alloc(sk, size); + return msg_zerocopy_alloc(sk, size); } -EXPORT_SYMBOL_GPL(sock_zerocopy_realloc); +EXPORT_SYMBOL_GPL(msg_zerocopy_realloc); static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len) { @@ -1199,7 +1201,7 @@ static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len) return true; } -void sock_zerocopy_callback(struct ubuf_info *uarg, bool success) +static void __msg_zerocopy_callback(struct ubuf_info *uarg) { struct sk_buff *tail, *skb = skb_from_uarg(uarg); struct sock_exterr_skb *serr; @@ -1227,7 +1229,7 @@ void sock_zerocopy_callback(struct ubuf_info *uarg, bool success) serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY; serr->ee.ee_data = hi; serr->ee.ee_info = lo; - if (!success) + if (!uarg->zerocopy) serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED; q = &sk->sk_error_queue; @@ -1246,32 +1248,28 @@ release: consume_skb(skb); sock_put(sk); } -EXPORT_SYMBOL_GPL(sock_zerocopy_callback); -void sock_zerocopy_put(struct ubuf_info *uarg) +void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, + bool success) { - if (uarg && refcount_dec_and_test(&uarg->refcnt)) { - if (uarg->callback) - uarg->callback(uarg, uarg->zerocopy); - else - consume_skb(skb_from_uarg(uarg)); - } + uarg->zerocopy = uarg->zerocopy & success; + + if (refcount_dec_and_test(&uarg->refcnt)) + __msg_zerocopy_callback(uarg); } -EXPORT_SYMBOL_GPL(sock_zerocopy_put); +EXPORT_SYMBOL_GPL(msg_zerocopy_callback); -void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) +void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) { - if (uarg) { - struct sock *sk = skb_from_uarg(uarg)->sk; + struct sock *sk = skb_from_uarg(uarg)->sk; - atomic_dec(&sk->sk_zckey); - uarg->len--; + atomic_dec(&sk->sk_zckey); + uarg->len--; - if (have_uref) - sock_zerocopy_put(uarg); - } + if (have_uref) + msg_zerocopy_callback(NULL, uarg, true); } -EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort); +EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort); int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { @@ -1335,7 +1333,7 @@ static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig, * @skb: the skb to modify * @gfp_mask: allocation priority * - * This must be called on SKBTX_DEV_ZEROCOPY skb. + * This must be called on skb with SKBFL_ZEROCOPY_ENABLE. * It will copy all frags into kernel and drop the reference * to userspace pages. * @@ -3272,8 +3270,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags & - SKBTX_SHARED_FRAG; + skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; skb_zerocopy_clone(skb1, skb, 0); if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); @@ -3998,8 +3995,8 @@ normal: skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & - SKBTX_SHARED_FRAG; + skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags & + SKBFL_SHARED_FRAG; if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index a1b1dc8a4d87..f4ce3c5826a0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -219,11 +219,21 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, } skb = nskb; - p = netdev_priv(skb->dev); skb_push(skb, ETH_HLEN); skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); + if (unlikely(!dsa_slave_dev_check(skb->dev))) { + /* Packet is to be injected directly on an upper + * device, e.g. a team/bond, so skip all DSA-port + * specific actions. + */ + netif_rx(skb); + return 0; + } + + p = netdev_priv(skb->dev); + if (unlikely(cpu_dp->ds->untag_bridge_pvid)) { nskb = dsa_untag_bridge_pvid(skb); if (!nskb) { @@ -309,28 +319,6 @@ bool dsa_schedule_work(struct work_struct *work) return queue_work(dsa_owq, work); } -static ATOMIC_NOTIFIER_HEAD(dsa_notif_chain); - -int register_dsa_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&dsa_notif_chain, nb); -} -EXPORT_SYMBOL_GPL(register_dsa_notifier); - -int unregister_dsa_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&dsa_notif_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_dsa_notifier); - -int call_dsa_notifiers(unsigned long val, struct net_device *dev, - struct dsa_notifier_info *info) -{ - info->dev = dev; - return atomic_notifier_call_chain(&dsa_notif_chain, val, info); -} -EXPORT_SYMBOL_GPL(call_dsa_notifiers); - int dsa_devlink_param_get(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx) { diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index a47e0f9b20d0..6f65ea0eef9f 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -21,6 +21,65 @@ static DEFINE_MUTEX(dsa2_mutex); LIST_HEAD(dsa_tree_list); +/** + * dsa_lag_map() - Map LAG netdev to a linear LAG ID + * @dst: Tree in which to record the mapping. + * @lag: Netdev that is to be mapped to an ID. + * + * dsa_lag_id/dsa_lag_dev can then be used to translate between the + * two spaces. The size of the mapping space is determined by the + * driver by setting ds->num_lag_ids. It is perfectly legal to leave + * it unset if it is not needed, in which case these functions become + * no-ops. + */ +void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag) +{ + unsigned int id; + + if (dsa_lag_id(dst, lag) >= 0) + /* Already mapped */ + return; + + for (id = 0; id < dst->lags_len; id++) { + if (!dsa_lag_dev(dst, id)) { + dst->lags[id] = lag; + return; + } + } + + /* No IDs left, which is OK. Some drivers do not need it. The + * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id + * returns an error for this device when joining the LAG. The + * driver can then return -EOPNOTSUPP back to DSA, which will + * fall back to a software LAG. + */ +} + +/** + * dsa_lag_unmap() - Remove a LAG ID mapping + * @dst: Tree in which the mapping is recorded. + * @lag: Netdev that was mapped. + * + * As there may be multiple users of the mapping, it is only removed + * if there are no other references to it. + */ +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag) +{ + struct dsa_port *dp; + unsigned int id; + + dsa_lag_foreach_port(dp, dst, lag) + /* There are remaining users of this mapping */ + return; + + dsa_lags_foreach_id(id, dst) { + if (dsa_lag_dev(dst, id) == lag) { + dst->lags[id] = NULL; + break; + } + } +} + struct dsa_switch *dsa_switch_find(int tree_index, int sw_index) { struct dsa_switch_tree *dst; @@ -582,6 +641,32 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) dsa_master_teardown(dp->master); } +static int dsa_tree_setup_lags(struct dsa_switch_tree *dst) +{ + unsigned int len = 0; + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) { + if (dp->ds->num_lag_ids > len) + len = dp->ds->num_lag_ids; + } + + if (!len) + return 0; + + dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL); + if (!dst->lags) + return -ENOMEM; + + dst->lags_len = len; + return 0; +} + +static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst) +{ + kfree(dst->lags); +} + static int dsa_tree_setup(struct dsa_switch_tree *dst) { bool complete; @@ -609,12 +694,18 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (err) goto teardown_switches; + err = dsa_tree_setup_lags(dst); + if (err) + goto teardown_master; + dst->setup = true; pr_info("DSA: tree %d setup\n", dst->index); return 0; +teardown_master: + dsa_tree_teardown_master(dst); teardown_switches: dsa_tree_teardown_switches(dst); teardown_default_cpu: @@ -630,6 +721,8 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) if (!dst->setup) return; + dsa_tree_teardown_lags(dst); + dsa_tree_teardown_master(dst); dsa_tree_teardown_switches(dst); @@ -787,6 +880,8 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds, goto out_put_node; if (reg >= ds->num_ports) { + dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%zu)\n", + port, reg, ds->num_ports); err = -EINVAL; goto out_put_node; } diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 7c96aae9062c..2ce46bb87703 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -20,6 +20,9 @@ enum { DSA_NOTIFIER_BRIDGE_LEAVE, DSA_NOTIFIER_FDB_ADD, DSA_NOTIFIER_FDB_DEL, + DSA_NOTIFIER_LAG_CHANGE, + DSA_NOTIFIER_LAG_JOIN, + DSA_NOTIFIER_LAG_LEAVE, DSA_NOTIFIER_MDB_ADD, DSA_NOTIFIER_MDB_DEL, DSA_NOTIFIER_VLAN_ADD, @@ -29,7 +32,6 @@ enum { /* DSA_NOTIFIER_AGEING_TIME */ struct dsa_notifier_ageing_time_info { - struct switchdev_trans *trans; unsigned int ageing_time; }; @@ -52,15 +54,22 @@ struct dsa_notifier_fdb_info { /* DSA_NOTIFIER_MDB_* */ struct dsa_notifier_mdb_info { const struct switchdev_obj_port_mdb *mdb; - struct switchdev_trans *trans; int sw_index; int port; }; +/* DSA_NOTIFIER_LAG_* */ +struct dsa_notifier_lag_info { + struct net_device *lag; + int sw_index; + int port; + + struct netdev_lag_upper_info *info; +}; + /* DSA_NOTIFIER_VLAN_* */ struct dsa_notifier_vlan_info { const struct switchdev_obj_port_vlan *vlan; - struct switchdev_trans *trans; int sw_index; int port; }; @@ -73,6 +82,18 @@ struct dsa_notifier_mtu_info { int mtu; }; +struct dsa_switchdev_event_work { + struct dsa_switch *ds; + int port; + struct work_struct work; + unsigned long event; + /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and + * SWITCHDEV_FDB_DEL_TO_DEVICE + */ + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + struct dsa_slave_priv { /* Copy of CPU port xmit for faster access in slave transmit hot path */ struct sk_buff * (*xmit)(struct sk_buff *skb, @@ -98,15 +119,6 @@ void dsa_tag_driver_put(const struct dsa_device_ops *ops); bool dsa_schedule_work(struct work_struct *work); const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); -int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid, - u16 flags, - struct netlink_ext_ack *extack); -int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid); - /* master.c */ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); void dsa_master_teardown(struct net_device *dev); @@ -127,19 +139,21 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, } /* port.c */ -int dsa_port_set_state(struct dsa_port *dp, u8 state, - struct switchdev_trans *trans); +int dsa_port_set_state(struct dsa_port *dp, u8 state); int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); void dsa_port_disable_rt(struct dsa_port *dp); void dsa_port_disable(struct dsa_port *dp); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); -int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, - struct switchdev_trans *trans); +int dsa_port_lag_change(struct dsa_port *dp, + struct netdev_lag_lower_state_info *linfo); +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, + struct netdev_lag_upper_info *uinfo); +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); +int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering); bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); -int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, - struct switchdev_trans *trans); +int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock); int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, bool propagate_upstream); int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, @@ -148,31 +162,41 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid); int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data); int dsa_port_mdb_add(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans); + const struct switchdev_obj_port_mdb *mdb); int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); -int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans); -int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans); -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct switchdev_trans *trans); +int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags); +int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags); +int dsa_port_mrouter(struct dsa_port *dp, bool mrouter); int dsa_port_vlan_add(struct dsa_port *dp, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); + const struct switchdev_obj_port_vlan *vlan); int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); int dsa_port_link_register_of(struct dsa_port *dp); void dsa_port_link_unregister_of(struct dsa_port *dp); extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; +static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, + struct net_device *dev) +{ + /* Switchdev offloading can be configured on: */ + + if (dev == dp->slave) + /* DSA ports directly connected to a bridge. */ + return true; + + if (dp->lag_dev == dev) + /* DSA ports connected to a bridge via a LAG */ + return true; + + return false; +} + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); int dsa_slave_create(struct dsa_port *dp); void dsa_slave_destroy(struct net_device *slave_dev); -bool dsa_slave_dev_check(const struct net_device *dev); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_register_notifier(void); @@ -257,6 +281,9 @@ int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); /* dsa2.c */ +void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag); +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag); + extern struct list_head dsa_tree_list; #endif diff --git a/net/dsa/port.c b/net/dsa/port.c index 73569c9af3cc..f5b0f72ee7cd 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -40,17 +40,15 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) return notifier_to_errno(err); } -int dsa_port_set_state(struct dsa_port *dp, u8 state, - struct switchdev_trans *trans) +int dsa_port_set_state(struct dsa_port *dp, u8 state) { struct dsa_switch *ds = dp->ds; int port = dp->index; - if (switchdev_trans_ph_prepare(trans)) - return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; + if (!ds->ops->port_stp_state_set) + return -EOPNOTSUPP; - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, port, state); + ds->ops->port_stp_state_set(ds, port, state); if (ds->ops->port_fast_age) { /* Fast age FDB entries or flush appropriate forwarding database @@ -75,7 +73,7 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state) { int err; - err = dsa_port_set_state(dp, state, NULL); + err = dsa_port_set_state(dp, state); if (err) pr_err("DSA: failed to set STP state %u (%d)\n", state, err); } @@ -145,7 +143,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) int err; /* Set the flooding mode before joining the port in the switch */ - err = dsa_port_bridge_flags(dp, BR_FLOOD | BR_MCAST_FLOOD, NULL); + err = dsa_port_bridge_flags(dp, BR_FLOOD | BR_MCAST_FLOOD); if (err) return err; @@ -158,7 +156,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) /* The bridging is rolled back on error */ if (err) { - dsa_port_bridge_flags(dp, 0, NULL); + dsa_port_bridge_flags(dp, 0); dp->bridge_dev = NULL; } @@ -185,7 +183,7 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); /* Port is leaving the bridge, disable flooding */ - dsa_port_bridge_flags(dp, 0, NULL); + dsa_port_bridge_flags(dp, 0); /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional @@ -193,6 +191,85 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) dsa_port_set_state_now(dp, BR_STATE_FORWARDING); } +int dsa_port_lag_change(struct dsa_port *dp, + struct netdev_lag_lower_state_info *linfo) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + }; + bool tx_enabled; + + if (!dp->lag_dev) + return 0; + + /* On statically configured aggregates (e.g. loadbalance + * without LACP) ports will always be tx_enabled, even if the + * link is down. Thus we require both link_up and tx_enabled + * in order to include it in the tx set. + */ + tx_enabled = linfo->link_up && linfo->tx_enabled; + + if (tx_enabled == dp->lag_tx_enabled) + return 0; + + dp->lag_tx_enabled = tx_enabled; + + return dsa_port_notify(dp, DSA_NOTIFIER_LAG_CHANGE, &info); +} + +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag, + struct netdev_lag_upper_info *uinfo) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .lag = lag, + .info = uinfo, + }; + int err; + + dsa_lag_map(dp->ds->dst, lag); + dp->lag_dev = lag; + + err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info); + if (err) { + dp->lag_dev = NULL; + dsa_lag_unmap(dp->ds->dst, lag); + } + + return err; +} + +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .lag = lag, + }; + int err; + + if (!dp->lag_dev) + return; + + /* Port might have been part of a LAG that in turn was + * attached to a bridge. + */ + if (dp->bridge_dev) + dsa_port_bridge_leave(dp, dp->bridge_dev); + + dp->lag_tx_enabled = false; + dp->lag_dev = NULL; + + err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info); + if (err) + pr_err("DSA: failed to notify DSA_NOTIFIER_LAG_LEAVE: %d\n", + err); + + dsa_lag_unmap(dp->ds->dst, lag); +} + /* Must be called under rcu_read_lock() */ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, bool vlan_filtering) @@ -259,43 +336,36 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, return true; } -int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, - struct switchdev_trans *trans) +int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering) { struct dsa_switch *ds = dp->ds; + bool apply; int err; - if (switchdev_trans_ph_prepare(trans)) { - bool apply; - - if (!ds->ops->port_vlan_filtering) - return -EOPNOTSUPP; + if (!ds->ops->port_vlan_filtering) + return -EOPNOTSUPP; - /* We are called from dsa_slave_switchdev_blocking_event(), - * which is not under rcu_read_lock(), unlike - * dsa_slave_switchdev_event(). - */ - rcu_read_lock(); - apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering); - rcu_read_unlock(); - if (!apply) - return -EINVAL; - } + /* We are called from dsa_slave_switchdev_blocking_event(), + * which is not under rcu_read_lock(), unlike + * dsa_slave_switchdev_event(). + */ + rcu_read_lock(); + apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering); + rcu_read_unlock(); + if (!apply) + return -EINVAL; if (dsa_port_is_vlan_filtering(dp) == vlan_filtering) return 0; - err = ds->ops->port_vlan_filtering(ds, dp->index, vlan_filtering, - trans); + err = ds->ops->port_vlan_filtering(ds, dp->index, vlan_filtering); if (err) return err; - if (switchdev_trans_ph_commit(trans)) { - if (ds->vlan_filtering_is_global) - ds->vlan_filtering = vlan_filtering; - else - dp->vlan_filtering = vlan_filtering; - } + if (ds->vlan_filtering_is_global) + ds->vlan_filtering = vlan_filtering; + else + dp->vlan_filtering = vlan_filtering; return 0; } @@ -314,26 +384,25 @@ bool dsa_port_skip_vlan_configuration(struct dsa_port *dp) !br_vlan_enabled(dp->bridge_dev)); } -int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, - struct switchdev_trans *trans) +int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock) { unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock); unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies); - struct dsa_notifier_ageing_time_info info = { - .ageing_time = ageing_time, - .trans = trans, - }; + struct dsa_notifier_ageing_time_info info; + int err; - if (switchdev_trans_ph_prepare(trans)) - return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); + info.ageing_time = ageing_time; + + err = dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); + if (err) + return err; dp->ageing_time = ageing_time; - return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); + return 0; } -int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans) +int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags) { struct dsa_switch *ds = dp->ds; @@ -344,16 +413,12 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags, return 0; } -int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans) +int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags) { struct dsa_switch *ds = dp->ds; int port = dp->index; int err = 0; - if (switchdev_trans_ph_prepare(trans)) - return 0; - if (ds->ops->port_egress_floods) err = ds->ops->port_egress_floods(ds, port, flags & BR_FLOOD, flags & BR_MCAST_FLOOD); @@ -361,14 +426,13 @@ int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, return err; } -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct switchdev_trans *trans) +int dsa_port_mrouter(struct dsa_port *dp, bool mrouter) { struct dsa_switch *ds = dp->ds; int port = dp->index; - if (switchdev_trans_ph_prepare(trans)) - return ds->ops->port_egress_floods ? 0 : -EOPNOTSUPP; + if (!ds->ops->port_egress_floods) + return -EOPNOTSUPP; return ds->ops->port_egress_floods(ds, port, true, mrouter); } @@ -425,13 +489,11 @@ int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data) } int dsa_port_mdb_add(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + const struct switchdev_obj_port_mdb *mdb) { struct dsa_notifier_mdb_info info = { .sw_index = dp->ds->index, .port = dp->index, - .trans = trans, .mdb = mdb, }; @@ -451,13 +513,11 @@ int dsa_port_mdb_del(const struct dsa_port *dp, } int dsa_port_vlan_add(struct dsa_port *dp, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { struct dsa_notifier_vlan_info info = { .sw_index = dp->ds->index, .port = dp->index, - .trans = trans, .vlan = vlan, }; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 4a0498bf6c65..c5c81cba8259 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -268,32 +268,32 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } static int dsa_slave_port_attr_set(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) + const struct switchdev_attr *attr) { struct dsa_port *dp = dsa_slave_to_port(dev); int ret; + if (!dsa_port_offloads_netdev(dp, attr->orig_dev)) + return -EOPNOTSUPP; + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - ret = dsa_port_set_state(dp, attr->u.stp_state, trans); + ret = dsa_port_set_state(dp, attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: - ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering, - trans); + ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: - ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans); + ret = dsa_port_ageing_time(dp, attr->u.ageing_time); break; case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: - ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags, - trans); + ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, trans); + ret = dsa_port_bridge_flags(dp, attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: - ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, trans); + ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter); break; default: ret = -EOPNOTSUPP; @@ -318,7 +318,7 @@ dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave, continue; vid = vlan_dev_vlan_id(upper_dev); - if (vid >= vlan->vid_begin && vid <= vlan->vid_end) + if (vid == vlan->vid) return -EBUSY; } @@ -326,15 +326,14 @@ dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave, } static int dsa_slave_vlan_add(struct net_device *dev, - const struct switchdev_obj *obj, - struct switchdev_trans *trans) + const struct switchdev_obj *obj) { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); struct switchdev_obj_port_vlan vlan; - int vid, err; + int err; - if (obj->orig_dev != dev) + if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) return -EOPNOTSUPP; if (dsa_port_skip_vlan_configuration(dp)) @@ -345,7 +344,7 @@ static int dsa_slave_vlan_add(struct net_device *dev, /* Deny adding a bridge VLAN when there is already an 802.1Q upper with * the same VID. */ - if (trans->ph_prepare && br_vlan_enabled(dp->bridge_dev)) { + if (br_vlan_enabled(dp->bridge_dev)) { rcu_read_lock(); err = dsa_slave_vlan_check_for_8021q_uppers(dev, &vlan); rcu_read_unlock(); @@ -353,7 +352,7 @@ static int dsa_slave_vlan_add(struct net_device *dev, return err; } - err = dsa_port_vlan_add(dp, &vlan, trans); + err = dsa_port_vlan_add(dp, &vlan); if (err) return err; @@ -363,47 +362,34 @@ static int dsa_slave_vlan_add(struct net_device *dev, */ vlan.flags &= ~BRIDGE_VLAN_INFO_PVID; - err = dsa_port_vlan_add(dp->cpu_dp, &vlan, trans); + err = dsa_port_vlan_add(dp->cpu_dp, &vlan); if (err) return err; - for (vid = vlan.vid_begin; vid <= vlan.vid_end; vid++) { - err = vlan_vid_add(master, htons(ETH_P_8021Q), vid); - if (err) - return err; - } - - return 0; + return vlan_vid_add(master, htons(ETH_P_8021Q), vlan.vid); } static int dsa_slave_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev); int err; - /* For the prepare phase, ensure the full set of changes is feasable in - * one go in order to signal a failure properly. If an operation is not - * supported, return -EOPNOTSUPP. - */ - switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (obj->orig_dev != dev) + if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) return -EOPNOTSUPP; - err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); + err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_HOST_MDB: /* DSA can directly translate this to a normal MDB add, * but on the CPU port. */ - err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj), - trans); + err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_slave_vlan_add(dev, obj, trans); + err = dsa_slave_vlan_add(dev, obj); break; default: err = -EOPNOTSUPP; @@ -419,9 +405,9 @@ static int dsa_slave_vlan_del(struct net_device *dev, struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); struct switchdev_obj_port_vlan *vlan; - int vid, err; + int err; - if (obj->orig_dev != dev) + if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) return -EOPNOTSUPP; if (dsa_port_skip_vlan_configuration(dp)) @@ -436,8 +422,7 @@ static int dsa_slave_vlan_del(struct net_device *dev, if (err) return err; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) - vlan_vid_del(master, htons(ETH_P_8021Q), vid); + vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid); return 0; } @@ -450,7 +435,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (obj->orig_dev != dev) + if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) return -EOPNOTSUPP; err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; @@ -1289,33 +1274,19 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, struct dsa_port *dp = dsa_slave_to_port(dev); struct switchdev_obj_port_vlan vlan = { .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - .vid_begin = vid, - .vid_end = vid, + .vid = vid, /* This API only allows programming tagged, non-PVID VIDs */ .flags = 0, }; - struct switchdev_trans trans; int ret; /* User port... */ - trans.ph_prepare = true; - ret = dsa_port_vlan_add(dp, &vlan, &trans); - if (ret) - return ret; - - trans.ph_prepare = false; - ret = dsa_port_vlan_add(dp, &vlan, &trans); + ret = dsa_port_vlan_add(dp, &vlan); if (ret) return ret; /* And CPU port... */ - trans.ph_prepare = true; - ret = dsa_port_vlan_add(dp->cpu_dp, &vlan, &trans); - if (ret) - return ret; - - trans.ph_prepare = false; - ret = dsa_port_vlan_add(dp->cpu_dp, &vlan, &trans); + ret = dsa_port_vlan_add(dp->cpu_dp, &vlan); if (ret) return ret; @@ -1328,8 +1299,7 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); struct switchdev_obj_port_vlan vlan = { - .vid_begin = vid, - .vid_end = vid, + .vid = vid, /* This API only allows programming tagged, non-PVID VIDs */ .flags = 0, }; @@ -1575,20 +1545,20 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { }; /* legacy way, bypassing the bridge *****************************************/ -int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid, - u16 flags, - struct netlink_ext_ack *extack) +static int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, + u16 flags, + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev); return dsa_port_fdb_add(dp, addr, vid); } -int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid) +static int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) { struct dsa_port *dp = dsa_slave_to_port(dev); @@ -1602,6 +1572,18 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev) return dp->ds->devlink ? &dp->devlink_port : NULL; } +static void dsa_slave_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_stats64) + ds->ops->get_stats64(ds, dp->index, s); + else + dev_get_tstats64(dev, s); +} + static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, @@ -1621,7 +1603,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { #endif .ndo_get_phys_port_name = dsa_slave_get_phys_port_name, .ndo_setup_tc = dsa_slave_setup_tc, - .ndo_get_stats64 = dev_get_tstats64, + .ndo_get_stats64 = dsa_slave_get_stats64, .ndo_get_port_parent_id = dsa_slave_get_port_parent_id, .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid, @@ -1764,20 +1746,6 @@ int dsa_slave_resume(struct net_device *slave_dev) return 0; } -static void dsa_slave_notify(struct net_device *dev, unsigned long val) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_notifier_register_info rinfo = { - .switch_number = dp->ds->index, - .port_number = dp->index, - .master = master, - .info.dev = dev, - }; - - call_dsa_notifiers(val, dev, &rinfo.info); -} - int dsa_slave_create(struct dsa_port *port) { const struct dsa_port *cpu_dp = port->cpu_dp; @@ -1863,8 +1831,6 @@ int dsa_slave_create(struct dsa_port *port) goto out_gcells; } - dsa_slave_notify(slave_dev, DSA_PORT_REGISTER); - rtnl_lock(); ret = register_netdevice(slave_dev); @@ -1913,7 +1879,6 @@ void dsa_slave_destroy(struct net_device *slave_dev) phylink_disconnect_phy(dp->pl); rtnl_unlock(); - dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); phylink_destroy(dp->pl); gro_cells_destroy(&p->gcells); free_percpu(slave_dev->tstats); @@ -1924,6 +1889,7 @@ bool dsa_slave_dev_check(const struct net_device *dev) { return dev->netdev_ops == &dsa_slave_netdev_ops; } +EXPORT_SYMBOL_GPL(dsa_slave_dev_check); static int dsa_slave_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) @@ -1941,6 +1907,46 @@ static int dsa_slave_changeupper(struct net_device *dev, dsa_port_bridge_leave(dp, info->upper_dev); err = NOTIFY_OK; } + } else if (netif_is_lag_master(info->upper_dev)) { + if (info->linking) { + err = dsa_port_lag_join(dp, info->upper_dev, + info->upper_info); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(info->info.extack, + "Offloading not supported"); + err = 0; + } + err = notifier_from_errno(err); + } else { + dsa_port_lag_leave(dp, info->upper_dev); + err = NOTIFY_OK; + } + } + + return err; +} + +static int +dsa_slave_lag_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + struct dsa_port *dp; + + netdev_for_each_lower_dev(dev, lower, iter) { + if (!dsa_slave_dev_check(lower)) + continue; + + dp = dsa_slave_to_port(lower); + if (!dp->lag_dev) + /* Software LAG */ + continue; + + err = dsa_slave_changeupper(lower, info); + if (notifier_to_errno(err)) + break; } return err; @@ -2038,128 +2044,192 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, break; } case NETDEV_CHANGEUPPER: + if (dsa_slave_dev_check(dev)) + return dsa_slave_changeupper(dev, ptr); + + if (netif_is_lag_master(dev)) + return dsa_slave_lag_changeupper(dev, ptr); + + break; + case NETDEV_CHANGELOWERSTATE: { + struct netdev_notifier_changelowerstate_info *info = ptr; + struct dsa_port *dp; + int err; + if (!dsa_slave_dev_check(dev)) - return NOTIFY_DONE; + break; + + dp = dsa_slave_to_port(dev); - return dsa_slave_changeupper(dev, ptr); + err = dsa_port_lag_change(dp, info->lower_state_info); + return notifier_from_errno(err); + } } return NOTIFY_DONE; } -struct dsa_switchdev_event_work { - struct work_struct work; - struct switchdev_notifier_fdb_info fdb_info; - struct net_device *dev; - unsigned long event; -}; +static void +dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work) +{ + struct dsa_switch *ds = switchdev_work->ds; + struct switchdev_notifier_fdb_info info; + struct dsa_port *dp; + + if (!dsa_is_user_port(ds, switchdev_work->port)) + return; + + info.addr = switchdev_work->addr; + info.vid = switchdev_work->vid; + info.offloaded = true; + dp = dsa_to_port(ds, switchdev_work->port); + call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, + dp->slave, &info.info, NULL); +} static void dsa_slave_switchdev_event_work(struct work_struct *work) { struct dsa_switchdev_event_work *switchdev_work = container_of(work, struct dsa_switchdev_event_work, work); - struct net_device *dev = switchdev_work->dev; - struct switchdev_notifier_fdb_info *fdb_info; - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = switchdev_work->ds; + struct dsa_port *dp; int err; + dp = dsa_to_port(ds, switchdev_work->port); + rtnl_lock(); switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: - fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) - break; - - err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid); + err = dsa_port_fdb_add(dp, switchdev_work->addr, + switchdev_work->vid); if (err) { - netdev_dbg(dev, "fdb add failed err=%d\n", err); + dev_err(ds->dev, + "port %d failed to add %pM vid %d to fdb: %d\n", + dp->index, switchdev_work->addr, + switchdev_work->vid, err); break; } - fdb_info->offloaded = true; - call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, - &fdb_info->info, NULL); + dsa_fdb_offload_notify(switchdev_work); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: - fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) - break; - - err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid); + err = dsa_port_fdb_del(dp, switchdev_work->addr, + switchdev_work->vid); if (err) { - netdev_dbg(dev, "fdb del failed err=%d\n", err); - dev_close(dev); + dev_err(ds->dev, + "port %d failed to delete %pM vid %d from fdb: %d\n", + dp->index, switchdev_work->addr, + switchdev_work->vid, err); } + break; } rtnl_unlock(); - kfree(switchdev_work->fdb_info.addr); kfree(switchdev_work); - dev_put(dev); + if (dsa_is_user_port(ds, dp->index)) + dev_put(dp->slave); } -static int -dsa_slave_switchdev_fdb_work_init(struct dsa_switchdev_event_work * - switchdev_work, - const struct switchdev_notifier_fdb_info * - fdb_info) -{ - memcpy(&switchdev_work->fdb_info, fdb_info, - sizeof(switchdev_work->fdb_info)); - switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); - if (!switchdev_work->fdb_info.addr) - return -ENOMEM; - ether_addr_copy((u8 *)switchdev_work->fdb_info.addr, - fdb_info->addr); +static int dsa_lower_dev_walk(struct net_device *lower_dev, + struct netdev_nested_priv *priv) +{ + if (dsa_slave_dev_check(lower_dev)) { + priv->data = (void *)netdev_priv(lower_dev); + return 1; + } + return 0; } +static struct dsa_slave_priv *dsa_slave_dev_lower_find(struct net_device *dev) +{ + struct netdev_nested_priv priv = { + .data = NULL, + }; + + netdev_walk_all_lower_dev_rcu(dev, dsa_lower_dev_walk, &priv); + + return (struct dsa_slave_priv *)priv.data; +} + /* Called under rcu_read_lock() */ static int dsa_slave_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + const struct switchdev_notifier_fdb_info *fdb_info; struct dsa_switchdev_event_work *switchdev_work; + struct dsa_port *dp; int err; - if (event == SWITCHDEV_PORT_ATTR_SET) { + switch (event) { + case SWITCHDEV_PORT_ATTR_SET: err = switchdev_handle_port_attr_set(dev, ptr, dsa_slave_dev_check, dsa_slave_port_attr_set); return notifier_from_errno(err); - } + case SWITCHDEV_FDB_ADD_TO_DEVICE: + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb_info = ptr; - if (!dsa_slave_dev_check(dev)) - return NOTIFY_DONE; + if (dsa_slave_dev_check(dev)) { + if (!fdb_info->added_by_user) + return NOTIFY_OK; - switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); - if (!switchdev_work) - return NOTIFY_BAD; + dp = dsa_slave_to_port(dev); + } else { + /* Snoop addresses learnt on foreign interfaces + * bridged with us, for switches that don't + * automatically learn SA from CPU-injected traffic + */ + struct net_device *br_dev; + struct dsa_slave_priv *p; - INIT_WORK(&switchdev_work->work, - dsa_slave_switchdev_event_work); - switchdev_work->dev = dev; - switchdev_work->event = event; + br_dev = netdev_master_upper_dev_get_rcu(dev); + if (!br_dev) + return NOTIFY_DONE; - switch (event) { - case SWITCHDEV_FDB_ADD_TO_DEVICE: - case SWITCHDEV_FDB_DEL_TO_DEVICE: - if (dsa_slave_switchdev_fdb_work_init(switchdev_work, ptr)) - goto err_fdb_work_init; - dev_hold(dev); + if (!netif_is_bridge_master(br_dev)) + return NOTIFY_DONE; + + p = dsa_slave_dev_lower_find(br_dev); + if (!p) + return NOTIFY_DONE; + + dp = p->dp->cpu_dp; + + if (!dp->ds->assisted_learning_on_cpu_port) + return NOTIFY_DONE; + } + + if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del) + return NOTIFY_DONE; + + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); + if (!switchdev_work) + return NOTIFY_BAD; + + INIT_WORK(&switchdev_work->work, + dsa_slave_switchdev_event_work); + switchdev_work->ds = dp->ds; + switchdev_work->port = dp->index; + switchdev_work->event = event; + + ether_addr_copy(switchdev_work->addr, + fdb_info->addr); + switchdev_work->vid = fdb_info->vid; + + /* Hold a reference on the slave for dsa_fdb_offload_notify */ + if (dsa_is_user_port(dp->ds, dp->index)) + dev_hold(dev); + dsa_schedule_work(&switchdev_work->work); break; default: - kfree(switchdev_work); return NOTIFY_DONE; } - dsa_schedule_work(&switchdev_work->work); return NOTIFY_OK; - -err_fdb_work_init: - kfree(switchdev_work); - return NOTIFY_BAD; } static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused, diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 3fb362b6874e..cc0b25f3adea 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -33,15 +33,12 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds, struct dsa_notifier_ageing_time_info *info) { unsigned int ageing_time = info->ageing_time; - struct switchdev_trans *trans = info->trans; - - if (switchdev_trans_ph_prepare(trans)) { - if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) - return -ERANGE; - if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) - return -ERANGE; - return 0; - } + + if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) + return -ERANGE; + + if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) + return -ERANGE; /* Program the fastest ageing time in case of multiple bridges */ ageing_time = dsa_switch_fastest_ageing_time(ds, ageing_time); @@ -139,17 +136,8 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, } } if (unset_vlan_filtering) { - struct switchdev_trans trans; - - trans.ph_prepare = true; err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port), - false, &trans); - if (err && err != EOPNOTSUPP) - return err; - - trans.ph_prepare = false; - err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port), - false, &trans); + false); if (err && err != EOPNOTSUPP) return err; } @@ -178,6 +166,47 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds, return ds->ops->port_fdb_del(ds, port, info->addr, info->vid); } +static int dsa_switch_lag_change(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_lag_change) + return ds->ops->port_lag_change(ds, info->port); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_change) + return ds->ops->crosschip_lag_change(ds, info->sw_index, + info->port); + + return 0; +} + +static int dsa_switch_lag_join(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_lag_join) + return ds->ops->port_lag_join(ds, info->port, info->lag, + info->info); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_join) + return ds->ops->crosschip_lag_join(ds, info->sw_index, + info->port, info->lag, + info->info); + + return 0; +} + +static int dsa_switch_lag_leave(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_lag_leave) + return ds->ops->port_lag_leave(ds, info->port, info->lag); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_leave) + return ds->ops->crosschip_lag_leave(ds, info->sw_index, + info->port, info->lag); + + return 0; +} + static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port, struct dsa_notifier_mdb_info *info) { @@ -190,41 +219,24 @@ static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port, return false; } -static int dsa_switch_mdb_prepare(struct dsa_switch *ds, - struct dsa_notifier_mdb_info *info) +static int dsa_switch_mdb_add(struct dsa_switch *ds, + struct dsa_notifier_mdb_info *info) { - int port, err; + int err = 0; + int port; - if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) + if (!ds->ops->port_mdb_add) return -EOPNOTSUPP; for (port = 0; port < ds->num_ports; port++) { if (dsa_switch_mdb_match(ds, port, info)) { - err = ds->ops->port_mdb_prepare(ds, port, info->mdb); + err = ds->ops->port_mdb_add(ds, port, info->mdb); if (err) - return err; + break; } } - return 0; -} - -static int dsa_switch_mdb_add(struct dsa_switch *ds, - struct dsa_notifier_mdb_info *info) -{ - int port; - - if (switchdev_trans_ph_prepare(info->trans)) - return dsa_switch_mdb_prepare(ds, info); - - if (!ds->ops->port_mdb_add) - return 0; - - for (port = 0; port < ds->num_ports; port++) - if (dsa_switch_mdb_match(ds, port, info)) - ds->ops->port_mdb_add(ds, port, info->mdb); - - return 0; + return err; } static int dsa_switch_mdb_del(struct dsa_switch *ds, @@ -251,17 +263,17 @@ static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port, return false; } -static int dsa_switch_vlan_prepare(struct dsa_switch *ds, - struct dsa_notifier_vlan_info *info) +static int dsa_switch_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) { int port, err; - if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) + if (!ds->ops->port_vlan_add) return -EOPNOTSUPP; for (port = 0; port < ds->num_ports; port++) { if (dsa_switch_vlan_match(ds, port, info)) { - err = ds->ops->port_vlan_prepare(ds, port, info->vlan); + err = ds->ops->port_vlan_add(ds, port, info->vlan); if (err) return err; } @@ -270,24 +282,6 @@ static int dsa_switch_vlan_prepare(struct dsa_switch *ds, return 0; } -static int dsa_switch_vlan_add(struct dsa_switch *ds, - struct dsa_notifier_vlan_info *info) -{ - int port; - - if (switchdev_trans_ph_prepare(info->trans)) - return dsa_switch_vlan_prepare(ds, info); - - if (!ds->ops->port_vlan_add) - return 0; - - for (port = 0; port < ds->num_ports; port++) - if (dsa_switch_vlan_match(ds, port, info)) - ds->ops->port_vlan_add(ds, port, info->vlan); - - return 0; -} - static int dsa_switch_vlan_del(struct dsa_switch *ds, struct dsa_notifier_vlan_info *info) { @@ -325,6 +319,15 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_FDB_DEL: err = dsa_switch_fdb_del(ds, info); break; + case DSA_NOTIFIER_LAG_CHANGE: + err = dsa_switch_lag_change(ds, info); + break; + case DSA_NOTIFIER_LAG_JOIN: + err = dsa_switch_lag_join(ds, info); + break; + case DSA_NOTIFIER_LAG_LEAVE: + err = dsa_switch_lag_leave(ds, info); + break; case DSA_NOTIFIER_MDB_ADD: err = dsa_switch_mdb_add(ds, info); break; @@ -345,10 +348,6 @@ static int dsa_switch_event(struct notifier_block *nb, break; } - /* Non-switchdev operations cannot be rolled back. If a DSA driver - * returns an error during the chained call, switch chips may be in an - * inconsistent state. - */ if (err) dev_dbg(ds->dev, "breaking chain for DSA event %lu (%d)\n", event, err); diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index e934dace3922..e2577a7dcbca 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -5,6 +5,7 @@ * Copyright (C) 2014 Broadcom Corporation */ +#include <linux/dsa/brcm.h> #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 112c7c6dd568..7e7b7decdf39 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -163,6 +163,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, u8 extra) { int source_device, source_port; + bool trunk = false; enum dsa_code code; enum dsa_cmd cmd; u8 *dsa_header; @@ -174,6 +175,8 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, switch (cmd) { case DSA_CMD_FORWARD: skb->offload_fwd_mark = 1; + + trunk = !!(dsa_header[1] & 7); break; case DSA_CMD_TO_CPU: @@ -216,7 +219,19 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, source_device = dsa_header[0] & 0x1f; source_port = (dsa_header[1] >> 3) & 0x1f; - skb->dev = dsa_master_find_slave(dev, source_device, source_port); + if (trunk) { + struct dsa_port *cpu_dp = dev->dsa_ptr; + + /* The exact source port is not available in the tag, + * so we inject the frame directly on the upper + * team/bond. + */ + skb->dev = dsa_lag_dev(cpu_dp->dst, source_port); + } else { + skb->dev = dsa_master_find_slave(dev, source_device, + source_port); + } + if (!skb->dev) return NULL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 2ed0b01f72f0..959b94e32f2b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1018,7 +1018,7 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) { - uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); + uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); if (!uarg) return -ENOBUFS; extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ @@ -1230,8 +1230,7 @@ alloc_new_skb: error_efault: err = -EFAULT; error: - if (uarg) - sock_zerocopy_put_abort(uarg, extra_uref); + net_zcopy_put_abort(uarg, extra_uref); cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 7ca338fbe8ba..6b2dc7b2b612 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -222,7 +222,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) .code = ICMP_FRAG_NEEDED, .checksum = 0, .un.frag.__unused = 0, - .un.frag.mtu = ntohs(mtu), + .un.frag.mtu = htons(mtu), }; icmph->checksum = ip_compute_csum(icmph, len); skb_reset_transport_header(skb); @@ -245,7 +245,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) skb->ip_summed = CHECKSUM_NONE; - eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0); + eth_header(skb, skb->dev, ntohs(eh.h_proto), eh.h_source, eh.h_dest, 0); skb_reset_mac_header(skb); return skb->len; @@ -338,7 +338,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) skb->ip_summed = CHECKSUM_NONE; - eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0); + eth_header(skb, skb->dev, ntohs(eh.h_proto), eh.h_source, eh.h_dest, 0); skb_reset_mac_header(skb); return skb->len; @@ -583,8 +583,9 @@ static int ip_tun_parse_opts_erspan(struct nlattr *attr, static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, struct netlink_ext_ack *extack) { - int err, rem, opt_len, opts_len = 0, type = 0; + int err, rem, opt_len, opts_len = 0; struct nlattr *nla; + __be16 type = 0; if (!attr) return 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ed42d2193c5c..2267d21c73a6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1010,7 +1010,7 @@ new_segment: } if (!(flags & MSG_NO_SHARED_FRAGS)) - skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; skb->len += copy; skb->data_len += copy; @@ -1217,7 +1217,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) { skb = tcp_write_queue_tail(sk); - uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb)); + uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb)); if (!uarg) { err = -ENOBUFS; goto out_err; @@ -1429,7 +1429,7 @@ out: tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); } out_nopush: - sock_zerocopy_put(uarg); + net_zcopy_put(uarg); return copied + copied_syn; do_error: @@ -1440,7 +1440,7 @@ do_fault: if (copied + copied_syn) goto out; out_err: - sock_zerocopy_put_abort(uarg, true); + net_zcopy_put_abort(uarg, true); err = sk_stream_error(sk, flags, err); /* make sure we wake any epoll edge trigger waiter */ if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f322e798a351..899d053cb10e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1319,7 +1319,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, skb_orphan(skb); skb->sk = sk; skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; - skb_set_hash_from_sk(skb, sk); refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); @@ -1390,6 +1389,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tcp_skb_pcount(skb)); tp->segs_out += tcp_skb_pcount(skb); + skb_set_hash_from_sk(skb, sk); /* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */ skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb); skb_shinfo(skb)->gso_size = tcp_skb_mss(skb); diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 3eecba0874aa..b97e3635acf5 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -90,15 +90,11 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, struct sock *sk = sock->sk; struct udp_tunnel_info ti; - if (!dev->netdev_ops->ndo_udp_tunnel_add || - !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - return; - ti.type = type; ti.sa_family = sk->sk_family; ti.port = inet_sk(sk)->inet_sport; - dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti); + udp_tunnel_nic_add_port(dev, &ti); } EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port); @@ -108,15 +104,11 @@ void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, struct sock *sk = sock->sk; struct udp_tunnel_info ti; - if (!dev->netdev_ops->ndo_udp_tunnel_del || - !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - return; - ti.type = type; ti.sa_family = sk->sk_family; ti.port = inet_sk(sk)->inet_sport; - dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); + udp_tunnel_nic_del_port(dev, &ti); } EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port); @@ -134,11 +126,7 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) rcu_read_lock(); for_each_netdev_rcu(net, dev) { - if (!dev->netdev_ops->ndo_udp_tunnel_add) - continue; - if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - continue; - dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti); + udp_tunnel_nic_add_port(dev, &ti); } rcu_read_unlock(); } @@ -158,11 +146,7 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type) rcu_read_lock(); for_each_netdev_rcu(net, dev) { - if (!dev->netdev_ops->ndo_udp_tunnel_del) - continue; - if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - continue; - dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); + udp_tunnel_nic_del_port(dev, &ti); } rcu_read_unlock(); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 077d43af8226..117cd95df213 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1510,7 +1510,7 @@ emsgsize: csummode = CHECKSUM_PARTIAL; if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) { - uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); + uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); if (!uarg) return -ENOBUFS; extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ @@ -1754,8 +1754,7 @@ alloc_new_skb: error_efault: err = -EFAULT; error: - if (uarg) - sock_zerocopy_put_abort(uarg, extra_uref); + net_zcopy_put_abort(uarg, extra_uref); cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 56dad9565bc9..d0b56ffbb057 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -786,7 +786,7 @@ static ssize_t kcm_sendpage(struct socket *sock, struct page *page, if (skb_can_coalesce(skb, i, page, offset)) { skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); - skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; goto coalesced; } @@ -834,7 +834,7 @@ static ssize_t kcm_sendpage(struct socket *sock, struct page *page, get_page(page); skb_fill_page_desc(skb, i, page, offset, size); - skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; coalesced: skb->len += size; @@ -1496,7 +1496,7 @@ static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) return 0; out: - fput(csock->file); + sockfd_put(csock); return err; } @@ -1644,7 +1644,7 @@ static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) spin_unlock_bh(&mux->lock); out: - fput(csock->file); + sockfd_put(csock); return err; } diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index b921cbdd9aaa..8ca196489893 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -31,6 +31,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), + SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), + SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 47bcecce1106..63914a5ef6a5 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -24,6 +24,8 @@ enum linux_mptcp_mib_field { MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */ MPTCP_MIB_RMADDR, /* Received RM_ADDR */ MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ + MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ + MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index e0d21c0607e5..c9643344a8d7 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -282,6 +282,15 @@ static void mptcp_parse_option(const struct sk_buff *skb, pr_debug("RM_ADDR: id=%d", mp_opt->rm_id); break; + case MPTCPOPT_MP_PRIO: + if (opsize != TCPOLEN_MPTCP_PRIO) + break; + + mp_opt->mp_prio = 1; + mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP; + pr_debug("MP_PRIO: prio=%d", mp_opt->backup); + break; + case MPTCPOPT_MP_FASTCLOSE: if (opsize != TCPOLEN_MPTCP_FASTCLOSE) break; @@ -313,6 +322,7 @@ void mptcp_get_options(const struct sk_buff *skb, mp_opt->port = 0; mp_opt->rm_addr = 0; mp_opt->dss = 0; + mp_opt->mp_prio = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -679,6 +689,28 @@ static bool mptcp_established_options_rm_addr(struct sock *sk, return true; } +static bool mptcp_established_options_mp_prio(struct sock *sk, + unsigned int *size, + unsigned int remaining, + struct mptcp_out_options *opts) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + + if (!subflow->send_mp_prio) + return false; + + if (remaining < TCPOLEN_MPTCP_PRIO) + return false; + + *size = TCPOLEN_MPTCP_PRIO; + opts->suboptions |= OPTION_MPTCP_PRIO; + opts->backup = subflow->request_bkup; + + pr_debug("prio=%d", opts->backup); + + return true; +} + bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) @@ -721,6 +753,12 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, ret = true; } + if (mptcp_established_options_mp_prio(sk, &opt_size, remaining, opts)) { + *size += opt_size; + remaining -= opt_size; + ret = true; + } + return ret; } @@ -994,6 +1032,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mp_opt.rm_addr = 0; } + if (mp_opt.mp_prio) { + mptcp_pm_mp_prio_received(sk, mp_opt.backup); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX); + mp_opt.mp_prio = 0; + } + if (!mp_opt.dss) return; @@ -1168,6 +1212,18 @@ mp_capable_done: 0, opts->rm_id); } + if (OPTION_MPTCP_PRIO & opts->suboptions) { + const struct sock *ssk = (const struct sock *)tp; + struct mptcp_subflow_context *subflow; + + subflow = mptcp_subflow_ctx(ssk); + subflow->send_mp_prio = 0; + + *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO, + TCPOLEN_MPTCP_PRIO, + opts->backup, TCPOPT_NOP); + } + if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) { *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, TCPOLEN_MPTCP_MPJ_SYN, diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index da2ed576f289..0a6ebd0642ec 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -207,6 +207,14 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id) spin_unlock_bh(&pm->lock); } +void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + + pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); + subflow->backup = bkup; +} + /* path manager helpers */ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a6d983d80576..9b1f6298bbdb 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -36,6 +36,9 @@ struct mptcp_pm_add_entry { u8 retrans_times; }; +#define MAX_ADDR_ID 255 +#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG) + struct pm_nl_pernet { /* protects pernet updates */ spinlock_t lock; @@ -46,6 +49,7 @@ struct pm_nl_pernet { unsigned int local_addr_max; unsigned int subflows_max; unsigned int next_id; + unsigned long id_bitmap[BITMAP_SZ]; }; #define MPTCP_PM_ADDR_MAX 8 @@ -438,6 +442,41 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) } } +int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + u8 bkup) +{ + struct mptcp_subflow_context *subflow; + + pr_debug("bkup=%d", bkup); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + struct sock *sk = (struct sock *)msk; + struct mptcp_addr_info local; + + local_address((struct sock_common *)ssk, &local); + if (!addresses_equal(&local, addr, addr->port)) + continue; + + subflow->backup = bkup; + subflow->send_mp_prio = 1; + subflow->request_bkup = bkup; + __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIOTX); + + spin_unlock_bh(&msk->pm.lock); + pr_debug("send ack for mp_prio"); + lock_sock(ssk); + tcp_send_ack(ssk); + release_sock(ssk); + spin_lock_bh(&msk->pm.lock); + + return 0; + } + + return -EINVAL; +} + void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow, *tmp; @@ -524,10 +563,12 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, /* to keep the code simple, don't do IDR-like allocation for address ID, * just bail when we exceed limits */ - if (pernet->next_id > 255) - goto out; + if (pernet->next_id == MAX_ADDR_ID) + pernet->next_id = 1; if (pernet->addrs >= MPTCP_PM_ADDR_MAX) goto out; + if (test_bit(entry->addr.id, pernet->id_bitmap)) + goto out; /* do not insert duplicate address, differentiate on port only * singled addresses @@ -539,12 +580,30 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, goto out; } + if (!entry->addr.id) { +find_next: + entry->addr.id = find_next_zero_bit(pernet->id_bitmap, + MAX_ADDR_ID + 1, + pernet->next_id); + if ((!entry->addr.id || entry->addr.id > MAX_ADDR_ID) && + pernet->next_id != 1) { + pernet->next_id = 1; + goto find_next; + } + } + + if (!entry->addr.id || entry->addr.id > MAX_ADDR_ID) + goto out; + + __set_bit(entry->addr.id, pernet->id_bitmap); + if (entry->addr.id > pernet->next_id) + pernet->next_id = entry->addr.id; + if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) pernet->add_addr_signal_max++; if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) pernet->local_addr_max++; - entry->addr.id = pernet->next_id++; pernet->addrs++; list_add_tail_rcu(&entry->list, &pernet->local_addr_list); ret = entry->addr.id; @@ -597,6 +656,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) entry->addr = skc_local; entry->addr.ifindex = 0; entry->addr.flags = 0; + entry->addr.id = 0; ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); if (ret < 0) kfree(entry); @@ -857,6 +917,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) pernet->addrs--; list_del_rcu(&entry->list); + __clear_bit(entry->addr.id, pernet->id_bitmap); spin_unlock_bh(&pernet->lock); mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr); @@ -894,6 +955,8 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&pernet->lock); list_splice_init(&pernet->local_addr_list, &free_list); __reset_counters(pernet); + pernet->next_id = 1; + bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1); spin_unlock_bh(&pernet->lock); __flush_addrs(sock_net(skb->sk), &free_list); return 0; @@ -994,27 +1057,34 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg, struct pm_nl_pernet *pernet; int id = cb->args[0]; void *hdr; + int i; pernet = net_generic(net, pm_nl_pernet_id); spin_lock_bh(&pernet->lock); - list_for_each_entry(entry, &pernet->local_addr_list, list) { - if (entry->addr.id <= id) - continue; - - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, &mptcp_genl_family, - NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); - if (!hdr) - break; + for (i = id; i < MAX_ADDR_ID + 1; i++) { + if (test_bit(i, pernet->id_bitmap)) { + entry = __lookup_addr_by_id(pernet, i); + if (!entry) + break; + + if (entry->addr.id <= id) + continue; + + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &mptcp_genl_family, + NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); + if (!hdr) + break; + + if (mptcp_nl_fill_addr(msg, entry) < 0) { + genlmsg_cancel(msg, hdr); + break; + } - if (mptcp_nl_fill_addr(msg, entry) < 0) { - genlmsg_cancel(msg, hdr); - break; + id = entry->addr.id; + genlmsg_end(msg, hdr); } - - id = entry->addr.id; - genlmsg_end(msg, hdr); } spin_unlock_bh(&pernet->lock); @@ -1096,6 +1166,66 @@ fail: return -EMSGSIZE; } +static int mptcp_nl_addr_backup(struct net *net, + struct mptcp_addr_info *addr, + u8 bkup) +{ + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + int ret = -EINVAL; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + + if (list_empty(&msk->conn_list)) + goto next; + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return ret; +} + +static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + struct mptcp_pm_addr_entry addr, *entry; + struct net *net = sock_net(skb->sk); + u8 bkup = 0; + int ret; + + ret = mptcp_pm_parse_addr(attr, info, true, &addr); + if (ret < 0) + return ret; + + if (addr.addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + bkup = 1; + + list_for_each_entry(entry, &pernet->local_addr_list, list) { + if (addresses_equal(&entry->addr, &addr.addr, true)) { + ret = mptcp_nl_addr_backup(net, &entry->addr, bkup); + if (ret) + return ret; + + if (bkup) + entry->addr.flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else + entry->addr.flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + } + } + + return 0; +} + static const struct genl_small_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_ADD_ADDR, @@ -1126,6 +1256,11 @@ static const struct genl_small_ops mptcp_pm_ops[] = { .cmd = MPTCP_PM_CMD_GET_LIMITS, .doit = mptcp_nl_cmd_get_limits, }, + { + .cmd = MPTCP_PM_CMD_SET_FLAGS, + .doit = mptcp_nl_cmd_set_flags, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family mptcp_genl_family __ro_after_init = { @@ -1148,6 +1283,7 @@ static int __net_init pm_nl_init_net(struct net *net) INIT_LIST_HEAD_RCU(&pernet->local_addr_list); __reset_counters(pernet); pernet->next_id = 1; + bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1); spin_lock_init(&pernet->lock); return 0; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d67de793d363..d6400ad2d615 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -24,6 +24,7 @@ #define OPTION_MPTCP_ADD_ADDR6 BIT(7) #define OPTION_MPTCP_RM_ADDR BIT(8) #define OPTION_MPTCP_FASTCLOSE BIT(9) +#define OPTION_MPTCP_PRIO BIT(10) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 @@ -59,6 +60,7 @@ #define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24 #define TCPOLEN_MPTCP_PORT_LEN 4 #define TCPOLEN_MPTCP_RM_ADDR_BASE 4 +#define TCPOLEN_MPTCP_PRIO 4 #define TCPOLEN_MPTCP_FASTCLOSE 12 /* MPTCP MP_JOIN flags */ @@ -86,6 +88,9 @@ #define MPTCP_ADDR_IPVERSION_4 4 #define MPTCP_ADDR_IPVERSION_6 6 +/* MPTCP MP_PRIO flags */ +#define MPTCP_PRIO_BKUP BIT(0) + /* MPTCP socket flags */ #define MPTCP_DATA_READY 0 #define MPTCP_NOSPACE 1 @@ -116,6 +121,7 @@ struct mptcp_options_received { dss : 1, add_addr : 1, rm_addr : 1, + mp_prio : 1, family : 4, echo : 1, backup : 1; @@ -396,6 +402,7 @@ struct mptcp_subflow_context { map_valid : 1, mpc_map : 1, backup : 1, + send_mp_prio : 1, rx_eof : 1, can_ack : 1, /* only after processing the remote a key */ disposable : 1; /* ctx can be free at ulp release time */ @@ -550,6 +557,10 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id); +void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); +int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + u8 bkup); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 0eb4ddc056e7..c0c8fea3a186 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -236,7 +236,7 @@ static void llc_shdlc_rcv_i_frame(struct llc_shdlc *shdlc, goto exit; } - if (shdlc->t1_active == false) { + if (!shdlc->t1_active) { shdlc->t1_active = true; mod_timer(&shdlc->t1_timer, jiffies + msecs_to_jiffies(SHDLC_T1_VALUE_MS(shdlc->w))); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index e64727e1a72f..79bebf4b0796 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -579,11 +579,11 @@ static int nci_close_device(struct nci_dev *ndev) clear_bit(NCI_INIT, &ndev->flags); - del_timer_sync(&ndev->cmd_timer); - /* Flush cmd wq */ flush_workqueue(ndev->cmd_wq); + del_timer_sync(&ndev->cmd_timer); + /* Clear flags */ ndev->flags = 0; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e8902a7e60f2..92a0b67b2728 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -957,14 +957,14 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, - const struct nlattr *attr, bool last) + const struct nlattr *attr) { /* The first attribute is always 'OVS_DEC_TTL_ATTR_ACTION'. */ struct nlattr *actions = nla_data(attr); if (nla_len(actions)) return clone_execute(dp, skb, key, 0, nla_data(actions), - nla_len(actions), last, false); + nla_len(actions), true, false); consume_skb(skb); return 0; @@ -1418,11 +1418,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_DEC_TTL: err = execute_dec_ttl(skb, key); - if (err == -EHOSTUNREACH) { - err = dec_ttl_exception_handler(dp, skb, key, - a, true); - return err; - } + if (err == -EHOSTUNREACH) + return dec_ttl_exception_handler(dp, skb, + key, a); break; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4c5c2331e764..fd1f809e9bc1 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2515,15 +2515,25 @@ static int validate_and_copy_dec_ttl(struct net *net, if (type > OVS_DEC_TTL_ATTR_MAX) continue; - if (!type || attrs[type]) + if (!type || attrs[type]) { + OVS_NLERR(log, "Duplicate or invalid key (type %d).", + type); return -EINVAL; + } attrs[type] = a; } + if (rem) { + OVS_NLERR(log, "Message has %d unknown bytes.", rem); + return -EINVAL; + } + actions = attrs[OVS_DEC_TTL_ATTR_ACTION]; - if (rem || !actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) + if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) { + OVS_NLERR(log, "Missing valid actions attribute."); return -EINVAL; + } start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log); if (start < 0) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 23d868545362..855a10feef3d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -100,15 +100,13 @@ static int switchdev_deferred_enqueue(struct net_device *dev, static int switchdev_port_attr_notify(enum switchdev_notifier_type nt, struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) + const struct switchdev_attr *attr) { int err; int rc; struct switchdev_notifier_port_attr_info attr_info = { .attr = attr, - .trans = trans, .handled = false, }; @@ -129,34 +127,7 @@ static int switchdev_port_attr_notify(enum switchdev_notifier_type nt, static int switchdev_port_attr_set_now(struct net_device *dev, const struct switchdev_attr *attr) { - struct switchdev_trans trans; - int err; - - /* Phase I: prepare for attr set. Driver/device should fail - * here if there are going to be issues in the commit phase, - * such as lack of resources or support. The driver/device - * should reserve resources needed for the commit phase here, - * but should not commit the attr. - */ - - trans.ph_prepare = true; - err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr, - &trans); - if (err) - return err; - - /* Phase II: commit attr set. This cannot fail as a fault - * of driver/device. If it does, it's a bug in the driver/device - * because the driver said everythings was OK in phase I. - */ - - trans.ph_prepare = false; - err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr, - &trans); - WARN(err, "%s: Commit of attribute (id=%d) failed.\n", - dev->name, attr->id); - - return err; + return switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr); } static void switchdev_port_attr_set_deferred(struct net_device *dev, @@ -186,10 +157,6 @@ static int switchdev_port_attr_set_defer(struct net_device *dev, * @dev: port device * @attr: attribute to set * - * Use a 2-phase prepare-commit transaction model to ensure - * system is not left in a partially updated state due to - * failure from driver/device. - * * rtnl_lock must be held and must not be in atomic section, * in case SWITCHDEV_F_DEFER flag is not set. */ @@ -221,7 +188,6 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) static int switchdev_port_obj_notify(enum switchdev_notifier_type nt, struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { int rc; @@ -229,7 +195,6 @@ static int switchdev_port_obj_notify(enum switchdev_notifier_type nt, struct switchdev_notifier_port_obj_info obj_info = { .obj = obj, - .trans = trans, .handled = false, }; @@ -244,48 +209,15 @@ static int switchdev_port_obj_notify(enum switchdev_notifier_type nt, return 0; } -static int switchdev_port_obj_add_now(struct net_device *dev, - const struct switchdev_obj *obj, - struct netlink_ext_ack *extack) -{ - struct switchdev_trans trans; - int err; - - ASSERT_RTNL(); - - /* Phase I: prepare for obj add. Driver/device should fail - * here if there are going to be issues in the commit phase, - * such as lack of resources or support. The driver/device - * should reserve resources needed for the commit phase here, - * but should not commit the obj. - */ - - trans.ph_prepare = true; - err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, - dev, obj, &trans, extack); - if (err) - return err; - - /* Phase II: commit obj add. This cannot fail as a fault - * of driver/device. If it does, it's a bug in the driver/device - * because the driver said everythings was OK in phase I. - */ - - trans.ph_prepare = false; - err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, - dev, obj, &trans, extack); - WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); - - return err; -} - static void switchdev_port_obj_add_deferred(struct net_device *dev, const void *data) { const struct switchdev_obj *obj = data; int err; - err = switchdev_port_obj_add_now(dev, obj, NULL); + ASSERT_RTNL(); + err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, + dev, obj, NULL); if (err && err != -EOPNOTSUPP) netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", err, obj->id); @@ -307,10 +239,6 @@ static int switchdev_port_obj_add_defer(struct net_device *dev, * @obj: object to add * @extack: netlink extended ack * - * Use a 2-phase prepare-commit transaction model to ensure - * system is not left in a partially updated state due to - * failure from driver/device. - * * rtnl_lock must be held and must not be in atomic section, * in case SWITCHDEV_F_DEFER flag is not set. */ @@ -321,7 +249,8 @@ int switchdev_port_obj_add(struct net_device *dev, if (obj->flags & SWITCHDEV_F_DEFER) return switchdev_port_obj_add_defer(dev, obj); ASSERT_RTNL(); - return switchdev_port_obj_add_now(dev, obj, extack); + return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, + dev, obj, extack); } EXPORT_SYMBOL_GPL(switchdev_port_obj_add); @@ -329,7 +258,7 @@ static int switchdev_port_obj_del_now(struct net_device *dev, const struct switchdev_obj *obj) { return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL, - dev, obj, NULL, NULL); + dev, obj, NULL); } static void switchdev_port_obj_del_deferred(struct net_device *dev, @@ -449,7 +378,6 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, bool (*check_cb)(const struct net_device *dev), int (*add_cb)(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack)) { struct netlink_ext_ack *extack; @@ -462,8 +390,7 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, if (check_cb(dev)) { /* This flag is only checked if the return value is success. */ port_obj_info->handled = true; - return add_cb(dev, port_obj_info->obj, port_obj_info->trans, - extack); + return add_cb(dev, port_obj_info->obj, extack); } /* Switch ports might be stacked under e.g. a LAG. Ignore the @@ -491,7 +418,6 @@ int switchdev_handle_port_obj_add(struct net_device *dev, bool (*check_cb)(const struct net_device *dev), int (*add_cb)(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack)) { int err; @@ -560,8 +486,7 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, bool (*check_cb)(const struct net_device *dev), int (*set_cb)(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans)) + const struct switchdev_attr *attr)) { struct net_device *lower_dev; struct list_head *iter; @@ -569,8 +494,7 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev, if (check_cb(dev)) { port_attr_info->handled = true; - return set_cb(dev, port_attr_info->attr, - port_attr_info->trans); + return set_cb(dev, port_attr_info->attr); } /* Switch ports might be stacked under e.g. a LAG. Ignore the @@ -597,8 +521,7 @@ int switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, bool (*check_cb)(const struct net_device *dev), int (*set_cb)(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans)) + const struct switchdev_attr *attr)) { int err; diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 6dce2abf436e..48fac3b17e40 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -108,7 +108,7 @@ const int tipc_max_domain_size = sizeof(struct tipc_mon_domain); */ static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt) { - return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32)); + return (offsetof(struct tipc_mon_domain, members)) + (mcnt * sizeof(u32)); } /* dom_size() : calculate size of own domain based on number of peers |