summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bluetooth/hci_conn.c8
-rw-r--r--net/bluetooth/hci_core.c13
-rw-r--r--net/bluetooth/smp.c6
-rw-r--r--net/bpf/test_run.c9
-rw-r--r--net/bridge/br_fdb.c149
-rw-r--r--net/bridge/br_input.c6
-rw-r--r--net/bridge/br_private.h19
-rw-r--r--net/bridge/br_switchdev.c12
-rw-r--r--net/caif/Kconfig10
-rw-r--r--net/core/dev.c406
-rw-r--r--net/core/devlink.c264
-rw-r--r--net/core/fib_notifier.c95
-rw-r--r--net/core/fib_rules.c23
-rw-r--r--net/core/filter.c19
-rw-r--r--net/core/flow_dissector.c155
-rw-r--r--net/core/net-procfs.c4
-rw-r--r--net/core/pktgen.c1
-rw-r--r--net/core/rtnetlink.c206
-rw-r--r--net/core/sock.c16
-rw-r--r--net/core/xdp.c2
-rw-r--r--net/dsa/dsa.c56
-rw-r--r--net/dsa/dsa2.c383
-rw-r--r--net/dsa/dsa_priv.h23
-rw-r--r--net/dsa/slave.c18
-rw-r--r--net/dsa/switch.c4
-rw-r--r--net/dsa/tag_8021q.c11
-rw-r--r--net/ieee802154/nl802154.c39
-rw-r--r--net/ipv4/fib_notifier.c13
-rw-r--r--net/ipv4/fib_rules.c5
-rw-r--r--net/ipv4/fib_trie.c44
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv4/ipconfig.c10
-rw-r--r--net/ipv4/ipmr.c13
-rw-r--r--net/ipv4/ipmr_base.c30
-rw-r--r--net/ipv4/tcp.c12
-rw-r--r--net/ipv4/tcp_fastopen.c5
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/ipv4/tcp_ipv4.c6
-rw-r--r--net/ipv6/addrconf.c7
-rw-r--r--net/ipv6/fib6_notifier.c11
-rw-r--r--net/ipv6/fib6_rules.c5
-rw-r--r--net/ipv6/ip6_fib.c50
-rw-r--r--net/ipv6/ip6_input.c3
-rw-r--r--net/ipv6/ip6mr.c13
-rw-r--r--net/ipv6/netfilter/nf_tproxy_ipv6.c2
-rw-r--r--net/mac80211/agg-tx.c9
-rw-r--r--net/mac80211/ibss.c9
-rw-r--r--net/mac80211/rc80211_minstrel.c48
-rw-r--r--net/mac80211/rc80211_minstrel.h57
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c8
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c73
-rw-r--r--net/mac80211/rc80211_minstrel_ht.h2
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c8
-rw-r--r--net/mac80211/tx.c15
-rw-r--r--net/netfilter/core.c20
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c26
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c18
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c41
-rw-r--r--net/netfilter/ipset/ip_set_core.c212
-rw-r--r--net/netfilter/ipset/ip_set_getport.c28
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h4
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmac.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c24
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c6
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c24
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c24
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c28
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c24
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c28
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c47
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ovf.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c18
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_ecache.c23
-rw-r--r--net/netfilter/nf_conntrack_extend.c21
-rw-r--r--net/netfilter/nf_conntrack_netlink.c76
-rw-r--r--net/netfilter/nf_tables_api.c572
-rw-r--r--net/netfilter/nf_tables_offload.c188
-rw-r--r--net/netfilter/nft_chain_filter.c45
-rw-r--r--net/netlink/genetlink.c303
-rw-r--r--net/nfc/netlink.c17
-rw-r--r--net/openvswitch/conntrack.c21
-rw-r--r--net/qrtr/tun.c6
-rw-r--r--net/rds/ib.c10
-rw-r--r--net/rds/ib.h15
-rw-r--r--net/rds/ib_cm.c167
-rw-r--r--net/rds/ib_recv.c13
-rw-r--r--net/rds/ib_send.c19
-rw-r--r--net/rxrpc/peer_object.c2
-rw-r--r--net/sched/act_api.c55
-rw-r--r--net/sched/act_bpf.c5
-rw-r--r--net/sched/act_connmark.c4
-rw-r--r--net/sched/act_csum.c10
-rw-r--r--net/sched/act_ct.c16
-rw-r--r--net/sched/act_ctinfo.c4
-rw-r--r--net/sched/act_gact.c21
-rw-r--r--net/sched/act_ife.c5
-rw-r--r--net/sched/act_ipt.c12
-rw-r--r--net/sched/act_mirred.c19
-rw-r--r--net/sched/act_mpls.c5
-rw-r--r--net/sched/act_nat.c4
-rw-r--r--net/sched/act_pedit.c5
-rw-r--r--net/sched/act_police.c14
-rw-r--r--net/sched/act_sample.c4
-rw-r--r--net/sched/act_simple.c5
-rw-r--r--net/sched/act_skbedit.c4
-rw-r--r--net/sched/act_skbmod.c4
-rw-r--r--net/sched/act_tunnel_key.c9
-rw-r--r--net/sched/act_vlan.c16
-rw-r--r--net/sched/sch_fq.c3
-rw-r--r--net/sched/sch_fq_codel.c1
-rw-r--r--net/sched/sch_generic.c16
-rw-r--r--net/sctp/associola.c22
-rw-r--r--net/sctp/chunk.c40
-rw-r--r--net/sctp/ulpevent.c57
-rw-r--r--net/smc/af_smc.c4
-rw-r--r--net/smc/smc.h1
-rw-r--r--net/smc/smc_cdc.c4
-rw-r--r--net/smc/smc_close.c70
-rw-r--r--net/smc/smc_close.h2
-rw-r--r--net/smc/smc_core.c238
-rw-r--r--net/smc/smc_core.h9
-rw-r--r--net/smc/smc_ib.c15
-rw-r--r--net/smc/smc_ib.h1
-rw-r--r--net/smc/smc_ism.c5
-rw-r--r--net/smc/smc_llc.c2
-rw-r--r--net/smc/smc_pnet.c5
-rw-r--r--net/smc/smc_rx.c10
-rw-r--r--net/smc/smc_tx.c26
-rw-r--r--net/smc/smc_wr.c10
-rw-r--r--net/tipc/core.c16
-rw-r--r--net/tipc/core.h6
-rw-r--r--net/tipc/discover.c4
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/msg.c53
-rw-r--r--net/tipc/msg.h26
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/netlink.c21
-rw-r--r--net/tipc/netlink.h1
-rw-r--r--net/tipc/netlink_compat.c28
-rw-r--r--net/tipc/node.c161
-rw-r--r--net/tipc/node.h12
-rw-r--r--net/tipc/socket.c129
-rw-r--r--net/tipc/udp_media.c6
-rw-r--r--net/tls/Kconfig10
-rw-r--r--net/tls/Makefile5
-rw-r--r--net/tls/tls_device.c46
-rw-r--r--net/tls/tls_main.c172
-rw-r--r--net/tls/tls_proc.c47
-rw-r--r--net/tls/tls_sw.c18
-rw-r--r--net/tls/tls_toe.c139
-rw-r--r--net/tls/trace.c10
-rw-r--r--net/tls/trace.h202
-rw-r--r--net/unix/af_unix.c6
-rw-r--r--net/vmw_vsock/hyperv_transport.c22
-rw-r--r--net/vmw_vsock/virtio_transport_common.c55
-rw-r--r--net/wireless/nl80211.c6
-rw-r--r--net/wireless/reg.h2
166 files changed, 4537 insertions, 1990 deletions
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index ad5b0ac1f9ce..7ff92dd4c53c 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -934,6 +934,14 @@ static void hci_req_directed_advertising(struct hci_request *req,
return;
memset(&cp, 0, sizeof(cp));
+
+ /* Some controllers might reject command if intervals are not
+ * within range for undirected advertising.
+ * BCM20702A0 is known to be affected by this.
+ */
+ cp.min_interval = cpu_to_le16(0x0020);
+ cp.max_interval = cpu_to_le16(0x0020);
+
cp.type = LE_ADV_DIRECT_IND;
cp.own_address_type = own_addr_type;
cp.direct_addr_type = conn->dst_type;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 04bc79359a17..0cc9ce917222 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -842,8 +842,8 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt)
if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) {
struct hci_cp_le_write_def_data_len cp;
- cp.tx_len = hdev->le_max_tx_len;
- cp.tx_time = hdev->le_max_tx_time;
+ cp.tx_len = cpu_to_le16(hdev->le_max_tx_len);
+ cp.tx_time = cpu_to_le16(hdev->le_max_tx_time);
hci_req_add(req, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp);
}
@@ -4440,7 +4440,14 @@ static void hci_rx_work(struct work_struct *work)
hci_send_to_sock(hdev, skb);
}
- if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
+ /* If the device has been opened in HCI_USER_CHANNEL,
+ * the userspace has exclusive access to device.
+ * When device is HCI_INIT, we still need to process
+ * the data packets to the driver in order
+ * to complete its setup().
+ */
+ if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+ !test_bit(HCI_INIT, &hdev->flags)) {
kfree_skb(skb);
continue;
}
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 26e8cfad22b8..6b42be4b5861 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -502,15 +502,12 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16],
const bdaddr_t *bdaddr)
{
struct l2cap_chan *chan = hdev->smp_data;
- struct smp_dev *smp;
u8 hash[3];
int err;
if (!chan || !chan->data)
return false;
- smp = chan->data;
-
BT_DBG("RPA %pMR IRK %*phN", bdaddr, 16, irk);
err = smp_ah(irk, &bdaddr->b[3], hash);
@@ -523,14 +520,11 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16],
int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa)
{
struct l2cap_chan *chan = hdev->smp_data;
- struct smp_dev *smp;
int err;
if (!chan || !chan->data)
return -EOPNOTSUPP;
- smp = chan->data;
-
get_random_bytes(&rpa->b[3], 3);
rpa->b[5] &= 0x3f; /* Clear two most significant bits */
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 1153bbcdff72..0be4497cb832 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -218,10 +218,18 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) +
FIELD_SIZEOF(struct __sk_buff, cb),
+ offsetof(struct __sk_buff, tstamp)))
+ return -EINVAL;
+
+ /* tstamp is allowed */
+
+ if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) +
+ FIELD_SIZEOF(struct __sk_buff, tstamp),
sizeof(struct __sk_buff)))
return -EINVAL;
skb->priority = __skb->priority;
+ skb->tstamp = __skb->tstamp;
memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
return 0;
@@ -235,6 +243,7 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
return;
__skb->priority = skb->priority;
+ __skb->tstamp = skb->tstamp;
memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index b1d3248c0252..284b3662d234 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -75,8 +75,9 @@ static inline unsigned long hold_time(const struct net_bridge *br)
static inline int has_expired(const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb)
{
- return !fdb->is_static && !fdb->added_by_external_learn &&
- time_before_eq(fdb->updated + hold_time(br), jiffies);
+ return !test_bit(BR_FDB_STATIC, &fdb->flags) &&
+ !test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags) &&
+ time_before_eq(fdb->updated + hold_time(br), jiffies);
}
static void fdb_rcu_free(struct rcu_head *head)
@@ -197,7 +198,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
{
trace_fdb_delete(br, f);
- if (f->is_static)
+ if (test_bit(BR_FDB_STATIC, &f->flags))
fdb_del_hw_addr(br, f->key.addr.addr);
hlist_del_init_rcu(&f->fdb_node);
@@ -224,7 +225,7 @@ static void fdb_delete_local(struct net_bridge *br,
if (op != p && ether_addr_equal(op->dev->dev_addr, addr) &&
(!vid || br_vlan_find(vg, vid))) {
f->dst = op;
- f->added_by_user = 0;
+ clear_bit(BR_FDB_ADDED_BY_USER, &f->flags);
return;
}
}
@@ -235,7 +236,7 @@ static void fdb_delete_local(struct net_bridge *br,
if (p && ether_addr_equal(br->dev->dev_addr, addr) &&
(!vid || (v && br_vlan_should_use(v)))) {
f->dst = NULL;
- f->added_by_user = 0;
+ clear_bit(BR_FDB_ADDED_BY_USER, &f->flags);
return;
}
@@ -250,7 +251,8 @@ void br_fdb_find_delete_local(struct net_bridge *br,
spin_lock_bh(&br->hash_lock);
f = br_fdb_find(br, addr, vid);
- if (f && f->is_local && !f->added_by_user && f->dst == p)
+ if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
+ !test_bit(BR_FDB_ADDED_BY_USER, &f->flags) && f->dst == p)
fdb_delete_local(br, p, f);
spin_unlock_bh(&br->hash_lock);
}
@@ -265,7 +267,8 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
spin_lock_bh(&br->hash_lock);
vg = nbp_vlan_group(p);
hlist_for_each_entry(f, &br->fdb_list, fdb_node) {
- if (f->dst == p && f->is_local && !f->added_by_user) {
+ if (f->dst == p && test_bit(BR_FDB_LOCAL, &f->flags) &&
+ !test_bit(BR_FDB_ADDED_BY_USER, &f->flags)) {
/* delete old one */
fdb_delete_local(br, p, f);
@@ -306,7 +309,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
/* If old entry was unassociated with any port, then delete it. */
f = br_fdb_find(br, br->dev->dev_addr, 0);
- if (f && f->is_local && !f->dst && !f->added_by_user)
+ if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
+ !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, 0);
@@ -321,7 +325,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
if (!br_vlan_should_use(v))
continue;
f = br_fdb_find(br, br->dev->dev_addr, v->vid);
- if (f && f->is_local && !f->dst && !f->added_by_user)
+ if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
+ !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, v->vid);
}
@@ -346,7 +351,8 @@ void br_fdb_cleanup(struct work_struct *work)
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
unsigned long this_timer;
- if (f->is_static || f->added_by_external_learn)
+ if (test_bit(BR_FDB_STATIC, &f->flags) ||
+ test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags))
continue;
this_timer = f->updated + delay;
if (time_after(this_timer, now)) {
@@ -373,7 +379,7 @@ void br_fdb_flush(struct net_bridge *br)
spin_lock_bh(&br->hash_lock);
hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
- if (!f->is_static)
+ if (!test_bit(BR_FDB_STATIC, &f->flags))
fdb_delete(br, f, true);
}
spin_unlock_bh(&br->hash_lock);
@@ -397,10 +403,11 @@ void br_fdb_delete_by_port(struct net_bridge *br,
continue;
if (!do_all)
- if (f->is_static || (vid && f->key.vlan_id != vid))
+ if (test_bit(BR_FDB_STATIC, &f->flags) ||
+ (vid && f->key.vlan_id != vid))
continue;
- if (f->is_local)
+ if (test_bit(BR_FDB_LOCAL, &f->flags))
fdb_delete_local(br, p, f);
else
fdb_delete(br, f, true);
@@ -469,8 +476,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
fe->port_no = f->dst->port_no;
fe->port_hi = f->dst->port_no >> 8;
- fe->is_local = f->is_local;
- if (!f->is_static)
+ fe->is_local = test_bit(BR_FDB_LOCAL, &f->flags);
+ if (!test_bit(BR_FDB_STATIC, &f->flags))
fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
++fe;
++num;
@@ -484,8 +491,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
struct net_bridge_port *source,
const unsigned char *addr,
__u16 vid,
- unsigned char is_local,
- unsigned char is_static)
+ unsigned long flags)
{
struct net_bridge_fdb_entry *fdb;
@@ -494,12 +500,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
fdb->dst = source;
fdb->key.vlan_id = vid;
- fdb->is_local = is_local;
- fdb->is_static = is_static;
- fdb->added_by_user = 0;
- fdb->added_by_external_learn = 0;
- fdb->offloaded = 0;
- fdb->is_sticky = 0;
+ fdb->flags = flags;
fdb->updated = fdb->used = jiffies;
if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
&fdb->rhnode,
@@ -526,14 +527,15 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
/* it is okay to have multiple ports with same
* address, just use the first one.
*/
- if (fdb->is_local)
+ if (test_bit(BR_FDB_LOCAL, &fdb->flags))
return 0;
br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
source ? source->dev->name : br->dev->name, addr, vid);
fdb_delete(br, fdb, true);
}
- fdb = fdb_create(br, source, addr, vid, 1, 1);
+ fdb = fdb_create(br, source, addr, vid,
+ BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC));
if (!fdb)
return -ENOMEM;
@@ -555,7 +557,7 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
}
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid, bool added_by_user)
+ const unsigned char *addr, u16 vid, unsigned long flags)
{
struct net_bridge_fdb_entry *fdb;
bool fdb_modified = false;
@@ -572,7 +574,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
if (likely(fdb)) {
/* attempt to update an entry for a local interface */
- if (unlikely(fdb->is_local)) {
+ if (unlikely(test_bit(BR_FDB_LOCAL, &fdb->flags))) {
if (net_ratelimit())
br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n",
source->dev->name, addr, vid);
@@ -580,30 +582,30 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
unsigned long now = jiffies;
/* fastpath: update of existing entry */
- if (unlikely(source != fdb->dst && !fdb->is_sticky)) {
+ if (unlikely(source != fdb->dst &&
+ !test_bit(BR_FDB_STICKY, &fdb->flags))) {
fdb->dst = source;
fdb_modified = true;
/* Take over HW learned entry */
- if (unlikely(fdb->added_by_external_learn))
- fdb->added_by_external_learn = 0;
+ if (unlikely(test_bit(BR_FDB_ADDED_BY_EXT_LEARN,
+ &fdb->flags)))
+ clear_bit(BR_FDB_ADDED_BY_EXT_LEARN,
+ &fdb->flags);
}
if (now != fdb->updated)
fdb->updated = now;
- if (unlikely(added_by_user))
- fdb->added_by_user = 1;
+ if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags)))
+ set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
if (unlikely(fdb_modified)) {
- trace_br_fdb_update(br, source, addr, vid, added_by_user);
+ trace_br_fdb_update(br, source, addr, vid, flags);
fdb_notify(br, fdb, RTM_NEWNEIGH, true);
}
}
} else {
spin_lock(&br->hash_lock);
- fdb = fdb_create(br, source, addr, vid, 0, 0);
+ fdb = fdb_create(br, source, addr, vid, flags);
if (fdb) {
- if (unlikely(added_by_user))
- fdb->added_by_user = 1;
- trace_br_fdb_update(br, source, addr, vid,
- added_by_user);
+ trace_br_fdb_update(br, source, addr, vid, flags);
fdb_notify(br, fdb, RTM_NEWNEIGH, true);
}
/* else we lose race and someone else inserts
@@ -616,9 +618,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
static int fdb_to_nud(const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb)
{
- if (fdb->is_local)
+ if (test_bit(BR_FDB_LOCAL, &fdb->flags))
return NUD_PERMANENT;
- else if (fdb->is_static)
+ else if (test_bit(BR_FDB_STATIC, &fdb->flags))
return NUD_NOARP;
else if (has_expired(br, fdb))
return NUD_STALE;
@@ -648,11 +650,11 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex;
ndm->ndm_state = fdb_to_nud(br, fdb);
- if (fdb->offloaded)
+ if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
ndm->ndm_flags |= NTF_OFFLOADED;
- if (fdb->added_by_external_learn)
+ if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
ndm->ndm_flags |= NTF_EXT_LEARNED;
- if (fdb->is_sticky)
+ if (test_bit(BR_FDB_STICKY, &fdb->flags))
ndm->ndm_flags |= NTF_STICKY;
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
@@ -799,7 +801,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
const u8 *addr, u16 state, u16 flags, u16 vid,
u8 ndm_flags)
{
- u8 is_sticky = !!(ndm_flags & NTF_STICKY);
+ bool is_sticky = !!(ndm_flags & NTF_STICKY);
struct net_bridge_fdb_entry *fdb;
bool modified = false;
@@ -823,7 +825,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (!(flags & NLM_F_CREATE))
return -ENOENT;
- fdb = fdb_create(br, source, addr, vid, 0, 0);
+ fdb = fdb_create(br, source, addr, vid, 0);
if (!fdb)
return -ENOMEM;
@@ -840,34 +842,28 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (fdb_to_nud(br, fdb) != state) {
if (state & NUD_PERMANENT) {
- fdb->is_local = 1;
- if (!fdb->is_static) {
- fdb->is_static = 1;
+ set_bit(BR_FDB_LOCAL, &fdb->flags);
+ if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags))
fdb_add_hw_addr(br, addr);
- }
} else if (state & NUD_NOARP) {
- fdb->is_local = 0;
- if (!fdb->is_static) {
- fdb->is_static = 1;
+ clear_bit(BR_FDB_LOCAL, &fdb->flags);
+ if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags))
fdb_add_hw_addr(br, addr);
- }
} else {
- fdb->is_local = 0;
- if (fdb->is_static) {
- fdb->is_static = 0;
+ clear_bit(BR_FDB_LOCAL, &fdb->flags);
+ if (test_and_clear_bit(BR_FDB_STATIC, &fdb->flags))
fdb_del_hw_addr(br, addr);
- }
}
modified = true;
}
- if (is_sticky != fdb->is_sticky) {
- fdb->is_sticky = is_sticky;
+ if (is_sticky != test_bit(BR_FDB_STICKY, &fdb->flags)) {
+ change_bit(BR_FDB_STICKY, &fdb->flags);
modified = true;
}
- fdb->added_by_user = 1;
+ set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
fdb->used = jiffies;
if (modified) {
@@ -892,7 +888,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
}
local_bh_disable();
rcu_read_lock();
- br_fdb_update(br, p, addr, vid, true);
+ br_fdb_update(br, p, addr, vid, BIT(BR_FDB_ADDED_BY_USER));
rcu_read_unlock();
local_bh_enable();
} else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
@@ -1064,7 +1060,7 @@ int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p)
rcu_read_lock();
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
/* We only care for static entries */
- if (!f->is_static)
+ if (!test_bit(BR_FDB_STATIC, &f->flags))
continue;
err = dev_uc_add(p->dev, f->key.addr.addr);
if (err)
@@ -1078,7 +1074,7 @@ done:
rollback:
hlist_for_each_entry_rcu(tmp, &br->fdb_list, fdb_node) {
/* We only care for static entries */
- if (!tmp->is_static)
+ if (!test_bit(BR_FDB_STATIC, &tmp->flags))
continue;
if (tmp == f)
break;
@@ -1097,7 +1093,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
rcu_read_lock();
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
/* We only care for static entries */
- if (!f->is_static)
+ if (!test_bit(BR_FDB_STATIC, &f->flags))
continue;
dev_uc_del(p->dev, f->key.addr.addr);
@@ -1119,14 +1115,15 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
fdb = br_fdb_find(br, addr, vid);
if (!fdb) {
- fdb = fdb_create(br, p, addr, vid, 0, 0);
+ unsigned long flags = BIT(BR_FDB_ADDED_BY_EXT_LEARN);
+
+ if (swdev_notify)
+ flags |= BIT(BR_FDB_ADDED_BY_USER);
+ fdb = fdb_create(br, p, addr, vid, flags);
if (!fdb) {
err = -ENOMEM;
goto err_unlock;
}
- if (swdev_notify)
- fdb->added_by_user = 1;
- fdb->added_by_external_learn = 1;
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
} else {
fdb->updated = jiffies;
@@ -1136,17 +1133,17 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
modified = true;
}
- if (fdb->added_by_external_learn) {
+ if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) {
/* Refresh entry */
fdb->used = jiffies;
- } else if (!fdb->added_by_user) {
+ } else if (!test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags)) {
/* Take over SW learned entry */
- fdb->added_by_external_learn = 1;
+ set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags);
modified = true;
}
if (swdev_notify)
- fdb->added_by_user = 1;
+ set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
if (modified)
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
@@ -1168,7 +1165,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
spin_lock_bh(&br->hash_lock);
fdb = br_fdb_find(br, addr, vid);
- if (fdb && fdb->added_by_external_learn)
+ if (fdb && test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
fdb_delete(br, fdb, swdev_notify);
else
err = -ENOENT;
@@ -1186,8 +1183,8 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
spin_lock_bh(&br->hash_lock);
fdb = br_fdb_find(br, addr, vid);
- if (fdb)
- fdb->offloaded = offloaded;
+ if (fdb && offloaded != test_bit(BR_FDB_OFFLOADED, &fdb->flags))
+ change_bit(BR_FDB_OFFLOADED, &fdb->flags);
spin_unlock_bh(&br->hash_lock);
}
@@ -1206,7 +1203,7 @@ void br_fdb_clear_offload(const struct net_device *dev, u16 vid)
spin_lock_bh(&p->br->hash_lock);
hlist_for_each_entry(f, &p->br->fdb_list, fdb_node) {
if (f->dst == p && f->key.vlan_id == vid)
- f->offloaded = 0;
+ clear_bit(BR_FDB_OFFLOADED, &f->flags);
}
spin_unlock_bh(&p->br->hash_lock);
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 09b1dd8cd853..f37b05090f45 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -88,7 +88,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
/* insert into forwarding database after filtering to avoid spoofing */
br = p->br;
if (p->flags & BR_LEARNING)
- br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
+ br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
local_rcv = !!(br->dev->flags & IFF_PROMISC);
if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
@@ -151,7 +151,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (dst) {
unsigned long now = jiffies;
- if (dst->is_local)
+ if (test_bit(BR_FDB_LOCAL, &dst->flags))
return br_pass_frame_up(skb);
if (now != dst->used)
@@ -184,7 +184,7 @@ static void __br_handle_local_finish(struct sk_buff *skb)
if ((p->flags & BR_LEARNING) &&
!br_opt_get(p->br, BROPT_NO_LL_LEARN) &&
br_should_learn(p, skb, &vid))
- br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
+ br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, 0);
}
/* note: already called with rcu_read_lock */
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index ce2ab14ee605..08742bff9bf0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -172,6 +172,16 @@ struct net_bridge_vlan_group {
u16 pvid;
};
+/* bridge fdb flags */
+enum {
+ BR_FDB_LOCAL,
+ BR_FDB_STATIC,
+ BR_FDB_STICKY,
+ BR_FDB_ADDED_BY_USER,
+ BR_FDB_ADDED_BY_EXT_LEARN,
+ BR_FDB_OFFLOADED,
+};
+
struct net_bridge_fdb_key {
mac_addr addr;
u16 vlan_id;
@@ -183,12 +193,7 @@ struct net_bridge_fdb_entry {
struct net_bridge_fdb_key key;
struct hlist_node fdb_node;
- unsigned char is_local:1,
- is_static:1,
- is_sticky:1,
- added_by_user:1,
- added_by_external_learn:1,
- offloaded:1;
+ unsigned long flags;
/* write-heavy members should not affect lookups */
unsigned long updated ____cacheline_aligned_in_smp;
@@ -566,7 +571,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count,
int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid);
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid, bool added_by_user);
+ const unsigned char *addr, u16 vid, unsigned long flags);
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr, u16 vid);
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 921310d3cbae..015209bf44aa 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -129,15 +129,19 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
fdb->key.vlan_id,
fdb->dst->dev,
- fdb->added_by_user,
- fdb->offloaded);
+ test_bit(BR_FDB_ADDED_BY_USER,
+ &fdb->flags),
+ test_bit(BR_FDB_OFFLOADED,
+ &fdb->flags));
break;
case RTM_NEWNEIGH:
br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
fdb->key.vlan_id,
fdb->dst->dev,
- fdb->added_by_user,
- fdb->offloaded);
+ test_bit(BR_FDB_ADDED_BY_USER,
+ &fdb->flags),
+ test_bit(BR_FDB_OFFLOADED,
+ &fdb->flags));
break;
}
}
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
index eb83051c8330..b7532a79ca7a 100644
--- a/net/caif/Kconfig
+++ b/net/caif/Kconfig
@@ -13,11 +13,11 @@ menuconfig CAIF
with its modems. It is accessed from user space as sockets (PF_CAIF).
Say Y (or M) here if you build for a phone product (e.g. Android or
- MeeGo ) that uses CAIF as transport, if unsure say N.
+ MeeGo) that uses CAIF as transport. If unsure say N.
If you select to build it as module then CAIF_NETDEV also needs to be
- built as modules. You will also need to say yes to any CAIF physical
- devices that your platform requires.
+ built as a module. You will also need to say Y (or M) to any CAIF
+ physical devices that your platform requires.
See Documentation/networking/caif for a further explanation on how to
use and configure CAIF.
@@ -37,7 +37,7 @@ config CAIF_NETDEV
default CAIF
---help---
Say Y if you will be using a CAIF based GPRS network device.
- This can be either built-in or a loadable module,
+ This can be either built-in or a loadable module.
If you select to build it as a built-in then the main CAIF device must
also be a built-in.
If unsure say Y.
@@ -48,7 +48,7 @@ config CAIF_USB
default n
---help---
Say Y if you are using CAIF over USB CDC NCM.
- This can be either built-in or a loadable module,
+ This can be either built-in or a loadable module.
If you select to build it as a built-in then the main CAIF device must
also be a built-in.
If unsure say N.
diff --git a/net/core/dev.c b/net/core/dev.c
index 99ac84ff398f..bb15800c8cb5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -229,6 +229,122 @@ static inline void rps_unlock(struct softnet_data *sd)
#endif
}
+static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
+ const char *name)
+{
+ struct netdev_name_node *name_node;
+
+ name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
+ if (!name_node)
+ return NULL;
+ INIT_HLIST_NODE(&name_node->hlist);
+ name_node->dev = dev;
+ name_node->name = name;
+ return name_node;
+}
+
+static struct netdev_name_node *
+netdev_name_node_head_alloc(struct net_device *dev)
+{
+ struct netdev_name_node *name_node;
+
+ name_node = netdev_name_node_alloc(dev, dev->name);
+ if (!name_node)
+ return NULL;
+ INIT_LIST_HEAD(&name_node->list);
+ return name_node;
+}
+
+static void netdev_name_node_free(struct netdev_name_node *name_node)
+{
+ kfree(name_node);
+}
+
+static void netdev_name_node_add(struct net *net,
+ struct netdev_name_node *name_node)
+{
+ hlist_add_head_rcu(&name_node->hlist,
+ dev_name_hash(net, name_node->name));
+}
+
+static void netdev_name_node_del(struct netdev_name_node *name_node)
+{
+ hlist_del_rcu(&name_node->hlist);
+}
+
+static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
+ const char *name)
+{
+ struct hlist_head *head = dev_name_hash(net, name);
+ struct netdev_name_node *name_node;
+
+ hlist_for_each_entry(name_node, head, hlist)
+ if (!strcmp(name_node->name, name))
+ return name_node;
+ return NULL;
+}
+
+static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
+ const char *name)
+{
+ struct hlist_head *head = dev_name_hash(net, name);
+ struct netdev_name_node *name_node;
+
+ hlist_for_each_entry_rcu(name_node, head, hlist)
+ if (!strcmp(name_node->name, name))
+ return name_node;
+ return NULL;
+}
+
+int netdev_name_node_alt_create(struct net_device *dev, const char *name)
+{
+ struct netdev_name_node *name_node;
+ struct net *net = dev_net(dev);
+
+ name_node = netdev_name_node_lookup(net, name);
+ if (name_node)
+ return -EEXIST;
+ name_node = netdev_name_node_alloc(dev, name);
+ if (!name_node)
+ return -ENOMEM;
+ netdev_name_node_add(net, name_node);
+ /* The node that holds dev->name acts as a head of per-device list. */
+ list_add_tail(&name_node->list, &dev->name_node->list);
+
+ return 0;
+}
+EXPORT_SYMBOL(netdev_name_node_alt_create);
+
+static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+{
+ list_del(&name_node->list);
+ netdev_name_node_del(name_node);
+ kfree(name_node->name);
+ netdev_name_node_free(name_node);
+}
+
+int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
+{
+ struct netdev_name_node *name_node;
+ struct net *net = dev_net(dev);
+
+ name_node = netdev_name_node_lookup(net, name);
+ if (!name_node)
+ return -ENOENT;
+ __netdev_name_node_alt_destroy(name_node);
+
+ return 0;
+}
+EXPORT_SYMBOL(netdev_name_node_alt_destroy);
+
+static void netdev_name_node_alt_flush(struct net_device *dev)
+{
+ struct netdev_name_node *name_node, *tmp;
+
+ list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
+ __netdev_name_node_alt_destroy(name_node);
+}
+
/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
@@ -238,7 +354,7 @@ static void list_netdevice(struct net_device *dev)
write_lock_bh(&dev_base_lock);
list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
- hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+ netdev_name_node_add(net, dev->name_node);
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock_bh(&dev_base_lock);
@@ -256,7 +372,7 @@ static void unlist_netdevice(struct net_device *dev)
/* Unlink dev from the device chain */
write_lock_bh(&dev_base_lock);
list_del_rcu(&dev->dev_list);
- hlist_del_rcu(&dev->name_hlist);
+ netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
write_unlock_bh(&dev_base_lock);
@@ -652,14 +768,10 @@ EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
- struct net_device *dev;
- struct hlist_head *head = dev_name_hash(net, name);
+ struct netdev_name_node *node_name;
- hlist_for_each_entry(dev, head, name_hlist)
- if (!strncmp(dev->name, name, IFNAMSIZ))
- return dev;
-
- return NULL;
+ node_name = netdev_name_node_lookup(net, name);
+ return node_name ? node_name->dev : NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);
@@ -677,14 +789,10 @@ EXPORT_SYMBOL(__dev_get_by_name);
struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
- struct net_device *dev;
- struct hlist_head *head = dev_name_hash(net, name);
-
- hlist_for_each_entry_rcu(dev, head, name_hlist)
- if (!strncmp(dev->name, name, IFNAMSIZ))
- return dev;
+ struct netdev_name_node *node_name;
- return NULL;
+ node_name = netdev_name_node_lookup_rcu(net, name);
+ return node_name ? node_name->dev : NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);
@@ -1060,8 +1168,8 @@ int dev_alloc_name(struct net_device *dev, const char *name)
}
EXPORT_SYMBOL(dev_alloc_name);
-int dev_get_valid_name(struct net *net, struct net_device *dev,
- const char *name)
+static int dev_get_valid_name(struct net *net, struct net_device *dev,
+ const char *name)
{
BUG_ON(!net);
@@ -1077,7 +1185,6 @@ int dev_get_valid_name(struct net *net, struct net_device *dev,
return 0;
}
-EXPORT_SYMBOL(dev_get_valid_name);
/**
* dev_change_name - change name of a device
@@ -1151,13 +1258,13 @@ rollback:
netdev_adjacent_rename_links(dev, oldname);
write_lock_bh(&dev_base_lock);
- hlist_del_rcu(&dev->name_hlist);
+ netdev_name_node_del(dev->name_node);
write_unlock_bh(&dev_base_lock);
synchronize_rcu();
write_lock_bh(&dev_base_lock);
- hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+ netdev_name_node_add(net, dev->name_node);
write_unlock_bh(&dev_base_lock);
ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
@@ -1536,6 +1643,62 @@ static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
return nb->notifier_call(nb, val, &info);
}
+static int call_netdevice_register_notifiers(struct notifier_block *nb,
+ struct net_device *dev)
+{
+ int err;
+
+ err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
+ err = notifier_to_errno(err);
+ if (err)
+ return err;
+
+ if (!(dev->flags & IFF_UP))
+ return 0;
+
+ call_netdevice_notifier(nb, NETDEV_UP, dev);
+ return 0;
+}
+
+static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
+ struct net_device *dev)
+{
+ if (dev->flags & IFF_UP) {
+ call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+ dev);
+ call_netdevice_notifier(nb, NETDEV_DOWN, dev);
+ }
+ call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
+}
+
+static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
+ struct net *net)
+{
+ struct net_device *dev;
+ int err;
+
+ for_each_netdev(net, dev) {
+ err = call_netdevice_register_notifiers(nb, dev);
+ if (err)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ for_each_netdev_continue_reverse(net, dev)
+ call_netdevice_unregister_notifiers(nb, dev);
+ return err;
+}
+
+static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
+ struct net *net)
+{
+ struct net_device *dev;
+
+ for_each_netdev(net, dev)
+ call_netdevice_unregister_notifiers(nb, dev);
+}
+
static int dev_boot_phase = 1;
/**
@@ -1554,8 +1717,6 @@ static int dev_boot_phase = 1;
int register_netdevice_notifier(struct notifier_block *nb)
{
- struct net_device *dev;
- struct net_device *last;
struct net *net;
int err;
@@ -1568,17 +1729,9 @@ int register_netdevice_notifier(struct notifier_block *nb)
if (dev_boot_phase)
goto unlock;
for_each_net(net) {
- for_each_netdev(net, dev) {
- err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
- err = notifier_to_errno(err);
- if (err)
- goto rollback;
-
- if (!(dev->flags & IFF_UP))
- continue;
-
- call_netdevice_notifier(nb, NETDEV_UP, dev);
- }
+ err = call_netdevice_register_net_notifiers(nb, net);
+ if (err)
+ goto rollback;
}
unlock:
@@ -1587,22 +1740,9 @@ unlock:
return err;
rollback:
- last = dev;
- for_each_net(net) {
- for_each_netdev(net, dev) {
- if (dev == last)
- goto outroll;
+ for_each_net_continue_reverse(net)
+ call_netdevice_unregister_net_notifiers(nb, net);
- if (dev->flags & IFF_UP) {
- call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
- dev);
- call_netdevice_notifier(nb, NETDEV_DOWN, dev);
- }
- call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
- }
- }
-
-outroll:
raw_notifier_chain_unregister(&netdev_chain, nb);
goto unlock;
}
@@ -1653,6 +1793,80 @@ unlock:
EXPORT_SYMBOL(unregister_netdevice_notifier);
/**
+ * register_netdevice_notifier_net - register a per-netns network notifier block
+ * @net: network namespace
+ * @nb: notifier
+ *
+ * Register a notifier to be called when network device events occur.
+ * The notifier passed is linked into the kernel structures and must
+ * not be reused until it has been unregistered. A negative errno code
+ * is returned on a failure.
+ *
+ * When registered all registration and up events are replayed
+ * to the new notifier to allow device to have a race free
+ * view of the network device list.
+ */
+
+int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ rtnl_lock();
+ err = raw_notifier_chain_register(&net->netdev_chain, nb);
+ if (err)
+ goto unlock;
+ if (dev_boot_phase)
+ goto unlock;
+
+ err = call_netdevice_register_net_notifiers(nb, net);
+ if (err)
+ goto chain_unregister;
+
+unlock:
+ rtnl_unlock();
+ return err;
+
+chain_unregister:
+ raw_notifier_chain_unregister(&netdev_chain, nb);
+ goto unlock;
+}
+EXPORT_SYMBOL(register_netdevice_notifier_net);
+
+/**
+ * unregister_netdevice_notifier_net - unregister a per-netns
+ * network notifier block
+ * @net: network namespace
+ * @nb: notifier
+ *
+ * Unregister a notifier previously registered by
+ * register_netdevice_notifier(). The notifier is unlinked into the
+ * kernel structures and may then be reused. A negative errno code
+ * is returned on a failure.
+ *
+ * After unregistering unregister and down device events are synthesized
+ * for all devices on the device list to the removed notifier to remove
+ * the need for special case cleanup code.
+ */
+
+int unregister_netdevice_notifier_net(struct net *net,
+ struct notifier_block *nb)
+{
+ int err;
+
+ rtnl_lock();
+ err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
+ if (err)
+ goto unlock;
+
+ call_netdevice_unregister_net_notifiers(nb, net);
+
+unlock:
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL(unregister_netdevice_notifier_net);
+
+/**
* call_netdevice_notifiers_info - call all network notifier blocks
* @val: value passed unmodified to notifier function
* @info: notifier information data
@@ -1664,7 +1878,18 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
static int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info)
{
+ struct net *net = dev_net(info->dev);
+ int ret;
+
ASSERT_RTNL();
+
+ /* Run per-netns notifier block chain first, then run the global one.
+ * Hopefully, one day, the global one is going to be removed after
+ * all notifier block registrators get converted to be per-netns.
+ */
+ ret = raw_notifier_call_chain(&net->netdev_chain, val, info);
+ if (ret & NOTIFY_STOP_MASK)
+ return ret;
return raw_notifier_call_chain(&netdev_chain, val, info);
}
@@ -2690,7 +2915,7 @@ static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
void netif_schedule_queue(struct netdev_queue *txq)
{
rcu_read_lock();
- if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
+ if (!netif_xmit_stopped(txq)) {
struct Qdisc *q = rcu_dereference(txq->qdisc);
__netif_schedule(q);
@@ -2858,12 +3083,9 @@ int skb_checksum_help(struct sk_buff *skb)
offset += skb->csum_offset;
BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + sizeof(__sum16))) {
- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (ret)
- goto out;
- }
+ ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
+ if (ret)
+ goto out;
*(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
out_set_summed:
@@ -2898,12 +3120,11 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
ret = -EINVAL;
goto out;
}
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + sizeof(__le32))) {
- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (ret)
- goto out;
- }
+
+ ret = skb_ensure_writable(skb, offset + sizeof(__le32));
+ if (ret)
+ goto out;
+
crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
skb->len - start, ~(__u32)0,
crc32c_csum_stub));
@@ -5582,6 +5803,26 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+ if (!napi->rx_count)
+ return;
+ netif_receive_skb_list_internal(&napi->rx_list);
+ INIT_LIST_HEAD(&napi->rx_list);
+ napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
+ * pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+ list_add_tail(&skb->list, &napi->rx_list);
+ if (++napi->rx_count >= gro_normal_batch)
+ gro_normal_list(napi);
+}
+
static void napi_skb_free_stolen_head(struct sk_buff *skb)
{
skb_dst_drop(skb);
@@ -5589,12 +5830,13 @@ static void napi_skb_free_stolen_head(struct sk_buff *skb)
kmem_cache_free(skbuff_head_cache, skb);
}
-static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+static gro_result_t napi_skb_finish(struct napi_struct *napi,
+ struct sk_buff *skb,
+ gro_result_t ret)
{
switch (ret) {
case GRO_NORMAL:
- if (netif_receive_skb_internal(skb))
- ret = GRO_DROP;
+ gro_normal_one(napi, skb);
break;
case GRO_DROP:
@@ -5626,7 +5868,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
skb_gro_reset_offset(skb);
- ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
+ ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
trace_napi_gro_receive_exit(ret);
return ret;
@@ -5672,26 +5914,6 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
}
EXPORT_SYMBOL(napi_get_frags);
-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static void gro_normal_list(struct napi_struct *napi)
-{
- if (!napi->rx_count)
- return;
- netif_receive_skb_list_internal(&napi->rx_list);
- INIT_LIST_HEAD(&napi->rx_list);
- napi->rx_count = 0;
-}
-
-/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
- * pass the whole batch up to the stack.
- */
-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
-{
- list_add_tail(&skb->list, &napi->rx_list);
- if (++napi->rx_count >= gro_normal_batch)
- gro_normal_list(napi);
-}
-
static gro_result_t napi_frags_finish(struct napi_struct *napi,
struct sk_buff *skb,
gro_result_t ret)
@@ -8532,6 +8754,9 @@ static void rollback_registered_many(struct list_head *head)
dev_uc_flush(dev);
dev_mc_flush(dev);
+ netdev_name_node_alt_flush(dev);
+ netdev_name_node_free(dev->name_node);
+
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
@@ -9011,6 +9236,11 @@ int register_netdevice(struct net_device *dev)
if (ret < 0)
goto out;
+ ret = -ENOMEM;
+ dev->name_node = netdev_name_node_head_alloc(dev);
+ if (!dev->name_node)
+ goto out;
+
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@ -9132,6 +9362,8 @@ out:
return ret;
err_uninit:
+ if (dev->name_node)
+ netdev_name_node_free(dev->name_node);
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
if (dev->priv_destructor)
@@ -9946,6 +10178,8 @@ static int __net_init netdev_init(struct net *net)
if (net->dev_index_head == NULL)
goto err_idx;
+ RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
+
return 0;
err_idx:
diff --git a/net/core/devlink.c b/net/core/devlink.c
index f80151eeaf51..97e9a2246929 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -95,16 +95,25 @@ static LIST_HEAD(devlink_list);
*/
static DEFINE_MUTEX(devlink_mutex);
-static struct net *devlink_net(const struct devlink *devlink)
+struct net *devlink_net(const struct devlink *devlink)
{
return read_pnet(&devlink->_net);
}
+EXPORT_SYMBOL_GPL(devlink_net);
-static void devlink_net_set(struct devlink *devlink, struct net *net)
+static void __devlink_net_set(struct devlink *devlink, struct net *net)
{
write_pnet(&devlink->_net, net);
}
+void devlink_net_set(struct devlink *devlink, struct net *net)
+{
+ if (WARN_ON(devlink->registered))
+ return;
+ __devlink_net_set(devlink, net);
+}
+EXPORT_SYMBOL_GPL(devlink_net_set);
+
static struct devlink *devlink_get_from_attrs(struct net *net,
struct nlattr **attrs)
{
@@ -434,8 +443,16 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
{
struct devlink *devlink;
- devlink = devlink_get_from_info(info);
- if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ /* When devlink changes netns, it would not be found
+ * by devlink_get_from_info(). So try if it is stored first.
+ */
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
+ devlink = info->user_ptr[0];
+ } else {
+ devlink = devlink_get_from_info(info);
+ WARN_ON(IS_ERR(devlink));
+ }
+ if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
mutex_unlock(&devlink_mutex);
}
@@ -1035,7 +1052,7 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
struct devlink_sb *devlink_sb;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -1058,6 +1075,9 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
out:
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -1233,7 +1253,7 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
struct devlink_sb *devlink_sb;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -1256,6 +1276,9 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
out:
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -1460,7 +1483,7 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
struct devlink_sb *devlink_sb;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -1485,6 +1508,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
out:
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -2674,6 +2700,72 @@ devlink_resources_validate(struct devlink *devlink,
return err;
}
+static struct net *devlink_netns_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
+ struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
+ struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
+ struct net *net;
+
+ if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
+ NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (netns_pid_attr) {
+ net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
+ } else if (netns_fd_attr) {
+ net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
+ } else if (netns_id_attr) {
+ net = get_net_ns_by_id(sock_net(skb->sk),
+ nla_get_u32(netns_id_attr));
+ if (!net)
+ net = ERR_PTR(-EINVAL);
+ } else {
+ WARN_ON(1);
+ net = ERR_PTR(-EINVAL);
+ }
+ if (IS_ERR(net)) {
+ NL_SET_ERR_MSG(info->extack, "Unknown network namespace");
+ return ERR_PTR(-EINVAL);
+ }
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return ERR_PTR(-EPERM);
+ }
+ return net;
+}
+
+static void devlink_param_notify(struct devlink *devlink,
+ unsigned int port_index,
+ struct devlink_param_item *param_item,
+ enum devlink_command cmd);
+
+static void devlink_reload_netns_change(struct devlink *devlink,
+ struct net *dest_net)
+{
+ struct devlink_param_item *param_item;
+
+ /* Userspace needs to be notified about devlink objects
+ * removed from original and entering new network namespace.
+ * The rest of the devlink objects are re-created during
+ * reload process so the notifications are generated separatelly.
+ */
+
+ list_for_each_entry(param_item, &devlink->param_list, list)
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_DEL);
+ devlink_notify(devlink, DEVLINK_CMD_DEL);
+
+ __devlink_net_set(devlink, dest_net);
+
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+ list_for_each_entry(param_item, &devlink->param_list, list)
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_NEW);
+}
+
static bool devlink_reload_supported(struct devlink *devlink)
{
return devlink->ops->reload_down && devlink->ops->reload_up;
@@ -2694,9 +2786,27 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
+static int devlink_reload(struct devlink *devlink, struct net *dest_net,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ err = devlink->ops->reload_down(devlink, !!dest_net, extack);
+ if (err)
+ return err;
+
+ if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
+ devlink_reload_netns_change(devlink, dest_net);
+
+ err = devlink->ops->reload_up(devlink, extack);
+ devlink_reload_failed_set(devlink, !!err);
+ return err;
+}
+
static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
+ struct net *dest_net = NULL;
int err;
if (!devlink_reload_supported(devlink))
@@ -2707,11 +2817,20 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
return err;
}
- err = devlink->ops->reload_down(devlink, info->extack);
- if (err)
- return err;
- err = devlink->ops->reload_up(devlink, info->extack);
- devlink_reload_failed_set(devlink, !!err);
+
+ if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
+ info->attrs[DEVLINK_ATTR_NETNS_FD] ||
+ info->attrs[DEVLINK_ATTR_NETNS_ID]) {
+ dest_net = devlink_netns_get(skb, info);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
+ }
+
+ err = devlink_reload(devlink, dest_net, info->extack);
+
+ if (dest_net)
+ put_net(dest_net);
+
return err;
}
@@ -3155,7 +3274,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -3183,6 +3302,9 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
out:
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -3411,7 +3533,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -3444,6 +3566,9 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
out:
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -3818,29 +3943,19 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
u64 ret_offset, start_offset, end_offset = 0;
+ struct nlattr **attrs = info->attrs;
struct devlink_region *region;
struct nlattr *chunks_attr;
const char *region_name;
struct devlink *devlink;
- struct nlattr **attrs;
bool dump = true;
void *hdr;
int err;
start_offset = *((u64 *)&cb->args[0]);
- attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
- if (!attrs)
- return -ENOMEM;
-
- err = nlmsg_parse_deprecated(cb->nlh,
- GENL_HDRLEN + devlink_nl_family.hdrsize,
- attrs, DEVLINK_ATTR_MAX,
- devlink_nl_family.policy, cb->extack);
- if (err)
- goto out_free;
-
mutex_lock(&devlink_mutex);
devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
if (IS_ERR(devlink)) {
@@ -3917,7 +4032,6 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
genlmsg_end(skb, hdr);
mutex_unlock(&devlink->lock);
mutex_unlock(&devlink_mutex);
- kfree(attrs);
return skb->len;
@@ -3927,8 +4041,6 @@ out_unlock:
mutex_unlock(&devlink->lock);
out_dev:
mutex_unlock(&devlink_mutex);
-out_free:
- kfree(attrs);
return err;
}
@@ -4066,7 +4178,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -4094,6 +4206,9 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
}
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -4732,14 +4847,17 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
static int
devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
- void *priv_ctx)
+ void *priv_ctx, struct netlink_ext_ack *extack)
{
int err;
+ if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY)
+ return 0;
+
if (!reporter->ops->recover)
return -EOPNOTSUPP;
- err = reporter->ops->recover(reporter, priv_ctx);
+ err = reporter->ops->recover(reporter, priv_ctx, extack);
if (err)
return err;
@@ -4760,7 +4878,8 @@ devlink_health_dump_clear(struct devlink_health_reporter *reporter)
}
static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
- void *priv_ctx)
+ void *priv_ctx,
+ struct netlink_ext_ack *extack)
{
int err;
@@ -4781,7 +4900,7 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
goto dump_err;
err = reporter->ops->dump(reporter, reporter->dump_fmsg,
- priv_ctx);
+ priv_ctx, extack);
if (err)
goto dump_err;
@@ -4828,11 +4947,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
mutex_lock(&reporter->dump_lock);
/* store current dump of current error, for later analysis */
- devlink_health_do_dump(reporter, priv_ctx);
+ devlink_health_do_dump(reporter, priv_ctx, NULL);
mutex_unlock(&reporter->dump_lock);
if (reporter->auto_recover)
- return devlink_health_reporter_recover(reporter, priv_ctx);
+ return devlink_health_reporter_recover(reporter,
+ priv_ctx, NULL);
return 0;
}
@@ -4867,21 +4987,10 @@ devlink_health_reporter_get_from_info(struct devlink *devlink,
static struct devlink_health_reporter *
devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct devlink_health_reporter *reporter;
+ struct nlattr **attrs = info->attrs;
struct devlink *devlink;
- struct nlattr **attrs;
- int err;
-
- attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
- if (!attrs)
- return NULL;
-
- err = nlmsg_parse_deprecated(cb->nlh,
- GENL_HDRLEN + devlink_nl_family.hdrsize,
- attrs, DEVLINK_ATTR_MAX,
- devlink_nl_family.policy, cb->extack);
- if (err)
- goto free;
mutex_lock(&devlink_mutex);
devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
@@ -4890,12 +4999,9 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
mutex_unlock(&devlink_mutex);
- kfree(attrs);
return reporter;
unlock:
mutex_unlock(&devlink_mutex);
-free:
- kfree(attrs);
return NULL;
}
@@ -5084,7 +5190,7 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
if (!reporter)
return -EINVAL;
- err = devlink_health_reporter_recover(reporter, NULL);
+ err = devlink_health_reporter_recover(reporter, NULL, info->extack);
devlink_health_reporter_put(reporter);
return err;
@@ -5117,7 +5223,7 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
if (err)
goto out;
- err = reporter->ops->diagnose(reporter, fmsg);
+ err = reporter->ops->diagnose(reporter, fmsg, info->extack);
if (err)
goto out;
@@ -5152,7 +5258,7 @@ devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
}
mutex_lock(&reporter->dump_lock);
if (!start) {
- err = devlink_health_do_dump(reporter, NULL);
+ err = devlink_health_do_dump(reporter, NULL, cb->extack);
if (err)
goto unlock;
cb->args[1] = reporter->dump_ts;
@@ -5793,6 +5899,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
[DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -6023,7 +6132,8 @@ static const struct genl_ops devlink_nl_ops[] = {
},
{
.cmd = DEVLINK_CMD_REGION_READ,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = devlink_nl_cmd_region_read_dumpit,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
@@ -6071,7 +6181,8 @@ static const struct genl_ops devlink_nl_ops[] = {
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
@@ -6155,7 +6266,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
if (!devlink)
return NULL;
devlink->ops = ops;
- devlink_net_set(devlink, &init_net);
+ __devlink_net_set(devlink, &init_net);
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
@@ -6181,6 +6292,7 @@ int devlink_register(struct devlink *devlink, struct device *dev)
{
mutex_lock(&devlink_mutex);
devlink->dev = dev;
+ devlink->registered = true;
list_add_tail(&devlink->list, &devlink_list);
devlink_notify(devlink, DEVLINK_CMD_NEW);
mutex_unlock(&devlink_mutex);
@@ -8060,9 +8172,43 @@ int devlink_compat_switch_id_get(struct net_device *dev,
return 0;
}
+static void __net_exit devlink_pernet_pre_exit(struct net *net)
+{
+ struct devlink *devlink;
+ int err;
+
+ /* In case network namespace is getting destroyed, reload
+ * all devlink instances from this namespace into init_net.
+ */
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (net_eq(devlink_net(devlink), net)) {
+ if (WARN_ON(!devlink_reload_supported(devlink)))
+ continue;
+ err = devlink_reload(devlink, &init_net, NULL);
+ if (err)
+ pr_warn("Failed to reload devlink instance into init_net\n");
+ }
+ }
+ mutex_unlock(&devlink_mutex);
+}
+
+static struct pernet_operations devlink_pernet_ops __net_initdata = {
+ .pre_exit = devlink_pernet_pre_exit,
+};
+
static int __init devlink_init(void)
{
- return genl_register_family(&devlink_nl_family);
+ int err;
+
+ err = genl_register_family(&devlink_nl_family);
+ if (err)
+ goto out;
+ err = register_pernet_subsys(&devlink_pernet_ops);
+
+out:
+ WARN_ON(err);
+ return err;
}
subsys_initcall(devlink_init);
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 470a606d5e8d..fc96259807b6 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -12,17 +12,15 @@ static unsigned int fib_notifier_net_id;
struct fib_notifier_net {
struct list_head fib_notifier_ops;
+ struct atomic_notifier_head fib_chain;
};
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
+int call_fib_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
int err;
- info->net = net;
err = nb->notifier_call(nb, event_type, info);
return notifier_to_errno(err);
}
@@ -31,106 +29,100 @@ EXPORT_SYMBOL(call_fib_notifier);
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info)
{
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
int err;
- info->net = net;
- err = atomic_notifier_call_chain(&fib_chain, event_type, info);
+ err = atomic_notifier_call_chain(&fn_net->fib_chain, event_type, info);
return notifier_to_errno(err);
}
EXPORT_SYMBOL(call_fib_notifiers);
-static unsigned int fib_seq_sum(void)
+static unsigned int fib_seq_sum(struct net *net)
{
- struct fib_notifier_net *fn_net;
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
struct fib_notifier_ops *ops;
unsigned int fib_seq = 0;
- struct net *net;
rtnl_lock();
- down_read(&net_rwsem);
- for_each_net(net) {
- fn_net = net_generic(net, fib_notifier_net_id);
- rcu_read_lock();
- list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
- if (!try_module_get(ops->owner))
- continue;
- fib_seq += ops->fib_seq_read(net);
- module_put(ops->owner);
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
+ if (!try_module_get(ops->owner))
+ continue;
+ fib_seq += ops->fib_seq_read(net);
+ module_put(ops->owner);
}
- up_read(&net_rwsem);
+ rcu_read_unlock();
rtnl_unlock();
return fib_seq;
}
-static int fib_net_dump(struct net *net, struct notifier_block *nb)
+static int fib_net_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
struct fib_notifier_ops *ops;
+ int err = 0;
+ rcu_read_lock();
list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
- int err;
-
if (!try_module_get(ops->owner))
continue;
- err = ops->fib_dump(net, nb);
+ err = ops->fib_dump(net, nb, extack);
module_put(ops->owner);
if (err)
- return err;
+ goto unlock;
}
- return 0;
+unlock:
+ rcu_read_unlock();
+
+ return err;
}
-static bool fib_dump_is_consistent(struct notifier_block *nb,
+static bool fib_dump_is_consistent(struct net *net, struct notifier_block *nb,
void (*cb)(struct notifier_block *nb),
unsigned int fib_seq)
{
- atomic_notifier_chain_register(&fib_chain, nb);
- if (fib_seq == fib_seq_sum())
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+ atomic_notifier_chain_register(&fn_net->fib_chain, nb);
+ if (fib_seq == fib_seq_sum(net))
return true;
- atomic_notifier_chain_unregister(&fib_chain, nb);
+ atomic_notifier_chain_unregister(&fn_net->fib_chain, nb);
if (cb)
cb(nb);
return false;
}
#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb))
+int register_fib_notifier(struct net *net, struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb),
+ struct netlink_ext_ack *extack)
{
int retries = 0;
int err;
do {
- unsigned int fib_seq = fib_seq_sum();
- struct net *net;
-
- rcu_read_lock();
- for_each_net_rcu(net) {
- err = fib_net_dump(net, nb);
- if (err)
- goto err_fib_net_dump;
- }
- rcu_read_unlock();
-
- if (fib_dump_is_consistent(nb, cb, fib_seq))
+ unsigned int fib_seq = fib_seq_sum(net);
+
+ err = fib_net_dump(net, nb, extack);
+ if (err)
+ return err;
+
+ if (fib_dump_is_consistent(net, nb, cb, fib_seq))
return 0;
} while (++retries < FIB_DUMP_MAX_RETRIES);
return -EBUSY;
-
-err_fib_net_dump:
- rcu_read_unlock();
- return err;
}
EXPORT_SYMBOL(register_fib_notifier);
-int unregister_fib_notifier(struct notifier_block *nb)
+int unregister_fib_notifier(struct net *net, struct notifier_block *nb)
{
- return atomic_notifier_chain_unregister(&fib_chain, nb);
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+ return atomic_notifier_chain_unregister(&fn_net->fib_chain, nb);
}
EXPORT_SYMBOL(unregister_fib_notifier);
@@ -181,6 +173,7 @@ static int __net_init fib_notifier_net_init(struct net *net)
struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
INIT_LIST_HEAD(&fn_net->fib_notifier_ops);
+ ATOMIC_INIT_NOTIFIER_HEAD(&fn_net->fib_chain);
return 0;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index dd220ce7ca7a..3e7e15278c46 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -321,16 +321,18 @@ out:
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);
-static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+static int call_fib_rule_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
- struct fib_rule *rule, int family)
+ struct fib_rule *rule, int family,
+ struct netlink_ext_ack *extack)
{
struct fib_rule_notifier_info info = {
.info.family = family,
+ .info.extack = extack,
.rule = rule,
};
- return call_fib_notifier(nb, net, event_type, &info.info);
+ return call_fib_notifier(nb, event_type, &info.info);
}
static int call_fib_rule_notifiers(struct net *net,
@@ -350,20 +352,25 @@ static int call_fib_rule_notifiers(struct net *net,
}
/* Called with rcu_read_lock() */
-int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
+ struct netlink_ext_ack *extack)
{
struct fib_rules_ops *ops;
struct fib_rule *rule;
+ int err = 0;
ops = lookup_rules_ops(net, family);
if (!ops)
return -EAFNOSUPPORT;
- list_for_each_entry_rcu(rule, &ops->rules_list, list)
- call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
- family);
+ list_for_each_entry_rcu(rule, &ops->rules_list, list) {
+ err = call_fib_rule_notifier(nb, FIB_EVENT_RULE_ADD,
+ rule, family, extack);
+ if (err)
+ break;
+ }
rules_ops_put(ops);
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(fib_rules_dump);
diff --git a/net/core/filter.c b/net/core/filter.c
index 3fed5755494b..fc303abec8fa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2245,7 +2245,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
* account for the headroom.
*/
bytes_sg_total = start - offset + bytes;
- if (!msg->sg.copy[i] && bytes_sg_total <= len)
+ if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
goto out;
/* At this point we need to linearize multiple scatterlist
@@ -2450,7 +2450,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
/* Place newly allocated data buffer */
sk_mem_charge(msg->sk, len);
msg->sg.size += len;
- msg->sg.copy[new] = false;
+ __clear_bit(new, &msg->sg.copy);
sg_set_page(&msg->sg.data[new], page, len + copy, 0);
if (rsge.length) {
get_page(sg_page(&rsge));
@@ -3798,7 +3798,7 @@ BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
- if (unlikely(skb_size > skb->len))
+ if (unlikely(!skb || skb_size > skb->len))
return -EFAULT;
return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
@@ -3816,6 +3816,19 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
+static u32 bpf_skb_output_btf_ids[5];
+const struct bpf_func_proto bpf_skb_output_proto = {
+ .func = bpf_skb_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ .btf_id = bpf_skb_output_btf_ids,
+};
+
static unsigned short bpf_tunnel_key_af(u64 flags)
{
return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 68eda10d0680..ca871657a4c4 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -114,19 +114,50 @@ int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
{
struct bpf_prog *attached;
struct net *net;
+ int ret = 0;
net = current->nsproxy->net_ns;
mutex_lock(&flow_dissector_mutex);
+
+ if (net == &init_net) {
+ /* BPF flow dissector in the root namespace overrides
+ * any per-net-namespace one. When attaching to root,
+ * make sure we don't have any BPF program attached
+ * to the non-root namespaces.
+ */
+ struct net *ns;
+
+ for_each_net(ns) {
+ if (ns == &init_net)
+ continue;
+ if (rcu_access_pointer(ns->flow_dissector_prog)) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ } else {
+ /* Make sure root flow dissector is not attached
+ * when attaching to the non-root namespace.
+ */
+ if (rcu_access_pointer(init_net.flow_dissector_prog)) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+
attached = rcu_dereference_protected(net->flow_dissector_prog,
lockdep_is_held(&flow_dissector_mutex));
- if (attached) {
- /* Only one BPF program can be attached at a time */
- mutex_unlock(&flow_dissector_mutex);
- return -EEXIST;
+ if (attached == prog) {
+ /* The same program cannot be attached twice */
+ ret = -EINVAL;
+ goto out;
}
rcu_assign_pointer(net->flow_dissector_prog, prog);
+ if (attached)
+ bpf_prog_put(attached);
+out:
mutex_unlock(&flow_dissector_mutex);
- return 0;
+ return ret;
}
int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
@@ -147,27 +178,6 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
mutex_unlock(&flow_dissector_mutex);
return 0;
}
-/**
- * skb_flow_get_be16 - extract be16 entity
- * @skb: sk_buff to extract from
- * @poff: offset to extract at
- * @data: raw buffer pointer to the packet
- * @hlen: packet header length
- *
- * The function will try to retrieve a be32 entity at
- * offset poff
- */
-static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
- void *data, int hlen)
-{
- __be16 *u, _u;
-
- u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
- if (u)
- return *u;
-
- return 0;
-}
/**
* __skb_flow_get_ports - extract the upper layer ports and return them
@@ -203,6 +213,72 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+static bool icmp_has_id(u8 type)
+{
+ switch (type) {
+ case ICMP_ECHO:
+ case ICMP_ECHOREPLY:
+ case ICMP_TIMESTAMP:
+ case ICMP_TIMESTAMPREPLY:
+ case ICMPV6_ECHO_REQUEST:
+ case ICMPV6_ECHO_REPLY:
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields
+ * @skb: sk_buff to extract from
+ * @key_icmp: struct flow_dissector_key_icmp to fill
+ * @data: raw buffer pointer to the packet
+ * @toff: offset to extract at
+ * @hlen: packet header length
+ */
+void skb_flow_get_icmp_tci(const struct sk_buff *skb,
+ struct flow_dissector_key_icmp *key_icmp,
+ void *data, int thoff, int hlen)
+{
+ struct icmphdr *ih, _ih;
+
+ ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih);
+ if (!ih)
+ return;
+
+ key_icmp->type = ih->type;
+ key_icmp->code = ih->code;
+
+ /* As we use 0 to signal that the Id field is not present,
+ * avoid confusion with packets without such field
+ */
+ if (icmp_has_id(ih->type))
+ key_icmp->id = ih->un.echo.id ? : 1;
+ else
+ key_icmp->id = 0;
+}
+EXPORT_SYMBOL(skb_flow_get_icmp_tci);
+
+/* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet
+ * using skb_flow_get_icmp_tci().
+ */
+static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container,
+ void *data, int thoff, int hlen)
+{
+ struct flow_dissector_key_icmp *key_icmp;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP))
+ return;
+
+ key_icmp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ICMP,
+ target_container);
+
+ skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
+}
+
void skb_flow_dissect_meta(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container)
@@ -853,7 +929,6 @@ bool __skb_flow_dissect(const struct net *net,
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
- struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
struct bpf_prog *attached = NULL;
@@ -910,7 +985,10 @@ bool __skb_flow_dissect(const struct net *net,
WARN_ON_ONCE(!net);
if (net) {
rcu_read_lock();
- attached = rcu_dereference(net->flow_dissector_prog);
+ attached = rcu_dereference(init_net.flow_dissector_prog);
+
+ if (!attached)
+ attached = rcu_dereference(net->flow_dissector_prog);
if (attached) {
struct bpf_flow_keys flow_keys;
@@ -1295,6 +1373,12 @@ ip_proto_again:
data, nhoff, hlen);
break;
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ __skb_flow_dissect_icmp(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
+
default:
break;
}
@@ -1308,14 +1392,6 @@ ip_proto_again:
data, hlen);
}
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ICMP)) {
- key_icmp = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_ICMP,
- target_container);
- key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
- }
-
/* Process result of IP proto processing */
switch (fdret) {
case FLOW_DISSECT_RET_PROTO_AGAIN:
@@ -1365,8 +1441,8 @@ static const void *flow_keys_hash_start(const struct flow_keys *flow)
static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
{
size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
- BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
- sizeof(*flow) - sizeof(flow->addrs));
+
+ BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
switch (flow->control.addr_type) {
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
@@ -1412,6 +1488,9 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow)
}
EXPORT_SYMBOL(flow_get_u32_dst);
+/* Sort the source and destination IP (and the ports if the IP are the same),
+ * to have consistent hash within the two directions
+ */
static inline void __flow_hash_consistentify(struct flow_keys *keys)
{
int addr_diff, i;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 36347933ec3a..6bbd06f7dc7d 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -20,8 +20,8 @@ static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff
struct hlist_head *h;
unsigned int count = 0, offset = get_offset(*pos);
- h = &net->dev_name_head[get_bucket(*pos)];
- hlist_for_each_entry_rcu(dev, h, name_hlist) {
+ h = &net->dev_index_head[get_bucket(*pos)];
+ hlist_for_each_entry_rcu(dev, h, index_hlist) {
if (++count == offset)
return dev;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 48b1e429857c..294bfcf0ce0e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3404,7 +3404,6 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
HARD_TX_LOCK(odev, txq, smp_processor_id());
if (unlikely(netif_xmit_frozen_or_drv_stopped(txq))) {
- ret = NETDEV_TX_BUSY;
pkt_dev->last_ok = 0;
goto unlock;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c81cd80114d9..000eddb1207d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -980,6 +980,19 @@ static size_t rtnl_xdp_size(void)
return xdp_size;
}
+static size_t rtnl_prop_list_size(const struct net_device *dev)
+{
+ struct netdev_name_node *name_node;
+ size_t size;
+
+ if (list_empty(&dev->name_node->list))
+ return 0;
+ size = nla_total_size(0);
+ list_for_each_entry(name_node, &dev->name_node->list, list)
+ size += nla_total_size(ALTIFNAMSIZ);
+ return size;
+}
+
static noinline size_t if_nlmsg_size(const struct net_device *dev,
u32 ext_filter_mask)
{
@@ -1027,6 +1040,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
+ nla_total_size(4) /* IFLA_MIN_MTU */
+ nla_total_size(4) /* IFLA_MAX_MTU */
+ + rtnl_prop_list_size(dev)
+ 0;
}
@@ -1584,6 +1598,42 @@ static int rtnl_fill_link_af(struct sk_buff *skb,
return 0;
}
+static int rtnl_fill_alt_ifnames(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ struct netdev_name_node *name_node;
+ int count = 0;
+
+ list_for_each_entry(name_node, &dev->name_node->list, list) {
+ if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name))
+ return -EMSGSIZE;
+ count++;
+ }
+ return count;
+}
+
+static int rtnl_fill_prop_list(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ struct nlattr *prop_list;
+ int ret;
+
+ prop_list = nla_nest_start(skb, IFLA_PROP_LIST);
+ if (!prop_list)
+ return -EMSGSIZE;
+
+ ret = rtnl_fill_alt_ifnames(skb, dev);
+ if (ret <= 0)
+ goto nest_cancel;
+
+ nla_nest_end(skb, prop_list);
+ return 0;
+
+nest_cancel:
+ nla_nest_cancel(skb, prop_list);
+ return ret;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb,
struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
@@ -1697,6 +1747,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure_rcu;
rcu_read_unlock();
+ if (rtnl_fill_prop_list(skb, dev))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -1750,6 +1803,9 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
[IFLA_MIN_MTU] = { .type = NLA_U32 },
[IFLA_MAX_MTU] = { .type = NLA_U32 },
+ [IFLA_PROP_LIST] = { .type = NLA_NESTED },
+ [IFLA_ALT_IFNAME] = { .type = NLA_STRING,
+ .len = ALTIFNAMSIZ - 1 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2723,6 +2779,26 @@ errout:
return err;
}
+static struct net_device *rtnl_dev_get(struct net *net,
+ struct nlattr *ifname_attr,
+ struct nlattr *altifname_attr,
+ char *ifname)
+{
+ char buffer[ALTIFNAMSIZ];
+
+ if (!ifname) {
+ ifname = buffer;
+ if (ifname_attr)
+ nla_strlcpy(ifname, ifname_attr, IFNAMSIZ);
+ else if (altifname_attr)
+ nla_strlcpy(ifname, altifname_attr, ALTIFNAMSIZ);
+ else
+ return NULL;
+ }
+
+ return __dev_get_by_name(net, ifname);
+}
+
static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -2751,8 +2827,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
- else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(net, ifname);
+ else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname);
else
goto errout;
@@ -2825,7 +2901,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *tgt_net = net;
struct net_device *dev = NULL;
struct ifinfomsg *ifm;
- char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
int err;
int netnsid = -1;
@@ -2839,9 +2914,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
- if (tb[IFLA_IFNAME])
- nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
-
if (tb[IFLA_TARGET_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]);
tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid);
@@ -2853,8 +2925,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
- else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(tgt_net, ifname);
+ else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ dev = rtnl_dev_get(net, tb[IFLA_IFNAME],
+ tb[IFLA_ALT_IFNAME], NULL);
else if (tb[IFLA_GROUP])
err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
else
@@ -3025,12 +3098,10 @@ replay:
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
- else {
- if (ifname[0])
- dev = __dev_get_by_name(net, ifname);
- else
- dev = NULL;
- }
+ else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname);
+ else
+ dev = NULL;
if (dev) {
master_dev = netdev_master_upper_dev_get(dev);
@@ -3292,6 +3363,7 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb,
switch (i) {
case IFLA_IFNAME:
+ case IFLA_ALT_IFNAME:
case IFLA_EXT_MASK:
case IFLA_TARGET_NETNSID:
break;
@@ -3310,7 +3382,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct net *tgt_net = net;
struct ifinfomsg *ifm;
- char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
struct sk_buff *nskb;
@@ -3333,9 +3404,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(tgt_net);
}
- if (tb[IFLA_IFNAME])
- nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
-
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -3343,8 +3411,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
- else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(tgt_net, ifname);
+ else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ dev = rtnl_dev_get(tgt_net, tb[IFLA_IFNAME],
+ tb[IFLA_ALT_IFNAME], NULL);
else
goto out;
@@ -3374,6 +3443,100 @@ out:
return err;
}
+static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
+ bool *changed, struct netlink_ext_ack *extack)
+{
+ char *alt_ifname;
+ int err;
+
+ err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack);
+ if (err)
+ return err;
+
+ alt_ifname = nla_data(attr);
+ if (cmd == RTM_NEWLINKPROP) {
+ alt_ifname = kstrdup(alt_ifname, GFP_KERNEL);
+ if (!alt_ifname)
+ return -ENOMEM;
+ err = netdev_name_node_alt_create(dev, alt_ifname);
+ if (err) {
+ kfree(alt_ifname);
+ return err;
+ }
+ } else if (cmd == RTM_DELLINKPROP) {
+ err = netdev_name_node_alt_destroy(dev, alt_ifname);
+ if (err)
+ return err;
+ } else {
+ WARN_ON(1);
+ return 0;
+ }
+
+ *changed = true;
+ return 0;
+}
+
+static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct net_device *dev;
+ struct ifinfomsg *ifm;
+ bool changed = false;
+ struct nlattr *attr;
+ int err, rem;
+
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
+ if (err)
+ return err;
+
+ err = rtnl_ensure_unique_netns(tb, extack, true);
+ if (err)
+ return err;
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifi_index > 0)
+ dev = __dev_get_by_index(net, ifm->ifi_index);
+ else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ dev = rtnl_dev_get(net, tb[IFLA_IFNAME],
+ tb[IFLA_ALT_IFNAME], NULL);
+ else
+ return -EINVAL;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (!tb[IFLA_PROP_LIST])
+ return 0;
+
+ nla_for_each_nested(attr, tb[IFLA_PROP_LIST], rem) {
+ switch (nla_type(attr)) {
+ case IFLA_ALT_IFNAME:
+ err = rtnl_alt_ifname(cmd, dev, attr, &changed, extack);
+ if (err)
+ return err;
+ break;
+ }
+ }
+
+ if (changed)
+ netdev_state_change(dev);
+ return 0;
+}
+
+static int rtnl_newlinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ return rtnl_linkprop(RTM_NEWLINKPROP, skb, nlh, extack);
+}
+
+static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack);
+}
+
static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
@@ -5332,6 +5495,9 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);
+ rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0);
+
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
diff --git a/net/core/sock.c b/net/core/sock.c
index ac78a570e43a..71787f7c4f8c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -333,7 +333,6 @@ EXPORT_SYMBOL(__sk_backlog_rcv);
static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
{
struct __kernel_sock_timeval tv;
- int size;
if (timeo == MAX_SCHEDULE_TIMEOUT) {
tv.tv_sec = 0;
@@ -354,13 +353,11 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
old_tv.tv_sec = tv.tv_sec;
old_tv.tv_usec = tv.tv_usec;
*(struct __kernel_old_timeval *)optval = old_tv;
- size = sizeof(old_tv);
- } else {
- *(struct __kernel_sock_timeval *)optval = tv;
- size = sizeof(tv);
+ return sizeof(old_tv);
}
- return size;
+ *(struct __kernel_sock_timeval *)optval = tv;
+ return sizeof(tv);
}
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval)
@@ -687,7 +684,8 @@ out:
return ret;
}
-static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
+static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
+ int valbool)
{
if (valbool)
sock_set_flag(sk, bit);
@@ -3015,7 +3013,7 @@ int sock_gettstamp(struct socket *sock, void __user *userstamp,
return -ENOENT;
if (ts.tv_sec == 0) {
ktime_t kt = ktime_get_real();
- sock_write_timestamp(sk, kt);;
+ sock_write_timestamp(sk, kt);
ts = ktime_to_timespec64(kt);
}
@@ -3042,7 +3040,7 @@ int sock_gettstamp(struct socket *sock, void __user *userstamp,
}
EXPORT_SYMBOL(sock_gettstamp);
-void sock_enable_timestamp(struct sock *sk, int flag)
+void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
{
if (!sock_flag(sk, flag)) {
unsigned long previous_flags = sk->sk_flags;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index d7bf62ffbb5e..20781ad5f9c3 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
/* XDP RX runs under NAPI protection, and in different delivery error
* scenarios (e.g. queue full), it is possible to return the xdp_frame
- * while still leveraging this protection. The @napi_direct boolian
+ * while still leveraging this protection. The @napi_direct boolean
* is used for those calls sites. Thus, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 43120a3fb06f..db1c1c7e40e9 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -246,7 +246,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
#ifdef CONFIG_PM_SLEEP
static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
{
- return dsa_is_user_port(ds, p) && ds->ports[p].slave;
+ const struct dsa_port *dp = dsa_to_port(ds, p);
+
+ return dp->type == DSA_PORT_TYPE_USER && dp->slave;
}
int dsa_switch_suspend(struct dsa_switch *ds)
@@ -258,7 +260,7 @@ int dsa_switch_suspend(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_suspend(ds->ports[i].slave);
+ ret = dsa_slave_suspend(dsa_to_port(ds, i)->slave);
if (ret)
return ret;
}
@@ -285,7 +287,7 @@ int dsa_switch_resume(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_resume(ds->ports[i].slave);
+ ret = dsa_slave_resume(dsa_to_port(ds, i)->slave);
if (ret)
return ret;
}
@@ -329,6 +331,54 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(call_dsa_notifiers);
+int dsa_devlink_param_get(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct dsa_devlink_priv *dl_priv;
+ struct dsa_switch *ds;
+
+ dl_priv = devlink_priv(dl);
+ ds = dl_priv->ds;
+
+ if (!ds->ops->devlink_param_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_param_get(ds, id, ctx);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_param_get);
+
+int dsa_devlink_param_set(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct dsa_devlink_priv *dl_priv;
+ struct dsa_switch *ds;
+
+ dl_priv = devlink_priv(dl);
+ ds = dl_priv->ds;
+
+ if (!ds->ops->devlink_param_set)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_param_set(ds, id, ctx);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_param_set);
+
+int dsa_devlink_params_register(struct dsa_switch *ds,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ return devlink_params_register(ds->devlink, params, params_count);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_params_register);
+
+void dsa_devlink_params_unregister(struct dsa_switch *ds,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ devlink_params_unregister(ds->devlink, params, params_count);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_params_unregister);
+
static int __init dsa_init_module(void)
{
int rc;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 716d265ba8ca..ff2fa3950c62 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -45,6 +45,10 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index)
dst->index = index;
+ INIT_LIST_HEAD(&dst->rtable);
+
+ INIT_LIST_HEAD(&dst->ports);
+
INIT_LIST_HEAD(&dst->list);
list_add_tail(&dst->list, &dsa_tree_list);
@@ -111,24 +115,38 @@ static bool dsa_port_is_user(struct dsa_port *dp)
static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
struct device_node *dn)
{
- struct dsa_switch *ds;
struct dsa_port *dp;
- int device, port;
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->dn == dn)
+ return dp;
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
+ return NULL;
+}
- if (dp->dn == dn)
- return dp;
- }
- }
+struct dsa_link *dsa_link_touch(struct dsa_port *dp, struct dsa_port *link_dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst;
+ struct dsa_link *dl;
- return NULL;
+ dst = ds->dst;
+
+ list_for_each_entry(dl, &dst->rtable, list)
+ if (dl->dp == dp && dl->link_dp == link_dp)
+ return dl;
+
+ dl = kzalloc(sizeof(*dl), GFP_KERNEL);
+ if (!dl)
+ return NULL;
+
+ dl->dp = dp;
+ dl->link_dp = link_dp;
+
+ INIT_LIST_HEAD(&dl->list);
+ list_add_tail(&dl->list, &dst->rtable);
+
+ return dl;
}
static bool dsa_port_setup_routing_table(struct dsa_port *dp)
@@ -138,6 +156,7 @@ static bool dsa_port_setup_routing_table(struct dsa_port *dp)
struct device_node *dn = dp->dn;
struct of_phandle_iterator it;
struct dsa_port *link_dp;
+ struct dsa_link *dl;
int err;
of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
@@ -147,24 +166,22 @@ static bool dsa_port_setup_routing_table(struct dsa_port *dp)
return false;
}
- ds->rtable[link_dp->ds->index] = dp->index;
+ dl = dsa_link_touch(dp, link_dp);
+ if (!dl) {
+ of_node_put(it.node);
+ return false;
+ }
}
return true;
}
-static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
+static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
{
bool complete = true;
struct dsa_port *dp;
- int i;
-
- for (i = 0; i < DSA_MAX_SWITCHES; i++)
- ds->rtable[i] = DSA_RTABLE_NONE;
-
- for (i = 0; i < ds->num_ports; i++) {
- dp = &ds->ports[i];
+ list_for_each_entry(dp, &dst->ports, list) {
if (dsa_port_is_dsa(dp)) {
complete = dsa_port_setup_routing_table(dp);
if (!complete)
@@ -175,81 +192,42 @@ static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
return complete;
}
-static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
-{
- struct dsa_switch *ds;
- bool complete = true;
- int device;
-
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
-
- complete = dsa_switch_setup_routing_table(ds);
- if (!complete)
- break;
- }
-
- return complete;
-}
-
static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
struct dsa_port *dp;
- int device, port;
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
-
- if (dsa_port_is_cpu(dp))
- return dp;
- }
- }
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_cpu(dp))
+ return dp;
return NULL;
}
static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
- struct dsa_port *dp;
- int device, port;
+ struct dsa_port *cpu_dp, *dp;
- /* DSA currently only supports a single CPU port */
- dst->cpu_dp = dsa_tree_find_first_cpu(dst);
- if (!dst->cpu_dp) {
- pr_warn("Tree has no master device\n");
+ cpu_dp = dsa_tree_find_first_cpu(dst);
+ if (!cpu_dp) {
+ pr_err("DSA: tree %d has no CPU port\n", dst->index);
return -EINVAL;
}
/* Assign the default CPU port to all ports of the fabric */
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
-
- if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
- dp->cpu_dp = dst->cpu_dp;
- }
- }
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = cpu_dp;
return 0;
}
static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
{
- /* DSA currently only supports a single CPU port */
- dst->cpu_dp = NULL;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = NULL;
}
static int dsa_port_setup(struct dsa_port *dp)
@@ -265,6 +243,9 @@ static int dsa_port_setup(struct dsa_port *dp)
bool dsa_port_enabled = false;
int err = 0;
+ if (dp->setup)
+ return 0;
+
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
dsa_port_disable(dp);
@@ -333,14 +314,21 @@ static int dsa_port_setup(struct dsa_port *dp)
dsa_port_link_unregister_of(dp);
if (err && devlink_port_registered)
devlink_port_unregister(dlp);
+ if (err)
+ return err;
- return err;
+ dp->setup = true;
+
+ return 0;
}
static void dsa_port_teardown(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
+ if (!dp->setup)
+ return;
+
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
break;
@@ -363,11 +351,17 @@ static void dsa_port_teardown(struct dsa_port *dp)
}
break;
}
+
+ dp->setup = false;
}
static int dsa_switch_setup(struct dsa_switch *ds)
{
- int err = 0;
+ struct dsa_devlink_priv *dl_priv;
+ int err;
+
+ if (ds->setup)
+ return 0;
/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
* driver and before ops->setup() has run, since the switch drivers and
@@ -379,9 +373,11 @@ static int dsa_switch_setup(struct dsa_switch *ds)
/* Add the switch to devlink before calling setup, so that setup can
* add dpipe tables
*/
- ds->devlink = devlink_alloc(&dsa_devlink_ops, 0);
+ ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv));
if (!ds->devlink)
return -ENOMEM;
+ dl_priv = devlink_priv(ds->devlink);
+ dl_priv->ds = ds;
err = devlink_register(ds->devlink, ds->dev);
if (err)
@@ -395,6 +391,8 @@ static int dsa_switch_setup(struct dsa_switch *ds)
if (err < 0)
goto unregister_notifier;
+ devlink_params_publish(ds->devlink);
+
if (!ds->slave_mii_bus && ds->ops->phy_read) {
ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus) {
@@ -409,6 +407,8 @@ static int dsa_switch_setup(struct dsa_switch *ds)
goto unregister_notifier;
}
+ ds->setup = true;
+
return 0;
unregister_notifier:
@@ -424,6 +424,9 @@ free_devlink:
static void dsa_switch_teardown(struct dsa_switch *ds)
{
+ if (!ds->setup)
+ return;
+
if (ds->slave_mii_bus && ds->ops->phy_read)
mdiobus_unregister(ds->slave_mii_bus);
@@ -438,95 +441,72 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
ds->devlink = NULL;
}
+ ds->setup = false;
}
static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
struct dsa_port *dp;
- int device, port, i;
- int err = 0;
-
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
+ int err;
- err = dsa_switch_setup(ds);
+ list_for_each_entry(dp, &dst->ports, list) {
+ err = dsa_switch_setup(dp->ds);
if (err)
- goto switch_teardown;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
+ goto teardown;
+ }
- err = dsa_port_setup(dp);
- if (err)
- goto ports_teardown;
- }
+ list_for_each_entry(dp, &dst->ports, list) {
+ err = dsa_port_setup(dp);
+ if (err)
+ goto teardown;
}
return 0;
-ports_teardown:
- for (i = 0; i < port; i++)
- dsa_port_teardown(&ds->ports[i]);
-
- dsa_switch_teardown(ds);
-
-switch_teardown:
- for (i = 0; i < device; i++) {
- ds = dst->ds[i];
- if (!ds)
- continue;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
+teardown:
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_port_teardown(dp);
- dsa_port_teardown(dp);
- }
-
- dsa_switch_teardown(ds);
- }
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_switch_teardown(dp->ds);
return err;
}
static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
struct dsa_port *dp;
- int device, port;
-
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
- dsa_port_teardown(dp);
- }
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_port_teardown(dp);
- dsa_switch_teardown(ds);
- }
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_switch_teardown(dp->ds);
}
static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
{
- struct dsa_port *cpu_dp = dst->cpu_dp;
- struct net_device *master = cpu_dp->master;
+ struct dsa_port *dp;
+ int err;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dsa_port_is_cpu(dp)) {
+ err = dsa_master_setup(dp->master, dp);
+ if (err)
+ return err;
+ }
+ }
- /* DSA currently supports a single pair of CPU port and master device */
- return dsa_master_setup(master, cpu_dp);
+ return 0;
}
static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
{
- struct dsa_port *cpu_dp = dst->cpu_dp;
- struct net_device *master = cpu_dp->master;
+ struct dsa_port *dp;
- return dsa_master_teardown(master);
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_cpu(dp))
+ dsa_master_teardown(dp->master);
}
static int dsa_tree_setup(struct dsa_switch_tree *dst)
@@ -572,6 +552,8 @@ teardown_default_cpu:
static void dsa_tree_teardown(struct dsa_switch_tree *dst)
{
+ struct dsa_link *dl, *next;
+
if (!dst->setup)
return;
@@ -581,39 +563,36 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
dsa_tree_teardown_default_cpu(dst);
+ list_for_each_entry_safe(dl, next, &dst->rtable, list) {
+ list_del(&dl->list);
+ kfree(dl);
+ }
+
pr_info("DSA: tree %d torn down\n", dst->index);
dst->setup = false;
}
-static void dsa_tree_remove_switch(struct dsa_switch_tree *dst,
- unsigned int index)
+static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
{
- dsa_tree_teardown(dst);
+ struct dsa_switch_tree *dst = ds->dst;
+ struct dsa_port *dp;
- dst->ds[index] = NULL;
- dsa_tree_put(dst);
-}
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->ds == ds && dp->index == index)
+ return dp;
-static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
- struct dsa_switch *ds)
-{
- unsigned int index = ds->index;
- int err;
+ dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+ if (!dp)
+ return NULL;
- if (dst->ds[index])
- return -EBUSY;
+ dp->ds = ds;
+ dp->index = index;
- dsa_tree_get(dst);
- dst->ds[index] = ds;
+ INIT_LIST_HEAD(&dp->list);
+ list_add_tail(&dp->list, &dst->ports);
- err = dsa_tree_setup(dst);
- if (err) {
- dst->ds[index] = NULL;
- dsa_tree_put(dst);
- }
-
- return err;
+ return dp;
}
static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
@@ -708,7 +687,7 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
goto out_put_node;
}
- dp = &ds->ports[reg];
+ dp = dsa_to_port(ds, reg);
err = dsa_port_parse_of(dp, port);
if (err)
@@ -732,8 +711,6 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds,
return sz;
ds->index = m[1];
- if (ds->index >= DSA_MAX_SWITCHES)
- return -EINVAL;
ds->dst = dsa_tree_touch(m[0]);
if (!ds->dst)
@@ -742,6 +719,20 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds,
return 0;
}
+static int dsa_switch_touch_ports(struct dsa_switch *ds)
+{
+ struct dsa_port *dp;
+ int port;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = dsa_port_touch(ds, port);
+ if (!dp)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
{
int err;
@@ -750,6 +741,10 @@ static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
if (err)
return err;
+ err = dsa_switch_touch_ports(ds);
+ if (err)
+ return err;
+
return dsa_switch_parse_ports_of(ds, dn);
}
@@ -787,7 +782,7 @@ static int dsa_switch_parse_ports(struct dsa_switch *ds,
for (i = 0; i < DSA_MAX_PORTS; i++) {
name = cd->port_names[i];
dev = cd->netdev[i];
- dp = &ds->ports[i];
+ dp = dsa_to_port(ds, i);
if (!name)
continue;
@@ -807,6 +802,8 @@ static int dsa_switch_parse_ports(struct dsa_switch *ds,
static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
{
+ int err;
+
ds->cd = cd;
/* We don't support interconnected switches nor multiple trees via
@@ -817,22 +814,29 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
if (!ds->dst)
return -ENOMEM;
- return dsa_switch_parse_ports(ds, cd);
-}
-
-static int dsa_switch_add(struct dsa_switch *ds)
-{
- struct dsa_switch_tree *dst = ds->dst;
+ err = dsa_switch_touch_ports(ds);
+ if (err)
+ return err;
- return dsa_tree_add_switch(dst, ds);
+ return dsa_switch_parse_ports(ds, cd);
}
static int dsa_switch_probe(struct dsa_switch *ds)
{
- struct dsa_chip_data *pdata = ds->dev->platform_data;
- struct device_node *np = ds->dev->of_node;
+ struct dsa_switch_tree *dst;
+ struct dsa_chip_data *pdata;
+ struct device_node *np;
int err;
+ if (!ds->dev)
+ return -ENODEV;
+
+ pdata = ds->dev->platform_data;
+ np = ds->dev->of_node;
+
+ if (!ds->num_ports)
+ return -EINVAL;
+
if (np)
err = dsa_switch_parse_of(ds, np);
else if (pdata)
@@ -843,29 +847,14 @@ static int dsa_switch_probe(struct dsa_switch *ds)
if (err)
return err;
- return dsa_switch_add(ds);
-}
-
-struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
-{
- struct dsa_switch *ds;
- int i;
-
- ds = devm_kzalloc(dev, struct_size(ds, ports, n), GFP_KERNEL);
- if (!ds)
- return NULL;
-
- ds->dev = dev;
- ds->num_ports = n;
-
- for (i = 0; i < ds->num_ports; ++i) {
- ds->ports[i].index = i;
- ds->ports[i].ds = ds;
- }
+ dst = ds->dst;
+ dsa_tree_get(dst);
+ err = dsa_tree_setup(dst);
+ if (err)
+ dsa_tree_put(dst);
- return ds;
+ return err;
}
-EXPORT_SYMBOL_GPL(dsa_switch_alloc);
int dsa_register_switch(struct dsa_switch *ds)
{
@@ -883,9 +872,15 @@ EXPORT_SYMBOL_GPL(dsa_register_switch);
static void dsa_switch_remove(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
- unsigned int index = ds->index;
+ struct dsa_port *dp, *next;
- dsa_tree_remove_switch(dst, index);
+ list_for_each_entry_safe(dp, next, &dst->ports, list) {
+ list_del(&dp->list);
+ kfree(dp);
+ }
+
+ dsa_tree_teardown(dst);
+ dsa_tree_put(dst);
}
void dsa_unregister_switch(struct dsa_switch *ds)
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 12f8c7ee4dd8..53e7577896b6 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -104,25 +104,14 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->dst;
- struct dsa_switch *ds;
- struct dsa_port *slave_port;
+ struct dsa_port *dp;
- if (device < 0 || device >= DSA_MAX_SWITCHES)
- return NULL;
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->ds->index == device && dp->index == port &&
+ dp->type == DSA_PORT_TYPE_USER)
+ return dp->slave;
- ds = dst->ds[device];
- if (!ds)
- return NULL;
-
- if (port < 0 || port >= ds->num_ports)
- return NULL;
-
- slave_port = &ds->ports[port];
-
- if (unlikely(slave_port->type != DSA_PORT_TYPE_USER))
- return NULL;
-
- return slave_port->slave;
+ return NULL;
}
/* port.c */
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 028e65f4b5ba..d18761649754 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -789,6 +789,22 @@ static int dsa_slave_set_link_ksettings(struct net_device *dev,
return phylink_ethtool_ksettings_set(dp->pl, cmd);
}
+static void dsa_slave_get_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ phylink_ethtool_get_pauseparam(dp->pl, pause);
+}
+
+static int dsa_slave_set_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return phylink_ethtool_set_pauseparam(dp->pl, pause);
+}
+
#ifdef CONFIG_NET_POLL_CONTROLLER
static int dsa_slave_netpoll_setup(struct net_device *dev,
struct netpoll_info *ni)
@@ -1192,6 +1208,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_eee = dsa_slave_get_eee,
.get_link_ksettings = dsa_slave_get_link_ksettings,
.set_link_ksettings = dsa_slave_set_link_ksettings,
+ .get_pauseparam = dsa_slave_get_pauseparam,
+ .set_pauseparam = dsa_slave_set_pauseparam,
.get_rxnfc = dsa_slave_get_rxnfc,
.set_rxnfc = dsa_slave_set_rxnfc,
.get_ts_info = dsa_slave_get_ts_info,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 6a9607518823..df4abe897ed6 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -20,7 +20,7 @@ static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
int i;
for (i = 0; i < ds->num_ports; ++i) {
- struct dsa_port *dp = &ds->ports[i];
+ struct dsa_port *dp = dsa_to_port(ds, i);
if (dp->ageing_time && dp->ageing_time < ageing_time)
ageing_time = dp->ageing_time;
@@ -98,7 +98,7 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
if (unset_vlan_filtering) {
struct switchdev_trans trans = {0};
- err = dsa_port_vlan_filtering(&ds->ports[info->port],
+ err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port),
false, &trans);
if (err && err != EOPNOTSUPP)
return err;
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 9c1cc2482b68..bc5cb91bf052 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -31,15 +31,14 @@
* Must be transmitted as zero and ignored on receive.
*
* SWITCH_ID - VID[8:6]:
- * Index of switch within DSA tree. Must be between 0 and
- * DSA_MAX_SWITCHES - 1.
+ * Index of switch within DSA tree. Must be between 0 and 7.
*
* RSV - VID[5:4]:
* To be used for further expansion of PORT or for other purposes.
* Must be transmitted as zero and ignored on receive.
*
* PORT - VID[3:0]:
- * Index of switch port. Must be between 0 and DSA_MAX_PORTS - 1.
+ * Index of switch port. Must be between 0 and 15.
*/
#define DSA_8021Q_DIR_SHIFT 10
@@ -103,7 +102,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
if (!dsa_is_user_port(ds, port))
return 0;
- slave = ds->ports[port].slave;
+ slave = dsa_to_port(ds, port)->slave;
err = br_vlan_get_pvid(slave, &pvid);
if (err < 0)
@@ -118,7 +117,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
return err;
}
- return dsa_port_vid_add(&ds->ports[port], pvid, vinfo.flags);
+ return dsa_port_vid_add(dsa_to_port(ds, port), pvid, vinfo.flags);
}
/* If @enabled is true, installs @vid with @flags into the switch port's HW
@@ -130,7 +129,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid,
u16 flags, bool enabled)
{
- struct dsa_port *dp = &ds->ports[port];
+ struct dsa_port *dp = dsa_to_port(ds, port);
struct bridge_vlan_info vinfo;
int err;
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index ffcfcef76291..7c5a1aa5adb4 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -236,21 +236,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
struct cfg802154_registered_device **rdev,
struct wpan_dev **wpan_dev)
{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
int err;
rtnl_lock();
if (!cb->args[0]) {
- err = nlmsg_parse_deprecated(cb->nlh,
- GENL_HDRLEN + nl802154_fam.hdrsize,
- genl_family_attrbuf(&nl802154_fam),
- nl802154_fam.maxattr,
- nl802154_policy, NULL);
- if (err)
- goto out_unlock;
-
*wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
- genl_family_attrbuf(&nl802154_fam));
+ info->attrs);
if (IS_ERR(*wpan_dev)) {
err = PTR_ERR(*wpan_dev);
goto out_unlock;
@@ -557,17 +550,8 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
struct netlink_callback *cb,
struct nl802154_dump_wpan_phy_state *state)
{
- struct nlattr **tb = genl_family_attrbuf(&nl802154_fam);
- int ret = nlmsg_parse_deprecated(cb->nlh,
- GENL_HDRLEN + nl802154_fam.hdrsize,
- tb, nl802154_fam.maxattr,
- nl802154_policy, NULL);
-
- /* TODO check if we can handle error here,
- * we have no backward compatibility
- */
- if (ret)
- return 0;
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct nlattr **tb = info->attrs;
if (tb[NL802154_ATTR_WPAN_PHY])
state->filter_wpan_phy = nla_get_u32(tb[NL802154_ATTR_WPAN_PHY]);
@@ -2203,7 +2187,8 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
static const struct genl_ops nl802154_ops[] = {
{
.cmd = NL802154_CMD_GET_WPAN_PHY,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.doit = nl802154_get_wpan_phy,
.dumpit = nl802154_dump_wpan_phy,
.done = nl802154_dump_wpan_phy_done,
@@ -2343,7 +2328,8 @@ static const struct genl_ops nl802154_ops[] = {
},
{
.cmd = NL802154_CMD_GET_SEC_KEY,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
/* TODO .doit by matching key id? */
.dumpit = nl802154_dump_llsec_key,
.flags = GENL_ADMIN_PERM,
@@ -2369,7 +2355,8 @@ static const struct genl_ops nl802154_ops[] = {
/* TODO unique identifier must short+pan OR extended_addr */
{
.cmd = NL802154_CMD_GET_SEC_DEV,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
/* TODO .doit by matching extended_addr? */
.dumpit = nl802154_dump_llsec_dev,
.flags = GENL_ADMIN_PERM,
@@ -2395,7 +2382,8 @@ static const struct genl_ops nl802154_ops[] = {
/* TODO remove complete devkey, put it as nested? */
{
.cmd = NL802154_CMD_GET_SEC_DEVKEY,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
/* TODO doit by matching ??? */
.dumpit = nl802154_dump_llsec_devkey,
.flags = GENL_ADMIN_PERM,
@@ -2420,7 +2408,8 @@ static const struct genl_ops nl802154_ops[] = {
},
{
.cmd = NL802154_CMD_GET_SEC_LEVEL,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
/* TODO .doit by matching frame_type? */
.dumpit = nl802154_dump_llsec_seclevel,
.flags = GENL_ADMIN_PERM,
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index b804ccbdb241..0c28bd469a68 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -9,12 +9,12 @@
#include <net/netns/ipv4.h>
#include <net/ip_fib.h>
-int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+int call_fib4_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
info->family = AF_INET;
- return call_fib_notifier(nb, net, event_type, info);
+ return call_fib_notifier(nb, event_type, info);
}
int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
@@ -34,17 +34,16 @@ static unsigned int fib4_seq_read(struct net *net)
return net->ipv4.fib_seq + fib4_rules_seq_read(net);
}
-static int fib4_dump(struct net *net, struct notifier_block *nb)
+static int fib4_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
int err;
- err = fib4_rules_dump(net, nb);
+ err = fib4_rules_dump(net, nb, extack);
if (err)
return err;
- fib_notify(net, nb);
-
- return 0;
+ return fib_notify(net, nb, extack);
}
static const struct fib_notifier_ops fib4_notifier_ops_template = {
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index b43a7ba5c6a4..f99e3bac5cab 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -65,9 +65,10 @@ bool fib4_rule_default(const struct fib_rule *rule)
}
EXPORT_SYMBOL_GPL(fib4_rule_default);
-int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+int fib4_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- return fib_rules_dump(net, nb, AF_INET);
+ return fib_rules_dump(net, nb, AF_INET, extack);
}
unsigned int fib4_rules_seq_read(struct net *net)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1ab2fb6bb37d..b9df9c09b84e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -74,11 +74,13 @@
#include <trace/events/fib.h>
#include "fib_lookup.h"
-static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
+static int call_fib_entry_notifier(struct notifier_block *nb,
enum fib_event_type event_type, u32 dst,
- int dst_len, struct fib_alias *fa)
+ int dst_len, struct fib_alias *fa,
+ struct netlink_ext_ack *extack)
{
struct fib_entry_notifier_info info = {
+ .info.extack = extack,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
@@ -86,7 +88,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
- return call_fib4_notifier(nb, net, event_type, &info.info);
+ return call_fib4_notifier(nb, event_type, &info.info);
}
static int call_fib_entry_notifiers(struct net *net,
@@ -2015,10 +2017,12 @@ void fib_info_notify_update(struct net *net, struct nl_info *info)
}
}
-static void fib_leaf_notify(struct net *net, struct key_vector *l,
- struct fib_table *tb, struct notifier_block *nb)
+static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
struct fib_alias *fa;
+ int err;
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
struct fib_info *fi = fa->fa_info;
@@ -2032,39 +2036,53 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
if (tb->tb_id != fa->tb_id)
continue;
- call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
- KEYLENGTH - fa->fa_slen, fa);
+ err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_ADD, l->key,
+ KEYLENGTH - fa->fa_slen,
+ fa, extack);
+ if (err)
+ return err;
}
+ return 0;
}
-static void fib_table_notify(struct net *net, struct fib_table *tb,
- struct notifier_block *nb)
+static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
struct trie *t = (struct trie *)tb->tb_data;
struct key_vector *l, *tp = t->kv;
t_key key = 0;
+ int err;
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
- fib_leaf_notify(net, l, tb, nb);
+ err = fib_leaf_notify(l, tb, nb, extack);
+ if (err)
+ return err;
key = l->key + 1;
/* stop in case of wrap around */
if (key < l->key)
break;
}
+ return 0;
}
-void fib_notify(struct net *net, struct notifier_block *nb)
+int fib_notify(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
unsigned int h;
+ int err;
for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv4.fib_table_hash[h];
struct fib_table *tb;
- hlist_for_each_entry_rcu(tb, head, tb_hlist)
- fib_table_notify(net, tb, nb);
+ hlist_for_each_entry_rcu(tb, head, tb_hlist) {
+ err = fib_table_notify(tb, nb, extack);
+ if (err)
+ return err;
+ }
}
+ return 0;
}
static void __trie_free_rcu(struct rcu_head *head)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 480d0b22db1a..3b9c7a2725a9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1563,7 +1563,7 @@ static int ip_mc_check_igmp_msg(struct sk_buff *skb)
}
}
-static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb)
+static __sum16 ip_mc_validate_checksum(struct sk_buff *skb)
{
return skb_checksum_simple_validate(skb);
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c59a78a267c3..24a95126e698 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -611,5 +611,6 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
list_add_tail(&skb->list, &sublist);
}
/* dispatch final sublist */
- ip_sublist_rcv(&sublist, curr_dev, curr_net);
+ if (!list_empty(&sublist))
+ ip_sublist_rcv(&sublist, curr_dev, curr_net);
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 9bcca08efec9..32e20b758b68 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1483,10 +1483,10 @@ static int __init ip_auto_config(void)
* missing values.
*/
if (ic_myaddr == NONE ||
-#ifdef CONFIG_ROOT_NFS
+#if defined(CONFIG_ROOT_NFS) || defined(CONFIG_CIFS_ROOT)
(root_server_addr == NONE &&
ic_servaddr == NONE &&
- ROOT_DEV == Root_NFS) ||
+ (ROOT_DEV == Root_NFS || ROOT_DEV == Root_CIFS)) ||
#endif
ic_first_dev->next) {
#ifdef IPCONFIG_DYNAMIC
@@ -1513,6 +1513,12 @@ static int __init ip_auto_config(void)
goto try_try_again;
}
#endif
+#ifdef CONFIG_CIFS_ROOT
+ if (ROOT_DEV == Root_CIFS) {
+ pr_err("IP-Config: Retrying forever (CIFS root)...\n");
+ goto try_try_again;
+ }
+#endif
if (--retries) {
pr_err("IP-Config: Reopening network devices...\n");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 716d5472c022..440294bdb752 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -278,9 +278,10 @@ static void __net_exit ipmr_rules_exit(struct net *net)
rtnl_unlock();
}
-static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack);
}
static unsigned int ipmr_rules_seq_read(struct net *net)
@@ -336,7 +337,8 @@ static void __net_exit ipmr_rules_exit(struct net *net)
rtnl_unlock();
}
-static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
return 0;
}
@@ -3040,10 +3042,11 @@ static unsigned int ipmr_seq_read(struct net *net)
return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
}
-static int ipmr_dump(struct net *net, struct notifier_block *nb)
+static int ipmr_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
- ipmr_mr_table_iter, &mrt_lock);
+ ipmr_mr_table_iter, &mrt_lock, extack);
}
static const struct fib_notifier_ops ipmr_notifier_ops_template = {
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index ea48bd15a575..aa8738a91210 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -386,15 +386,17 @@ EXPORT_SYMBOL(mr_rtm_dumproute);
int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
int (*rules_dump)(struct net *net,
- struct notifier_block *nb),
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt),
- rwlock_t *mrt_lock)
+ rwlock_t *mrt_lock,
+ struct netlink_ext_ack *extack)
{
struct mr_table *mrt;
int err;
- err = rules_dump(net, nb);
+ err = rules_dump(net, nb, extack);
if (err)
return err;
@@ -409,17 +411,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
if (!v->dev)
continue;
- mr_call_vif_notifier(nb, net, family,
- FIB_EVENT_VIF_ADD,
- v, vifi, mrt->id);
+ err = mr_call_vif_notifier(nb, family,
+ FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id, extack);
+ if (err)
+ break;
}
read_unlock(mrt_lock);
+ if (err)
+ return err;
+
/* Notify on table MFC entries */
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- mr_call_mfc_notifier(nb, net, family,
- FIB_EVENT_ENTRY_ADD,
- mfc, mrt->id);
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ err = mr_call_mfc_notifier(nb, family,
+ FIB_EVENT_ENTRY_ADD,
+ mfc, mrt->id, extack);
+ if (err)
+ return err;
+ }
}
return 0;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d8876f0e9672..1dd25189d83f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1741,8 +1741,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
struct tcp_zerocopy_receive *zc)
{
unsigned long address = (unsigned long)zc->address;
+ u32 length = 0, seq, offset, zap_len;
const skb_frag_t *frags = NULL;
- u32 length = 0, seq, offset;
struct vm_area_struct *vma;
struct sk_buff *skb = NULL;
struct tcp_sock *tp;
@@ -1769,12 +1769,12 @@ static int tcp_zerocopy_receive(struct sock *sk,
seq = tp->copied_seq;
inq = tcp_inq(sk);
zc->length = min_t(u32, zc->length, inq);
- zc->length &= ~(PAGE_SIZE - 1);
- if (zc->length) {
- zap_page_range(vma, address, zc->length);
+ zap_len = zc->length & ~(PAGE_SIZE - 1);
+ if (zap_len) {
+ zap_page_range(vma, address, zap_len);
zc->recv_skip_hint = 0;
} else {
- zc->recv_skip_hint = inq;
+ zc->recv_skip_hint = zc->length;
}
ret = 0;
while (length + PAGE_SIZE <= zc->length) {
@@ -2666,6 +2666,7 @@ int tcp_disconnect(struct sock *sk, int flags)
/* Clean up fastopen related fields */
tcp_free_fastopen_req(tp);
inet->defer_connect = 0;
+ tp->fastopen_client_fail = 0;
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
@@ -3305,6 +3306,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_reord_seen = tp->reord_seen;
info->tcpi_rcv_ooopack = tp->rcv_ooopack;
info->tcpi_snd_wnd = tp->snd_wnd;
+ info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index a915ade0c818..19ad9586c720 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -422,7 +422,10 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
cookie->len = -1;
return true;
}
- return cookie->len > 0;
+ if (cookie->len > 0)
+ return true;
+ tcp_sk(sk)->fastopen_client_fail = TFO_COOKIE_UNAVAILABLE;
+ return false;
}
/* This function checks if we want to defer sending SYN until the first
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a2e52ad7cdab..88b987ca9ebb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5814,6 +5814,10 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
if (data) { /* Retransmit unacked data in SYN */
+ if (tp->total_retrans)
+ tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED;
+ else
+ tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
skb_rbtree_walk_from(data) {
if (__tcp_retransmit_skb(sk, data, 1))
break;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 67b2dc7a1727..899e100a68e6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -121,11 +121,9 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == AF_INET6) {
if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
- (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
- (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
+ ipv6_addr_v4mapped_loopback(&tw->tw_v6_daddr) ||
ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
- (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
- (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
+ ipv6_addr_v4mapped_loopback(&tw->tw_v6_rcv_saddr))
loopback = true;
} else
#endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 34ccef18b40e..98d82305d6de 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5552,14 +5552,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
if (!nla)
goto nla_put_failure;
-
- if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
- goto nla_put_failure;
-
read_lock_bh(&idev->lock);
memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
read_unlock_bh(&idev->lock);
+ if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index 05f82baaa99e..f87ae33e1d01 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -7,12 +7,12 @@
#include <net/netns/ipv6.h>
#include <net/ip6_fib.h>
-int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+int call_fib6_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
info->family = AF_INET6;
- return call_fib_notifier(nb, net, event_type, info);
+ return call_fib_notifier(nb, event_type, info);
}
int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
@@ -27,15 +27,16 @@ static unsigned int fib6_seq_read(struct net *net)
return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
}
-static int fib6_dump(struct net *net, struct notifier_block *nb)
+static int fib6_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
int err;
- err = fib6_rules_dump(net, nb);
+ err = fib6_rules_dump(net, nb, extack);
if (err)
return err;
- return fib6_tables_dump(net, nb);
+ return fib6_tables_dump(net, nb, extack);
}
static const struct fib_notifier_ops fib6_notifier_ops_template = {
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index f9e8fe3ff0c5..fafe556d21e0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -47,9 +47,10 @@ bool fib6_rule_default(const struct fib_rule *rule)
}
EXPORT_SYMBOL_GPL(fib6_rule_default);
-int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+int fib6_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- return fib_rules_dump(net, nb, AF_INET6);
+ return fib_rules_dump(net, nb, AF_INET6, extack);
}
unsigned int fib6_rules_seq_read(struct net *net)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6e2af411cd9c..f66bc2af4e9d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -357,15 +357,17 @@ unsigned int fib6_tables_seq_read(struct net *net)
return fib_seq;
}
-static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+static int call_fib6_entry_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
- struct fib6_info *rt)
+ struct fib6_info *rt,
+ struct netlink_ext_ack *extack)
{
struct fib6_entry_notifier_info info = {
+ .info.extack = extack,
.rt = rt,
};
- return call_fib6_notifier(nb, net, event_type, &info.info);
+ return call_fib6_notifier(nb, event_type, &info.info);
}
int call_fib6_entry_notifiers(struct net *net,
@@ -401,40 +403,51 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
struct fib6_dump_arg {
struct net *net;
struct notifier_block *nb;
+ struct netlink_ext_ack *extack;
};
-static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
+static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{
if (rt == arg->net->ipv6.fib6_null_entry)
- return;
- call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+ return 0;
+ return call_fib6_entry_notifier(arg->nb, FIB_EVENT_ENTRY_ADD,
+ rt, arg->extack);
}
static int fib6_node_dump(struct fib6_walker *w)
{
struct fib6_info *rt;
+ int err = 0;
- for_each_fib6_walker_rt(w)
- fib6_rt_dump(rt, w->args);
+ for_each_fib6_walker_rt(w) {
+ err = fib6_rt_dump(rt, w->args);
+ if (err)
+ break;
+ }
w->leaf = NULL;
- return 0;
+ return err;
}
-static void fib6_table_dump(struct net *net, struct fib6_table *tb,
- struct fib6_walker *w)
+static int fib6_table_dump(struct net *net, struct fib6_table *tb,
+ struct fib6_walker *w)
{
+ int err;
+
w->root = &tb->tb6_root;
spin_lock_bh(&tb->tb6_lock);
- fib6_walk(net, w);
+ err = fib6_walk(net, w);
spin_unlock_bh(&tb->tb6_lock);
+ return err;
}
/* Called with rcu_read_lock() */
-int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+int fib6_tables_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
struct fib6_dump_arg arg;
struct fib6_walker *w;
unsigned int h;
+ int err = 0;
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (!w)
@@ -443,19 +456,24 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb)
w->func = fib6_node_dump;
arg.net = net;
arg.nb = nb;
+ arg.extack = extack;
w->args = &arg;
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv6.fib_table_hash[h];
struct fib6_table *tb;
- hlist_for_each_entry_rcu(tb, head, tb6_hlist)
- fib6_table_dump(net, tb, w);
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+ err = fib6_table_dump(net, tb, w);
+ if (err < 0)
+ goto out;
+ }
}
+out:
kfree(w);
- return 0;
+ return err;
}
static int fib6_dump_node(struct fib6_walker *w)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 3d71c7d6102c..ef7f707d9ae3 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -325,7 +325,8 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
list_add_tail(&skb->list, &sublist);
}
/* dispatch final sublist */
- ip6_sublist_rcv(&sublist, curr_dev, curr_net);
+ if (!list_empty(&sublist))
+ ip6_sublist_rcv(&sublist, curr_dev, curr_net);
}
INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 857a89ad4d6c..bfa49ff70531 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -265,9 +265,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
rtnl_unlock();
}
-static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}
static unsigned int ip6mr_rules_seq_read(struct net *net)
@@ -324,7 +325,8 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
rtnl_unlock();
}
-static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
return 0;
}
@@ -1256,10 +1258,11 @@ static unsigned int ip6mr_seq_read(struct net *net)
return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
}
-static int ip6mr_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
- ip6mr_mr_table_iter, &mrt_lock);
+ ip6mr_mr_table_iter, &mrt_lock, extack);
}
static struct notifier_block ip6_mr_notifier = {
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
index 34d51cd426b0..6bac68fb27a3 100644
--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -150,4 +150,4 @@ EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v6);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
-MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support");
+MODULE_DESCRIPTION("Netfilter IPv6 transparent proxy support");
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index b11883d26875..33da6f738c99 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -485,7 +485,14 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
params.ssn = sta->tid_seq[tid] >> 4;
ret = drv_ampdu_action(local, sdata, &params);
- if (ret) {
+ if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) {
+ /*
+ * We didn't send the request yet, so don't need to check
+ * here if we already got a response, just mark as driver
+ * ready immediately.
+ */
+ set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state);
+ } else if (ret) {
ht_dbg(sdata,
"BA request denied - HW unavailable for %pM tid %d\n",
sta->sta.addr, tid);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 0a6ff01c68a9..d40744903fa9 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -538,7 +538,6 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct cfg80211_bss *cbss;
- int err, changed = 0;
sdata_assert_lock(sdata);
@@ -560,13 +559,7 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
ifibss->chandef = sdata->csa_chandef;
/* generate the beacon */
- err = ieee80211_ibss_csa_beacon(sdata, NULL);
- if (err < 0)
- return err;
-
- changed |= err;
-
- return changed;
+ return ieee80211_ibss_csa_beacon(sdata, NULL);
}
void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index ee86c3333999..86bc469a28bc 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -70,7 +70,7 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix)
}
/* return current EMWA throughput */
-int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma)
+int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg)
{
int usecs;
@@ -79,13 +79,13 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma)
usecs = 1000000;
/* reset thr. below 10% success */
- if (mr->stats.prob_ewma < MINSTREL_FRAC(10, 100))
+ if (mr->stats.prob_avg < MINSTREL_FRAC(10, 100))
return 0;
- if (prob_ewma > MINSTREL_FRAC(90, 100))
+ if (prob_avg > MINSTREL_FRAC(90, 100))
return MINSTREL_TRUNC(100000 * (MINSTREL_FRAC(90, 100) / usecs));
else
- return MINSTREL_TRUNC(100000 * (prob_ewma / usecs));
+ return MINSTREL_TRUNC(100000 * (prob_avg / usecs));
}
/* find & sort topmost throughput rates */
@@ -98,8 +98,8 @@ minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list)
for (j = MAX_THR_RATES; j > 0; --j) {
tmp_mrs = &mi->r[tp_list[j - 1]].stats;
- if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) <=
- minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))
+ if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_avg) <=
+ minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_avg))
break;
}
@@ -157,20 +157,24 @@ minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
* Recalculate statistics and counters of a given rate
*/
void
-minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs)
+minstrel_calc_rate_stats(struct minstrel_priv *mp,
+ struct minstrel_rate_stats *mrs)
{
unsigned int cur_prob;
if (unlikely(mrs->attempts > 0)) {
mrs->sample_skipped = 0;
cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts);
- if (unlikely(!mrs->att_hist)) {
- mrs->prob_ewma = cur_prob;
+ if (mp->new_avg) {
+ minstrel_filter_avg_add(&mrs->prob_avg,
+ &mrs->prob_avg_1, cur_prob);
+ } else if (unlikely(!mrs->att_hist)) {
+ mrs->prob_avg = cur_prob;
} else {
/*update exponential weighted moving avarage */
- mrs->prob_ewma = minstrel_ewma(mrs->prob_ewma,
- cur_prob,
- EWMA_LEVEL);
+ mrs->prob_avg = minstrel_ewma(mrs->prob_avg,
+ cur_prob,
+ EWMA_LEVEL);
}
mrs->att_hist += mrs->attempts;
mrs->succ_hist += mrs->success;
@@ -200,12 +204,12 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
struct minstrel_rate_stats *tmp_mrs = &mi->r[tmp_prob_rate].stats;
/* Update statistics of success probability per rate */
- minstrel_calc_rate_stats(mrs);
+ minstrel_calc_rate_stats(mp, mrs);
/* Sample less often below the 10% chance of success.
* Sample less often above the 95% chance of success. */
- if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) ||
- mrs->prob_ewma < MINSTREL_FRAC(10, 100)) {
+ if (mrs->prob_avg > MINSTREL_FRAC(95, 100) ||
+ mrs->prob_avg < MINSTREL_FRAC(10, 100)) {
mr->adjusted_retry_count = mrs->retry_count >> 1;
if (mr->adjusted_retry_count > 2)
mr->adjusted_retry_count = 2;
@@ -225,14 +229,14 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
* choose the maximum throughput rate as max_prob_rate
* (2) if all success probabilities < 95%, the rate with
* highest success probability is chosen as max_prob_rate */
- if (mrs->prob_ewma >= MINSTREL_FRAC(95, 100)) {
- tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_ewma);
+ if (mrs->prob_avg >= MINSTREL_FRAC(95, 100)) {
+ tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_avg);
tmp_prob_tp = minstrel_get_tp_avg(&mi->r[tmp_prob_rate],
- tmp_mrs->prob_ewma);
+ tmp_mrs->prob_avg);
if (tmp_cur_tp >= tmp_prob_tp)
tmp_prob_rate = i;
} else {
- if (mrs->prob_ewma >= tmp_mrs->prob_ewma)
+ if (mrs->prob_avg >= tmp_mrs->prob_avg)
tmp_prob_rate = i;
}
}
@@ -290,7 +294,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
mi->sample_deferred--;
if (time_after(jiffies, mi->last_stats_update +
- (mp->update_interval * HZ) / 1000))
+ mp->update_interval / (mp->new_avg ? 2 : 1)))
minstrel_update_stats(mp, mi);
}
@@ -422,7 +426,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
* has a probability of >95%, we shouldn't be attempting
* to use it, as this only wastes precious airtime */
if (!mrr_capable &&
- (mi->r[ndx].stats.prob_ewma > MINSTREL_FRAC(95, 100)))
+ (mi->r[ndx].stats.prob_avg > MINSTREL_FRAC(95, 100)))
return;
mi->prev_sample = true;
@@ -573,7 +577,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta)
* computing cur_tp
*/
tmp_mrs = &mi->r[idx].stats;
- tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10;
+ tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_avg) * 10;
tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
return tmp_cur_tp;
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 51d8b2c846e7..dbb43bcd3c45 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -19,6 +19,21 @@
#define MAX_THR_RATES 4
/*
+ * Coefficients for moving average with noise filter (period=16),
+ * scaled by 10 bits
+ *
+ * a1 = exp(-pi * sqrt(2) / period)
+ * coeff2 = 2 * a1 * cos(sqrt(2) * 2 * pi / period)
+ * coeff3 = -sqr(a1)
+ * coeff1 = 1 - coeff2 - coeff3
+ */
+#define MINSTREL_AVG_COEFF1 (MINSTREL_FRAC(1, 1) - \
+ MINSTREL_AVG_COEFF2 - \
+ MINSTREL_AVG_COEFF3)
+#define MINSTREL_AVG_COEFF2 0x00001499
+#define MINSTREL_AVG_COEFF3 -0x0000092e
+
+/*
* Perform EWMA (Exponentially Weighted Moving Average) calculation
*/
static inline int
@@ -32,6 +47,37 @@ minstrel_ewma(int old, int new, int weight)
return old + incr;
}
+static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in)
+{
+ s32 out_1 = *prev_1;
+ s32 out_2 = *prev_2;
+ s32 val;
+
+ if (!in)
+ in += 1;
+
+ if (!out_1) {
+ val = out_1 = in;
+ goto out;
+ }
+
+ val = MINSTREL_AVG_COEFF1 * in;
+ val += MINSTREL_AVG_COEFF2 * out_1;
+ val += MINSTREL_AVG_COEFF3 * out_2;
+ val >>= MINSTREL_SCALE;
+
+ if (val > 1 << MINSTREL_SCALE)
+ val = 1 << MINSTREL_SCALE;
+ if (val < 0)
+ val = 1;
+
+out:
+ *prev_2 = out_1;
+ *prev_1 = val;
+
+ return val;
+}
+
struct minstrel_rate_stats {
/* current / last sampling period attempts/success counters */
u16 attempts, last_attempts;
@@ -40,8 +86,9 @@ struct minstrel_rate_stats {
/* total attempts/success counters */
u32 att_hist, succ_hist;
- /* prob_ewma - exponential weighted moving average of prob */
- u16 prob_ewma;
+ /* prob_avg - moving average of prob */
+ u16 prob_avg;
+ u16 prob_avg_1;
/* maximum retry counts */
u8 retry_count;
@@ -95,6 +142,7 @@ struct minstrel_sta_info {
struct minstrel_priv {
struct ieee80211_hw *hw;
bool has_mrr;
+ bool new_avg;
u32 sample_switch;
unsigned int cw_min;
unsigned int cw_max;
@@ -126,8 +174,9 @@ extern const struct rate_control_ops mac80211_minstrel;
void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
/* Recalculate success probabilities and counters for a given rate using EWMA */
-void minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs);
-int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma);
+void minstrel_calc_rate_stats(struct minstrel_priv *mp,
+ struct minstrel_rate_stats *mrs);
+int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg);
/* debugfs */
int minstrel_stats_open(struct inode *inode, struct file *file);
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index c8afd85b51a0..9b8e0daeb7bb 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -90,8 +90,8 @@ minstrel_stats_open(struct inode *inode, struct file *file)
p += sprintf(p, "%6u ", mr->perfect_tx_time);
tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
- tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
- eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+ tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg);
+ eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u"
" %3u %3u %-3u "
@@ -147,8 +147,8 @@ minstrel_stats_csv_open(struct inode *inode, struct file *file)
p += sprintf(p, "%u,",mr->perfect_tx_time);
tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
- tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
- eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+ tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg);
+ eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,%u,"
"%llu,%llu,%d,%d\n",
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 0ef2633349b5..694a31978a04 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -346,12 +346,12 @@ minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi)
*/
int
minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
- int prob_ewma)
+ int prob_avg)
{
unsigned int nsecs = 0;
/* do not account throughput if sucess prob is below 10% */
- if (prob_ewma < MINSTREL_FRAC(10, 100))
+ if (prob_avg < MINSTREL_FRAC(10, 100))
return 0;
if (group != MINSTREL_CCK_GROUP)
@@ -365,11 +365,11 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
* account for collision related packet error rate fluctuation
* (prob is scaled - see MINSTREL_FRAC above)
*/
- if (prob_ewma > MINSTREL_FRAC(90, 100))
+ if (prob_avg > MINSTREL_FRAC(90, 100))
return MINSTREL_TRUNC(100000 * ((MINSTREL_FRAC(90, 100) * 1000)
/ nsecs));
else
- return MINSTREL_TRUNC(100000 * ((prob_ewma * 1000) / nsecs));
+ return MINSTREL_TRUNC(100000 * ((prob_avg * 1000) / nsecs));
}
/*
@@ -389,13 +389,13 @@ minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index,
cur_group = index / MCS_GROUP_RATES;
cur_idx = index % MCS_GROUP_RATES;
- cur_prob = mi->groups[cur_group].rates[cur_idx].prob_ewma;
+ cur_prob = mi->groups[cur_group].rates[cur_idx].prob_avg;
cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, cur_prob);
do {
tmp_group = tp_list[j - 1] / MCS_GROUP_RATES;
tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES;
- tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx,
tmp_prob);
if (cur_tp_avg < tmp_tp_avg ||
@@ -432,7 +432,7 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
tmp_group = mi->max_prob_rate / MCS_GROUP_RATES;
tmp_idx = mi->max_prob_rate % MCS_GROUP_RATES;
- tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
/* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from
@@ -444,11 +444,11 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
- max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
+ max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_avg;
- if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) {
+ if (mrs->prob_avg > MINSTREL_FRAC(75, 100)) {
cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx,
- mrs->prob_ewma);
+ mrs->prob_avg);
if (cur_tp_avg > tmp_tp_avg)
mi->max_prob_rate = index;
@@ -458,9 +458,9 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
if (cur_tp_avg > max_gpr_tp_avg)
mg->max_group_prob_rate = index;
} else {
- if (mrs->prob_ewma > tmp_prob)
+ if (mrs->prob_avg > tmp_prob)
mi->max_prob_rate = index;
- if (mrs->prob_ewma > max_gpr_prob)
+ if (mrs->prob_avg > max_gpr_prob)
mg->max_group_prob_rate = index;
}
}
@@ -482,12 +482,12 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi,
tmp_group = tmp_cck_tp_rate[0] / MCS_GROUP_RATES;
tmp_idx = tmp_cck_tp_rate[0] % MCS_GROUP_RATES;
- tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
tmp_group = tmp_mcs_tp_rate[0] / MCS_GROUP_RATES;
tmp_idx = tmp_mcs_tp_rate[0] % MCS_GROUP_RATES;
- tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) {
@@ -518,7 +518,7 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
continue;
tmp_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
- tmp_prob = mi->groups[group].rates[tmp_idx].prob_ewma;
+ tmp_prob = mi->groups[group].rates[tmp_idx].prob_avg;
if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) &&
(minstrel_mcs_groups[group].streams < tmp_max_streams)) {
@@ -623,7 +623,7 @@ minstrel_ht_rate_sample_switch(struct minstrel_priv *mp,
* If that fails, look again for a rate that is at least as fast
*/
mrs = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
- faster_rate = mrs->prob_ewma > MINSTREL_FRAC(75, 100);
+ faster_rate = mrs->prob_avg > MINSTREL_FRAC(75, 100);
minstrel_ht_find_probe_rates(mi, rates, &n_rates, faster_rate);
if (!n_rates && faster_rate)
minstrel_ht_find_probe_rates(mi, rates, &n_rates, false);
@@ -737,8 +737,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
mrs = &mg->rates[i];
mrs->retry_updated = false;
- minstrel_calc_rate_stats(mrs);
- cur_prob = mrs->prob_ewma;
+ minstrel_calc_rate_stats(mp, mrs);
+ cur_prob = mrs->prob_avg;
if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0)
continue;
@@ -773,6 +773,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
/* try to sample all available rates during each interval */
mi->sample_count *= 8;
+ if (mp->new_avg)
+ mi->sample_count /= 2;
if (sample)
minstrel_ht_rate_sample_switch(mp, mi);
@@ -889,6 +891,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
struct ieee80211_tx_rate *ar = info->status.rates;
struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL;
struct minstrel_priv *mp = priv;
+ u32 update_interval = mp->update_interval / 2;
bool last, update = false;
bool sample_status = false;
int i;
@@ -943,6 +946,10 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
switch (mi->sample_mode) {
case MINSTREL_SAMPLE_IDLE:
+ if (mp->new_avg &&
+ (mp->hw->max_rates > 1 ||
+ mi->total_packets_cur < SAMPLE_SWITCH_THR))
+ update_interval /= 2;
break;
case MINSTREL_SAMPLE_ACTIVE:
@@ -970,23 +977,20 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
*/
rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
if (rate->attempts > 30 &&
- MINSTREL_FRAC(rate->success, rate->attempts) <
- MINSTREL_FRAC(20, 100)) {
+ rate->success < rate->attempts / 4) {
minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true);
update = true;
}
rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]);
if (rate2->attempts > 30 &&
- MINSTREL_FRAC(rate2->success, rate2->attempts) <
- MINSTREL_FRAC(20, 100)) {
+ rate2->success < rate2->attempts / 4) {
minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false);
update = true;
}
}
- if (time_after(jiffies, mi->last_stats_update +
- (mp->update_interval / 2 * HZ) / 1000)) {
+ if (time_after(jiffies, mi->last_stats_update + update_interval)) {
update = true;
minstrel_ht_update_stats(mp, mi, true);
}
@@ -1008,7 +1012,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
unsigned int overhead = 0, overhead_rtscts = 0;
mrs = minstrel_get_ratestats(mi, index);
- if (mrs->prob_ewma < MINSTREL_FRAC(1, 10)) {
+ if (mrs->prob_avg < MINSTREL_FRAC(1, 10)) {
mrs->retry_count = 1;
mrs->retry_count_rtscts = 1;
return;
@@ -1065,7 +1069,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
if (!mrs->retry_updated)
minstrel_calc_retransmit(mp, mi, index);
- if (mrs->prob_ewma < MINSTREL_FRAC(20, 100) || !mrs->retry_count) {
+ if (mrs->prob_avg < MINSTREL_FRAC(20, 100) || !mrs->retry_count) {
ratetbl->rate[offset].count = 2;
ratetbl->rate[offset].count_rts = 2;
ratetbl->rate[offset].count_cts = 2;
@@ -1099,11 +1103,11 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
}
static inline int
-minstrel_ht_get_prob_ewma(struct minstrel_ht_sta *mi, int rate)
+minstrel_ht_get_prob_avg(struct minstrel_ht_sta *mi, int rate)
{
int group = rate / MCS_GROUP_RATES;
rate %= MCS_GROUP_RATES;
- return mi->groups[group].rates[rate].prob_ewma;
+ return mi->groups[group].rates[rate].prob_avg;
}
static int
@@ -1115,7 +1119,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
unsigned int duration;
/* Disable A-MSDU if max_prob_rate is bad */
- if (mi->groups[group].rates[rate].prob_ewma < MINSTREL_FRAC(50, 100))
+ if (mi->groups[group].rates[rate].prob_avg < MINSTREL_FRAC(50, 100))
return 1;
duration = g->duration[rate];
@@ -1138,7 +1142,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
* data packet size
*/
if (duration > MCS_DURATION(1, 0, 260) ||
- (minstrel_ht_get_prob_ewma(mi, mi->max_tp_rate[0]) <
+ (minstrel_ht_get_prob_avg(mi, mi->max_tp_rate[0]) <
MINSTREL_FRAC(75, 100)))
return 3200;
@@ -1243,7 +1247,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
* rate, to avoid wasting airtime.
*/
sample_dur = minstrel_get_duration(sample_idx);
- if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) ||
+ if (mrs->prob_avg > MINSTREL_FRAC(95, 100) ||
minstrel_get_duration(mi->max_prob_rate) * 3 < sample_dur)
return -1;
@@ -1666,7 +1670,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
mp->has_mrr = true;
mp->hw = hw;
- mp->update_interval = 100;
+ mp->update_interval = HZ / 10;
+ mp->new_avg = true;
#ifdef CONFIG_MAC80211_DEBUGFS
mp->fixed_rate_idx = (u32) -1;
@@ -1674,6 +1679,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
&mp->fixed_rate_idx);
debugfs_create_u32("sample_switch", S_IRUGO | S_IWUSR, debugfsdir,
&mp->sample_switch);
+ debugfs_create_bool("new_avg", S_IRUGO | S_IWUSR, debugfsdir,
+ &mp->new_avg);
#endif
minstrel_ht_init_cck_rates(mp);
@@ -1698,7 +1705,7 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
i = mi->max_tp_rate[0] / MCS_GROUP_RATES;
j = mi->max_tp_rate[0] % MCS_GROUP_RATES;
- prob = mi->groups[i].rates[j].prob_ewma;
+ prob = mi->groups[i].rates[j].prob_avg;
/* convert tp_avg from pkt per second in kbps */
tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10;
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index f938701e7ab7..53ea3c29debf 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -119,6 +119,6 @@ struct minstrel_ht_sta_priv {
void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
- int prob_ewma);
+ int prob_avg);
#endif
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 5a6e9f3edc04..bebb71917742 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -98,8 +98,8 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
p += sprintf(p, "%6u ", tx_time);
tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
- tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
- eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg);
+ eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u"
" %3u %3u %-3u "
@@ -243,8 +243,8 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p)
p += sprintf(p, "%u,", tx_time);
tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
- tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
- eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg);
+ eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,"
"%u,%llu,%llu,",
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1fa422782905..938c10f7955b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1617,7 +1617,7 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
static bool ieee80211_tx_frags(struct ieee80211_local *local,
struct ieee80211_vif *vif,
- struct ieee80211_sta *sta,
+ struct sta_info *sta,
struct sk_buff_head *skbs,
bool txpending)
{
@@ -1679,7 +1679,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
info->control.vif = vif;
- control.sta = sta;
+ control.sta = sta ? &sta->sta : NULL;
__skb_unlink(skb, skbs);
drv_tx(local, &control, skb);
@@ -1698,7 +1698,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
struct ieee80211_tx_info *info;
struct ieee80211_sub_if_data *sdata;
struct ieee80211_vif *vif;
- struct ieee80211_sta *pubsta;
struct sk_buff *skb;
bool result = true;
__le16 fc;
@@ -1713,11 +1712,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
if (sta && !sta->uploaded)
sta = NULL;
- if (sta)
- pubsta = &sta->sta;
- else
- pubsta = NULL;
-
switch (sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
@@ -1744,8 +1738,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
break;
}
- result = ieee80211_tx_frags(local, vif, pubsta, skbs,
- txpending);
+ result = ieee80211_tx_frags(local, vif, sta, skbs, txpending);
ieee80211_tpt_led_trig_tx(local, fc, led_len);
@@ -3529,7 +3522,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sub_if_data, u.ap);
__skb_queue_tail(&tx.skbs, skb);
- ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false);
+ ieee80211_tx_frags(local, &sdata->vif, sta, &tx.skbs, false);
return true;
}
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 5d5bdf450091..78f046ec506f 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -536,6 +536,26 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
}
EXPORT_SYMBOL(nf_hook_slow);
+void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
+ const struct nf_hook_entries *e)
+{
+ struct sk_buff *skb, *next;
+ struct list_head sublist;
+ int ret;
+
+ INIT_LIST_HEAD(&sublist);
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ skb_list_del_init(skb);
+ ret = nf_hook_slow(skb, state, e, 0);
+ if (ret == 1)
+ list_add_tail(&skb->list, &sublist);
+ }
+ /* Put passed packets back on main list */
+ list_splice(&sublist, head);
+}
+EXPORT_SYMBOL(nf_hook_slow_list);
+
/* This needs to be compiled in any case to avoid dependencies between the
* nfnetlink_queue code and nf_conntrack.
*/
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 063df74b4647..1abd6f0dc227 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -192,7 +192,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
#ifndef IP_SET_BITMAP_STORED_TIMEOUT
-static inline bool
+static bool
mtype_is_filled(const struct mtype_elem *x)
{
return true;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 11ff9d4a7006..abe8f77d7d23 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -55,7 +55,7 @@ struct bitmap_ip_adt_elem {
u16 id;
};
-static inline u32
+static u32
ip_to_id(const struct bitmap_ip *m, u32 ip)
{
return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts;
@@ -63,33 +63,33 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
/* Common functions */
-static inline int
+static int
bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e,
struct bitmap_ip *map, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize)
{
return !!test_bit(id, map->members);
}
-static inline int
+static int
bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
u32 flags, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
{
return !test_and_clear_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
size_t dsize)
{
@@ -97,7 +97,7 @@ bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
htonl(map->first_ip + id * map->hosts));
}
-static inline int
+static int
bitmap_ip_do_head(struct sk_buff *skb, const struct bitmap_ip *map)
{
return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
@@ -237,6 +237,18 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
return true;
}
+static u32
+range_to_mask(u32 from, u32 to, u8 *bits)
+{
+ u32 mask = 0xFFFFFFFE;
+
+ *bits = 32;
+ while (--(*bits) > 0 && mask && (to & mask) != from)
+ mask <<= 1;
+
+ return mask;
+}
+
static int
bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
u32 flags)
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1d4e63326e68..b618713297da 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -65,7 +65,7 @@ struct bitmap_ipmac_elem {
unsigned char filled;
} __aligned(__alignof__(u64));
-static inline u32
+static u32
ip_to_id(const struct bitmap_ipmac *m, u32 ip)
{
return ip - m->first_ip;
@@ -79,7 +79,7 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
/* Common functions */
-static inline int
+static int
bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
const struct bitmap_ipmac *map, size_t dsize)
{
@@ -94,7 +94,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
return -EAGAIN;
}
-static inline int
+static int
bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
{
const struct bitmap_ipmac_elem *elem;
@@ -106,13 +106,13 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
return elem->filled == MAC_FILLED;
}
-static inline int
+static int
bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
{
return elem->filled == MAC_FILLED;
}
-static inline int
+static int
bitmap_ipmac_add_timeout(unsigned long *timeout,
const struct bitmap_ipmac_adt_elem *e,
const struct ip_set_ext *ext, struct ip_set *set,
@@ -139,7 +139,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
return 0;
}
-static inline int
+static int
bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
struct bitmap_ipmac *map, u32 flags, size_t dsize)
{
@@ -177,14 +177,14 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
return IPSET_ADD_STORE_PLAIN_TIMEOUT;
}
-static inline int
+static int
bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
struct bitmap_ipmac *map)
{
return !test_and_clear_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
u32 id, size_t dsize)
{
@@ -197,7 +197,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether));
}
-static inline int
+static int
bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map)
{
return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 704a0dda1609..23d6095cb196 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -46,7 +46,7 @@ struct bitmap_port_adt_elem {
u16 id;
};
-static inline u16
+static u16
port_to_id(const struct bitmap_port *m, u16 port)
{
return port - m->first_port;
@@ -54,34 +54,34 @@ port_to_id(const struct bitmap_port *m, u16 port)
/* Common functions */
-static inline int
+static int
bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
const struct bitmap_port *map, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize)
{
return !!test_bit(id, map->members);
}
-static inline int
+static int
bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
struct bitmap_port *map, u32 flags, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
struct bitmap_port *map)
{
return !test_and_clear_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
size_t dsize)
{
@@ -89,13 +89,40 @@ bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
htons(map->first_port + id));
}
-static inline int
+static int
bitmap_port_do_head(struct sk_buff *skb, const struct bitmap_port *map)
{
return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) ||
nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
}
+static bool
+ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
+{
+ bool ret;
+ u8 proto;
+
+ switch (pf) {
+ case NFPROTO_IPV4:
+ ret = ip_set_get_ip4_port(skb, src, port, &proto);
+ break;
+ case NFPROTO_IPV6:
+ ret = ip_set_get_ip6_port(skb, src, port, &proto);
+ break;
+ default:
+ return false;
+ }
+ if (!ret)
+ return ret;
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ return true;
+ default:
+ return false;
+ }
+}
+
static int
bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e64d5f9a89dd..35cf59e4004b 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -35,7 +35,7 @@ struct ip_set_net {
static unsigned int ip_set_net_id __read_mostly;
-static inline struct ip_set_net *ip_set_pernet(struct net *net)
+static struct ip_set_net *ip_set_pernet(struct net *net)
{
return net_generic(net, ip_set_net_id);
}
@@ -67,13 +67,13 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
* serialized by ip_set_type_mutex.
*/
-static inline void
+static void
ip_set_type_lock(void)
{
mutex_lock(&ip_set_type_mutex);
}
-static inline void
+static void
ip_set_type_unlock(void)
{
mutex_unlock(&ip_set_type_mutex);
@@ -277,7 +277,7 @@ ip_set_free(void *members)
}
EXPORT_SYMBOL_GPL(ip_set_free);
-static inline bool
+static bool
flag_nested(const struct nlattr *nla)
{
return nla->nla_type & NLA_F_NESTED;
@@ -325,6 +325,83 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
}
EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
+static u32
+ip_set_timeout_get(const unsigned long *timeout)
+{
+ u32 t;
+
+ if (*timeout == IPSET_ELEM_PERMANENT)
+ return 0;
+
+ t = jiffies_to_msecs(*timeout - jiffies) / MSEC_PER_SEC;
+ /* Zero value in userspace means no timeout */
+ return t == 0 ? 1 : t;
+}
+
+static char *
+ip_set_comment_uget(struct nlattr *tb)
+{
+ return nla_data(tb);
+}
+
+/* Called from uadd only, protected by the set spinlock.
+ * The kadt functions don't use the comment extensions in any way.
+ */
+void
+ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
+ const struct ip_set_ext *ext)
+{
+ struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
+ size_t len = ext->comment ? strlen(ext->comment) : 0;
+
+ if (unlikely(c)) {
+ set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
+ }
+ if (!len)
+ return;
+ if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
+ len = IPSET_MAX_COMMENT_SIZE;
+ c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+ if (unlikely(!c))
+ return;
+ strlcpy(c->str, ext->comment, len + 1);
+ set->ext_size += sizeof(*c) + strlen(c->str) + 1;
+ rcu_assign_pointer(comment->c, c);
+}
+EXPORT_SYMBOL_GPL(ip_set_init_comment);
+
+/* Used only when dumping a set, protected by rcu_read_lock() */
+static int
+ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
+{
+ struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
+
+ if (!c)
+ return 0;
+ return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
+}
+
+/* Called from uadd/udel, flush or the garbage collectors protected
+ * by the set spinlock.
+ * Called when the set is destroyed and when there can't be any user
+ * of the set data anymore.
+ */
+static void
+ip_set_comment_free(struct ip_set *set, void *ptr)
+{
+ struct ip_set_comment *comment = ptr;
+ struct ip_set_comment_rcu *c;
+
+ c = rcu_dereference_protected(comment->c, 1);
+ if (unlikely(!c))
+ return;
+ set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
+}
+
typedef void (*destroyer)(struct ip_set *, void *);
/* ipset data extension types, in size order */
@@ -351,12 +428,12 @@ const struct ip_set_ext_type ip_set_extensions[] = {
.flag = IPSET_FLAG_WITH_COMMENT,
.len = sizeof(struct ip_set_comment),
.align = __alignof__(struct ip_set_comment),
- .destroy = (destroyer) ip_set_comment_free,
+ .destroy = ip_set_comment_free,
},
};
EXPORT_SYMBOL_GPL(ip_set_extensions);
-static inline bool
+static bool
add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
{
return ip_set_extensions[id].flag ?
@@ -446,6 +523,46 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
+static u64
+ip_set_get_bytes(const struct ip_set_counter *counter)
+{
+ return (u64)atomic64_read(&(counter)->bytes);
+}
+
+static u64
+ip_set_get_packets(const struct ip_set_counter *counter)
+{
+ return (u64)atomic64_read(&(counter)->packets);
+}
+
+static bool
+ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
+{
+ return nla_put_net64(skb, IPSET_ATTR_BYTES,
+ cpu_to_be64(ip_set_get_bytes(counter)),
+ IPSET_ATTR_PAD) ||
+ nla_put_net64(skb, IPSET_ATTR_PACKETS,
+ cpu_to_be64(ip_set_get_packets(counter)),
+ IPSET_ATTR_PAD);
+}
+
+static bool
+ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
+{
+ /* Send nonzero parameters only */
+ return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+ nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+ cpu_to_be64((u64)skbinfo->skbmark << 32 |
+ skbinfo->skbmarkmask),
+ IPSET_ATTR_PAD)) ||
+ (skbinfo->skbprio &&
+ nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+ cpu_to_be32(skbinfo->skbprio))) ||
+ (skbinfo->skbqueue &&
+ nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+ cpu_to_be16(skbinfo->skbqueue)));
+}
+
int
ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
const void *e, bool active)
@@ -471,6 +588,55 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
}
EXPORT_SYMBOL_GPL(ip_set_put_extensions);
+static bool
+ip_set_match_counter(u64 counter, u64 match, u8 op)
+{
+ switch (op) {
+ case IPSET_COUNTER_NONE:
+ return true;
+ case IPSET_COUNTER_EQ:
+ return counter == match;
+ case IPSET_COUNTER_NE:
+ return counter != match;
+ case IPSET_COUNTER_LT:
+ return counter < match;
+ case IPSET_COUNTER_GT:
+ return counter > match;
+ }
+ return false;
+}
+
+static void
+ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
+{
+ atomic64_add((long long)bytes, &(counter)->bytes);
+}
+
+static void
+ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
+{
+ atomic64_add((long long)packets, &(counter)->packets);
+}
+
+static void
+ip_set_update_counter(struct ip_set_counter *counter,
+ const struct ip_set_ext *ext, u32 flags)
+{
+ if (ext->packets != ULLONG_MAX &&
+ !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
+ ip_set_add_bytes(ext->bytes, counter);
+ ip_set_add_packets(ext->packets, counter);
+ }
+}
+
+static void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+ const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ mext->skbinfo = *skbinfo;
+}
+
bool
ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags, void *data)
@@ -506,7 +672,7 @@ EXPORT_SYMBOL_GPL(ip_set_match_extensions);
* The set behind an index may change by swapping only, from userspace.
*/
-static inline void
+static void
__ip_set_get(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -514,7 +680,7 @@ __ip_set_get(struct ip_set *set)
write_unlock_bh(&ip_set_ref_lock);
}
-static inline void
+static void
__ip_set_put(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -526,7 +692,7 @@ __ip_set_put(struct ip_set *set)
/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
* a separate reference counter
*/
-static inline void
+static void
__ip_set_put_netlink(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -541,7 +707,7 @@ __ip_set_put_netlink(struct ip_set *set)
* so it can't be destroyed (or changed) under our foot.
*/
-static inline struct ip_set *
+static struct ip_set *
ip_set_rcu_get(struct net *net, ip_set_id_t index)
{
struct ip_set *set;
@@ -670,7 +836,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
*
*/
-static inline void
+static void
__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
{
struct ip_set *set;
@@ -1252,6 +1418,30 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
#define DUMP_TYPE(arg) (((u32)(arg)) & 0x0000FFFF)
#define DUMP_FLAGS(arg) (((u32)(arg)) >> 16)
+int
+ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
+{
+ u32 cadt_flags = 0;
+
+ if (SET_WITH_TIMEOUT(set))
+ if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+ htonl(set->timeout))))
+ return -EMSGSIZE;
+ if (SET_WITH_COUNTER(set))
+ cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
+ if (SET_WITH_COMMENT(set))
+ cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+ if (SET_WITH_SKBINFO(set))
+ cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
+ if (SET_WITH_FORCEADD(set))
+ cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
+
+ if (!cadt_flags)
+ return 0;
+ return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
+}
+EXPORT_SYMBOL_GPL(ip_set_put_flags);
+
static int
ip_set_dump_done(struct netlink_callback *cb)
{
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 2b8f959574b4..36615eb3eae1 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -148,31 +148,3 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
}
EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
#endif
-
-bool
-ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
-{
- bool ret;
- u8 proto;
-
- switch (pf) {
- case NFPROTO_IPV4:
- ret = ip_set_get_ip4_port(skb, src, port, &proto);
- break;
- case NFPROTO_IPV6:
- ret = ip_set_get_ip6_port(skb, src, port, &proto);
- break;
- default:
- return false;
- }
- if (!ret)
- return ret;
- switch (proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- return true;
- default:
- return false;
- }
-}
-EXPORT_SYMBOL_GPL(ip_set_get_ip_port);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index d098d87bc331..7480ce55b5c8 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -39,7 +39,7 @@
#ifdef IP_SET_HASH_WITH_MULTI
#define AHASH_MAX(h) ((h)->ahash_max)
-static inline u8
+static u8
tune_ahash_max(u8 curr, u32 multi)
{
u32 n;
@@ -909,7 +909,7 @@ out:
return ret;
}
-static inline int
+static int
mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index f4432d9fcad0..5d6d68eaf6a9 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -44,7 +44,7 @@ struct hash_ip4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ip4_data_equal(const struct hash_ip4_elem *e1,
const struct hash_ip4_elem *e2,
u32 *multi)
@@ -63,7 +63,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
{
next->ip = e->ip;
@@ -171,7 +171,7 @@ struct hash_ip6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
const struct hash_ip6_elem *ip2,
u32 *multi)
@@ -179,7 +179,7 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6);
}
-static inline void
+static void
hash_ip6_netmask(union nf_inet_addr *ip, u8 prefix)
{
ip6_netmask(ip, prefix);
@@ -196,7 +196,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e)
{
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
index 24d8f4df4230..e28cd72db6ad 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmac.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -47,7 +47,7 @@ struct hash_ipmac4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1,
const struct hash_ipmac4_elem *e2,
u32 *multi)
@@ -67,7 +67,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmac4_data_next(struct hash_ipmac4_elem *next,
const struct hash_ipmac4_elem *e)
{
@@ -154,7 +154,7 @@ struct hash_ipmac6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1,
const struct hash_ipmac6_elem *e2,
u32 *multi)
@@ -175,7 +175,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmac6_data_next(struct hash_ipmac6_elem *next,
const struct hash_ipmac6_elem *e)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 7a1734aad0c5..aba1df617d6e 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -42,7 +42,7 @@ struct hash_ipmark4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1,
const struct hash_ipmark4_elem *ip2,
u32 *multi)
@@ -64,7 +64,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
const struct hash_ipmark4_elem *d)
{
@@ -165,7 +165,7 @@ struct hash_ipmark6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1,
const struct hash_ipmark6_elem *ip2,
u32 *multi)
@@ -187,7 +187,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmark6_data_next(struct hash_ipmark6_elem *next,
const struct hash_ipmark6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 32e240658334..1ff228717e29 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -47,7 +47,7 @@ struct hash_ipport4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
const struct hash_ipport4_elem *ip2,
u32 *multi)
@@ -71,7 +71,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipport4_data_next(struct hash_ipport4_elem *next,
const struct hash_ipport4_elem *d)
{
@@ -202,7 +202,7 @@ struct hash_ipport6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
const struct hash_ipport6_elem *ip2,
u32 *multi)
@@ -226,7 +226,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipport6_data_next(struct hash_ipport6_elem *next,
const struct hash_ipport6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 15d419353179..fa88afd812fa 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -46,7 +46,7 @@ struct hash_ipportip4_elem {
u8 padding;
};
-static inline bool
+static bool
hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
const struct hash_ipportip4_elem *ip2,
u32 *multi)
@@ -72,7 +72,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
const struct hash_ipportip4_elem *d)
{
@@ -210,7 +210,7 @@ struct hash_ipportip6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
const struct hash_ipportip6_elem *ip2,
u32 *multi)
@@ -236,7 +236,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportip6_data_next(struct hash_ipportip6_elem *next,
const struct hash_ipportip6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7a4d7afd4121..eef6ecfcb409 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -59,7 +59,7 @@ struct hash_ipportnet4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
const struct hash_ipportnet4_elem *ip2,
u32 *multi)
@@ -71,25 +71,25 @@ hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
{
elem->ip2 &= ip_set_netmask(cidr);
@@ -116,7 +116,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
const struct hash_ipportnet4_elem *d)
{
@@ -308,7 +308,7 @@ struct hash_ipportnet6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
const struct hash_ipportnet6_elem *ip2,
u32 *multi)
@@ -320,25 +320,25 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip2, cidr);
@@ -365,7 +365,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next,
const struct hash_ipportnet6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index d94c585d33c5..0b61593165ef 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -37,7 +37,7 @@ struct hash_mac4_elem {
/* Common functions */
-static inline bool
+static bool
hash_mac4_data_equal(const struct hash_mac4_elem *e1,
const struct hash_mac4_elem *e2,
u32 *multi)
@@ -45,7 +45,7 @@ hash_mac4_data_equal(const struct hash_mac4_elem *e1,
return ether_addr_equal(e1->ether, e2->ether);
}
-static inline bool
+static bool
hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
{
if (nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
@@ -56,7 +56,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_mac4_data_next(struct hash_mac4_elem *next,
const struct hash_mac4_elem *e)
{
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index c259cbc3ef45..86133fae4b69 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -47,7 +47,7 @@ struct hash_net4_elem {
/* Common functions */
-static inline bool
+static bool
hash_net4_data_equal(const struct hash_net4_elem *ip1,
const struct hash_net4_elem *ip2,
u32 *multi)
@@ -56,25 +56,25 @@ hash_net4_data_equal(const struct hash_net4_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_net4_do_data_match(const struct hash_net4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_net4_data_set_flags(struct hash_net4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_net4_data_reset_flags(struct hash_net4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
@@ -97,7 +97,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_net4_data_next(struct hash_net4_elem *next,
const struct hash_net4_elem *d)
{
@@ -212,7 +212,7 @@ struct hash_net6_elem {
/* Common functions */
-static inline bool
+static bool
hash_net6_data_equal(const struct hash_net6_elem *ip1,
const struct hash_net6_elem *ip2,
u32 *multi)
@@ -221,25 +221,25 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_net6_do_data_match(const struct hash_net6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_net6_data_set_flags(struct hash_net6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_net6_data_reset_flags(struct hash_net6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
@@ -262,7 +262,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_net6_data_next(struct hash_net6_elem *next,
const struct hash_net6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 87b29f971226..1a04e0929738 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -62,7 +62,7 @@ struct hash_netiface4_elem {
/* Common functions */
-static inline bool
+static bool
hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
const struct hash_netiface4_elem *ip2,
u32 *multi)
@@ -74,25 +74,25 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
strcmp(ip1->iface, ip2->iface) == 0;
}
-static inline int
+static int
hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
@@ -119,7 +119,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netiface4_data_next(struct hash_netiface4_elem *next,
const struct hash_netiface4_elem *d)
{
@@ -285,7 +285,7 @@ struct hash_netiface6_elem {
/* Common functions */
-static inline bool
+static bool
hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
const struct hash_netiface6_elem *ip2,
u32 *multi)
@@ -297,25 +297,25 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
strcmp(ip1->iface, ip2->iface) == 0;
}
-static inline int
+static int
hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
@@ -342,7 +342,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netiface6_data_next(struct hash_netiface6_elem *next,
const struct hash_netiface6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index a3ae69bfee66..bcb6d0b4db36 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -52,7 +52,7 @@ struct hash_netnet4_elem {
/* Common functions */
-static inline bool
+static bool
hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
const struct hash_netnet4_elem *ip2,
u32 *multi)
@@ -61,32 +61,32 @@ hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
ip1->ccmp == ip2->ccmp;
}
-static inline int
+static int
hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
struct hash_netnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
{
if (inner) {
@@ -117,7 +117,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netnet4_data_next(struct hash_netnet4_elem *next,
const struct hash_netnet4_elem *d)
{
@@ -282,7 +282,7 @@ struct hash_netnet6_elem {
/* Common functions */
-static inline bool
+static bool
hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
const struct hash_netnet6_elem *ip2,
u32 *multi)
@@ -292,32 +292,32 @@ hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
ip1->ccmp == ip2->ccmp;
}
-static inline int
+static int
hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
struct hash_netnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
{
if (inner) {
@@ -348,7 +348,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netnet6_data_next(struct hash_netnet6_elem *next,
const struct hash_netnet6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 799f2272cc65..34448df80fb9 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -57,7 +57,7 @@ struct hash_netport4_elem {
/* Common functions */
-static inline bool
+static bool
hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
const struct hash_netport4_elem *ip2,
u32 *multi)
@@ -68,25 +68,25 @@ hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_netport4_do_data_match(const struct hash_netport4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netport4_data_set_flags(struct hash_netport4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netport4_data_reset_flags(struct hash_netport4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
@@ -112,7 +112,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netport4_data_next(struct hash_netport4_elem *next,
const struct hash_netport4_elem *d)
{
@@ -270,7 +270,7 @@ struct hash_netport6_elem {
/* Common functions */
-static inline bool
+static bool
hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
const struct hash_netport6_elem *ip2,
u32 *multi)
@@ -281,25 +281,25 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_netport6_do_data_match(const struct hash_netport6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netport6_data_set_flags(struct hash_netport6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netport6_data_reset_flags(struct hash_netport6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
@@ -325,7 +325,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netport6_data_next(struct hash_netport6_elem *next,
const struct hash_netport6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index a82b70e8b9a6..934c1712cba8 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -56,7 +56,7 @@ struct hash_netportnet4_elem {
/* Common functions */
-static inline bool
+static bool
hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
const struct hash_netportnet4_elem *ip2,
u32 *multi)
@@ -67,32 +67,32 @@ hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
struct hash_netportnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
u8 cidr, bool inner)
{
@@ -126,7 +126,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
const struct hash_netportnet4_elem *d)
{
@@ -331,7 +331,7 @@ struct hash_netportnet6_elem {
/* Common functions */
-static inline bool
+static bool
hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
const struct hash_netportnet6_elem *ip2,
u32 *multi)
@@ -343,32 +343,32 @@ hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
struct hash_netportnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
u8 cidr, bool inner)
{
@@ -402,7 +402,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netportnet6_data_next(struct hash_netportnet6_elem *next,
const struct hash_netportnet6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 67ac50104e6f..cd747c0962fd 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -149,7 +149,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
kfree(e);
}
-static inline void
+static void
list_set_del(struct ip_set *set, struct set_elem *e)
{
struct list_set *map = set->data;
@@ -160,7 +160,7 @@ list_set_del(struct ip_set *set, struct set_elem *e)
call_rcu(&e->rcu, __list_set_del_rcu);
}
-static inline void
+static void
list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
{
struct list_set *map = set->data;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 8b80ab794a92..512259f579d7 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2402,18 +2402,22 @@ estimator_fail:
return -ENOMEM;
}
-static void __net_exit __ip_vs_cleanup(struct net *net)
+static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
- ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */
- ip_vs_conn_net_cleanup(ipvs);
- ip_vs_app_net_cleanup(ipvs);
- ip_vs_protocol_net_cleanup(ipvs);
- ip_vs_control_net_cleanup(ipvs);
- ip_vs_estimator_net_cleanup(ipvs);
- IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
- net->ipvs = NULL;
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
+ ip_vs_service_nets_cleanup(net_list); /* ip_vs_flush() with locks */
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ ip_vs_conn_net_cleanup(ipvs);
+ ip_vs_app_net_cleanup(ipvs);
+ ip_vs_protocol_net_cleanup(ipvs);
+ ip_vs_control_net_cleanup(ipvs);
+ ip_vs_estimator_net_cleanup(ipvs);
+ IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
+ net->ipvs = NULL;
+ }
}
static int __net_init __ip_vs_dev_init(struct net *net)
@@ -2429,27 +2433,32 @@ hook_fail:
return ret;
}
-static void __net_exit __ip_vs_dev_cleanup(struct net *net)
+static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
EnterFunction(2);
- nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
- ipvs->enable = 0; /* Disable packet reception */
- smp_wmb();
- ip_vs_sync_net_cleanup(ipvs);
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+ ipvs->enable = 0; /* Disable packet reception */
+ smp_wmb();
+ ip_vs_sync_net_cleanup(ipvs);
+ }
LeaveFunction(2);
}
static struct pernet_operations ipvs_core_ops = {
.init = __ip_vs_init,
- .exit = __ip_vs_cleanup,
+ .exit_batch = __ip_vs_cleanup_batch,
.id = &ip_vs_net_id,
.size = sizeof(struct netns_ipvs),
};
static struct pernet_operations ipvs_core_dev_ops = {
.init = __ip_vs_dev_init,
- .exit = __ip_vs_dev_cleanup,
+ .exit_batch = __ip_vs_dev_cleanup_batch,
};
/*
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 3cccc88ef817..3be7398901e0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1607,14 +1607,20 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
/*
* Delete service by {netns} in the service table.
- * Called by __ip_vs_cleanup()
+ * Called by __ip_vs_batch_cleanup()
*/
-void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
+void ip_vs_service_nets_cleanup(struct list_head *net_list)
{
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
- ip_vs_flush(ipvs, true);
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ ip_vs_flush(ipvs, true);
+ }
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
}
diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c
index 78b074cd5464..c03066fdd5ca 100644
--- a/net/netfilter/ipvs/ip_vs_ovf.c
+++ b/net/netfilter/ipvs/ip_vs_ovf.c
@@ -5,7 +5,7 @@
* Authors: Raducu Deaconu <rhadoo_io@yahoo.com>
*
* Scheduler implements "overflow" loadbalancing according to number of active
- * connections , will keep all conections to the node with the highest weight
+ * connections , will keep all connections to the node with the highest weight
* and overflow to the next node if the number of connections exceeds the node's
* weight.
* Note that this scheduler might not be suitable for UDP because it only uses
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 888d3068a492..b1e300f8881b 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -407,12 +407,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
goto err_put;
skb_dst_drop(skb);
- if (noref) {
- if (!local)
- skb_dst_set_noref(skb, &rt->dst);
- else
- skb_dst_set(skb, dst_clone(&rt->dst));
- } else
+ if (noref)
+ skb_dst_set_noref(skb, &rt->dst);
+ else
skb_dst_set(skb, &rt->dst);
return local;
@@ -574,12 +571,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
goto err_put;
skb_dst_drop(skb);
- if (noref) {
- if (!local)
- skb_dst_set_noref(skb, &rt->dst);
- else
- skb_dst_set(skb, dst_clone(&rt->dst));
- } else
+ if (noref)
+ skb_dst_set_noref(skb, &rt->dst);
+ else
skb_dst_set(skb, &rt->dst);
return local;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5cd610b547e0..0af1898af2b8 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -573,7 +573,6 @@ EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
nf_ct_ext_destroy(tmpl);
- nf_ct_ext_free(tmpl);
if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
kfree((char *)tmpl - tmpl->proto.tmpl_padto);
@@ -1417,7 +1416,6 @@ void nf_conntrack_free(struct nf_conn *ct)
WARN_ON(atomic_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
- nf_ct_ext_free(ct);
kmem_cache_free(nf_conntrack_cachep, ct);
smp_mb__before_atomic();
atomic_dec(&net->ct.count);
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 6fba74b5aaf7..7956c9f19899 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -30,6 +30,7 @@
static DEFINE_MUTEX(nf_ct_ecache_mutex);
#define ECACHE_RETRY_WAIT (HZ/10)
+#define ECACHE_STACK_ALLOC (256 / sizeof(void *))
enum retry_state {
STATE_CONGESTED,
@@ -39,11 +40,11 @@ enum retry_state {
static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
{
- struct nf_conn *refs[16];
+ struct nf_conn *refs[ECACHE_STACK_ALLOC];
+ enum retry_state ret = STATE_DONE;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
unsigned int evicted = 0;
- enum retry_state ret = STATE_DONE;
spin_lock(&pcpu->lock);
@@ -54,10 +55,22 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
if (!nf_ct_is_confirmed(ct))
continue;
+ /* This ecache access is safe because the ct is on the
+ * pcpu dying list and we hold the spinlock -- the entry
+ * cannot be free'd until after the lock is released.
+ *
+ * This is true even if ct has a refcount of 0: the
+ * cpu that is about to free the entry must remove it
+ * from the dying list and needs the lock to do so.
+ */
e = nf_ct_ecache_find(ct);
if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
continue;
+ /* ct is in NFCT_ECACHE_DESTROY_FAIL state, this means
+ * the worker owns this entry: the ct will remain valid
+ * until the worker puts its ct reference.
+ */
if (nf_conntrack_event(IPCT_DESTROY, ct)) {
ret = STATE_CONGESTED;
break;
@@ -189,15 +202,15 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
if (notify == NULL)
goto out_unlock;
+ if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
+ goto out_unlock;
+
e = nf_ct_ecache_find(ct);
if (e == NULL)
goto out_unlock;
events = xchg(&e->cache, 0);
- if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
- goto out_unlock;
-
/* We make a copy of the missed event cache without taking
* the lock, thus we may send missed events twice. However,
* this does not harm and it happens very rarely. */
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index d4ed1e197921..c24e5b64b00c 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -34,21 +34,24 @@ void nf_ct_ext_destroy(struct nf_conn *ct)
t->destroy(ct);
rcu_read_unlock();
}
+
+ kfree(ct->ext);
}
EXPORT_SYMBOL(nf_ct_ext_destroy);
void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
{
unsigned int newlen, newoff, oldlen, alloc;
- struct nf_ct_ext *old, *new;
struct nf_ct_ext_type *t;
+ struct nf_ct_ext *new;
/* Conntrack must not be confirmed to avoid races on reallocation. */
WARN_ON(nf_ct_is_confirmed(ct));
- old = ct->ext;
- if (old) {
+ if (ct->ext) {
+ const struct nf_ct_ext *old = ct->ext;
+
if (__nf_ct_ext_exist(old, id))
return NULL;
oldlen = old->len;
@@ -68,22 +71,18 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
rcu_read_unlock();
alloc = max(newlen, NF_CT_EXT_PREALLOC);
- kmemleak_not_leak(old);
- new = __krealloc(old, alloc, gfp);
+ new = krealloc(ct->ext, alloc, gfp);
if (!new)
return NULL;
- if (!old) {
+ if (!ct->ext)
memset(new->offset, 0, sizeof(new->offset));
- ct->ext = new;
- } else if (new != old) {
- kfree_rcu(old, rcu);
- rcu_assign_pointer(ct->ext, new);
- }
new->offset[id] = newoff;
new->len = newlen;
memset((void *)new + newoff, 0, newlen - newoff);
+
+ ct->ext = new;
return (void *)new + newoff;
}
EXPORT_SYMBOL(nf_ct_ext_add);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e2d13cd18875..d8d33ef52ce0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -506,9 +506,45 @@ nla_put_failure:
return -1;
}
+/* all these functions access ct->ext. Caller must either hold a reference
+ * on ct or prevent its deletion by holding either the bucket spinlock or
+ * pcpu dying list lock.
+ */
+static int ctnetlink_dump_extinfo(struct sk_buff *skb,
+ struct nf_conn *ct, u32 type)
+{
+ if (ctnetlink_dump_acct(skb, ct, type) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0 ||
+ ctnetlink_dump_helpinfo(skb, ct) < 0 ||
+ ctnetlink_dump_labels(skb, ct) < 0 ||
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
+ ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
+{
+ if (ctnetlink_dump_status(skb, ct) < 0 ||
+ ctnetlink_dump_mark(skb, ct) < 0 ||
+ ctnetlink_dump_secctx(skb, ct) < 0 ||
+ ctnetlink_dump_id(skb, ct) < 0 ||
+ ctnetlink_dump_use(skb, ct) < 0 ||
+ ctnetlink_dump_master(skb, ct) < 0)
+ return -1;
+
+ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
+ (ctnetlink_dump_timeout(skb, ct) < 0 ||
+ ctnetlink_dump_protoinfo(skb, ct) < 0))
+ return -1;
+
+ return 0;
+}
+
static int
ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
- struct nf_conn *ct)
+ struct nf_conn *ct, bool extinfo)
{
const struct nf_conntrack_zone *zone;
struct nlmsghdr *nlh;
@@ -552,23 +588,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
NF_CT_DEFAULT_ZONE_DIR) < 0)
goto nla_put_failure;
- if (ctnetlink_dump_status(skb, ct) < 0 ||
- ctnetlink_dump_acct(skb, ct, type) < 0 ||
- ctnetlink_dump_timestamp(skb, ct) < 0 ||
- ctnetlink_dump_helpinfo(skb, ct) < 0 ||
- ctnetlink_dump_mark(skb, ct) < 0 ||
- ctnetlink_dump_secctx(skb, ct) < 0 ||
- ctnetlink_dump_labels(skb, ct) < 0 ||
- ctnetlink_dump_id(skb, ct) < 0 ||
- ctnetlink_dump_use(skb, ct) < 0 ||
- ctnetlink_dump_master(skb, ct) < 0 ||
- ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
- ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+ if (ctnetlink_dump_info(skb, ct) < 0)
goto nla_put_failure;
-
- if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
- (ctnetlink_dump_timeout(skb, ct) < 0 ||
- ctnetlink_dump_protoinfo(skb, ct) < 0))
+ if (extinfo && ctnetlink_dump_extinfo(skb, ct, type) < 0)
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -953,13 +975,11 @@ restart:
if (!ctnetlink_filter_match(ct, cb->data))
continue;
- rcu_read_lock();
res =
ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct);
- rcu_read_unlock();
+ ct, true);
if (res < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
@@ -1364,10 +1384,8 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
return -ENOMEM;
}
- rcu_read_lock();
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type), ct);
- rcu_read_unlock();
+ NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true);
nf_ct_put(ct);
if (err <= 0)
goto free;
@@ -1429,12 +1447,18 @@ restart:
continue;
cb->args[1] = 0;
}
- rcu_read_lock();
+
+ /* We can't dump extension info for the unconfirmed
+ * list because unconfirmed conntracks can have
+ * ct->ext reallocated (and thus freed).
+ *
+ * In the dying list case ct->ext can't be free'd
+ * until after we drop pcpu->lock.
+ */
res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct);
- rcu_read_unlock();
+ ct, dying ? true : false);
if (res < 0) {
if (!atomic_inc_not_zero(&ct->ct_general.use))
continue;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d481f9baca2f..13f09412cc6a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -151,11 +151,64 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
}
}
+static int nft_netdev_register_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+ int err, j;
+
+ j = 0;
+ list_for_each_entry(hook, hook_list, list) {
+ err = nf_register_net_hook(net, &hook->ops);
+ if (err < 0)
+ goto err_register;
+
+ j++;
+ }
+ return 0;
+
+err_register:
+ list_for_each_entry(hook, hook_list, list) {
+ if (j-- <= 0)
+ break;
+
+ nf_unregister_net_hook(net, &hook->ops);
+ }
+ return err;
+}
+
+static void nft_netdev_unregister_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+
+ list_for_each_entry(hook, hook_list, list)
+ nf_unregister_net_hook(net, &hook->ops);
+}
+
+static int nft_register_basechain_hooks(struct net *net, int family,
+ struct nft_base_chain *basechain)
+{
+ if (family == NFPROTO_NETDEV)
+ return nft_netdev_register_hooks(net, &basechain->hook_list);
+
+ return nf_register_net_hook(net, &basechain->ops);
+}
+
+static void nft_unregister_basechain_hooks(struct net *net, int family,
+ struct nft_base_chain *basechain)
+{
+ if (family == NFPROTO_NETDEV)
+ nft_netdev_unregister_hooks(net, &basechain->hook_list);
+ else
+ nf_unregister_net_hook(net, &basechain->ops);
+}
+
static int nf_tables_register_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
{
- const struct nft_base_chain *basechain;
+ struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT ||
@@ -168,14 +221,14 @@ static int nf_tables_register_hook(struct net *net,
if (basechain->type->ops_register)
return basechain->type->ops_register(net, ops);
- return nf_register_net_hook(net, ops);
+ return nft_register_basechain_hooks(net, table->family, basechain);
}
static void nf_tables_unregister_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
{
- const struct nft_base_chain *basechain;
+ struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT ||
@@ -187,7 +240,7 @@ static void nf_tables_unregister_hook(struct net *net,
if (basechain->type->ops_unregister)
return basechain->type->ops_unregister(net, ops);
- nf_unregister_net_hook(net, ops);
+ nft_unregister_basechain_hooks(net, table->family, basechain);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@ -742,7 +795,8 @@ static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
if (cnt && i++ == cnt)
break;
- nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
+ nft_unregister_basechain_hooks(net, table->family,
+ nft_base_chain(chain));
}
}
@@ -757,14 +811,16 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table)
if (!nft_is_base_chain(chain))
continue;
- err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
+ err = nft_register_basechain_hooks(net, table->family,
+ nft_base_chain(chain));
if (err < 0)
- goto err;
+ goto err_register_hooks;
i++;
}
return 0;
-err:
+
+err_register_hooks:
if (i)
nft_table_disable(net, table, i);
return err;
@@ -1225,6 +1281,46 @@ nla_put_failure:
return -ENOSPC;
}
+static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
+ const struct nft_base_chain *basechain)
+{
+ const struct nf_hook_ops *ops = &basechain->ops;
+ struct nft_hook *hook, *first = NULL;
+ struct nlattr *nest, *nest_devs;
+ int n = 0;
+
+ nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+ goto nla_put_failure;
+
+ if (family == NFPROTO_NETDEV) {
+ nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS);
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (!first)
+ first = hook;
+
+ if (nla_put_string(skb, NFTA_DEVICE_NAME,
+ hook->ops.dev->name))
+ goto nla_put_failure;
+ n++;
+ }
+ nla_nest_end(skb, nest_devs);
+
+ if (n == 1 &&
+ nla_put_string(skb, NFTA_HOOK_DEV, first->ops.dev->name))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+
+ return 0;
+nla_put_failure:
+ return -1;
+}
+
static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq, int event, u32 flags,
int family, const struct nft_table *table,
@@ -1253,21 +1349,10 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
- const struct nf_hook_ops *ops = &basechain->ops;
struct nft_stats __percpu *stats;
- struct nlattr *nest;
- nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
- if (nest == NULL)
- goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
- goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+ if (nft_dump_basechain_hook(skb, family, basechain))
goto nla_put_failure;
- if (basechain->dev_name[0] &&
- nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name))
- goto nla_put_failure;
- nla_nest_end(skb, nest);
if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
htonl(basechain->policy)))
@@ -1485,6 +1570,7 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
static void nf_tables_chain_destroy(struct nft_ctx *ctx)
{
struct nft_chain *chain = ctx->chain;
+ struct nft_hook *hook, *next;
if (WARN_ON(chain->use > 0))
return;
@@ -1495,6 +1581,13 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
if (nft_is_base_chain(chain)) {
struct nft_base_chain *basechain = nft_base_chain(chain);
+ if (ctx->family == NFPROTO_NETDEV) {
+ list_for_each_entry_safe(hook, next,
+ &basechain->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+ }
module_put(basechain->type->owner);
if (rcu_access_pointer(basechain->stats)) {
static_branch_dec(&nft_counters_enabled);
@@ -1508,13 +1601,125 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
}
}
+static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
+ const struct nlattr *attr)
+{
+ struct net_device *dev;
+ char ifname[IFNAMSIZ];
+ struct nft_hook *hook;
+ int err;
+
+ hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
+ if (!hook) {
+ err = -ENOMEM;
+ goto err_hook_alloc;
+ }
+
+ nla_strlcpy(ifname, attr, IFNAMSIZ);
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev) {
+ err = -ENOENT;
+ goto err_hook_dev;
+ }
+ hook->ops.dev = dev;
+
+ return hook;
+
+err_hook_dev:
+ kfree(hook);
+err_hook_alloc:
+ return ERR_PTR(err);
+}
+
+static bool nft_hook_list_find(struct list_head *hook_list,
+ const struct nft_hook *this)
+{
+ struct nft_hook *hook;
+
+ list_for_each_entry(hook, hook_list, list) {
+ if (this->ops.dev == hook->ops.dev)
+ return true;
+ }
+
+ return false;
+}
+
+static int nf_tables_parse_netdev_hooks(struct net *net,
+ const struct nlattr *attr,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook, *next;
+ const struct nlattr *tmp;
+ int rem, n = 0, err;
+
+ nla_for_each_nested(tmp, attr, rem) {
+ if (nla_type(tmp) != NFTA_DEVICE_NAME) {
+ err = -EINVAL;
+ goto err_hook;
+ }
+
+ hook = nft_netdev_hook_alloc(net, tmp);
+ if (IS_ERR(hook)) {
+ err = PTR_ERR(hook);
+ goto err_hook;
+ }
+ if (nft_hook_list_find(hook_list, hook)) {
+ err = -EEXIST;
+ goto err_hook;
+ }
+ list_add_tail(&hook->list, hook_list);
+ n++;
+
+ if (n == NFT_NETDEVICE_MAX) {
+ err = -EFBIG;
+ goto err_hook;
+ }
+ }
+ if (!n)
+ return -EINVAL;
+
+ return 0;
+
+err_hook:
+ list_for_each_entry_safe(hook, next, hook_list, list) {
+ list_del(&hook->list);
+ kfree(hook);
+ }
+ return err;
+}
+
struct nft_chain_hook {
u32 num;
s32 priority;
const struct nft_chain_type *type;
- struct net_device *dev;
+ struct list_head list;
};
+static int nft_chain_parse_netdev(struct net *net,
+ struct nlattr *tb[],
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+ int err;
+
+ if (tb[NFTA_HOOK_DEV]) {
+ hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]);
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ list_add_tail(&hook->list, hook_list);
+ } else if (tb[NFTA_HOOK_DEVS]) {
+ err = nf_tables_parse_netdev_hooks(net, tb[NFTA_HOOK_DEVS],
+ hook_list);
+ if (err < 0)
+ return err;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nft_chain_parse_hook(struct net *net,
const struct nlattr * const nla[],
struct nft_chain_hook *hook, u8 family,
@@ -1522,7 +1727,6 @@ static int nft_chain_parse_hook(struct net *net,
{
struct nlattr *ha[NFTA_HOOK_MAX + 1];
const struct nft_chain_type *type;
- struct net_device *dev;
int err;
lockdep_assert_held(&net->nft.commit_mutex);
@@ -1560,23 +1764,14 @@ static int nft_chain_parse_hook(struct net *net,
hook->type = type;
- hook->dev = NULL;
+ INIT_LIST_HEAD(&hook->list);
if (family == NFPROTO_NETDEV) {
- char ifname[IFNAMSIZ];
-
- if (!ha[NFTA_HOOK_DEV]) {
- module_put(type->owner);
- return -EOPNOTSUPP;
- }
-
- nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
- dev = __dev_get_by_name(net, ifname);
- if (!dev) {
+ err = nft_chain_parse_netdev(net, ha, &hook->list);
+ if (err < 0) {
module_put(type->owner);
- return -ENOENT;
+ return err;
}
- hook->dev = dev;
- } else if (ha[NFTA_HOOK_DEV]) {
+ } else if (ha[NFTA_HOOK_DEV] || ha[NFTA_HOOK_DEVS]) {
module_put(type->owner);
return -EOPNOTSUPP;
}
@@ -1586,6 +1781,12 @@ static int nft_chain_parse_hook(struct net *net,
static void nft_chain_release_hook(struct nft_chain_hook *hook)
{
+ struct nft_hook *h, *next;
+
+ list_for_each_entry_safe(h, next, &hook->list, list) {
+ list_del(&h->list);
+ kfree(h);
+ }
module_put(hook->type->owner);
}
@@ -1610,6 +1811,49 @@ static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *cha
return kvmalloc(alloc, GFP_KERNEL);
}
+static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
+ const struct nft_chain_hook *hook,
+ struct nft_chain *chain)
+{
+ ops->pf = family;
+ ops->hooknum = hook->num;
+ ops->priority = hook->priority;
+ ops->priv = chain;
+ ops->hook = hook->type->hooks[ops->hooknum];
+}
+
+static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
+ struct nft_chain_hook *hook, u32 flags)
+{
+ struct nft_chain *chain;
+ struct nft_hook *h;
+
+ basechain->type = hook->type;
+ INIT_LIST_HEAD(&basechain->hook_list);
+ chain = &basechain->chain;
+
+ if (family == NFPROTO_NETDEV) {
+ list_splice_init(&hook->list, &basechain->hook_list);
+ list_for_each_entry(h, &basechain->hook_list, list)
+ nft_basechain_hook_init(&h->ops, family, hook, chain);
+
+ basechain->ops.hooknum = hook->num;
+ basechain->ops.priority = hook->priority;
+ } else {
+ nft_basechain_hook_init(&basechain->ops, family, hook, chain);
+ }
+
+ chain->flags |= NFT_BASE_CHAIN | flags;
+ basechain->policy = NF_ACCEPT;
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+ nft_chain_offload_priority(basechain) < 0)
+ return -EOPNOTSUPP;
+
+ flow_block_init(&basechain->flow_block);
+
+ return 0;
+}
+
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
u8 policy, u32 flags)
{
@@ -1628,7 +1872,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_chain_hook hook;
- struct nf_hook_ops *ops;
err = nft_chain_parse_hook(net, nla, &hook, family, true);
if (err < 0)
@@ -1639,9 +1882,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
nft_chain_release_hook(&hook);
return -ENOMEM;
}
-
- if (hook.dev != NULL)
- strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
+ chain = &basechain->chain;
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
@@ -1654,24 +1895,12 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
static_branch_inc(&nft_counters_enabled);
}
- basechain->type = hook.type;
- chain = &basechain->chain;
-
- ops = &basechain->ops;
- ops->pf = family;
- ops->hooknum = hook.num;
- ops->priority = hook.priority;
- ops->priv = chain;
- ops->hook = hook.type->hooks[ops->hooknum];
- ops->dev = hook.dev;
-
- chain->flags |= NFT_BASE_CHAIN | flags;
- basechain->policy = NF_ACCEPT;
- if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
- nft_chain_offload_priority(basechain) < 0)
- return -EOPNOTSUPP;
-
- flow_block_init(&basechain->flow_block);
+ err = nft_basechain_init(basechain, family, &hook, flags);
+ if (err < 0) {
+ nft_chain_release_hook(&hook);
+ kfree(basechain);
+ return err;
+ }
} else {
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
@@ -1731,6 +1960,25 @@ err1:
return err;
}
+static bool nft_hook_list_equal(struct list_head *hook_list1,
+ struct list_head *hook_list2)
+{
+ struct nft_hook *hook;
+ int n = 0, m = 0;
+
+ n = 0;
+ list_for_each_entry(hook, hook_list2, list) {
+ if (!nft_hook_list_find(hook_list1, hook))
+ return false;
+
+ n++;
+ }
+ list_for_each_entry(hook, hook_list1, list)
+ m++;
+
+ return n == m;
+}
+
static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
u32 flags)
{
@@ -1762,12 +2010,19 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return -EBUSY;
}
- ops = &basechain->ops;
- if (ops->hooknum != hook.num ||
- ops->priority != hook.priority ||
- ops->dev != hook.dev) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
+ if (ctx->family == NFPROTO_NETDEV) {
+ if (!nft_hook_list_equal(&basechain->hook_list,
+ &hook.list)) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+ } else {
+ ops = &basechain->ops;
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
}
nft_chain_release_hook(&hook);
}
@@ -5628,43 +5883,6 @@ nft_flowtable_lookup_byhandle(const struct nft_table *table,
return ERR_PTR(-ENOENT);
}
-static int nf_tables_parse_devices(const struct nft_ctx *ctx,
- const struct nlattr *attr,
- struct net_device *dev_array[], int *len)
-{
- const struct nlattr *tmp;
- struct net_device *dev;
- char ifname[IFNAMSIZ];
- int rem, n = 0, err;
-
- nla_for_each_nested(tmp, attr, rem) {
- if (nla_type(tmp) != NFTA_DEVICE_NAME) {
- err = -EINVAL;
- goto err1;
- }
-
- nla_strlcpy(ifname, tmp, IFNAMSIZ);
- dev = __dev_get_by_name(ctx->net, ifname);
- if (!dev) {
- err = -ENOENT;
- goto err1;
- }
-
- dev_array[n++] = dev;
- if (n == NFT_FLOWTABLE_DEVICE_MAX) {
- err = -EFBIG;
- goto err1;
- }
- }
- if (!len)
- return -EINVAL;
-
- err = 0;
-err1:
- *len = n;
- return err;
-}
-
static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
[NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 },
[NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 },
@@ -5675,11 +5893,10 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
const struct nlattr *attr,
struct nft_flowtable *flowtable)
{
- struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
- struct nf_hook_ops *ops;
+ struct nft_hook *hook;
int hooknum, priority;
- int err, n = 0, i;
+ int err;
err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
nft_flowtable_hook_policy, NULL);
@@ -5697,27 +5914,21 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
- err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
- dev_array, &n);
+ err = nf_tables_parse_netdev_hooks(ctx->net,
+ tb[NFTA_FLOWTABLE_HOOK_DEVS],
+ &flowtable->hook_list);
if (err < 0)
return err;
- ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL);
- if (!ops)
- return -ENOMEM;
-
- flowtable->hooknum = hooknum;
- flowtable->priority = priority;
- flowtable->ops = ops;
- flowtable->ops_len = n;
+ flowtable->hooknum = hooknum;
+ flowtable->data.priority = priority;
- for (i = 0; i < n; i++) {
- flowtable->ops[i].pf = NFPROTO_NETDEV;
- flowtable->ops[i].hooknum = hooknum;
- flowtable->ops[i].priority = priority;
- flowtable->ops[i].priv = &flowtable->data;
- flowtable->ops[i].hook = flowtable->data.type->hook;
- flowtable->ops[i].dev = dev_array[i];
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ hook->ops.pf = NFPROTO_NETDEV;
+ hook->ops.hooknum = hooknum;
+ hook->ops.priority = priority;
+ hook->ops.priv = &flowtable->data;
+ hook->ops.hook = flowtable->data.type->hook;
}
return err;
@@ -5757,14 +5968,51 @@ nft_flowtable_type_get(struct net *net, u8 family)
static void nft_unregister_flowtable_net_hooks(struct net *net,
struct nft_flowtable *flowtable)
{
- int i;
+ struct nft_hook *hook;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (!flowtable->ops[i].dev)
- continue;
+ list_for_each_entry(hook, &flowtable->hook_list, list)
+ nf_unregister_net_hook(net, &hook->ops);
+}
- nf_unregister_net_hook(net, &flowtable->ops[i]);
+static int nft_register_flowtable_net_hooks(struct net *net,
+ struct nft_table *table,
+ struct nft_flowtable *flowtable)
+{
+ struct nft_hook *hook, *hook2, *next;
+ struct nft_flowtable *ft;
+ int err, i = 0;
+
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ list_for_each_entry(ft, &table->flowtables, list) {
+ list_for_each_entry(hook2, &ft->hook_list, list) {
+ if (hook->ops.dev == hook2->ops.dev &&
+ hook->ops.pf == hook2->ops.pf) {
+ err = -EBUSY;
+ goto err_unregister_net_hooks;
+ }
+ }
+ }
+
+ err = nf_register_net_hook(net, &hook->ops);
+ if (err < 0)
+ goto err_unregister_net_hooks;
+
+ i++;
}
+
+ return 0;
+
+err_unregister_net_hooks:
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ if (i-- <= 0)
+ break;
+
+ nf_unregister_net_hook(net, &hook->ops);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+
+ return err;
}
static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
@@ -5775,12 +6023,13 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nf_flowtable_type *type;
- struct nft_flowtable *flowtable, *ft;
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ struct nft_hook *hook, *next;
struct nft_table *table;
struct nft_ctx ctx;
- int err, i, k;
+ int err;
if (!nla[NFTA_FLOWTABLE_TABLE] ||
!nla[NFTA_FLOWTABLE_NAME] ||
@@ -5819,6 +6068,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
flowtable->table = table;
flowtable->handle = nf_tables_alloc_handle(table);
+ INIT_LIST_HEAD(&flowtable->hook_list);
flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
if (!flowtable->name) {
@@ -5842,43 +6092,24 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
if (err < 0)
goto err4;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (!flowtable->ops[i].dev)
- continue;
-
- list_for_each_entry(ft, &table->flowtables, list) {
- for (k = 0; k < ft->ops_len; k++) {
- if (!ft->ops[k].dev)
- continue;
-
- if (flowtable->ops[i].dev == ft->ops[k].dev &&
- flowtable->ops[i].pf == ft->ops[k].pf) {
- err = -EBUSY;
- goto err5;
- }
- }
- }
-
- err = nf_register_net_hook(net, &flowtable->ops[i]);
- if (err < 0)
- goto err5;
- }
+ err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
+ if (err < 0)
+ goto err4;
err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
- goto err6;
+ goto err5;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
table->use++;
return 0;
-err6:
- i = flowtable->ops_len;
err5:
- for (k = i - 1; k >= 0; k--)
- nf_unregister_net_hook(net, &flowtable->ops[k]);
-
- kfree(flowtable->ops);
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ nf_unregister_net_hook(net, &hook->ops);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
err4:
flowtable->data.type->free(&flowtable->data);
err3:
@@ -5945,8 +6176,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
{
struct nlattr *nest, *nest_devs;
struct nfgenmsg *nfmsg;
+ struct nft_hook *hook;
struct nlmsghdr *nlh;
- int i;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
@@ -5969,18 +6200,15 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
if (!nest)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
- nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
+ nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->data.priority)))
goto nla_put_failure;
nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS);
if (!nest_devs)
goto nla_put_failure;
- for (i = 0; i < flowtable->ops_len; i++) {
- const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev);
-
- if (dev &&
- nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
+ list_for_each_entry_rcu(hook, &flowtable->hook_list, list) {
+ if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest_devs);
@@ -6171,7 +6399,12 @@ err:
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
- kfree(flowtable->ops);
+ struct nft_hook *hook, *next;
+
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree(hook);
+ }
kfree(flowtable->name);
flowtable->data.type->free(&flowtable->data);
module_put(flowtable->data.type->owner);
@@ -6211,14 +6444,15 @@ nla_put_failure:
static void nft_flowtable_event(unsigned long event, struct net_device *dev,
struct nft_flowtable *flowtable)
{
- int i;
+ struct nft_hook *hook;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (flowtable->ops[i].dev != dev)
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ if (hook->ops.dev != dev)
continue;
- nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
- flowtable->ops[i].dev = NULL;
+ nf_unregister_net_hook(dev_net(dev), &hook->ops);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
break;
}
}
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index ad783f4840ef..93e27a6570bc 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -132,13 +132,13 @@ static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
common->extack = extack;
}
-static int nft_setup_cb_call(struct nft_base_chain *basechain,
- enum tc_setup_type type, void *type_data)
+static int nft_setup_cb_call(enum tc_setup_type type, void *type_data,
+ struct list_head *cb_list)
{
struct flow_block_cb *block_cb;
int err;
- list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) {
+ list_for_each_entry(block_cb, cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err < 0)
return err;
@@ -155,32 +155,44 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain)
return 0;
}
+static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
+ const struct nft_base_chain *basechain,
+ const struct nft_rule *rule,
+ const struct nft_flow_rule *flow,
+ enum flow_cls_command command)
+{
+ struct netlink_ext_ack extack;
+ __be16 proto = ETH_P_ALL;
+
+ memset(cls_flow, 0, sizeof(*cls_flow));
+
+ if (flow)
+ proto = flow->proto;
+
+ nft_flow_offload_common_init(&cls_flow->common, proto,
+ basechain->ops.priority, &extack);
+ cls_flow->command = command;
+ cls_flow->cookie = (unsigned long) rule;
+ if (flow)
+ cls_flow->rule = flow->rule;
+}
+
static int nft_flow_offload_rule(struct nft_chain *chain,
struct nft_rule *rule,
struct nft_flow_rule *flow,
enum flow_cls_command command)
{
- struct flow_cls_offload cls_flow = {};
+ struct flow_cls_offload cls_flow;
struct nft_base_chain *basechain;
- struct netlink_ext_ack extack;
- __be16 proto = ETH_P_ALL;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
+ nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, command);
- if (flow)
- proto = flow->proto;
-
- nft_flow_offload_common_init(&cls_flow.common, proto,
- basechain->ops.priority, &extack);
- cls_flow.command = command;
- cls_flow.cookie = (unsigned long) rule;
- if (flow)
- cls_flow.rule = flow->rule;
-
- return nft_setup_cb_call(basechain, TC_SETUP_CLSFLOWER, &cls_flow);
+ return nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow,
+ &basechain->flow_block.cb_list);
}
static int nft_flow_offload_bind(struct flow_block_offload *bo,
@@ -194,6 +206,16 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
struct nft_base_chain *basechain)
{
struct flow_block_cb *block_cb, *next;
+ struct flow_cls_offload cls_flow;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ chain = &basechain->chain;
+ list_for_each_entry(rule, &chain->rules, list) {
+ nft_flow_cls_offload_setup(&cls_flow, basechain, rule, NULL,
+ FLOW_CLS_DESTROY);
+ nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &bo->cb_list);
+ }
list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
list_del(&block_cb->list);
@@ -224,20 +246,30 @@ static int nft_block_setup(struct nft_base_chain *basechain,
return err;
}
+static void nft_flow_block_offload_init(struct flow_block_offload *bo,
+ struct net *net,
+ enum flow_block_command cmd,
+ struct nft_base_chain *basechain,
+ struct netlink_ext_ack *extack)
+{
+ memset(bo, 0, sizeof(*bo));
+ bo->net = net;
+ bo->block = &basechain->flow_block;
+ bo->command = cmd;
+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo->extack = extack;
+ INIT_LIST_HEAD(&bo->cb_list);
+}
+
static int nft_block_offload_cmd(struct nft_base_chain *chain,
struct net_device *dev,
enum flow_block_command cmd)
{
struct netlink_ext_ack extack = {};
- struct flow_block_offload bo = {};
+ struct flow_block_offload bo;
int err;
- bo.net = dev_net(dev);
- bo.block = &chain->flow_block;
- bo.command = cmd;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
+ nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
if (err < 0)
@@ -253,17 +285,12 @@ static void nft_indr_block_ing_cmd(struct net_device *dev,
enum flow_block_command cmd)
{
struct netlink_ext_ack extack = {};
- struct flow_block_offload bo = {};
+ struct flow_block_offload bo;
if (!chain)
return;
- bo.net = dev_net(dev);
- bo.block = &chain->flow_block;
- bo.command = cmd;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
+ nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
@@ -274,15 +301,10 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
struct net_device *dev,
enum flow_block_command cmd)
{
- struct flow_block_offload bo = {};
struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo;
- bo.net = dev_net(dev);
- bo.block = &chain->flow_block;
- bo.command = cmd;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
+ nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
flow_indr_block_call(dev, &bo, cmd);
@@ -294,32 +316,73 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
-static int nft_flow_offload_chain(struct nft_chain *chain,
- u8 *ppolicy,
+static int nft_chain_offload_cmd(struct nft_base_chain *basechain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ int err;
+
+ if (dev->netdev_ops->ndo_setup_tc)
+ err = nft_block_offload_cmd(basechain, dev, cmd);
+ else
+ err = nft_indr_block_offload_cmd(basechain, dev, cmd);
+
+ return err;
+}
+
+static int nft_flow_block_chain(struct nft_base_chain *basechain,
+ const struct net_device *this_dev,
+ enum flow_block_command cmd)
+{
+ struct net_device *dev;
+ struct nft_hook *hook;
+ int err, i = 0;
+
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ dev = hook->ops.dev;
+ if (this_dev && this_dev != dev)
+ continue;
+
+ err = nft_chain_offload_cmd(basechain, dev, cmd);
+ if (err < 0 && cmd == FLOW_BLOCK_BIND) {
+ if (!this_dev)
+ goto err_flow_block;
+
+ return err;
+ }
+ i++;
+ }
+
+ return 0;
+
+err_flow_block:
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (i-- <= 0)
+ break;
+
+ dev = hook->ops.dev;
+ nft_chain_offload_cmd(basechain, dev, FLOW_BLOCK_UNBIND);
+ }
+ return err;
+}
+
+static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy,
enum flow_block_command cmd)
{
struct nft_base_chain *basechain;
- struct net_device *dev;
u8 policy;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
- dev = basechain->ops.dev;
- if (!dev)
- return -EOPNOTSUPP;
-
policy = ppolicy ? *ppolicy : basechain->policy;
/* Only default policy to accept is supported for now. */
if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP)
return -EOPNOTSUPP;
- if (dev->netdev_ops->ndo_setup_tc)
- return nft_block_offload_cmd(basechain, dev, cmd);
- else
- return nft_indr_block_offload_cmd(basechain, dev, cmd);
+ return nft_flow_block_chain(basechain, NULL, cmd);
}
int nft_flow_rule_offload_commit(struct net *net)
@@ -385,6 +448,7 @@ static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
{
struct nft_base_chain *basechain;
struct net *net = dev_net(dev);
+ struct nft_hook *hook, *found;
const struct nft_table *table;
struct nft_chain *chain;
@@ -397,8 +461,16 @@ static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
!(chain->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
+ found = NULL;
basechain = nft_base_chain(chain);
- if (strncmp(basechain->dev_name, dev->name, IFNAMSIZ))
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.dev != dev)
+ continue;
+
+ found = hook;
+ break;
+ }
+ if (!found)
continue;
return chain;
@@ -426,18 +498,6 @@ static void nft_indr_block_cb(struct net_device *dev,
mutex_unlock(&net->nft.commit_mutex);
}
-static void nft_offload_chain_clean(struct nft_chain *chain)
-{
- struct nft_rule *rule;
-
- list_for_each_entry(rule, &chain->rules, list) {
- nft_flow_offload_rule(chain, rule,
- NULL, FLOW_CLS_DESTROY);
- }
-
- nft_flow_offload_chain(chain, NULL, FLOW_BLOCK_UNBIND);
-}
-
static int nft_offload_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -448,7 +508,9 @@ static int nft_offload_netdev_event(struct notifier_block *this,
mutex_lock(&net->nft.commit_mutex);
chain = __nft_offload_get_chain(dev);
if (chain)
- nft_offload_chain_clean(chain);
+ nft_flow_block_chain(nft_base_chain(chain), dev,
+ FLOW_BLOCK_UNBIND);
+
mutex_unlock(&net->nft.commit_mutex);
return NOTIFY_DONE;
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index b5d5d071d765..c78d01bc02e9 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -287,28 +287,35 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
struct nft_ctx *ctx)
{
struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
+ struct nft_hook *hook, *found = NULL;
+ int n = 0;
- switch (event) {
- case NETDEV_UNREGISTER:
- if (strcmp(basechain->dev_name, dev->name) != 0)
- return;
-
- /* UNREGISTER events are also happpening on netns exit.
- *
- * Altough nf_tables core releases all tables/chains, only
- * this event handler provides guarantee that
- * basechain.ops->dev is still accessible, so we cannot
- * skip exiting net namespaces.
- */
- __nft_release_basechain(ctx);
- break;
- case NETDEV_CHANGENAME:
- if (dev->ifindex != basechain->ops.dev->ifindex)
- return;
+ if (event != NETDEV_UNREGISTER)
+ return;
- strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
- break;
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.dev == dev)
+ found = hook;
+
+ n++;
}
+ if (!found)
+ return;
+
+ if (n > 1) {
+ nf_unregister_net_hook(ctx->net, &found->ops);
+ list_del_rcu(&found->list);
+ kfree_rcu(found, rcu);
+ return;
+ }
+
+ /* UNREGISTER events are also happening on netns exit.
+ *
+ * Although nf_tables core releases all tables/chains, only this event
+ * handler provides guarantee that hook->ops.dev is still accessible,
+ * so we cannot skip exiting net namespaces.
+ */
+ __nft_release_basechain(ctx);
}
static int nf_tables_netdev_event(struct notifier_block *this,
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index efccd1ac9a66..0522b2b1fd95 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -458,10 +458,63 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
}
EXPORT_SYMBOL(genlmsg_put);
+static struct genl_dumpit_info *genl_dumpit_info_alloc(void)
+{
+ return kmalloc(sizeof(struct genl_dumpit_info), GFP_KERNEL);
+}
+
+static void genl_dumpit_info_free(const struct genl_dumpit_info *info)
+{
+ kfree(info);
+}
+
+static struct nlattr **
+genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ const struct genl_ops *ops,
+ int hdrlen,
+ enum genl_validate_flags no_strict_flag,
+ bool parallel)
+{
+ enum netlink_validation validate = ops->validate & no_strict_flag ?
+ NL_VALIDATE_LIBERAL :
+ NL_VALIDATE_STRICT;
+ struct nlattr **attrbuf;
+ int err;
+
+ if (!family->maxattr)
+ return NULL;
+
+ if (parallel) {
+ attrbuf = kmalloc_array(family->maxattr + 1,
+ sizeof(struct nlattr *), GFP_KERNEL);
+ if (!attrbuf)
+ return ERR_PTR(-ENOMEM);
+ } else {
+ attrbuf = family->attrbuf;
+ }
+
+ err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
+ family->policy, validate, extack);
+ if (err && parallel) {
+ kfree(attrbuf);
+ return ERR_PTR(err);
+ }
+ return attrbuf;
+}
+
+static void genl_family_rcv_msg_attrs_free(const struct genl_family *family,
+ struct nlattr **attrbuf,
+ bool parallel)
+{
+ if (parallel)
+ kfree(attrbuf);
+}
+
static int genl_lock_start(struct netlink_callback *cb)
{
- /* our ops are always const - netlink API doesn't propagate that */
- const struct genl_ops *ops = cb->data;
+ const struct genl_ops *ops = genl_dumpit_info(cb)->ops;
int rc = 0;
if (ops->start) {
@@ -474,8 +527,7 @@ static int genl_lock_start(struct netlink_callback *cb)
static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
- /* our ops are always const - netlink API doesn't propagate that */
- const struct genl_ops *ops = cb->data;
+ const struct genl_ops *ops = genl_dumpit_info(cb)->ops;
int rc;
genl_lock();
@@ -486,8 +538,8 @@ static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
static int genl_lock_done(struct netlink_callback *cb)
{
- /* our ops are always const - netlink API doesn't propagate that */
- const struct genl_ops *ops = cb->data;
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ const struct genl_ops *ops = info->ops;
int rc = 0;
if (ops->done) {
@@ -495,120 +547,111 @@ static int genl_lock_done(struct netlink_callback *cb)
rc = ops->done(cb);
genl_unlock();
}
+ genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
+ genl_dumpit_info_free(info);
return rc;
}
-static int genl_family_rcv_msg(const struct genl_family *family,
- struct sk_buff *skb,
- struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+static int genl_parallel_done(struct netlink_callback *cb)
{
- const struct genl_ops *ops;
- struct net *net = sock_net(skb->sk);
- struct genl_info info;
- struct genlmsghdr *hdr = nlmsg_data(nlh);
- struct nlattr **attrbuf;
- int hdrlen, err;
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ const struct genl_ops *ops = info->ops;
+ int rc = 0;
- /* this family doesn't exist in this netns */
- if (!family->netnsok && !net_eq(net, &init_net))
- return -ENOENT;
+ if (ops->done)
+ rc = ops->done(cb);
+ genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
+ genl_dumpit_info_free(info);
+ return rc;
+}
- hdrlen = GENL_HDRLEN + family->hdrsize;
- if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
- return -EINVAL;
+static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
+ struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ const struct genl_ops *ops,
+ int hdrlen, struct net *net)
+{
+ struct genl_dumpit_info *info;
+ struct nlattr **attrs = NULL;
+ int err;
- ops = genl_get_cmd(hdr->cmd, family);
- if (ops == NULL)
+ if (!ops->dumpit)
return -EOPNOTSUPP;
- if ((ops->flags & GENL_ADMIN_PERM) &&
- !netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if ((ops->flags & GENL_UNS_ADMIN_PERM) &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
- int rc;
-
- if (ops->dumpit == NULL)
- return -EOPNOTSUPP;
-
- if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) {
- int hdrlen = GENL_HDRLEN + family->hdrsize;
-
- if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
- return -EINVAL;
+ if (ops->validate & GENL_DONT_VALIDATE_DUMP)
+ goto no_attrs;
- if (family->maxattr) {
- unsigned int validate = NL_VALIDATE_STRICT;
-
- if (ops->validate &
- GENL_DONT_VALIDATE_DUMP_STRICT)
- validate = NL_VALIDATE_LIBERAL;
- rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen),
- nlmsg_attrlen(nlh, hdrlen),
- family->maxattr,
- family->policy,
- validate, extack);
- if (rc)
- return rc;
- }
- }
+ if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+ return -EINVAL;
- if (!family->parallel_ops) {
- struct netlink_dump_control c = {
- .module = family->module,
- /* we have const, but the netlink API doesn't */
- .data = (void *)ops,
- .start = genl_lock_start,
- .dump = genl_lock_dumpit,
- .done = genl_lock_done,
- };
+ attrs = genl_family_rcv_msg_attrs_parse(family, nlh, extack,
+ ops, hdrlen,
+ GENL_DONT_VALIDATE_DUMP_STRICT,
+ true);
+ if (IS_ERR(attrs))
+ return PTR_ERR(attrs);
+
+no_attrs:
+ /* Allocate dumpit info. It is going to be freed by done() callback. */
+ info = genl_dumpit_info_alloc();
+ if (!info) {
+ genl_family_rcv_msg_attrs_free(family, attrs, true);
+ return -ENOMEM;
+ }
- genl_unlock();
- rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
- genl_lock();
+ info->family = family;
+ info->ops = ops;
+ info->attrs = attrs;
- } else {
- struct netlink_dump_control c = {
- .module = family->module,
- .start = ops->start,
- .dump = ops->dumpit,
- .done = ops->done,
- };
+ if (!family->parallel_ops) {
+ struct netlink_dump_control c = {
+ .module = family->module,
+ .data = info,
+ .start = genl_lock_start,
+ .dump = genl_lock_dumpit,
+ .done = genl_lock_done,
+ };
- rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
- }
+ genl_unlock();
+ err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
+ genl_lock();
- return rc;
+ } else {
+ struct netlink_dump_control c = {
+ .module = family->module,
+ .data = info,
+ .start = ops->start,
+ .dump = ops->dumpit,
+ .done = genl_parallel_done,
+ };
+
+ err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
}
- if (ops->doit == NULL)
- return -EOPNOTSUPP;
-
- if (family->maxattr && family->parallel_ops) {
- attrbuf = kmalloc_array(family->maxattr + 1,
- sizeof(struct nlattr *),
- GFP_KERNEL);
- if (attrbuf == NULL)
- return -ENOMEM;
- } else
- attrbuf = family->attrbuf;
+ return err;
+}
- if (attrbuf) {
- enum netlink_validation validate = NL_VALIDATE_STRICT;
+static int genl_family_rcv_msg_doit(const struct genl_family *family,
+ struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ const struct genl_ops *ops,
+ int hdrlen, struct net *net)
+{
+ struct nlattr **attrbuf;
+ struct genl_info info;
+ int err;
- if (ops->validate & GENL_DONT_VALIDATE_STRICT)
- validate = NL_VALIDATE_LIBERAL;
+ if (!ops->doit)
+ return -EOPNOTSUPP;
- err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
- family->policy, validate, extack);
- if (err < 0)
- goto out;
- }
+ attrbuf = genl_family_rcv_msg_attrs_parse(family, nlh, extack,
+ ops, hdrlen,
+ GENL_DONT_VALIDATE_STRICT,
+ family->parallel_ops);
+ if (IS_ERR(attrbuf))
+ return PTR_ERR(attrbuf);
info.snd_seq = nlh->nlmsg_seq;
info.snd_portid = NETLINK_CB(skb).portid;
@@ -632,12 +675,49 @@ static int genl_family_rcv_msg(const struct genl_family *family,
family->post_doit(ops, skb, &info);
out:
- if (family->parallel_ops)
- kfree(attrbuf);
+ genl_family_rcv_msg_attrs_free(family, attrbuf, family->parallel_ops);
return err;
}
+static int genl_family_rcv_msg(const struct genl_family *family,
+ struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ const struct genl_ops *ops;
+ struct net *net = sock_net(skb->sk);
+ struct genlmsghdr *hdr = nlmsg_data(nlh);
+ int hdrlen;
+
+ /* this family doesn't exist in this netns */
+ if (!family->netnsok && !net_eq(net, &init_net))
+ return -ENOENT;
+
+ hdrlen = GENL_HDRLEN + family->hdrsize;
+ if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+ return -EINVAL;
+
+ ops = genl_get_cmd(hdr->cmd, family);
+ if (ops == NULL)
+ return -EOPNOTSUPP;
+
+ if ((ops->flags & GENL_ADMIN_PERM) &&
+ !netlink_capable(skb, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if ((ops->flags & GENL_UNS_ADMIN_PERM) &&
+ !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP)
+ return genl_family_rcv_msg_dumpit(family, skb, nlh, extack,
+ ops, hdrlen, net);
+ else
+ return genl_family_rcv_msg_doit(family, skb, nlh, extack,
+ ops, hdrlen, net);
+}
+
static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -1088,25 +1168,6 @@ problem:
subsys_initcall(genl_init);
-/**
- * genl_family_attrbuf - return family's attrbuf
- * @family: the family
- *
- * Return the family's attrbuf, while validating that it's
- * actually valid to access it.
- *
- * You cannot use this function with a family that has parallel_ops
- * and you can only use it within (pre/post) doit/dumpit callbacks.
- */
-struct nlattr **genl_family_attrbuf(const struct genl_family *family)
-{
- if (!WARN_ON(family->parallel_ops))
- lockdep_assert_held(&genl_mutex);
-
- return family->attrbuf;
-}
-EXPORT_SYMBOL(genl_family_attrbuf);
-
static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
gfp_t flags)
{
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 17e6ca62f1be..fd9ad534dd9b 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -102,22 +102,14 @@ nla_put_failure:
static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
{
- struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family);
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct nfc_dev *dev;
- int rc;
u32 idx;
- rc = nlmsg_parse_deprecated(cb->nlh,
- GENL_HDRLEN + nfc_genl_family.hdrsize,
- attrbuf, nfc_genl_family.maxattr,
- nfc_genl_policy, NULL);
- if (rc < 0)
- return ERR_PTR(rc);
-
- if (!attrbuf[NFC_ATTR_DEVICE_INDEX])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
return ERR_PTR(-EINVAL);
- idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]);
+ idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
dev = nfc_get_device(idx);
if (!dev)
@@ -1697,7 +1689,8 @@ static const struct genl_ops nfc_genl_ops[] = {
},
{
.cmd = NFC_CMD_GET_TARGET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = nfc_genl_dump_targets,
.done = nfc_genl_dump_targets_done,
},
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 05249eb45082..df9c80bf621d 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -971,6 +971,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
+ bool add_helper = false;
+
/* Packets starting a new connection must be NATted before the
* helper, so that the helper knows about the NAT. We enforce
* this by delaying both NAT and helper calls for unconfirmed
@@ -988,16 +990,17 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
}
/* Userspace may decide to perform a ct lookup without a helper
- * specified followed by a (recirculate and) commit with one.
- * Therefore, for unconfirmed connections which we will commit,
- * we need to attach the helper here.
+ * specified followed by a (recirculate and) commit with one,
+ * or attach a helper in a later commit. Therefore, for
+ * connections which we will commit, we may need to attach
+ * the helper here.
*/
- if (!nf_ct_is_confirmed(ct) && info->commit &&
- info->helper && !nfct_help(ct)) {
+ if (info->commit && info->helper && !nfct_help(ct)) {
int err = __nf_ct_try_assign_helper(ct, info->ct,
GFP_ATOMIC);
if (err)
return err;
+ add_helper = true;
/* helper installed, add seqadj if NAT is required */
if (info->nat && !nfct_seqadj(ct)) {
@@ -1007,11 +1010,13 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
}
/* Call the helper only if:
- * - nf_conntrack_in() was executed above ("!cached") for a
- * confirmed connection, or
+ * - nf_conntrack_in() was executed above ("!cached") or a
+ * helper was just attached ("add_helper") for a confirmed
+ * connection, or
* - When committing an unconfirmed connection.
*/
- if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) &&
+ if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
+ info->commit) &&
ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
return -EINVAL;
}
diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c
index e35869e81766..15ce9b642b25 100644
--- a/net/qrtr/tun.c
+++ b/net/qrtr/tun.c
@@ -111,15 +111,11 @@ static __poll_t qrtr_tun_poll(struct file *filp, poll_table *wait)
static int qrtr_tun_release(struct inode *inode, struct file *filp)
{
struct qrtr_tun *tun = filp->private_data;
- struct sk_buff *skb;
qrtr_endpoint_unregister(&tun->ep);
/* Discard all SKBs */
- while (!skb_queue_empty(&tun->queue)) {
- skb = skb_dequeue(&tun->queue);
- kfree_skb(skb);
- }
+ skb_queue_purge(&tun->queue);
kfree(tun);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 9de2ae22d583..3fd5f40189bd 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -30,6 +30,7 @@
* SOFTWARE.
*
*/
+#include <linux/dmapool.h>
#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/if.h>
@@ -107,6 +108,7 @@ static void rds_ib_dev_free(struct work_struct *work)
rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool);
if (rds_ibdev->pd)
ib_dealloc_pd(rds_ibdev->pd);
+ dma_pool_destroy(rds_ibdev->rid_hdrs_pool);
list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
list_del(&i_ipaddr->list);
@@ -182,6 +184,12 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->pd = NULL;
goto put_dev;
}
+ rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name,
+ device->dma_device,
+ sizeof(struct rds_header),
+ L1_CACHE_BYTES, 0);
+ if (!rds_ibdev->rid_hdrs_pool)
+ goto put_dev;
rds_ibdev->mr_1m_pool =
rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
diff --git a/net/rds/ib.h b/net/rds/ib.h
index f2b558e8b5ea..6e6f24753998 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -165,8 +165,8 @@ struct rds_ib_connection {
/* tx */
struct rds_ib_work_ring i_send_ring;
struct rm_data_op *i_data_op;
- struct rds_header *i_send_hdrs;
- dma_addr_t i_send_hdrs_dma;
+ struct rds_header **i_send_hdrs;
+ dma_addr_t *i_send_hdrs_dma;
struct rds_ib_send_work *i_sends;
atomic_t i_signaled_sends;
@@ -175,8 +175,8 @@ struct rds_ib_connection {
struct rds_ib_work_ring i_recv_ring;
struct rds_ib_incoming *i_ibinc;
u32 i_recv_data_rem;
- struct rds_header *i_recv_hdrs;
- dma_addr_t i_recv_hdrs_dma;
+ struct rds_header **i_recv_hdrs;
+ dma_addr_t *i_recv_hdrs_dma;
struct rds_ib_recv_work *i_recvs;
u64 i_ack_recv; /* last ACK received */
struct rds_ib_refill_cache i_cache_incs;
@@ -246,6 +246,7 @@ struct rds_ib_device {
struct list_head conn_list;
struct ib_device *dev;
struct ib_pd *pd;
+ struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */
bool use_fastreg;
unsigned int max_mrs;
@@ -381,7 +382,11 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6);
void rds_ib_cm_connect_complete(struct rds_connection *conn,
struct rdma_cm_event *event);
-
+struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
+ struct dma_pool *pool,
+ dma_addr_t **dma_addrs, u32 num_hdrs);
+void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
+ dma_addr_t *dma_addrs, u32 num_hdrs);
#define rds_ib_conn_error(conn, fmt...) \
__rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 233f1368162b..6b345c858dba 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -30,6 +30,7 @@
* SOFTWARE.
*
*/
+#include <linux/dmapool.h>
#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/slab.h>
@@ -439,6 +440,68 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index)
rds_ibdev->vector_load[index]--;
}
+/* Allocate DMA coherent memory to be used to store struct rds_header for
+ * sending/receiving packets. The pointers to the DMA memory and the
+ * associated DMA addresses are stored in two arrays.
+ *
+ * @ibdev: the IB device
+ * @pool: the DMA memory pool
+ * @dma_addrs: pointer to the array for storing DMA addresses
+ * @num_hdrs: number of headers to allocate
+ *
+ * It returns the pointer to the array storing the DMA memory pointers. On
+ * error, NULL pointer is returned.
+ */
+struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
+ struct dma_pool *pool,
+ dma_addr_t **dma_addrs, u32 num_hdrs)
+{
+ struct rds_header **hdrs;
+ dma_addr_t *hdr_daddrs;
+ u32 i;
+
+ hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL,
+ ibdev_to_node(ibdev));
+ if (!hdrs)
+ return NULL;
+
+ hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL,
+ ibdev_to_node(ibdev));
+ if (!hdr_daddrs) {
+ kvfree(hdrs);
+ return NULL;
+ }
+
+ for (i = 0; i < num_hdrs; i++) {
+ hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]);
+ if (!hdrs[i]) {
+ rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i);
+ return NULL;
+ }
+ }
+
+ *dma_addrs = hdr_daddrs;
+ return hdrs;
+}
+
+/* Free the DMA memory used to store struct rds_header.
+ *
+ * @pool: the DMA memory pool
+ * @hdrs: pointer to the array storing DMA memory pointers
+ * @dma_addrs: pointer to the array storing DMA addresses
+ * @num_hdars: number of headers to free.
+ */
+void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
+ dma_addr_t *dma_addrs, u32 num_hdrs)
+{
+ u32 i;
+
+ for (i = 0; i < num_hdrs; i++)
+ dma_pool_free(pool, hdrs[i], dma_addrs[i]);
+ kvfree(hdrs);
+ kvfree(dma_addrs);
+}
+
/*
* This needs to be very careful to not leave IS_ERR pointers around for
* cleanup to trip over.
@@ -451,6 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
struct ib_cq_init_attr cq_attr = {};
struct rds_ib_device *rds_ibdev;
int ret, fr_queue_space;
+ struct dma_pool *pool;
/*
* It's normal to see a null device if an incoming connection races
@@ -541,31 +605,28 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
goto recv_cq_out;
}
- ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
- ic->i_send_ring.w_nr *
- sizeof(struct rds_header),
- &ic->i_send_hdrs_dma, GFP_KERNEL);
+ pool = rds_ibdev->rid_hdrs_pool;
+ ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma,
+ ic->i_send_ring.w_nr);
if (!ic->i_send_hdrs) {
ret = -ENOMEM;
- rdsdebug("ib_dma_alloc_coherent send failed\n");
+ rdsdebug("DMA send hdrs alloc failed\n");
goto qp_out;
}
- ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
- ic->i_recv_ring.w_nr *
- sizeof(struct rds_header),
- &ic->i_recv_hdrs_dma, GFP_KERNEL);
+ ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma,
+ ic->i_recv_ring.w_nr);
if (!ic->i_recv_hdrs) {
ret = -ENOMEM;
- rdsdebug("ib_dma_alloc_coherent recv failed\n");
+ rdsdebug("DMA recv hdrs alloc failed\n");
goto send_hdrs_dma_out;
}
- ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
- &ic->i_ack_dma, GFP_KERNEL);
+ ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL,
+ &ic->i_ack_dma);
if (!ic->i_ack) {
ret = -ENOMEM;
- rdsdebug("ib_dma_alloc_coherent ack failed\n");
+ rdsdebug("DMA ack header alloc failed\n");
goto recv_hdrs_dma_out;
}
@@ -596,17 +657,23 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
sends_out:
vfree(ic->i_sends);
+
ack_dma_out:
- ib_dma_free_coherent(dev, sizeof(struct rds_header),
- ic->i_ack, ic->i_ack_dma);
+ dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
+ ic->i_ack = NULL;
+
recv_hdrs_dma_out:
- ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
- sizeof(struct rds_header),
- ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
+ rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma,
+ ic->i_recv_ring.w_nr);
+ ic->i_recv_hdrs = NULL;
+ ic->i_recv_hdrs_dma = NULL;
+
send_hdrs_dma_out:
- ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
- sizeof(struct rds_header),
- ic->i_send_hdrs, ic->i_send_hdrs_dma);
+ rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma,
+ ic->i_send_ring.w_nr);
+ ic->i_send_hdrs = NULL;
+ ic->i_send_hdrs_dma = NULL;
+
qp_out:
rdma_destroy_qp(ic->i_cm_id);
recv_cq_out:
@@ -984,8 +1051,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_cm_id ? ic->i_cm_id->qp : NULL);
if (ic->i_cm_id) {
- struct ib_device *dev = ic->i_cm_id->device;
-
rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
err = rdma_disconnect(ic->i_cm_id);
if (err) {
@@ -1035,24 +1100,39 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ib_destroy_cq(ic->i_recv_cq);
}
- /* then free the resources that ib callbacks use */
- if (ic->i_send_hdrs)
- ib_dma_free_coherent(dev,
- ic->i_send_ring.w_nr *
- sizeof(struct rds_header),
- ic->i_send_hdrs,
- ic->i_send_hdrs_dma);
-
- if (ic->i_recv_hdrs)
- ib_dma_free_coherent(dev,
- ic->i_recv_ring.w_nr *
- sizeof(struct rds_header),
- ic->i_recv_hdrs,
- ic->i_recv_hdrs_dma);
-
- if (ic->i_ack)
- ib_dma_free_coherent(dev, sizeof(struct rds_header),
- ic->i_ack, ic->i_ack_dma);
+ if (ic->rds_ibdev) {
+ struct dma_pool *pool;
+
+ pool = ic->rds_ibdev->rid_hdrs_pool;
+
+ /* then free the resources that ib callbacks use */
+ if (ic->i_send_hdrs) {
+ rds_dma_hdrs_free(pool, ic->i_send_hdrs,
+ ic->i_send_hdrs_dma,
+ ic->i_send_ring.w_nr);
+ ic->i_send_hdrs = NULL;
+ ic->i_send_hdrs_dma = NULL;
+ }
+
+ if (ic->i_recv_hdrs) {
+ rds_dma_hdrs_free(pool, ic->i_recv_hdrs,
+ ic->i_recv_hdrs_dma,
+ ic->i_recv_ring.w_nr);
+ ic->i_recv_hdrs = NULL;
+ ic->i_recv_hdrs_dma = NULL;
+ }
+
+ if (ic->i_ack) {
+ dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
+ ic->i_ack = NULL;
+ }
+ } else {
+ WARN_ON(ic->i_send_hdrs);
+ WARN_ON(ic->i_send_hdrs_dma);
+ WARN_ON(ic->i_recv_hdrs);
+ WARN_ON(ic->i_recv_hdrs_dma);
+ WARN_ON(ic->i_ack);
+ }
if (ic->i_sends)
rds_ib_send_clear_ring(ic);
@@ -1071,9 +1151,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_pd = NULL;
ic->i_send_cq = NULL;
ic->i_recv_cq = NULL;
- ic->i_send_hdrs = NULL;
- ic->i_recv_hdrs = NULL;
- ic->i_ack = NULL;
}
BUG_ON(ic->rds_ibdev);
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index a0f99bbf362c..694d411dc72f 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -61,7 +61,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
recv->r_wr.num_sge = RDS_IB_RECV_SGE;
sge = &recv->r_sge[0];
- sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
+ sge->addr = ic->i_recv_hdrs_dma[i];
sge->length = sizeof(struct rds_header);
sge->lkey = ic->i_pd->local_dma_lkey;
@@ -343,7 +343,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
WARN_ON(ret != 1);
sge = &recv->r_sge[0];
- sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
+ sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
sge->length = sizeof(struct rds_header);
sge = &recv->r_sge[1];
@@ -861,7 +861,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
}
data_len -= sizeof(struct rds_header);
- ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
+ ihdr = ic->i_recv_hdrs[recv - ic->i_recvs];
/* Validate the checksum. */
if (!rds_message_verify_checksum(ihdr)) {
@@ -993,10 +993,11 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
} else {
/* We expect errors as the qp is drained during shutdown */
if (rds_conn_up(conn) || rds_conn_connecting(conn))
- rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n",
+ rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
&conn->c_laddr, &conn->c_faddr,
conn->c_tos, wc->status,
- ib_wc_status_msg(wc->status));
+ ib_wc_status_msg(wc->status),
+ wc->vendor_err);
}
/* rds_ib_process_recv() doesn't always consume the frag, and
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index dfe6237dafe2..d1cc1d7778d8 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -201,7 +201,8 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
send->s_wr.ex.imm_data = 0;
sge = &send->s_sge[0];
- sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
+ sge->addr = ic->i_send_hdrs_dma[i];
+
sge->length = sizeof(struct rds_header);
sge->lkey = ic->i_pd->local_dma_lkey;
@@ -300,10 +301,10 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
/* We expect errors as the qp is drained during shutdown */
if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
- rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n",
+ rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
&conn->c_laddr, &conn->c_faddr,
conn->c_tos, wc->status,
- ib_wc_status_msg(wc->status));
+ ib_wc_status_msg(wc->status), wc->vendor_err);
}
}
@@ -631,11 +632,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
send->s_queued = jiffies;
send->s_op = NULL;
- send->s_sge[0].addr = ic->i_send_hdrs_dma
- + (pos * sizeof(struct rds_header));
+ send->s_sge[0].addr = ic->i_send_hdrs_dma[pos];
+
send->s_sge[0].length = sizeof(struct rds_header);
- memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
+ memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr,
+ sizeof(struct rds_header));
+
/* Set up the data, if present */
if (i < work_alloc
@@ -674,7 +677,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
&send->s_wr, send->s_wr.num_sge, send->s_wr.next);
if (ic->i_flowctl && adv_credits) {
- struct rds_header *hdr = &ic->i_send_hdrs[pos];
+ struct rds_header *hdr = ic->i_send_hdrs[pos];
/* add credit and redo the header checksum */
hdr->h_credit = adv_credits;
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 64830d8c1fdb..452163eadb98 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -209,6 +209,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx,
*/
struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
{
+ const void *here = __builtin_return_address(0);
struct rxrpc_peer *peer;
_enter("");
@@ -230,6 +231,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
peer->cong_cwnd = 3;
else
peer->cong_cwnd = 4;
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here);
}
_leave(" = %p", peer);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 69d4676a402f..6284c552e943 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -399,7 +399,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
- int bind, bool cpustats)
+ int bind, bool cpustats, u32 flags)
{
struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
struct tcf_idrinfo *idrinfo = tn->idrinfo;
@@ -427,6 +427,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
p->tcfa_tm.install = jiffies;
p->tcfa_tm.lastuse = jiffies;
p->tcfa_tm.firstuse = 0;
+ p->tcfa_flags = flags;
if (est) {
err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
&p->tcfa_rate_est,
@@ -451,6 +452,17 @@ err1:
}
EXPORT_SYMBOL(tcf_idr_create);
+int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
+ struct nlattr *est, struct tc_action **a,
+ const struct tc_action_ops *ops, int bind,
+ u32 flags)
+{
+ /* Set cpustats according to actions flags. */
+ return tcf_idr_create(tn, index, est, a, ops, bind,
+ !(flags & TCA_ACT_FLAGS_NO_PERCPU_STATS), flags);
+}
+EXPORT_SYMBOL(tcf_idr_create_from_flags);
+
void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
@@ -773,6 +785,14 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
}
rcu_read_unlock();
+ if (a->tcfa_flags) {
+ struct nla_bitfield32 flags = { a->tcfa_flags,
+ a->tcfa_flags, };
+
+ if (nla_put(skb, TCA_ACT_FLAGS, sizeof(flags), &flags))
+ goto nla_put_failure;
+ }
+
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
@@ -831,12 +851,15 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
return c;
}
+static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS;
static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
[TCA_ACT_KIND] = { .type = NLA_STRING },
[TCA_ACT_INDEX] = { .type = NLA_U32 },
[TCA_ACT_COOKIE] = { .type = NLA_BINARY,
.len = TC_COOKIE_MAX_SIZE },
[TCA_ACT_OPTIONS] = { .type = NLA_NESTED },
+ [TCA_ACT_FLAGS] = { .type = NLA_BITFIELD32,
+ .validation_data = &tca_act_flags_allowed },
};
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
@@ -845,6 +868,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
bool rtnl_held,
struct netlink_ext_ack *extack)
{
+ struct nla_bitfield32 flags = { 0, 0 };
struct tc_action *a;
struct tc_action_ops *a_o;
struct tc_cookie *cookie = NULL;
@@ -876,6 +900,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
goto err_out;
}
}
+ if (tb[TCA_ACT_FLAGS])
+ flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
} else {
if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
NL_SET_ERR_MSG(extack, "TC action name too long");
@@ -914,10 +940,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
/* backward compatibility for policer */
if (name == NULL)
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
- rtnl_held, tp, extack);
+ rtnl_held, tp, flags.value, extack);
else
err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
- tp, extack);
+ tp, flags.value, extack);
if (err < 0)
goto err_mod;
@@ -989,6 +1015,29 @@ err:
return err;
}
+void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
+ bool drop, bool hw)
+{
+ if (a->cpu_bstats) {
+ _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+
+ if (drop)
+ this_cpu_ptr(a->cpu_qstats)->drops += packets;
+
+ if (hw)
+ _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+ bytes, packets);
+ return;
+ }
+
+ _bstats_update(&a->tcfa_bstats, bytes, packets);
+ if (drop)
+ a->tcfa_qstats.drops += packets;
+ if (hw)
+ _bstats_update(&a->tcfa_bstats_hw, bytes, packets);
+}
+EXPORT_SYMBOL(tcf_action_update_stats);
+
int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
int compat_mode)
{
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 04b7bd4ec751..46f47e58b3be 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -275,7 +275,8 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
int replace, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
@@ -303,7 +304,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
ret = tcf_idr_check_alloc(tn, &index, act, bind);
if (!ret) {
ret = tcf_idr_create(tn, index, est, act,
- &act_bpf_ops, bind, true);
+ &act_bpf_ops, bind, true, 0);
if (ret < 0) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2b43cacf82af..43a243081e7d 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -94,7 +94,7 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
@@ -121,7 +121,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
ret = tcf_idr_check_alloc(tn, &index, a, bind);
if (!ret) {
ret = tcf_idr_create(tn, index, est, a,
- &act_connmark_ops, bind, false);
+ &act_connmark_ops, bind, false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d3cfad88dc3a..16e67e1c1db1 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -43,7 +43,7 @@ static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
int bind, bool rtnl_held, struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
struct tcf_csum_params *params_new;
@@ -68,8 +68,8 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_csum_ops, bind, true);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_csum_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -580,7 +580,7 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
params = rcu_dereference_bh(p->params);
tcf_lastuse_update(&p->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
+ tcf_action_update_bstats(&p->common, skb);
action = READ_ONCE(p->tcf_action);
if (unlikely(action == TC_ACT_SHOT))
@@ -624,7 +624,7 @@ out:
return action;
drop:
- qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
+ tcf_action_inc_drop_qstats(&p->common);
action = TC_ACT_SHOT;
goto out;
}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index fcc46025e790..68d6af56b243 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -465,11 +465,11 @@ out_push:
skb_push_rcsum(skb, nh_ofs);
out:
- bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+ tcf_action_update_bstats(&c->common, skb);
return retval;
drop:
- qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
+ tcf_action_inc_drop_qstats(&c->common);
return TC_ACT_SHOT;
}
@@ -656,7 +656,7 @@ static int tcf_ct_fill_params(struct net *net,
static int tcf_ct_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int replace, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ct_net_id);
@@ -688,8 +688,8 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
return err;
if (!err) {
- err = tcf_idr_create(tn, index, est, a,
- &act_ct_ops, bind, true);
+ err = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_ct_ops, bind, flags);
if (err) {
tcf_idr_cleanup(tn, index);
return err;
@@ -905,11 +905,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
{
struct tcf_ct *c = to_ct(a);
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
-
- if (hw)
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
- bytes, packets);
+ tcf_action_update_stats(a, bytes, packets, false, hw);
c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
}
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 0dbcfd1dca7b..b1e601007242 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -153,7 +153,7 @@ static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
@@ -210,7 +210,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
ret = tcf_idr_create(tn, index, est, a,
- &act_ctinfo_ops, bind, false);
+ &act_ctinfo_ops, bind, false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 324f1d1f6d47..416065772719 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -53,7 +53,8 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
struct nlattr *tb[TCA_GACT_MAX + 1];
@@ -98,8 +99,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_gact_ops, bind, true);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_gact_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -161,9 +162,9 @@ static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
action = gact_rand[ptype](gact);
}
#endif
- bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb);
+ tcf_action_update_bstats(&gact->common, skb);
if (action == TC_ACT_SHOT)
- qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats));
+ tcf_action_inc_drop_qstats(&gact->common);
tcf_lastuse_update(&gact->tcf_tm);
@@ -177,15 +178,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
int action = READ_ONCE(gact->tcf_action);
struct tcf_t *tm = &gact->tcf_tm;
- _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes,
- packets);
- if (action == TC_ACT_SHOT)
- this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
-
- if (hw)
- _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats_hw),
- bytes, packets);
-
+ tcf_action_update_stats(a, bytes, packets, action == TC_ACT_SHOT, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3a31e241c647..d562c88cccbe 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -465,7 +465,8 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
@@ -522,7 +523,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a, &act_ife_ops,
- bind, true);
+ bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
kfree(p);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 214a03d405cf..400a2cfe8452 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -95,7 +95,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
const struct tc_action_ops *ops, int ovr, int bind,
- struct tcf_proto *tp)
+ struct tcf_proto *tp, u32 flags)
{
struct tc_action_net *tn = net_generic(net, id);
struct nlattr *tb[TCA_IPT_MAX + 1];
@@ -144,7 +144,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a, ops, bind,
- false);
+ false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -205,19 +205,19 @@ err1:
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
int bind, bool rtnl_held, struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
- bind, tp);
+ bind, tp, flags);
}
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
int bind, bool unlocked, struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
- bind, tp);
+ bind, tp, flags);
}
static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 08923b21e566..b6e1b5bbb4da 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -93,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
@@ -148,8 +148,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
return -EINVAL;
}
- ret = tcf_idr_create(tn, index, est, a,
- &act_mirred_ops, bind, true);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_mirred_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -231,7 +231,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
}
tcf_lastuse_update(&m->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
+ tcf_action_update_bstats(&m->common, skb);
m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
m_eaction = READ_ONCE(m->tcfm_eaction);
@@ -289,8 +289,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
/* let's the caller reinsert the packet, if possible */
if (use_reinsert) {
res->ingress = want_ingress;
- res->qstats = this_cpu_ptr(m->common.cpu_qstats);
- skb_tc_reinsert(skb, res);
+ if (skb_tc_reinsert(skb, res))
+ tcf_action_inc_overlimit_qstats(&m->common);
__this_cpu_dec(mirred_rec_level);
return TC_ACT_CONSUMED;
}
@@ -303,7 +303,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
if (err) {
out:
- qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
+ tcf_action_inc_overlimit_qstats(&m->common);
if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
@@ -318,10 +318,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
struct tcf_mirred *m = to_mirred(a);
struct tcf_t *tm = &m->tcf_tm;
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- if (hw)
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
- bytes, packets);
+ tcf_action_update_stats(a, bytes, packets, false, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 4cf6c553bb0b..4d8c822b6aca 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -131,7 +131,8 @@ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
static int tcf_mpls_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mpls_net_id);
struct nlattr *tb[TCA_MPLS_MAX + 1];
@@ -224,7 +225,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
- &act_mpls_ops, bind, true);
+ &act_mpls_ops, bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index ea4c5359e7df..88a1b79a1848 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -36,7 +36,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
struct tc_action **a, int ovr, int bind,
bool rtnl_held, struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
struct nlattr *tb[TCA_NAT_MAX + 1];
@@ -61,7 +61,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
ret = tcf_idr_create(tn, index, est, a,
- &act_nat_ops, bind, false);
+ &act_nat_ops, bind, false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index cdfaa79382a2..d5eff6ac17a9 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -137,7 +137,8 @@ nla_failure:
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
@@ -190,7 +191,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
goto out_free;
}
ret = tcf_idr_create(tn, index, est, a,
- &act_pedit_ops, bind, false);
+ &act_pedit_ops, bind, false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
goto out_free;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 89c04c52af3d..d96271590268 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -47,7 +47,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
int ret = 0, tcfp_result = TC_ACT_OK, err, size;
@@ -87,7 +87,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, NULL, a,
- &act_police_ops, bind, true);
+ &act_police_ops, bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -294,10 +294,7 @@ static void tcf_police_stats_update(struct tc_action *a,
struct tcf_police *police = to_police(a);
struct tcf_t *tm = &police->tcf_tm;
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- if (hw)
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
- bytes, packets);
+ tcf_action_update_stats(a, bytes, packets, false, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
@@ -345,10 +342,7 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
nla_put_u32(skb, TCA_POLICE_AVRATE, p->tcfp_ewma_rate))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - police->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - police->tcf_tm.lastuse);
- t.firstuse = jiffies_to_clock_t(jiffies - police->tcf_tm.firstuse);
- t.expires = jiffies_to_clock_t(police->tcf_tm.expires);
+ tcf_tm_dump(&t, &police->tcf_tm);
if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD))
goto nla_put_failure;
spin_unlock_bh(&police->tcf_lock);
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 514456a0b9a8..29b23bfaf10d 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -36,7 +36,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
int bind, bool rtnl_held, struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@ -69,7 +69,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
- &act_sample_ops, bind, true);
+ &act_sample_ops, bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 6120e56117ca..97639b259cd7 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -86,7 +86,8 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
@@ -127,7 +128,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
- &act_simp_ops, bind, false);
+ &act_simp_ops, bind, false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6a8d3337c577..5f7ca7f89ca2 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -86,7 +86,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
@@ -165,7 +165,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
- &act_skbedit_ops, bind, true);
+ &act_skbedit_ops, bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 888437f97ba6..39e6d94cfafb 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -79,7 +79,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
@@ -143,7 +143,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
- &act_skbmod_ops, bind, true);
+ &act_skbmod_ops, bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 2f83a79f76aa..cb34e5d57aaa 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -31,7 +31,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
params = rcu_dereference_bh(t->params);
tcf_lastuse_update(&t->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
+ tcf_action_update_bstats(&t->common, skb);
action = READ_ONCE(t->tcf_action);
switch (params->tcft_action) {
@@ -208,7 +208,7 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
@@ -347,8 +347,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_tunnel_key_ops, bind, true);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_tunnel_key_ops, bind,
+ act_flags);
if (ret) {
NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
goto release_tun_meta;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 08aaf719a70f..b6939abc61eb 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -29,7 +29,7 @@ static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
u16 tci;
tcf_lastuse_update(&v->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
+ tcf_action_update_bstats(&v->common, skb);
/* Ensure 'data' points at mac_header prior calling vlan manipulating
* functions.
@@ -88,7 +88,7 @@ out:
return action;
drop:
- qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+ tcf_action_inc_drop_qstats(&v->common);
return TC_ACT_SHOT;
}
@@ -102,7 +102,8 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
@@ -188,8 +189,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
action = parm->v_action;
if (!exists) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_vlan_ops, bind, true);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_vlan_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -307,10 +308,7 @@ static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u32 packets,
struct tcf_vlan *v = to_vlan(a);
struct tcf_t *tm = &v->tcf_tm;
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- if (hw)
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
- bytes, packets);
+ tcf_action_update_stats(a, bytes, packets, false, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 98dd87ce1510..b1c7e726ce5d 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -530,8 +530,7 @@ begin:
fq_flow_set_throttled(q, f);
goto begin;
}
- if (time_next_packet &&
- (s64)(now - time_next_packet - q->ce_threshold) > 0) {
+ if ((s64)(now - time_next_packet - q->ce_threshold) > 0) {
INET_ECN_set_ce(skb);
q->stat_ce_mark++;
}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index c261c0a18868..968519ff36e9 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -14,7 +14,6 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
-#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/netlink.h>
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8769b4b8807d..8561e825f401 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -382,13 +382,8 @@ void __qdisc_run(struct Qdisc *q)
int packets;
while (qdisc_restart(q, &packets)) {
- /*
- * Ordered by possible occurrence: Postpone processing if
- * 1. we've exceeded packet quota
- * 2. another process needs the CPU;
- */
quota -= packets;
- if (quota <= 0 || need_resched()) {
+ if (quota <= 0) {
__netif_schedule(q);
break;
}
@@ -1214,8 +1209,13 @@ void dev_deactivate_many(struct list_head *head)
/* Wait for outstanding qdisc_run calls. */
list_for_each_entry(dev, head, close_list) {
- while (some_qdisc_is_busy(dev))
- yield();
+ while (some_qdisc_is_busy(dev)) {
+ /* wait_event() would avoid this sleep-loop but would
+ * require expensive checks in the fast paths of packet
+ * processing which isn't worth it.
+ */
+ schedule_timeout_uninterruptible(1);
+ }
/* The new qdisc is assigned at this point so we can safely
* unwind stale skb lists and qdisc statistics
*/
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index d2ffc9a0ba3a..1ba893b85dad 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -429,6 +429,8 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
changeover = 1 ;
asoc->peer.primary_path = transport;
+ sctp_ulpevent_nofity_peer_addr_change(transport,
+ SCTP_ADDR_MADE_PRIM, 0);
/* Set a default msg_name for events. */
memcpy(&asoc->peer.primary_addr, &transport->ipaddr,
@@ -569,6 +571,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
asoc->peer.transport_count--;
+ sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_REMOVED, 0);
sctp_transport_free(peer);
}
@@ -707,6 +710,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
asoc->peer.transport_count++;
+ sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_ADDED, 0);
+
/* If we do not yet have a primary path, set one. */
if (!asoc->peer.primary_path) {
sctp_assoc_set_primary(asoc, peer);
@@ -781,10 +786,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
enum sctp_transport_cmd command,
sctp_sn_error_t error)
{
- struct sctp_ulpevent *event;
- struct sockaddr_storage addr;
- int spc_state = 0;
bool ulp_notify = true;
+ int spc_state = 0;
/* Record the transition on the transport. */
switch (command) {
@@ -836,16 +839,9 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
/* Generate and send a SCTP_PEER_ADDR_CHANGE notification
* to the user.
*/
- if (ulp_notify) {
- memset(&addr, 0, sizeof(struct sockaddr_storage));
- memcpy(&addr, &transport->ipaddr,
- transport->af_specific->sockaddr_len);
-
- event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
- 0, spc_state, error, GFP_ATOMIC);
- if (event)
- asoc->stream.si->enqueue_event(&asoc->ulpq, event);
- }
+ if (ulp_notify)
+ sctp_ulpevent_nofity_peer_addr_change(transport,
+ spc_state, error);
/* Select new active and retran paths. */
sctp_select_active_and_retran_path(asoc);
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index cc0405c79dfc..cc3ce5d80b08 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -75,41 +75,39 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
struct list_head *pos, *temp;
struct sctp_chunk *chunk;
struct sctp_ulpevent *ev;
- int error = 0, notify;
-
- /* If we failed, we may need to notify. */
- notify = msg->send_failed ? -1 : 0;
+ int error, sent;
/* Release all references. */
list_for_each_safe(pos, temp, &msg->chunks) {
list_del_init(pos);
chunk = list_entry(pos, struct sctp_chunk, frag_list);
- /* Check whether we _really_ need to notify. */
- if (notify < 0) {
- asoc = chunk->asoc;
- if (msg->send_error)
- error = msg->send_error;
- else
- error = asoc->outqueue.error;
-
- notify = sctp_ulpevent_type_enabled(asoc->subscribe,
- SCTP_SEND_FAILED);
+
+ if (!msg->send_failed) {
+ sctp_chunk_put(chunk);
+ continue;
}
- /* Generate a SEND FAILED event only if enabled. */
- if (notify > 0) {
- int sent;
- if (chunk->has_tsn)
- sent = SCTP_DATA_SENT;
- else
- sent = SCTP_DATA_UNSENT;
+ asoc = chunk->asoc;
+ error = msg->send_error ?: asoc->outqueue.error;
+ sent = chunk->has_tsn ? SCTP_DATA_SENT : SCTP_DATA_UNSENT;
+ if (sctp_ulpevent_type_enabled(asoc->subscribe,
+ SCTP_SEND_FAILED)) {
ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent,
error, GFP_ATOMIC);
if (ev)
asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
+ if (sctp_ulpevent_type_enabled(asoc->subscribe,
+ SCTP_SEND_FAILED_EVENT)) {
+ ev = sctp_ulpevent_make_send_failed_event(asoc, chunk,
+ sent, error,
+ GFP_ATOMIC);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+
sctp_chunk_put(chunk);
}
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index e0cc1edf49a0..c82dbdcf13f2 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -238,7 +238,7 @@ fail:
* When a destination address on a multi-homed peer encounters a change
* an interface details event is sent.
*/
-struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
+static struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
const struct sctp_association *asoc,
const struct sockaddr_storage *aaddr,
int flags, int state, int error, gfp_t gfp)
@@ -336,6 +336,22 @@ fail:
return NULL;
}
+void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport,
+ int state, int error)
+{
+ struct sctp_association *asoc = transport->asoc;
+ struct sockaddr_storage addr;
+ struct sctp_ulpevent *event;
+
+ memset(&addr, 0, sizeof(struct sockaddr_storage));
+ memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len);
+
+ event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, 0, state,
+ error, GFP_ATOMIC);
+ if (event)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, event);
+}
+
/* Create and initialize an SCTP_REMOTE_ERROR notification.
*
* Note: This assumes that the chunk->skb->data already points to the
@@ -511,6 +527,45 @@ fail:
return NULL;
}
+struct sctp_ulpevent *sctp_ulpevent_make_send_failed_event(
+ const struct sctp_association *asoc, struct sctp_chunk *chunk,
+ __u16 flags, __u32 error, gfp_t gfp)
+{
+ struct sctp_send_failed_event *ssf;
+ struct sctp_ulpevent *event;
+ struct sk_buff *skb;
+ int len;
+
+ skb = skb_copy_expand(chunk->skb, sizeof(*ssf), 0, gfp);
+ if (!skb)
+ return NULL;
+
+ len = ntohs(chunk->chunk_hdr->length);
+ len -= sctp_datachk_len(&asoc->stream);
+
+ skb_pull(skb, sctp_datachk_len(&asoc->stream));
+ event = sctp_skb2event(skb);
+ sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
+
+ ssf = skb_push(skb, sizeof(*ssf));
+ ssf->ssf_type = SCTP_SEND_FAILED_EVENT;
+ ssf->ssf_flags = flags;
+ ssf->ssf_length = sizeof(*ssf) + len;
+ skb_trim(skb, ssf->ssf_length);
+ ssf->ssf_error = error;
+
+ ssf->ssfe_info.snd_sid = chunk->sinfo.sinfo_stream;
+ ssf->ssfe_info.snd_ppid = chunk->sinfo.sinfo_ppid;
+ ssf->ssfe_info.snd_context = chunk->sinfo.sinfo_context;
+ ssf->ssfe_info.snd_assoc_id = chunk->sinfo.sinfo_assoc_id;
+ ssf->ssfe_info.snd_flags = chunk->chunk_hdr->flags;
+
+ sctp_ulpevent_set_owner(event, asoc);
+ ssf->ssf_assoc_id = sctp_assoc2id(asoc);
+
+ return event;
+}
+
/* Create and initialize a SCTP_SHUTDOWN_EVENT notification.
*
* Socket Extensions for SCTP - draft-01
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 47946f489fd4..b7d9fd285c71 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -174,6 +174,7 @@ static int smc_release(struct socket *sock)
if (!sk)
goto out;
+ sock_hold(sk); /* sock_put below */
smc = smc_sk(sk);
/* cleanup for a dangling non-blocking connect */
@@ -196,6 +197,7 @@ static int smc_release(struct socket *sock)
sock->sk = NULL;
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
sock_put(sk); /* final sock_put */
out:
return rc;
@@ -977,12 +979,14 @@ void smc_close_non_accepted(struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
+ sock_hold(sk); /* sock_put below */
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
__smc_release(smc);
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
sock_put(sk); /* final sock_put */
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 878313f8d6c1..be11ba41190f 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -188,6 +188,7 @@ struct smc_connection {
* 0 for SMC-R, 32 for SMC-D
*/
u64 peer_token; /* SMC-D token of peer */
+ u8 killed : 1; /* abnormal termination */
};
struct smc_sock { /* smc sock container */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index d0b0f4c865b4..7dc07ec2379b 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -63,7 +63,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
wr_rdma_buf,
(struct smc_wr_tx_pend_priv **)pend);
- if (!conn->alert_token_local)
+ if (conn->killed)
/* abnormal termination */
rc = -EPIPE;
return rc;
@@ -328,7 +328,7 @@ static void smcd_cdc_rx_tsklet(unsigned long data)
struct smcd_cdc_msg cdc;
struct smc_sock *smc;
- if (!conn)
+ if (!conn || conn->killed)
return;
data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr;
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index fc06720b53c1..d34e5adce2eb 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -13,6 +13,7 @@
#include <linux/sched/signal.h>
#include <net/sock.h>
+#include <net/tcp.h>
#include "smc.h"
#include "smc_tx.h"
@@ -65,8 +66,9 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
rc = sk_wait_event(sk, &timeout,
!smc_tx_prepared_sends(&smc->conn) ||
- (sk->sk_err == ECONNABORTED) ||
- (sk->sk_err == ECONNRESET),
+ sk->sk_err == ECONNABORTED ||
+ sk->sk_err == ECONNRESET ||
+ smc->conn.killed,
&wait);
if (rc)
break;
@@ -95,11 +97,13 @@ static int smc_close_final(struct smc_connection *conn)
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
else
conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
+ if (conn->killed)
+ return -EPIPE;
return smc_cdc_get_slot_and_msg_send(conn);
}
-static int smc_close_abort(struct smc_connection *conn)
+int smc_close_abort(struct smc_connection *conn)
{
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
@@ -109,19 +113,15 @@ static int smc_close_abort(struct smc_connection *conn)
/* terminate smc socket abnormally - active abort
* link group is terminated, i.e. RDMA communication no longer possible
*/
-static void smc_close_active_abort(struct smc_sock *smc)
+void smc_close_active_abort(struct smc_sock *smc)
{
struct sock *sk = &smc->sk;
-
- struct smc_cdc_conn_state_flags *txflags =
- &smc->conn.local_tx_ctrl.conn_state_flags;
+ bool release_clcsock = false;
if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
sk->sk_err = ECONNABORTED;
- if (smc->clcsock && smc->clcsock->sk) {
- smc->clcsock->sk->sk_err = ECONNABORTED;
- smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
- }
+ if (smc->clcsock && smc->clcsock->sk)
+ tcp_abort(smc->clcsock->sk, ECONNABORTED);
}
switch (sk->sk_state) {
case SMC_ACTIVE:
@@ -129,35 +129,29 @@ static void smc_close_active_abort(struct smc_sock *smc)
release_sock(sk);
cancel_delayed_work_sync(&smc->conn.tx_work);
lock_sock(sk);
+ sk->sk_state = SMC_CLOSED;
sock_put(sk); /* passive closing */
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
- if (!smc_cdc_rxed_any_close(&smc->conn))
- sk->sk_state = SMC_PEERABORTWAIT;
- else
- sk->sk_state = SMC_CLOSED;
release_sock(sk);
cancel_delayed_work_sync(&smc->conn.tx_work);
lock_sock(sk);
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* postponed passive closing */
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
- if (!txflags->peer_conn_closed) {
- /* just SHUTDOWN_SEND done */
- sk->sk_state = SMC_PEERABORTWAIT;
- } else {
- sk->sk_state = SMC_CLOSED;
- }
+ case SMC_PEERFINCLOSEWAIT:
+ sk->sk_state = SMC_CLOSED;
+ smc_conn_free(&smc->conn);
+ release_clcsock = true;
sock_put(sk); /* passive closing */
break;
case SMC_PROCESSABORT:
case SMC_APPFINCLOSEWAIT:
sk->sk_state = SMC_CLOSED;
break;
- case SMC_PEERFINCLOSEWAIT:
- sock_put(sk); /* passive closing */
- break;
case SMC_INIT:
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
@@ -166,6 +160,12 @@ static void smc_close_active_abort(struct smc_sock *smc)
sock_set_flag(sk, SOCK_DEAD);
sk->sk_state_change(sk);
+
+ if (release_clcsock) {
+ release_sock(sk);
+ smc_clcsock_release(smc);
+ lock_sock(sk);
+ }
}
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
@@ -215,8 +215,6 @@ again:
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
rc = smc_close_final(conn);
- if (rc)
- break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
} else {
/* peer event has changed the state */
@@ -229,8 +227,6 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
- if (rc)
- break;
}
sk->sk_state = SMC_CLOSED;
break;
@@ -246,8 +242,6 @@ again:
goto again;
/* confirm close from peer */
rc = smc_close_final(conn);
- if (rc)
- break;
if (smc_cdc_rxed_any_close(conn)) {
/* peer has closed the socket already */
sk->sk_state = SMC_CLOSED;
@@ -263,8 +257,6 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
- if (rc)
- break;
}
/* peer sending PeerConnectionClosed will cause transition */
break;
@@ -272,10 +264,12 @@ again:
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
- smc_close_abort(conn);
+ rc = smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERABORTWAIT:
+ sk->sk_state = SMC_CLOSED;
+ break;
case SMC_CLOSED:
/* nothing to do, add tracing in future patch */
break;
@@ -344,12 +338,6 @@ static void smc_close_passive_work(struct work_struct *work)
lock_sock(sk);
old_state = sk->sk_state;
- if (!conn->alert_token_local) {
- /* abnormal termination */
- smc_close_active_abort(smc);
- goto wakeup;
- }
-
rxflags = &conn->local_rx_ctrl.conn_state_flags;
if (rxflags->peer_conn_abort) {
/* peer has not received all data */
@@ -451,8 +439,6 @@ again:
goto again;
/* send close wr request */
rc = smc_close_wr(conn);
- if (rc)
- break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
break;
case SMC_APPCLOSEWAIT1:
@@ -466,8 +452,6 @@ again:
goto again;
/* confirm close from peer */
rc = smc_close_wr(conn);
- if (rc)
- break;
sk->sk_state = SMC_APPCLOSEWAIT2;
break;
case SMC_APPCLOSEWAIT2:
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index e0e3b5df25d2..634fea2b7c95 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -24,5 +24,7 @@ int smc_close_active(struct smc_sock *smc);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
void smc_clcsock_release(struct smc_sock *smc);
+int smc_close_abort(struct smc_connection *conn);
+void smc_close_active_abort(struct smc_sock *smc);
#endif /* SMC_CLOSE_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2ba97ff325a5..0d92456729ab 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -42,20 +42,40 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc);
+/* return head of link group list and its lock for a given link group */
+static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
+ spinlock_t **lgr_lock)
+{
+ if (lgr->is_smcd) {
+ *lgr_lock = &lgr->smcd->lgr_lock;
+ return &lgr->smcd->lgr_list;
+ }
+
+ *lgr_lock = &smc_lgr_list.lock;
+ return &smc_lgr_list.list;
+}
+
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
/* client link group creation always follows the server link group
* creation. For client use a somewhat higher removal delay time,
* otherwise there is a risk of out-of-sync link groups.
*/
- mod_delayed_work(system_wq, &lgr->free_work,
- (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
- SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
+ if (!lgr->freeing && !lgr->freefast) {
+ mod_delayed_work(system_wq, &lgr->free_work,
+ (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
+ SMC_LGR_FREE_DELAY_CLNT :
+ SMC_LGR_FREE_DELAY_SERV);
+ }
}
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
- mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
+ if (!lgr->freeing && !lgr->freefast) {
+ lgr->freefast = 1;
+ mod_delayed_work(system_wq, &lgr->free_work,
+ SMC_LGR_FREE_DELAY_FAST);
+ }
}
/* Register connection's alert token in our lookup structure.
@@ -134,6 +154,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
+ conn->lgr = NULL;
}
/* Send delete link, either as client to request the initiation
@@ -157,48 +178,62 @@ static void smc_lgr_free_work(struct work_struct *work)
struct smc_link_group *lgr = container_of(to_delayed_work(work),
struct smc_link_group,
free_work);
+ spinlock_t *lgr_lock;
+ struct smc_link *lnk;
bool conns;
- spin_lock_bh(&smc_lgr_list.lock);
+ smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
+ if (lgr->freeing) {
+ spin_unlock_bh(lgr_lock);
+ return;
+ }
read_lock_bh(&lgr->conns_lock);
conns = RB_EMPTY_ROOT(&lgr->conns_all);
read_unlock_bh(&lgr->conns_lock);
if (!conns) { /* number of lgr connections is no longer zero */
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_unlock_bh(lgr_lock);
return;
}
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list); /* remove from smc_lgr_list */
- spin_unlock_bh(&smc_lgr_list.lock);
+ list_del_init(&lgr->list); /* remove from smc_lgr_list */
+ lnk = &lgr->lnk[SMC_SINGLE_LINK];
if (!lgr->is_smcd && !lgr->terminating) {
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
-
/* try to send del link msg, on error free lgr immediately */
if (lnk->state == SMC_LNK_ACTIVE &&
!smc_link_send_delete(lnk)) {
/* reschedule in case we never receive a response */
smc_lgr_schedule_free_work(lgr);
+ spin_unlock_bh(lgr_lock);
return;
}
}
+ lgr->freeing = 1; /* this instance does the freeing, no new schedule */
+ spin_unlock_bh(lgr_lock);
+ cancel_delayed_work(&lgr->free_work);
- if (!delayed_work_pending(&lgr->free_work)) {
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
+ smc_llc_link_inactive(lnk);
+ if (lgr->is_smcd)
+ smc_ism_signal_shutdown(lgr);
+ smc_lgr_free(lgr);
+}
- if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
- smc_llc_link_inactive(lnk);
- if (lgr->is_smcd)
- smc_ism_signal_shutdown(lgr);
- smc_lgr_free(lgr);
- }
+static void smc_lgr_terminate_work(struct work_struct *work)
+{
+ struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+ terminate_work);
+
+ smc_lgr_terminate(lgr);
}
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_link_group *lgr;
+ struct list_head *lgr_list;
struct smc_link *lnk;
+ spinlock_t *lgr_lock;
u8 rndvec[3];
int rc = 0;
int i;
@@ -217,6 +252,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
}
lgr->is_smcd = ini->is_smcd;
lgr->sync_err = 0;
+ lgr->terminating = 0;
+ lgr->freefast = 0;
+ lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
rwlock_init(&lgr->sndbufs_lock);
rwlock_init(&lgr->rmbs_lock);
@@ -228,13 +266,18 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
smc_lgr_list.num += SMC_LGR_NUM_INCR;
memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
+ INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
lgr->conns_all = RB_ROOT;
if (ini->is_smcd) {
/* SMC-D specific settings */
+ get_device(&ini->ism_dev->dev);
lgr->peer_gid = ini->ism_gid;
lgr->smcd = ini->ism_dev;
+ lgr_list = &ini->ism_dev->lgr_list;
+ lgr_lock = &lgr->smcd->lgr_lock;
} else {
/* SMC-R specific settings */
+ get_device(&ini->ib_dev->ibdev->dev);
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
SMC_SYSTEMID_LEN);
@@ -245,6 +288,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lnk->link_id = SMC_SINGLE_LINK;
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
+ lgr_list = &smc_lgr_list.list;
+ lgr_lock = &smc_lgr_list.lock;
lnk->path_mtu =
ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
if (!ini->ib_dev->initialized)
@@ -274,9 +319,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
goto destroy_qp;
}
smc->conn.lgr = lgr;
- spin_lock_bh(&smc_lgr_list.lock);
- list_add(&lgr->list, &smc_lgr_list.list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_lock_bh(lgr_lock);
+ list_add(&lgr->list, lgr_list);
+ spin_unlock_bh(lgr_lock);
return 0;
destroy_qp:
@@ -309,7 +354,7 @@ static void smc_buf_unuse(struct smc_connection *conn,
conn->sndbuf_desc->used = 0;
if (conn->rmb_desc) {
if (!conn->rmb_desc->regerr) {
- if (!lgr->is_smcd) {
+ if (!lgr->is_smcd && !list_empty(&lgr->list)) {
/* unregister rmb with peer */
smc_llc_do_delete_rkey(
&lgr->lnk[SMC_SINGLE_LINK],
@@ -340,9 +385,10 @@ void smc_conn_free(struct smc_connection *conn)
} else {
smc_cdc_tx_dismiss_slots(conn);
}
- smc_lgr_unregister_conn(conn);
- smc_buf_unuse(conn, lgr); /* allow buffer reuse */
- conn->lgr = NULL;
+ if (!list_empty(&lgr->list)) {
+ smc_lgr_unregister_conn(conn);
+ smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+ }
if (!lgr->conns_num)
smc_lgr_schedule_free_work(lgr);
@@ -433,23 +479,50 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
static void smc_lgr_free(struct smc_link_group *lgr)
{
smc_lgr_free_bufs(lgr);
- if (lgr->is_smcd)
+ if (lgr->is_smcd) {
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
- else
+ put_device(&lgr->smcd->dev);
+ } else {
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
+ put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
+ }
kfree(lgr);
}
void smc_lgr_forget(struct smc_link_group *lgr)
{
- spin_lock_bh(&smc_lgr_list.lock);
+ struct list_head *lgr_list;
+ spinlock_t *lgr_lock;
+
+ lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
/* do not use this link group for new connections */
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ if (!list_empty(lgr_list))
+ list_del_init(lgr_list);
+ spin_unlock_bh(lgr_lock);
+}
+
+static void smc_sk_wake_ups(struct smc_sock *smc)
+{
+ smc->sk.sk_write_space(&smc->sk);
+ smc->sk.sk_data_ready(&smc->sk);
+ smc->sk.sk_state_change(&smc->sk);
}
-/* terminate linkgroup abnormally */
+/* kill a connection */
+static void smc_conn_kill(struct smc_connection *conn)
+{
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+ smc_close_abort(conn);
+ conn->killed = 1;
+ smc_sk_wake_ups(smc);
+ smc_lgr_unregister_conn(conn);
+ smc->sk.sk_err = ECONNABORTED;
+ smc_close_active_abort(smc);
+}
+
+/* terminate link group */
static void __smc_lgr_terminate(struct smc_link_group *lgr)
{
struct smc_connection *conn;
@@ -459,52 +532,65 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
if (lgr->terminating)
return; /* lgr already terminating */
lgr->terminating = 1;
- if (!list_empty(&lgr->list)) /* forget lgr */
- list_del_init(&lgr->list);
if (!lgr->is_smcd)
smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
- write_lock_bh(&lgr->conns_lock);
+ /* kill remaining link group connections */
+ read_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
while (node) {
+ read_unlock_bh(&lgr->conns_lock);
conn = rb_entry(node, struct smc_connection, alert_node);
smc = container_of(conn, struct smc_sock, conn);
- sock_hold(&smc->sk); /* sock_put in close work */
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
- __smc_lgr_unregister_conn(conn);
- conn->lgr = NULL;
- write_unlock_bh(&lgr->conns_lock);
- if (!schedule_work(&conn->close_work))
- sock_put(&smc->sk);
- write_lock_bh(&lgr->conns_lock);
+ sock_hold(&smc->sk); /* sock_put below */
+ lock_sock(&smc->sk);
+ smc_conn_kill(conn);
+ release_sock(&smc->sk);
+ sock_put(&smc->sk); /* sock_hold above */
+ read_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
}
- write_unlock_bh(&lgr->conns_lock);
+ read_unlock_bh(&lgr->conns_lock);
if (!lgr->is_smcd)
wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
- smc_lgr_schedule_free_work(lgr);
+ smc_lgr_schedule_free_work_fast(lgr);
}
+/* unlink and terminate link group */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
- spin_lock_bh(&smc_lgr_list.lock);
+ spinlock_t *lgr_lock;
+
+ smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
+ if (lgr->terminating) {
+ spin_unlock_bh(lgr_lock);
+ return; /* lgr already terminating */
+ }
+ list_del_init(&lgr->list);
+ spin_unlock_bh(lgr_lock);
__smc_lgr_terminate(lgr);
- spin_unlock_bh(&smc_lgr_list.lock);
}
/* Called when IB port is terminated */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link_group *lgr, *l;
+ LIST_HEAD(lgr_free_list);
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (!lgr->is_smcd &&
lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
- __smc_lgr_terminate(lgr);
+ list_move(&lgr->list, &lgr_free_list);
}
spin_unlock_bh(&smc_lgr_list.lock);
+
+ list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
+ list_del_init(&lgr->list);
+ __smc_lgr_terminate(lgr);
+ }
}
/* Called when SMC-D device is terminated or peer is lost */
@@ -514,20 +600,19 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
LIST_HEAD(lgr_free_list);
/* run common cleanup function and build free list */
- spin_lock_bh(&smc_lgr_list.lock);
- list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
- if (lgr->is_smcd && lgr->smcd == dev &&
- (!peer_gid || lgr->peer_gid == peer_gid) &&
+ spin_lock_bh(&dev->lgr_lock);
+ list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
+ if ((!peer_gid || lgr->peer_gid == peer_gid) &&
(vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
- __smc_lgr_terminate(lgr);
list_move(&lgr->list, &lgr_free_list);
}
}
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_unlock_bh(&dev->lgr_lock);
/* cancel the regular free workers and actually free lgrs */
list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
list_del_init(&lgr->list);
+ __smc_lgr_terminate(lgr);
cancel_delayed_work_sync(&lgr->free_work);
if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
smc_ism_signal_shutdown(lgr);
@@ -607,10 +692,14 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
+ struct list_head *lgr_list;
struct smc_link_group *lgr;
enum smc_lgr_role role;
+ spinlock_t *lgr_lock;
int rc = 0;
+ lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
+ lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
ini->cln_first_contact = SMC_FIRST_CONTACT;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
if (role == SMC_CLNT && ini->srv_first_contact)
@@ -618,8 +707,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
goto create;
/* determine if an existing link group can be reused */
- spin_lock_bh(&smc_lgr_list.lock);
- list_for_each_entry(lgr, &smc_lgr_list.list, list) {
+ spin_lock_bh(lgr_lock);
+ list_for_each_entry(lgr, lgr_list, list) {
write_lock_bh(&lgr->conns_lock);
if ((ini->is_smcd ?
smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
@@ -639,7 +728,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
}
write_unlock_bh(&lgr->conns_lock);
}
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_unlock_bh(lgr_lock);
if (role == SMC_CLNT && !ini->srv_first_contact &&
ini->cln_first_contact == SMC_FIRST_CONTACT) {
@@ -1027,16 +1116,45 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
return 0;
}
+static void smc_core_going_away(void)
+{
+ struct smc_ib_device *smcibdev;
+ struct smcd_dev *smcd;
+
+ spin_lock(&smc_ib_devices.lock);
+ list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
+ int i;
+
+ for (i = 0; i < SMC_MAX_PORTS; i++)
+ set_bit(i, smcibdev->ports_going_away);
+ }
+ spin_unlock(&smc_ib_devices.lock);
+
+ spin_lock(&smcd_dev_list.lock);
+ list_for_each_entry(smcd, &smcd_dev_list.list, list) {
+ smcd->going_away = 1;
+ }
+ spin_unlock(&smcd_dev_list.lock);
+}
+
/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_freeing_list);
+ struct smcd_dev *smcd;
+
+ smc_core_going_away();
spin_lock_bh(&smc_lgr_list.lock);
- if (!list_empty(&smc_lgr_list.list))
- list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
+ list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
spin_unlock_bh(&smc_lgr_list.lock);
+
+ spin_lock(&smcd_dev_list.lock);
+ list_for_each_entry(smcd, &smcd_dev_list.list, list)
+ list_splice_init(&smcd->lgr_list, &lgr_freeing_list);
+ spin_unlock(&smcd_dev_list.lock);
+
list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
list_del_init(&lgr->list);
if (!lgr->is_smcd) {
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c00ac61dc129..e6fd1ed42064 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -202,8 +202,11 @@ struct smc_link_group {
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
+ struct work_struct terminate_work; /* abnormal lgr termination */
u8 sync_err : 1; /* lgr no longer fits to peer */
u8 terminating : 1;/* lgr is terminating */
+ u8 freefast : 1; /* free worker scheduled fast */
+ u8 freeing : 1; /* lgr is being freed */
bool is_smcd; /* SMC-R or SMC-D */
union {
@@ -280,6 +283,12 @@ static inline struct smc_connection *smc_lgr_find_conn(
return res;
}
+static inline void smc_lgr_terminate_sched(struct smc_link_group *lgr)
+{
+ if (!lgr->terminating)
+ schedule_work(&lgr->terminate_work);
+}
+
struct smc_sock;
struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index d14ca4af6f94..af05daeb0538 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -242,8 +242,12 @@ static void smc_ib_port_event_work(struct work_struct *work)
for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
smc_ib_remember_port_attr(smcibdev, port_idx + 1);
clear_bit(port_idx, &smcibdev->port_event_mask);
- if (!smc_ib_port_active(smcibdev, port_idx + 1))
+ if (!smc_ib_port_active(smcibdev, port_idx + 1)) {
+ set_bit(port_idx, smcibdev->ports_going_away);
smc_port_terminate(smcibdev, port_idx + 1);
+ } else {
+ clear_bit(port_idx, smcibdev->ports_going_away);
+ }
}
}
@@ -259,8 +263,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
switch (ibevent->event) {
case IB_EVENT_DEVICE_FATAL:
/* terminate all ports on device */
- for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++)
+ for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) {
set_bit(port_idx, &smcibdev->port_event_mask);
+ set_bit(port_idx, smcibdev->ports_going_away);
+ }
schedule_work(&smcibdev->port_event_work);
break;
case IB_EVENT_PORT_ERR:
@@ -269,6 +275,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
port_idx = ibevent->element.port_num - 1;
if (port_idx < SMC_MAX_PORTS) {
set_bit(port_idx, &smcibdev->port_event_mask);
+ if (ibevent->event == IB_EVENT_PORT_ERR)
+ set_bit(port_idx, smcibdev->ports_going_away);
+ else if (ibevent->event == IB_EVENT_PORT_ACTIVE)
+ clear_bit(port_idx, smcibdev->ports_going_away);
schedule_work(&smcibdev->port_event_work);
}
break;
@@ -307,6 +317,7 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
port_idx = ibevent->element.qp->port - 1;
if (port_idx < SMC_MAX_PORTS) {
set_bit(port_idx, &smcibdev->port_event_mask);
+ set_bit(port_idx, smcibdev->ports_going_away);
schedule_work(&smcibdev->port_event_work);
}
break;
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index da60ab9e8d70..6a0069db6cae 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -47,6 +47,7 @@ struct smc_ib_device { /* ib-device infos for smc */
u8 initialized : 1; /* ib dev CQ, evthdl done */
struct work_struct port_event_work;
unsigned long port_event_mask;
+ DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS);
};
struct smc_buf_desc;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index e89e918b88e0..ee7340898cb4 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -286,7 +286,9 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
spin_lock_init(&smcd->lock);
+ spin_lock_init(&smcd->lgr_lock);
INIT_LIST_HEAD(&smcd->vlan);
+ INIT_LIST_HEAD(&smcd->lgr_list);
smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
WQ_MEM_RECLAIM, name);
if (!smcd->event_wq) {
@@ -313,6 +315,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
spin_lock(&smcd_dev_list.lock);
list_del(&smcd->list);
spin_unlock(&smcd_dev_list.lock);
+ smcd->going_away = 1;
flush_workqueue(smcd->event_wq);
destroy_workqueue(smcd->event_wq);
smc_smcd_terminate(smcd, 0, VLAN_VID_MASK);
@@ -342,6 +345,8 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
{
struct smc_ism_event_work *wrk;
+ if (smcd->going_away)
+ return;
/* copy event to event work queue, and let it be handled there */
wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
if (!wrk)
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 4fd60c522802..e1918ffaf125 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -475,7 +475,7 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
}
smc_llc_send_message(link, llc, sizeof(*llc));
- smc_lgr_schedule_free_work_fast(lgr);
+ smc_lgr_terminate_sched(lgr);
}
}
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 2920b006f65c..352ee2fa17dc 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -781,6 +781,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
dev_put(ndev);
if (netdev == ndev &&
smc_ib_port_active(ibdev, i) &&
+ !test_bit(i - 1, ibdev->ports_going_away) &&
!smc_ib_determine_gid(ibdev, i, ini->vlan_id,
ini->ib_gid, NULL)) {
ini->ib_dev = ibdev;
@@ -820,6 +821,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
continue;
if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
smc_ib_port_active(ibdev, i) &&
+ !test_bit(i - 1, ibdev->ports_going_away) &&
!smc_ib_determine_gid(ibdev, i, ini->vlan_id,
ini->ib_gid, NULL)) {
ini->ib_dev = ibdev;
@@ -846,7 +848,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
spin_lock(&smcd_dev_list.lock);
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
- if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
+ if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
+ !ismdev->going_away) {
ini->ism_dev = ismdev;
break;
}
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 97e8369002d7..39d7b34d06d2 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -201,6 +201,8 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct smc_connection *conn = &smc->conn;
+ struct smc_cdc_conn_state_flags *cflags =
+ &conn->local_tx_ctrl.conn_state_flags;
struct sock *sk = &smc->sk;
int rc;
@@ -210,7 +212,9 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
add_wait_queue(sk_sleep(sk), &wait);
rc = sk_wait_event(sk, timeo,
sk->sk_err ||
+ cflags->peer_conn_abort ||
sk->sk_shutdown & RCV_SHUTDOWN ||
+ conn->killed ||
fcrit(conn),
&wait);
remove_wait_queue(sk_sleep(sk), &wait);
@@ -314,11 +318,13 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
if (read_done >= target || (pipe && read_done))
break;
+ if (conn->killed)
+ break;
+
if (smc_rx_recvmsg_data_available(smc))
goto copy;
- if (sk->sk_shutdown & RCV_SHUTDOWN ||
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) {
+ if (sk->sk_shutdown & RCV_SHUTDOWN) {
/* smc_cdc_msg_recv_action() could have run after
* above smc_rx_recvmsg_data_available()
*/
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 6c8f09c1ce51..824f096ee7de 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -86,6 +86,7 @@ static int smc_tx_wait(struct smc_sock *smc, int flags)
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
if (sk->sk_err ||
(sk->sk_shutdown & SEND_SHUTDOWN) ||
+ conn->killed ||
conn->local_tx_ctrl.conn_state_flags.peer_done_writing) {
rc = -EPIPE;
break;
@@ -155,7 +156,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
return -ENOTCONN;
if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
(smc->sk.sk_err == ECONNABORTED) ||
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
+ conn->killed)
return -EPIPE;
if (smc_cdc_rxed_any_close(conn))
return send_done ?: -ECONNRESET;
@@ -282,10 +283,8 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
peer_rmbe_offset;
rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
- if (rc) {
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ if (rc)
smc_lgr_terminate(lgr);
- }
return rc;
}
@@ -495,10 +494,11 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
if (smc->sk.sk_err == ECONNABORTED)
return sock_error(&smc->sk);
+ if (conn->killed)
+ return -EPIPE;
rc = 0;
- if (conn->alert_token_local) /* connection healthy */
- mod_delayed_work(system_wq, &conn->tx_work,
- SMC_TX_WORK_DELAY);
+ mod_delayed_work(system_wq, &conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
return rc;
}
@@ -547,6 +547,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
int rc;
+ if (conn->killed ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ return -EPIPE; /* connection being aborted */
if (conn->lgr->is_smcd)
rc = smcd_tx_sndbuf_nonempty(conn);
else
@@ -573,9 +576,7 @@ void smc_tx_work(struct work_struct *work)
int rc;
lock_sock(&smc->sk);
- if (smc->sk.sk_err ||
- !conn->alert_token_local ||
- conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ if (smc->sk.sk_err)
goto out;
rc = smc_tx_sndbuf_nonempty(conn);
@@ -608,8 +609,11 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
((to_confirm > conn->rmbe_update_limit) &&
((sender_free <= (conn->rmb_desc->len / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
+ if (conn->killed ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ return;
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
- conn->alert_token_local) { /* connection healthy */
+ !conn->killed) {
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
return;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 253aa75dc2b6..50743dc56c86 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -101,7 +101,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
clear_bit(i, link->wr_tx_mask);
}
/* terminate connections of this link group abnormally */
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
}
if (pnd_snd.handler)
pnd_snd.handler(&pnd_snd.priv, link, wc->status);
@@ -191,7 +191,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
/* timeout - terminate connections */
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
return -EPIPE;
}
if (idx == link->wr_tx_cnt)
@@ -247,7 +247,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
if (rc) {
smc_wr_tx_put_slot(link, priv);
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
}
return rc;
}
@@ -272,7 +272,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
SMC_WR_REG_MR_WAIT_TIME);
if (!rc) {
/* timeout - terminate connections */
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
return -EPIPE;
}
if (rc == -ERESTARTSYS)
@@ -373,7 +373,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
/* terminate connections of this link group
* abnormally
*/
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
break;
default:
smc_wr_rx_post(link); /* refill WR RX */
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 23cb379a93d6..ab648dd150ee 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -105,6 +105,15 @@ static void __net_exit tipc_exit_net(struct net *net)
tipc_sk_rht_destroy(net);
}
+static void __net_exit tipc_pernet_pre_exit(struct net *net)
+{
+ tipc_node_pre_cleanup_net(net);
+}
+
+static struct pernet_operations tipc_pernet_pre_exit_ops = {
+ .pre_exit = tipc_pernet_pre_exit,
+};
+
static struct pernet_operations tipc_net_ops = {
.init = tipc_init_net,
.exit = tipc_exit_net,
@@ -151,6 +160,10 @@ static int __init tipc_init(void)
if (err)
goto out_pernet_topsrv;
+ err = register_pernet_subsys(&tipc_pernet_pre_exit_ops);
+ if (err)
+ goto out_register_pernet_subsys;
+
err = tipc_bearer_setup();
if (err)
goto out_bearer;
@@ -158,6 +171,8 @@ static int __init tipc_init(void)
pr_info("Started in single node mode\n");
return 0;
out_bearer:
+ unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
+out_register_pernet_subsys:
unregister_pernet_device(&tipc_topsrv_net_ops);
out_pernet_topsrv:
tipc_socket_stop();
@@ -177,6 +192,7 @@ out_netlink:
static void __exit tipc_exit(void)
{
tipc_bearer_cleanup();
+ unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
unregister_pernet_device(&tipc_topsrv_net_ops);
tipc_socket_stop();
unregister_pernet_device(&tipc_net_ops);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 60d829581068..8776d32a4a47 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -59,6 +59,7 @@
#include <net/netns/generic.h>
#include <linux/rhashtable.h>
#include <net/genetlink.h>
+#include <net/netns/hash.h>
struct tipc_node;
struct tipc_bearer;
@@ -185,6 +186,11 @@ static inline int in_range(u16 val, u16 min, u16 max)
return !less(val, min) && !more(val, max);
}
+static inline u32 tipc_net_hash_mixes(struct net *net, int tn_rand)
+{
+ return net_hash_mix(&init_net) ^ net_hash_mix(net) ^ tn_rand;
+}
+
#ifdef CONFIG_SYSCTL
int tipc_register_sysctl(void);
void tipc_unregister_sysctl(void);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index c138d68e8a69..b043e8c6397a 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -94,6 +94,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
msg_set_dest_domain(hdr, dest_domain);
msg_set_bc_netid(hdr, tn->net_id);
b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+ msg_set_peer_net_hash(hdr, tipc_net_hash_mixes(net, tn->random));
msg_set_node_id(hdr, tipc_own_id(net));
}
@@ -242,7 +243,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
if (!tipc_in_scope(legacy, b->domain, src))
return;
tipc_node_check_dest(net, src, peer_id, b, caps, signature,
- &maddr, &respond, &dupl_addr);
+ msg_peer_net_hash(hdr), &maddr, &respond,
+ &dupl_addr);
if (dupl_addr)
disc_dupl_alert(b, src, &maddr);
if (!respond)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 999eab592de8..7d7a66178607 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1873,7 +1873,7 @@ void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
tipc_link_create_dummy_tnl_msg(tnl, xmitq);
- /* This failover link enpoint was never established before,
+ /* This failover link endpoint was never established before,
* so it has not received anything from peer.
* Otherwise, it must be a normal failover situation or the
* node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 922d262e153f..973795a1a968 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -190,6 +190,59 @@ err:
return 0;
}
+/**
+ * tipc_msg_append(): Append data to tail of an existing buffer queue
+ * @hdr: header to be used
+ * @m: the data to be appended
+ * @mss: max allowable size of buffer
+ * @dlen: size of data to be appended
+ * @txq: queue to appand to
+ * Returns the number og 1k blocks appended or errno value
+ */
+int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
+ int mss, struct sk_buff_head *txq)
+{
+ struct sk_buff *skb, *prev;
+ int accounted, total, curr;
+ int mlen, cpy, rem = dlen;
+ struct tipc_msg *hdr;
+
+ skb = skb_peek_tail(txq);
+ accounted = skb ? msg_blocks(buf_msg(skb)) : 0;
+ total = accounted;
+
+ while (rem) {
+ if (!skb || skb->len >= mss) {
+ prev = skb;
+ skb = tipc_buf_acquire(mss, GFP_KERNEL);
+ if (unlikely(!skb))
+ return -ENOMEM;
+ skb_orphan(skb);
+ skb_trim(skb, MIN_H_SIZE);
+ hdr = buf_msg(skb);
+ skb_copy_to_linear_data(skb, _hdr, MIN_H_SIZE);
+ msg_set_hdr_sz(hdr, MIN_H_SIZE);
+ msg_set_size(hdr, MIN_H_SIZE);
+ __skb_queue_tail(txq, skb);
+ total += 1;
+ if (prev)
+ msg_set_ack_required(buf_msg(prev), 0);
+ msg_set_ack_required(hdr, 1);
+ }
+ hdr = buf_msg(skb);
+ curr = msg_blocks(hdr);
+ mlen = msg_size(hdr);
+ cpy = min_t(int, rem, mss - mlen);
+ if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter))
+ return -EFAULT;
+ msg_set_size(hdr, mlen + cpy);
+ skb_put(skb, cpy);
+ rem -= cpy;
+ total += msg_blocks(hdr) - curr;
+ }
+ return total - accounted;
+}
+
/* tipc_msg_validate - validate basic format of received message
*
* This routine ensures a TIPC message has an acceptable header, and at least
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 0daa6f04ca81..0435dda4b90c 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -290,6 +290,16 @@ static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d)
msg_set_bits(m, 0, 18, 1, d);
}
+static inline int msg_ack_required(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 18, 1);
+}
+
+static inline void msg_set_ack_required(struct tipc_msg *m, u32 d)
+{
+ msg_set_bits(m, 0, 18, 1, d);
+}
+
static inline bool msg_is_rcast(struct tipc_msg *m)
{
return msg_bits(m, 0, 18, 0x1);
@@ -1026,6 +1036,20 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
}
+/* Word 13
+ */
+static inline void msg_set_peer_net_hash(struct tipc_msg *m, u32 n)
+{
+ msg_set_word(m, 13, n);
+}
+
+static inline u32 msg_peer_net_hash(struct tipc_msg *m)
+{
+ return msg_word(m, 13);
+}
+
+/* Word 14
+ */
static inline u32 msg_sugg_node_addr(struct tipc_msg *m)
{
return msg_word(m, 14);
@@ -1065,6 +1089,8 @@ int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
int pktmax, struct sk_buff_head *frags);
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
+int tipc_msg_append(struct tipc_msg *hdr, struct msghdr *m, int dlen,
+ int mss, struct sk_buff_head *txq);
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
bool tipc_msg_assemble(struct sk_buff_head *list);
bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 836e629e8f4a..5feaf3b67380 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -146,7 +146,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
struct publication *publ;
struct sk_buff *skb = NULL;
struct distr_item *item = NULL;
- u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) /
+ u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) /
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index d6165ad384c0..d32bbd0f5e46 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -176,7 +176,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
},
{
.cmd = TIPC_NL_PUBL_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = tipc_nl_publ_dump,
},
{
@@ -239,7 +240,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
},
{
.cmd = TIPC_NL_MON_PEER_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = tipc_nl_node_dump_monitor_peer,
},
{
@@ -250,7 +252,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
#ifdef CONFIG_TIPC_MEDIA_UDP
{
.cmd = TIPC_NL_UDP_GET_REMOTEIP,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = tipc_udp_nl_dump_remoteip,
},
#endif
@@ -268,18 +271,6 @@ struct genl_family tipc_genl_family __ro_after_init = {
.n_ops = ARRAY_SIZE(tipc_genl_v2_ops),
};
-int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
-{
- u32 maxattr = tipc_genl_family.maxattr;
-
- *attr = genl_family_attrbuf(&tipc_genl_family);
- if (!*attr)
- return -EOPNOTSUPP;
-
- return nlmsg_parse_deprecated(nlh, GENL_HDRLEN, *attr, maxattr,
- tipc_nl_policy, NULL);
-}
-
int __init tipc_netlink_start(void)
{
int res;
diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h
index 4ba0ad422110..7cf777723e3e 100644
--- a/net/tipc/netlink.h
+++ b/net/tipc/netlink.h
@@ -38,7 +38,6 @@
#include <net/netlink.h>
extern struct genl_family tipc_genl_family;
-int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf);
struct tipc_nl_msg {
struct sk_buff *skb;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index e135d4e11231..17a529739f8d 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -181,15 +181,18 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
struct tipc_nl_compat_msg *msg,
struct sk_buff *arg)
{
+ struct genl_dumpit_info info;
int len = 0;
int err;
struct sk_buff *buf;
struct nlmsghdr *nlmsg;
struct netlink_callback cb;
+ struct nlattr **attrbuf;
memset(&cb, 0, sizeof(cb));
cb.nlh = (struct nlmsghdr *)arg->data;
cb.skb = arg;
+ cb.data = &info;
buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (!buf)
@@ -201,19 +204,35 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
return -ENOMEM;
}
+ attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1,
+ sizeof(struct nlattr *), GFP_KERNEL);
+ if (!attrbuf) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ info.attrs = attrbuf;
+ err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf,
+ tipc_genl_family.maxattr,
+ tipc_genl_family.policy, NULL);
+ if (err)
+ goto err_out;
+
do {
int rem;
len = (*cmd->dumpit)(buf, &cb);
nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) {
- struct nlattr **attrs;
-
- err = tipc_nlmsg_parse(nlmsg, &attrs);
+ err = nlmsg_parse_deprecated(nlmsg, GENL_HDRLEN,
+ attrbuf,
+ tipc_genl_family.maxattr,
+ tipc_genl_family.policy,
+ NULL);
if (err)
goto err_out;
- err = (*cmd->format)(msg, attrs);
+ err = (*cmd->format)(msg, attrbuf);
if (err)
goto err_out;
@@ -231,6 +250,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
err = 0;
err_out:
+ kfree(attrbuf);
tipc_dump_done(&cb);
kfree_skb(buf);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c8f6177dd5a2..4b60928049ea 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -126,6 +126,8 @@ struct tipc_node {
struct timer_list timer;
struct rcu_head rcu;
unsigned long delete_at;
+ struct net *peer_net;
+ u32 peer_hash_mix;
};
/* Node FSM states and events:
@@ -184,7 +186,7 @@ static struct tipc_link *node_active_link(struct tipc_node *n, int sel)
return n->links[bearer_id].link;
}
-int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected)
{
struct tipc_node *n;
int bearer_id;
@@ -194,6 +196,14 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
if (unlikely(!n))
return mtu;
+ /* Allow MAX_MSG_SIZE when building connection oriented message
+ * if they are in the same core network
+ */
+ if (n->peer_net && connected) {
+ tipc_node_put(n);
+ return mtu;
+ }
+
bearer_id = n->active_links[sel & 1];
if (likely(bearer_id != INVALID_BEARER_ID))
mtu = n->links[bearer_id].mtu;
@@ -360,8 +370,37 @@ static void tipc_node_write_unlock(struct tipc_node *n)
}
}
+static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes)
+{
+ int net_id = tipc_netid(n->net);
+ struct tipc_net *tn_peer;
+ struct net *tmp;
+ u32 hash_chk;
+
+ if (n->peer_net)
+ return;
+
+ for_each_net_rcu(tmp) {
+ tn_peer = tipc_net(tmp);
+ if (!tn_peer)
+ continue;
+ /* Integrity checking whether node exists in namespace or not */
+ if (tn_peer->net_id != net_id)
+ continue;
+ if (memcmp(n->peer_id, tn_peer->node_id, NODE_ID_LEN))
+ continue;
+ hash_chk = tipc_net_hash_mixes(tmp, tn_peer->random);
+ if (hash_mixes ^ hash_chk)
+ continue;
+ n->peer_net = tmp;
+ n->peer_hash_mix = hash_mixes;
+ break;
+ }
+}
+
static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
- u8 *peer_id, u16 capabilities)
+ u8 *peer_id, u16 capabilities,
+ u32 signature, u32 hash_mixes)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *n, *temp_node;
@@ -372,6 +411,8 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
spin_lock_bh(&tn->node_list_lock);
n = tipc_node_find(net, addr);
if (n) {
+ if (n->peer_hash_mix ^ hash_mixes)
+ tipc_node_assign_peer_net(n, hash_mixes);
if (n->capabilities == capabilities)
goto exit;
/* Same node may come back with new capabilities */
@@ -389,6 +430,7 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
+
goto exit;
}
n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -399,6 +441,10 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
n->addr = addr;
memcpy(&n->peer_id, peer_id, 16);
n->net = net;
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
+ /* Assign kernel local namespace if exists */
+ tipc_node_assign_peer_net(n, hash_mixes);
n->capabilities = capabilities;
kref_init(&n->kref);
rwlock_init(&n->lock);
@@ -426,6 +472,10 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
tipc_bc_sndlink(net),
&n->bc_entry.link)) {
pr_warn("Broadcast rcv link creation failed, no memory\n");
+ if (n->peer_net) {
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
+ }
kfree(n);
n = NULL;
goto exit;
@@ -979,7 +1029,7 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
void tipc_node_check_dest(struct net *net, u32 addr,
u8 *peer_id, struct tipc_bearer *b,
- u16 capabilities, u32 signature,
+ u16 capabilities, u32 signature, u32 hash_mixes,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr)
{
@@ -998,7 +1048,8 @@ void tipc_node_check_dest(struct net *net, u32 addr,
*dupl_addr = false;
*respond = false;
- n = tipc_node_create(net, addr, peer_id, capabilities);
+ n = tipc_node_create(net, addr, peer_id, capabilities, signature,
+ hash_mixes);
if (!n)
return;
@@ -1343,6 +1394,10 @@ static void node_lost_contact(struct tipc_node *n,
/* Notify publications from this node */
n->action_flags |= TIPC_NOTIFY_NODE_DOWN;
+ if (n->peer_net) {
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
+ }
/* Notify sockets connected to node */
list_for_each_entry_safe(conn, safe, conns, list) {
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
@@ -1424,6 +1479,56 @@ msg_full:
return -EMSGSIZE;
}
+static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list)
+{
+ struct tipc_msg *hdr = buf_msg(skb_peek(list));
+ struct sk_buff_head inputq;
+
+ switch (msg_user(hdr)) {
+ case TIPC_LOW_IMPORTANCE:
+ case TIPC_MEDIUM_IMPORTANCE:
+ case TIPC_HIGH_IMPORTANCE:
+ case TIPC_CRITICAL_IMPORTANCE:
+ if (msg_connected(hdr) || msg_named(hdr)) {
+ tipc_loopback_trace(peer_net, list);
+ spin_lock_init(&list->lock);
+ tipc_sk_rcv(peer_net, list);
+ return;
+ }
+ if (msg_mcast(hdr)) {
+ tipc_loopback_trace(peer_net, list);
+ skb_queue_head_init(&inputq);
+ tipc_sk_mcast_rcv(peer_net, list, &inputq);
+ __skb_queue_purge(list);
+ skb_queue_purge(&inputq);
+ return;
+ }
+ return;
+ case MSG_FRAGMENTER:
+ if (tipc_msg_assemble(list)) {
+ tipc_loopback_trace(peer_net, list);
+ skb_queue_head_init(&inputq);
+ tipc_sk_mcast_rcv(peer_net, list, &inputq);
+ __skb_queue_purge(list);
+ skb_queue_purge(&inputq);
+ }
+ return;
+ case GROUP_PROTOCOL:
+ case CONN_MANAGER:
+ tipc_loopback_trace(peer_net, list);
+ spin_lock_init(&list->lock);
+ tipc_sk_rcv(peer_net, list);
+ return;
+ case LINK_PROTOCOL:
+ case NAME_DISTRIBUTOR:
+ case TUNNEL_PROTOCOL:
+ case BCAST_PROTOCOL:
+ return;
+ default:
+ return;
+ };
+}
+
/**
* tipc_node_xmit() is the general link level function for message sending
* @net: the applicable net namespace
@@ -1439,6 +1544,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
struct tipc_link_entry *le = NULL;
struct tipc_node *n;
struct sk_buff_head xmitq;
+ bool node_up = false;
int bearer_id;
int rc;
@@ -1456,6 +1562,17 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
}
tipc_node_read_lock(n);
+ node_up = node_is_up(n);
+ if (node_up && n->peer_net && check_net(n->peer_net)) {
+ /* xmit inner linux container */
+ tipc_lxc_xmit(n->peer_net, list);
+ if (likely(skb_queue_empty(list))) {
+ tipc_node_read_unlock(n);
+ tipc_node_put(n);
+ return 0;
+ }
+ }
+
bearer_id = n->active_links[selector & 1];
if (unlikely(bearer_id == INVALID_BEARER_ID)) {
tipc_node_read_unlock(n);
@@ -2484,13 +2601,9 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
int err;
if (!prev_node) {
- struct nlattr **attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
struct nlattr *mon[TIPC_NLA_MON_MAX + 1];
- err = tipc_nlmsg_parse(cb->nlh, &attrs);
- if (err)
- return err;
-
if (!attrs[TIPC_NLA_MON])
return -EINVAL;
@@ -2591,3 +2704,33 @@ int tipc_node_dump(struct tipc_node *n, bool more, char *buf)
return i;
}
+
+void tipc_node_pre_cleanup_net(struct net *exit_net)
+{
+ struct tipc_node *n;
+ struct tipc_net *tn;
+ struct net *tmp;
+
+ rcu_read_lock();
+ for_each_net_rcu(tmp) {
+ if (tmp == exit_net)
+ continue;
+ tn = tipc_net(tmp);
+ if (!tn)
+ continue;
+ spin_lock_bh(&tn->node_list_lock);
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ if (!n->peer_net)
+ continue;
+ if (n->peer_net != exit_net)
+ continue;
+ tipc_node_write_lock(n);
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
+ tipc_node_write_unlock_fast(n);
+ break;
+ }
+ spin_unlock_bh(&tn->node_list_lock);
+ }
+ rcu_read_unlock();
+}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 291d0ecd4101..c39cd861c07d 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -54,7 +54,8 @@ enum {
TIPC_LINK_PROTO_SEQNO = (1 << 6),
TIPC_MCAST_RBCTL = (1 << 7),
TIPC_GAP_ACK_BLOCK = (1 << 8),
- TIPC_TUNNEL_ENHANCED = (1 << 9)
+ TIPC_TUNNEL_ENHANCED = (1 << 9),
+ TIPC_NAGLE = (1 << 10)
};
#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \
@@ -66,7 +67,9 @@ enum {
TIPC_LINK_PROTO_SEQNO | \
TIPC_MCAST_RBCTL | \
TIPC_GAP_ACK_BLOCK | \
- TIPC_TUNNEL_ENHANCED)
+ TIPC_TUNNEL_ENHANCED | \
+ TIPC_NAGLE)
+
#define INVALID_BEARER_ID -1
void tipc_node_stop(struct net *net);
@@ -75,7 +78,7 @@ u32 tipc_node_get_addr(struct tipc_node *node);
u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
struct tipc_bearer *bearer,
- u16 capabilities, u32 signature,
+ u16 capabilities, u32 signature, u32 hash_mixes,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr);
void tipc_node_delete_links(struct net *net, int bearer_id);
@@ -92,7 +95,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr);
void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
-int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected);
bool tipc_node_is_up(struct net *net, u32 addr);
u16 tipc_node_get_capabilities(struct net *net, u32 addr);
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
@@ -107,4 +110,5 @@ int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
struct netlink_callback *cb);
+void tipc_node_pre_cleanup_net(struct net *exit_net);
#endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 4b92b196cfa6..5d7859aac78e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -75,6 +75,7 @@ struct sockaddr_pair {
* @conn_instance: TIPC instance used when connection was established
* @published: non-zero if port has one or more associated names
* @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @maxnagle: maximum size of msg which can be subject to nagle
* @portid: unique port identity in TIPC socket hash table
* @phdr: preformatted message header used when sending messages
* #cong_links: list of congested links
@@ -97,6 +98,7 @@ struct tipc_sock {
u32 conn_instance;
int published;
u32 max_pkt;
+ u32 maxnagle;
u32 portid;
struct tipc_msg phdr;
struct list_head cong_links;
@@ -116,6 +118,10 @@ struct tipc_sock {
struct tipc_mc_method mc_method;
struct rcu_head rcu;
struct tipc_group *group;
+ u32 oneway;
+ u16 snd_backlog;
+ bool expect_ack;
+ bool nodelay;
bool group_is_open;
};
@@ -137,6 +143,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
+static void tipc_sk_push_backlog(struct tipc_sock *tsk);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -227,6 +234,26 @@ static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
return 1;
}
+/* tsk_set_nagle - enable/disable nagle property by manipulating maxnagle
+ */
+static void tsk_set_nagle(struct tipc_sock *tsk)
+{
+ struct sock *sk = &tsk->sk;
+
+ tsk->maxnagle = 0;
+ if (sk->sk_type != SOCK_STREAM)
+ return;
+ if (tsk->nodelay)
+ return;
+ if (!(tsk->peer_caps & TIPC_NAGLE))
+ return;
+ /* Limit node local buffer size to avoid receive queue overflow */
+ if (tsk->max_pkt == MAX_MSG_SIZE)
+ tsk->maxnagle = 1500;
+ else
+ tsk->maxnagle = tsk->max_pkt;
+}
+
/**
* tsk_advance_rx_queue - discard first buffer in socket receive queue
*
@@ -446,6 +473,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
tsk = tipc_sk(sk);
tsk->max_pkt = MAX_PKT_DEFAULT;
+ tsk->maxnagle = 0;
INIT_LIST_HEAD(&tsk->publications);
INIT_LIST_HEAD(&tsk->cong_links);
msg = &tsk->phdr;
@@ -512,8 +540,12 @@ static void __tipc_shutdown(struct socket *sock, int error)
tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
!tsk_conn_cong(tsk)));
- /* Remove any pending SYN message */
- __skb_queue_purge(&sk->sk_write_queue);
+ /* Push out unsent messages or remove if pending SYN */
+ skb = skb_peek(&sk->sk_write_queue);
+ if (skb && !msg_is_syn(buf_msg(skb)))
+ tipc_sk_push_backlog(tsk);
+ else
+ __skb_queue_purge(&sk->sk_write_queue);
/* Reject all unreceived messages, except on an active connection
* (which disconnects locally & sends a 'FIN+' to peer).
@@ -854,7 +886,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
/* Build message as chain of buffers */
__skb_queue_head_init(&pkts);
- mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
@@ -1208,6 +1240,27 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
tipc_sk_rcv(net, inputq);
}
+/* tipc_sk_push_backlog(): send accumulated buffers in socket write queue
+ * when socket is in Nagle mode
+ */
+static void tipc_sk_push_backlog(struct tipc_sock *tsk)
+{
+ struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
+ struct net *net = sock_net(&tsk->sk);
+ u32 dnode = tsk_peer_node(tsk);
+ int rc;
+
+ if (skb_queue_empty(txq) || tsk->cong_link_cnt)
+ return;
+
+ tsk->snt_unacked += tsk->snd_backlog;
+ tsk->snd_backlog = 0;
+ tsk->expect_ack = true;
+ rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
+ if (rc == -ELINKCONG)
+ tsk->cong_link_cnt = 1;
+}
+
/**
* tipc_sk_conn_proto_rcv - receive a connection mng protocol message
* @tsk: receiving socket
@@ -1221,7 +1274,7 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
u32 onode = tsk_own_node(tsk);
struct sock *sk = &tsk->sk;
int mtyp = msg_type(hdr);
- bool conn_cong;
+ bool was_cong;
/* Ignore if connection cannot be validated: */
if (!tsk_peer_msg(tsk, hdr)) {
@@ -1254,11 +1307,13 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
__skb_queue_tail(xmitq, skb);
return;
} else if (mtyp == CONN_ACK) {
- conn_cong = tsk_conn_cong(tsk);
+ was_cong = tsk_conn_cong(tsk);
+ tsk->expect_ack = false;
+ tipc_sk_push_backlog(tsk);
tsk->snt_unacked -= msg_conn_ack(hdr);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
tsk->snd_win = msg_adv_win(hdr);
- if (conn_cong)
+ if (was_cong && !tsk_conn_cong(tsk))
sk->sk_write_space(sk);
} else if (mtyp != CONN_PROBE_REPLY) {
pr_warn("Received unknown CONN_PROTO msg\n");
@@ -1388,7 +1443,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
return rc;
__skb_queue_head_init(&pkts);
- mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
@@ -1437,15 +1492,15 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
struct sock *sk = sock->sk;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+ struct sk_buff_head *txq = &sk->sk_write_queue;
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
- struct sk_buff_head pkts;
u32 dnode = tsk_peer_node(tsk);
+ int maxnagle = tsk->maxnagle;
+ int maxpkt = tsk->max_pkt;
int send, sent = 0;
- int rc = 0;
-
- __skb_queue_head_init(&pkts);
+ int blocks, rc = 0;
if (unlikely(dlen > INT_MAX))
return -EMSGSIZE;
@@ -1467,21 +1522,35 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
tipc_sk_connected(sk)));
if (unlikely(rc))
break;
-
send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
- rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
- if (unlikely(rc != send))
- break;
-
- trace_tipc_sk_sendstream(sk, skb_peek(&pkts),
+ blocks = tsk->snd_backlog;
+ if (tsk->oneway++ >= 4 && send <= maxnagle) {
+ rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
+ if (unlikely(rc < 0))
+ break;
+ blocks += rc;
+ if (blocks <= 64 && tsk->expect_ack) {
+ tsk->snd_backlog = blocks;
+ sent += send;
+ break;
+ }
+ tsk->expect_ack = true;
+ } else {
+ rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq);
+ if (unlikely(rc != send))
+ break;
+ blocks += tsk_inc(tsk, send + MIN_H_SIZE);
+ }
+ trace_tipc_sk_sendstream(sk, skb_peek(txq),
TIPC_DUMP_SK_SNDQ, " ");
- rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+ rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
if (unlikely(rc == -ELINKCONG)) {
tsk->cong_link_cnt = 1;
rc = 0;
}
if (likely(!rc)) {
- tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
+ tsk->snt_unacked += blocks;
+ tsk->snd_backlog = 0;
sent += send;
}
} while (sent < dlen && !rc);
@@ -1526,8 +1595,9 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
tipc_set_sk_state(sk, TIPC_ESTABLISHED);
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
- tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
+ tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid, true);
tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
+ tsk_set_nagle(tsk);
__skb_queue_purge(&sk->sk_write_queue);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
return;
@@ -1848,6 +1918,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
bool peek = flags & MSG_PEEK;
int offset, required, copy, copied = 0;
int hlen, dlen, err, rc;
+ bool ack = false;
long timeout;
/* Catch invalid receive attempts */
@@ -1892,6 +1963,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Copy data if msg ok, otherwise return error/partial data */
if (likely(!err)) {
+ ack = msg_ack_required(hdr);
offset = skb_cb->bytes_read;
copy = min_t(int, dlen - offset, buflen - copied);
rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
@@ -1919,7 +1991,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Send connection flow control advertisement when applicable */
tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
- if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE))
+ if (ack || tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
tipc_sk_send_ack(tsk);
/* Exit if all requested data or FIN/error received */
@@ -1990,6 +2062,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
smp_wmb();
tsk->cong_link_cnt--;
wakeup = true;
+ tipc_sk_push_backlog(tsk);
break;
case GROUP_PROTOCOL:
tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
@@ -2029,6 +2102,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
if (unlikely(msg_mcast(hdr)))
return false;
+ tsk->oneway = 0;
switch (sk->sk_state) {
case TIPC_CONNECTING:
@@ -2074,6 +2148,8 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
return true;
return false;
case TIPC_ESTABLISHED:
+ if (!skb_queue_empty(&sk->sk_write_queue))
+ tipc_sk_push_backlog(tsk);
/* Accept only connection-based messages sent by peer */
if (likely(con_msg && !err && pport == oport && pnode == onode))
return true;
@@ -2959,6 +3035,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
case TIPC_SRC_DROPPABLE:
case TIPC_DEST_DROPPABLE:
case TIPC_CONN_TIMEOUT:
+ case TIPC_NODELAY:
if (ol < sizeof(value))
return -EINVAL;
if (get_user(value, (u32 __user *)ov))
@@ -3007,6 +3084,10 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
case TIPC_GROUP_LEAVE:
res = tipc_sk_leave(tsk);
break;
+ case TIPC_NODELAY:
+ tsk->nodelay = !!value;
+ tsk_set_nagle(tsk);
+ break;
default:
res = -EINVAL;
}
@@ -3588,13 +3669,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct tipc_sock *tsk;
if (!tsk_portid) {
- struct nlattr **attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];
- err = tipc_nlmsg_parse(cb->nlh, &attrs);
- if (err)
- return err;
-
if (!attrs[TIPC_NLA_SOCK])
return -EINVAL;
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 287df68721df..43ca5fd6574d 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -448,15 +448,11 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
int i;
if (!bid && !skip_cnt) {
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
struct net *net = sock_net(skb->sk);
struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1];
- struct nlattr **attrs;
char *bname;
- err = tipc_nlmsg_parse(cb->nlh, &attrs);
- if (err)
- return err;
-
if (!attrs[TIPC_NLA_BEARER])
return -EINVAL;
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index e4328b3b72eb..61ec78521a60 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -26,3 +26,13 @@ config TLS_DEVICE
Enable kernel support for HW offload of the TLS protocol.
If unsure, say N.
+
+config TLS_TOE
+ bool "Transport Layer Security TCP stack bypass"
+ depends on TLS
+ default n
+ help
+ Enable kernel support for legacy HW offload of the TLS protocol,
+ which is incompatible with the Linux networking stack semantics.
+
+ If unsure, say N.
diff --git a/net/tls/Makefile b/net/tls/Makefile
index ef0dc74ce8f9..f1ffbfe8968d 100644
--- a/net/tls/Makefile
+++ b/net/tls/Makefile
@@ -3,8 +3,11 @@
# Makefile for the TLS subsystem.
#
+CFLAGS_trace.o := -I$(src)
+
obj-$(CONFIG_TLS) += tls.o
-tls-y := tls_main.o tls_sw.o
+tls-y := tls_main.o tls_sw.o tls_proc.o trace.o
+tls-$(CONFIG_TLS_TOE) += tls_toe.o
tls-$(CONFIG_TLS_DEVICE) += tls_device.o tls_device_fallback.o
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index f959487c5cd1..33b267b052c0 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -38,6 +38,8 @@
#include <net/tcp.h>
#include <net/tls.h>
+#include "trace.h"
+
/* device_offload_lock is used to synchronize tls_dev_add
* against NETDEV_DOWN notifications.
*/
@@ -202,6 +204,15 @@ void tls_device_free_resources_tx(struct sock *sk)
tls_free_partial_record(sk, tls_ctx);
}
+void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+
+ trace_tls_device_tx_resync_req(sk, got_seq, exp_seq);
+ WARN_ON(test_and_set_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags));
+}
+EXPORT_SYMBOL_GPL(tls_offload_tx_resync_request);
+
static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx,
u32 seq)
{
@@ -216,6 +227,7 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx,
rcd_sn = tls_ctx->tx.rec_seq;
+ trace_tls_device_tx_resync_send(sk, seq, rcd_sn);
down_read(&device_offload_lock);
netdev = tls_ctx->netdev;
if (netdev)
@@ -419,7 +431,7 @@ static int tls_push_data(struct sock *sk,
~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST))
return -ENOTSUPP;
- if (sk->sk_err)
+ if (unlikely(sk->sk_err))
return -sk->sk_err;
flags |= MSG_SENDPAGE_DECRYPTED;
@@ -440,9 +452,8 @@ static int tls_push_data(struct sock *sk,
max_open_record_len = TLS_MAX_PAYLOAD_SIZE +
prot->prepend_size;
do {
- rc = tls_do_allocation(sk, ctx, pfrag,
- prot->prepend_size);
- if (rc) {
+ rc = tls_do_allocation(sk, ctx, pfrag, prot->prepend_size);
+ if (unlikely(rc)) {
rc = sk_stream_wait_memory(sk, &timeo);
if (!rc)
continue;
@@ -637,15 +648,19 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
static void tls_device_resync_rx(struct tls_context *tls_ctx,
struct sock *sk, u32 seq, u8 *rcd_sn)
{
+ struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
struct net_device *netdev;
if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags)))
return;
+
+ trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type);
netdev = READ_ONCE(tls_ctx->netdev);
if (netdev)
netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn,
TLS_OFFLOAD_CTX_DIR_RX);
clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC);
}
void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
@@ -653,8 +668,8 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_rx *rx_ctx;
u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE];
+ u32 sock_data, is_req_pending;
struct tls_prot_info *prot;
- u32 is_req_pending;
s64 resync_req;
u32 req_seq;
@@ -683,8 +698,12 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
/* head of next rec is already in, note that the sock_inq will
* include the currently parsed message when called from parser
*/
- if (tcp_inq(sk) > rcd_len)
+ sock_data = tcp_inq(sk);
+ if (sock_data > rcd_len) {
+ trace_tls_device_rx_resync_nh_delay(sk, sock_data,
+ rcd_len);
return;
+ }
rx_ctx->resync_nh_do_now = 0;
seq += rcd_len;
@@ -728,6 +747,7 @@ static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx,
/* head of next rec is already in, parser will sync for us */
if (tcp_inq(sk) > rxm->full_len) {
+ trace_tls_device_rx_resync_nh_schedule(sk);
ctx->resync_nh_do_now = 1;
} else {
struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -826,9 +846,9 @@ free_buf:
return err;
}
-int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
+int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+ struct sk_buff *skb, struct strp_msg *rxm)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx);
int is_decrypted = skb->decrypted;
int is_encrypted = !is_decrypted;
@@ -840,6 +860,10 @@ int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
is_encrypted &= !skb_iter->decrypted;
}
+ trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len,
+ tls_ctx->rx.rec_seq, rxm->full_len,
+ is_encrypted, is_decrypted);
+
ctx->sw.decrypted |= is_decrypted;
/* Return immediately if the record is either entirely plaintext or
@@ -1013,6 +1037,8 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX,
&ctx->crypto_send.info,
tcp_sk(sk)->write_seq);
+ trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_TX,
+ tcp_sk(sk)->write_seq, rec_seq, rc);
if (rc)
goto release_lock;
@@ -1049,6 +1075,7 @@ free_marker_record:
int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
{
+ struct tls12_crypto_info_aes_gcm_128 *info;
struct tls_offload_context_rx *context;
struct net_device *netdev;
int rc = 0;
@@ -1096,6 +1123,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX,
&ctx->crypto_recv.info,
tcp_sk(sk)->copied_seq);
+ info = (void *)&ctx->crypto_recv.info;
+ trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_RX,
+ tcp_sk(sk)->copied_seq, info->rec_seq, rc);
if (rc)
goto free_sw_resources;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index ac88877dcade..f144b965704e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -41,7 +41,9 @@
#include <linux/inetdevice.h>
#include <linux/inet_diag.h>
+#include <net/snmp.h>
#include <net/tls.h>
+#include <net/tls_toe.h>
MODULE_AUTHOR("Mellanox Technologies");
MODULE_DESCRIPTION("Transport Layer Security Support");
@@ -58,14 +60,12 @@ static struct proto *saved_tcpv6_prot;
static DEFINE_MUTEX(tcpv6_prot_mutex);
static struct proto *saved_tcpv4_prot;
static DEFINE_MUTEX(tcpv4_prot_mutex);
-static LIST_HEAD(device_list);
-static DEFINE_SPINLOCK(device_spinlock);
static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
static struct proto_ops tls_sw_proto_ops;
static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
struct proto *base);
-static void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+void update_sk_prot(struct sock *sk, struct tls_context *ctx)
{
int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
@@ -286,14 +286,19 @@ static void tls_sk_proto_cleanup(struct sock *sk,
kfree(ctx->tx.rec_seq);
kfree(ctx->tx.iv);
tls_sw_release_resources_tx(sk);
+ TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW);
} else if (ctx->tx_conf == TLS_HW) {
tls_device_free_resources_tx(sk);
+ TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE);
}
- if (ctx->rx_conf == TLS_SW)
+ if (ctx->rx_conf == TLS_SW) {
tls_sw_release_resources_rx(sk);
- else if (ctx->rx_conf == TLS_HW)
+ TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW);
+ } else if (ctx->rx_conf == TLS_HW) {
tls_device_offload_cleanup_rx(sk);
+ TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE);
+ }
}
static void tls_sk_proto_close(struct sock *sk, long timeout)
@@ -534,19 +539,29 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
if (tx) {
rc = tls_set_device_offload(sk, ctx);
conf = TLS_HW;
- if (rc) {
+ if (!rc) {
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE);
+ } else {
rc = tls_set_sw_offload(sk, ctx, 1);
if (rc)
goto err_crypto_info;
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW);
conf = TLS_SW;
}
} else {
rc = tls_set_device_offload_rx(sk, ctx);
conf = TLS_HW;
- if (rc) {
+ if (!rc) {
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE);
+ } else {
rc = tls_set_sw_offload(sk, ctx, 0);
if (rc)
goto err_crypto_info;
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW);
conf = TLS_SW;
}
tls_sw_strparser_arm(sk, ctx);
@@ -603,7 +618,7 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
return do_tls_setsockopt(sk, optname, optval, optlen);
}
-static struct tls_context *create_ctx(struct sock *sk)
+struct tls_context *tls_ctx_create(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tls_context *ctx;
@@ -643,90 +658,6 @@ static void tls_build_proto(struct sock *sk)
}
}
-static void tls_hw_sk_destruct(struct sock *sk)
-{
- struct tls_context *ctx = tls_get_ctx(sk);
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- ctx->sk_destruct(sk);
- /* Free ctx */
- rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
- tls_ctx_free(sk, ctx);
-}
-
-static int tls_hw_prot(struct sock *sk)
-{
- struct tls_context *ctx;
- struct tls_device *dev;
- int rc = 0;
-
- spin_lock_bh(&device_spinlock);
- list_for_each_entry(dev, &device_list, dev_list) {
- if (dev->feature && dev->feature(dev)) {
- ctx = create_ctx(sk);
- if (!ctx)
- goto out;
-
- spin_unlock_bh(&device_spinlock);
- tls_build_proto(sk);
- ctx->sk_destruct = sk->sk_destruct;
- sk->sk_destruct = tls_hw_sk_destruct;
- ctx->rx_conf = TLS_HW_RECORD;
- ctx->tx_conf = TLS_HW_RECORD;
- update_sk_prot(sk, ctx);
- spin_lock_bh(&device_spinlock);
- rc = 1;
- break;
- }
- }
-out:
- spin_unlock_bh(&device_spinlock);
- return rc;
-}
-
-static void tls_hw_unhash(struct sock *sk)
-{
- struct tls_context *ctx = tls_get_ctx(sk);
- struct tls_device *dev;
-
- spin_lock_bh(&device_spinlock);
- list_for_each_entry(dev, &device_list, dev_list) {
- if (dev->unhash) {
- kref_get(&dev->kref);
- spin_unlock_bh(&device_spinlock);
- dev->unhash(dev, sk);
- kref_put(&dev->kref, dev->release);
- spin_lock_bh(&device_spinlock);
- }
- }
- spin_unlock_bh(&device_spinlock);
- ctx->sk_proto->unhash(sk);
-}
-
-static int tls_hw_hash(struct sock *sk)
-{
- struct tls_context *ctx = tls_get_ctx(sk);
- struct tls_device *dev;
- int err;
-
- err = ctx->sk_proto->hash(sk);
- spin_lock_bh(&device_spinlock);
- list_for_each_entry(dev, &device_list, dev_list) {
- if (dev->hash) {
- kref_get(&dev->kref);
- spin_unlock_bh(&device_spinlock);
- err |= dev->hash(dev, sk);
- kref_put(&dev->kref, dev->release);
- spin_lock_bh(&device_spinlock);
- }
- }
- spin_unlock_bh(&device_spinlock);
-
- if (err)
- tls_hw_unhash(sk);
- return err;
-}
-
static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
struct proto *base)
{
@@ -764,10 +695,11 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW];
#endif
-
+#ifdef CONFIG_TLS_TOE
prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
- prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_hw_hash;
- prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_hw_unhash;
+ prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_toe_hash;
+ prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_toe_unhash;
+#endif
}
static int tls_init(struct sock *sk)
@@ -775,8 +707,12 @@ static int tls_init(struct sock *sk)
struct tls_context *ctx;
int rc = 0;
- if (tls_hw_prot(sk))
+ tls_build_proto(sk);
+
+#ifdef CONFIG_TLS_TOE
+ if (tls_toe_bypass(sk))
return 0;
+#endif
/* The TLS ulp is currently supported only for TCP sockets
* in ESTABLISHED state.
@@ -787,11 +723,9 @@ static int tls_init(struct sock *sk)
if (sk->sk_state != TCP_ESTABLISHED)
return -ENOTSUPP;
- tls_build_proto(sk);
-
/* allocate tls context */
write_lock_bh(&sk->sk_callback_lock);
- ctx = create_ctx(sk);
+ ctx = tls_ctx_create(sk);
if (!ctx) {
rc = -ENOMEM;
goto out;
@@ -877,21 +811,34 @@ static size_t tls_get_info_size(const struct sock *sk)
return size;
}
-void tls_register_device(struct tls_device *device)
+static int __net_init tls_init_net(struct net *net)
{
- spin_lock_bh(&device_spinlock);
- list_add_tail(&device->dev_list, &device_list);
- spin_unlock_bh(&device_spinlock);
+ int err;
+
+ net->mib.tls_statistics = alloc_percpu(struct linux_tls_mib);
+ if (!net->mib.tls_statistics)
+ return -ENOMEM;
+
+ err = tls_proc_init(net);
+ if (err)
+ goto err_free_stats;
+
+ return 0;
+err_free_stats:
+ free_percpu(net->mib.tls_statistics);
+ return err;
}
-EXPORT_SYMBOL(tls_register_device);
-void tls_unregister_device(struct tls_device *device)
+static void __net_exit tls_exit_net(struct net *net)
{
- spin_lock_bh(&device_spinlock);
- list_del(&device->dev_list);
- spin_unlock_bh(&device_spinlock);
+ tls_proc_fini(net);
+ free_percpu(net->mib.tls_statistics);
}
-EXPORT_SYMBOL(tls_unregister_device);
+
+static struct pernet_operations tls_proc_ops = {
+ .init = tls_init_net,
+ .exit = tls_exit_net,
+};
static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
.name = "tls",
@@ -904,6 +851,12 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
static int __init tls_register(void)
{
+ int err;
+
+ err = register_pernet_subsys(&tls_proc_ops);
+ if (err)
+ return err;
+
tls_sw_proto_ops = inet_stream_ops;
tls_sw_proto_ops.splice_read = tls_sw_splice_read;
@@ -917,6 +870,7 @@ static void __exit tls_unregister(void)
{
tcp_unregister_ulp(&tcp_tls_ulp_ops);
tls_device_cleanup();
+ unregister_pernet_subsys(&tls_proc_ops);
}
module_init(tls_register);
diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c
new file mode 100644
index 000000000000..83d9c80a684e
--- /dev/null
+++ b/net/tls/tls_proc.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/snmp.h>
+#include <net/tls.h>
+
+static const struct snmp_mib tls_mib_list[] = {
+ SNMP_MIB_ITEM("TlsCurrTxSw", LINUX_MIB_TLSCURRTXSW),
+ SNMP_MIB_ITEM("TlsCurrRxSw", LINUX_MIB_TLSCURRRXSW),
+ SNMP_MIB_ITEM("TlsCurrTxDevice", LINUX_MIB_TLSCURRTXDEVICE),
+ SNMP_MIB_ITEM("TlsCurrRxDevice", LINUX_MIB_TLSCURRRXDEVICE),
+ SNMP_MIB_ITEM("TlsTxSw", LINUX_MIB_TLSTXSW),
+ SNMP_MIB_ITEM("TlsRxSw", LINUX_MIB_TLSRXSW),
+ SNMP_MIB_ITEM("TlsTxDevice", LINUX_MIB_TLSTXDEVICE),
+ SNMP_MIB_ITEM("TlsRxDevice", LINUX_MIB_TLSRXDEVICE),
+ SNMP_MIB_ITEM("TlsDecryptError", LINUX_MIB_TLSDECRYPTERROR),
+ SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC),
+ SNMP_MIB_SENTINEL
+};
+
+static int tls_statistics_seq_show(struct seq_file *seq, void *v)
+{
+ unsigned long buf[LINUX_MIB_TLSMAX] = {};
+ struct net *net = seq->private;
+ int i;
+
+ snmp_get_cpu_field_batch(buf, tls_mib_list, net->mib.tls_statistics);
+ for (i = 0; tls_mib_list[i].name; i++)
+ seq_printf(seq, "%-32s\t%lu\n", tls_mib_list[i].name, buf[i]);
+
+ return 0;
+}
+
+int __net_init tls_proc_init(struct net *net)
+{
+ if (!proc_create_net_single("tls_stat", 0444, net->proc_net,
+ tls_statistics_seq_show, NULL))
+ return -ENOMEM;
+ return 0;
+}
+
+void __net_exit tls_proc_fini(struct net *net)
+{
+ remove_proc_entry("tls_stat", net->proc_net);
+}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index c2b5e0d2ba1a..de7561d4cfa5 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -168,6 +168,9 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
/* Propagate if there was an err */
if (err) {
+ if (err == -EBADMSG)
+ TLS_INC_STATS(sock_net(skb->sk),
+ LINUX_MIB_TLSDECRYPTERROR);
ctx->async_wait.err = err;
tls_err_abort(skb->sk, err);
} else {
@@ -253,6 +256,8 @@ static int tls_do_decryption(struct sock *sk,
return ret;
ret = crypto_wait_req(ret, &ctx->async_wait);
+ } else if (ret == -EBADMSG) {
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
}
if (async)
@@ -1490,7 +1495,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
if (!ctx->decrypted) {
if (tls_ctx->rx_conf == TLS_HW) {
- err = tls_device_decrypted(sk, skb);
+ err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
if (err < 0)
return err;
}
@@ -1518,7 +1523,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
rxm->offset += prot->prepend_size;
rxm->full_len -= prot->overhead_size;
tls_advance_record_sn(sk, prot, &tls_ctx->rx);
- ctx->decrypted = true;
+ ctx->decrypted = 1;
ctx->saved_data_ready(sk);
} else {
*zc = false;
@@ -1928,7 +1933,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
tls_err_abort(sk, EBADMSG);
goto splice_read_end;
}
- ctx->decrypted = true;
+ ctx->decrypted = 1;
}
rxm = strp_msg(skb);
@@ -2029,7 +2034,7 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb)
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- ctx->decrypted = false;
+ ctx->decrypted = 0;
ctx->recv_pkt = skb;
strp_pause(strp);
@@ -2386,10 +2391,11 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);
if (crypto_info->version == TLS_1_3_VERSION)
- sw_ctx_rx->async_capable = false;
+ sw_ctx_rx->async_capable = 0;
else
sw_ctx_rx->async_capable =
- tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+ !!(tfm->__crt_alg->cra_flags &
+ CRYPTO_ALG_ASYNC);
/* Set up strparser */
memset(&cb, 0, sizeof(cb));
diff --git a/net/tls/tls_toe.c b/net/tls/tls_toe.c
new file mode 100644
index 000000000000..7e1330f19165
--- /dev/null
+++ b/net/tls/tls_toe.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <net/inet_connection_sock.h>
+#include <net/tls.h>
+#include <net/tls_toe.h>
+
+static LIST_HEAD(device_list);
+static DEFINE_SPINLOCK(device_spinlock);
+
+static void tls_toe_sk_destruct(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tls_context *ctx = tls_get_ctx(sk);
+
+ ctx->sk_destruct(sk);
+ /* Free ctx */
+ rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+ tls_ctx_free(sk, ctx);
+}
+
+int tls_toe_bypass(struct sock *sk)
+{
+ struct tls_toe_device *dev;
+ struct tls_context *ctx;
+ int rc = 0;
+
+ spin_lock_bh(&device_spinlock);
+ list_for_each_entry(dev, &device_list, dev_list) {
+ if (dev->feature && dev->feature(dev)) {
+ ctx = tls_ctx_create(sk);
+ if (!ctx)
+ goto out;
+
+ ctx->sk_destruct = sk->sk_destruct;
+ sk->sk_destruct = tls_toe_sk_destruct;
+ ctx->rx_conf = TLS_HW_RECORD;
+ ctx->tx_conf = TLS_HW_RECORD;
+ update_sk_prot(sk, ctx);
+ rc = 1;
+ break;
+ }
+ }
+out:
+ spin_unlock_bh(&device_spinlock);
+ return rc;
+}
+
+void tls_toe_unhash(struct sock *sk)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ struct tls_toe_device *dev;
+
+ spin_lock_bh(&device_spinlock);
+ list_for_each_entry(dev, &device_list, dev_list) {
+ if (dev->unhash) {
+ kref_get(&dev->kref);
+ spin_unlock_bh(&device_spinlock);
+ dev->unhash(dev, sk);
+ kref_put(&dev->kref, dev->release);
+ spin_lock_bh(&device_spinlock);
+ }
+ }
+ spin_unlock_bh(&device_spinlock);
+ ctx->sk_proto->unhash(sk);
+}
+
+int tls_toe_hash(struct sock *sk)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ struct tls_toe_device *dev;
+ int err;
+
+ err = ctx->sk_proto->hash(sk);
+ spin_lock_bh(&device_spinlock);
+ list_for_each_entry(dev, &device_list, dev_list) {
+ if (dev->hash) {
+ kref_get(&dev->kref);
+ spin_unlock_bh(&device_spinlock);
+ err |= dev->hash(dev, sk);
+ kref_put(&dev->kref, dev->release);
+ spin_lock_bh(&device_spinlock);
+ }
+ }
+ spin_unlock_bh(&device_spinlock);
+
+ if (err)
+ tls_toe_unhash(sk);
+ return err;
+}
+
+void tls_toe_register_device(struct tls_toe_device *device)
+{
+ spin_lock_bh(&device_spinlock);
+ list_add_tail(&device->dev_list, &device_list);
+ spin_unlock_bh(&device_spinlock);
+}
+EXPORT_SYMBOL(tls_toe_register_device);
+
+void tls_toe_unregister_device(struct tls_toe_device *device)
+{
+ spin_lock_bh(&device_spinlock);
+ list_del(&device->dev_list);
+ spin_unlock_bh(&device_spinlock);
+}
+EXPORT_SYMBOL(tls_toe_unregister_device);
diff --git a/net/tls/trace.c b/net/tls/trace.c
new file mode 100644
index 000000000000..e374913cf9c9
--- /dev/null
+++ b/net/tls/trace.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <linux/module.h>
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+#endif
diff --git a/net/tls/trace.h b/net/tls/trace.h
new file mode 100644
index 000000000000..9ba5f600ea43
--- /dev/null
+++ b/net/tls/trace.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tls
+
+#if !defined(_TLS_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _TLS_TRACE_H_
+
+#include <asm/unaligned.h>
+#include <linux/tracepoint.h>
+
+struct sock;
+
+TRACE_EVENT(tls_device_offload_set,
+
+ TP_PROTO(struct sock *sk, int dir, u32 tcp_seq, u8 *rec_no, int ret),
+
+ TP_ARGS(sk, dir, tcp_seq, rec_no, ret),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u64, rec_no )
+ __field( int, dir )
+ __field( u32, tcp_seq )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->rec_no = get_unaligned_be64(rec_no);
+ __entry->dir = dir;
+ __entry->tcp_seq = tcp_seq;
+ __entry->ret = ret;
+ ),
+
+ TP_printk(
+ "sk=%p direction=%d tcp_seq=%u rec_no=%llu ret=%d",
+ __entry->sk, __entry->dir, __entry->tcp_seq, __entry->rec_no,
+ __entry->ret
+ )
+);
+
+TRACE_EVENT(tls_device_decrypted,
+
+ TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no, u32 rec_len,
+ bool encrypted, bool decrypted),
+
+ TP_ARGS(sk, tcp_seq, rec_no, rec_len, encrypted, decrypted),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u64, rec_no )
+ __field( u32, tcp_seq )
+ __field( u32, rec_len )
+ __field( bool, encrypted )
+ __field( bool, decrypted )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->rec_no = get_unaligned_be64(rec_no);
+ __entry->tcp_seq = tcp_seq;
+ __entry->rec_len = rec_len;
+ __entry->encrypted = encrypted;
+ __entry->decrypted = decrypted;
+ ),
+
+ TP_printk(
+ "sk=%p tcp_seq=%u rec_no=%llu len=%u encrypted=%d decrypted=%d",
+ __entry->sk, __entry->tcp_seq,
+ __entry->rec_no, __entry->rec_len,
+ __entry->encrypted, __entry->decrypted
+ )
+);
+
+TRACE_EVENT(tls_device_rx_resync_send,
+
+ TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no, int sync_type),
+
+ TP_ARGS(sk, tcp_seq, rec_no, sync_type),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u64, rec_no )
+ __field( u32, tcp_seq )
+ __field( int, sync_type )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->rec_no = get_unaligned_be64(rec_no);
+ __entry->tcp_seq = tcp_seq;
+ __entry->sync_type = sync_type;
+ ),
+
+ TP_printk(
+ "sk=%p tcp_seq=%u rec_no=%llu sync_type=%d",
+ __entry->sk, __entry->tcp_seq, __entry->rec_no,
+ __entry->sync_type
+ )
+);
+
+TRACE_EVENT(tls_device_rx_resync_nh_schedule,
+
+ TP_PROTO(struct sock *sk),
+
+ TP_ARGS(sk),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ ),
+
+ TP_printk(
+ "sk=%p", __entry->sk
+ )
+);
+
+TRACE_EVENT(tls_device_rx_resync_nh_delay,
+
+ TP_PROTO(struct sock *sk, u32 sock_data, u32 rec_len),
+
+ TP_ARGS(sk, sock_data, rec_len),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u32, sock_data )
+ __field( u32, rec_len )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->sock_data = sock_data;
+ __entry->rec_len = rec_len;
+ ),
+
+ TP_printk(
+ "sk=%p sock_data=%u rec_len=%u",
+ __entry->sk, __entry->sock_data, __entry->rec_len
+ )
+);
+
+TRACE_EVENT(tls_device_tx_resync_req,
+
+ TP_PROTO(struct sock *sk, u32 tcp_seq, u32 exp_tcp_seq),
+
+ TP_ARGS(sk, tcp_seq, exp_tcp_seq),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u32, tcp_seq )
+ __field( u32, exp_tcp_seq )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->tcp_seq = tcp_seq;
+ __entry->exp_tcp_seq = exp_tcp_seq;
+ ),
+
+ TP_printk(
+ "sk=%p tcp_seq=%u exp_tcp_seq=%u",
+ __entry->sk, __entry->tcp_seq, __entry->exp_tcp_seq
+ )
+);
+
+TRACE_EVENT(tls_device_tx_resync_send,
+
+ TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no),
+
+ TP_ARGS(sk, tcp_seq, rec_no),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u64, rec_no )
+ __field( u32, tcp_seq )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->rec_no = get_unaligned_be64(rec_no);
+ __entry->tcp_seq = tcp_seq;
+ ),
+
+ TP_printk(
+ "sk=%p tcp_seq=%u rec_no=%llu",
+ __entry->sk, __entry->tcp_seq, __entry->rec_no
+ )
+);
+
+#endif /* _TLS_TRACE_H_ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0d8da809bea2..193cba2d777b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -284,11 +284,9 @@ static struct sock *__unix_find_socket_byname(struct net *net,
if (u->addr->len == len &&
!memcmp(u->addr->name, sunname, len))
- goto found;
+ return s;
}
- s = NULL;
-found:
- return s;
+ return NULL;
}
static inline struct sock *unix_find_socket_byname(struct net *net,
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index c443db7af8d4..bef8772116ec 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -13,15 +13,16 @@
#include <linux/hyperv.h>
#include <net/sock.h>
#include <net/af_vsock.h>
+#include <asm/hyperv-tlfs.h>
/* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some
- * stricter requirements on the hv_sock ring buffer size of six 4K pages. Newer
- * hosts don't have this limitation; but, keep the defaults the same for compat.
+ * stricter requirements on the hv_sock ring buffer size of six 4K pages.
+ * hyperv-tlfs defines HV_HYP_PAGE_SIZE as 4K. Newer hosts don't have this
+ * limitation; but, keep the defaults the same for compat.
*/
-#define PAGE_SIZE_4K 4096
-#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
-#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
-#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)
+#define RINGBUFFER_HVS_RCV_SIZE (HV_HYP_PAGE_SIZE * 6)
+#define RINGBUFFER_HVS_SND_SIZE (HV_HYP_PAGE_SIZE * 6)
+#define RINGBUFFER_HVS_MAX_SIZE (HV_HYP_PAGE_SIZE * 64)
/* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE (1024 * 16)
@@ -54,7 +55,8 @@ struct hvs_recv_buf {
* ringbuffer APIs that allow us to directly copy data from userspace buffer
* to VMBus ringbuffer.
*/
-#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))
+#define HVS_SEND_BUF_SIZE \
+ (HV_HYP_PAGE_SIZE - sizeof(struct vmpipe_proto_header))
struct hvs_send_buf {
/* The header before the payload data */
@@ -393,10 +395,10 @@ static void hvs_open_connection(struct vmbus_channel *chan)
} else {
sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
- sndbuf = ALIGN(sndbuf, PAGE_SIZE);
+ sndbuf = ALIGN(sndbuf, HV_HYP_PAGE_SIZE);
rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
- rcvbuf = ALIGN(rcvbuf, PAGE_SIZE);
+ rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE);
}
ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
@@ -670,7 +672,7 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
ssize_t ret = 0;
ssize_t bytes_written = 0;
- BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);
+ BUILD_BUG_ON(sizeof(*send_buf) != HV_HYP_PAGE_SIZE);
send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
if (!send_buf)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 481f7f8a1655..d02c9b41a768 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -268,6 +268,55 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
}
static ssize_t
+virtio_transport_stream_do_peek(struct vsock_sock *vsk,
+ struct msghdr *msg,
+ size_t len)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ struct virtio_vsock_pkt *pkt;
+ size_t bytes, total = 0, off;
+ int err = -EFAULT;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ list_for_each_entry(pkt, &vvs->rx_queue, list) {
+ off = pkt->off;
+
+ if (total == len)
+ break;
+
+ while (total < len && off < pkt->len) {
+ bytes = len - total;
+ if (bytes > pkt->len - off)
+ bytes = pkt->len - off;
+
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ spin_unlock_bh(&vvs->rx_lock);
+
+ err = memcpy_to_msg(msg, pkt->buf + off, bytes);
+ if (err)
+ goto out;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ total += bytes;
+ off += bytes;
+ }
+ }
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ return total;
+
+out:
+ if (total)
+ err = total;
+ return err;
+}
+
+static ssize_t
virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
size_t len)
@@ -339,9 +388,9 @@ virtio_transport_stream_dequeue(struct vsock_sock *vsk,
size_t len, int flags)
{
if (flags & MSG_PEEK)
- return -EOPNOTSUPP;
-
- return virtio_transport_stream_do_dequeue(vsk, msg, len);
+ return virtio_transport_stream_do_peek(vsk, msg, len);
+ else
+ return virtio_transport_stream_do_dequeue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7b72286922f7..7186cb653c75 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8265,10 +8265,8 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
/* leave request id zero for legacy request
* or if driver does not support multi-scheduled scan
*/
- if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) {
- while (!sched_scan_req->reqid)
- sched_scan_req->reqid = cfg80211_assign_cookie(rdev);
- }
+ if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1)
+ sched_scan_req->reqid = cfg80211_assign_cookie(rdev);
err = rdev_sched_scan_start(rdev, dev, sched_scan_req);
if (err)
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index dc8f689bd469..f9e83031a40a 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -114,7 +114,7 @@ void regulatory_hint_country_ie(struct wiphy *wiphy,
u8 country_ie_len);
/**
- * regulatory_hint_disconnect - informs all devices have been disconneted
+ * regulatory_hint_disconnect - informs all devices have been disconnected
*
* Regulotory rules can be enhanced further upon scanning and upon
* connection to an AP. These rules become stale if we disconnect