summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/bluetooth/hci_conn.c22
-rw-r--r--net/bluetooth/hci_core.c10
-rw-r--r--net/bluetooth/hci_event.c44
-rw-r--r--net/bluetooth/hci_sync.c23
-rw-r--r--net/bluetooth/l2cap_core.c13
-rw-r--r--net/can/j1939/main.c24
-rw-r--r--net/can/j1939/socket.c5
-rw-r--r--net/core/dev.c8
-rw-r--r--net/core/skmsg.c3
-rw-r--r--net/core/sock.c6
-rw-r--r--net/dccp/proto.c3
-rw-r--r--net/dsa/dsa.c24
-rw-r--r--net/handshake/handshake.h1
-rw-r--r--net/handshake/request.c4
-rw-r--r--net/ieee802154/trace.h2
-rw-r--r--net/ipv4/esp4_offload.c3
-rw-r--r--net/ipv4/sysctl_net_ipv4.c8
-rw-r--r--net/ipv4/tcp_offload.c19
-rw-r--r--net/ipv4/udplite.c2
-rw-r--r--net/ipv4/xfrm4_input.c1
-rw-r--r--net/ipv6/esp6_offload.c3
-rw-r--r--net/ipv6/exthdrs.c29
-rw-r--r--net/ipv6/ping.c3
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/ipv6/udplite.c4
-rw-r--r--net/ipv6/xfrm6_input.c3
-rw-r--r--net/mac80211/cfg.c9
-rw-r--r--net/mac80211/he.c15
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/link.c4
-rw-r--r--net/mac80211/mlme.c13
-rw-r--r--net/mac80211/rx.c6
-rw-r--r--net/mac80211/tx.c8
-rw-r--r--net/mac80211/util.c4
-rw-r--r--net/mac802154/trace.h2
-rw-r--r--net/mptcp/pm.c23
-rw-r--r--net/mptcp/pm_netlink.c19
-rw-r--r--net/mptcp/pm_userspace.c48
-rw-r--r--net/mptcp/protocol.c160
-rw-r--r--net/mptcp/protocol.h6
-rw-r--r--net/mptcp/subflow.c17
-rw-r--r--net/netfilter/ipset/ip_set_core.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c3
-rw-r--r--net/netfilter/nf_flow_table_core.c13
-rw-r--r--net/netfilter/nf_flow_table_ip.c4
-rw-r--r--net/netfilter/nf_tables_api.c427
-rw-r--r--net/netfilter/nfnetlink.c3
-rw-r--r--net/netfilter/nfnetlink_osf.c1
-rw-r--r--net/netfilter/nft_bitwise.c2
-rw-r--r--net/netfilter/nft_immediate.c90
-rw-r--r--net/netfilter/nft_set_bitmap.c5
-rw-r--r--net/netfilter/nft_set_hash.c23
-rw-r--r--net/netfilter/nft_set_pipapo.c75
-rw-r--r--net/netfilter/nft_set_rbtree.c5
-rw-r--r--net/netfilter/xt_osf.c1
-rw-r--r--net/netlabel/netlabel_kapi.c3
-rw-r--r--net/openvswitch/datapath.c19
-rw-r--r--net/openvswitch/vport.c18
-rw-r--r--net/sched/act_ct.c9
-rw-r--r--net/sched/act_pedit.c48
-rw-r--r--net/sched/act_police.c10
-rw-r--r--net/sched/cls_api.c15
-rw-r--r--net/sched/cls_u32.c18
-rw-r--r--net/sched/sch_api.c72
-rw-r--r--net/sched/sch_fq_pie.c10
-rw-r--r--net/sched/sch_generic.c44
-rw-r--r--net/sched/sch_mq.c8
-rw-r--r--net/sched/sch_mqprio.c8
-rw-r--r--net/sched/sch_netem.c8
-rw-r--r--net/sched/sch_pie.c5
-rw-r--r--net/sched/sch_red.c5
-rw-r--r--net/sched/sch_sfq.c5
-rw-r--r--net/sched/sch_taprio.c9
-rw-r--r--net/sched/sch_teql.c2
-rw-r--r--net/sctp/sm_sideeffect.c5
-rw-r--r--net/sctp/sm_statefuns.c2
-rw-r--r--net/smc/smc_llc.c4
-rw-r--r--net/tipc/bearer.c4
-rw-r--r--net/wireless/core.c4
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/rdev-ops.h6
-rw-r--r--net/wireless/reg.c7
-rw-r--r--net/wireless/util.c9
-rw-r--r--net/xfrm/xfrm_input.c8
-rw-r--r--net/xfrm/xfrm_interface_core.c54
-rw-r--r--net/xfrm/xfrm_policy.c14
88 files changed, 1221 insertions, 485 deletions
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 6968e55eb971..28a939d56090 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -101,7 +101,6 @@ static void batadv_dat_purge(struct work_struct *work);
*/
static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
{
- INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge);
queue_delayed_work(batadv_event_workqueue, &bat_priv->dat.work,
msecs_to_jiffies(10000));
}
@@ -819,6 +818,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv)
if (!bat_priv->dat.hash)
return -ENOMEM;
+ INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge);
batadv_dat_start_timer(bat_priv);
batadv_tvlv_handler_register(bat_priv, batadv_dat_tvlv_ogm_handler_v1,
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index f75ef12f18f7..1ef952bda97d 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -947,8 +947,8 @@ static void find_cis(struct hci_conn *conn, void *data)
{
struct iso_list_data *d = data;
- /* Ignore broadcast */
- if (!bacmp(&conn->dst, BDADDR_ANY))
+ /* Ignore broadcast or if CIG don't match */
+ if (!bacmp(&conn->dst, BDADDR_ANY) || d->cig != conn->iso_qos.ucast.cig)
return;
d->count++;
@@ -963,12 +963,17 @@ static void cis_cleanup(struct hci_conn *conn)
struct hci_dev *hdev = conn->hdev;
struct iso_list_data d;
+ if (conn->iso_qos.ucast.cig == BT_ISO_QOS_CIG_UNSET)
+ return;
+
memset(&d, 0, sizeof(d));
d.cig = conn->iso_qos.ucast.cig;
/* Check if ISO connection is a CIS and remove CIG if there are
* no other connections using it.
*/
+ hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_BOUND, &d);
+ hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECT, &d);
hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &d);
if (d.count)
return;
@@ -1766,24 +1771,23 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
memset(&data, 0, sizeof(data));
- /* Allocate a CIG if not set */
+ /* Allocate first still reconfigurable CIG if not set */
if (qos->ucast.cig == BT_ISO_QOS_CIG_UNSET) {
- for (data.cig = 0x00; data.cig < 0xff; data.cig++) {
+ for (data.cig = 0x00; data.cig < 0xf0; data.cig++) {
data.count = 0;
- data.cis = 0xff;
- hci_conn_hash_list_state(hdev, cis_list, ISO_LINK,
- BT_BOUND, &data);
+ hci_conn_hash_list_state(hdev, find_cis, ISO_LINK,
+ BT_CONNECT, &data);
if (data.count)
continue;
- hci_conn_hash_list_state(hdev, cis_list, ISO_LINK,
+ hci_conn_hash_list_state(hdev, find_cis, ISO_LINK,
BT_CONNECTED, &data);
if (!data.count)
break;
}
- if (data.cig == 0xff)
+ if (data.cig == 0xf0)
return false;
/* Update CIG */
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index a856b1051d35..48917c68358d 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1416,10 +1416,10 @@ int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type)
{
- struct smp_ltk *k;
+ struct smp_ltk *k, *tmp;
int removed = 0;
- list_for_each_entry_rcu(k, &hdev->long_term_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) {
if (bacmp(bdaddr, &k->bdaddr) || k->bdaddr_type != bdaddr_type)
continue;
@@ -1435,9 +1435,9 @@ int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type)
void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type)
{
- struct smp_irk *k;
+ struct smp_irk *k, *tmp;
- list_for_each_entry_rcu(k, &hdev->identity_resolving_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) {
if (bacmp(bdaddr, &k->bdaddr) || k->addr_type != addr_type)
continue;
@@ -2686,7 +2686,9 @@ void hci_unregister_dev(struct hci_dev *hdev)
{
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
+ mutex_lock(&hdev->unregister_lock);
hci_dev_set_flag(hdev, HCI_UNREGISTER);
+ mutex_unlock(&hdev->unregister_lock);
write_lock(&hci_dev_list_lock);
list_del(&hdev->list);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index d00ef6e3fc45..09ba6d8987ee 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3804,48 +3804,56 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
struct hci_rp_le_set_cig_params *rp = data;
+ struct hci_cp_le_set_cig_params *cp;
struct hci_conn *conn;
- int i = 0;
+ u8 status = rp->status;
+ int i;
bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_CIG_PARAMS);
+ if (!cp || rp->num_handles != cp->num_cis || rp->cig_id != cp->cig_id) {
+ bt_dev_err(hdev, "unexpected Set CIG Parameters response data");
+ status = HCI_ERROR_UNSPECIFIED;
+ }
+
hci_dev_lock(hdev);
- if (rp->status) {
+ if (status) {
while ((conn = hci_conn_hash_lookup_cig(hdev, rp->cig_id))) {
conn->state = BT_CLOSED;
- hci_connect_cfm(conn, rp->status);
+ hci_connect_cfm(conn, status);
hci_conn_del(conn);
}
goto unlock;
}
- rcu_read_lock();
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2553
+ *
+ * If the Status return parameter is zero, then the Controller shall
+ * set the Connection_Handle arrayed return parameter to the connection
+ * handle(s) corresponding to the CIS configurations specified in
+ * the CIS_IDs command parameter, in the same order.
+ */
+ for (i = 0; i < rp->num_handles; ++i) {
+ conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, rp->cig_id,
+ cp->cis[i].cis_id);
+ if (!conn || !bacmp(&conn->dst, BDADDR_ANY))
+ continue;
- list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- if (conn->type != ISO_LINK ||
- conn->iso_qos.ucast.cig != rp->cig_id ||
- conn->state == BT_CONNECTED)
+ if (conn->state != BT_BOUND && conn->state != BT_CONNECT)
continue;
- conn->handle = __le16_to_cpu(rp->handle[i++]);
+ conn->handle = __le16_to_cpu(rp->handle[i]);
bt_dev_dbg(hdev, "%p handle 0x%4.4x parent %p", conn,
conn->handle, conn->parent);
/* Create CIS if LE is already connected */
- if (conn->parent && conn->parent->state == BT_CONNECTED) {
- rcu_read_unlock();
+ if (conn->parent && conn->parent->state == BT_CONNECTED)
hci_le_create_cis(conn);
- rcu_read_lock();
- }
-
- if (i == rp->num_handles)
- break;
}
- rcu_read_unlock();
-
unlock:
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 647a8ce54062..804cde43b4e0 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -629,6 +629,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
INIT_LIST_HEAD(&hdev->cmd_sync_work_list);
mutex_init(&hdev->cmd_sync_work_lock);
+ mutex_init(&hdev->unregister_lock);
INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
INIT_WORK(&hdev->reenable_adv_work, reenable_adv);
@@ -692,14 +693,19 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
void *data, hci_cmd_sync_work_destroy_t destroy)
{
struct hci_cmd_sync_work_entry *entry;
+ int err = 0;
- if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
- return -ENODEV;
+ mutex_lock(&hdev->unregister_lock);
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+ err = -ENODEV;
+ goto unlock;
+ }
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return -ENOMEM;
-
+ if (!entry) {
+ err = -ENOMEM;
+ goto unlock;
+ }
entry->func = func;
entry->data = data;
entry->destroy = destroy;
@@ -710,7 +716,9 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
queue_work(hdev->req_workqueue, &hdev->cmd_sync_work);
- return 0;
+unlock:
+ mutex_unlock(&hdev->unregister_lock);
+ return err;
}
EXPORT_SYMBOL(hci_cmd_sync_submit);
@@ -4543,6 +4551,9 @@ static int hci_init_sync(struct hci_dev *hdev)
!hci_dev_test_flag(hdev, HCI_CONFIG))
return 0;
+ if (hci_dev_test_and_set_flag(hdev, HCI_DEBUGFS_CREATED))
+ return 0;
+
hci_debugfs_create_common(hdev);
if (lmp_bredr_capable(hdev))
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 376b523c7b26..c5e8798e297c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4306,6 +4306,10 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
result = __le16_to_cpu(rsp->result);
status = __le16_to_cpu(rsp->status);
+ if (result == L2CAP_CR_SUCCESS && (dcid < L2CAP_CID_DYN_START ||
+ dcid > L2CAP_CID_DYN_END))
+ return -EPROTO;
+
BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x",
dcid, scid, result, status);
@@ -4337,6 +4341,11 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
switch (result) {
case L2CAP_CR_SUCCESS:
+ if (__l2cap_get_chan_by_dcid(conn, dcid)) {
+ err = -EBADSLT;
+ break;
+ }
+
l2cap_state_change(chan, BT_CONFIG);
chan->ident = 0;
chan->dcid = dcid;
@@ -4663,7 +4672,9 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
chan->ops->set_shutdown(chan);
+ l2cap_chan_unlock(chan);
mutex_lock(&conn->chan_lock);
+ l2cap_chan_lock(chan);
l2cap_chan_del(chan, ECONNRESET);
mutex_unlock(&conn->chan_lock);
@@ -4702,7 +4713,9 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,
return 0;
}
+ l2cap_chan_unlock(chan);
mutex_lock(&conn->chan_lock);
+ l2cap_chan_lock(chan);
l2cap_chan_del(chan, 0);
mutex_unlock(&conn->chan_lock);
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index 821d4ff303b3..ecff1c947d68 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -126,7 +126,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
#define J1939_CAN_ID CAN_EFF_FLAG
#define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG)
-static DEFINE_SPINLOCK(j1939_netdev_lock);
+static DEFINE_MUTEX(j1939_netdev_lock);
static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
{
@@ -220,7 +220,7 @@ static void __j1939_rx_release(struct kref *kref)
j1939_can_rx_unregister(priv);
j1939_ecu_unmap_all(priv);
j1939_priv_set(priv->ndev, NULL);
- spin_unlock(&j1939_netdev_lock);
+ mutex_unlock(&j1939_netdev_lock);
}
/* get pointer to priv without increasing ref counter */
@@ -248,9 +248,9 @@ static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev)
{
struct j1939_priv *priv;
- spin_lock(&j1939_netdev_lock);
+ mutex_lock(&j1939_netdev_lock);
priv = j1939_priv_get_by_ndev_locked(ndev);
- spin_unlock(&j1939_netdev_lock);
+ mutex_unlock(&j1939_netdev_lock);
return priv;
}
@@ -260,14 +260,14 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
struct j1939_priv *priv, *priv_new;
int ret;
- spin_lock(&j1939_netdev_lock);
+ mutex_lock(&j1939_netdev_lock);
priv = j1939_priv_get_by_ndev_locked(ndev);
if (priv) {
kref_get(&priv->rx_kref);
- spin_unlock(&j1939_netdev_lock);
+ mutex_unlock(&j1939_netdev_lock);
return priv;
}
- spin_unlock(&j1939_netdev_lock);
+ mutex_unlock(&j1939_netdev_lock);
priv = j1939_priv_create(ndev);
if (!priv)
@@ -277,29 +277,31 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
spin_lock_init(&priv->j1939_socks_lock);
INIT_LIST_HEAD(&priv->j1939_socks);
- spin_lock(&j1939_netdev_lock);
+ mutex_lock(&j1939_netdev_lock);
priv_new = j1939_priv_get_by_ndev_locked(ndev);
if (priv_new) {
/* Someone was faster than us, use their priv and roll
* back our's.
*/
kref_get(&priv_new->rx_kref);
- spin_unlock(&j1939_netdev_lock);
+ mutex_unlock(&j1939_netdev_lock);
dev_put(ndev);
kfree(priv);
return priv_new;
}
j1939_priv_set(ndev, priv);
- spin_unlock(&j1939_netdev_lock);
ret = j1939_can_rx_register(priv);
if (ret < 0)
goto out_priv_put;
+ mutex_unlock(&j1939_netdev_lock);
return priv;
out_priv_put:
j1939_priv_set(ndev, NULL);
+ mutex_unlock(&j1939_netdev_lock);
+
dev_put(ndev);
kfree(priv);
@@ -308,7 +310,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
void j1939_netdev_stop(struct j1939_priv *priv)
{
- kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock);
+ kref_put_mutex(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock);
j1939_priv_put(priv);
}
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 1790469b2580..35970c25496a 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -1088,6 +1088,11 @@ void j1939_sk_errqueue(struct j1939_session *session,
void j1939_sk_send_loop_abort(struct sock *sk, int err)
{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ if (jsk->state & J1939_SOCK_ERRQUEUE)
+ return;
+
sk->sk_err = err;
sk_error_report(sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index b3c13e041935..c29f3e1db3ca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4471,8 +4471,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
u32 next_cpu;
u32 ident;
- /* First check into global flow table if there is a match */
- ident = sock_flow_table->ents[hash & sock_flow_table->mask];
+ /* First check into global flow table if there is a match.
+ * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow().
+ */
+ ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]);
if ((ident ^ hash) & ~rps_cpu_mask)
goto try_rps;
@@ -10541,7 +10543,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
return NULL;
netdev_init_one_queue(dev, queue, NULL);
RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
- queue->qdisc_sleeping = &noop_qdisc;
+ RCU_INIT_POINTER(queue->qdisc_sleeping, &noop_qdisc);
rcu_assign_pointer(dev->ingress_queue, queue);
#endif
return queue;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index a9060e1f0e43..a29508e1ff35 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1210,7 +1210,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
- psock->saved_data_ready(sk);
+ if (psock)
+ psock->saved_data_ready(sk);
rcu_read_unlock();
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 24f2761bdb1d..6e5662ca00fe 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1362,12 +1362,6 @@ set_sndbuf:
__sock_set_mark(sk, val);
break;
case SO_RCVMARK:
- if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
- !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
- ret = -EPERM;
- break;
- }
-
sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
break;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a06b5641287a..b0ebf853cb07 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -191,6 +191,9 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
struct dccp_sock *dp = dccp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ pr_warn_once("DCCP is deprecated and scheduled to be removed in 2025, "
+ "please contact the netdev mailing list\n");
+
icsk->icsk_rto = DCCP_TIMEOUT_INIT;
icsk->icsk_syn_retries = sysctl_dccp_request_retries;
sk->sk_state = DCCP_CLOSED;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index ab1afe67fd18..1afed89e03c0 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -403,6 +403,24 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
return 0;
}
+static struct dsa_port *
+dsa_switch_preferred_default_local_cpu_port(struct dsa_switch *ds)
+{
+ struct dsa_port *cpu_dp;
+
+ if (!ds->ops->preferred_default_local_cpu_port)
+ return NULL;
+
+ cpu_dp = ds->ops->preferred_default_local_cpu_port(ds);
+ if (!cpu_dp)
+ return NULL;
+
+ if (WARN_ON(!dsa_port_is_cpu(cpu_dp) || cpu_dp->ds != ds))
+ return NULL;
+
+ return cpu_dp;
+}
+
/* Perform initial assignment of CPU ports to user ports and DSA links in the
* fabric, giving preference to CPU ports local to each switch. Default to
* using the first CPU port in the switch tree if the port does not have a CPU
@@ -410,12 +428,16 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
*/
static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
{
- struct dsa_port *cpu_dp, *dp;
+ struct dsa_port *preferred_cpu_dp, *cpu_dp, *dp;
list_for_each_entry(cpu_dp, &dst->ports, list) {
if (!dsa_port_is_cpu(cpu_dp))
continue;
+ preferred_cpu_dp = dsa_switch_preferred_default_local_cpu_port(cpu_dp->ds);
+ if (preferred_cpu_dp && preferred_cpu_dp != cpu_dp)
+ continue;
+
/* Prefer a local CPU port */
dsa_switch_for_each_port(dp, cpu_dp->ds) {
/* Prefer the first local CPU port found */
diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h
index 8aeaadca844f..4dac965c99df 100644
--- a/net/handshake/handshake.h
+++ b/net/handshake/handshake.h
@@ -31,7 +31,6 @@ struct handshake_req {
struct list_head hr_list;
struct rhash_head hr_rhash;
unsigned long hr_flags;
- struct file *hr_file;
const struct handshake_proto *hr_proto;
struct sock *hr_sk;
void (*hr_odestruct)(struct sock *sk);
diff --git a/net/handshake/request.c b/net/handshake/request.c
index d78d41abb3d9..94d5cef3e048 100644
--- a/net/handshake/request.c
+++ b/net/handshake/request.c
@@ -239,7 +239,6 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req,
}
req->hr_odestruct = req->hr_sk->sk_destruct;
req->hr_sk->sk_destruct = handshake_sk_destruct;
- req->hr_file = sock->file;
ret = -EOPNOTSUPP;
net = sock_net(req->hr_sk);
@@ -335,9 +334,6 @@ bool handshake_req_cancel(struct sock *sk)
return false;
}
- /* Request accepted and waiting for DONE */
- fput(req->hr_file);
-
out_true:
trace_handshake_cancel(net, req, sk);
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index e5d8439b9e45..c16db0b326fa 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -13,7 +13,7 @@
#define MAXNAME 32
#define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME)
-#define WPAN_PHY_ASSIGN strlcpy(__entry->wpan_phy_name, \
+#define WPAN_PHY_ASSIGN strscpy(__entry->wpan_phy_name, \
wpan_phy_name(wpan_phy), \
MAXNAME)
#define WPAN_PHY_PR_FMT "%s"
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 3969fa805679..ee848be59e65 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -340,6 +340,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
secpath_reset(skb);
+ if (skb_needs_linearize(skb, skb->dev->features) &&
+ __skb_linearize(skb))
+ return -ENOMEM;
return 0;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 40fe70fc2015..88dfe51e68f3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -34,8 +34,8 @@ static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
-static int ip_ping_group_range_min[] = { 0, 0 };
-static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
+static unsigned long ip_ping_group_range_min[] = { 0, 0 };
+static unsigned long ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
static u32 u32_max_div_HZ = UINT_MAX / HZ;
static int one_day_secs = 24 * 3600;
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
@@ -165,7 +165,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
{
struct user_namespace *user_ns = current_user_ns();
int ret;
- gid_t urange[2];
+ unsigned long urange[2];
kgid_t low, high;
struct ctl_table tmp = {
.data = &urange,
@@ -178,7 +178,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
inet_get_ping_group_range_table(table, &low, &high);
urange[0] = from_kgid_munged(user_ns, low);
urange[1] = from_kgid_munged(user_ns, high);
- ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
low = make_kgid(user_ns, urange[0]);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 45dda7889387..4851211aa60d 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -60,12 +60,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
struct tcphdr *th;
unsigned int thlen;
unsigned int seq;
- __be32 delta;
unsigned int oldlen;
unsigned int mss;
struct sk_buff *gso_skb = skb;
__sum16 newcheck;
bool ooo_okay, copy_destructor;
+ __wsum delta;
th = tcp_hdr(skb);
thlen = th->doff * 4;
@@ -75,7 +75,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, thlen))
goto out;
- oldlen = (u16)~skb->len;
+ oldlen = ~skb->len;
__skb_pull(skb, thlen);
mss = skb_shinfo(skb)->gso_size;
@@ -110,7 +110,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (skb_is_gso(segs))
mss *= skb_shinfo(segs)->gso_segs;
- delta = htonl(oldlen + (thlen + mss));
+ delta = (__force __wsum)htonl(oldlen + thlen + mss);
skb = segs;
th = tcp_hdr(skb);
@@ -119,8 +119,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP))
tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss);
- newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
- (__force u32)delta));
+ newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta));
while (skb->next) {
th->fin = th->psh = 0;
@@ -165,11 +164,11 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc));
}
- delta = htonl(oldlen + (skb_tail_pointer(skb) -
- skb_transport_header(skb)) +
- skb->data_len);
- th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
- (__force u32)delta));
+ delta = (__force __wsum)htonl(oldlen +
+ (skb_tail_pointer(skb) -
+ skb_transport_header(skb)) +
+ skb->data_len);
+ th->check = ~csum_fold(csum_add(csum_unfold(th->check), delta));
if (skb->ip_summed == CHECKSUM_PARTIAL)
gso_reset_checksum(skb, ~th->check);
else
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 56d94d23b9e0..143f93a12f25 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -22,6 +22,8 @@ static int udplite_sk_init(struct sock *sk)
{
udp_init_sock(sk);
udp_sk(sk)->pcflag = UDPLITE_BIT;
+ pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, "
+ "please contact the netdev mailing list\n");
return 0;
}
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index ad2afeef4f10..eac206a290d0 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -164,6 +164,7 @@ drop:
kfree_skb(skb);
return 0;
}
+EXPORT_SYMBOL(xfrm4_udp_encap_rcv);
int xfrm4_rcv(struct sk_buff *skb)
{
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 75c02992c520..772340268997 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -374,6 +374,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
secpath_reset(skb);
+ if (skb_needs_linearize(skb, skb->dev->features) &&
+ __skb_linearize(skb))
+ return -ENOMEM;
return 0;
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index a8d961d3a477..5fa0e37305d9 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -569,24 +569,6 @@ looped_back:
return -1;
}
- if (skb_cloned(skb)) {
- if (pskb_expand_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE, 0,
- GFP_ATOMIC)) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
- return -1;
- }
- } else {
- err = skb_cow_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE);
- if (unlikely(err)) {
- kfree_skb(skb);
- return -1;
- }
- }
-
- hdr = (struct ipv6_rpl_sr_hdr *)skb_transport_header(skb);
-
if (!pskb_may_pull(skb, ipv6_rpl_srh_size(n, hdr->cmpri,
hdr->cmpre))) {
kfree_skb(skb);
@@ -630,6 +612,17 @@ looped_back:
skb_pull(skb, ((hdr->hdrlen + 1) << 3));
skb_postpull_rcsum(skb, oldhdr,
sizeof(struct ipv6hdr) + ((hdr->hdrlen + 1) << 3));
+ if (unlikely(!hdr->segments_left)) {
+ if (pskb_expand_head(skb, sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3), 0,
+ GFP_ATOMIC)) {
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ kfree(buf);
+ return -1;
+ }
+
+ oldhdr = ipv6_hdr(skb);
+ }
skb_push(skb, ((chdr->hdrlen + 1) << 3) + sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index c4835dbdfcff..f804c11e2146 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -114,7 +114,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
addr_type = ipv6_addr_type(daddr);
if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
(addr_type & IPV6_ADDR_MAPPED) ||
- (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
+ (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if &&
+ l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if))
return -EINVAL;
ipcm6_init_sk(&ipc6, np);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e3aec46bd466..392aaa373b66 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6412,9 +6412,9 @@ static struct ctl_table ipv6_route_table_template[] = {
{
.procname = "skip_notify_on_dev_down",
.data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 3bab0cc13697..8e010d07917a 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -8,6 +8,8 @@
* Changes:
* Fixes:
*/
+#define pr_fmt(fmt) "UDPLite6: " fmt
+
#include <linux/export.h>
#include <linux/proc_fs.h>
#include "udp_impl.h"
@@ -16,6 +18,8 @@ static int udplitev6_sk_init(struct sock *sk)
{
udpv6_init_sock(sk);
udp_sk(sk)->pcflag = UDPLITE_BIT;
+ pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, "
+ "please contact the netdev mailing list\n");
return 0;
}
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 04cbeefd8982..4907ab241d6b 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -86,6 +86,9 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
__be32 *udpdata32;
__u16 encap_type = up->encap_type;
+ if (skb->protocol == htons(ETH_P_IP))
+ return xfrm4_udp_encap_rcv(sk, skb);
+
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
return 1;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 86b2036d73ff..f2d08dbccfb7 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -4865,11 +4865,16 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy,
unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ int res;
if (wdev->use_4addr)
return -EOPNOTSUPP;
- return ieee80211_vif_set_links(sdata, wdev->valid_links);
+ mutex_lock(&sdata->local->mtx);
+ res = ieee80211_vif_set_links(sdata, wdev->valid_links);
+ mutex_unlock(&sdata->local->mtx);
+
+ return res;
}
static void ieee80211_del_intf_link(struct wiphy *wiphy,
@@ -4878,7 +4883,9 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ mutex_lock(&sdata->local->mtx);
ieee80211_vif_set_links(sdata, wdev->valid_links);
+ mutex_unlock(&sdata->local->mtx);
}
static int sta_add_link_station(struct ieee80211_local *local,
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 729f261520c7..0322abae0825 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -3,7 +3,7 @@
* HE handling
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2019 - 2022 Intel Corporation
+ * Copyright(c) 2019 - 2023 Intel Corporation
*/
#include "ieee80211_i.h"
@@ -114,6 +114,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
struct link_sta_info *link_sta)
{
struct ieee80211_sta_he_cap *he_cap = &link_sta->pub->he_cap;
+ const struct ieee80211_sta_he_cap *own_he_cap_ptr;
struct ieee80211_sta_he_cap own_he_cap;
struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
u8 he_ppe_size;
@@ -123,12 +124,16 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
memset(he_cap, 0, sizeof(*he_cap));
- if (!he_cap_ie ||
- !ieee80211_get_he_iftype_cap(sband,
- ieee80211_vif_type_p2p(&sdata->vif)))
+ if (!he_cap_ie)
return;
- own_he_cap = sband->iftype_data->he_cap;
+ own_he_cap_ptr =
+ ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+ if (!own_he_cap_ptr)
+ return;
+
+ own_he_cap = *own_he_cap_ptr;
/* Make sure size is OK */
mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b0372e76f373..4159fb65038b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2312,7 +2312,7 @@ ieee802_11_parse_elems(const u8 *start, size_t len, bool action,
return ieee802_11_parse_elems_crc(start, len, action, 0, 0, bss);
}
-void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos);
+void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id);
extern const int ieee802_1d_to_ac[8];
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index e82db88a47f8..40f030b8ece9 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -2,7 +2,7 @@
/*
* MLO link handling
*
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2022-2023 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -409,6 +409,7 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
IEEE80211_CHANCTX_SHARED);
WARN_ON_ONCE(ret);
+ ieee80211_mgd_set_link_qos_params(link);
ieee80211_link_info_change_notify(sdata, link,
BSS_CHANGED_ERP_CTS_PROT |
BSS_CHANGED_ERP_PREAMBLE |
@@ -423,7 +424,6 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
BSS_CHANGED_TWT |
BSS_CHANGED_HE_OBSS_PD |
BSS_CHANGED_HE_BSS_COLOR);
- ieee80211_mgd_set_link_qos_params(link);
}
old_active = sdata->vif.active_links;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e13a0354c397..5a4303130ef2 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1217,6 +1217,7 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb,
const u16 *inner)
{
unsigned int skb_len = skb->len;
+ bool at_extension = false;
bool added = false;
int i, j;
u8 *len, *list_len = NULL;
@@ -1228,7 +1229,6 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb,
for (i = 0; i < PRESENT_ELEMS_MAX && outer[i]; i++) {
u16 elem = outer[i];
bool have_inner = false;
- bool at_extension = false;
/* should at least be sorted in the sense of normal -> ext */
WARN_ON(at_extension && elem < PRESENT_ELEM_EXT_OFFS);
@@ -1257,8 +1257,14 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb,
}
*list_len += 1;
skb_put_u8(skb, (u8)elem);
+ added = true;
}
+ /* if we added a list but no extension list, make a zero-len one */
+ if (added && (!at_extension || !list_len))
+ skb_put_u8(skb, 0);
+
+ /* if nothing added remove extension element completely */
if (!added)
skb_trim(skb, skb_len);
else
@@ -1366,10 +1372,11 @@ static void ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata,
ieee80211_add_non_inheritance_elem(skb, outer_present_elems,
link_present_elems);
- ieee80211_fragment_element(skb, subelem_len);
+ ieee80211_fragment_element(skb, subelem_len,
+ IEEE80211_MLE_SUBELEM_FRAGMENT);
}
- ieee80211_fragment_element(skb, ml_elem_len);
+ ieee80211_fragment_element(skb, ml_elem_len, WLAN_EID_FRAGMENT);
}
static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 58222c077898..fc6e130364da 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2110,7 +2110,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
/* either the frame has been decrypted or will be dropped */
status->flag |= RX_FLAG_DECRYPTED;
- if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE &&
+ if (unlikely(ieee80211_is_beacon(fc) && (result & RX_DROP_UNUSABLE) &&
rx->sdata->dev))
cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
skb->data, skb->len);
@@ -4965,7 +4965,9 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
}
if (unlikely(rx->sta && rx->sta->sta.mlo) &&
- is_unicast_ether_addr(hdr->addr1)) {
+ is_unicast_ether_addr(hdr->addr1) &&
+ !ieee80211_is_probe_resp(hdr->frame_control) &&
+ !ieee80211_is_beacon(hdr->frame_control)) {
/* translate to MLD addresses */
if (ether_addr_equal(link->conf->addr, hdr->addr1))
ether_addr_copy(hdr->addr1, rx->sdata->vif.addr);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0d9fbc8458fd..13b522dab0a3 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4445,7 +4445,7 @@ static void ieee80211_mlo_multicast_tx(struct net_device *dev,
struct sk_buff *skb)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- unsigned long links = sdata->vif.valid_links;
+ unsigned long links = sdata->vif.active_links;
unsigned int link;
u32 ctrl_flags = IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX;
@@ -5528,7 +5528,7 @@ ieee80211_beacon_get_template_ema_list(struct ieee80211_hw *hw,
{
struct ieee80211_ema_beacons *ema_beacons = NULL;
- WARN_ON(__ieee80211_beacon_get(hw, vif, NULL, false, link_id, 0,
+ WARN_ON(__ieee80211_beacon_get(hw, vif, NULL, true, link_id, 0,
&ema_beacons));
return ema_beacons;
@@ -6040,7 +6040,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
if (WARN_ON_ONCE(link == ARRAY_SIZE(sdata->vif.link_conf)))
- link = ffs(sdata->vif.valid_links) - 1;
+ link = ffs(sdata->vif.active_links) - 1;
}
IEEE80211_SKB_CB(skb)->control.flags |=
@@ -6076,7 +6076,7 @@ void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata,
band = chanctx_conf->def.chan->band;
} else {
WARN_ON(link_id >= 0 &&
- !(sdata->vif.valid_links & BIT(link_id)));
+ !(sdata->vif.active_links & BIT(link_id)));
/* MLD transmissions must not rely on the band */
band = 0;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 4bf76150925d..3bd07a0a782f 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -5049,7 +5049,7 @@ u8 *ieee80211_ie_build_eht_cap(u8 *pos,
return pos;
}
-void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos)
+void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id)
{
unsigned int elem_len;
@@ -5069,7 +5069,7 @@ void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos)
memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len);
/* place the fragment ID */
len_pos += 255 + 1;
- *len_pos = WLAN_EID_FRAGMENT;
+ *len_pos = frag_id;
/* and point to fragment length to update later */
len_pos++;
}
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index 689396d6c76a..1574ecc48075 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -14,7 +14,7 @@
#define MAXNAME 32
#define LOCAL_ENTRY __array(char, wpan_phy_name, MAXNAME)
-#define LOCAL_ASSIGN strlcpy(__entry->wpan_phy_name, \
+#define LOCAL_ASSIGN strscpy(__entry->wpan_phy_name, \
wpan_phy_name(local->hw.phy), MAXNAME)
#define LOCAL_PR_FMT "%s"
#define LOCAL_PR_ARG __entry->wpan_phy_name
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 78c924506e83..76612bca275a 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -87,8 +87,15 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
unsigned int subflows_max;
int ret = 0;
- if (mptcp_pm_is_userspace(msk))
- return mptcp_userspace_pm_active(msk);
+ if (mptcp_pm_is_userspace(msk)) {
+ if (mptcp_userspace_pm_active(msk)) {
+ spin_lock_bh(&pm->lock);
+ pm->subflows++;
+ spin_unlock_bh(&pm->lock);
+ return true;
+ }
+ return false;
+ }
subflows_max = mptcp_pm_get_subflows_max(msk);
@@ -181,8 +188,16 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
struct mptcp_pm_data *pm = &msk->pm;
bool update_subflows;
- update_subflows = (subflow->request_join || subflow->mp_join) &&
- mptcp_pm_is_kernel(msk);
+ update_subflows = subflow->request_join || subflow->mp_join;
+ if (mptcp_pm_is_userspace(msk)) {
+ if (update_subflows) {
+ spin_lock_bh(&pm->lock);
+ pm->subflows--;
+ spin_unlock_bh(&pm->lock);
+ }
+ return;
+ }
+
if (!READ_ONCE(pm->work_pending) && !update_subflows)
return;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index bc343dab5e3f..1224dfca5bf3 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1047,6 +1047,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
+ inet_sk_state_store(newsk, TCP_LISTEN);
err = kernel_listen(ssock, backlog);
if (err)
return err;
@@ -1558,6 +1559,24 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
return ret;
}
+void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
+{
+ struct mptcp_rm_list alist = { .nr = 0 };
+ struct mptcp_pm_addr_entry *entry;
+
+ list_for_each_entry(entry, rm_list, list) {
+ remove_anno_list_by_saddr(msk, &entry->addr);
+ if (alist.nr < MPTCP_RM_IDS_MAX)
+ alist.ids[alist.nr++] = entry->addr.id;
+ }
+
+ if (alist.nr) {
+ spin_lock_bh(&msk->pm.lock);
+ mptcp_pm_remove_addr(msk, &alist);
+ spin_unlock_bh(&msk->pm.lock);
+ }
+}
+
void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
struct list_head *rm_list)
{
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 27a275805c06..b06aa58dfcf2 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -69,6 +69,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
MPTCP_PM_MAX_ADDR_ID + 1,
1);
list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list);
+ msk->pm.local_addr_used++;
ret = e->addr.id;
} else if (match) {
ret = entry->addr.id;
@@ -79,6 +80,31 @@ append_err:
return ret;
}
+/* If the subflow is closed from the other peer (not via a
+ * subflow destroy command then), we want to keep the entry
+ * not to assign the same ID to another address and to be
+ * able to send RM_ADDR after the removal of the subflow.
+ */
+static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *addr)
+{
+ struct mptcp_pm_addr_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&entry->addr, &addr->addr, false)) {
+ /* TODO: a refcount is needed because the entry can
+ * be used multiple times (e.g. fullmesh mode).
+ */
+ list_del_rcu(&entry->list);
+ kfree(entry);
+ msk->pm.local_addr_used--;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex)
@@ -171,6 +197,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
spin_lock_bh(&msk->pm.lock);
if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
+ msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &addr_val.addr, false);
mptcp_pm_nl_addr_send_ack(msk);
}
@@ -232,7 +259,7 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
list_move(&match->list, &free_list);
- mptcp_pm_remove_addrs_and_subflows(msk, &free_list);
+ mptcp_pm_remove_addrs(msk, &free_list);
release_sock((struct sock *)msk);
@@ -251,6 +278,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_pm_addr_entry local = { 0 };
struct mptcp_addr_info addr_r;
struct mptcp_addr_info addr_l;
struct mptcp_sock *msk;
@@ -302,12 +330,26 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
+ local.addr = addr_l;
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &local);
+ if (err < 0) {
+ GENL_SET_ERR_MSG(info, "did not match address and id");
+ goto create_err;
+ }
+
lock_sock(sk);
err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
release_sock(sk);
+ spin_lock_bh(&msk->pm.lock);
+ if (err)
+ mptcp_userspace_pm_delete_local_addr(msk, &local);
+ else
+ msk->pm.subflows++;
+ spin_unlock_bh(&msk->pm.lock);
+
create_err:
sock_put((struct sock *)msk);
return err;
@@ -420,7 +462,11 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
if (ssk) {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct mptcp_pm_addr_entry entry = { .addr = addr_l };
+ spin_lock_bh(&msk->pm.lock);
+ mptcp_userspace_pm_delete_local_addr(msk, &entry);
+ spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
mptcp_close_ssk(sk, ssk, subflow);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 67311e7d5b21..a6c7f2d24909 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -44,7 +44,7 @@ enum {
static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp;
static void __mptcp_destroy_sock(struct sock *sk);
-static void __mptcp_check_send_data_fin(struct sock *sk);
+static void mptcp_check_send_data_fin(struct sock *sk);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
static struct net_device mptcp_napi_dev;
@@ -424,8 +424,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- return !__mptcp_check_fallback(msk) &&
- ((1 << sk->sk_state) &
+ return ((1 << sk->sk_state) &
(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) &&
msk->write_seq == READ_ONCE(msk->snd_una);
}
@@ -583,9 +582,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
u64 rcv_data_fin_seq;
bool ret = false;
- if (__mptcp_check_fallback(msk))
- return ret;
-
/* Need to ack a DATA_FIN received from a peer while this side
* of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
* msk->rcv_data_fin was set when parsing the incoming options
@@ -623,7 +619,8 @@ static bool mptcp_check_data_fin(struct sock *sk)
}
ret = true;
- mptcp_send_ack(msk);
+ if (!__mptcp_check_fallback(msk))
+ mptcp_send_ack(msk);
mptcp_close_wake_up(sk);
}
return ret;
@@ -850,12 +847,12 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
return true;
}
-static void __mptcp_flush_join_list(struct sock *sk)
+static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list)
{
struct mptcp_subflow_context *tmp, *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
+ list_for_each_entry_safe(subflow, tmp, join_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast(ssk);
@@ -897,49 +894,6 @@ bool mptcp_schedule_work(struct sock *sk)
return false;
}
-void mptcp_subflow_eof(struct sock *sk)
-{
- if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags))
- mptcp_schedule_work(sk);
-}
-
-static void mptcp_check_for_eof(struct mptcp_sock *msk)
-{
- struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- int receivers = 0;
-
- mptcp_for_each_subflow(msk, subflow)
- receivers += !subflow->rx_eof;
- if (receivers)
- return;
-
- if (!(sk->sk_shutdown & RCV_SHUTDOWN)) {
- /* hopefully temporary hack: propagate shutdown status
- * to msk, when all subflows agree on it
- */
- WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
-
- smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- sk->sk_data_ready(sk);
- }
-
- switch (sk->sk_state) {
- case TCP_ESTABLISHED:
- inet_sk_state_store(sk, TCP_CLOSE_WAIT);
- break;
- case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_CLOSING);
- break;
- case TCP_FIN_WAIT2:
- inet_sk_state_store(sk, TCP_CLOSE);
- break;
- default:
- return;
- }
- mptcp_close_wake_up(sk);
-}
-
static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
@@ -1609,7 +1563,7 @@ out:
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
if (do_check_data_fin)
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
}
static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
@@ -1727,7 +1681,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
*copied_syn = 0;
} else if (ret && ret != -EINPROGRESS) {
- mptcp_disconnect(sk, 0);
+ /* The disconnect() op called by tcp_sendmsg_fastopen()/
+ * __inet_stream_connect() can fail, due to looking check,
+ * see mptcp_disconnect().
+ * Attempt it again outside the problematic scope.
+ */
+ if (!mptcp_disconnect(sk, 0))
+ sk->sk_socket->state = SS_UNCONNECTED;
}
inet_sk(sk)->defer_connect = 0;
@@ -2158,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
break;
}
- if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
- mptcp_check_for_eof(msk);
-
if (sk->sk_shutdown & RCV_SHUTDOWN) {
/* race breaker: the shutdown could be after the
* previous receive queue check
@@ -2389,7 +2346,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
- tcp_disconnect(ssk, 0);
+ /* The MPTCP code never wait on the subflow sockets, TCP-level
+ * disconnect should never fail
+ */
+ WARN_ON_ONCE(tcp_disconnect(ssk, 0));
msk->subflow->state = SS_UNCONNECTED;
mptcp_subflow_ctx_reset(subflow);
release_sock(ssk);
@@ -2408,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
kfree_rcu(subflow, rcu);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
- if (ssk->sk_state == TCP_LISTEN) {
- tcp_set_state(ssk, TCP_CLOSE);
- mptcp_subflow_queue_clean(sk, ssk);
- inet_csk_listen_stop(ssk);
- mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
- }
-
__tcp_close(ssk, 0);
/* close acquired an extra ref */
@@ -2671,16 +2624,12 @@ static void mptcp_worker(struct work_struct *work)
if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto unlock;
- mptcp_check_data_fin_ack(sk);
-
mptcp_check_fastclose(msk);
mptcp_pm_nl_work(msk);
- if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
- mptcp_check_for_eof(msk);
-
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
+ mptcp_check_data_fin_ack(sk);
mptcp_check_data_fin(sk);
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
@@ -2812,13 +2761,19 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
break;
fallthrough;
case TCP_SYN_SENT:
- tcp_disconnect(ssk, O_NONBLOCK);
+ WARN_ON_ONCE(tcp_disconnect(ssk, O_NONBLOCK));
break;
default:
if (__mptcp_check_fallback(mptcp_sk(sk))) {
pr_debug("Fallback");
ssk->sk_shutdown |= how;
tcp_shutdown(ssk, how);
+
+ /* simulate the data_fin ack reception to let the state
+ * machine move forward
+ */
+ WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt);
+ mptcp_schedule_work(sk);
} else {
pr_debug("Sending DATA_FIN on subflow %p", ssk);
tcp_send_ack(ssk);
@@ -2858,7 +2813,7 @@ static int mptcp_close_state(struct sock *sk)
return next & TCP_ACTION_FIN;
}
-static void __mptcp_check_send_data_fin(struct sock *sk)
+static void mptcp_check_send_data_fin(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2876,19 +2831,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- /* fallback socket will not get data_fin/ack, can move to the next
- * state now
- */
- if (__mptcp_check_fallback(msk)) {
- WRITE_ONCE(msk->snd_una, msk->write_seq);
- if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
- inet_sk_state_store(sk, TCP_CLOSE);
- mptcp_close_wake_up(sk);
- } else if (sk->sk_state == TCP_FIN_WAIT1) {
- inet_sk_state_store(sk, TCP_FIN_WAIT2);
- }
- }
-
mptcp_for_each_subflow(msk, subflow) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
@@ -2908,7 +2850,7 @@ static void __mptcp_wr_shutdown(struct sock *sk)
WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
WRITE_ONCE(msk->snd_data_fin_enable, 1);
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
}
static void __mptcp_destroy_sock(struct sock *sk)
@@ -2953,10 +2895,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
return EPOLLIN | EPOLLRDNORM;
}
-static void mptcp_listen_inuse_dec(struct sock *sk)
+static void mptcp_check_listen_stop(struct sock *sk)
{
- if (inet_sk_state_load(sk) == TCP_LISTEN)
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ struct sock *ssk;
+
+ if (inet_sk_state_load(sk) != TCP_LISTEN)
+ return;
+
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ ssk = mptcp_sk(sk)->first;
+ if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN))
+ return;
+
+ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ mptcp_subflow_queue_clean(sk, ssk);
+ inet_csk_listen_stop(ssk);
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
+ tcp_set_state(ssk, TCP_CLOSE);
+ release_sock(ssk);
}
bool __mptcp_close(struct sock *sk, long timeout)
@@ -2969,7 +2925,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
- mptcp_listen_inuse_dec(sk);
+ mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
goto cleanup;
}
@@ -3073,15 +3029,20 @@ static int mptcp_disconnect(struct sock *sk, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ /* Deny disconnect if other threads are blocked in sk_wait_event()
+ * or inet_wait_for_connect().
+ */
+ if (sk->sk_wait_pending)
+ return -EBUSY;
+
/* We are on the fastopen error path. We can't call straight into the
* subflows cleanup code due to lock nesting (we are already under
- * msk->firstsocket lock). Do nothing and leave the cleanup to the
- * caller.
+ * msk->firstsocket lock).
*/
if (msk->fastopening)
- return 0;
+ return -EBUSY;
- mptcp_listen_inuse_dec(sk);
+ mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_timer(sk);
@@ -3140,6 +3101,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
#endif
+ nsk->sk_wait_pending = 0;
__mptcp_init_sock(nsk);
msk = mptcp_sk(nsk);
@@ -3327,9 +3289,14 @@ static void mptcp_release_cb(struct sock *sk)
for (;;) {
unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
msk->push_pending;
+ struct list_head join_list;
+
if (!flags)
break;
+ INIT_LIST_HEAD(&join_list);
+ list_splice_init(&msk->join_list, &join_list);
+
/* the following actions acquire the subflow socket lock
*
* 1) can't be invoked in atomic scope
@@ -3340,8 +3307,9 @@ static void mptcp_release_cb(struct sock *sk)
msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
+
if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
- __mptcp_flush_join_list(sk);
+ __mptcp_flush_join_list(sk, &join_list);
if (flags & BIT(MPTCP_PUSH_PENDING))
__mptcp_push_pending(sk, 0);
if (flags & BIT(MPTCP_RETRANSMIT))
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index c5255258bfb3..d3783a7056e1 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -113,7 +113,6 @@
/* MPTCP socket atomic flags */
#define MPTCP_NOSPACE 1
#define MPTCP_WORK_RTX 2
-#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4
#define MPTCP_WORK_CLOSE_SUBFLOW 5
@@ -476,14 +475,13 @@ struct mptcp_subflow_context {
send_mp_fail : 1,
send_fastclose : 1,
send_infinite_map : 1,
- rx_eof : 1,
remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
- __unused : 8;
+ __unused : 9;
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
@@ -720,7 +718,6 @@ static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit)
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
-void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
{
@@ -832,6 +829,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
+void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list);
void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
struct list_head *rm_list);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 4688daa6b38b..d9c8b21c6076 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1749,14 +1749,16 @@ static void subflow_state_change(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
__subflow_state_change(sk);
+ msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_propagate_sndbuf(parent, sk);
mptcp_do_fallback(sk);
- mptcp_rcv_space_init(mptcp_sk(parent), sk);
- pr_fallback(mptcp_sk(parent));
+ mptcp_rcv_space_init(msk, sk);
+ pr_fallback(msk);
subflow->conn_finished = 1;
mptcp_set_connected(parent);
}
@@ -1772,11 +1774,12 @@ static void subflow_state_change(struct sock *sk)
subflow_sched_work_if_closed(mptcp_sk(parent), sk);
- if (__mptcp_check_fallback(mptcp_sk(parent)) &&
- !subflow->rx_eof && subflow_is_done(sk)) {
- subflow->rx_eof = 1;
- mptcp_subflow_eof(parent);
- }
+ /* when the fallback subflow closes the rx side, trigger a 'dummy'
+ * ingress data fin, so that the msk state will follow along
+ */
+ if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk &&
+ mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true))
+ mptcp_schedule_work(parent);
}
void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 46ebee9400da..9a6b64779e64 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1694,6 +1694,14 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do {
+ if (retried) {
+ __ip_set_get(set);
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ cond_resched();
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ __ip_set_put(set);
+ }
+
ip_set_lock(set);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
ip_set_unlock(set);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index feb1d7fcb09f..a80b960223e1 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -1207,6 +1207,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->transport_header = skb->network_header;
skb_set_inner_ipproto(skb, next_protocol);
+ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
bool check = false;
@@ -1349,6 +1350,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->transport_header = skb->network_header;
skb_set_inner_ipproto(skb, next_protocol);
+ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
bool check = false;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c4ccfec6cb98..d119f1d4c2fc 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2260,6 +2260,9 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
return 0;
helper = rcu_dereference(help->helper);
+ if (!helper)
+ return 0;
+
if (!(helper->flags & NF_CT_HELPER_F_USERSPACE))
return 0;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 04bd0ed4d2ae..b0ef48b21dcb 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -317,12 +317,12 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
EXPORT_SYMBOL_GPL(flow_offload_add);
void flow_offload_refresh(struct nf_flowtable *flow_table,
- struct flow_offload *flow)
+ struct flow_offload *flow, bool force)
{
u32 timeout;
timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
- if (timeout - READ_ONCE(flow->timeout) > HZ)
+ if (force || timeout - READ_ONCE(flow->timeout) > HZ)
WRITE_ONCE(flow->timeout, timeout);
else
return;
@@ -334,6 +334,12 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
}
EXPORT_SYMBOL_GPL(flow_offload_refresh);
+static bool nf_flow_is_outdated(const struct flow_offload *flow)
+{
+ return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) &&
+ !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
+}
+
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
return nf_flow_timeout_delta(flow->timeout) <= 0;
@@ -423,7 +429,8 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
struct flow_offload *flow, void *data)
{
if (nf_flow_has_expired(flow) ||
- nf_ct_is_dying(flow->ct))
+ nf_ct_is_dying(flow->ct) ||
+ nf_flow_is_outdated(flow))
flow_offload_teardown(flow);
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 19efba1e51ef..3bbaf9c7ea46 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -384,7 +384,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
- flow_offload_refresh(flow_table, flow);
+ flow_offload_refresh(flow_table, flow, false);
nf_flow_encap_pop(skb, tuplehash);
thoff -= offset;
@@ -650,7 +650,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
- flow_offload_refresh(flow_table, flow);
+ flow_offload_refresh(flow_table, flow, false);
nf_flow_encap_pop(skb, tuplehash);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index dc5675962de4..4c7937fd803f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -151,6 +151,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
return NULL;
INIT_LIST_HEAD(&trans->list);
+ INIT_LIST_HEAD(&trans->binding_list);
trans->msg_type = msg_type;
trans->ctx = *ctx;
@@ -163,13 +164,20 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
}
-static void nft_trans_destroy(struct nft_trans *trans)
+static void nft_trans_list_del(struct nft_trans *trans)
{
list_del(&trans->list);
+ list_del(&trans->binding_list);
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+ nft_trans_list_del(trans);
kfree(trans);
}
-static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set,
+ bool bind)
{
struct nftables_pernet *nft_net;
struct net *net = ctx->net;
@@ -183,16 +191,80 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
if (nft_trans_set(trans) == set)
- nft_trans_set_bound(trans) = true;
+ nft_trans_set_bound(trans) = bind;
break;
case NFT_MSG_NEWSETELEM:
if (nft_trans_elem_set(trans) == set)
- nft_trans_elem_set_bound(trans) = true;
+ nft_trans_elem_set_bound(trans) = bind;
break;
}
}
}
+static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, true);
+}
+
+static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, false);
+}
+
+static void __nft_chain_trans_bind(const struct nft_ctx *ctx,
+ struct nft_chain *chain, bool bind)
+{
+ struct nftables_pernet *nft_net;
+ struct net *net = ctx->net;
+ struct nft_trans *trans;
+
+ if (!nft_chain_binding(chain))
+ return;
+
+ nft_net = nft_pernet(net);
+ list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain(trans) == chain)
+ nft_trans_chain_bound(trans) = bind;
+ break;
+ case NFT_MSG_NEWRULE:
+ if (trans->ctx.chain == chain)
+ nft_trans_rule_bound(trans) = bind;
+ break;
+ }
+ }
+}
+
+static void nft_chain_trans_bind(const struct nft_ctx *ctx,
+ struct nft_chain *chain)
+{
+ __nft_chain_trans_bind(ctx, chain, true);
+}
+
+int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+ if (!nft_chain_binding(chain))
+ return 0;
+
+ if (nft_chain_binding(ctx->chain))
+ return -EOPNOTSUPP;
+
+ if (chain->bound)
+ return -EBUSY;
+
+ chain->bound = true;
+ chain->use++;
+ nft_chain_trans_bind(ctx, chain);
+
+ return 0;
+}
+
+void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+ __nft_chain_trans_bind(ctx, chain, false);
+}
+
static int nft_netdev_register_hooks(struct net *net,
struct list_head *hook_list)
{
@@ -292,6 +364,19 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
{
struct nftables_pernet *nft_net = nft_pernet(net);
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (!nft_trans_set_update(trans) &&
+ nft_set_is_anonymous(nft_trans_set(trans)))
+ list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (!nft_trans_chain_update(trans) &&
+ nft_chain_binding(nft_trans_chain(trans)))
+ list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ break;
+ }
+
list_add_tail(&trans->list, &nft_net->commit_list);
}
@@ -338,8 +423,9 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID]));
}
}
-
+ nft_trans_chain(trans) = ctx->chain;
nft_trans_commit_list_add_tail(ctx->net, trans);
+
return trans;
}
@@ -357,8 +443,7 @@ static int nft_delchain(struct nft_ctx *ctx)
return 0;
}
-static void nft_rule_expr_activate(const struct nft_ctx *ctx,
- struct nft_rule *rule)
+void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule)
{
struct nft_expr *expr;
@@ -371,9 +456,8 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,
}
}
-static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
- struct nft_rule *rule,
- enum nft_trans_phase phase)
+void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule,
+ enum nft_trans_phase phase)
{
struct nft_expr *expr;
@@ -495,6 +579,58 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
return __nft_trans_set_add(ctx, msg_type, set, NULL);
}
+static void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_deactivate(ctx->net, set, elem);
+
+ return 0;
+}
+
+struct nft_set_elem_catchall {
+ struct list_head list;
+ struct rcu_head rcu;
+ void *elem;
+};
+
+static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ nft_setelem_data_deactivate(ctx->net, set, &elem);
+ break;
+ }
+}
+
+static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_deactivate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+
+ nft_map_catchall_deactivate(ctx, set);
+}
+
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -503,6 +639,9 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
if (err < 0)
return err;
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
nft_deactivate_next(ctx->net, set);
ctx->table->use--;
@@ -1600,6 +1739,8 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
if (nft_base_chain_netdev(family, ops->hooknum)) {
nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS);
+ if (!nest_devs)
+ goto nla_put_failure;
if (!hook_list)
hook_list = &basechain->hook_list;
@@ -2224,7 +2365,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
return 0;
}
-static int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
+int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
{
int err;
@@ -2526,6 +2667,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
nft_trans_basechain(trans) = basechain;
INIT_LIST_HEAD(&nft_trans_chain_hooks(trans));
list_splice(&hook.list, &nft_trans_chain_hooks(trans));
+ if (nla[NFTA_CHAIN_HOOK])
+ module_put(hook.type->owner);
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -2668,21 +2811,18 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack);
}
-static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_chain *chain,
+static int nft_delchain_hook(struct nft_ctx *ctx,
+ struct nft_base_chain *basechain,
struct netlink_ext_ack *extack)
{
+ const struct nft_chain *chain = &basechain->chain;
const struct nlattr * const *nla = ctx->nla;
struct nft_chain_hook chain_hook = {};
- struct nft_base_chain *basechain;
struct nft_hook *this, *hook;
LIST_HEAD(chain_del_list);
struct nft_trans *trans;
int err;
- if (!nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- basechain = nft_base_chain(chain);
err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook,
ctx->family, chain->flags, extack);
if (err < 0)
@@ -2767,7 +2907,12 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
return -EOPNOTSUPP;
- return nft_delchain_hook(&ctx, chain, extack);
+ if (nft_is_base_chain(chain)) {
+ struct nft_base_chain *basechain = nft_base_chain(chain);
+
+ if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
+ return nft_delchain_hook(&ctx, basechain, extack);
+ }
}
if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
@@ -3488,8 +3633,7 @@ err_fill_rule_info:
return err;
}
-static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
- struct nft_rule *rule)
+void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
{
struct nft_expr *expr, *next;
@@ -3506,7 +3650,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
kfree(rule);
}
-void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
+static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
{
nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
nf_tables_rule_destroy(ctx, rule);
@@ -3594,12 +3738,6 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
return 0;
}
-struct nft_set_elem_catchall {
- struct list_head list;
- struct rcu_head rcu;
- void *elem;
-};
-
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
{
u8 genmask = nft_genmask_next(ctx->net);
@@ -3842,7 +3980,8 @@ err_destroy_flow_rule:
if (flow)
nft_flow_rule_destroy(flow);
err_release_rule:
- nf_tables_rule_release(&ctx, rule);
+ nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR);
+ nf_tables_rule_destroy(&ctx, rule);
err_release_expr:
for (i = 0; i < n; i++) {
if (expr_info[i].ops) {
@@ -4774,6 +4913,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
if (err)
return err;
@@ -4782,6 +4924,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
+
+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}
@@ -4828,6 +4974,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
+ if (nft_set_is_anonymous(set))
+ return -EOPNOTSUPP;
+
err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
if (err < 0)
return err;
@@ -4917,6 +5066,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
set->num_exprs = num_exprs;
set->handle = nf_tables_alloc_handle(table);
+ INIT_LIST_HEAD(&set->pending_update);
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
@@ -4930,7 +5080,7 @@ err_set_expr_alloc:
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(&ctx, set->exprs[i]);
err_set_destroy:
- ops->destroy(set);
+ ops->destroy(&ctx, set);
err_set_init:
kfree(set->name);
err_set_name:
@@ -4945,7 +5095,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
list_del_rcu(&catchall->list);
- nft_set_elem_destroy(set, catchall->elem, true);
+ nf_tables_set_elem_destroy(ctx, set, catchall->elem);
kfree_rcu(catchall, rcu);
}
}
@@ -4960,7 +5110,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(ctx, set->exprs[i]);
- set->ops->destroy(set);
+ set->ops->destroy(ctx, set);
nft_set_catchall_destroy(ctx, set);
kfree(set->name);
kvfree(set);
@@ -5125,10 +5275,60 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
}
+static void nft_setelem_data_activate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_activate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_activate(ctx->net, set, elem);
+
+ return 0;
+}
+
+static void nft_map_catchall_activate(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ nft_setelem_data_activate(ctx->net, set, &elem);
+ break;
+ }
+}
+
+static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_activate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+
+ nft_map_catchall_activate(ctx, set);
+}
+
void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
{
- if (nft_set_is_anonymous(set))
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(ctx, set);
+
nft_clear(ctx->net, set);
+ }
set->use++;
}
@@ -5139,14 +5339,28 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
enum nft_trans_phase phase)
{
switch (phase) {
- case NFT_TRANS_PREPARE:
+ case NFT_TRANS_PREPARE_ERROR:
+ nft_set_trans_unbind(ctx, set);
if (nft_set_is_anonymous(set))
nft_deactivate_next(ctx->net, set);
set->use--;
+ break;
+ case NFT_TRANS_PREPARE:
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
+ nft_deactivate_next(ctx->net, set);
+ }
+ set->use--;
return;
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
+ if (nft_set_is_anonymous(set) &&
+ set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
set->use--;
fallthrough;
default:
@@ -5899,6 +6113,7 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
__nft_set_elem_expr_destroy(ctx, expr);
}
+/* Drop references and destroy. Called from gc, dynset and abort path. */
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr)
{
@@ -5920,11 +6135,11 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
-/* Only called from commit path, nft_setelem_data_deactivate() already deals
- * with the refcounting from the preparation phase.
+/* Destroy element. References have been already dropped in the preparation
+ * path via nft_setelem_data_deactivate().
*/
-static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
- const struct nft_set *set, void *elem)
+void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
@@ -6487,19 +6702,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (flags)
*nft_set_ext_flags(ext) = flags;
+ if (obj) {
+ *nft_set_ext_obj(ext) = obj;
+ obj->use++;
+ }
if (ulen > 0) {
if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) {
err = -EINVAL;
- goto err_elem_userdata;
+ goto err_elem_free;
}
udata = nft_set_ext_userdata(ext);
udata->len = ulen - 1;
nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
}
- if (obj) {
- *nft_set_ext_obj(ext) = obj;
- obj->use++;
- }
err = nft_set_elem_expr_setup(ctx, &tmpl, ext, expr_array, num_exprs);
if (err < 0)
goto err_elem_free;
@@ -6554,10 +6769,7 @@ err_set_full:
err_element_clash:
kfree(trans);
err_elem_free:
- if (obj)
- obj->use--;
-err_elem_userdata:
- nf_tables_set_elem_destroy(ctx, set, elem.priv);
+ nft_set_elem_destroy(set, elem.priv, true);
err_parse_data:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_release(&elem.data.val, desc.type);
@@ -6601,7 +6813,8 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+ if (!list_empty(&set->bindings) &&
+ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -6634,7 +6847,6 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
{
struct nft_chain *chain;
- struct nft_rule *rule;
if (type == NFT_DATA_VERDICT) {
switch (data->verdict.code) {
@@ -6642,15 +6854,6 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
case NFT_GOTO:
chain = data->verdict.chain;
chain->use++;
-
- if (!nft_chain_is_bound(chain))
- break;
-
- chain->table->use++;
- list_for_each_entry(rule, &chain->rules, list)
- chain->use++;
-
- nft_chain_add(chain->table, chain);
break;
}
}
@@ -6885,7 +7088,9 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+
+ if (!list_empty(&set->bindings) &&
+ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -7667,6 +7872,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
enum nft_trans_phase phase)
{
switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
case NFT_TRANS_PREPARE:
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
@@ -8939,7 +9145,7 @@ static void nf_tables_trans_destroy_work(struct work_struct *w)
synchronize_rcu();
list_for_each_entry_safe(trans, next, &head, list) {
- list_del(&trans->list);
+ nft_trans_list_del(trans);
nft_commit_release(trans);
}
}
@@ -9005,7 +9211,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
continue;
}
- if (WARN_ON_ONCE(data + expr->ops->size > data_boundary))
+ if (WARN_ON_ONCE(data + size + expr->ops->size > data_boundary))
return -ENOMEM;
memcpy(data + size, expr, expr->ops->size);
@@ -9273,10 +9479,25 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation)
}
}
+static void nft_set_commit_update(struct list_head *set_update_list)
+{
+ struct nft_set *set, *next;
+
+ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+ list_del_init(&set->pending_update);
+
+ if (!set->ops->commit)
+ continue;
+
+ set->ops->commit(set);
+ }
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
@@ -9289,6 +9510,27 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
return 0;
}
+ list_for_each_entry(trans, &nft_net->binding_list, binding_list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (!nft_trans_set_update(trans) &&
+ nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !nft_trans_set_bound(trans)) {
+ pr_warn_once("nftables ruleset with unbound set\n");
+ return -EINVAL;
+ }
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (!nft_trans_chain_update(trans) &&
+ nft_chain_binding(nft_trans_chain(trans)) &&
+ !nft_trans_chain_bound(trans)) {
+ pr_warn_once("nftables ruleset with unbound chain\n");
+ return -EINVAL;
+ }
+ break;
+ }
+ }
+
/* 0. Validate ruleset, otherwise roll back for error reporting. */
if (nf_tables_validate(net) < 0)
return -EAGAIN;
@@ -9451,6 +9693,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_NEWSETELEM);
+ if (te->set->ops->commit &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
@@ -9465,6 +9712,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
atomic_dec(&te->set->nelems);
te->set->ndeact--;
}
+ if (te->set->ops->commit &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
@@ -9527,6 +9779,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
}
+ nft_set_commit_update(&set_update_list);
+
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
@@ -9586,10 +9840,25 @@ static void nf_tables_abort_release(struct nft_trans *trans)
kfree(trans);
}
+static void nft_set_abort_update(struct list_head *set_update_list)
+{
+ struct nft_set *set, *next;
+
+ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+ list_del_init(&set->pending_update);
+
+ if (!set->ops->abort)
+ continue;
+
+ set->ops->abort(set);
+ }
+}
+
static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
if (action == NFNL_ABORT_VALIDATE &&
@@ -9631,7 +9900,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
} else {
- if (nft_chain_is_bound(trans->ctx.chain)) {
+ if (nft_trans_chain_bound(trans)) {
nft_trans_destroy(trans);
break;
}
@@ -9654,6 +9923,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWRULE:
+ if (nft_trans_rule_bound(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
trans->ctx.chain->use--;
list_del_rcu(&nft_trans_rule(trans)->list);
nft_rule_expr_deactivate(&trans->ctx,
@@ -9688,6 +9961,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSET:
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_set(trans));
+ if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(&trans->ctx, nft_trans_set(trans));
+
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
@@ -9699,6 +9975,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_setelem_remove(net, te->set, &te->elem);
if (!nft_setelem_is_catchall(te->set, &te->elem))
atomic_dec(&te->set->nelems);
+
+ if (te->set->ops->abort &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
@@ -9709,6 +9991,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
if (!nft_setelem_is_catchall(te->set, &te->elem))
te->set->ndeact--;
+ if (te->set->ops->abort &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWOBJ:
@@ -9751,11 +10038,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
}
+ nft_set_abort_update(&set_update_list);
+
synchronize_rcu();
list_for_each_entry_safe_reverse(trans, next,
&nft_net->commit_list, list) {
- list_del(&trans->list);
+ nft_trans_list_del(trans);
nf_tables_abort_release(trans);
}
@@ -10204,22 +10493,12 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
static void nft_verdict_uninit(const struct nft_data *data)
{
struct nft_chain *chain;
- struct nft_rule *rule;
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
chain = data->verdict.chain;
chain->use--;
-
- if (!nft_chain_is_bound(chain))
- break;
-
- chain->table->use--;
- list_for_each_entry(rule, &chain->rules, list)
- chain->use--;
-
- nft_chain_del(chain);
break;
}
}
@@ -10454,6 +10733,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
table->use--;
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(&ctx, set);
+
nft_set_destroy(&ctx, set);
}
list_for_each_entry_safe(obj, ne, &table->objects, list) {
@@ -10538,6 +10820,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->tables);
INIT_LIST_HEAD(&nft_net->commit_list);
+ INIT_LIST_HEAD(&nft_net->binding_list);
INIT_LIST_HEAD(&nft_net->module_list);
INIT_LIST_HEAD(&nft_net->notify_list);
mutex_init(&nft_net->commit_mutex);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index ae7146475d17..c9fbe0f707b5 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -533,7 +533,8 @@ ack:
* processed, this avoids that the same error is
* reported several times when replaying the batch.
*/
- if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
+ if (err == -ENOMEM ||
+ nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
/* We failed to enqueue an error, reset the
* list of errors and send OOM to userspace
* pointing to the batch header.
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index ee6840bd5933..8f1bfa6ccc2d 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -439,3 +439,4 @@ module_init(nfnl_osf_init);
module_exit(nfnl_osf_fini);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 84eae7cabc67..2527a01486ef 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -323,7 +323,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track,
dreg = priv->dreg;
regcount = DIV_ROUND_UP(priv->len, NFT_REG32_SIZE);
for (i = 0; i < regcount; i++, dreg++)
- track->regs[priv->dreg].bitwise = expr;
+ track->regs[dreg].bitwise = expr;
return false;
}
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index c9d2f7c29f53..3d76ebfe8939 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -76,11 +76,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
switch (priv->data.verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- if (nft_chain_is_bound(chain)) {
- err = -EBUSY;
- goto err1;
- }
- chain->bound = true;
+ err = nf_tables_bind_chain(ctx, chain);
+ if (err < 0)
+ return err;
break;
default:
break;
@@ -98,6 +96,31 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_ctx chain_ctx;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ if (!nft_chain_binding(chain))
+ break;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_activate(&chain_ctx, rule);
+
+ nft_clear(ctx->net, chain);
+ break;
+ default:
+ break;
+ }
+ }
return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
}
@@ -107,6 +130,43 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
enum nft_trans_phase phase)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_ctx chain_ctx;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ if (!nft_chain_binding(chain))
+ break;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_deactivate(&chain_ctx, rule, phase);
+
+ switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
+ nf_tables_unbind_chain(ctx, chain);
+ fallthrough;
+ case NFT_TRANS_PREPARE:
+ nft_deactivate_next(ctx->net, chain);
+ break;
+ default:
+ nft_chain_del(chain);
+ chain->bound = false;
+ chain->table->use--;
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
if (phase == NFT_TRANS_COMMIT)
return;
@@ -131,15 +191,27 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
case NFT_GOTO:
chain = data->verdict.chain;
- if (!nft_chain_is_bound(chain))
+ if (!nft_chain_binding(chain))
+ break;
+
+ /* Rule construction failed, but chain is already bound:
+ * let the transaction records release this chain and its rules.
+ */
+ if (chain->bound) {
+ chain->use--;
break;
+ }
+ /* Rule has been deleted, release chain and its rules. */
chain_ctx = *ctx;
chain_ctx.chain = chain;
- list_for_each_entry_safe(rule, n, &chain->rules, list)
- nf_tables_rule_release(&chain_ctx, rule);
-
+ chain->use--;
+ list_for_each_entry_safe(rule, n, &chain->rules, list) {
+ chain->use--;
+ list_del(&rule->list);
+ nf_tables_rule_destroy(&chain_ctx, rule);
+ }
nf_tables_chain_destroy(&chain_ctx);
break;
default:
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 96081ac8d2b4..1e5e7a181e0b 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -271,13 +271,14 @@ static int nft_bitmap_init(const struct nft_set *set,
return 0;
}
-static void nft_bitmap_destroy(const struct nft_set *set)
+static void nft_bitmap_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be, *n;
list_for_each_entry_safe(be, n, &priv->list, head)
- nft_set_elem_destroy(set, be, true);
+ nf_tables_set_elem_destroy(ctx, set, be);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 76de6c8d9865..0b73cb0e752f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -400,19 +400,31 @@ static int nft_rhash_init(const struct nft_set *set,
return 0;
}
+struct nft_rhash_ctx {
+ const struct nft_ctx ctx;
+ const struct nft_set *set;
+};
+
static void nft_rhash_elem_destroy(void *ptr, void *arg)
{
- nft_set_elem_destroy(arg, ptr, true);
+ struct nft_rhash_ctx *rhash_ctx = arg;
+
+ nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
}
-static void nft_rhash_destroy(const struct nft_set *set)
+static void nft_rhash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_ctx rhash_ctx = {
+ .ctx = *ctx,
+ .set = set,
+ };
cancel_delayed_work_sync(&priv->gc_work);
rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
- (void *)set);
+ (void *)&rhash_ctx);
}
/* Number of buckets is stored in u32, so cap our result to 1U<<31 */
@@ -643,7 +655,8 @@ static int nft_hash_init(const struct nft_set *set,
return 0;
}
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_hash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
@@ -653,7 +666,7 @@ static void nft_hash_destroy(const struct nft_set *set)
for (i = 0; i < priv->buckets; i++) {
hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
hlist_del_rcu(&he->node);
- nft_set_elem_destroy(set, he, true);
+ nf_tables_set_elem_destroy(ctx, set, he);
}
}
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 06d46d182634..0452ee586c1c 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1600,17 +1600,10 @@ static void pipapo_free_fields(struct nft_pipapo_match *m)
}
}
-/**
- * pipapo_reclaim_match - RCU callback to free fields from old matching data
- * @rcu: RCU head
- */
-static void pipapo_reclaim_match(struct rcu_head *rcu)
+static void pipapo_free_match(struct nft_pipapo_match *m)
{
- struct nft_pipapo_match *m;
int i;
- m = container_of(rcu, struct nft_pipapo_match, rcu);
-
for_each_possible_cpu(i)
kfree(*per_cpu_ptr(m->scratch, i));
@@ -1625,7 +1618,19 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
}
/**
- * pipapo_commit() - Replace lookup data with current working copy
+ * pipapo_reclaim_match - RCU callback to free fields from old matching data
+ * @rcu: RCU head
+ */
+static void pipapo_reclaim_match(struct rcu_head *rcu)
+{
+ struct nft_pipapo_match *m;
+
+ m = container_of(rcu, struct nft_pipapo_match, rcu);
+ pipapo_free_match(m);
+}
+
+/**
+ * nft_pipapo_commit() - Replace lookup data with current working copy
* @set: nftables API set representation
*
* While at it, check if we should perform garbage collection on the working
@@ -1635,7 +1640,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
* We also need to create a new working copy for subsequent insertions and
* deletions.
*/
-static void pipapo_commit(const struct nft_set *set)
+static void nft_pipapo_commit(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *new_clone, *old;
@@ -1660,6 +1665,26 @@ static void pipapo_commit(const struct nft_set *set)
priv->clone = new_clone;
}
+static void nft_pipapo_abort(const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *new_clone, *m;
+
+ if (!priv->dirty)
+ return;
+
+ m = rcu_dereference(priv->match);
+
+ new_clone = pipapo_clone(m);
+ if (IS_ERR(new_clone))
+ return;
+
+ priv->dirty = false;
+
+ pipapo_free_match(priv->clone);
+ priv->clone = new_clone;
+}
+
/**
* nft_pipapo_activate() - Mark element reference as active given key, commit
* @net: Network namespace
@@ -1667,8 +1692,7 @@ static void pipapo_commit(const struct nft_set *set)
* @elem: nftables API element representation containing key data
*
* On insertion, elements are added to a copy of the matching data currently
- * in use for lookups, and not directly inserted into current lookup data, so
- * we'll take care of that by calling pipapo_commit() here. Both
+ * in use for lookups, and not directly inserted into current lookup data. Both
* nft_pipapo_insert() and nft_pipapo_activate() are called once for each
* element, hence we can't purpose either one as a real commit operation.
*/
@@ -1684,8 +1708,6 @@ static void nft_pipapo_activate(const struct net *net,
nft_set_elem_change_active(net, set, &e->ext);
nft_set_elem_clear_busy(&e->ext);
-
- pipapo_commit(set);
}
/**
@@ -1931,7 +1953,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
if (i == m->field_count) {
priv->dirty = true;
pipapo_drop(m, rulemap);
- pipapo_commit(set);
return;
}
@@ -1953,12 +1974,16 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_iter *iter)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->net);
struct nft_pipapo_match *m;
struct nft_pipapo_field *f;
int i, r;
rcu_read_lock();
- m = rcu_dereference(priv->match);
+ if (iter->genmask == nft_genmask_cur(net))
+ m = rcu_dereference(priv->match);
+ else
+ m = priv->clone;
if (unlikely(!m))
goto out;
@@ -2127,10 +2152,12 @@ out_scratch:
/**
* nft_set_pipapo_match_destroy() - Destroy elements from key mapping array
+ * @ctx: context
* @set: nftables API set representation
* @m: matching data pointing to key mapping array
*/
-static void nft_set_pipapo_match_destroy(const struct nft_set *set,
+static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set,
struct nft_pipapo_match *m)
{
struct nft_pipapo_field *f;
@@ -2147,15 +2174,17 @@ static void nft_set_pipapo_match_destroy(const struct nft_set *set,
e = f->mt[r].e;
- nft_set_elem_destroy(set, e, true);
+ nf_tables_set_elem_destroy(ctx, set, e);
}
}
/**
* nft_pipapo_destroy() - Free private data for set and all committed elements
+ * @ctx: context
* @set: nftables API set representation
*/
-static void nft_pipapo_destroy(const struct nft_set *set)
+static void nft_pipapo_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
@@ -2165,7 +2194,7 @@ static void nft_pipapo_destroy(const struct nft_set *set)
if (m) {
rcu_barrier();
- nft_set_pipapo_match_destroy(set, m);
+ nft_set_pipapo_match_destroy(ctx, set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
@@ -2182,7 +2211,7 @@ static void nft_pipapo_destroy(const struct nft_set *set)
m = priv->clone;
if (priv->dirty)
- nft_set_pipapo_match_destroy(set, m);
+ nft_set_pipapo_match_destroy(ctx, set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(priv->clone->scratch_aligned);
@@ -2230,6 +2259,8 @@ const struct nft_set_type nft_set_pipapo_type = {
.init = nft_pipapo_init,
.destroy = nft_pipapo_destroy,
.gc_init = nft_pipapo_gc_init,
+ .commit = nft_pipapo_commit,
+ .abort = nft_pipapo_abort,
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};
@@ -2252,6 +2283,8 @@ const struct nft_set_type nft_set_pipapo_avx2_type = {
.init = nft_pipapo_init,
.destroy = nft_pipapo_destroy,
.gc_init = nft_pipapo_gc_init,
+ .commit = nft_pipapo_commit,
+ .abort = nft_pipapo_abort,
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 2f114aa10f1a..5c05c9b990fb 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -664,7 +664,8 @@ static int nft_rbtree_init(const struct nft_set *set,
return 0;
}
-static void nft_rbtree_destroy(const struct nft_set *set)
+static void nft_rbtree_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
@@ -675,7 +676,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_set_elem_destroy(set, rbe, true);
+ nf_tables_set_elem_destroy(ctx, set, rbe);
}
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index e1990baf3a3b..dc9485854002 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -71,4 +71,3 @@ MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
MODULE_DESCRIPTION("Passive OS fingerprint matching.");
MODULE_ALIAS("ipt_osf");
MODULE_ALIAS("ip6t_osf");
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 54c083003947..27511c90a26f 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -857,7 +857,8 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap,
offset -= iter->startbit;
idx = offset / NETLBL_CATMAP_MAPSIZE;
- iter->bitmap[idx] |= bitmap << (offset % NETLBL_CATMAP_MAPSIZE);
+ iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap
+ << (offset % NETLBL_CATMAP_MAPSIZE);
return 0;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index fcee6012293b..58f530f60172 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -236,9 +236,6 @@ void ovs_dp_detach_port(struct vport *p)
/* First drop references to device. */
hlist_del_rcu(&p->dp_hash_node);
- /* Free percpu memory */
- free_percpu(p->upcall_stats);
-
/* Then destroy it. */
ovs_vport_del(p);
}
@@ -1858,12 +1855,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_portids;
}
- vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
- if (!vport->upcall_stats) {
- err = -ENOMEM;
- goto err_destroy_vport;
- }
-
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_DP_CMD_NEW);
BUG_ON(err < 0);
@@ -1876,8 +1867,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-err_destroy_vport:
- ovs_dp_detach_port(vport);
err_destroy_portids:
kfree(rcu_dereference_raw(dp->upcall_portids));
err_unlock_and_destroy_meters:
@@ -2322,12 +2311,6 @@ restart:
goto exit_unlock_free;
}
- vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
- if (!vport->upcall_stats) {
- err = -ENOMEM;
- goto exit_unlock_free_vport;
- }
-
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
OVS_VPORT_CMD_NEW, GFP_KERNEL);
@@ -2345,8 +2328,6 @@ restart:
ovs_notify(&dp_vport_genl_family, reply, info);
return 0;
-exit_unlock_free_vport:
- ovs_dp_detach_port(vport);
exit_unlock_free:
ovs_unlock();
kfree_skb(reply);
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 7e0f5c45b512..972ae01a70f7 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -124,6 +124,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
{
struct vport *vport;
size_t alloc_size;
+ int err;
alloc_size = sizeof(struct vport);
if (priv_size) {
@@ -135,17 +136,29 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
if (!vport)
return ERR_PTR(-ENOMEM);
+ vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
+ if (!vport->upcall_stats) {
+ err = -ENOMEM;
+ goto err_kfree_vport;
+ }
+
vport->dp = parms->dp;
vport->port_no = parms->port_no;
vport->ops = ops;
INIT_HLIST_NODE(&vport->dp_hash_node);
if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
- kfree(vport);
- return ERR_PTR(-EINVAL);
+ err = -EINVAL;
+ goto err_free_percpu;
}
return vport;
+
+err_free_percpu:
+ free_percpu(vport->upcall_stats);
+err_kfree_vport:
+ kfree(vport);
+ return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(ovs_vport_alloc);
@@ -165,6 +178,7 @@ void ovs_vport_free(struct vport *vport)
* it is safe to use raw dereference.
*/
kfree(rcu_dereference_raw(vport->upcall_portids));
+ free_percpu(vport->upcall_stats);
kfree(vport);
}
EXPORT_SYMBOL_GPL(ovs_vport_free);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 9cc0bc7c71ed..abc71a06d634 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -610,6 +610,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
struct flow_offload_tuple tuple = {};
enum ip_conntrack_info ctinfo;
struct tcphdr *tcph = NULL;
+ bool force_refresh = false;
struct flow_offload *flow;
struct nf_conn *ct;
u8 dir;
@@ -647,6 +648,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
* established state, then don't refresh.
*/
return false;
+ force_refresh = true;
}
if (tcph && (unlikely(tcph->fin || tcph->rst))) {
@@ -660,7 +662,12 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
else
ctinfo = IP_CT_ESTABLISHED_REPLY;
- flow_offload_refresh(nf_ft, flow);
+ flow_offload_refresh(nf_ft, flow, force_refresh);
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
+ /* Process this flow in SW to allow promoting to ASSURED */
+ return false;
+ }
+
nf_conntrack_get(&ct->ct_general);
nf_ct_set(skb, ct, ctinfo);
if (nf_ft->flags & NF_FLOWTABLE_COUNTER)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index fc945c7e4123..c819b812a899 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -13,7 +13,10 @@
#include <linux/rtnetlink.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/slab.h>
+#include <net/ipv6.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_pedit.h>
@@ -327,28 +330,58 @@ static bool offset_valid(struct sk_buff *skb, int offset)
return true;
}
-static void pedit_skb_hdr_offset(struct sk_buff *skb,
+static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type)
+{
+ const int noff = skb_network_offset(skb);
+ int ret = -EINVAL;
+ struct iphdr _iph;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ const struct iphdr *iph = skb_header_pointer(skb, noff, sizeof(_iph), &_iph);
+
+ if (!iph)
+ goto out;
+ *hoffset = noff + iph->ihl * 4;
+ ret = 0;
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ ret = ipv6_find_hdr(skb, hoffset, header_type, NULL, NULL) == header_type ? 0 : -EINVAL;
+ break;
+ }
+out:
+ return ret;
+}
+
+static int pedit_skb_hdr_offset(struct sk_buff *skb,
enum pedit_header_type htype, int *hoffset)
{
+ int ret = -EINVAL;
/* 'htype' is validated in the netlink parsing */
switch (htype) {
case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
- if (skb_mac_header_was_set(skb))
+ if (skb_mac_header_was_set(skb)) {
*hoffset = skb_mac_offset(skb);
+ ret = 0;
+ }
break;
case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK:
case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
*hoffset = skb_network_offset(skb);
+ ret = 0;
break;
case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+ ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_TCP);
+ break;
case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
- if (skb_transport_header_was_set(skb))
- *hoffset = skb_transport_offset(skb);
+ ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_UDP);
break;
default:
break;
}
+ return ret;
}
TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
@@ -384,6 +417,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
int hoffset = 0;
u32 *ptr, hdata;
u32 val;
+ int rc;
if (tkey_ex) {
htype = tkey_ex->htype;
@@ -392,7 +426,11 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
tkey_ex++;
}
- pedit_skb_hdr_offset(skb, htype, &hoffset);
+ rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
+ if (rc) {
+ pr_info_ratelimited("tc action pedit unable to extract header offset for header type (0x%x)\n", htype);
+ goto bad;
+ }
if (tkey->offmask) {
u8 *d, _d;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 227cba58ce9f..2e9dce03d1ec 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -357,23 +357,23 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
opt.burst = PSCHED_NS2TICKS(p->tcfp_burst);
if (p->rate_present) {
psched_ratecfg_getrate(&opt.rate, &p->rate);
- if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) &&
+ if ((p->rate.rate_bytes_ps >= (1ULL << 32)) &&
nla_put_u64_64bit(skb, TCA_POLICE_RATE64,
- police->params->rate.rate_bytes_ps,
+ p->rate.rate_bytes_ps,
TCA_POLICE_PAD))
goto nla_put_failure;
}
if (p->peak_present) {
psched_ratecfg_getrate(&opt.peakrate, &p->peak);
- if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) &&
+ if ((p->peak.rate_bytes_ps >= (1ULL << 32)) &&
nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64,
- police->params->peak.rate_bytes_ps,
+ p->peak.rate_bytes_ps,
TCA_POLICE_PAD))
goto nla_put_failure;
}
if (p->pps_present) {
if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64,
- police->params->ppsrate.rate_pkts_ps,
+ p->ppsrate.rate_pkts_ps,
TCA_POLICE_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2621550bfddc..a193cc7b3241 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -43,8 +43,6 @@
#include <net/flow_offload.h>
#include <net/tc_wrapper.h>
-extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
-
/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);
@@ -659,8 +657,8 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
{
struct tcf_block *block = chain->block;
const struct tcf_proto_ops *tmplt_ops;
+ unsigned int refcnt, non_act_refcnt;
bool free_block = false;
- unsigned int refcnt;
void *tmplt_priv;
mutex_lock(&block->lock);
@@ -680,13 +678,15 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
* save these to temporary variables.
*/
refcnt = --chain->refcnt;
+ non_act_refcnt = refcnt - chain->action_refcnt;
tmplt_ops = chain->tmplt_ops;
tmplt_priv = chain->tmplt_priv;
- /* The last dropped non-action reference will trigger notification. */
- if (refcnt - chain->action_refcnt == 0 && !by_act) {
- tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
- block, NULL, 0, 0, false);
+ if (non_act_refcnt == chain->explicitly_created && !by_act) {
+ if (non_act_refcnt == 0)
+ tc_chain_notify_delete(tmplt_ops, tmplt_priv,
+ chain->index, block, NULL, 0, 0,
+ false);
/* Last reference to chain, no need to lock. */
chain->flushing = false;
}
@@ -2952,6 +2952,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
return PTR_ERR(ops);
if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
+ module_put(ops->owner);
return -EOPNOTSUPP;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4e2e269f121f..d15d50de7980 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -718,13 +718,19 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
struct nlattr *est, u32 flags, u32 fl_flags,
struct netlink_ext_ack *extack)
{
- int err;
+ int err, ifindex = -1;
err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags,
fl_flags, extack);
if (err < 0)
return err;
+ if (tb[TCA_U32_INDEV]) {
+ ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
+ if (ifindex < 0)
+ return -EINVAL;
+ }
+
if (tb[TCA_U32_LINK]) {
u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
struct tc_u_hnode *ht_down = NULL, *ht_old;
@@ -759,13 +765,9 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
tcf_bind_filter(tp, &n->res, base);
}
- if (tb[TCA_U32_INDEV]) {
- int ret;
- ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
- if (ret < 0)
- return -EINVAL;
- n->ifindex = ret;
- }
+ if (ifindex >= 0)
+ n->ifindex = ifindex;
+
return 0;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 014209b1dd58..aa6b1fe65151 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -309,7 +309,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
if (dev_ingress_queue(dev))
q = qdisc_match_from_root(
- dev_ingress_queue(dev)->qdisc_sleeping,
+ rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping),
handle);
out:
return q;
@@ -328,7 +328,8 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
nq = dev_ingress_queue_rcu(dev);
if (nq)
- q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
+ q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping),
+ handle);
out:
return q;
}
@@ -634,8 +635,13 @@ EXPORT_SYMBOL(qdisc_watchdog_init);
void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
u64 delta_ns)
{
- if (test_bit(__QDISC_STATE_DEACTIVATED,
- &qdisc_root_sleeping(wd->qdisc)->state))
+ bool deactivated;
+
+ rcu_read_lock();
+ deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
+ &qdisc_root_sleeping(wd->qdisc)->state);
+ rcu_read_unlock();
+ if (deactivated)
return;
if (hrtimer_is_queued(&wd->timer)) {
@@ -1073,17 +1079,29 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (parent == NULL) {
unsigned int i, num_q, ingress;
+ struct netdev_queue *dev_queue;
ingress = 0;
num_q = dev->num_tx_queues;
if ((q && q->flags & TCQ_F_INGRESS) ||
(new && new->flags & TCQ_F_INGRESS)) {
- num_q = 1;
ingress = 1;
- if (!dev_ingress_queue(dev)) {
+ dev_queue = dev_ingress_queue(dev);
+ if (!dev_queue) {
NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
return -ENOENT;
}
+
+ q = rtnl_dereference(dev_queue->qdisc_sleeping);
+
+ /* This is the counterpart of that qdisc_refcount_inc_nz() call in
+ * __tcf_qdisc_find() for filter requests.
+ */
+ if (!qdisc_refcount_dec_if_one(q)) {
+ NL_SET_ERR_MSG(extack,
+ "Current ingress or clsact Qdisc has ongoing filter requests");
+ return -EBUSY;
+ }
}
if (dev->flags & IFF_UP)
@@ -1094,18 +1112,26 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (new && new->ops->attach && !ingress)
goto skip;
- for (i = 0; i < num_q; i++) {
- struct netdev_queue *dev_queue = dev_ingress_queue(dev);
-
- if (!ingress)
+ if (!ingress) {
+ for (i = 0; i < num_q; i++) {
dev_queue = netdev_get_tx_queue(dev, i);
+ old = dev_graft_qdisc(dev_queue, new);
- old = dev_graft_qdisc(dev_queue, new);
- if (new && i > 0)
- qdisc_refcount_inc(new);
-
- if (!ingress)
+ if (new && i > 0)
+ qdisc_refcount_inc(new);
qdisc_put(old);
+ }
+ } else {
+ old = dev_graft_qdisc(dev_queue, NULL);
+
+ /* {ingress,clsact}_destroy() @old before grafting @new to avoid
+ * unprotected concurrent accesses to net_device::miniq_{in,e}gress
+ * pointer(s) in mini_qdisc_pair_swap().
+ */
+ qdisc_notify(net, skb, n, classid, old, new, extack);
+ qdisc_destroy(old);
+
+ dev_graft_qdisc(dev_queue, new);
}
skip:
@@ -1119,8 +1145,6 @@ skip:
if (new && new->ops->attach)
new->ops->attach(new);
- } else {
- notify_and_destroy(net, skb, n, classid, old, new, extack);
}
if (dev->flags & IFF_UP)
@@ -1478,7 +1502,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
}
q = qdisc_leaf(p, clid);
} else if (dev_ingress_queue(dev)) {
- q = dev_ingress_queue(dev)->qdisc_sleeping;
+ q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
} else {
q = rtnl_dereference(dev->qdisc);
@@ -1564,7 +1588,7 @@ replay:
}
q = qdisc_leaf(p, clid);
} else if (dev_ingress_queue_create(dev)) {
- q = dev_ingress_queue(dev)->qdisc_sleeping;
+ q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
} else {
q = rtnl_dereference(dev->qdisc);
@@ -1805,8 +1829,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
- tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
- &q_idx, s_q_idx, false,
+ tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
+ skb, cb, &q_idx, s_q_idx, false,
tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
@@ -2249,8 +2273,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
- tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
- &t, s_t, false) < 0)
+ tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
+ skb, tcm, cb, &t, s_t, false) < 0)
goto done;
done:
@@ -2302,7 +2326,9 @@ static struct pernet_operations psched_net_ops = {
.exit = psched_net_exit,
};
+#if IS_ENABLED(CONFIG_RETPOLINE)
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
+#endif
static int __init pktsched_init(void)
{
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 6980796d435d..591d87d5e5c0 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -201,6 +201,11 @@ out:
return NET_XMIT_CN;
}
+static struct netlink_range_validation fq_pie_q_range = {
+ .min = 1,
+ .max = 1 << 20,
+};
+
static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = {
[TCA_FQ_PIE_LIMIT] = {.type = NLA_U32},
[TCA_FQ_PIE_FLOWS] = {.type = NLA_U32},
@@ -208,7 +213,8 @@ static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = {
[TCA_FQ_PIE_TUPDATE] = {.type = NLA_U32},
[TCA_FQ_PIE_ALPHA] = {.type = NLA_U32},
[TCA_FQ_PIE_BETA] = {.type = NLA_U32},
- [TCA_FQ_PIE_QUANTUM] = {.type = NLA_U32},
+ [TCA_FQ_PIE_QUANTUM] =
+ NLA_POLICY_FULL_RANGE(NLA_U32, &fq_pie_q_range),
[TCA_FQ_PIE_MEMORY_LIMIT] = {.type = NLA_U32},
[TCA_FQ_PIE_ECN_PROB] = {.type = NLA_U32},
[TCA_FQ_PIE_ECN] = {.type = NLA_U32},
@@ -373,6 +379,7 @@ static void fq_pie_timer(struct timer_list *t)
spinlock_t *root_lock; /* to lock qdisc for probability calculations */
u32 idx;
+ rcu_read_lock();
root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -385,6 +392,7 @@ static void fq_pie_timer(struct timer_list *t)
mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
spin_unlock(root_lock);
+ rcu_read_unlock();
}
static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 37e41f972f69..5d7e23f4cc0e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -648,7 +648,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
static struct netdev_queue noop_netdev_queue = {
RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
- .qdisc_sleeping = &noop_qdisc,
+ RCU_POINTER_INITIALIZER(qdisc_sleeping, &noop_qdisc),
};
struct Qdisc noop_qdisc = {
@@ -1046,7 +1046,7 @@ static void qdisc_free_cb(struct rcu_head *head)
qdisc_free(q);
}
-static void qdisc_destroy(struct Qdisc *qdisc)
+static void __qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
@@ -1070,6 +1070,14 @@ static void qdisc_destroy(struct Qdisc *qdisc)
call_rcu(&qdisc->rcu, qdisc_free_cb);
}
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+ if (qdisc->flags & TCQ_F_BUILTIN)
+ return;
+
+ __qdisc_destroy(qdisc);
+}
+
void qdisc_put(struct Qdisc *qdisc)
{
if (!qdisc)
@@ -1079,7 +1087,7 @@ void qdisc_put(struct Qdisc *qdisc)
!refcount_dec_and_test(&qdisc->refcnt))
return;
- qdisc_destroy(qdisc);
+ __qdisc_destroy(qdisc);
}
EXPORT_SYMBOL(qdisc_put);
@@ -1094,7 +1102,7 @@ void qdisc_put_unlocked(struct Qdisc *qdisc)
!refcount_dec_and_rtnl_lock(&qdisc->refcnt))
return;
- qdisc_destroy(qdisc);
+ __qdisc_destroy(qdisc);
rtnl_unlock();
}
EXPORT_SYMBOL(qdisc_put_unlocked);
@@ -1103,7 +1111,7 @@ EXPORT_SYMBOL(qdisc_put_unlocked);
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
struct Qdisc *qdisc)
{
- struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *oqdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
spinlock_t *root_lock;
root_lock = qdisc_lock(oqdisc);
@@ -1112,7 +1120,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
/* ... and graft new one */
if (qdisc == NULL)
qdisc = &noop_qdisc;
- dev_queue->qdisc_sleeping = qdisc;
+ rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc);
rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
spin_unlock_bh(root_lock);
@@ -1125,12 +1133,12 @@ static void shutdown_scheduler_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_qdisc_default)
{
- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
struct Qdisc *qdisc_default = _qdisc_default;
if (qdisc) {
rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
- dev_queue->qdisc_sleeping = qdisc_default;
+ rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc_default);
qdisc_put(qdisc);
}
@@ -1154,7 +1162,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
if (!netif_is_multiqueue(dev))
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
- dev_queue->qdisc_sleeping = qdisc;
+ rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc);
}
static void attach_default_qdiscs(struct net_device *dev)
@@ -1167,7 +1175,7 @@ static void attach_default_qdiscs(struct net_device *dev)
if (!netif_is_multiqueue(dev) ||
dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- qdisc = txq->qdisc_sleeping;
+ qdisc = rtnl_dereference(txq->qdisc_sleeping);
rcu_assign_pointer(dev->qdisc, qdisc);
qdisc_refcount_inc(qdisc);
} else {
@@ -1186,7 +1194,7 @@ static void attach_default_qdiscs(struct net_device *dev)
netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
dev->priv_flags |= IFF_NO_QUEUE;
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- qdisc = txq->qdisc_sleeping;
+ qdisc = rtnl_dereference(txq->qdisc_sleeping);
rcu_assign_pointer(dev->qdisc, qdisc);
qdisc_refcount_inc(qdisc);
dev->priv_flags ^= IFF_NO_QUEUE;
@@ -1202,7 +1210,7 @@ static void transition_one_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_need_watchdog)
{
- struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *new_qdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
int *need_watchdog_p = _need_watchdog;
if (!(new_qdisc->flags & TCQ_F_BUILTIN))
@@ -1272,7 +1280,7 @@ static void dev_reset_queue(struct net_device *dev,
struct Qdisc *qdisc;
bool nolock;
- qdisc = dev_queue->qdisc_sleeping;
+ qdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
if (!qdisc)
return;
@@ -1303,7 +1311,7 @@ static bool some_qdisc_is_busy(struct net_device *dev)
int val;
dev_queue = netdev_get_tx_queue(dev, i);
- q = dev_queue->qdisc_sleeping;
+ q = rtnl_dereference(dev_queue->qdisc_sleeping);
root_lock = qdisc_lock(q);
spin_lock_bh(root_lock);
@@ -1379,7 +1387,7 @@ EXPORT_SYMBOL(dev_deactivate);
static int qdisc_change_tx_queue_len(struct net_device *dev,
struct netdev_queue *dev_queue)
{
- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
const struct Qdisc_ops *ops = qdisc->ops;
if (ops->change_tx_queue_len)
@@ -1404,7 +1412,7 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)
unsigned int i;
for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping);
/* Only update the default qdiscs we created,
* qdiscs with handles are always hashed.
*/
@@ -1412,7 +1420,7 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)
qdisc_hash_del(qdisc);
}
for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping);
if (qdisc != &noop_qdisc && !qdisc->handle)
qdisc_hash_add(qdisc, false);
}
@@ -1449,7 +1457,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
struct Qdisc *qdisc = _qdisc;
rcu_assign_pointer(dev_queue->qdisc, qdisc);
- dev_queue->qdisc_sleeping = qdisc;
+ rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc);
}
void dev_init_scheduler(struct net_device *dev)
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index d0bc660d7401..c860119a8f09 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -141,7 +141,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
* qdisc totals are added at end.
*/
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
- qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
spin_lock_bh(qdisc_lock(qdisc));
gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
@@ -202,7 +202,7 @@ static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
- return dev_queue->qdisc_sleeping;
+ return rtnl_dereference(dev_queue->qdisc_sleeping);
}
static unsigned long mq_find(struct Qdisc *sch, u32 classid)
@@ -221,7 +221,7 @@ static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle |= TC_H_MIN(cl);
- tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+ tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
return 0;
}
@@ -230,7 +230,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
- sch = dev_queue->qdisc_sleeping;
+ sch = rtnl_dereference(dev_queue->qdisc_sleeping);
if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
qdisc_qstats_copy(d, sch) < 0)
return -1;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index dc5a0ff50b14..ab69ff7577fc 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -557,7 +557,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
* qdisc totals are added at end.
*/
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
- qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
spin_lock_bh(qdisc_lock(qdisc));
gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
@@ -604,7 +604,7 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
if (!dev_queue)
return NULL;
- return dev_queue->qdisc_sleeping;
+ return rtnl_dereference(dev_queue->qdisc_sleeping);
}
static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
@@ -637,7 +637,7 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
tcm->tcm_parent = (tc < 0) ? 0 :
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(tc + TC_H_MIN_PRIORITY));
- tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+ tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
} else {
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_info = 0;
@@ -693,7 +693,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
} else {
struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
- sch = dev_queue->qdisc_sleeping;
+ sch = rtnl_dereference(dev_queue->qdisc_sleeping);
if (gnet_stats_copy_basic(d, sch->cpu_bstats,
&sch->bstats, true) < 0 ||
qdisc_qstats_copy(d, sch) < 0)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 6ef3021e1169..e79be1b3e74d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -966,6 +966,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
if (ret < 0)
return ret;
+ sch_tree_lock(sch);
/* backup q->clg and q->loss_model */
old_clg = q->clg;
old_loss_model = q->loss_model;
@@ -974,7 +975,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
if (ret) {
q->loss_model = old_loss_model;
- return ret;
+ goto unlock;
}
} else {
q->loss_model = CLG_RANDOM;
@@ -1041,6 +1042,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
/* capping jitter to the range acceptable by tabledist() */
q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
+unlock:
+ sch_tree_unlock(sch);
return ret;
get_table_failure:
@@ -1050,7 +1053,8 @@ get_table_failure:
*/
q->clg = old_clg;
q->loss_model = old_loss_model;
- return ret;
+
+ goto unlock;
}
static int netem_init(struct Qdisc *sch, struct nlattr *opt,
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 2152a56d73f8..2da6250ec346 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -421,8 +421,10 @@ static void pie_timer(struct timer_list *t)
{
struct pie_sched_data *q = from_timer(q, t, adapt_timer);
struct Qdisc *sch = q->sch;
- spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spinlock_t *root_lock;
+ rcu_read_lock();
+ root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
pie_calculate_probability(&q->params, &q->vars, sch->qstats.backlog);
@@ -430,6 +432,7 @@ static void pie_timer(struct timer_list *t)
if (q->params.tupdate)
mod_timer(&q->adapt_timer, jiffies + q->params.tupdate);
spin_unlock(root_lock);
+ rcu_read_unlock();
}
static int pie_init(struct Qdisc *sch, struct nlattr *opt,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 98129324e157..16277b6a0238 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -321,12 +321,15 @@ static inline void red_adaptative_timer(struct timer_list *t)
{
struct red_sched_data *q = from_timer(q, t, adapt_timer);
struct Qdisc *sch = q->sch;
- spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spinlock_t *root_lock;
+ rcu_read_lock();
+ root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
red_adaptative_algo(&q->parms, &q->vars);
mod_timer(&q->adapt_timer, jiffies + HZ/2);
spin_unlock(root_lock);
+ rcu_read_unlock();
}
static int red_init(struct Qdisc *sch, struct nlattr *opt,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index abd436307d6a..66dcb18638fe 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -606,10 +606,12 @@ static void sfq_perturbation(struct timer_list *t)
{
struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
struct Qdisc *sch = q->sch;
- spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spinlock_t *root_lock;
siphash_key_t nkey;
get_random_bytes(&nkey, sizeof(nkey));
+ rcu_read_lock();
+ root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
q->perturbation = nkey;
if (!q->filter_list && q->tail)
@@ -618,6 +620,7 @@ static void sfq_perturbation(struct timer_list *t)
if (q->perturb_period)
mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
+ rcu_read_unlock();
}
static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 76db9a10ef50..cf0e61ed9225 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -797,6 +797,9 @@ static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch,
taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]);
+ if (q->cur_txq[tc] >= dev->num_tx_queues)
+ q->cur_txq[tc] = first_txq;
+
if (skb)
return skb;
} while (q->cur_txq[tc] != first_txq);
@@ -2358,7 +2361,7 @@ static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
if (!dev_queue)
return NULL;
- return dev_queue->qdisc_sleeping;
+ return rtnl_dereference(dev_queue->qdisc_sleeping);
}
static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
@@ -2377,7 +2380,7 @@ static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle |= TC_H_MIN(cl);
- tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+ tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
return 0;
}
@@ -2389,7 +2392,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
{
struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
- sch = dev_queue->qdisc_sleeping;
+ sch = rtnl_dereference(dev_queue->qdisc_sleeping);
if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
qdisc_qstats_copy(d, sch) < 0)
return -1;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 16f9238aa51d..7721239c185f 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -297,7 +297,7 @@ restart:
struct net_device *slave = qdisc_dev(q);
struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
- if (slave_txq->qdisc_sleeping != q)
+ if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
continue;
if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
!netif_running(slave)) {
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 7fbeb99d8d32..23d6633966b1 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1250,7 +1250,10 @@ static int sctp_side_effects(enum sctp_event_type event_type,
default:
pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n",
status, state, event_type, subtype.chunk);
- BUG();
+ error = status;
+ if (error >= 0)
+ error = -EINVAL;
+ WARN_ON_ONCE(1);
break;
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 97f1155a2045..08fdf1251f46 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -4482,7 +4482,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
SCTP_AUTH_NEW_KEY, GFP_ATOMIC);
if (!ev)
- return -ENOMEM;
+ return SCTP_DISPOSITION_NOMEM;
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ev));
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 7a8d9163d186..90f0b60b196a 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -851,6 +851,8 @@ static int smc_llc_add_link_cont(struct smc_link *link,
addc_llc->num_rkeys = *num_rkeys_todo;
n = *num_rkeys_todo;
for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) {
+ while (*buf_pos && !(*buf_pos)->used)
+ *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
if (!*buf_pos) {
addc_llc->num_rkeys = addc_llc->num_rkeys -
*num_rkeys_todo;
@@ -867,8 +869,6 @@ static int smc_llc_add_link_cont(struct smc_link *link,
(*num_rkeys_todo)--;
*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
- while (*buf_pos && !(*buf_pos)->used)
- *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
}
addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT;
addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 53881406e200..cdcd2731860b 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1258,7 +1258,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info)
struct tipc_nl_msg msg;
struct tipc_media *media;
struct sk_buff *rep;
- struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+ struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1];
if (!info->attrs[TIPC_NLA_MEDIA])
return -EINVAL;
@@ -1307,7 +1307,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
int err;
char *name;
struct tipc_media *m;
- struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+ struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1];
if (!info->attrs[TIPC_NLA_MEDIA])
return -EINVAL;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5b0c4d5b80cf..b3ec9eaec36b 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -368,12 +368,12 @@ static void cfg80211_sched_scan_stop_wk(struct work_struct *work)
rdev = container_of(work, struct cfg80211_registered_device,
sched_scan_stop_wk);
- rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) {
if (req->nl_owner_dead)
cfg80211_stop_sched_scan_req(rdev, req, false);
}
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
}
static void cfg80211_propagate_radar_detect_wk(struct work_struct *work)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d95f8053020d..087d60c0f6e4 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10723,6 +10723,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NL80211_ATTR_MLD_ADDR])
return -EINVAL;
req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
+ if (!is_valid_ether_addr(req.ap_mld_addr))
+ return -EINVAL;
}
req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 2e497cf26ef2..69b508743e57 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018, 2021-2022 Intel Corporation
+ * Copyright (C) 2018, 2021-2023 Intel Corporation
*/
#ifndef __CFG80211_RDEV_OPS
#define __CFG80211_RDEV_OPS
@@ -1441,8 +1441,8 @@ rdev_del_intf_link(struct cfg80211_registered_device *rdev,
unsigned int link_id)
{
trace_rdev_del_intf_link(&rdev->wiphy, wdev, link_id);
- if (rdev->ops->add_intf_link)
- rdev->ops->add_intf_link(&rdev->wiphy, wdev, link_id);
+ if (rdev->ops->del_intf_link)
+ rdev->ops->del_intf_link(&rdev->wiphy, wdev, link_id);
trace_rdev_return_void(&rdev->wiphy);
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 0d40d6af7e10..26f11e4746c0 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2404,11 +2404,8 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
case NL80211_IFTYPE_P2P_GO:
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
- wiphy_lock(wiphy);
ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef,
iftype);
- wiphy_unlock(wiphy);
-
if (!ret)
return ret;
break;
@@ -2440,11 +2437,11 @@ static void reg_leave_invalid_chans(struct wiphy *wiphy)
struct wireless_dev *wdev;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- ASSERT_RTNL();
-
+ wiphy_lock(wiphy);
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list)
if (!reg_wdev_chan_valid(wiphy, wdev))
cfg80211_leave(rdev, wdev);
+ wiphy_unlock(wiphy);
}
static void reg_check_chans_work(struct work_struct *work)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 3bc0c3072e78..9755ef281040 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -5,7 +5,7 @@
* Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
#include <linux/export.h>
#include <linux/bitops.h>
@@ -2558,6 +2558,13 @@ void cfg80211_remove_links(struct wireless_dev *wdev)
{
unsigned int link_id;
+ /*
+ * links are controlled by upper layers (userspace/cfg)
+ * only for AP mode, so only remove them here for AP
+ */
+ if (wdev->iftype != NL80211_IFTYPE_AP)
+ return;
+
wdev_lock(wdev);
if (wdev->valid_links) {
for_each_valid_link(wdev, link_id)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 39fb91ff23d9..815b38080401 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -131,6 +131,7 @@ struct sec_path *secpath_set(struct sk_buff *skb)
memset(sp->ovec, 0, sizeof(sp->ovec));
sp->olen = 0;
sp->len = 0;
+ sp->verified_cnt = 0;
return sp;
}
@@ -330,11 +331,10 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
{
switch (x->props.mode) {
case XFRM_MODE_BEET:
- switch (XFRM_MODE_SKB_CB(skb)->protocol) {
- case IPPROTO_IPIP:
- case IPPROTO_BEETPH:
+ switch (x->sel.family) {
+ case AF_INET:
return xfrm4_remove_beet_encap(x, skb);
- case IPPROTO_IPV6:
+ case AF_INET6:
return xfrm6_remove_beet_encap(x, skb);
}
break;
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 1f99dc469027..35279c220bd7 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -310,6 +310,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
skb->mark = 0;
}
+static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type, unsigned short family)
+{
+ struct sec_path *sp;
+
+ sp = skb_sec_path(skb);
+ if (sp && (sp->len || sp->olen) &&
+ !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ goto discard;
+
+ XFRM_SPI_SKB_CB(skb)->family = family;
+ if (family == AF_INET) {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+ } else {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+ }
+
+ return xfrm_input(skb, nexthdr, spi, encap_type);
+discard:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int xfrmi4_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET);
+}
+
+static int xfrmi6_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
+ 0, 0, AF_INET6);
+}
+
+static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET);
+}
+
+static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6);
+}
+
static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
{
const struct xfrm_mode *inner_mode;
@@ -945,8 +991,8 @@ static struct pernet_operations xfrmi_net_ops = {
};
static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
- .handler = xfrm6_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi6_rcv,
+ .input_handler = xfrmi6_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
@@ -996,8 +1042,8 @@ static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
#endif
static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
- .handler = xfrm4_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi4_rcv,
+ .input_handler = xfrmi4_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi4_err,
.priority = 10,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6d15788b5123..e7617c9959c3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1831,6 +1831,7 @@ again:
__xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ xfrm_dev_policy_delete(pol);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -1869,6 +1870,7 @@ again:
__xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ xfrm_dev_policy_delete(pol);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -3349,6 +3351,13 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id))
return ++idx;
if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
+ if (idx < sp->verified_cnt) {
+ /* Secpath entry previously verified, consider optional and
+ * continue searching
+ */
+ continue;
+ }
+
if (start == -1)
start = -2-idx;
break;
@@ -3723,6 +3732,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
* Order is _important_. Later we will implement
* some barriers, but at the moment barriers
* are implied between each two transformations.
+ * Upon success, marks secpath entries as having been
+ * verified to allow them to be skipped in future policy
+ * checks (e.g. nested tunnels).
*/
for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
k = xfrm_policy_ok(tpp[i], sp, k, family, if_id);
@@ -3741,6 +3753,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
xfrm_pols_put(pols, npols);
+ sp->verified_cnt = k;
+
return 1;
}
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);