diff options
Diffstat (limited to 'net')
88 files changed, 1221 insertions, 485 deletions
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 6968e55eb971..28a939d56090 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -101,7 +101,6 @@ static void batadv_dat_purge(struct work_struct *work); */ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) { - INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge); queue_delayed_work(batadv_event_workqueue, &bat_priv->dat.work, msecs_to_jiffies(10000)); } @@ -819,6 +818,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv) if (!bat_priv->dat.hash) return -ENOMEM; + INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge); batadv_dat_start_timer(bat_priv); batadv_tvlv_handler_register(bat_priv, batadv_dat_tvlv_ogm_handler_v1, diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f75ef12f18f7..1ef952bda97d 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -947,8 +947,8 @@ static void find_cis(struct hci_conn *conn, void *data) { struct iso_list_data *d = data; - /* Ignore broadcast */ - if (!bacmp(&conn->dst, BDADDR_ANY)) + /* Ignore broadcast or if CIG don't match */ + if (!bacmp(&conn->dst, BDADDR_ANY) || d->cig != conn->iso_qos.ucast.cig) return; d->count++; @@ -963,12 +963,17 @@ static void cis_cleanup(struct hci_conn *conn) struct hci_dev *hdev = conn->hdev; struct iso_list_data d; + if (conn->iso_qos.ucast.cig == BT_ISO_QOS_CIG_UNSET) + return; + memset(&d, 0, sizeof(d)); d.cig = conn->iso_qos.ucast.cig; /* Check if ISO connection is a CIS and remove CIG if there are * no other connections using it. */ + hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_BOUND, &d); + hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECT, &d); hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &d); if (d.count) return; @@ -1766,24 +1771,23 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) memset(&data, 0, sizeof(data)); - /* Allocate a CIG if not set */ + /* Allocate first still reconfigurable CIG if not set */ if (qos->ucast.cig == BT_ISO_QOS_CIG_UNSET) { - for (data.cig = 0x00; data.cig < 0xff; data.cig++) { + for (data.cig = 0x00; data.cig < 0xf0; data.cig++) { data.count = 0; - data.cis = 0xff; - hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, - BT_BOUND, &data); + hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, + BT_CONNECT, &data); if (data.count) continue; - hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, + hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &data); if (!data.count) break; } - if (data.cig == 0xff) + if (data.cig == 0xf0) return false; /* Update CIG */ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a856b1051d35..48917c68358d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1416,10 +1416,10 @@ int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { - struct smp_ltk *k; + struct smp_ltk *k, *tmp; int removed = 0; - list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { + list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) { if (bacmp(bdaddr, &k->bdaddr) || k->bdaddr_type != bdaddr_type) continue; @@ -1435,9 +1435,9 @@ int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) { - struct smp_irk *k; + struct smp_irk *k, *tmp; - list_for_each_entry_rcu(k, &hdev->identity_resolving_keys, list) { + list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) { if (bacmp(bdaddr, &k->bdaddr) || k->addr_type != addr_type) continue; @@ -2686,7 +2686,9 @@ void hci_unregister_dev(struct hci_dev *hdev) { BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); + mutex_lock(&hdev->unregister_lock); hci_dev_set_flag(hdev, HCI_UNREGISTER); + mutex_unlock(&hdev->unregister_lock); write_lock(&hci_dev_list_lock); list_del(&hdev->list); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d00ef6e3fc45..09ba6d8987ee 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3804,48 +3804,56 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_rp_le_set_cig_params *rp = data; + struct hci_cp_le_set_cig_params *cp; struct hci_conn *conn; - int i = 0; + u8 status = rp->status; + int i; bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_CIG_PARAMS); + if (!cp || rp->num_handles != cp->num_cis || rp->cig_id != cp->cig_id) { + bt_dev_err(hdev, "unexpected Set CIG Parameters response data"); + status = HCI_ERROR_UNSPECIFIED; + } + hci_dev_lock(hdev); - if (rp->status) { + if (status) { while ((conn = hci_conn_hash_lookup_cig(hdev, rp->cig_id))) { conn->state = BT_CLOSED; - hci_connect_cfm(conn, rp->status); + hci_connect_cfm(conn, status); hci_conn_del(conn); } goto unlock; } - rcu_read_lock(); + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2553 + * + * If the Status return parameter is zero, then the Controller shall + * set the Connection_Handle arrayed return parameter to the connection + * handle(s) corresponding to the CIS configurations specified in + * the CIS_IDs command parameter, in the same order. + */ + for (i = 0; i < rp->num_handles; ++i) { + conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, rp->cig_id, + cp->cis[i].cis_id); + if (!conn || !bacmp(&conn->dst, BDADDR_ANY)) + continue; - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (conn->type != ISO_LINK || - conn->iso_qos.ucast.cig != rp->cig_id || - conn->state == BT_CONNECTED) + if (conn->state != BT_BOUND && conn->state != BT_CONNECT) continue; - conn->handle = __le16_to_cpu(rp->handle[i++]); + conn->handle = __le16_to_cpu(rp->handle[i]); bt_dev_dbg(hdev, "%p handle 0x%4.4x parent %p", conn, conn->handle, conn->parent); /* Create CIS if LE is already connected */ - if (conn->parent && conn->parent->state == BT_CONNECTED) { - rcu_read_unlock(); + if (conn->parent && conn->parent->state == BT_CONNECTED) hci_le_create_cis(conn); - rcu_read_lock(); - } - - if (i == rp->num_handles) - break; } - rcu_read_unlock(); - unlock: hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 647a8ce54062..804cde43b4e0 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -629,6 +629,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev) INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work); INIT_LIST_HEAD(&hdev->cmd_sync_work_list); mutex_init(&hdev->cmd_sync_work_lock); + mutex_init(&hdev->unregister_lock); INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work); INIT_WORK(&hdev->reenable_adv_work, reenable_adv); @@ -692,14 +693,19 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; + int err = 0; - if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) - return -ENODEV; + mutex_lock(&hdev->unregister_lock); + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + err = -ENODEV; + goto unlock; + } entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; - + if (!entry) { + err = -ENOMEM; + goto unlock; + } entry->func = func; entry->data = data; entry->destroy = destroy; @@ -710,7 +716,9 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, queue_work(hdev->req_workqueue, &hdev->cmd_sync_work); - return 0; +unlock: + mutex_unlock(&hdev->unregister_lock); + return err; } EXPORT_SYMBOL(hci_cmd_sync_submit); @@ -4543,6 +4551,9 @@ static int hci_init_sync(struct hci_dev *hdev) !hci_dev_test_flag(hdev, HCI_CONFIG)) return 0; + if (hci_dev_test_and_set_flag(hdev, HCI_DEBUGFS_CREATED)) + return 0; + hci_debugfs_create_common(hdev); if (lmp_bredr_capable(hdev)) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 376b523c7b26..c5e8798e297c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4306,6 +4306,10 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, result = __le16_to_cpu(rsp->result); status = __le16_to_cpu(rsp->status); + if (result == L2CAP_CR_SUCCESS && (dcid < L2CAP_CID_DYN_START || + dcid > L2CAP_CID_DYN_END)) + return -EPROTO; + BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status); @@ -4337,6 +4341,11 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, switch (result) { case L2CAP_CR_SUCCESS: + if (__l2cap_get_chan_by_dcid(conn, dcid)) { + err = -EBADSLT; + break; + } + l2cap_state_change(chan, BT_CONFIG); chan->ident = 0; chan->dcid = dcid; @@ -4663,7 +4672,9 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, chan->ops->set_shutdown(chan); + l2cap_chan_unlock(chan); mutex_lock(&conn->chan_lock); + l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNRESET); mutex_unlock(&conn->chan_lock); @@ -4702,7 +4713,9 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, return 0; } + l2cap_chan_unlock(chan); mutex_lock(&conn->chan_lock); + l2cap_chan_lock(chan); l2cap_chan_del(chan, 0); mutex_unlock(&conn->chan_lock); diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 821d4ff303b3..ecff1c947d68 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -126,7 +126,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data) #define J1939_CAN_ID CAN_EFF_FLAG #define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG) -static DEFINE_SPINLOCK(j1939_netdev_lock); +static DEFINE_MUTEX(j1939_netdev_lock); static struct j1939_priv *j1939_priv_create(struct net_device *ndev) { @@ -220,7 +220,7 @@ static void __j1939_rx_release(struct kref *kref) j1939_can_rx_unregister(priv); j1939_ecu_unmap_all(priv); j1939_priv_set(priv->ndev, NULL); - spin_unlock(&j1939_netdev_lock); + mutex_unlock(&j1939_netdev_lock); } /* get pointer to priv without increasing ref counter */ @@ -248,9 +248,9 @@ static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev) { struct j1939_priv *priv; - spin_lock(&j1939_netdev_lock); + mutex_lock(&j1939_netdev_lock); priv = j1939_priv_get_by_ndev_locked(ndev); - spin_unlock(&j1939_netdev_lock); + mutex_unlock(&j1939_netdev_lock); return priv; } @@ -260,14 +260,14 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) struct j1939_priv *priv, *priv_new; int ret; - spin_lock(&j1939_netdev_lock); + mutex_lock(&j1939_netdev_lock); priv = j1939_priv_get_by_ndev_locked(ndev); if (priv) { kref_get(&priv->rx_kref); - spin_unlock(&j1939_netdev_lock); + mutex_unlock(&j1939_netdev_lock); return priv; } - spin_unlock(&j1939_netdev_lock); + mutex_unlock(&j1939_netdev_lock); priv = j1939_priv_create(ndev); if (!priv) @@ -277,29 +277,31 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) spin_lock_init(&priv->j1939_socks_lock); INIT_LIST_HEAD(&priv->j1939_socks); - spin_lock(&j1939_netdev_lock); + mutex_lock(&j1939_netdev_lock); priv_new = j1939_priv_get_by_ndev_locked(ndev); if (priv_new) { /* Someone was faster than us, use their priv and roll * back our's. */ kref_get(&priv_new->rx_kref); - spin_unlock(&j1939_netdev_lock); + mutex_unlock(&j1939_netdev_lock); dev_put(ndev); kfree(priv); return priv_new; } j1939_priv_set(ndev, priv); - spin_unlock(&j1939_netdev_lock); ret = j1939_can_rx_register(priv); if (ret < 0) goto out_priv_put; + mutex_unlock(&j1939_netdev_lock); return priv; out_priv_put: j1939_priv_set(ndev, NULL); + mutex_unlock(&j1939_netdev_lock); + dev_put(ndev); kfree(priv); @@ -308,7 +310,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) void j1939_netdev_stop(struct j1939_priv *priv) { - kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock); + kref_put_mutex(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock); j1939_priv_put(priv); } diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 1790469b2580..35970c25496a 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -1088,6 +1088,11 @@ void j1939_sk_errqueue(struct j1939_session *session, void j1939_sk_send_loop_abort(struct sock *sk, int err) { + struct j1939_sock *jsk = j1939_sk(sk); + + if (jsk->state & J1939_SOCK_ERRQUEUE) + return; + sk->sk_err = err; sk_error_report(sk); diff --git a/net/core/dev.c b/net/core/dev.c index b3c13e041935..c29f3e1db3ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4471,8 +4471,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, u32 next_cpu; u32 ident; - /* First check into global flow table if there is a match */ - ident = sock_flow_table->ents[hash & sock_flow_table->mask]; + /* First check into global flow table if there is a match. + * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow(). + */ + ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]); if ((ident ^ hash) & ~rps_cpu_mask) goto try_rps; @@ -10541,7 +10543,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) return NULL; netdev_init_one_queue(dev, queue, NULL); RCU_INIT_POINTER(queue->qdisc, &noop_qdisc); - queue->qdisc_sleeping = &noop_qdisc; + RCU_INIT_POINTER(queue->qdisc_sleeping, &noop_qdisc); rcu_assign_pointer(dev->ingress_queue, queue); #endif return queue; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index a9060e1f0e43..a29508e1ff35 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1210,7 +1210,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - psock->saved_data_ready(sk); + if (psock) + psock->saved_data_ready(sk); rcu_read_unlock(); } } diff --git a/net/core/sock.c b/net/core/sock.c index 24f2761bdb1d..6e5662ca00fe 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1362,12 +1362,6 @@ set_sndbuf: __sock_set_mark(sk, val); break; case SO_RCVMARK: - if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && - !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { - ret = -EPERM; - break; - } - sock_valbool_flag(sk, SOCK_RCVMARK, valbool); break; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a06b5641287a..b0ebf853cb07 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -191,6 +191,9 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) struct dccp_sock *dp = dccp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + pr_warn_once("DCCP is deprecated and scheduled to be removed in 2025, " + "please contact the netdev mailing list\n"); + icsk->icsk_rto = DCCP_TIMEOUT_INIT; icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index ab1afe67fd18..1afed89e03c0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -403,6 +403,24 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return 0; } +static struct dsa_port * +dsa_switch_preferred_default_local_cpu_port(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp; + + if (!ds->ops->preferred_default_local_cpu_port) + return NULL; + + cpu_dp = ds->ops->preferred_default_local_cpu_port(ds); + if (!cpu_dp) + return NULL; + + if (WARN_ON(!dsa_port_is_cpu(cpu_dp) || cpu_dp->ds != ds)) + return NULL; + + return cpu_dp; +} + /* Perform initial assignment of CPU ports to user ports and DSA links in the * fabric, giving preference to CPU ports local to each switch. Default to * using the first CPU port in the switch tree if the port does not have a CPU @@ -410,12 +428,16 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) */ static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) { - struct dsa_port *cpu_dp, *dp; + struct dsa_port *preferred_cpu_dp, *cpu_dp, *dp; list_for_each_entry(cpu_dp, &dst->ports, list) { if (!dsa_port_is_cpu(cpu_dp)) continue; + preferred_cpu_dp = dsa_switch_preferred_default_local_cpu_port(cpu_dp->ds); + if (preferred_cpu_dp && preferred_cpu_dp != cpu_dp) + continue; + /* Prefer a local CPU port */ dsa_switch_for_each_port(dp, cpu_dp->ds) { /* Prefer the first local CPU port found */ diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h index 8aeaadca844f..4dac965c99df 100644 --- a/net/handshake/handshake.h +++ b/net/handshake/handshake.h @@ -31,7 +31,6 @@ struct handshake_req { struct list_head hr_list; struct rhash_head hr_rhash; unsigned long hr_flags; - struct file *hr_file; const struct handshake_proto *hr_proto; struct sock *hr_sk; void (*hr_odestruct)(struct sock *sk); diff --git a/net/handshake/request.c b/net/handshake/request.c index d78d41abb3d9..94d5cef3e048 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -239,7 +239,6 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, } req->hr_odestruct = req->hr_sk->sk_destruct; req->hr_sk->sk_destruct = handshake_sk_destruct; - req->hr_file = sock->file; ret = -EOPNOTSUPP; net = sock_net(req->hr_sk); @@ -335,9 +334,6 @@ bool handshake_req_cancel(struct sock *sk) return false; } - /* Request accepted and waiting for DONE */ - fput(req->hr_file); - out_true: trace_handshake_cancel(net, req, sk); diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h index e5d8439b9e45..c16db0b326fa 100644 --- a/net/ieee802154/trace.h +++ b/net/ieee802154/trace.h @@ -13,7 +13,7 @@ #define MAXNAME 32 #define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME) -#define WPAN_PHY_ASSIGN strlcpy(__entry->wpan_phy_name, \ +#define WPAN_PHY_ASSIGN strscpy(__entry->wpan_phy_name, \ wpan_phy_name(wpan_phy), \ MAXNAME) #define WPAN_PHY_PR_FMT "%s" diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 3969fa805679..ee848be59e65 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -340,6 +340,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ secpath_reset(skb); + if (skb_needs_linearize(skb, skb->dev->features) && + __skb_linearize(skb)) + return -ENOMEM; return 0; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 40fe70fc2015..88dfe51e68f3 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -34,8 +34,8 @@ static int ip_ttl_min = 1; static int ip_ttl_max = 255; static int tcp_syn_retries_min = 1; static int tcp_syn_retries_max = MAX_TCP_SYNCNT; -static int ip_ping_group_range_min[] = { 0, 0 }; -static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; +static unsigned long ip_ping_group_range_min[] = { 0, 0 }; +static unsigned long ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static u32 u32_max_div_HZ = UINT_MAX / HZ; static int one_day_secs = 24 * 3600; static u32 fib_multipath_hash_fields_all_mask __maybe_unused = @@ -165,7 +165,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, { struct user_namespace *user_ns = current_user_ns(); int ret; - gid_t urange[2]; + unsigned long urange[2]; kgid_t low, high; struct ctl_table tmp = { .data = &urange, @@ -178,7 +178,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, inet_get_ping_group_range_table(table, &low, &high); urange[0] = from_kgid_munged(user_ns, low); urange[1] = from_kgid_munged(user_ns, high); - ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { low = make_kgid(user_ns, urange[0]); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 45dda7889387..4851211aa60d 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -60,12 +60,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, struct tcphdr *th; unsigned int thlen; unsigned int seq; - __be32 delta; unsigned int oldlen; unsigned int mss; struct sk_buff *gso_skb = skb; __sum16 newcheck; bool ooo_okay, copy_destructor; + __wsum delta; th = tcp_hdr(skb); thlen = th->doff * 4; @@ -75,7 +75,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, thlen)) goto out; - oldlen = (u16)~skb->len; + oldlen = ~skb->len; __skb_pull(skb, thlen); mss = skb_shinfo(skb)->gso_size; @@ -110,7 +110,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (skb_is_gso(segs)) mss *= skb_shinfo(segs)->gso_segs; - delta = htonl(oldlen + (thlen + mss)); + delta = (__force __wsum)htonl(oldlen + thlen + mss); skb = segs; th = tcp_hdr(skb); @@ -119,8 +119,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP)) tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss); - newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); + newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta)); while (skb->next) { th->fin = th->psh = 0; @@ -165,11 +164,11 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc)); } - delta = htonl(oldlen + (skb_tail_pointer(skb) - - skb_transport_header(skb)) + - skb->data_len); - th->check = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); + delta = (__force __wsum)htonl(oldlen + + (skb_tail_pointer(skb) - + skb_transport_header(skb)) + + skb->data_len); + th->check = ~csum_fold(csum_add(csum_unfold(th->check), delta)); if (skb->ip_summed == CHECKSUM_PARTIAL) gso_reset_checksum(skb, ~th->check); else diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 56d94d23b9e0..143f93a12f25 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -22,6 +22,8 @@ static int udplite_sk_init(struct sock *sk) { udp_init_sock(sk); udp_sk(sk)->pcflag = UDPLITE_BIT; + pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " + "please contact the netdev mailing list\n"); return 0; } diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index ad2afeef4f10..eac206a290d0 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -164,6 +164,7 @@ drop: kfree_skb(skb); return 0; } +EXPORT_SYMBOL(xfrm4_udp_encap_rcv); int xfrm4_rcv(struct sk_buff *skb) { diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 75c02992c520..772340268997 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -374,6 +374,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features secpath_reset(skb); + if (skb_needs_linearize(skb, skb->dev->features) && + __skb_linearize(skb)) + return -ENOMEM; return 0; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a8d961d3a477..5fa0e37305d9 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -569,24 +569,6 @@ looped_back: return -1; } - if (skb_cloned(skb)) { - if (pskb_expand_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE, 0, - GFP_ATOMIC)) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); - return -1; - } - } else { - err = skb_cow_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE); - if (unlikely(err)) { - kfree_skb(skb); - return -1; - } - } - - hdr = (struct ipv6_rpl_sr_hdr *)skb_transport_header(skb); - if (!pskb_may_pull(skb, ipv6_rpl_srh_size(n, hdr->cmpri, hdr->cmpre))) { kfree_skb(skb); @@ -630,6 +612,17 @@ looped_back: skb_pull(skb, ((hdr->hdrlen + 1) << 3)); skb_postpull_rcsum(skb, oldhdr, sizeof(struct ipv6hdr) + ((hdr->hdrlen + 1) << 3)); + if (unlikely(!hdr->segments_left)) { + if (pskb_expand_head(skb, sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3), 0, + GFP_ATOMIC)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + kfree(buf); + return -1; + } + + oldhdr = ipv6_hdr(skb); + } skb_push(skb, ((chdr->hdrlen + 1) << 3) + sizeof(struct ipv6hdr)); skb_reset_network_header(skb); skb_mac_header_rebuild(skb); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index c4835dbdfcff..f804c11e2146 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -114,7 +114,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) addr_type = ipv6_addr_type(daddr); if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) || (addr_type & IPV6_ADDR_MAPPED) || - (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if)) + (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if && + l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if)) return -EINVAL; ipcm6_init_sk(&ipc6, np); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e3aec46bd466..392aaa373b66 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6412,9 +6412,9 @@ static struct ctl_table ipv6_route_table_template[] = { { .procname = "skip_notify_on_dev_down", .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 3bab0cc13697..8e010d07917a 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -8,6 +8,8 @@ * Changes: * Fixes: */ +#define pr_fmt(fmt) "UDPLite6: " fmt + #include <linux/export.h> #include <linux/proc_fs.h> #include "udp_impl.h" @@ -16,6 +18,8 @@ static int udplitev6_sk_init(struct sock *sk) { udpv6_init_sock(sk); udp_sk(sk)->pcflag = UDPLITE_BIT; + pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " + "please contact the netdev mailing list\n"); return 0; } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 04cbeefd8982..4907ab241d6b 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -86,6 +86,9 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) __be32 *udpdata32; __u16 encap_type = up->encap_type; + if (skb->protocol == htons(ETH_P_IP)) + return xfrm4_udp_encap_rcv(sk, skb); + /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 86b2036d73ff..f2d08dbccfb7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4865,11 +4865,16 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int res; if (wdev->use_4addr) return -EOPNOTSUPP; - return ieee80211_vif_set_links(sdata, wdev->valid_links); + mutex_lock(&sdata->local->mtx); + res = ieee80211_vif_set_links(sdata, wdev->valid_links); + mutex_unlock(&sdata->local->mtx); + + return res; } static void ieee80211_del_intf_link(struct wiphy *wiphy, @@ -4878,7 +4883,9 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + mutex_lock(&sdata->local->mtx); ieee80211_vif_set_links(sdata, wdev->valid_links); + mutex_unlock(&sdata->local->mtx); } static int sta_add_link_station(struct ieee80211_local *local, diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 729f261520c7..0322abae0825 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -3,7 +3,7 @@ * HE handling * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright(c) 2019 - 2022 Intel Corporation + * Copyright(c) 2019 - 2023 Intel Corporation */ #include "ieee80211_i.h" @@ -114,6 +114,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct link_sta_info *link_sta) { struct ieee80211_sta_he_cap *he_cap = &link_sta->pub->he_cap; + const struct ieee80211_sta_he_cap *own_he_cap_ptr; struct ieee80211_sta_he_cap own_he_cap; struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie; u8 he_ppe_size; @@ -123,12 +124,16 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, memset(he_cap, 0, sizeof(*he_cap)); - if (!he_cap_ie || - !ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif))) + if (!he_cap_ie) return; - own_he_cap = sband->iftype_data->he_cap; + own_he_cap_ptr = + ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); + if (!own_he_cap_ptr) + return; + + own_he_cap = *own_he_cap_ptr; /* Make sure size is OK */ mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b0372e76f373..4159fb65038b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2312,7 +2312,7 @@ ieee802_11_parse_elems(const u8 *start, size_t len, bool action, return ieee802_11_parse_elems_crc(start, len, action, 0, 0, bss); } -void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos); +void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id); extern const int ieee802_1d_to_ac[8]; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index e82db88a47f8..40f030b8ece9 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -2,7 +2,7 @@ /* * MLO link handling * - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include <linux/slab.h> #include <linux/kernel.h> @@ -409,6 +409,7 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, IEEE80211_CHANCTX_SHARED); WARN_ON_ONCE(ret); + ieee80211_mgd_set_link_qos_params(link); ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE | @@ -423,7 +424,6 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_TWT | BSS_CHANGED_HE_OBSS_PD | BSS_CHANGED_HE_BSS_COLOR); - ieee80211_mgd_set_link_qos_params(link); } old_active = sdata->vif.active_links; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e13a0354c397..5a4303130ef2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1217,6 +1217,7 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb, const u16 *inner) { unsigned int skb_len = skb->len; + bool at_extension = false; bool added = false; int i, j; u8 *len, *list_len = NULL; @@ -1228,7 +1229,6 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb, for (i = 0; i < PRESENT_ELEMS_MAX && outer[i]; i++) { u16 elem = outer[i]; bool have_inner = false; - bool at_extension = false; /* should at least be sorted in the sense of normal -> ext */ WARN_ON(at_extension && elem < PRESENT_ELEM_EXT_OFFS); @@ -1257,8 +1257,14 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb, } *list_len += 1; skb_put_u8(skb, (u8)elem); + added = true; } + /* if we added a list but no extension list, make a zero-len one */ + if (added && (!at_extension || !list_len)) + skb_put_u8(skb, 0); + + /* if nothing added remove extension element completely */ if (!added) skb_trim(skb, skb_len); else @@ -1366,10 +1372,11 @@ static void ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata, ieee80211_add_non_inheritance_elem(skb, outer_present_elems, link_present_elems); - ieee80211_fragment_element(skb, subelem_len); + ieee80211_fragment_element(skb, subelem_len, + IEEE80211_MLE_SUBELEM_FRAGMENT); } - ieee80211_fragment_element(skb, ml_elem_len); + ieee80211_fragment_element(skb, ml_elem_len, WLAN_EID_FRAGMENT); } static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 58222c077898..fc6e130364da 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2110,7 +2110,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* either the frame has been decrypted or will be dropped */ status->flag |= RX_FLAG_DECRYPTED; - if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE && + if (unlikely(ieee80211_is_beacon(fc) && (result & RX_DROP_UNUSABLE) && rx->sdata->dev)) cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, skb->data, skb->len); @@ -4965,7 +4965,9 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, } if (unlikely(rx->sta && rx->sta->sta.mlo) && - is_unicast_ether_addr(hdr->addr1)) { + is_unicast_ether_addr(hdr->addr1) && + !ieee80211_is_probe_resp(hdr->frame_control) && + !ieee80211_is_beacon(hdr->frame_control)) { /* translate to MLD addresses */ if (ether_addr_equal(link->conf->addr, hdr->addr1)) ether_addr_copy(hdr->addr1, rx->sdata->vif.addr); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0d9fbc8458fd..13b522dab0a3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4445,7 +4445,7 @@ static void ieee80211_mlo_multicast_tx(struct net_device *dev, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - unsigned long links = sdata->vif.valid_links; + unsigned long links = sdata->vif.active_links; unsigned int link; u32 ctrl_flags = IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX; @@ -5528,7 +5528,7 @@ ieee80211_beacon_get_template_ema_list(struct ieee80211_hw *hw, { struct ieee80211_ema_beacons *ema_beacons = NULL; - WARN_ON(__ieee80211_beacon_get(hw, vif, NULL, false, link_id, 0, + WARN_ON(__ieee80211_beacon_get(hw, vif, NULL, true, link_id, 0, &ema_beacons)); return ema_beacons; @@ -6040,7 +6040,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); if (WARN_ON_ONCE(link == ARRAY_SIZE(sdata->vif.link_conf))) - link = ffs(sdata->vif.valid_links) - 1; + link = ffs(sdata->vif.active_links) - 1; } IEEE80211_SKB_CB(skb)->control.flags |= @@ -6076,7 +6076,7 @@ void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata, band = chanctx_conf->def.chan->band; } else { WARN_ON(link_id >= 0 && - !(sdata->vif.valid_links & BIT(link_id))); + !(sdata->vif.active_links & BIT(link_id))); /* MLD transmissions must not rely on the band */ band = 0; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 4bf76150925d..3bd07a0a782f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -5049,7 +5049,7 @@ u8 *ieee80211_ie_build_eht_cap(u8 *pos, return pos; } -void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos) +void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id) { unsigned int elem_len; @@ -5069,7 +5069,7 @@ void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos) memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len); /* place the fragment ID */ len_pos += 255 + 1; - *len_pos = WLAN_EID_FRAGMENT; + *len_pos = frag_id; /* and point to fragment length to update later */ len_pos++; } diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h index 689396d6c76a..1574ecc48075 100644 --- a/net/mac802154/trace.h +++ b/net/mac802154/trace.h @@ -14,7 +14,7 @@ #define MAXNAME 32 #define LOCAL_ENTRY __array(char, wpan_phy_name, MAXNAME) -#define LOCAL_ASSIGN strlcpy(__entry->wpan_phy_name, \ +#define LOCAL_ASSIGN strscpy(__entry->wpan_phy_name, \ wpan_phy_name(local->hw.phy), MAXNAME) #define LOCAL_PR_FMT "%s" #define LOCAL_PR_ARG __entry->wpan_phy_name diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 78c924506e83..76612bca275a 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -87,8 +87,15 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) unsigned int subflows_max; int ret = 0; - if (mptcp_pm_is_userspace(msk)) - return mptcp_userspace_pm_active(msk); + if (mptcp_pm_is_userspace(msk)) { + if (mptcp_userspace_pm_active(msk)) { + spin_lock_bh(&pm->lock); + pm->subflows++; + spin_unlock_bh(&pm->lock); + return true; + } + return false; + } subflows_max = mptcp_pm_get_subflows_max(msk); @@ -181,8 +188,16 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, struct mptcp_pm_data *pm = &msk->pm; bool update_subflows; - update_subflows = (subflow->request_join || subflow->mp_join) && - mptcp_pm_is_kernel(msk); + update_subflows = subflow->request_join || subflow->mp_join; + if (mptcp_pm_is_userspace(msk)) { + if (update_subflows) { + spin_lock_bh(&pm->lock); + pm->subflows--; + spin_unlock_bh(&pm->lock); + } + return; + } + if (!READ_ONCE(pm->work_pending) && !update_subflows) return; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index bc343dab5e3f..1224dfca5bf3 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1047,6 +1047,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (err) return err; + inet_sk_state_store(newsk, TCP_LISTEN); err = kernel_listen(ssock, backlog); if (err) return err; @@ -1558,6 +1559,24 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) return ret; } +void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) +{ + struct mptcp_rm_list alist = { .nr = 0 }; + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, rm_list, list) { + remove_anno_list_by_saddr(msk, &entry->addr); + if (alist.nr < MPTCP_RM_IDS_MAX) + alist.ids[alist.nr++] = entry->addr.id; + } + + if (alist.nr) { + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &alist); + spin_unlock_bh(&msk->pm.lock); + } +} + void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list) { diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 27a275805c06..b06aa58dfcf2 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -69,6 +69,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, MPTCP_PM_MAX_ADDR_ID + 1, 1); list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list); + msk->pm.local_addr_used++; ret = e->addr.id; } else if (match) { ret = entry->addr.id; @@ -79,6 +80,31 @@ append_err: return ret; } +/* If the subflow is closed from the other peer (not via a + * subflow destroy command then), we want to keep the entry + * not to assign the same ID to another address and to be + * able to send RM_ADDR after the removal of the subflow. + */ +static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *addr) +{ + struct mptcp_pm_addr_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, &addr->addr, false)) { + /* TODO: a refcount is needed because the entry can + * be used multiple times (e.g. fullmesh mode). + */ + list_del_rcu(&entry->list); + kfree(entry); + msk->pm.local_addr_used--; + return 0; + } + } + + return -EINVAL; +} + int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) @@ -171,6 +197,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&msk->pm.lock); if (mptcp_pm_alloc_anno_list(msk, &addr_val)) { + msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &addr_val.addr, false); mptcp_pm_nl_addr_send_ack(msk); } @@ -232,7 +259,7 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) list_move(&match->list, &free_list); - mptcp_pm_remove_addrs_and_subflows(msk, &free_list); + mptcp_pm_remove_addrs(msk, &free_list); release_sock((struct sock *)msk); @@ -251,6 +278,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_pm_addr_entry local = { 0 }; struct mptcp_addr_info addr_r; struct mptcp_addr_info addr_l; struct mptcp_sock *msk; @@ -302,12 +330,26 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) goto create_err; } + local.addr = addr_l; + err = mptcp_userspace_pm_append_new_local_addr(msk, &local); + if (err < 0) { + GENL_SET_ERR_MSG(info, "did not match address and id"); + goto create_err; + } + lock_sock(sk); err = __mptcp_subflow_connect(sk, &addr_l, &addr_r); release_sock(sk); + spin_lock_bh(&msk->pm.lock); + if (err) + mptcp_userspace_pm_delete_local_addr(msk, &local); + else + msk->pm.subflows++; + spin_unlock_bh(&msk->pm.lock); + create_err: sock_put((struct sock *)msk); return err; @@ -420,7 +462,11 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r); if (ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct mptcp_pm_addr_entry entry = { .addr = addr_l }; + spin_lock_bh(&msk->pm.lock); + mptcp_userspace_pm_delete_local_addr(msk, &entry); + spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); mptcp_close_ssk(sk, ssk, subflow); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 67311e7d5b21..a6c7f2d24909 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -44,7 +44,7 @@ enum { static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp; static void __mptcp_destroy_sock(struct sock *sk); -static void __mptcp_check_send_data_fin(struct sock *sk); +static void mptcp_check_send_data_fin(struct sock *sk); DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); static struct net_device mptcp_napi_dev; @@ -424,8 +424,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - return !__mptcp_check_fallback(msk) && - ((1 << sk->sk_state) & + return ((1 << sk->sk_state) & (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) && msk->write_seq == READ_ONCE(msk->snd_una); } @@ -583,9 +582,6 @@ static bool mptcp_check_data_fin(struct sock *sk) u64 rcv_data_fin_seq; bool ret = false; - if (__mptcp_check_fallback(msk)) - return ret; - /* Need to ack a DATA_FIN received from a peer while this side * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2. * msk->rcv_data_fin was set when parsing the incoming options @@ -623,7 +619,8 @@ static bool mptcp_check_data_fin(struct sock *sk) } ret = true; - mptcp_send_ack(msk); + if (!__mptcp_check_fallback(msk)) + mptcp_send_ack(msk); mptcp_close_wake_up(sk); } return ret; @@ -850,12 +847,12 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) return true; } -static void __mptcp_flush_join_list(struct sock *sk) +static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list) { struct mptcp_subflow_context *tmp, *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) { + list_for_each_entry_safe(subflow, tmp, join_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); @@ -897,49 +894,6 @@ bool mptcp_schedule_work(struct sock *sk) return false; } -void mptcp_subflow_eof(struct sock *sk) -{ - if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags)) - mptcp_schedule_work(sk); -} - -static void mptcp_check_for_eof(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - int receivers = 0; - - mptcp_for_each_subflow(msk, subflow) - receivers += !subflow->rx_eof; - if (receivers) - return; - - if (!(sk->sk_shutdown & RCV_SHUTDOWN)) { - /* hopefully temporary hack: propagate shutdown status - * to msk, when all subflows agree on it - */ - WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); - - smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ - sk->sk_data_ready(sk); - } - - switch (sk->sk_state) { - case TCP_ESTABLISHED: - inet_sk_state_store(sk, TCP_CLOSE_WAIT); - break; - case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_CLOSING); - break; - case TCP_FIN_WAIT2: - inet_sk_state_store(sk, TCP_CLOSE); - break; - default: - return; - } - mptcp_close_wake_up(sk); -} - static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -1609,7 +1563,7 @@ out: if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); if (do_check_data_fin) - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); } static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first) @@ -1727,7 +1681,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) *copied_syn = 0; } else if (ret && ret != -EINPROGRESS) { - mptcp_disconnect(sk, 0); + /* The disconnect() op called by tcp_sendmsg_fastopen()/ + * __inet_stream_connect() can fail, due to looking check, + * see mptcp_disconnect(). + * Attempt it again outside the problematic scope. + */ + if (!mptcp_disconnect(sk, 0)) + sk->sk_socket->state = SS_UNCONNECTED; } inet_sk(sk)->defer_connect = 0; @@ -2158,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, break; } - if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) - mptcp_check_for_eof(msk); - if (sk->sk_shutdown & RCV_SHUTDOWN) { /* race breaker: the shutdown could be after the * previous receive queue check @@ -2389,7 +2346,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { - tcp_disconnect(ssk, 0); + /* The MPTCP code never wait on the subflow sockets, TCP-level + * disconnect should never fail + */ + WARN_ON_ONCE(tcp_disconnect(ssk, 0)); msk->subflow->state = SS_UNCONNECTED; mptcp_subflow_ctx_reset(subflow); release_sock(ssk); @@ -2408,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, kfree_rcu(subflow, rcu); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ - if (ssk->sk_state == TCP_LISTEN) { - tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(sk, ssk); - inet_csk_listen_stop(ssk); - mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); - } - __tcp_close(ssk, 0); /* close acquired an extra ref */ @@ -2671,16 +2624,12 @@ static void mptcp_worker(struct work_struct *work) if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN))) goto unlock; - mptcp_check_data_fin_ack(sk); - mptcp_check_fastclose(msk); mptcp_pm_nl_work(msk); - if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) - mptcp_check_for_eof(msk); - - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); + mptcp_check_data_fin_ack(sk); mptcp_check_data_fin(sk); if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) @@ -2812,13 +2761,19 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) break; fallthrough; case TCP_SYN_SENT: - tcp_disconnect(ssk, O_NONBLOCK); + WARN_ON_ONCE(tcp_disconnect(ssk, O_NONBLOCK)); break; default: if (__mptcp_check_fallback(mptcp_sk(sk))) { pr_debug("Fallback"); ssk->sk_shutdown |= how; tcp_shutdown(ssk, how); + + /* simulate the data_fin ack reception to let the state + * machine move forward + */ + WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt); + mptcp_schedule_work(sk); } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); tcp_send_ack(ssk); @@ -2858,7 +2813,7 @@ static int mptcp_close_state(struct sock *sk) return next & TCP_ACTION_FIN; } -static void __mptcp_check_send_data_fin(struct sock *sk) +static void mptcp_check_send_data_fin(struct sock *sk) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); @@ -2876,19 +2831,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk) WRITE_ONCE(msk->snd_nxt, msk->write_seq); - /* fallback socket will not get data_fin/ack, can move to the next - * state now - */ - if (__mptcp_check_fallback(msk)) { - WRITE_ONCE(msk->snd_una, msk->write_seq); - if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) { - inet_sk_state_store(sk, TCP_CLOSE); - mptcp_close_wake_up(sk); - } else if (sk->sk_state == TCP_FIN_WAIT1) { - inet_sk_state_store(sk, TCP_FIN_WAIT2); - } - } - mptcp_for_each_subflow(msk, subflow) { struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); @@ -2908,7 +2850,7 @@ static void __mptcp_wr_shutdown(struct sock *sk) WRITE_ONCE(msk->write_seq, msk->write_seq + 1); WRITE_ONCE(msk->snd_data_fin_enable, 1); - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); } static void __mptcp_destroy_sock(struct sock *sk) @@ -2953,10 +2895,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk) return EPOLLIN | EPOLLRDNORM; } -static void mptcp_listen_inuse_dec(struct sock *sk) +static void mptcp_check_listen_stop(struct sock *sk) { - if (inet_sk_state_load(sk) == TCP_LISTEN) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + struct sock *ssk; + + if (inet_sk_state_load(sk) != TCP_LISTEN) + return; + + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + ssk = mptcp_sk(sk)->first; + if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN)) + return; + + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + mptcp_subflow_queue_clean(sk, ssk); + inet_csk_listen_stop(ssk); + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); + tcp_set_state(ssk, TCP_CLOSE); + release_sock(ssk); } bool __mptcp_close(struct sock *sk, long timeout) @@ -2969,7 +2925,7 @@ bool __mptcp_close(struct sock *sk, long timeout) WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { - mptcp_listen_inuse_dec(sk); + mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); goto cleanup; } @@ -3073,15 +3029,20 @@ static int mptcp_disconnect(struct sock *sk, int flags) { struct mptcp_sock *msk = mptcp_sk(sk); + /* Deny disconnect if other threads are blocked in sk_wait_event() + * or inet_wait_for_connect(). + */ + if (sk->sk_wait_pending) + return -EBUSY; + /* We are on the fastopen error path. We can't call straight into the * subflows cleanup code due to lock nesting (we are already under - * msk->firstsocket lock). Do nothing and leave the cleanup to the - * caller. + * msk->firstsocket lock). */ if (msk->fastopening) - return 0; + return -EBUSY; - mptcp_listen_inuse_dec(sk); + mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); mptcp_stop_timer(sk); @@ -3140,6 +3101,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); #endif + nsk->sk_wait_pending = 0; __mptcp_init_sock(nsk); msk = mptcp_sk(nsk); @@ -3327,9 +3289,14 @@ static void mptcp_release_cb(struct sock *sk) for (;;) { unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | msk->push_pending; + struct list_head join_list; + if (!flags) break; + INIT_LIST_HEAD(&join_list); + list_splice_init(&msk->join_list, &join_list); + /* the following actions acquire the subflow socket lock * * 1) can't be invoked in atomic scope @@ -3340,8 +3307,9 @@ static void mptcp_release_cb(struct sock *sk) msk->push_pending = 0; msk->cb_flags &= ~flags; spin_unlock_bh(&sk->sk_lock.slock); + if (flags & BIT(MPTCP_FLUSH_JOIN_LIST)) - __mptcp_flush_join_list(sk); + __mptcp_flush_join_list(sk, &join_list); if (flags & BIT(MPTCP_PUSH_PENDING)) __mptcp_push_pending(sk, 0); if (flags & BIT(MPTCP_RETRANSMIT)) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index c5255258bfb3..d3783a7056e1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -113,7 +113,6 @@ /* MPTCP socket atomic flags */ #define MPTCP_NOSPACE 1 #define MPTCP_WORK_RTX 2 -#define MPTCP_WORK_EOF 3 #define MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 @@ -476,14 +475,13 @@ struct mptcp_subflow_context { send_mp_fail : 1, send_fastclose : 1, send_infinite_map : 1, - rx_eof : 1, remote_key_valid : 1, /* received the peer key from */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ - __unused : 8; + __unused : 9; enum mptcp_data_avail data_avail; u32 remote_nonce; u64 thmac; @@ -720,7 +718,6 @@ static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit) void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void __mptcp_error_report(struct sock *sk); -void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) { @@ -832,6 +829,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); +void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4688daa6b38b..d9c8b21c6076 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1749,14 +1749,16 @@ static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; + struct mptcp_sock *msk; __subflow_state_change(sk); + msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { mptcp_propagate_sndbuf(parent, sk); mptcp_do_fallback(sk); - mptcp_rcv_space_init(mptcp_sk(parent), sk); - pr_fallback(mptcp_sk(parent)); + mptcp_rcv_space_init(msk, sk); + pr_fallback(msk); subflow->conn_finished = 1; mptcp_set_connected(parent); } @@ -1772,11 +1774,12 @@ static void subflow_state_change(struct sock *sk) subflow_sched_work_if_closed(mptcp_sk(parent), sk); - if (__mptcp_check_fallback(mptcp_sk(parent)) && - !subflow->rx_eof && subflow_is_done(sk)) { - subflow->rx_eof = 1; - mptcp_subflow_eof(parent); - } + /* when the fallback subflow closes the rx side, trigger a 'dummy' + * ingress data fin, so that the msk state will follow along + */ + if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk && + mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true)) + mptcp_schedule_work(parent); } void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 46ebee9400da..9a6b64779e64 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1694,6 +1694,14 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { + if (retried) { + __ip_set_get(set); + nfnl_unlock(NFNL_SUBSYS_IPSET); + cond_resched(); + nfnl_lock(NFNL_SUBSYS_IPSET); + __ip_set_put(set); + } + ip_set_lock(set); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); ip_set_unlock(set); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index feb1d7fcb09f..a80b960223e1 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -1207,6 +1207,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->transport_header = skb->network_header; skb_set_inner_ipproto(skb, next_protocol); + skb_set_inner_mac_header(skb, skb_inner_network_offset(skb)); if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { bool check = false; @@ -1349,6 +1350,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->transport_header = skb->network_header; skb_set_inner_ipproto(skb, next_protocol); + skb_set_inner_mac_header(skb, skb_inner_network_offset(skb)); if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { bool check = false; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c4ccfec6cb98..d119f1d4c2fc 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2260,6 +2260,9 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct, return 0; helper = rcu_dereference(help->helper); + if (!helper) + return 0; + if (!(helper->flags & NF_CT_HELPER_F_USERSPACE)) return 0; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 04bd0ed4d2ae..b0ef48b21dcb 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -317,12 +317,12 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) EXPORT_SYMBOL_GPL(flow_offload_add); void flow_offload_refresh(struct nf_flowtable *flow_table, - struct flow_offload *flow) + struct flow_offload *flow, bool force) { u32 timeout; timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); - if (timeout - READ_ONCE(flow->timeout) > HZ) + if (force || timeout - READ_ONCE(flow->timeout) > HZ) WRITE_ONCE(flow->timeout, timeout); else return; @@ -334,6 +334,12 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, } EXPORT_SYMBOL_GPL(flow_offload_refresh); +static bool nf_flow_is_outdated(const struct flow_offload *flow) +{ + return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) && + !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); +} + static inline bool nf_flow_has_expired(const struct flow_offload *flow) { return nf_flow_timeout_delta(flow->timeout) <= 0; @@ -423,7 +429,8 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { if (nf_flow_has_expired(flow) || - nf_ct_is_dying(flow->ct)) + nf_ct_is_dying(flow->ct) || + nf_flow_is_outdated(flow)) flow_offload_teardown(flow); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 19efba1e51ef..3bbaf9c7ea46 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -384,7 +384,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; - flow_offload_refresh(flow_table, flow); + flow_offload_refresh(flow_table, flow, false); nf_flow_encap_pop(skb, tuplehash); thoff -= offset; @@ -650,7 +650,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; - flow_offload_refresh(flow_table, flow); + flow_offload_refresh(flow_table, flow, false); nf_flow_encap_pop(skb, tuplehash); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dc5675962de4..4c7937fd803f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -151,6 +151,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, return NULL; INIT_LIST_HEAD(&trans->list); + INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; trans->ctx = *ctx; @@ -163,13 +164,20 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } -static void nft_trans_destroy(struct nft_trans *trans) +static void nft_trans_list_del(struct nft_trans *trans) { list_del(&trans->list); + list_del(&trans->binding_list); +} + +static void nft_trans_destroy(struct nft_trans *trans) +{ + nft_trans_list_del(trans); kfree(trans); } -static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set, + bool bind) { struct nftables_pernet *nft_net; struct net *net = ctx->net; @@ -183,16 +191,80 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) switch (trans->msg_type) { case NFT_MSG_NEWSET: if (nft_trans_set(trans) == set) - nft_trans_set_bound(trans) = true; + nft_trans_set_bound(trans) = bind; break; case NFT_MSG_NEWSETELEM: if (nft_trans_elem_set(trans) == set) - nft_trans_elem_set_bound(trans) = true; + nft_trans_elem_set_bound(trans) = bind; break; } } } +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, true); +} + +static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, false); +} + +static void __nft_chain_trans_bind(const struct nft_ctx *ctx, + struct nft_chain *chain, bool bind) +{ + struct nftables_pernet *nft_net; + struct net *net = ctx->net; + struct nft_trans *trans; + + if (!nft_chain_binding(chain)) + return; + + nft_net = nft_pernet(net); + list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { + switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain(trans) == chain) + nft_trans_chain_bound(trans) = bind; + break; + case NFT_MSG_NEWRULE: + if (trans->ctx.chain == chain) + nft_trans_rule_bound(trans) = bind; + break; + } + } +} + +static void nft_chain_trans_bind(const struct nft_ctx *ctx, + struct nft_chain *chain) +{ + __nft_chain_trans_bind(ctx, chain, true); +} + +int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + if (!nft_chain_binding(chain)) + return 0; + + if (nft_chain_binding(ctx->chain)) + return -EOPNOTSUPP; + + if (chain->bound) + return -EBUSY; + + chain->bound = true; + chain->use++; + nft_chain_trans_bind(ctx, chain); + + return 0; +} + +void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + __nft_chain_trans_bind(ctx, chain, false); +} + static int nft_netdev_register_hooks(struct net *net, struct list_head *hook_list) { @@ -292,6 +364,19 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr { struct nftables_pernet *nft_net = nft_pernet(net); + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (!nft_trans_set_update(trans) && + nft_set_is_anonymous(nft_trans_set(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + } + list_add_tail(&trans->list, &nft_net->commit_list); } @@ -338,8 +423,9 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID])); } } - + nft_trans_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); + return trans; } @@ -357,8 +443,7 @@ static int nft_delchain(struct nft_ctx *ctx) return 0; } -static void nft_rule_expr_activate(const struct nft_ctx *ctx, - struct nft_rule *rule) +void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_expr *expr; @@ -371,9 +456,8 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, } } -static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, - struct nft_rule *rule, - enum nft_trans_phase phase) +void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule, + enum nft_trans_phase phase) { struct nft_expr *expr; @@ -495,6 +579,58 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, return __nft_trans_set_add(ctx, msg_type, set, NULL); } +static void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_deactivate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_deactivate(ctx->net, set, elem); + + return 0; +} + +struct nft_set_elem_catchall { + struct list_head list; + struct rcu_head rcu; + void *elem; +}; + +static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, + struct nft_set *set) +{ + u8 genmask = nft_genmask_next(ctx->net); + struct nft_set_elem_catchall *catchall; + struct nft_set_elem elem; + struct nft_set_ext *ext; + + list_for_each_entry(catchall, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + if (!nft_set_elem_active(ext, genmask)) + continue; + + elem.priv = catchall->elem; + nft_setelem_data_deactivate(ctx->net, set, &elem); + break; + } +} + +static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_deactivate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); + + nft_map_catchall_deactivate(ctx, set); +} + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) { int err; @@ -503,6 +639,9 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) if (err < 0) return err; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_deactivate_next(ctx->net, set); ctx->table->use--; @@ -1600,6 +1739,8 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family, if (nft_base_chain_netdev(family, ops->hooknum)) { nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS); + if (!nest_devs) + goto nla_put_failure; if (!hook_list) hook_list = &basechain->hook_list; @@ -2224,7 +2365,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, return 0; } -static int nft_chain_add(struct nft_table *table, struct nft_chain *chain) +int nft_chain_add(struct nft_table *table, struct nft_chain *chain) { int err; @@ -2526,6 +2667,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, nft_trans_basechain(trans) = basechain; INIT_LIST_HEAD(&nft_trans_chain_hooks(trans)); list_splice(&hook.list, &nft_trans_chain_hooks(trans)); + if (nla[NFTA_CHAIN_HOOK]) + module_put(hook.type->owner); nft_trans_commit_list_add_tail(ctx->net, trans); @@ -2668,21 +2811,18 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack); } -static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_chain *chain, +static int nft_delchain_hook(struct nft_ctx *ctx, + struct nft_base_chain *basechain, struct netlink_ext_ack *extack) { + const struct nft_chain *chain = &basechain->chain; const struct nlattr * const *nla = ctx->nla; struct nft_chain_hook chain_hook = {}; - struct nft_base_chain *basechain; struct nft_hook *this, *hook; LIST_HEAD(chain_del_list); struct nft_trans *trans; int err; - if (!nft_is_base_chain(chain)) - return -EOPNOTSUPP; - - basechain = nft_base_chain(chain); err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook, ctx->family, chain->flags, extack); if (err < 0) @@ -2767,7 +2907,12 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, if (chain->flags & NFT_CHAIN_HW_OFFLOAD) return -EOPNOTSUPP; - return nft_delchain_hook(&ctx, chain, extack); + if (nft_is_base_chain(chain)) { + struct nft_base_chain *basechain = nft_base_chain(chain); + + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) + return nft_delchain_hook(&ctx, basechain, extack); + } } if (info->nlh->nlmsg_flags & NLM_F_NONREC && @@ -3488,8 +3633,7 @@ err_fill_rule_info: return err; } -static void nf_tables_rule_destroy(const struct nft_ctx *ctx, - struct nft_rule *rule) +void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_expr *expr, *next; @@ -3506,7 +3650,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } -void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) +static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); @@ -3594,12 +3738,6 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, return 0; } -struct nft_set_elem_catchall { - struct list_head list; - struct rcu_head rcu; - void *elem; -}; - int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) { u8 genmask = nft_genmask_next(ctx->net); @@ -3842,7 +3980,8 @@ err_destroy_flow_rule: if (flow) nft_flow_rule_destroy(flow); err_release_rule: - nf_tables_rule_release(&ctx, rule); + nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR); + nf_tables_rule_destroy(&ctx, rule); err_release_expr: for (i = 0; i < n; i++) { if (expr_info[i].ops) { @@ -4774,6 +4913,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; @@ -4782,6 +4924,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } @@ -4828,6 +4974,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags); if (err < 0) return err; @@ -4917,6 +5066,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, set->num_exprs = num_exprs; set->handle = nf_tables_alloc_handle(table); + INIT_LIST_HEAD(&set->pending_update); err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) @@ -4930,7 +5080,7 @@ err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); err_set_destroy: - ops->destroy(set); + ops->destroy(&ctx, set); err_set_init: kfree(set->name); err_set_name: @@ -4945,7 +5095,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { list_del_rcu(&catchall->list); - nft_set_elem_destroy(set, catchall->elem, true); + nf_tables_set_elem_destroy(ctx, set, catchall->elem); kfree_rcu(catchall, rcu); } } @@ -4960,7 +5110,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(ctx, set->exprs[i]); - set->ops->destroy(set); + set->ops->destroy(ctx, set); nft_set_catchall_destroy(ctx, set); kfree(set->name); kvfree(set); @@ -5125,10 +5275,60 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } } +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_activate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_activate(ctx->net, set, elem); + + return 0; +} + +static void nft_map_catchall_activate(const struct nft_ctx *ctx, + struct nft_set *set) +{ + u8 genmask = nft_genmask_next(ctx->net); + struct nft_set_elem_catchall *catchall; + struct nft_set_elem elem; + struct nft_set_ext *ext; + + list_for_each_entry(catchall, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + if (!nft_set_elem_active(ext, genmask)) + continue; + + elem.priv = catchall->elem; + nft_setelem_data_activate(ctx->net, set, &elem); + break; + } +} + +static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_activate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); + + nft_map_catchall_activate(ctx, set); +} + void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (nft_set_is_anonymous(set)) + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(ctx, set); + nft_clear(ctx->net, set); + } set->use++; } @@ -5139,14 +5339,28 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, enum nft_trans_phase phase) { switch (phase) { - case NFT_TRANS_PREPARE: + case NFT_TRANS_PREPARE_ERROR: + nft_set_trans_unbind(ctx, set); if (nft_set_is_anonymous(set)) nft_deactivate_next(ctx->net, set); set->use--; + break; + case NFT_TRANS_PREPARE: + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + + nft_deactivate_next(ctx->net, set); + } + set->use--; return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: + if (nft_set_is_anonymous(set) && + set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + set->use--; fallthrough; default: @@ -5899,6 +6113,7 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, __nft_set_elem_expr_destroy(ctx, expr); } +/* Drop references and destroy. Called from gc, dynset and abort path. */ void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr) { @@ -5920,11 +6135,11 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); -/* Only called from commit path, nft_setelem_data_deactivate() already deals - * with the refcounting from the preparation phase. +/* Destroy element. References have been already dropped in the preparation + * path via nft_setelem_data_deactivate(). */ -static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem) +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); @@ -6487,19 +6702,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (flags) *nft_set_ext_flags(ext) = flags; + if (obj) { + *nft_set_ext_obj(ext) = obj; + obj->use++; + } if (ulen > 0) { if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) { err = -EINVAL; - goto err_elem_userdata; + goto err_elem_free; } udata = nft_set_ext_userdata(ext); udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } - if (obj) { - *nft_set_ext_obj(ext) = obj; - obj->use++; - } err = nft_set_elem_expr_setup(ctx, &tmpl, ext, expr_array, num_exprs); if (err < 0) goto err_elem_free; @@ -6554,10 +6769,7 @@ err_set_full: err_element_clash: kfree(trans); err_elem_free: - if (obj) - obj->use--; -err_elem_userdata: - nf_tables_set_elem_destroy(ctx, set, elem.priv); + nft_set_elem_destroy(set, elem.priv, true); err_parse_data: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_release(&elem.data.val, desc.type); @@ -6601,7 +6813,8 @@ static int nf_tables_newsetelem(struct sk_buff *skb, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); @@ -6634,7 +6847,6 @@ static int nf_tables_newsetelem(struct sk_buff *skb, void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { struct nft_chain *chain; - struct nft_rule *rule; if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { @@ -6642,15 +6854,6 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) case NFT_GOTO: chain = data->verdict.chain; chain->use++; - - if (!nft_chain_is_bound(chain)) - break; - - chain->table->use++; - list_for_each_entry(rule, &chain->rules, list) - chain->use++; - - nft_chain_add(chain->table, chain); break; } } @@ -6885,7 +7088,9 @@ static int nf_tables_delsetelem(struct sk_buff *skb, set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); @@ -7667,6 +7872,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, enum nft_trans_phase phase) { switch (phase) { + case NFT_TRANS_PREPARE_ERROR: case NFT_TRANS_PREPARE: case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: @@ -8939,7 +9145,7 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) synchronize_rcu(); list_for_each_entry_safe(trans, next, &head, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nft_commit_release(trans); } } @@ -9005,7 +9211,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha continue; } - if (WARN_ON_ONCE(data + expr->ops->size > data_boundary)) + if (WARN_ON_ONCE(data + size + expr->ops->size > data_boundary)) return -ENOMEM; memcpy(data + size, expr, expr->ops->size); @@ -9273,10 +9479,25 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation) } } +static void nft_set_commit_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->commit) + continue; + + set->ops->commit(set); + } +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; @@ -9289,6 +9510,27 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } + list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (!nft_trans_set_update(trans) && + nft_set_is_anonymous(nft_trans_set(trans)) && + !nft_trans_set_bound(trans)) { + pr_warn_once("nftables ruleset with unbound set\n"); + return -EINVAL; + } + break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans)) && + !nft_trans_chain_bound(trans)) { + pr_warn_once("nftables ruleset with unbound chain\n"); + return -EINVAL; + } + break; + } + } + /* 0. Validate ruleset, otherwise roll back for error reporting. */ if (nf_tables_validate(net) < 0) return -EAGAIN; @@ -9451,6 +9693,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_NEWSETELEM); + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: @@ -9465,6 +9712,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) atomic_dec(&te->set->nelems); te->set->ndeact--; } + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { @@ -9527,6 +9779,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_set_commit_update(&set_update_list); + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, nft_net->base_seq); @@ -9586,10 +9840,25 @@ static void nf_tables_abort_release(struct nft_trans *trans) kfree(trans); } +static void nft_set_abort_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->abort) + continue; + + set->ops->abort(set); + } +} + static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; if (action == NFNL_ABORT_VALIDATE && @@ -9631,7 +9900,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { - if (nft_chain_is_bound(trans->ctx.chain)) { + if (nft_trans_chain_bound(trans)) { nft_trans_destroy(trans); break; } @@ -9654,6 +9923,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: + if (nft_trans_rule_bound(trans)) { + nft_trans_destroy(trans); + break; + } trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, @@ -9688,6 +9961,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DESTROYSET: trans->ctx.table->use++; nft_clear(trans->ctx.net, nft_trans_set(trans)); + if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: @@ -9699,6 +9975,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_setelem_remove(net, te->set, &te->elem); if (!nft_setelem_is_catchall(te->set, &te->elem)) atomic_dec(&te->set->nelems); + + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: @@ -9709,6 +9991,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) if (!nft_setelem_is_catchall(te->set, &te->elem)) te->set->ndeact--; + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_NEWOBJ: @@ -9751,11 +10038,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } } + nft_set_abort_update(&set_update_list); + synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nf_tables_abort_release(trans); } @@ -10204,22 +10493,12 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, static void nft_verdict_uninit(const struct nft_data *data) { struct nft_chain *chain; - struct nft_rule *rule; switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: chain = data->verdict.chain; chain->use--; - - if (!nft_chain_is_bound(chain)) - break; - - chain->table->use--; - list_for_each_entry(rule, &chain->rules, list) - chain->use--; - - nft_chain_del(chain); break; } } @@ -10454,6 +10733,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table) list_for_each_entry_safe(set, ns, &table->sets, list) { list_del(&set->list); table->use--; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(&ctx, set); + nft_set_destroy(&ctx, set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { @@ -10538,6 +10820,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); mutex_init(&nft_net->commit_mutex); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index ae7146475d17..c9fbe0f707b5 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -533,7 +533,8 @@ ack: * processed, this avoids that the same error is * reported several times when replaying the batch. */ - if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) { + if (err == -ENOMEM || + nfnl_err_add(&err_list, nlh, err, &extack) < 0) { /* We failed to enqueue an error, reset the * list of errors and send OOM to userspace * pointing to the batch header. diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index ee6840bd5933..8f1bfa6ccc2d 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -439,3 +439,4 @@ module_init(nfnl_osf_init); module_exit(nfnl_osf_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 84eae7cabc67..2527a01486ef 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -323,7 +323,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track, dreg = priv->dreg; regcount = DIV_ROUND_UP(priv->len, NFT_REG32_SIZE); for (i = 0; i < regcount; i++, dreg++) - track->regs[priv->dreg].bitwise = expr; + track->regs[dreg].bitwise = expr; return false; } diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index c9d2f7c29f53..3d76ebfe8939 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -76,11 +76,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx, switch (priv->data.verdict.code) { case NFT_JUMP: case NFT_GOTO: - if (nft_chain_is_bound(chain)) { - err = -EBUSY; - goto err1; - } - chain->bound = true; + err = nf_tables_bind_chain(ctx, chain); + if (err < 0) + return err; break; default: break; @@ -98,6 +96,31 @@ static void nft_immediate_activate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + const struct nft_data *data = &priv->data; + struct nft_ctx chain_ctx; + struct nft_chain *chain; + struct nft_rule *rule; + + if (priv->dreg == NFT_REG_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + chain = data->verdict.chain; + if (!nft_chain_binding(chain)) + break; + + chain_ctx = *ctx; + chain_ctx.chain = chain; + + list_for_each_entry(rule, &chain->rules, list) + nft_rule_expr_activate(&chain_ctx, rule); + + nft_clear(ctx->net, chain); + break; + default: + break; + } + } return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg)); } @@ -107,6 +130,43 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx, enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + const struct nft_data *data = &priv->data; + struct nft_ctx chain_ctx; + struct nft_chain *chain; + struct nft_rule *rule; + + if (priv->dreg == NFT_REG_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + chain = data->verdict.chain; + if (!nft_chain_binding(chain)) + break; + + chain_ctx = *ctx; + chain_ctx.chain = chain; + + list_for_each_entry(rule, &chain->rules, list) + nft_rule_expr_deactivate(&chain_ctx, rule, phase); + + switch (phase) { + case NFT_TRANS_PREPARE_ERROR: + nf_tables_unbind_chain(ctx, chain); + fallthrough; + case NFT_TRANS_PREPARE: + nft_deactivate_next(ctx->net, chain); + break; + default: + nft_chain_del(chain); + chain->bound = false; + chain->table->use--; + break; + } + break; + default: + break; + } + } if (phase == NFT_TRANS_COMMIT) return; @@ -131,15 +191,27 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, case NFT_GOTO: chain = data->verdict.chain; - if (!nft_chain_is_bound(chain)) + if (!nft_chain_binding(chain)) + break; + + /* Rule construction failed, but chain is already bound: + * let the transaction records release this chain and its rules. + */ + if (chain->bound) { + chain->use--; break; + } + /* Rule has been deleted, release chain and its rules. */ chain_ctx = *ctx; chain_ctx.chain = chain; - list_for_each_entry_safe(rule, n, &chain->rules, list) - nf_tables_rule_release(&chain_ctx, rule); - + chain->use--; + list_for_each_entry_safe(rule, n, &chain->rules, list) { + chain->use--; + list_del(&rule->list); + nf_tables_rule_destroy(&chain_ctx, rule); + } nf_tables_chain_destroy(&chain_ctx); break; default: diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 96081ac8d2b4..1e5e7a181e0b 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -271,13 +271,14 @@ static int nft_bitmap_init(const struct nft_set *set, return 0; } -static void nft_bitmap_destroy(const struct nft_set *set) +static void nft_bitmap_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be, *n; list_for_each_entry_safe(be, n, &priv->list, head) - nft_set_elem_destroy(set, be, true); + nf_tables_set_elem_destroy(ctx, set, be); } static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 76de6c8d9865..0b73cb0e752f 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -400,19 +400,31 @@ static int nft_rhash_init(const struct nft_set *set, return 0; } +struct nft_rhash_ctx { + const struct nft_ctx ctx; + const struct nft_set *set; +}; + static void nft_rhash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy(arg, ptr, true); + struct nft_rhash_ctx *rhash_ctx = arg; + + nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr); } -static void nft_rhash_destroy(const struct nft_set *set) +static void nft_rhash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_ctx rhash_ctx = { + .ctx = *ctx, + .set = set, + }; cancel_delayed_work_sync(&priv->gc_work); rcu_barrier(); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, - (void *)set); + (void *)&rhash_ctx); } /* Number of buckets is stored in u32, so cap our result to 1U<<31 */ @@ -643,7 +655,8 @@ static int nft_hash_init(const struct nft_set *set, return 0; } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_hash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; @@ -653,7 +666,7 @@ static void nft_hash_destroy(const struct nft_set *set) for (i = 0; i < priv->buckets; i++) { hlist_for_each_entry_safe(he, next, &priv->table[i], node) { hlist_del_rcu(&he->node); - nft_set_elem_destroy(set, he, true); + nf_tables_set_elem_destroy(ctx, set, he); } } } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 06d46d182634..0452ee586c1c 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1600,17 +1600,10 @@ static void pipapo_free_fields(struct nft_pipapo_match *m) } } -/** - * pipapo_reclaim_match - RCU callback to free fields from old matching data - * @rcu: RCU head - */ -static void pipapo_reclaim_match(struct rcu_head *rcu) +static void pipapo_free_match(struct nft_pipapo_match *m) { - struct nft_pipapo_match *m; int i; - m = container_of(rcu, struct nft_pipapo_match, rcu); - for_each_possible_cpu(i) kfree(*per_cpu_ptr(m->scratch, i)); @@ -1625,7 +1618,19 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) } /** - * pipapo_commit() - Replace lookup data with current working copy + * pipapo_reclaim_match - RCU callback to free fields from old matching data + * @rcu: RCU head + */ +static void pipapo_reclaim_match(struct rcu_head *rcu) +{ + struct nft_pipapo_match *m; + + m = container_of(rcu, struct nft_pipapo_match, rcu); + pipapo_free_match(m); +} + +/** + * nft_pipapo_commit() - Replace lookup data with current working copy * @set: nftables API set representation * * While at it, check if we should perform garbage collection on the working @@ -1635,7 +1640,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * We also need to create a new working copy for subsequent insertions and * deletions. */ -static void pipapo_commit(const struct nft_set *set) +static void nft_pipapo_commit(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *new_clone, *old; @@ -1660,6 +1665,26 @@ static void pipapo_commit(const struct nft_set *set) priv->clone = new_clone; } +static void nft_pipapo_abort(const struct nft_set *set) +{ + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_match *new_clone, *m; + + if (!priv->dirty) + return; + + m = rcu_dereference(priv->match); + + new_clone = pipapo_clone(m); + if (IS_ERR(new_clone)) + return; + + priv->dirty = false; + + pipapo_free_match(priv->clone); + priv->clone = new_clone; +} + /** * nft_pipapo_activate() - Mark element reference as active given key, commit * @net: Network namespace @@ -1667,8 +1692,7 @@ static void pipapo_commit(const struct nft_set *set) * @elem: nftables API element representation containing key data * * On insertion, elements are added to a copy of the matching data currently - * in use for lookups, and not directly inserted into current lookup data, so - * we'll take care of that by calling pipapo_commit() here. Both + * in use for lookups, and not directly inserted into current lookup data. Both * nft_pipapo_insert() and nft_pipapo_activate() are called once for each * element, hence we can't purpose either one as a real commit operation. */ @@ -1684,8 +1708,6 @@ static void nft_pipapo_activate(const struct net *net, nft_set_elem_change_active(net, set, &e->ext); nft_set_elem_clear_busy(&e->ext); - - pipapo_commit(set); } /** @@ -1931,7 +1953,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, if (i == m->field_count) { priv->dirty = true; pipapo_drop(m, rulemap); - pipapo_commit(set); return; } @@ -1953,12 +1974,16 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { struct nft_pipapo *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->net); struct nft_pipapo_match *m; struct nft_pipapo_field *f; int i, r; rcu_read_lock(); - m = rcu_dereference(priv->match); + if (iter->genmask == nft_genmask_cur(net)) + m = rcu_dereference(priv->match); + else + m = priv->clone; if (unlikely(!m)) goto out; @@ -2127,10 +2152,12 @@ out_scratch: /** * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array + * @ctx: context * @set: nftables API set representation * @m: matching data pointing to key mapping array */ -static void nft_set_pipapo_match_destroy(const struct nft_set *set, +static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, struct nft_pipapo_match *m) { struct nft_pipapo_field *f; @@ -2147,15 +2174,17 @@ static void nft_set_pipapo_match_destroy(const struct nft_set *set, e = f->mt[r].e; - nft_set_elem_destroy(set, e, true); + nf_tables_set_elem_destroy(ctx, set, e); } } /** * nft_pipapo_destroy() - Free private data for set and all committed elements + * @ctx: context * @set: nftables API set representation */ -static void nft_pipapo_destroy(const struct nft_set *set) +static void nft_pipapo_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; @@ -2165,7 +2194,7 @@ static void nft_pipapo_destroy(const struct nft_set *set) if (m) { rcu_barrier(); - nft_set_pipapo_match_destroy(set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); @@ -2182,7 +2211,7 @@ static void nft_pipapo_destroy(const struct nft_set *set) m = priv->clone; if (priv->dirty) - nft_set_pipapo_match_destroy(set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(priv->clone->scratch_aligned); @@ -2230,6 +2259,8 @@ const struct nft_set_type nft_set_pipapo_type = { .init = nft_pipapo_init, .destroy = nft_pipapo_destroy, .gc_init = nft_pipapo_gc_init, + .commit = nft_pipapo_commit, + .abort = nft_pipapo_abort, .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; @@ -2252,6 +2283,8 @@ const struct nft_set_type nft_set_pipapo_avx2_type = { .init = nft_pipapo_init, .destroy = nft_pipapo_destroy, .gc_init = nft_pipapo_gc_init, + .commit = nft_pipapo_commit, + .abort = nft_pipapo_abort, .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 2f114aa10f1a..5c05c9b990fb 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -664,7 +664,8 @@ static int nft_rbtree_init(const struct nft_set *set, return 0; } -static void nft_rbtree_destroy(const struct nft_set *set) +static void nft_rbtree_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; @@ -675,7 +676,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe, true); + nf_tables_set_elem_destroy(ctx, set, rbe); } } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index e1990baf3a3b..dc9485854002 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -71,4 +71,3 @@ MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); MODULE_DESCRIPTION("Passive OS fingerprint matching."); MODULE_ALIAS("ipt_osf"); MODULE_ALIAS("ip6t_osf"); -MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 54c083003947..27511c90a26f 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -857,7 +857,8 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap, offset -= iter->startbit; idx = offset / NETLBL_CATMAP_MAPSIZE; - iter->bitmap[idx] |= bitmap << (offset % NETLBL_CATMAP_MAPSIZE); + iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap + << (offset % NETLBL_CATMAP_MAPSIZE); return 0; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index fcee6012293b..58f530f60172 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -236,9 +236,6 @@ void ovs_dp_detach_port(struct vport *p) /* First drop references to device. */ hlist_del_rcu(&p->dp_hash_node); - /* Free percpu memory */ - free_percpu(p->upcall_stats); - /* Then destroy it. */ ovs_vport_del(p); } @@ -1858,12 +1855,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_portids; } - vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); - if (!vport->upcall_stats) { - err = -ENOMEM; - goto err_destroy_vport; - } - err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_NEW); BUG_ON(err < 0); @@ -1876,8 +1867,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_vport: - ovs_dp_detach_port(vport); err_destroy_portids: kfree(rcu_dereference_raw(dp->upcall_portids)); err_unlock_and_destroy_meters: @@ -2322,12 +2311,6 @@ restart: goto exit_unlock_free; } - vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); - if (!vport->upcall_stats) { - err = -ENOMEM; - goto exit_unlock_free_vport; - } - err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_NEW, GFP_KERNEL); @@ -2345,8 +2328,6 @@ restart: ovs_notify(&dp_vport_genl_family, reply, info); return 0; -exit_unlock_free_vport: - ovs_dp_detach_port(vport); exit_unlock_free: ovs_unlock(); kfree_skb(reply); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 7e0f5c45b512..972ae01a70f7 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -124,6 +124,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, { struct vport *vport; size_t alloc_size; + int err; alloc_size = sizeof(struct vport); if (priv_size) { @@ -135,17 +136,29 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, if (!vport) return ERR_PTR(-ENOMEM); + vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); + if (!vport->upcall_stats) { + err = -ENOMEM; + goto err_kfree_vport; + } + vport->dp = parms->dp; vport->port_no = parms->port_no; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) { - kfree(vport); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto err_free_percpu; } return vport; + +err_free_percpu: + free_percpu(vport->upcall_stats); +err_kfree_vport: + kfree(vport); + return ERR_PTR(err); } EXPORT_SYMBOL_GPL(ovs_vport_alloc); @@ -165,6 +178,7 @@ void ovs_vport_free(struct vport *vport) * it is safe to use raw dereference. */ kfree(rcu_dereference_raw(vport->upcall_portids)); + free_percpu(vport->upcall_stats); kfree(vport); } EXPORT_SYMBOL_GPL(ovs_vport_free); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9cc0bc7c71ed..abc71a06d634 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -610,6 +610,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct flow_offload_tuple tuple = {}; enum ip_conntrack_info ctinfo; struct tcphdr *tcph = NULL; + bool force_refresh = false; struct flow_offload *flow; struct nf_conn *ct; u8 dir; @@ -647,6 +648,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, * established state, then don't refresh. */ return false; + force_refresh = true; } if (tcph && (unlikely(tcph->fin || tcph->rst))) { @@ -660,7 +662,12 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, else ctinfo = IP_CT_ESTABLISHED_REPLY; - flow_offload_refresh(nf_ft, flow); + flow_offload_refresh(nf_ft, flow, force_refresh); + if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { + /* Process this flow in SW to allow promoting to ASSURED */ + return false; + } + nf_conntrack_get(&ct->ct_general); nf_ct_set(skb, ct, ctinfo); if (nf_ft->flags & NF_FLOWTABLE_COUNTER) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fc945c7e4123..c819b812a899 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -13,7 +13,10 @@ #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/ip.h> +#include <linux/ipv6.h> #include <linux/slab.h> +#include <net/ipv6.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_pedit.h> @@ -327,28 +330,58 @@ static bool offset_valid(struct sk_buff *skb, int offset) return true; } -static void pedit_skb_hdr_offset(struct sk_buff *skb, +static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type) +{ + const int noff = skb_network_offset(skb); + int ret = -EINVAL; + struct iphdr _iph; + + switch (skb->protocol) { + case htons(ETH_P_IP): { + const struct iphdr *iph = skb_header_pointer(skb, noff, sizeof(_iph), &_iph); + + if (!iph) + goto out; + *hoffset = noff + iph->ihl * 4; + ret = 0; + break; + } + case htons(ETH_P_IPV6): + ret = ipv6_find_hdr(skb, hoffset, header_type, NULL, NULL) == header_type ? 0 : -EINVAL; + break; + } +out: + return ret; +} + +static int pedit_skb_hdr_offset(struct sk_buff *skb, enum pedit_header_type htype, int *hoffset) { + int ret = -EINVAL; /* 'htype' is validated in the netlink parsing */ switch (htype) { case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: - if (skb_mac_header_was_set(skb)) + if (skb_mac_header_was_set(skb)) { *hoffset = skb_mac_offset(skb); + ret = 0; + } break; case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK: case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: *hoffset = skb_network_offset(skb); + ret = 0; break; case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_TCP); + break; case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: - if (skb_transport_header_was_set(skb)) - *hoffset = skb_transport_offset(skb); + ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_UDP); break; default: break; } + return ret; } TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, @@ -384,6 +417,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, int hoffset = 0; u32 *ptr, hdata; u32 val; + int rc; if (tkey_ex) { htype = tkey_ex->htype; @@ -392,7 +426,11 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, tkey_ex++; } - pedit_skb_hdr_offset(skb, htype, &hoffset); + rc = pedit_skb_hdr_offset(skb, htype, &hoffset); + if (rc) { + pr_info_ratelimited("tc action pedit unable to extract header offset for header type (0x%x)\n", htype); + goto bad; + } if (tkey->offmask) { u8 *d, _d; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 227cba58ce9f..2e9dce03d1ec 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -357,23 +357,23 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, opt.burst = PSCHED_NS2TICKS(p->tcfp_burst); if (p->rate_present) { psched_ratecfg_getrate(&opt.rate, &p->rate); - if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) && + if ((p->rate.rate_bytes_ps >= (1ULL << 32)) && nla_put_u64_64bit(skb, TCA_POLICE_RATE64, - police->params->rate.rate_bytes_ps, + p->rate.rate_bytes_ps, TCA_POLICE_PAD)) goto nla_put_failure; } if (p->peak_present) { psched_ratecfg_getrate(&opt.peakrate, &p->peak); - if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) && + if ((p->peak.rate_bytes_ps >= (1ULL << 32)) && nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64, - police->params->peak.rate_bytes_ps, + p->peak.rate_bytes_ps, TCA_POLICE_PAD)) goto nla_put_failure; } if (p->pps_present) { if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64, - police->params->ppsrate.rate_pkts_ps, + p->ppsrate.rate_pkts_ps, TCA_POLICE_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2621550bfddc..a193cc7b3241 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -43,8 +43,6 @@ #include <net/flow_offload.h> #include <net/tc_wrapper.h> -extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; - /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); @@ -659,8 +657,8 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, { struct tcf_block *block = chain->block; const struct tcf_proto_ops *tmplt_ops; + unsigned int refcnt, non_act_refcnt; bool free_block = false; - unsigned int refcnt; void *tmplt_priv; mutex_lock(&block->lock); @@ -680,13 +678,15 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, * save these to temporary variables. */ refcnt = --chain->refcnt; + non_act_refcnt = refcnt - chain->action_refcnt; tmplt_ops = chain->tmplt_ops; tmplt_priv = chain->tmplt_priv; - /* The last dropped non-action reference will trigger notification. */ - if (refcnt - chain->action_refcnt == 0 && !by_act) { - tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index, - block, NULL, 0, 0, false); + if (non_act_refcnt == chain->explicitly_created && !by_act) { + if (non_act_refcnt == 0) + tc_chain_notify_delete(tmplt_ops, tmplt_priv, + chain->index, block, NULL, 0, 0, + false); /* Last reference to chain, no need to lock. */ chain->flushing = false; } @@ -2952,6 +2952,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, return PTR_ERR(ops); if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); + module_put(ops->owner); return -EOPNOTSUPP; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4e2e269f121f..d15d50de7980 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -718,13 +718,19 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr *est, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { - int err; + int err, ifindex = -1; err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags, fl_flags, extack); if (err < 0) return err; + if (tb[TCA_U32_INDEV]) { + ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); + if (ifindex < 0) + return -EINVAL; + } + if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); struct tc_u_hnode *ht_down = NULL, *ht_old; @@ -759,13 +765,9 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, tcf_bind_filter(tp, &n->res, base); } - if (tb[TCA_U32_INDEV]) { - int ret; - ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); - if (ret < 0) - return -EINVAL; - n->ifindex = ret; - } + if (ifindex >= 0) + n->ifindex = ifindex; + return 0; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 014209b1dd58..aa6b1fe65151 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -309,7 +309,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) if (dev_ingress_queue(dev)) q = qdisc_match_from_root( - dev_ingress_queue(dev)->qdisc_sleeping, + rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping), handle); out: return q; @@ -328,7 +328,8 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) nq = dev_ingress_queue_rcu(dev); if (nq) - q = qdisc_match_from_root(nq->qdisc_sleeping, handle); + q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping), + handle); out: return q; } @@ -634,8 +635,13 @@ EXPORT_SYMBOL(qdisc_watchdog_init); void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, u64 delta_ns) { - if (test_bit(__QDISC_STATE_DEACTIVATED, - &qdisc_root_sleeping(wd->qdisc)->state)) + bool deactivated; + + rcu_read_lock(); + deactivated = test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(wd->qdisc)->state); + rcu_read_unlock(); + if (deactivated) return; if (hrtimer_is_queued(&wd->timer)) { @@ -1073,17 +1079,29 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (parent == NULL) { unsigned int i, num_q, ingress; + struct netdev_queue *dev_queue; ingress = 0; num_q = dev->num_tx_queues; if ((q && q->flags & TCQ_F_INGRESS) || (new && new->flags & TCQ_F_INGRESS)) { - num_q = 1; ingress = 1; - if (!dev_ingress_queue(dev)) { + dev_queue = dev_ingress_queue(dev); + if (!dev_queue) { NL_SET_ERR_MSG(extack, "Device does not have an ingress queue"); return -ENOENT; } + + q = rtnl_dereference(dev_queue->qdisc_sleeping); + + /* This is the counterpart of that qdisc_refcount_inc_nz() call in + * __tcf_qdisc_find() for filter requests. + */ + if (!qdisc_refcount_dec_if_one(q)) { + NL_SET_ERR_MSG(extack, + "Current ingress or clsact Qdisc has ongoing filter requests"); + return -EBUSY; + } } if (dev->flags & IFF_UP) @@ -1094,18 +1112,26 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (new && new->ops->attach && !ingress) goto skip; - for (i = 0; i < num_q; i++) { - struct netdev_queue *dev_queue = dev_ingress_queue(dev); - - if (!ingress) + if (!ingress) { + for (i = 0; i < num_q; i++) { dev_queue = netdev_get_tx_queue(dev, i); + old = dev_graft_qdisc(dev_queue, new); - old = dev_graft_qdisc(dev_queue, new); - if (new && i > 0) - qdisc_refcount_inc(new); - - if (!ingress) + if (new && i > 0) + qdisc_refcount_inc(new); qdisc_put(old); + } + } else { + old = dev_graft_qdisc(dev_queue, NULL); + + /* {ingress,clsact}_destroy() @old before grafting @new to avoid + * unprotected concurrent accesses to net_device::miniq_{in,e}gress + * pointer(s) in mini_qdisc_pair_swap(). + */ + qdisc_notify(net, skb, n, classid, old, new, extack); + qdisc_destroy(old); + + dev_graft_qdisc(dev_queue, new); } skip: @@ -1119,8 +1145,6 @@ skip: if (new && new->ops->attach) new->ops->attach(new); - } else { - notify_and_destroy(net, skb, n, classid, old, new, extack); } if (dev->flags & IFF_UP) @@ -1478,7 +1502,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, } q = qdisc_leaf(p, clid); } else if (dev_ingress_queue(dev)) { - q = dev_ingress_queue(dev)->qdisc_sleeping; + q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } } else { q = rtnl_dereference(dev->qdisc); @@ -1564,7 +1588,7 @@ replay: } q = qdisc_leaf(p, clid); } else if (dev_ingress_queue_create(dev)) { - q = dev_ingress_queue(dev)->qdisc_sleeping; + q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } } else { q = rtnl_dereference(dev->qdisc); @@ -1805,8 +1829,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) dev_queue = dev_ingress_queue(dev); if (dev_queue && - tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, - &q_idx, s_q_idx, false, + tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping), + skb, cb, &q_idx, s_q_idx, false, tca[TCA_DUMP_INVISIBLE]) < 0) goto done; @@ -2249,8 +2273,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) dev_queue = dev_ingress_queue(dev); if (dev_queue && - tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, - &t, s_t, false) < 0) + tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping), + skb, tcm, cb, &t, s_t, false) < 0) goto done; done: @@ -2302,7 +2326,9 @@ static struct pernet_operations psched_net_ops = { .exit = psched_net_exit, }; +#if IS_ENABLED(CONFIG_RETPOLINE) DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); +#endif static int __init pktsched_init(void) { diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 6980796d435d..591d87d5e5c0 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -201,6 +201,11 @@ out: return NET_XMIT_CN; } +static struct netlink_range_validation fq_pie_q_range = { + .min = 1, + .max = 1 << 20, +}; + static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = { [TCA_FQ_PIE_LIMIT] = {.type = NLA_U32}, [TCA_FQ_PIE_FLOWS] = {.type = NLA_U32}, @@ -208,7 +213,8 @@ static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = { [TCA_FQ_PIE_TUPDATE] = {.type = NLA_U32}, [TCA_FQ_PIE_ALPHA] = {.type = NLA_U32}, [TCA_FQ_PIE_BETA] = {.type = NLA_U32}, - [TCA_FQ_PIE_QUANTUM] = {.type = NLA_U32}, + [TCA_FQ_PIE_QUANTUM] = + NLA_POLICY_FULL_RANGE(NLA_U32, &fq_pie_q_range), [TCA_FQ_PIE_MEMORY_LIMIT] = {.type = NLA_U32}, [TCA_FQ_PIE_ECN_PROB] = {.type = NLA_U32}, [TCA_FQ_PIE_ECN] = {.type = NLA_U32}, @@ -373,6 +379,7 @@ static void fq_pie_timer(struct timer_list *t) spinlock_t *root_lock; /* to lock qdisc for probability calculations */ u32 idx; + rcu_read_lock(); root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); @@ -385,6 +392,7 @@ static void fq_pie_timer(struct timer_list *t) mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate); spin_unlock(root_lock); + rcu_read_unlock(); } static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 37e41f972f69..5d7e23f4cc0e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -648,7 +648,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { static struct netdev_queue noop_netdev_queue = { RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc), - .qdisc_sleeping = &noop_qdisc, + RCU_POINTER_INITIALIZER(qdisc_sleeping, &noop_qdisc), }; struct Qdisc noop_qdisc = { @@ -1046,7 +1046,7 @@ static void qdisc_free_cb(struct rcu_head *head) qdisc_free(q); } -static void qdisc_destroy(struct Qdisc *qdisc) +static void __qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; @@ -1070,6 +1070,14 @@ static void qdisc_destroy(struct Qdisc *qdisc) call_rcu(&qdisc->rcu, qdisc_free_cb); } +void qdisc_destroy(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return; + + __qdisc_destroy(qdisc); +} + void qdisc_put(struct Qdisc *qdisc) { if (!qdisc) @@ -1079,7 +1087,7 @@ void qdisc_put(struct Qdisc *qdisc) !refcount_dec_and_test(&qdisc->refcnt)) return; - qdisc_destroy(qdisc); + __qdisc_destroy(qdisc); } EXPORT_SYMBOL(qdisc_put); @@ -1094,7 +1102,7 @@ void qdisc_put_unlocked(struct Qdisc *qdisc) !refcount_dec_and_rtnl_lock(&qdisc->refcnt)) return; - qdisc_destroy(qdisc); + __qdisc_destroy(qdisc); rtnl_unlock(); } EXPORT_SYMBOL(qdisc_put_unlocked); @@ -1103,7 +1111,7 @@ EXPORT_SYMBOL(qdisc_put_unlocked); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc) { - struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; + struct Qdisc *oqdisc = rtnl_dereference(dev_queue->qdisc_sleeping); spinlock_t *root_lock; root_lock = qdisc_lock(oqdisc); @@ -1112,7 +1120,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, /* ... and graft new one */ if (qdisc == NULL) qdisc = &noop_qdisc; - dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); spin_unlock_bh(root_lock); @@ -1125,12 +1133,12 @@ static void shutdown_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc_default) { - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { rcu_assign_pointer(dev_queue->qdisc, qdisc_default); - dev_queue->qdisc_sleeping = qdisc_default; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc_default); qdisc_put(qdisc); } @@ -1154,7 +1162,7 @@ static void attach_one_default_qdisc(struct net_device *dev, if (!netif_is_multiqueue(dev)) qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); } static void attach_default_qdiscs(struct net_device *dev) @@ -1167,7 +1175,7 @@ static void attach_default_qdiscs(struct net_device *dev) if (!netif_is_multiqueue(dev) || dev->priv_flags & IFF_NO_QUEUE) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - qdisc = txq->qdisc_sleeping; + qdisc = rtnl_dereference(txq->qdisc_sleeping); rcu_assign_pointer(dev->qdisc, qdisc); qdisc_refcount_inc(qdisc); } else { @@ -1186,7 +1194,7 @@ static void attach_default_qdiscs(struct net_device *dev) netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); dev->priv_flags |= IFF_NO_QUEUE; netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - qdisc = txq->qdisc_sleeping; + qdisc = rtnl_dereference(txq->qdisc_sleeping); rcu_assign_pointer(dev->qdisc, qdisc); qdisc_refcount_inc(qdisc); dev->priv_flags ^= IFF_NO_QUEUE; @@ -1202,7 +1210,7 @@ static void transition_one_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_need_watchdog) { - struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *new_qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); int *need_watchdog_p = _need_watchdog; if (!(new_qdisc->flags & TCQ_F_BUILTIN)) @@ -1272,7 +1280,7 @@ static void dev_reset_queue(struct net_device *dev, struct Qdisc *qdisc; bool nolock; - qdisc = dev_queue->qdisc_sleeping; + qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); if (!qdisc) return; @@ -1303,7 +1311,7 @@ static bool some_qdisc_is_busy(struct net_device *dev) int val; dev_queue = netdev_get_tx_queue(dev, i); - q = dev_queue->qdisc_sleeping; + q = rtnl_dereference(dev_queue->qdisc_sleeping); root_lock = qdisc_lock(q); spin_lock_bh(root_lock); @@ -1379,7 +1387,7 @@ EXPORT_SYMBOL(dev_deactivate); static int qdisc_change_tx_queue_len(struct net_device *dev, struct netdev_queue *dev_queue) { - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); const struct Qdisc_ops *ops = qdisc->ops; if (ops->change_tx_queue_len) @@ -1404,7 +1412,7 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx) unsigned int i; for (i = new_real_tx; i < dev->real_num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping); /* Only update the default qdiscs we created, * qdiscs with handles are always hashed. */ @@ -1412,7 +1420,7 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx) qdisc_hash_del(qdisc); } for (i = dev->real_num_tx_queues; i < new_real_tx; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping); if (qdisc != &noop_qdisc && !qdisc->handle) qdisc_hash_add(qdisc, false); } @@ -1449,7 +1457,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, struct Qdisc *qdisc = _qdisc; rcu_assign_pointer(dev_queue->qdisc, qdisc); - dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); } void dev_init_scheduler(struct net_device *dev) diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index d0bc660d7401..c860119a8f09 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -141,7 +141,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) * qdisc totals are added at end. */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { - qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping); spin_lock_bh(qdisc_lock(qdisc)); gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, @@ -202,7 +202,7 @@ static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); - return dev_queue->qdisc_sleeping; + return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long mq_find(struct Qdisc *sch, u32 classid) @@ -221,7 +221,7 @@ static int mq_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle |= TC_H_MIN(cl); - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; return 0; } @@ -230,7 +230,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); - sch = dev_queue->qdisc_sleeping; + sch = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index dc5a0ff50b14..ab69ff7577fc 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -557,7 +557,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) * qdisc totals are added at end. */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { - qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping); spin_lock_bh(qdisc_lock(qdisc)); gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, @@ -604,7 +604,7 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) if (!dev_queue) return NULL; - return dev_queue->qdisc_sleeping; + return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long mqprio_find(struct Qdisc *sch, u32 classid) @@ -637,7 +637,7 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_parent = (tc < 0) ? 0 : TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(tc + TC_H_MIN_PRIORITY)); - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; } else { tcm->tcm_parent = TC_H_ROOT; tcm->tcm_info = 0; @@ -693,7 +693,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, } else { struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); - sch = dev_queue->qdisc_sleeping; + sch = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6ef3021e1169..e79be1b3e74d 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -966,6 +966,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, if (ret < 0) return ret; + sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg; old_loss_model = q->loss_model; @@ -974,7 +975,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]); if (ret) { q->loss_model = old_loss_model; - return ret; + goto unlock; } } else { q->loss_model = CLG_RANDOM; @@ -1041,6 +1042,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, /* capping jitter to the range acceptable by tabledist() */ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); +unlock: + sch_tree_unlock(sch); return ret; get_table_failure: @@ -1050,7 +1053,8 @@ get_table_failure: */ q->clg = old_clg; q->loss_model = old_loss_model; - return ret; + + goto unlock; } static int netem_init(struct Qdisc *sch, struct nlattr *opt, diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 2152a56d73f8..2da6250ec346 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -421,8 +421,10 @@ static void pie_timer(struct timer_list *t) { struct pie_sched_data *q = from_timer(q, t, adapt_timer); struct Qdisc *sch = q->sch; - spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + spinlock_t *root_lock; + rcu_read_lock(); + root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); pie_calculate_probability(&q->params, &q->vars, sch->qstats.backlog); @@ -430,6 +432,7 @@ static void pie_timer(struct timer_list *t) if (q->params.tupdate) mod_timer(&q->adapt_timer, jiffies + q->params.tupdate); spin_unlock(root_lock); + rcu_read_unlock(); } static int pie_init(struct Qdisc *sch, struct nlattr *opt, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 98129324e157..16277b6a0238 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -321,12 +321,15 @@ static inline void red_adaptative_timer(struct timer_list *t) { struct red_sched_data *q = from_timer(q, t, adapt_timer); struct Qdisc *sch = q->sch; - spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + spinlock_t *root_lock; + rcu_read_lock(); + root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); red_adaptative_algo(&q->parms, &q->vars); mod_timer(&q->adapt_timer, jiffies + HZ/2); spin_unlock(root_lock); + rcu_read_unlock(); } static int red_init(struct Qdisc *sch, struct nlattr *opt, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index abd436307d6a..66dcb18638fe 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -606,10 +606,12 @@ static void sfq_perturbation(struct timer_list *t) { struct sfq_sched_data *q = from_timer(q, t, perturb_timer); struct Qdisc *sch = q->sch; - spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + spinlock_t *root_lock; siphash_key_t nkey; get_random_bytes(&nkey, sizeof(nkey)); + rcu_read_lock(); + root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); q->perturbation = nkey; if (!q->filter_list && q->tail) @@ -618,6 +620,7 @@ static void sfq_perturbation(struct timer_list *t) if (q->perturb_period) mod_timer(&q->perturb_timer, jiffies + q->perturb_period); + rcu_read_unlock(); } static int sfq_change(struct Qdisc *sch, struct nlattr *opt) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 76db9a10ef50..cf0e61ed9225 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -797,6 +797,9 @@ static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch, taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]); + if (q->cur_txq[tc] >= dev->num_tx_queues) + q->cur_txq[tc] = first_txq; + if (skb) return skb; } while (q->cur_txq[tc] != first_txq); @@ -2358,7 +2361,7 @@ static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) if (!dev_queue) return NULL; - return dev_queue->qdisc_sleeping; + return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long taprio_find(struct Qdisc *sch, u32 classid) @@ -2377,7 +2380,7 @@ static int taprio_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle |= TC_H_MIN(cl); - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; return 0; } @@ -2389,7 +2392,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, { struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); - sch = dev_queue->qdisc_sleeping; + sch = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 16f9238aa51d..7721239c185f 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -297,7 +297,7 @@ restart: struct net_device *slave = qdisc_dev(q); struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); - if (slave_txq->qdisc_sleeping != q) + if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q) continue; if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) || !netif_running(slave)) { diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 7fbeb99d8d32..23d6633966b1 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1250,7 +1250,10 @@ static int sctp_side_effects(enum sctp_event_type event_type, default: pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n", status, state, event_type, subtype.chunk); - BUG(); + error = status; + if (error >= 0) + error = -EINVAL; + WARN_ON_ONCE(1); break; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 97f1155a2045..08fdf1251f46 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4482,7 +4482,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net, SCTP_AUTH_NEW_KEY, GFP_ATOMIC); if (!ev) - return -ENOMEM; + return SCTP_DISPOSITION_NOMEM; sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 7a8d9163d186..90f0b60b196a 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -851,6 +851,8 @@ static int smc_llc_add_link_cont(struct smc_link *link, addc_llc->num_rkeys = *num_rkeys_todo; n = *num_rkeys_todo; for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) { + while (*buf_pos && !(*buf_pos)->used) + *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos); if (!*buf_pos) { addc_llc->num_rkeys = addc_llc->num_rkeys - *num_rkeys_todo; @@ -867,8 +869,6 @@ static int smc_llc_add_link_cont(struct smc_link *link, (*num_rkeys_todo)--; *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos); - while (*buf_pos && !(*buf_pos)->used) - *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos); } addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT; addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 53881406e200..cdcd2731860b 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1258,7 +1258,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) struct tipc_nl_msg msg; struct tipc_media *media; struct sk_buff *rep; - struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; @@ -1307,7 +1307,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) int err; char *name; struct tipc_media *m; - struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; diff --git a/net/wireless/core.c b/net/wireless/core.c index 5b0c4d5b80cf..b3ec9eaec36b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -368,12 +368,12 @@ static void cfg80211_sched_scan_stop_wk(struct work_struct *work) rdev = container_of(work, struct cfg80211_registered_device, sched_scan_stop_wk); - rtnl_lock(); + wiphy_lock(&rdev->wiphy); list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) { if (req->nl_owner_dead) cfg80211_stop_sched_scan_req(rdev, req, false); } - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); } static void cfg80211_propagate_radar_detect_wk(struct work_struct *work) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d95f8053020d..087d60c0f6e4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10723,6 +10723,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_MLD_ADDR]) return -EINVAL; req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); + if (!is_valid_ether_addr(req.ap_mld_addr)) + return -EINVAL; } req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 2e497cf26ef2..69b508743e57 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018, 2021-2022 Intel Corporation + * Copyright (C) 2018, 2021-2023 Intel Corporation */ #ifndef __CFG80211_RDEV_OPS #define __CFG80211_RDEV_OPS @@ -1441,8 +1441,8 @@ rdev_del_intf_link(struct cfg80211_registered_device *rdev, unsigned int link_id) { trace_rdev_del_intf_link(&rdev->wiphy, wdev, link_id); - if (rdev->ops->add_intf_link) - rdev->ops->add_intf_link(&rdev->wiphy, wdev, link_id); + if (rdev->ops->del_intf_link) + rdev->ops->del_intf_link(&rdev->wiphy, wdev, link_id); trace_rdev_return_void(&rdev->wiphy); } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0d40d6af7e10..26f11e4746c0 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2404,11 +2404,8 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: - wiphy_lock(wiphy); ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); - wiphy_unlock(wiphy); - if (!ret) return ret; break; @@ -2440,11 +2437,11 @@ static void reg_leave_invalid_chans(struct wiphy *wiphy) struct wireless_dev *wdev; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - ASSERT_RTNL(); - + wiphy_lock(wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) if (!reg_wdev_chan_valid(wiphy, wdev)) cfg80211_leave(rdev, wdev); + wiphy_unlock(wiphy); } static void reg_check_chans_work(struct work_struct *work) diff --git a/net/wireless/util.c b/net/wireless/util.c index 3bc0c3072e78..9755ef281040 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -5,7 +5,7 @@ * Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation */ #include <linux/export.h> #include <linux/bitops.h> @@ -2558,6 +2558,13 @@ void cfg80211_remove_links(struct wireless_dev *wdev) { unsigned int link_id; + /* + * links are controlled by upper layers (userspace/cfg) + * only for AP mode, so only remove them here for AP + */ + if (wdev->iftype != NL80211_IFTYPE_AP) + return; + wdev_lock(wdev); if (wdev->valid_links) { for_each_valid_link(wdev, link_id) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 39fb91ff23d9..815b38080401 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -131,6 +131,7 @@ struct sec_path *secpath_set(struct sk_buff *skb) memset(sp->ovec, 0, sizeof(sp->ovec)); sp->olen = 0; sp->len = 0; + sp->verified_cnt = 0; return sp; } @@ -330,11 +331,10 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, { switch (x->props.mode) { case XFRM_MODE_BEET: - switch (XFRM_MODE_SKB_CB(skb)->protocol) { - case IPPROTO_IPIP: - case IPPROTO_BEETPH: + switch (x->sel.family) { + case AF_INET: return xfrm4_remove_beet_encap(x, skb); - case IPPROTO_IPV6: + case AF_INET6: return xfrm6_remove_beet_encap(x, skb); } break; diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 1f99dc469027..35279c220bd7 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -310,6 +310,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) skb->mark = 0; } +static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type, unsigned short family) +{ + struct sec_path *sp; + + sp = skb_sec_path(skb); + if (sp && (sp->len || sp->olen) && + !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + goto discard; + + XFRM_SPI_SKB_CB(skb)->family = family; + if (family == AF_INET) { + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + } else { + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + } + + return xfrm_input(skb, nexthdr, spi, encap_type); +discard: + kfree_skb(skb); + return 0; +} + +static int xfrmi4_rcv(struct sk_buff *skb) +{ + return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET); +} + +static int xfrmi6_rcv(struct sk_buff *skb) +{ + return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], + 0, 0, AF_INET6); +} + +static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) +{ + return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET); +} + +static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) +{ + return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6); +} + static int xfrmi_rcv_cb(struct sk_buff *skb, int err) { const struct xfrm_mode *inner_mode; @@ -945,8 +991,8 @@ static struct pernet_operations xfrmi_net_ops = { }; static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { - .handler = xfrm6_rcv, - .input_handler = xfrm_input, + .handler = xfrmi6_rcv, + .input_handler = xfrmi6_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, @@ -996,8 +1042,8 @@ static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = { #endif static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = { - .handler = xfrm4_rcv, - .input_handler = xfrm_input, + .handler = xfrmi4_rcv, + .input_handler = xfrmi4_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi4_err, .priority = 10, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6d15788b5123..e7617c9959c3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1831,6 +1831,7 @@ again: __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + xfrm_dev_policy_delete(pol); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); @@ -1869,6 +1870,7 @@ again: __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + xfrm_dev_policy_delete(pol); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); @@ -3349,6 +3351,13 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id)) return ++idx; if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { + if (idx < sp->verified_cnt) { + /* Secpath entry previously verified, consider optional and + * continue searching + */ + continue; + } + if (start == -1) start = -2-idx; break; @@ -3723,6 +3732,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. + * Upon success, marks secpath entries as having been + * verified to allow them to be skipped in future policy + * checks (e.g. nested tunnels). */ for (i = xfrm_nr-1, k = 0; i >= 0; i--) { k = xfrm_policy_ok(tpp[i], sp, k, family, if_id); @@ -3741,6 +3753,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } xfrm_pols_put(pols, npols); + sp->verified_cnt = k; + return 1; } XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); |