diff options
Diffstat (limited to 'net')
249 files changed, 5330 insertions, 3212 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 788076b002b3..e40aa3e3641c 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -319,8 +319,7 @@ static void vlan_transfer_features(struct net_device *dev, { struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); - netif_set_gso_max_size(vlandev, dev->gso_max_size); - netif_set_gso_max_segs(vlandev, dev->gso_max_segs); + netif_inherit_tso_max(vlandev, dev); if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto)) vlandev->hard_header_len = dev->hard_header_len; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e5d23e75572a..839f2020b015 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -573,8 +573,7 @@ static int vlan_dev_init(struct net_device *dev) NETIF_F_ALL_FCOE; dev->features |= dev->hw_features | NETIF_F_LLTX; - netif_set_gso_max_size(dev, real_dev->gso_max_size); - netif_set_gso_max_segs(dev, real_dev->gso_max_segs); + netif_inherit_tso_max(dev, real_dev); if (dev->features & NETIF_F_VLAN_FEATURES) netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); diff --git a/net/Kconfig.debug b/net/Kconfig.debug index 2f50611df858..a5781cf63b16 100644 --- a/net/Kconfig.debug +++ b/net/Kconfig.debug @@ -17,3 +17,10 @@ config NET_NS_REFCNT_TRACKER help Enable debugging feature to track netns references. This adds memory and cpu costs. + +config DEBUG_NET + bool "Add generic networking debug" + depends on DEBUG_KERNEL + help + Enable extra sanity checks in networking. + This is mostly used by fuzzers, but is safe to select. diff --git a/net/atm/common.c b/net/atm/common.c index d0c8ab7ff8f6..f7019df41c3e 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -553,7 +553,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, error = skb_copy_datagram_msg(skb, 0, msg, copied); if (error) return error; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (!(flags & MSG_PEEK)) { pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc), diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index d2a244e1c260..b80fccbac62a 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -115,23 +115,13 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; - spin_unlock_bh(&ax25_dev_lock); - ax25_dev_put(ax25_dev); - dev->ax25_ptr = NULL; - dev_put_track(dev, &ax25_dev->dev_tracker); - ax25_dev_put(ax25_dev); - return; + goto unlock_put; } while (s != NULL && s->next != NULL) { if (s->next == ax25_dev) { s->next = ax25_dev->next; - spin_unlock_bh(&ax25_dev_lock); - ax25_dev_put(ax25_dev); - dev->ax25_ptr = NULL; - dev_put_track(dev, &ax25_dev->dev_tracker); - ax25_dev_put(ax25_dev); - return; + goto unlock_put; } s = s->next; @@ -139,6 +129,14 @@ void ax25_dev_device_down(struct net_device *dev) spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; ax25_dev_put(ax25_dev); + return; + +unlock_put: + spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); + dev->ax25_ptr = NULL; + dev_put_track(dev, &ax25_dev->dev_tracker); + ax25_dev_put(ax25_dev); } int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 7f8a14d99cdb..37ce6cfb3520 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -65,7 +65,7 @@ batadv_bla_send_announce(struct batadv_priv *bat_priv, */ static inline u32 batadv_choose_claim(const void *data, u32 size) { - struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; + const struct batadv_bla_claim *claim = data; u32 hash = 0; hash = jhash(&claim->addr, sizeof(claim->addr), hash); @@ -86,7 +86,7 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size) const struct batadv_bla_backbone_gw *gw; u32 hash = 0; - gw = (struct batadv_bla_backbone_gw *)data; + gw = data; hash = jhash(&gw->orig, sizeof(gw->orig), hash); hash = jhash(&gw->vid, sizeof(gw->vid), hash); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 0899a729a23f..c120c7c6d25f 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto free_skb; } + /* GRO might have added fragments to the fragment list instead of + * frags[]. But this is not handled by skb_split and must be + * linearized to avoid incorrect length information after all + * batman-adv fragments were created and submitted to the + * hard-interface + */ + if (skb_has_frag_list(skb) && __skb_linearize(skb)) { + ret = -ENOMEM; + goto free_skb; + } + /* Create one header to be copied to all fragments */ frag_header.packet_type = BATADV_UNICAST_FRAG; frag_header.version = BATADV_COMPAT_VERSION; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 83fb51b6e299..b8f8da7ee3de 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -307,9 +307,11 @@ static bool batadv_is_cfg80211_netdev(struct net_device *net_device) if (!net_device) return false; +#if IS_ENABLED(CONFIG_CFG80211) /* cfg80211 drivers have to set ieee80211_ptr */ if (net_device->ieee80211_ptr) return true; +#endif return false; } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index f3be82999f1f..23f3d53f4b51 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2022.1" +#define BATADV_SOURCE_VERSION "2022.2" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 8478034d3abf..01d30c1e412c 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -103,10 +103,10 @@ static bool batadv_compare_tt(const struct hlist_node *node, const void *data2) */ static inline u32 batadv_choose_tt(const void *data, u32 size) { - struct batadv_tt_common_entry *tt; + const struct batadv_tt_common_entry *tt; u32 hash = 0; - tt = (struct batadv_tt_common_entry *)data; + tt = data; hash = jhash(&tt->addr, ETH_ALEN, hash); hash = jhash(&tt->vid, sizeof(tt->vid), hash); @@ -2766,7 +2766,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, u32 i; tt_tot = batadv_tt_entries(tt_len); - tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff; + tt_change = tvlv_buff; if (!valid_cb) return; @@ -3994,7 +3994,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, if (tvlv_value_len < sizeof(*tt_data)) return; - tt_data = (struct batadv_tvlv_tt_data *)tvlv_value; + tt_data = tvlv_value; tvlv_value_len -= sizeof(*tt_data); num_vlan = ntohs(tt_data->num_vlan); @@ -4037,7 +4037,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, if (tvlv_value_len < sizeof(*tt_data)) return NET_RX_SUCCESS; - tt_data = (struct batadv_tvlv_tt_data *)tvlv_value; + tt_data = tvlv_value; tvlv_value_len -= sizeof(*tt_data); tt_vlan_len = sizeof(struct batadv_tvlv_tt_vlan_data); @@ -4129,7 +4129,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, goto out; batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); - roaming_adv = (struct batadv_tvlv_roam_adv *)tvlv_value; + roaming_adv = tvlv_value; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received ROAMING_ADV from %pM (client %pM)\n", diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 62705734343b..b506409bb498 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -280,7 +280,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err == 0) { - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name && bt_sk(sk)->skb_msg_name) bt_sk(sk)->skb_msg_name(skb, msg->msg_name, @@ -384,7 +384,7 @@ int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, copied += chunk; size -= chunk; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (!(flags & MSG_PEEK)) { int skb_len = skb_headlen(skb); diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 7e930f77ecab..7d77fb00c2bf 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -55,6 +55,19 @@ u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len) return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance); } +u8 eir_append_service_data(u8 *eir, u16 eir_len, u16 uuid, u8 *data, + u8 data_len) +{ + eir[eir_len++] = sizeof(u8) + sizeof(uuid) + data_len; + eir[eir_len++] = EIR_SERVICE_DATA; + put_unaligned_le16(uuid, &eir[eir_len]); + eir_len += sizeof(uuid); + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; + + return eir_len; +} + static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) { u8 *ptr = data, *uuids_start = NULL; @@ -333,3 +346,21 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr) return scan_rsp_len; } + +void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len) +{ + while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) { + u16 value = get_unaligned_le16(eir); + + if (uuid == value) { + if (len) + *len -= 2; + return &eir[2]; + } + + eir += *len; + eir_len -= *len; + } + + return NULL; +} diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h index 43f1945bffc5..62f2374078f2 100644 --- a/net/bluetooth/eir.h +++ b/net/bluetooth/eir.h @@ -14,6 +14,8 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len); u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len); +u8 eir_append_service_data(u8 *eir, u16 eir_len, u16 uuid, u8 *data, + u8 data_len); static inline u16 eir_precalc_len(u8 data_len) { @@ -92,3 +94,5 @@ static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type, return NULL; } + +void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 84312c836549..882a7df13005 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -481,7 +481,7 @@ static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle) bool hci_setup_sync(struct hci_conn *conn, __u16 handle) { - if (enhanced_sco_capable(conn->hdev)) + if (enhanced_sync_conn_capable(conn->hdev)) return hci_enhanced_setup_sync_conn(conn, handle); return hci_setup_sync_conn(conn, handle); @@ -670,7 +670,7 @@ static void le_conn_timeout(struct work_struct *work) /* Disable LE Advertising */ le_disable_advertising(hdev); hci_dev_lock(hdev); - hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); + hci_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); hci_dev_unlock(hdev); return; } @@ -873,7 +873,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) EXPORT_SYMBOL(hci_get_route); /* This function requires the caller holds hdev->lock */ -void hci_le_conn_failed(struct hci_conn *conn, u8 status) +static void hci_le_conn_failed(struct hci_conn *conn, u8 status) { struct hci_dev *hdev = conn->hdev; struct hci_conn_params *params; @@ -886,8 +886,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) params->conn = NULL; } - conn->state = BT_CLOSED; - /* If the status indicates successful cancellation of * the attempt (i.e. Unknown Connection Id) there's no point of * notifying failure since we'll go back to keep trying to @@ -899,10 +897,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); - hci_connect_cfm(conn, status); - - hci_conn_del(conn); - /* Since we may have temporarily stopped the background scanning in * favor of connection establishment, we should restart it. */ @@ -914,6 +908,28 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) hci_enable_advertising(hdev); } +/* This function requires the caller holds hdev->lock */ +void hci_conn_failed(struct hci_conn *conn, u8 status) +{ + struct hci_dev *hdev = conn->hdev; + + bt_dev_dbg(hdev, "status 0x%2.2x", status); + + switch (conn->type) { + case LE_LINK: + hci_le_conn_failed(conn, status); + break; + case ACL_LINK: + mgmt_connect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, status); + break; + } + + conn->state = BT_CLOSED; + hci_connect_cfm(conn, status); + hci_conn_del(conn); +} + static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) { struct hci_conn *conn = data; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b4782a6c1025..5abb2ca5b129 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2555,10 +2555,10 @@ int hci_register_dev(struct hci_dev *hdev) */ switch (hdev->dev_type) { case HCI_PRIMARY: - id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL); + id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL); break; case HCI_AMP: - id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL); + id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL); break; default: return -EINVAL; @@ -2567,7 +2567,7 @@ int hci_register_dev(struct hci_dev *hdev) if (id < 0) return id; - sprintf(hdev->name, "hci%d", id); + snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); hdev->id = id; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); @@ -2675,8 +2675,6 @@ void hci_unregister_dev(struct hci_dev *hdev) list_del(&hdev->list); write_unlock(&hci_dev_list_lock); - cancel_work_sync(&hdev->power_on); - hci_cmd_sync_clear(hdev); if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index abaabfae19cc..0270e597c285 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1835,7 +1835,9 @@ static u8 hci_cc_le_clear_accept_list(struct hci_dev *hdev, void *data, if (rp->status) return rp->status; + hci_dev_lock(hdev); hci_bdaddr_list_clear(&hdev->le_accept_list); + hci_dev_unlock(hdev); return rp->status; } @@ -1855,8 +1857,10 @@ static u8 hci_cc_le_add_to_accept_list(struct hci_dev *hdev, void *data, if (!sent) return rp->status; + hci_dev_lock(hdev); hci_bdaddr_list_add(&hdev->le_accept_list, &sent->bdaddr, sent->bdaddr_type); + hci_dev_unlock(hdev); return rp->status; } @@ -1876,8 +1880,10 @@ static u8 hci_cc_le_del_from_accept_list(struct hci_dev *hdev, void *data, if (!sent) return rp->status; + hci_dev_lock(hdev); hci_bdaddr_list_del(&hdev->le_accept_list, &sent->bdaddr, sent->bdaddr_type); + hci_dev_unlock(hdev); return rp->status; } @@ -1949,9 +1955,11 @@ static u8 hci_cc_le_add_to_resolv_list(struct hci_dev *hdev, void *data, if (!sent) return rp->status; + hci_dev_lock(hdev); hci_bdaddr_list_add_with_irk(&hdev->le_resolv_list, &sent->bdaddr, sent->bdaddr_type, sent->peer_irk, sent->local_irk); + hci_dev_unlock(hdev); return rp->status; } @@ -1971,8 +1979,10 @@ static u8 hci_cc_le_del_from_resolv_list(struct hci_dev *hdev, void *data, if (!sent) return rp->status; + hci_dev_lock(hdev); hci_bdaddr_list_del_with_irk(&hdev->le_resolv_list, &sent->bdaddr, sent->bdaddr_type); + hci_dev_unlock(hdev); return rp->status; } @@ -1987,7 +1997,9 @@ static u8 hci_cc_le_clear_resolv_list(struct hci_dev *hdev, void *data, if (rp->status) return rp->status; + hci_dev_lock(hdev); hci_bdaddr_list_clear(&hdev->le_resolv_list); + hci_dev_unlock(hdev); return rp->status; } @@ -2834,7 +2846,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status) bt_dev_dbg(hdev, "status 0x%2.2x", status); /* All connection failure handling is taken care of by the - * hci_le_conn_failed function which is triggered by the HCI + * hci_conn_failed function which is triggered by the HCI * request completion callbacks used for connecting. */ if (status) @@ -2859,7 +2871,7 @@ static void hci_cs_le_ext_create_conn(struct hci_dev *hdev, u8 status) bt_dev_dbg(hdev, "status 0x%2.2x", status); /* All connection failure handling is taken care of by the - * hci_le_conn_failed function which is triggered by the HCI + * hci_conn_failed function which is triggered by the HCI * request completion callbacks used for connecting. */ if (status) @@ -3067,18 +3079,20 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, { struct hci_ev_conn_complete *ev = data; struct hci_conn *conn; + u8 status = ev->status; - if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) { - bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for invalid handle"); - return; - } - - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { + /* In case of error status and there is no connection pending + * just unlock as there is nothing to cleanup. + */ + if (ev->status) + goto unlock; + /* Connection may not exist if auto-connected. Check the bredr * allowlist to see if this device is allowed to auto connect. * If link is an ACL type, create a connection class @@ -3122,8 +3136,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, goto unlock; } - if (!ev->status) { + if (!status) { conn->handle = __le16_to_cpu(ev->handle); + if (conn->handle > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", + conn->handle, HCI_CONN_HANDLE_MAX); + status = HCI_ERROR_INVALID_PARAMETERS; + goto done; + } if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; @@ -3164,19 +3184,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp), &cp); } - } else { - conn->state = BT_CLOSED; - if (conn->type == ACL_LINK) - mgmt_connect_failed(hdev, &conn->dst, conn->type, - conn->dst_type, ev->status); } if (conn->type == ACL_LINK) hci_sco_setup(conn, ev->status); - if (ev->status) { - hci_connect_cfm(conn, ev->status); - hci_conn_del(conn); +done: + if (status) { + hci_conn_failed(conn, status); } else if (ev->link_type == SCO_LINK) { switch (conn->setting & SCO_AIRMODE_MASK) { case SCO_AIRMODE_CVSD: @@ -3185,7 +3200,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, break; } - hci_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, status); } unlock: @@ -3222,10 +3237,12 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, return; } + hci_dev_lock(hdev); + if (hci_bdaddr_list_lookup(&hdev->reject_list, &ev->bdaddr, BDADDR_BREDR)) { hci_reject_conn(hdev, &ev->bdaddr); - return; + goto unlock; } /* Require HCI_CONNECTABLE or an accept list entry to accept the @@ -3237,13 +3254,11 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, !hci_bdaddr_list_lookup_with_flags(&hdev->accept_list, &ev->bdaddr, BDADDR_BREDR)) { hci_reject_conn(hdev, &ev->bdaddr); - return; + goto unlock; } /* Connection accepted */ - hci_dev_lock(hdev); - ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); if (ie) memcpy(ie->data.dev_class, ev->dev_class, 3); @@ -3255,8 +3270,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, HCI_ROLE_SLAVE); if (!conn) { bt_dev_err(hdev, "no memory for new connection"); - hci_dev_unlock(hdev); - return; + goto unlock; } } @@ -3296,6 +3310,10 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, conn->state = BT_CONNECT2; hci_connect_cfm(conn, 0); } + + return; +unlock: + hci_dev_unlock(hdev); } static u8 hci_to_mgmt_reason(u8 err) @@ -4676,6 +4694,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, { struct hci_ev_sync_conn_complete *ev = data; struct hci_conn *conn; + u8 status = ev->status; switch (ev->link_type) { case SCO_LINK: @@ -4690,12 +4709,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, return; } - if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) { - bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete for invalid handle"); - return; - } - - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); @@ -4729,9 +4743,17 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, goto unlock; } - switch (ev->status) { + switch (status) { case 0x00: conn->handle = __le16_to_cpu(ev->handle); + if (conn->handle > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", + conn->handle, HCI_CONN_HANDLE_MAX); + status = HCI_ERROR_INVALID_PARAMETERS; + conn->state = BT_CLOSED; + break; + } + conn->state = BT_CONNECTED; conn->type = ev->link_type; @@ -4775,8 +4797,8 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, } } - hci_connect_cfm(conn, ev->status); - if (ev->status) + hci_connect_cfm(conn, status); + if (status) hci_conn_del(conn); unlock: @@ -5527,11 +5549,6 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, struct smp_irk *irk; u8 addr_type; - if (handle > HCI_CONN_HANDLE_MAX) { - bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for invalid handle"); - return; - } - hci_dev_lock(hdev); /* All controllers implicitly stop advertising in the event of a @@ -5541,6 +5558,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn = hci_lookup_le_connect(hdev); if (!conn) { + /* In case of error status and there is no connection pending + * just unlock as there is nothing to cleanup. + */ + if (status) + goto unlock; + conn = hci_conn_add(hdev, LE_LINK, bdaddr, role); if (!conn) { bt_dev_err(hdev, "no memory for new connection"); @@ -5603,8 +5626,14 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL); + if (handle > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle, + HCI_CONN_HANDLE_MAX); + status = HCI_ERROR_INVALID_PARAMETERS; + } + if (status) { - hci_le_conn_failed(conn, status); + hci_conn_failed(conn, status); goto unlock; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 42c8047a9897..635cc5fb451e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -261,7 +261,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, if (skb_queue_empty(&req->cmd_q)) bt_cb(skb)->hci.req_flags |= HCI_REQ_START; - bt_cb(skb)->hci.req_event = event; + hci_skb_event(skb) = event; skb_queue_tail(&req->cmd_q, skb); } @@ -2260,6 +2260,7 @@ static int active_scan(struct hci_request *req, unsigned long opt) if (err < 0) own_addr_type = ADDR_LE_DEV_PUBLIC; + hci_dev_lock(hdev); if (hci_is_adv_monitoring(hdev)) { /* Duplicate filter should be disabled when some advertisement * monitor is activated, otherwise AdvMon can only receive one @@ -2276,6 +2277,7 @@ static int active_scan(struct hci_request *req, unsigned long opt) */ filter_dup = LE_SCAN_FILTER_DUP_DISABLE; } + hci_dev_unlock(hdev); hci_req_start_scan(req, LE_SCAN_ACTIVE, interval, hdev->le_scan_window_discovery, own_addr_type, diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 8f4c5698913d..4d2203c5f1bb 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1664,20 +1664,19 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev, struct hci_cp_le_add_to_accept_list cp; int err; + /* During suspend, only wakeable devices can be in acceptlist */ + if (hdev->suspended && + !test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, params->flags)) + return 0; + /* Select filter policy to accept all advertising */ if (*num_entries >= hdev->le_accept_list_size) return -ENOSPC; /* Accept list can not be used with RPAs */ if (!use_ll_privacy(hdev) && - hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) { + hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) return -EINVAL; - } - - /* During suspend, only wakeable devices can be in acceptlist */ - if (hdev->suspended && - !test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, params->flags)) - return 0; /* Attempt to program the device in the resolving list first to avoid * having to rollback in case it fails since the resolving list is @@ -3825,6 +3824,30 @@ static int hci_init_sync(struct hci_dev *hdev) return 0; } +#define HCI_QUIRK_BROKEN(_quirk, _desc) { HCI_QUIRK_BROKEN_##_quirk, _desc } + +static const struct { + unsigned long quirk; + const char *desc; +} hci_broken_table[] = { + HCI_QUIRK_BROKEN(LOCAL_COMMANDS, + "HCI Read Local Supported Commands not supported"), + HCI_QUIRK_BROKEN(STORED_LINK_KEY, + "HCI Delete Stored Link Key command is advertised, " + "but not supported."), + HCI_QUIRK_BROKEN(ERR_DATA_REPORTING, + "HCI Read Default Erroneous Data Reporting command is " + "advertised, but not supported."), + HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER, + "HCI Read Transmit Power Level command is advertised, " + "but not supported."), + HCI_QUIRK_BROKEN(FILTER_CLEAR_ALL, + "HCI Set Event Filter command not supported."), + HCI_QUIRK_BROKEN(ENHANCED_SETUP_SYNC_CONN, + "HCI Enhanced Setup Synchronous Connection command is " + "advertised, but not supported.") +}; + int hci_dev_open_sync(struct hci_dev *hdev) { int ret = 0; @@ -3886,12 +3909,19 @@ int hci_dev_open_sync(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_SETUP) || test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) { bool invalid_bdaddr; + size_t i; hci_sock_dev_event(hdev, HCI_DEV_SETUP); if (hdev->setup) ret = hdev->setup(hdev); + for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) { + if (test_bit(hci_broken_table[i].quirk, &hdev->quirks)) + bt_dev_warn(hdev, "%s", + hci_broken_table[i].desc); + } + /* The transport driver can set the quirk to mark the * BD_ADDR invalid before creating the HCI device or in * its setup callback. @@ -4058,6 +4088,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, ""); + cancel_work_sync(&hdev->power_on); cancel_delayed_work(&hdev->power_off); cancel_delayed_work(&hdev->ncmd_timer); @@ -4408,12 +4439,21 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, static int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { + int err; + switch (conn->state) { case BT_CONNECTED: case BT_CONFIG: return hci_disconnect_sync(hdev, conn, reason); case BT_CONNECT: - return hci_connect_cancel_sync(hdev, conn); + err = hci_connect_cancel_sync(hdev, conn); + /* Cleanup hci_conn object if it cannot be cancelled as it + * likelly means the controller and host stack are out of sync. + */ + if (err) + hci_conn_failed(conn, err); + + return err; case BT_CONNECT2: return hci_reject_conn_sync(hdev, conn, reason); default: @@ -4872,10 +4912,28 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev) return 0; } +/* This function disables scan (BR and LE) and mark it as paused */ +static int hci_pause_scan_sync(struct hci_dev *hdev) +{ + if (hdev->scanning_paused) + return 0; + + /* Disable page scan if enabled */ + if (test_bit(HCI_PSCAN, &hdev->flags)) + hci_write_scan_enable_sync(hdev, SCAN_DISABLED); + + hci_scan_disable_sync(hdev); + + hdev->scanning_paused = true; + + return 0; +} + /* This function performs the HCI suspend procedures in the follow order: * * Pause discovery (active scanning/inquiry) * Pause Directed Advertising/Advertising + * Pause Scanning (passive scanning in case discovery was not active) * Disconnect all connections * Set suspend_status to BT_SUSPEND_DISCONNECT if hdev cannot wakeup * otherwise: @@ -4901,15 +4959,11 @@ int hci_suspend_sync(struct hci_dev *hdev) /* Pause other advertisements */ hci_pause_advertising_sync(hdev); - /* Disable page scan if enabled */ - if (test_bit(HCI_PSCAN, &hdev->flags)) - hci_write_scan_enable_sync(hdev, SCAN_DISABLED); - /* Suspend monitor filters */ hci_suspend_monitor_sync(hdev); /* Prevent disconnects from causing scanning to be re-enabled */ - hdev->scanning_paused = true; + hci_pause_scan_sync(hdev); /* Soft disconnect everything (power off) */ err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF); @@ -4980,6 +5034,22 @@ static void hci_resume_monitor_sync(struct hci_dev *hdev) } } +/* This function resume scan and reset paused flag */ +static int hci_resume_scan_sync(struct hci_dev *hdev) +{ + if (!hdev->scanning_paused) + return 0; + + hci_update_scan_sync(hdev); + + /* Reset passive scanning to normal */ + hci_update_passive_scan_sync(hdev); + + hdev->scanning_paused = false; + + return 0; +} + /* This function performs the HCI suspend procedures in the follow order: * * Restore event mask @@ -5002,10 +5072,9 @@ int hci_resume_sync(struct hci_dev *hdev) /* Clear any event filters and restore scan state */ hci_clear_event_filter_sync(hdev); - hci_update_scan_sync(hdev); - /* Reset passive scanning to normal */ - hci_update_passive_scan_sync(hdev); + /* Resume scanning */ + hci_resume_scan_sync(hdev); /* Resume monitor filters */ hci_resume_monitor_sync(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d2d390534e54..74937a834648 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4529,6 +4529,23 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, le_addr_type(cp->addr.type)); if (params) { + DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS); + + bitmap_from_u64(flags, current_flags); + + /* Devices using RPAs can only be programmed in the + * acceptlist LL Privacy has been enable otherwise they + * cannot mark HCI_CONN_FLAG_REMOTE_WAKEUP. + */ + if (test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, flags) && + !use_ll_privacy(hdev) && + hci_find_irk_by_addr(hdev, ¶ms->addr, + params->addr_type)) { + bt_dev_warn(hdev, + "Cannot set wakeable for RPA"); + goto unlock; + } + bitmap_from_u64(params->flags, current_flags); status = MGMT_STATUS_SUCCESS; @@ -4545,6 +4562,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, } } +unlock: hci_dev_unlock(hdev); done: diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index 37eef2ce55ae..b69cfed62088 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -297,7 +297,7 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, if (!cmd) return NULL; - list_add(&cmd->list, &hdev->mgmt_pending); + list_add_tail(&cmd->list, &hdev->mgmt_pending); return cmd; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 8eabf41b2993..1111da4e2f2b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -574,19 +574,24 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen addr->sa_family != AF_BLUETOOTH) return -EINVAL; - if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) - return -EBADFD; + lock_sock(sk); + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + err = -EBADFD; + goto done; + } - if (sk->sk_type != SOCK_SEQPACKET) - return -EINVAL; + if (sk->sk_type != SOCK_SEQPACKET) { + err = -EINVAL; + goto done; + } hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR); - if (!hdev) - return -EHOSTUNREACH; + if (!hdev) { + err = -EHOSTUNREACH; + goto done; + } hci_dev_lock(hdev); - lock_sock(sk); - /* Set destination address and psm */ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr); @@ -885,7 +890,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, err = -EBADFD; break; } - if (enhanced_sco_capable(hdev) && + if (enhanced_sync_conn_capable(hdev) && voice.setting == BT_VOICE_TRANSPARENT) sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT; hci_dev_put(hdev); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 4d08cca771c7..56f059b3c242 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -108,6 +108,7 @@ struct xdp_test_data { struct page_pool *pp; struct xdp_frame **frames; struct sk_buff **skbs; + struct xdp_mem_info mem; u32 batch_size; u32 frame_cnt; }; @@ -147,7 +148,6 @@ static void xdp_test_run_init_page(struct page *page, void *arg) static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx) { - struct xdp_mem_info mem = {}; struct page_pool *pp; int err = -ENOMEM; struct page_pool_params pp_params = { @@ -174,7 +174,7 @@ static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_c } /* will copy 'mem.id' into pp->xdp_mem_id */ - err = xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pp); + err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp); if (err) goto err_mmodel; @@ -202,6 +202,7 @@ err_skbs: static void xdp_test_run_teardown(struct xdp_test_data *xdp) { + xdp_unreg_mem_model(&xdp->mem); page_pool_destroy(xdp->pp); kfree(xdp->frames); kfree(xdp->skbs); @@ -1011,7 +1012,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) cb->pkt_len = skb->len; } else { if (__skb->wire_len < skb->len || - __skb->wire_len > GSO_MAX_SIZE) + __skb->wire_len > GSO_LEGACY_MAX_SIZE) return -EINVAL; cb->pkt_len = __skb->wire_len; } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 1a3d583fbc8e..e7f4fccb6adb 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1253,7 +1253,8 @@ static int __br_fdb_delete(struct net_bridge *br, /* Remove neighbor entry with RTM_DELNEIGH */ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + struct netlink_ext_ack *extack) { struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 55f47cadb114..47fcbade7389 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -517,16 +517,16 @@ void br_mtu_auto_adjust(struct net_bridge *br) static void br_set_gso_limits(struct net_bridge *br) { - unsigned int gso_max_size = GSO_MAX_SIZE; - u16 gso_max_segs = GSO_MAX_SEGS; + unsigned int tso_max_size = TSO_MAX_SIZE; const struct net_bridge_port *p; + u16 tso_max_segs = TSO_MAX_SEGS; list_for_each_entry(p, &br->port_list, list) { - gso_max_size = min(gso_max_size, p->dev->gso_max_size); - gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs); + tso_max_size = min(tso_max_size, p->dev->tso_max_size); + tso_max_segs = min(tso_max_segs, p->dev->tso_max_segs); } - netif_set_gso_max_size(br->dev, gso_max_size); - netif_set_gso_max_segs(br->dev, gso_max_segs); + netif_set_tso_max_size(br->dev, tso_max_size); + netif_set_tso_max_segs(br->dev, tso_max_segs); } /* diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 196417859c4a..68b3e850bcb9 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -39,6 +39,13 @@ static int br_pass_frame_up(struct sk_buff *skb) dev_sw_netstats_rx_add(brdev, skb->len); vg = br_vlan_group_rcu(br); + + /* Reset the offload_fwd_mark because there could be a stacked + * bridge above, and it should not think this bridge it doing + * that bridge's work forwarding out its ports. + */ + br_switchdev_frame_unmark(skb); + /* Bridge is just like any other port. Make sure the * packet is allowed except in promisc mode when someone * may be running packet capture. diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 6ae882cfae1c..06e5f6faa431 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -793,7 +793,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid, unsigned long flags); int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, u16 vid); + struct net_device *dev, const unsigned char *addr, u16 vid, + struct netlink_ext_ack *extack); int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, u16 vid, struct netlink_ext_ack *extack); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 81400e0b26ac..8f3d76c751dd 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -354,6 +354,8 @@ static int br_switchdev_vlan_attr_replay(struct net_device *br_dev, attr.orig_dev = br_dev; vg = br_vlan_group(br); + if (!vg) + return 0; list_for_each_entry(v, &vg->vlan_list, vlist) { if (v->msti) { diff --git a/net/can/bcm.c b/net/can/bcm.c index 64c07e650bb4..65ee1b784a30 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1647,7 +1647,7 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return err; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(BCM_MIN_NAMELEN); diff --git a/net/can/isotp.c b/net/can/isotp.c index 11a65a9d3906..43a27d19cdac 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -104,6 +104,7 @@ MODULE_ALIAS("can-proto-6"); #define FC_CONTENT_SZ 3 /* flow control content size in byte (FS/BS/STmin) */ #define ISOTP_CHECK_PADDING (CAN_ISOTP_CHK_PAD_LEN | CAN_ISOTP_CHK_PAD_DATA) +#define ISOTP_ALL_BC_FLAGS (CAN_ISOTP_SF_BROADCAST | CAN_ISOTP_CF_BROADCAST) /* Flow Status given in FC frame */ #define ISOTP_FC_CTS 0 /* clear to send */ @@ -159,6 +160,23 @@ static inline struct isotp_sock *isotp_sk(const struct sock *sk) return (struct isotp_sock *)sk; } +static u32 isotp_bc_flags(struct isotp_sock *so) +{ + return so->opt.flags & ISOTP_ALL_BC_FLAGS; +} + +static bool isotp_register_rxid(struct isotp_sock *so) +{ + /* no broadcast modes => register rx_id for FC frame reception */ + return (isotp_bc_flags(so) == 0); +} + +static bool isotp_register_txecho(struct isotp_sock *so) +{ + /* all modes but SF_BROADCAST register for tx echo skbs */ + return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST); +} + static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer) { struct isotp_sock *so = container_of(hrtimer, struct isotp_sock, @@ -803,7 +821,6 @@ static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so, cf->data[i] = so->tx.buf[so->tx.idx++]; so->tx.sn = 1; - so->tx.state = ISOTP_WAIT_FIRST_FC; } static void isotp_rcv_echo(struct sk_buff *skb, void *data) @@ -936,7 +953,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0; /* does the given data fit into a single frame for SF_BROADCAST? */ - if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) && + if ((isotp_bc_flags(so) == CAN_ISOTP_SF_BROADCAST) && (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) { err = -EINVAL; goto err_out_drop; @@ -1000,12 +1017,41 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) /* don't enable wait queue for a single frame transmission */ wait_tx_done = 0; } else { - /* send first frame and wait for FC */ + /* send first frame */ isotp_create_fframe(cf, so, ae); - /* start timeout for FC */ - hrtimer_sec = 1; + if (isotp_bc_flags(so) == CAN_ISOTP_CF_BROADCAST) { + /* set timer for FC-less operation (STmin = 0) */ + if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN) + so->tx_gap = ktime_set(0, so->force_tx_stmin); + else + so->tx_gap = ktime_set(0, so->frame_txtime); + + /* disable wait for FCs due to activated block size */ + so->txfc.bs = 0; + + /* cfecho should have been zero'ed by init */ + if (so->cfecho) + pr_notice_once("can-isotp: no fc cfecho %08X\n", + so->cfecho); + + /* set consecutive frame echo tag */ + so->cfecho = *(u32 *)cf->data; + + /* switch directly to ISOTP_SENDING state */ + so->tx.state = ISOTP_SENDING; + + /* start timeout for unlikely lost echo skb */ + hrtimer_sec = 2; + } else { + /* standard flow control check */ + so->tx.state = ISOTP_WAIT_FIRST_FC; + + /* start timeout for FC */ + hrtimer_sec = 1; + } + hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0), HRTIMER_MODE_REL_SOFT); } @@ -1025,6 +1071,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (hrtimer_sec) hrtimer_cancel(&so->txtimer); + /* reset consecutive frame echo tag */ + so->cfecho = 0; + goto err_out_drop; } @@ -1120,15 +1169,17 @@ static int isotp_release(struct socket *sock) lock_sock(sk); /* remove current filters & unregister */ - if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) { + if (so->bound && isotp_register_txecho(so)) { if (so->ifindex) { struct net_device *dev; dev = dev_get_by_index(net, so->ifindex); if (dev) { - can_rx_unregister(net, dev, so->rxid, - SINGLE_MASK(so->rxid), - isotp_rcv, sk); + if (isotp_register_rxid(so)) + can_rx_unregister(net, dev, so->rxid, + SINGLE_MASK(so->rxid), + isotp_rcv, sk); + can_rx_unregister(net, dev, so->txid, SINGLE_MASK(so->txid), isotp_rcv_echo, sk); @@ -1161,45 +1212,51 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) struct net *net = sock_net(sk); int ifindex; struct net_device *dev; - canid_t tx_id, rx_id; + canid_t tx_id = addr->can_addr.tp.tx_id; + canid_t rx_id = addr->can_addr.tp.rx_id; int err = 0; int notify_enetdown = 0; - int do_rx_reg = 1; if (len < ISOTP_MIN_NAMELEN) return -EINVAL; - /* sanitize tx/rx CAN identifiers */ - tx_id = addr->can_addr.tp.tx_id; + /* sanitize tx CAN identifier */ if (tx_id & CAN_EFF_FLAG) tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK); else tx_id &= CAN_SFF_MASK; - rx_id = addr->can_addr.tp.rx_id; - if (rx_id & CAN_EFF_FLAG) - rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK); - else - rx_id &= CAN_SFF_MASK; + /* give feedback on wrong CAN-ID value */ + if (tx_id != addr->can_addr.tp.tx_id) + return -EINVAL; + + /* sanitize rx CAN identifier (if needed) */ + if (isotp_register_rxid(so)) { + if (rx_id & CAN_EFF_FLAG) + rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK); + else + rx_id &= CAN_SFF_MASK; + + /* give feedback on wrong CAN-ID value */ + if (rx_id != addr->can_addr.tp.rx_id) + return -EINVAL; + } if (!addr->can_ifindex) return -ENODEV; lock_sock(sk); - /* do not register frame reception for functional addressing */ - if (so->opt.flags & CAN_ISOTP_SF_BROADCAST) - do_rx_reg = 0; - - /* do not validate rx address for functional addressing */ - if (do_rx_reg && rx_id == tx_id) { - err = -EADDRNOTAVAIL; + if (so->bound) { + err = -EINVAL; goto out; } - if (so->bound && addr->can_ifindex == so->ifindex && - rx_id == so->rxid && tx_id == so->txid) + /* ensure different CAN IDs when the rx_id is to be registered */ + if (isotp_register_rxid(so) && rx_id == tx_id) { + err = -EADDRNOTAVAIL; goto out; + } dev = dev_get_by_index(net, addr->can_ifindex); if (!dev) { @@ -1221,10 +1278,11 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) ifindex = dev->ifindex; - if (do_rx_reg) { + if (isotp_register_rxid(so)) can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id), isotp_rcv, sk, "isotp", sk); + if (isotp_register_txecho(so)) { /* no consecutive frame echo skb in flight */ so->cfecho = 0; @@ -1235,22 +1293,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) dev_put(dev); - if (so->bound && do_rx_reg) { - /* unregister old filter */ - if (so->ifindex) { - dev = dev_get_by_index(net, so->ifindex); - if (dev) { - can_rx_unregister(net, dev, so->rxid, - SINGLE_MASK(so->rxid), - isotp_rcv, sk); - can_rx_unregister(net, dev, so->txid, - SINGLE_MASK(so->txid), - isotp_rcv_echo, sk); - dev_put(dev); - } - } - } - /* switch to new settings */ so->ifindex = ifindex; so->rxid = rx_id; @@ -1309,6 +1351,15 @@ static int isotp_setsockopt_locked(struct socket *sock, int level, int optname, if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR)) so->opt.rx_ext_address = so->opt.ext_address; + /* these broadcast flags are not allowed together */ + if (isotp_bc_flags(so) == ISOTP_ALL_BC_FLAGS) { + /* CAN_ISOTP_SF_BROADCAST is prioritized */ + so->opt.flags &= ~CAN_ISOTP_CF_BROADCAST; + + /* give user feedback on wrong config attempt */ + ret = -EINVAL; + } + /* check for frame_txtime changes (0 => no changes) */ if (so->opt.frame_txtime) { if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO) @@ -1459,10 +1510,12 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg, case NETDEV_UNREGISTER: lock_sock(sk); /* remove current filters & unregister */ - if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) { - can_rx_unregister(dev_net(dev), dev, so->rxid, - SINGLE_MASK(so->rxid), - isotp_rcv, sk); + if (so->bound && isotp_register_txecho(so)) { + if (isotp_register_rxid(so)) + can_rx_unregister(dev_net(dev), dev, so->rxid, + SINGLE_MASK(so->rxid), + isotp_rcv, sk); + can_rx_unregister(dev_net(dev), dev, so->txid, SINGLE_MASK(so->txid), isotp_rcv_echo, sk); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 0bb4fd3f6264..f5ecfdcf57b2 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -841,7 +841,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg, paddr->can_addr.j1939.pgn = skcb->addr.pgn; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); msg->msg_flags |= skcb->msg_flags; skb_free_datagram(sk, skb); diff --git a/net/can/raw.c b/net/can/raw.c index 0cf728dcff36..d1bd9cc51ebe 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -772,6 +772,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); + struct sockcm_cookie sockc; struct sk_buff *skb; struct net_device *dev; int ifindex; @@ -817,11 +818,18 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (err < 0) goto free_skb; - skb_setup_tx_timestamp(skb, sk->sk_tsflags); + sockcm_init(&sockc, sk); + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (unlikely(err)) + goto free_skb; + } skb->dev = dev; - skb->sk = sk; skb->priority = sk->sk_priority; + skb->tstamp = sockc.transmit_time; + + skb_setup_tx_timestamp(skb, sockc.tsflags); err = can_send(skb, ro->loopback); @@ -866,7 +874,7 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return err; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(RAW_MIN_NAMELEN); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 1c5815530e0d..83eb97c94e83 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2385,7 +2385,11 @@ again: if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) { err = -ENOSPC; } else { - pr_warn_ratelimited("FULL or reached pool quota\n"); + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) + pr_warn_ratelimited("cluster is full (osdmap FULL)\n"); + else + pr_warn_ratelimited("pool %lld is full or reached quota\n", + req->r_t.base_oloc.pool); req->r_t.paused = true; maybe_request_map(osdc); } diff --git a/net/core/datagram.c b/net/core/datagram.c index 70126d15ca6e..50f4faeea76c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -62,8 +62,6 @@ #include <trace/events/skb.h> #include <net/busy_poll.h> -#include "datagram.h" - /* * Is a socket 'connection oriented' ? */ diff --git a/net/core/datagram.h b/net/core/datagram.h deleted file mode 100644 index bcfb75bfa3b2..000000000000 --- a/net/core/datagram.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _NET_CORE_DATAGRAM_H_ -#define _NET_CORE_DATAGRAM_H_ - -#include <linux/types.h> - -struct sock; -struct sk_buff; -struct iov_iter; - -int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, - struct iov_iter *from, size_t length); - -#endif /* _NET_CORE_DATAGRAM_H_ */ diff --git a/net/core/dev.c b/net/core/dev.c index 611bd7197064..721ba9c26554 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -682,11 +682,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, const struct net_device *last_dev; struct net_device_path_ctx ctx = { .dev = dev, - .daddr = daddr, }; struct net_device_path *path; int ret = 0; + memcpy(ctx.daddr, daddr, sizeof(ctx.daddr)); stack->num_paths = 0; while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) { last_dev = ctx.dev; @@ -2993,6 +2993,52 @@ undo_rx: EXPORT_SYMBOL(netif_set_real_num_queues); /** + * netif_set_tso_max_size() - set the max size of TSO frames supported + * @dev: netdev to update + * @size: max skb->len of a TSO frame + * + * Set the limit on the size of TSO super-frames the device can handle. + * Unless explicitly set the stack will assume the value of + * %GSO_LEGACY_MAX_SIZE. + */ +void netif_set_tso_max_size(struct net_device *dev, unsigned int size) +{ + dev->tso_max_size = min(GSO_MAX_SIZE, size); + if (size < READ_ONCE(dev->gso_max_size)) + netif_set_gso_max_size(dev, size); +} +EXPORT_SYMBOL(netif_set_tso_max_size); + +/** + * netif_set_tso_max_segs() - set the max number of segs supported for TSO + * @dev: netdev to update + * @segs: max number of TCP segments + * + * Set the limit on the number of TCP segments the device can generate from + * a single TSO super-frame. + * Unless explicitly set the stack will assume the value of %GSO_MAX_SEGS. + */ +void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs) +{ + dev->tso_max_segs = segs; + if (segs < READ_ONCE(dev->gso_max_segs)) + netif_set_gso_max_segs(dev, segs); +} +EXPORT_SYMBOL(netif_set_tso_max_segs); + +/** + * netif_inherit_tso_max() - copy all TSO limits from a lower device to an upper + * @to: netdev to update + * @from: netdev from which to copy the limits + */ +void netif_inherit_tso_max(struct net_device *to, const struct net_device *from) +{ + netif_set_tso_max_size(to, from->tso_max_size); + netif_set_tso_max_segs(to, from->tso_max_segs); +} +EXPORT_SYMBOL(netif_inherit_tso_max); + +/** * netif_get_num_default_rss_queues - default number of RSS queues * * Default value is the number of physical cores if there are only 1 or 2, or @@ -3220,12 +3266,18 @@ int skb_checksum_help(struct sk_buff *skb) } offset = skb_checksum_start_offset(skb); - BUG_ON(offset >= skb_headlen(skb)); + ret = -EINVAL; + if (WARN_ON_ONCE(offset >= skb_headlen(skb))) { + DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); + goto out; + } csum = skb_checksum(skb, offset, skb->len - offset, 0); offset += skb->csum_offset; - BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); - + if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) { + DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); + goto out; + } ret = skb_ensure_writable(skb, offset + sizeof(__sum16)); if (ret) goto out; @@ -4086,32 +4138,27 @@ struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, } /** - * __dev_queue_xmit - transmit a buffer - * @skb: buffer to transmit - * @sb_dev: suboordinate device used for L2 forwarding offload + * __dev_queue_xmit() - transmit a buffer + * @skb: buffer to transmit + * @sb_dev: suboordinate device used for L2 forwarding offload * - * Queue a buffer for transmission to a network device. The caller must - * have set the device and priority and built the buffer before calling - * this function. The function can be called from an interrupt. + * Queue a buffer for transmission to a network device. The caller must + * have set the device and priority and built the buffer before calling + * this function. The function can be called from an interrupt. * - * A negative errno code is returned on a failure. A success does not - * guarantee the frame will be transmitted as it may be dropped due - * to congestion or traffic shaping. + * When calling this method, interrupts MUST be enabled. This is because + * the BH enable code must have IRQs enabled so that it will not deadlock. * - * ----------------------------------------------------------------------------------- - * I notice this method can also return errors from the queue disciplines, - * including NET_XMIT_DROP, which is a positive value. So, errors can also - * be positive. + * Regardless of the return value, the skb is consumed, so it is currently + * difficult to retry a send to this method. (You can bump the ref count + * before sending to hold a reference for retry if you are careful.) * - * Regardless of the return value, the skb is consumed, so it is currently - * difficult to retry a send to this method. (You can bump the ref count - * before sending to hold a reference for retry if you are careful.) - * - * When calling this method, interrupts MUST be enabled. This is because - * the BH enable code must have IRQs enabled so that it will not deadlock. - * --BLG + * Return: + * * 0 - buffer successfully transmitted + * * positive qdisc return code - NET_XMIT_DROP etc. + * * negative errno - other errors */ -static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) +int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) { struct net_device *dev = skb->dev; struct netdev_queue *txq = NULL; @@ -4235,18 +4282,7 @@ out: rcu_read_unlock_bh(); return rc; } - -int dev_queue_xmit(struct sk_buff *skb) -{ - return __dev_queue_xmit(skb, NULL); -} -EXPORT_SYMBOL(dev_queue_xmit); - -int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev) -{ - return __dev_queue_xmit(skb, sb_dev); -} -EXPORT_SYMBOL(dev_queue_xmit_accel); +EXPORT_SYMBOL(__dev_queue_xmit); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { @@ -4294,6 +4330,7 @@ int netdev_max_backlog __read_mostly = 1000; EXPORT_SYMBOL(netdev_max_backlog); int netdev_tstamp_prequeue __read_mostly = 1; +unsigned int sysctl_skb_defer_max __read_mostly = 64; int netdev_budget __read_mostly = 300; /* Must be at least 2 jiffes to guarantee 1 jiffy timeout */ unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ; @@ -4546,9 +4583,12 @@ static void rps_trigger_softirq(void *data) #endif /* CONFIG_RPS */ /* Called from hardirq (IPI) context */ -static void trigger_rx_softirq(void *data __always_unused) +static void trigger_rx_softirq(void *data) { + struct softnet_data *sd = data; + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + smp_store_release(&sd->defer_ipi_scheduled, 0); } /* @@ -6314,8 +6354,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded) } EXPORT_SYMBOL(dev_set_threaded); -void netif_napi_add(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight) +void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) { if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state))) return; @@ -6348,7 +6388,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, if (dev->threaded && napi_kthread_create(napi)) dev->threaded = 0; } -EXPORT_SYMBOL(netif_napi_add); +EXPORT_SYMBOL(netif_napi_add_weight); void napi_disable(struct napi_struct *n) { @@ -6594,7 +6634,7 @@ static void skb_defer_free_flush(struct softnet_data *sd) while (skb != NULL) { next = skb->next; - __kfree_skb(skb); + napi_consume_skb(skb, 1); skb = next; } } @@ -6615,9 +6655,11 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) for (;;) { struct napi_struct *n; + skb_defer_free_flush(sd); + if (list_empty(&list)) { if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) - return; + goto end; break; } @@ -6644,7 +6686,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) __raise_softirq_irqoff(NET_RX_SOFTIRQ); net_rps_action_and_irq_enable(sd); - skb_defer_free_flush(sd); +end:; } struct netdev_adjacent { @@ -9891,22 +9933,14 @@ void netif_tx_stop_all_queues(struct net_device *dev) EXPORT_SYMBOL(netif_tx_stop_all_queues); /** - * register_netdevice - register a network device - * @dev: device to register + * register_netdevice() - register a network device + * @dev: device to register * - * Take a completed network device structure and add it to the kernel - * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier - * chain. 0 is returned on success. A negative errno code is returned - * on a failure to set up the device, or if the name is a duplicate. - * - * Callers must hold the rtnl semaphore. You may want - * register_netdev() instead of this. - * - * BUGS: - * The locking appears insufficient to guarantee two parallel registers - * will not get the same name. + * Take a prepared network device structure and make it externally accessible. + * A %NETDEV_REGISTER message is sent to the netdev notifier chain. + * Callers must hold the rtnl lock - you may want register_netdev() + * instead of this. */ - int register_netdevice(struct net_device *dev) { int ret; @@ -10357,7 +10391,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, } EXPORT_SYMBOL(netdev_stats_to_stats64); -struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev) +struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev) { struct net_device_core_stats __percpu *p; @@ -10368,11 +10402,7 @@ struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev) free_percpu(p); /* This READ_ONCE() pairs with the cmpxchg() above */ - p = READ_ONCE(dev->core_stats); - if (!p) - return NULL; - - return this_cpu_ptr(p); + return READ_ONCE(dev->core_stats); } EXPORT_SYMBOL(netdev_core_stats_alloc); @@ -10409,10 +10439,10 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, for_each_possible_cpu(i) { core_stats = per_cpu_ptr(p, i); - storage->rx_dropped += local_read(&core_stats->rx_dropped); - storage->tx_dropped += local_read(&core_stats->tx_dropped); - storage->rx_nohandler += local_read(&core_stats->rx_nohandler); - storage->rx_otherhost_dropped += local_read(&core_stats->rx_otherhost_dropped); + storage->rx_dropped += READ_ONCE(core_stats->rx_dropped); + storage->tx_dropped += READ_ONCE(core_stats->tx_dropped); + storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler); + storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped); } } return storage; @@ -10571,9 +10601,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); - dev->gso_max_size = GSO_MAX_SIZE; + dev->gso_max_size = GSO_LEGACY_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; - dev->gro_max_size = GRO_MAX_SIZE; + dev->gro_max_size = GRO_LEGACY_MAX_SIZE; + dev->tso_max_size = TSO_LEGACY_MAX_SIZE; + dev->tso_max_segs = TSO_MAX_SEGS; dev->upper_level = 1; dev->lower_level = 1; #ifdef CONFIG_LOCKDEP @@ -11355,7 +11387,7 @@ static int __init net_dev_init(void) INIT_CSD(&sd->csd, rps_trigger_softirq, sd); sd->cpu = i; #endif - INIT_CSD(&sd->defer_csd, trigger_rx_softirq, NULL); + INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); spin_lock_init(&sd->defer_lock); init_gro_hash(&sd->backlog); diff --git a/net/core/dev.h b/net/core/dev.h index 27923df00637..cbb8a925175a 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -39,7 +39,7 @@ void dev_addr_check(struct net_device *dev); /* sysctls not referred to from outside net/core/ */ extern int netdev_budget; extern unsigned int netdev_budget_usecs; - +extern unsigned int sysctl_skb_defer_max; extern int netdev_tstamp_prequeue; extern int netdev_unregister_timeout_secs; extern int weight_p; @@ -88,4 +88,25 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier); void __dev_set_rx_mode(struct net_device *dev); +static inline void netif_set_gso_max_size(struct net_device *dev, + unsigned int size) +{ + /* dev->gso_max_size is read locklessly from sk_setup_caps() */ + WRITE_ONCE(dev->gso_max_size, size); +} + +static inline void netif_set_gso_max_segs(struct net_device *dev, + unsigned int segs) +{ + /* dev->gso_max_segs is read locklessly from sk_setup_caps() */ + WRITE_ONCE(dev->gso_max_segs, segs); +} + +static inline void netif_set_gro_max_size(struct net_device *dev, + unsigned int size) +{ + /* This pairs with the READ_ONCE() in skb_gro_receive() */ + WRITE_ONCE(dev->gro_max_size, size); +} + #endif diff --git a/net/core/devlink.c b/net/core/devlink.c index 5f441a0e34f4..5cc88490f18f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -83,11 +83,10 @@ struct devlink_linecard { const struct devlink_linecard_ops *ops; void *priv; enum devlink_linecard_state state; - struct mutex state_lock; /* Protects state and device_list */ + struct mutex state_lock; /* Protects state */ const char *type; struct devlink_linecard_type *types; unsigned int types_count; - struct list_head device_list; }; /** @@ -2059,56 +2058,6 @@ struct devlink_linecard_type { const void *priv; }; -struct devlink_linecard_device { - struct list_head list; - unsigned int index; - void *priv; -}; - -static int -devlink_nl_linecard_device_fill(struct sk_buff *msg, - struct devlink_linecard_device *linecard_device) -{ - struct nlattr *attr; - - attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE); - if (!attr) - return -EMSGSIZE; - if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_DEVICE_INDEX, - linecard_device->index)) { - nla_nest_cancel(msg, attr); - return -EMSGSIZE; - } - nla_nest_end(msg, attr); - - return 0; -} - -static int devlink_nl_linecard_devices_fill(struct sk_buff *msg, - struct devlink_linecard *linecard) -{ - struct devlink_linecard_device *linecard_device; - struct nlattr *attr; - int err; - - if (list_empty(&linecard->device_list)) - return 0; - - attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE_LIST); - if (!attr) - return -EMSGSIZE; - list_for_each_entry(linecard_device, &linecard->device_list, list) { - err = devlink_nl_linecard_device_fill(msg, linecard_device); - if (err) { - nla_nest_cancel(msg, attr); - return err; - } - } - nla_nest_end(msg, attr); - - return 0; -} - static int devlink_nl_linecard_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_linecard *linecard, @@ -2119,7 +2068,6 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, struct devlink_linecard_type *linecard_type; struct nlattr *attr; void *hdr; - int err; int i; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); @@ -2152,10 +2100,6 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, nla_nest_end(msg, attr); } - err = devlink_nl_linecard_devices_fill(msg, linecard); - if (err) - goto nla_put_failure; - genlmsg_end(msg, hdr); return 0; @@ -2425,191 +2369,6 @@ static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb, return 0; } -struct devlink_info_req { - struct sk_buff *msg; -}; - -static int -devlink_nl_linecard_device_info_fill(struct sk_buff *msg, - struct devlink_linecard *linecard, - struct devlink_linecard_device *linecard_device, - struct netlink_ext_ack *extack) -{ - struct nlattr *attr; - - attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE); - if (!attr) - return -EMSGSIZE; - if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_DEVICE_INDEX, - linecard_device->index)) { - nla_nest_cancel(msg, attr); - return -EMSGSIZE; - } - if (linecard->ops->device_info_get) { - struct devlink_info_req req; - int err; - - req.msg = msg; - err = linecard->ops->device_info_get(linecard_device, - linecard_device->priv, - &req, extack); - if (err) { - nla_nest_cancel(msg, attr); - return err; - } - } - nla_nest_end(msg, attr); - - return 0; -} - -static int devlink_nl_linecard_devices_info_fill(struct sk_buff *msg, - struct devlink_linecard *linecard, - struct netlink_ext_ack *extack) -{ - struct devlink_linecard_device *linecard_device; - struct nlattr *attr; - int err; - - if (list_empty(&linecard->device_list)) - return 0; - - attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE_LIST); - if (!attr) - return -EMSGSIZE; - list_for_each_entry(linecard_device, &linecard->device_list, list) { - err = devlink_nl_linecard_device_info_fill(msg, linecard, - linecard_device, - extack); - if (err) { - nla_nest_cancel(msg, attr); - return err; - } - } - nla_nest_end(msg, attr); - - return 0; -} - -static int -devlink_nl_linecard_info_fill(struct sk_buff *msg, struct devlink *devlink, - struct devlink_linecard *linecard, - enum devlink_command cmd, u32 portid, - u32 seq, int flags, struct netlink_ext_ack *extack) -{ - struct devlink_info_req req; - void *hdr; - int err; - - hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); - if (!hdr) - return -EMSGSIZE; - - err = -EMSGSIZE; - if (devlink_nl_put_handle(msg, devlink)) - goto nla_put_failure; - if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index)) - goto nla_put_failure; - - req.msg = msg; - err = linecard->ops->info_get(linecard, linecard->priv, &req, extack); - if (err) - goto nla_put_failure; - - err = devlink_nl_linecard_devices_info_fill(msg, linecard, extack); - if (err) - goto nla_put_failure; - - genlmsg_end(msg, hdr); - return 0; - -nla_put_failure: - genlmsg_cancel(msg, hdr); - return err; -} - -static int devlink_nl_cmd_linecard_info_get_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink_linecard *linecard = info->user_ptr[1]; - struct devlink *devlink = linecard->devlink; - struct sk_buff *msg; - int err; - - if (!linecard->ops->info_get) - return -EOPNOTSUPP; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - mutex_lock(&linecard->state_lock); - err = devlink_nl_linecard_info_fill(msg, devlink, linecard, - DEVLINK_CMD_LINECARD_INFO_GET, - info->snd_portid, info->snd_seq, 0, - info->extack); - mutex_unlock(&linecard->state_lock); - if (err) { - nlmsg_free(msg); - return err; - } - - return genlmsg_reply(msg, info); -} - -static int devlink_nl_cmd_linecard_info_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) -{ - struct devlink_linecard *linecard; - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; - int idx = 0; - int err = 0; - - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - - mutex_lock(&devlink->linecards_lock); - list_for_each_entry(linecard, &devlink->linecard_list, list) { - if (idx < start || !linecard->ops->info_get) { - idx++; - continue; - } - mutex_lock(&linecard->state_lock); - err = devlink_nl_linecard_info_fill(msg, devlink, linecard, - DEVLINK_CMD_LINECARD_INFO_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI, - cb->extack); - mutex_unlock(&linecard->state_lock); - if (err) { - mutex_unlock(&devlink->linecards_lock); - devlink_put(devlink); - goto out; - } - idx++; - } - mutex_unlock(&devlink->linecards_lock); -retry: - devlink_put(devlink); - } -out: - mutex_unlock(&devlink_mutex); - - if (err != -EMSGSIZE) - return err; - - cb->args[0] = idx; - return msg->len; -} - static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -6602,6 +6361,10 @@ out_dev: return err; } +struct devlink_info_req { + struct sk_buff *msg; +}; + int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) { return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name); @@ -9322,13 +9085,6 @@ static const struct genl_small_ops devlink_nl_ops[] = { .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, }, { - .cmd = DEVLINK_CMD_LINECARD_INFO_GET, - .doit = devlink_nl_cmd_linecard_info_get_doit, - .dumpit = devlink_nl_cmd_linecard_info_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, - /* can be retrieved by unprivileged users */ - }, - { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_get_doit, @@ -10508,7 +10264,6 @@ devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, linecard->priv = priv; linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; mutex_init(&linecard->state_lock); - INIT_LIST_HEAD(&linecard->device_list); err = devlink_linecard_types_init(linecard); if (err) { @@ -10536,7 +10291,6 @@ void devlink_linecard_destroy(struct devlink_linecard *linecard) struct devlink *devlink = linecard->devlink; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL); - WARN_ON(!list_empty(&linecard->device_list)); mutex_lock(&devlink->linecards_lock); list_del(&linecard->list); devlink_linecard_types_fini(linecard); @@ -10546,52 +10300,6 @@ void devlink_linecard_destroy(struct devlink_linecard *linecard) EXPORT_SYMBOL_GPL(devlink_linecard_destroy); /** - * devlink_linecard_device_create - Create a device on linecard - * - * @linecard: devlink linecard - * @device_index: index of the linecard device - * @priv: user priv pointer - * - * Return: Line card device structure or an ERR_PTR() encoded error code. - */ -struct devlink_linecard_device * -devlink_linecard_device_create(struct devlink_linecard *linecard, - unsigned int device_index, void *priv) -{ - struct devlink_linecard_device *linecard_device; - - linecard_device = kzalloc(sizeof(*linecard_device), GFP_KERNEL); - if (!linecard_device) - return ERR_PTR(-ENOMEM); - linecard_device->index = device_index; - linecard_device->priv = priv; - mutex_lock(&linecard->state_lock); - list_add_tail(&linecard_device->list, &linecard->device_list); - devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); - mutex_unlock(&linecard->state_lock); - return linecard_device; -} -EXPORT_SYMBOL_GPL(devlink_linecard_device_create); - -/** - * devlink_linecard_device_destroy - Destroy device on linecard - * - * @linecard: devlink linecard - * @linecard_device: devlink linecard device - */ -void -devlink_linecard_device_destroy(struct devlink_linecard *linecard, - struct devlink_linecard_device *linecard_device) -{ - mutex_lock(&linecard->state_lock); - list_del(&linecard_device->list); - devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); - mutex_unlock(&linecard->state_lock); - kfree(linecard_device); -} -EXPORT_SYMBOL_GPL(devlink_linecard_device_destroy); - -/** * devlink_linecard_provision_set - Set provisioning on linecard * * @linecard: devlink linecard @@ -10623,7 +10331,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set); void devlink_linecard_provision_clear(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); - WARN_ON(!list_empty(&linecard->device_list)); linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; linecard->type = NULL; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index b89e3e95bffc..41cac0e4834e 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -517,7 +517,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore, if (!nskb) return; - if ((unsigned int)reason >= SKB_DROP_REASON_MAX) + if (unlikely(reason >= SKB_DROP_REASON_MAX || reason <= 0)) reason = SKB_DROP_REASON_NOT_SPECIFIED; cb = NET_DM_SKB_CB(nskb); cb->reason = reason; diff --git a/net/core/gro.c b/net/core/gro.c index 78110edf5d4b..b4190eb08467 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -167,6 +167,14 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush)) return -E2BIG; + if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) { + if (p->protocol != htons(ETH_P_IPV6) || + skb_headroom(p) < sizeof(struct hop_jumbo_hdr) || + ipv6_hdr(p)->nexthdr != IPPROTO_TCP || + p->encapsulation) + return -E2BIG; + } + lp = NAPI_GRO_CB(p)->last; pinfo = skb_shinfo(lp); diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 349480ef68a5..8b6b5e72b217 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -159,10 +159,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst->lwtstate->orig_output(net, sk, skb); } -static int xmit_check_hhlen(struct sk_buff *skb) +static int xmit_check_hhlen(struct sk_buff *skb, int hh_len) { - int hh_len = skb_dst(skb)->dev->hard_header_len; - if (skb_headroom(skb) < hh_len) { int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); @@ -274,6 +272,7 @@ static int bpf_xmit(struct sk_buff *skb) bpf = bpf_lwt_lwtunnel(dst->lwtstate); if (bpf->xmit.prog) { + int hh_len = dst->dev->hard_header_len; __be16 proto = skb->protocol; int ret; @@ -291,7 +290,7 @@ static int bpf_xmit(struct sk_buff *skb) /* If the header was expanded, headroom might be too * small for L2 header to come, expand as needed. */ - ret = xmit_check_hhlen(skb); + ret = xmit_check_hhlen(skb, hh_len); if (unlikely(ret)) return ret; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f64ebd050f6c..47b6c1f0fdbb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3728,7 +3728,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; char *p_name; - t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); + t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT); if (!t) goto err; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4980c3a50475..e319e242dddf 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -746,7 +746,6 @@ static const struct attribute_group netstat_group = { .attrs = netstat_attrs, }; -#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211) static struct attribute *wireless_attrs[] = { NULL }; @@ -755,7 +754,19 @@ static const struct attribute_group wireless_group = { .name = "wireless", .attrs = wireless_attrs, }; + +static bool wireless_group_needed(struct net_device *ndev) +{ +#if IS_ENABLED(CONFIG_CFG80211) + if (ndev->ieee80211_ptr) + return true; #endif +#if IS_ENABLED(CONFIG_WIRELESS_EXT) + if (ndev->wireless_handlers) + return true; +#endif + return false; +} #else /* CONFIG_SYSFS */ #define net_class_groups NULL @@ -1996,14 +2007,8 @@ int netdev_register_kobject(struct net_device *ndev) *groups++ = &netstat_group; -#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211) - if (ndev->ieee80211_ptr) - *groups++ = &wireless_group; -#if IS_ENABLED(CONFIG_WIRELESS_EXT) - else if (ndev->wireless_handlers) + if (wireless_group_needed(ndev)) *groups++ = &wireless_group; -#endif -#endif #endif /* CONFIG_SYSFS */ error = device_add(dev); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index bdbadfaee867..f18e6e771993 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -704,8 +704,10 @@ struct page *page_pool_alloc_frag(struct page_pool *pool, if (page && *offset + size > max_size) { page = page_pool_drain_frag(pool, page); - if (page) + if (page) { + alloc_stat_inc(pool, fast); goto frag_reset; + } } if (!page) { @@ -727,6 +729,7 @@ frag_reset: pool->frag_users++; pool->frag_offset = *offset + size; + alloc_stat_inc(pool, fast); return page; } EXPORT_SYMBOL(page_pool_alloc_frag); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 73f2cbc440c9..ac45328607f7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1064,6 +1064,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */ + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */ + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */ + + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */ + + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ @@ -1769,6 +1771,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) || + nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) || + nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif @@ -1922,6 +1926,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), [IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING }, [IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT }, + [IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2306,6 +2312,19 @@ invalid_attr: return -EINVAL; } +static int rtnl_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_set_vf_rate) + return -EOPNOTSUPP; + if (max_tx_rate && max_tx_rate < min_tx_rate) + return -EINVAL; + + return ops->ndo_set_vf_rate(dev, vf, min_tx_rate, max_tx_rate); +} + static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { @@ -2341,14 +2360,6 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], } } - if (tb[IFLA_GRO_MAX_SIZE]) { - u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]); - - if (gro_max_size > GRO_MAX_SIZE) { - NL_SET_ERR_MSG(extack, "too big gro_max_size"); - return -EINVAL; - } - } return 0; } @@ -2443,11 +2454,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (err < 0) return err; - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivf.min_tx_rate, - ivt->rate); + err = rtnl_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, ivt->rate); if (err < 0) return err; } @@ -2457,11 +2465,9 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (ivt->vf >= INT_MAX) return -EINVAL; - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivt->min_tx_rate, - ivt->max_tx_rate); + + err = rtnl_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, ivt->max_tx_rate); if (err < 0) return err; } @@ -2803,7 +2809,7 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_GSO_MAX_SIZE]) { u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]); - if (max_size > GSO_MAX_SIZE) { + if (max_size > dev->tso_max_size) { err = -EINVAL; goto errout; } @@ -2817,7 +2823,7 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_GSO_MAX_SEGS]) { u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]); - if (max_segs > GSO_MAX_SEGS) { + if (max_segs > GSO_MAX_SEGS || max_segs > dev->tso_max_segs) { err = -EINVAL; goto errout; } @@ -3302,23 +3308,116 @@ static int rtnl_group_changelink(const struct sk_buff *skb, return 0; } -static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attr, struct netlink_ext_ack *extack) +static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, + const struct rtnl_link_ops *ops, + struct nlattr **tb, struct nlattr **data, + struct netlink_ext_ack *extack) { - struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; unsigned char name_assign_type = NET_NAME_USER; + struct net *net = sock_net(skb->sk); + struct net *dest_net, *link_net; + struct net_device *dev; + char ifname[IFNAMSIZ]; + int err; + + if (!ops->alloc && !ops->setup) + return -EOPNOTSUPP; + + if (tb[IFLA_IFNAME]) { + nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + } else { + snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); + name_assign_type = NET_NAME_ENUM; + } + + dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + + if (tb[IFLA_LINK_NETNSID]) { + int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); + + link_net = get_net_ns_by_id(dest_net, id); + if (!link_net) { + NL_SET_ERR_MSG(extack, "Unknown network namespace id"); + err = -EINVAL; + goto out; + } + err = -EPERM; + if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) + goto out; + } else { + link_net = NULL; + } + + dev = rtnl_create_link(link_net ? : dest_net, ifname, + name_assign_type, ops, tb, extack); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + goto out; + } + + dev->ifindex = ifm->ifi_index; + + if (ops->newlink) + err = ops->newlink(link_net ? : net, dev, tb, data, extack); + else + err = register_netdevice(dev); + if (err < 0) { + free_netdev(dev); + goto out; + } + + err = rtnl_configure_link(dev, ifm); + if (err < 0) + goto out_unregister; + if (link_net) { + err = dev_change_net_namespace(dev, dest_net, ifname); + if (err < 0) + goto out_unregister; + } + if (tb[IFLA_MASTER]) { + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); + if (err) + goto out_unregister; + } +out: + if (link_net) + put_net(link_net); + put_net(dest_net); + return err; +out_unregister: + if (ops->newlink) { + LIST_HEAD(list_kill); + + ops->dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + } else { + unregister_netdevice(dev); + } + goto out; +} + +struct rtnl_newlink_tbs { + struct nlattr *tb[IFLA_MAX + 1]; + struct nlattr *attr[RTNL_MAX_TYPE + 1]; + struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; +}; + +static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtnl_newlink_tbs *tbs, + struct netlink_ext_ack *extack) +{ struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; + struct nlattr ** const tb = tbs->tb; const struct rtnl_link_ops *m_ops; struct net_device *master_dev; struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; - struct nlattr *tb[IFLA_MAX + 1]; - struct net *dest_net, *link_net; struct nlattr **slave_data; char kind[MODULE_NAME_LEN]; struct net_device *dev; struct ifinfomsg *ifm; - char ifname[IFNAMSIZ]; struct nlattr **data; bool link_specified; int err; @@ -3382,12 +3481,12 @@ replay: return -EINVAL; if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { - err = nla_parse_nested_deprecated(attr, ops->maxtype, + err = nla_parse_nested_deprecated(tbs->attr, ops->maxtype, linkinfo[IFLA_INFO_DATA], ops->policy, extack); if (err < 0) return err; - data = attr; + data = tbs->attr; } if (ops->validate) { err = ops->validate(tb, data, extack); @@ -3403,14 +3502,14 @@ replay: if (m_ops->slave_maxtype && linkinfo[IFLA_INFO_SLAVE_DATA]) { - err = nla_parse_nested_deprecated(slave_attr, + err = nla_parse_nested_deprecated(tbs->slave_attr, m_ops->slave_maxtype, linkinfo[IFLA_INFO_SLAVE_DATA], m_ops->slave_policy, extack); if (err < 0) return err; - slave_data = slave_attr; + slave_data = tbs->slave_attr; } } @@ -3478,96 +3577,21 @@ replay: return -EOPNOTSUPP; } - if (!ops->alloc && !ops->setup) - return -EOPNOTSUPP; - - if (tb[IFLA_IFNAME]) { - nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - } else { - snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); - name_assign_type = NET_NAME_ENUM; - } - - dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); - if (IS_ERR(dest_net)) - return PTR_ERR(dest_net); - - if (tb[IFLA_LINK_NETNSID]) { - int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); - - link_net = get_net_ns_by_id(dest_net, id); - if (!link_net) { - NL_SET_ERR_MSG(extack, "Unknown network namespace id"); - err = -EINVAL; - goto out; - } - err = -EPERM; - if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) - goto out; - } else { - link_net = NULL; - } - - dev = rtnl_create_link(link_net ? : dest_net, ifname, - name_assign_type, ops, tb, extack); - if (IS_ERR(dev)) { - err = PTR_ERR(dev); - goto out; - } - - dev->ifindex = ifm->ifi_index; - - if (ops->newlink) - err = ops->newlink(link_net ? : net, dev, tb, data, extack); - else - err = register_netdevice(dev); - if (err < 0) { - free_netdev(dev); - goto out; - } - - err = rtnl_configure_link(dev, ifm); - if (err < 0) - goto out_unregister; - if (link_net) { - err = dev_change_net_namespace(dev, dest_net, ifname); - if (err < 0) - goto out_unregister; - } - if (tb[IFLA_MASTER]) { - err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); - if (err) - goto out_unregister; - } -out: - if (link_net) - put_net(link_net); - put_net(dest_net); - return err; -out_unregister: - if (ops->newlink) { - LIST_HEAD(list_kill); - - ops->dellink(dev, &list_kill); - unregister_netdevice_many(&list_kill); - } else { - unregister_netdevice(dev); - } - goto out; + return rtnl_newlink_create(skb, ifm, ops, tb, data, extack); } static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - struct nlattr **attr; + struct rtnl_newlink_tbs *tbs; int ret; - attr = kmalloc_array(RTNL_MAX_TYPE + 1, sizeof(*attr), GFP_KERNEL); - if (!attr) + tbs = kmalloc(sizeof(*tbs), GFP_KERNEL); + if (!tbs) return -ENOMEM; - ret = __rtnl_newlink(skb, nlh, attr, extack); - kfree(attr); + ret = __rtnl_newlink(skb, nlh, tbs, extack); + kfree(tbs); return ret; } @@ -4240,7 +4264,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, ops = br_dev->netdev_ops; if (!del_bulk) { if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid); + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); } else { if (ops->ndo_fdb_del_bulk) err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, @@ -4258,7 +4282,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, ops = dev->netdev_ops; if (!del_bulk) { if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid); + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); else err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); } else { diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 9b8443774449..5f85e01d4093 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -22,6 +22,8 @@ static siphash_aligned_key_t net_secret; static siphash_aligned_key_t ts_secret; +#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ) + static __always_inline void net_secret_init(void) { net_get_random_once(&net_secret, sizeof(net_secret)); @@ -94,17 +96,19 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, } EXPORT_SYMBOL(secure_tcpv6_seq); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) { const struct { struct in6_addr saddr; struct in6_addr daddr; + unsigned int timeseed; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, - .dport = dport + .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, + .dport = dport, }; net_secret_init(); return siphash(&combined, offsetofend(typeof(combined), dport), @@ -142,11 +146,13 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, } EXPORT_SYMBOL_GPL(secure_tcp_seq); -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { net_secret_init(); - return siphash_3u32((__force u32)saddr, (__force u32)daddr, - (__force u16)dport, &net_secret); + return siphash_4u32((__force u32)saddr, (__force u32)daddr, + (__force u16)dport, + jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, + &net_secret); } EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 00980e62415b..5b3559cb1d82 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -80,7 +80,7 @@ #include <linux/user_namespace.h> #include <linux/indirect_call_wrapper.h> -#include "datagram.h" +#include "dev.h" #include "sock_destructor.h" struct kmem_cache *skbuff_head_cache __ro_after_init; @@ -772,6 +772,8 @@ void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) if (!skb_unref(skb)) return; + DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX); + trace_kfree_skb(skb, __builtin_return_address(0), reason); __kfree_skb(skb); } @@ -1166,7 +1168,7 @@ void mm_unaccount_pinned_pages(struct mmpin *mmp) } EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages); -struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) +static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) { struct ubuf_info *uarg; struct sk_buff *skb; @@ -1197,7 +1199,6 @@ struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) return uarg; } -EXPORT_SYMBOL_GPL(msg_zerocopy_alloc); static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg) { @@ -1340,18 +1341,11 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) } EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort); -int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) -{ - return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); -} -EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram); - int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg) { struct ubuf_info *orig_uarg = skb_zcopy(skb); - struct iov_iter orig_iter = msg->msg_iter; int err, orig_len = skb->len; /* An skb can only point to one uarg. This edge case happens when @@ -1365,7 +1359,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct sock *save_sk = skb->sk; /* Streams do not free skb on error. Reset to prev state. */ - msg->msg_iter = orig_iter; + iov_iter_revert(&msg->msg_iter, skb->len - orig_len); skb->sk = sk; ___pskb_trim(skb, orig_len); skb->sk = save_sk; @@ -3898,7 +3892,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, unsigned int delta_len = 0; struct sk_buff *tail = NULL; struct sk_buff *nskb, *tmp; - int err; + int len_diff, err; skb_push(skb, -skb_network_offset(skb) + offset); @@ -3938,9 +3932,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, skb_push(nskb, -skb_network_offset(nskb) + offset); skb_release_head_state(nskb); + len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb); __copy_skb_header(nskb, skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); + nskb->transport_header += len_diff; skb_copy_from_linear_data_offset(skb, -tnl_hlen, nskb->data - tnl_hlen, offset + tnl_hlen); @@ -6501,37 +6497,35 @@ void skb_attempt_defer_free(struct sk_buff *skb) int cpu = skb->alloc_cpu; struct softnet_data *sd; unsigned long flags; + unsigned int defer_max; bool kick; if (WARN_ON_ONCE(cpu >= nr_cpu_ids) || !cpu_online(cpu) || cpu == raw_smp_processor_id()) { - __kfree_skb(skb); +nodefer: __kfree_skb(skb); return; } sd = &per_cpu(softnet_data, cpu); - /* We do not send an IPI or any signal. - * Remote cpu will eventually call skb_defer_free_flush() - */ + defer_max = READ_ONCE(sysctl_skb_defer_max); + if (READ_ONCE(sd->defer_count) >= defer_max) + goto nodefer; + spin_lock_irqsave(&sd->defer_lock, flags); + /* Send an IPI every time queue reaches half capacity. */ + kick = sd->defer_count == (defer_max >> 1); + /* Paired with the READ_ONCE() few lines above */ + WRITE_ONCE(sd->defer_count, sd->defer_count + 1); + skb->next = sd->defer_list; /* Paired with READ_ONCE() in skb_defer_free_flush() */ WRITE_ONCE(sd->defer_list, skb); - sd->defer_count++; - - /* kick every time queue length reaches 128. - * This should avoid blocking in smp_call_function_single_async(). - * This condition should hardly be bit under normal conditions, - * unless cpu suddenly stopped to receive NIC interrupts. - */ - kick = sd->defer_count == 128; - spin_unlock_irqrestore(&sd->defer_lock, flags); /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU * if we are unlucky enough (this seems very unlikely). */ - if (unlikely(kick)) + if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) smp_call_function_single_async(cpu, &sd->defer_csd); } diff --git a/net/core/sock.c b/net/core/sock.c index a0f3989de3d6..2ff40dd0a7a6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -146,6 +146,9 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +static void sock_def_write_space_wfree(struct sock *sk); +static void sock_def_write_space(struct sock *sk); + /** * sk_ns_capable - General socket capability test * @sk: Socket to use a capability on or through @@ -632,7 +635,9 @@ static int sock_bindtoindex_locked(struct sock *sk, int ifindex) if (ifindex < 0) goto out; - sk->sk_bound_dev_if = ifindex; + /* Paired with all READ_ONCE() done locklessly. */ + WRITE_ONCE(sk->sk_bound_dev_if, ifindex); + if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); sk_dst_reset(sk); @@ -710,10 +715,11 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval, { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); struct net *net = sock_net(sk); char devname[IFNAMSIZ]; - if (sk->sk_bound_dev_if == 0) { + if (bound_dev_if == 0) { len = 0; goto zero; } @@ -722,7 +728,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval, if (len < IFNAMSIZ) goto out; - ret = netdev_get_name(net, devname, sk->sk_bound_dev_if); + ret = netdev_get_name(net, devname, bound_dev_if); if (ret) goto out; @@ -1311,6 +1317,15 @@ set_sndbuf: __sock_set_mark(sk, val); break; + case SO_RCVMARK: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && + !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + break; + } + + sock_valbool_flag(sk, SOCK_RCVMARK, valbool); + break; case SO_RXQ_OVFL: sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); @@ -1737,6 +1752,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_mark; break; + case SO_RCVMARK: + v.val = sock_flag(sk, SOCK_RCVMARK); + break; + case SO_RXQ_OVFL: v.val = sock_flag(sk, SOCK_RXQ_OVFL); break; @@ -1845,7 +1864,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_BINDTOIFINDEX: - v.val = sk->sk_bound_dev_if; + v.val = READ_ONCE(sk->sk_bound_dev_if); break; case SO_NETNS_COOKIE: @@ -2277,6 +2296,19 @@ void sk_free_unlock_clone(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_free_unlock_clone); +static void sk_trim_gso_size(struct sock *sk) +{ + if (sk->sk_gso_max_size <= GSO_LEGACY_MAX_SIZE) + return; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6 && + sk_is_tcp(sk) && + !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + return; +#endif + sk->sk_gso_max_size = GSO_LEGACY_MAX_SIZE; +} + void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; @@ -2296,6 +2328,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */ sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size); + sk_trim_gso_size(sk); sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1); /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1); @@ -2317,8 +2350,20 @@ void sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; unsigned int len = skb->truesize; + bool free; if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { + if (sock_flag(sk, SOCK_RCU_FREE) && + sk->sk_write_space == sock_def_write_space) { + rcu_read_lock(); + free = refcount_sub_and_test(len, &sk->sk_wmem_alloc); + sock_def_write_space_wfree(sk); + rcu_read_unlock(); + if (unlikely(free)) + __sk_free(sk); + return; + } + /* * Keep a reference on sk_wmem_alloc, this will be released * after sk_write_space() call @@ -2628,13 +2673,6 @@ failure: } EXPORT_SYMBOL(sock_alloc_send_pskb); -struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, - int noblock, int *errcode) -{ - return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0); -} -EXPORT_SYMBOL(sock_alloc_send_skb); - int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, struct sockcm_cookie *sockc) { @@ -3191,20 +3229,42 @@ static void sock_def_write_space(struct sock *sk) /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ - if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) { + if (sock_writeable(sk)) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); /* Should agree with poll, otherwise some programs break */ - if (sock_writeable(sk)) - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); } +/* An optimised version of sock_def_write_space(), should only be called + * for SOCK_RCU_FREE sockets under RCU read section and after putting + * ->sk_wmem_alloc. + */ +static void sock_def_write_space_wfree(struct sock *sk) +{ + /* Do not wake up a writer until he can make "significant" + * progress. --DaveM + */ + if (sock_writeable(sk)) { + struct socket_wq *wq = rcu_dereference(sk->sk_wq); + + /* rely on refcount_sub from sock_wfree() */ + smp_mb__after_atomic(); + if (wq && waitqueue_active(&wq->wait)) + wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | + EPOLLWRNORM | EPOLLWRBAND); + + /* Should agree with poll, otherwise some programs break */ + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + } +} + static void sock_def_destruct(struct sock *sk) { } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index ca4527fcbc75..71a13596ea2b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -25,12 +25,10 @@ #include "dev.h" -static int three = 3; static int int_3600 = 3600; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; -static long long_one __maybe_unused = 1; static long long_max __maybe_unused = LONG_MAX; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -424,7 +422,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(long), .mode = 0600, .proc_handler = proc_dolongvec_minmax_bpf_restricted, - .extra1 = &long_one, + .extra1 = SYSCTL_LONG_ONE, .extra2 = &bpf_jit_limit_max, }, #endif @@ -560,7 +558,7 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &three, + .extra2 = SYSCTL_THREE, }, { .procname = "high_order_alloc_disable", @@ -586,6 +584,14 @@ static struct ctl_table net_core_table[] = { .extra1 = SYSCTL_ONE, .extra2 = &int_3600, }, + { + .procname = "skb_defer_max", + .data = &sysctl_skb_defer_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, { } }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 82696ab86f74..da6e3b20cd75 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -628,7 +628,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); ireq->ir_mark = inet_request_mark(sk, skb); ireq->ireq_family = AF_INET; - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if); /* * Step 3: Process LISTEN state @@ -1029,9 +1029,15 @@ static void __net_exit dccp_v4_exit_net(struct net *net) inet_ctl_sock_destroy(pn->v4_ctl_sk); } +static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&dccp_hashinfo, AF_INET); +} + static struct pernet_operations dccp_v4_ops = { .init = dccp_v4_init_net, .exit = dccp_v4_exit_net, + .exit_batch = dccp_v4_exit_batch, .id = &dccp_v4_pernet_id, .size = sizeof(struct dccp_v4_pernet), }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4d95b6400915..fd44638ec16b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -374,10 +374,10 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) refcount_inc(&skb->users); ireq->pktopts = skb; } - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if); /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && + if (!ireq->ir_iif && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = inet6_iif(skb); @@ -1115,9 +1115,15 @@ static void __net_exit dccp_v6_exit_net(struct net *net) inet_ctl_sock_destroy(pn->v6_ctl_sk); } +static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&dccp_hashinfo, AF_INET6); +} + static struct pernet_operations dccp_v6_ops = { .init = dccp_v6_init_net, .exit = dccp_v6_exit_net, + .exit_batch = dccp_v6_exit_batch, .id = &dccp_v6_pernet_id, .size = sizeof(struct dccp_v6_pernet), }; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 58421f94427e..2e78458900f2 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1110,7 +1110,6 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > sizeof_field(struct sk_buff, cb)); - inet_hashinfo_init(&dccp_hashinfo); rc = inet_hashinfo2_init_mod(&dccp_hashinfo); if (rc) goto out_fail; @@ -1121,6 +1120,12 @@ static int __init dccp_init(void) SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!dccp_hashinfo.bind_bucket_cachep) goto out_free_hashinfo2; + dccp_hashinfo.bind2_bucket_cachep = + kmem_cache_create("dccp_bind2_bucket", + sizeof(struct inet_bind2_bucket), 0, + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); + if (!dccp_hashinfo.bind2_bucket_cachep) + goto out_free_bind_bucket_cachep; /* * Size and allocate the main established and bind bucket @@ -1151,7 +1156,7 @@ static int __init dccp_init(void) if (!dccp_hashinfo.ehash) { DCCP_CRIT("Failed to allocate DCCP established hash table"); - goto out_free_bind_bucket_cachep; + goto out_free_bind2_bucket_cachep; } for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) @@ -1177,14 +1182,23 @@ static int __init dccp_init(void) goto out_free_dccp_locks; } + dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *) + __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order); + + if (!dccp_hashinfo.bhash2) { + DCCP_CRIT("Failed to allocate DCCP bind2 hash table"); + goto out_free_dccp_bhash; + } + for (i = 0; i < dccp_hashinfo.bhash_size; i++) { spin_lock_init(&dccp_hashinfo.bhash[i].lock); INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); + INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain); } rc = dccp_mib_init(); if (rc) - goto out_free_dccp_bhash; + goto out_free_dccp_bhash2; rc = dccp_ackvec_init(); if (rc) @@ -1208,30 +1222,38 @@ out_ackvec_exit: dccp_ackvec_exit(); out_free_dccp_mib: dccp_mib_exit(); +out_free_dccp_bhash2: + free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order); out_free_dccp_bhash: free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); out_free_dccp_locks: inet_ehash_locks_free(&dccp_hashinfo); out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); +out_free_bind2_bucket_cachep: + kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep); out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); out_free_hashinfo2: inet_hashinfo2_free_mod(&dccp_hashinfo); out_fail: dccp_hashinfo.bhash = NULL; + dccp_hashinfo.bhash2 = NULL; dccp_hashinfo.ehash = NULL; dccp_hashinfo.bind_bucket_cachep = NULL; + dccp_hashinfo.bind2_bucket_cachep = NULL; return rc; } static void __exit dccp_fini(void) { + int bhash_order = get_order(dccp_hashinfo.bhash_size * + sizeof(struct inet_bind_hashbucket)); + ccid_cleanup_builtins(); dccp_mib_exit(); - free_pages((unsigned long)dccp_hashinfo.bhash, - get_order(dccp_hashinfo.bhash_size * - sizeof(struct inet_bind_hashbucket))); + free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); + free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order); free_pages((unsigned long)dccp_hashinfo.ehash, get_order((dccp_hashinfo.ehash_mask + 1) * sizeof(struct inet_ehash_bucket))); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 0ee7d4c0c955..a09ba642b5e7 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -854,7 +854,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) memcpy(msg->neighbor, dn_hiord, ETH_ALEN); if (dn_db->router) { - struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; + struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n); dn_dn2eth(msg->neighbor, dn->addr); } @@ -902,7 +902,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) { int n; struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; + struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n); struct sk_buff *skb; size_t size; unsigned char *ptr; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 94b306f6d551..fbd98ac853ea 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -426,7 +426,8 @@ int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb) if (!dn_db->router) { dn_db->router = neigh_clone(neigh); } else { - if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority) + if (msg->priority > container_of(dn_db->router, + struct dn_neigh, n)->priority) neigh_release(xchg(&dn_db->router, neigh_clone(neigh))); } } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 7e85f2a1ae25..552a53f1d5d0 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -159,7 +159,7 @@ static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how struct neighbour *n = rt->n; if (n && n->dev == dev) { - n->dev = dev_net(dev)->loopback_dev; + n->dev = blackhole_netdev; dev_hold(n->dev); dev_put(dev); } @@ -1120,7 +1120,7 @@ source_ok: /* Ok then, we assume its directly connected and move on */ select_source: if (neigh) - gateway = ((struct dn_neigh *)neigh)->addr; + gateway = container_of(neigh, struct dn_neigh, n)->addr; if (gateway == 0) gateway = fld.daddr; if (fld.saddr == 0) { @@ -1429,7 +1429,7 @@ static int dn_route_input_slow(struct sk_buff *skb) /* Use the default router if there is one */ neigh = neigh_clone(dn_db->router); if (neigh) { - gateway = ((struct dn_neigh *)neigh)->addr; + gateway = container_of(neigh, struct dn_neigh, n)->addr; goto make_route; } diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0c6ae32742ec..be7b320cda76 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -458,46 +458,6 @@ struct dsa_port *dsa_port_from_netdev(struct net_device *netdev) } EXPORT_SYMBOL_GPL(dsa_port_from_netdev); -int dsa_port_walk_fdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb) -{ - struct dsa_port *dp = dsa_to_port(ds, port); - struct dsa_mac_addr *a; - int err = 0; - - mutex_lock(&dp->addr_lists_lock); - - list_for_each_entry(a, &dp->fdbs, list) { - err = cb(ds, port, a->addr, a->vid, a->db); - if (err) - break; - } - - mutex_unlock(&dp->addr_lists_lock); - - return err; -} -EXPORT_SYMBOL_GPL(dsa_port_walk_fdbs); - -int dsa_port_walk_mdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb) -{ - struct dsa_port *dp = dsa_to_port(ds, port); - struct dsa_mac_addr *a; - int err = 0; - - mutex_lock(&dp->addr_lists_lock); - - list_for_each_entry(a, &dp->mdbs, list) { - err = cb(ds, port, a->addr, a->vid, a->db); - if (err) - break; - } - - mutex_unlock(&dp->addr_lists_lock); - - return err; -} -EXPORT_SYMBOL_GPL(dsa_port_walk_mdbs); - bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b) { if (a->type != b->type) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index cf933225df32..cac48a741f27 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/rtnetlink.h> #include <linux/of.h> +#include <linux/of_mdio.h> #include <linux/of_net.h> #include <net/devlink.h> #include <net/sch_generic.h> @@ -809,22 +810,18 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds) { const struct dsa_device_ops *tag_ops = ds->dst->tag_ops; struct dsa_switch_tree *dst = ds->dst; - struct dsa_port *cpu_dp; int err; if (tag_ops->proto == dst->default_proto) goto connect; - dsa_switch_for_each_cpu_port(cpu_dp, ds) { - rtnl_lock(); - err = ds->ops->change_tag_protocol(ds, cpu_dp->index, - tag_ops->proto); - rtnl_unlock(); - if (err) { - dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n", - tag_ops->name, ERR_PTR(err)); - return err; - } + rtnl_lock(); + err = ds->ops->change_tag_protocol(ds, tag_ops->proto); + rtnl_unlock(); + if (err) { + dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n", + tag_ops->name, ERR_PTR(err)); + return err; } connect: @@ -856,6 +853,7 @@ disconnect: static int dsa_switch_setup(struct dsa_switch *ds) { struct dsa_devlink_priv *dl_priv; + struct device_node *dn; struct dsa_port *dp; int err; @@ -911,7 +909,10 @@ static int dsa_switch_setup(struct dsa_switch *ds) dsa_slave_mii_bus_init(ds); - err = mdiobus_register(ds->slave_mii_bus); + dn = of_get_child_by_name(ds->dev->of_node, "mdio"); + + err = of_mdiobus_register(ds->slave_mii_bus, dn); + of_node_put(dn); if (err < 0) goto free_slave_mii_bus; } diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 7c9abd5a0ab9..d9722e49864b 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -291,6 +291,7 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast); void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast); +void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc); /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; diff --git a/net/dsa/port.c b/net/dsa/port.c index 8856ed6afd05..3738f2d40a0b 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -503,6 +503,7 @@ out_rollback_unoffload: switchdev_bridge_port_unoffload(brport_dev, dp, &dsa_slave_switchdev_notifier, &dsa_slave_switchdev_blocking_notifier); + dsa_flush_workqueue(); out_rollback_unbridge: dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); out_rollback: @@ -919,6 +920,14 @@ int dsa_port_bridge_flags(struct dsa_port *dp, return 0; } +void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc) +{ + struct dsa_switch *ds = dp->ds; + + if (ds->ops->port_set_host_flood) + ds->ops->port_set_host_flood(ds, dp->index, uc, mc); +} + int dsa_port_vlan_msti(struct dsa_port *dp, const struct switchdev_vlan_msti *msti) { @@ -1652,8 +1661,10 @@ int dsa_port_link_register_of(struct dsa_port *dp) if (ds->ops->phylink_mac_link_down) ds->ops->phylink_mac_link_down(ds, port, MLO_AN_FIXED, PHY_INTERFACE_MODE_NA); + of_node_put(phy_np); return dsa_port_phylink_register(dp); } + of_node_put(phy_np); return 0; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 63da683d4660..801a5d445833 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -262,37 +262,13 @@ static int dsa_slave_close(struct net_device *dev) return 0; } -/* Keep flooding enabled towards this port's CPU port as long as it serves at - * least one port in the tree that requires it. - */ -static void dsa_port_manage_cpu_flood(struct dsa_port *dp) +static void dsa_slave_manage_host_flood(struct net_device *dev) { - struct switchdev_brport_flags flags = { - .mask = BR_FLOOD | BR_MCAST_FLOOD, - }; - struct dsa_switch_tree *dst = dp->ds->dst; - struct dsa_port *cpu_dp = dp->cpu_dp; - struct dsa_port *other_dp; - int err; - - list_for_each_entry(other_dp, &dst->ports, list) { - if (!dsa_port_is_user(other_dp)) - continue; - - if (other_dp->cpu_dp != cpu_dp) - continue; - - if (other_dp->slave->flags & IFF_ALLMULTI) - flags.val |= BR_MCAST_FLOOD; - if (other_dp->slave->flags & IFF_PROMISC) - flags.val |= BR_FLOOD; - } - - err = dsa_port_pre_bridge_flags(dp, flags, NULL); - if (err) - return; + bool mc = dev->flags & (IFF_PROMISC | IFF_ALLMULTI); + struct dsa_port *dp = dsa_slave_to_port(dev); + bool uc = dev->flags & IFF_PROMISC; - dsa_port_bridge_flags(cpu_dp, flags, NULL); + dsa_port_set_host_flood(dp, uc, mc); } static void dsa_slave_change_rx_flags(struct net_device *dev, int change) @@ -310,7 +286,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change) if (dsa_switch_supports_uc_filtering(ds) && dsa_switch_supports_mc_filtering(ds)) - dsa_port_manage_cpu_flood(dp); + dsa_slave_manage_host_flood(dev); } static void dsa_slave_set_rx_mode(struct net_device *dev) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 704975e5c1c2..2b56218fc57c 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -809,14 +809,12 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds, ASSERT_RTNL(); - dsa_switch_for_each_cpu_port(cpu_dp, ds) { - err = ds->ops->change_tag_protocol(ds, cpu_dp->index, - tag_ops->proto); - if (err) - return err; + err = ds->ops->change_tag_protocol(ds, tag_ops->proto); + if (err) + return err; + dsa_switch_for_each_cpu_port(cpu_dp, ds) dsa_port_set_tag_protocol(cpu_dp, tag_ops); - } /* Now that changing the tag protocol can no longer fail, let's update * the remaining bits which are "duplicated for faster access", and the diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 0c5210015911..566adf85e658 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -201,6 +201,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(400000, CR4, Full), __DEFINE_LINK_MODE_NAME(100, FX, Half), __DEFINE_LINK_MODE_NAME(100, FX, Full), + __DEFINE_LINK_MODE_NAME(10, T1L, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -236,6 +237,7 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_T1 1 #define __LINK_MODE_LANES_X 1 #define __LINK_MODE_LANES_FX 1 +#define __LINK_MODE_LANES_T1L 1 #define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ @@ -349,6 +351,7 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(100, FX, Half), __DEFINE_LINK_MODE_PARAMS(100, FX, Full), + __DEFINE_LINK_MODE_PARAMS(10, T1L, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index f24852814fa3..718fb77bb372 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -328,7 +328,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (err) goto done; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (flags & MSG_TRUNC) copied = skb->len; @@ -718,7 +718,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (err) goto done; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (saddr) { /* Clear the implicit padding in struct sockaddr_ieee802154 diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 53a6b14dc50a..89141ba5e9ee 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2573,7 +2573,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, struct devinet_sysctl_table *t; char path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; - t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); + t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT); if (!t) goto out; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index d747166bb291..b21238df3301 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -705,7 +705,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) static inline int esp_remove_trailer(struct sk_buff *skb) { struct xfrm_state *x = xfrm_input_state(skb); - struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; int alen, hlen, elen; int padlen, trimlen; @@ -717,11 +716,6 @@ static inline int esp_remove_trailer(struct sk_buff *skb) hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); elen = skb->len - hlen; - if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) { - ret = xo->proto; - goto out; - } - if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2)) BUG(); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index af8209f912ab..f361d3d56be2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1384,7 +1384,7 @@ static void nl_fib_input(struct sk_buff *skb) return; nlh = nlmsg_hdr(skb); - frn = (struct fib_result_nl *) nlmsg_data(nlh); + frn = nlmsg_data(nlh); nl_fib_lookup(net, frn); portid = NETLINK_CB(skb).portid; /* netlink portid */ @@ -1425,7 +1425,7 @@ static void fib_disable_ip(struct net_device *dev, unsigned long event, static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct in_ifaddr *ifa = ptr; struct net_device *dev = ifa->ifa_dev->dev; struct net *net = dev_net(dev); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 001fea394bde..513f475c6a53 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -145,7 +145,7 @@ INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg) { - struct fib_result *result = (struct fib_result *) arg->result; + struct fib_result *result = arg->result; struct net_device *dev = NULL; if (result->fi) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1f7f25532fa1..2734c3af7e24 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2625,7 +2625,7 @@ static void fib_table_print(struct seq_file *seq, struct fib_table *tb) static int fib_triestat_seq_show(struct seq_file *seq, void *v) { - struct net *net = (struct net *)seq->private; + struct net *net = seq->private; unsigned int h; seq_printf(seq, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 236debd9fded..efea0e796f06 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -342,7 +342,7 @@ void icmp_out_count(struct net *net, unsigned char type) static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { - struct icmp_bxm *icmp_param = (struct icmp_bxm *)from; + struct icmp_bxm *icmp_param = from; __wsum csum; csum = skb_copy_and_csum_bits(icmp_param->skb, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 2ad3c7b42d6d..b65d074d9620 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2403,9 +2403,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct /* decrease mem now to avoid the memleak warning */ atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); } rcu_assign_pointer(pmc->sflist, newpsl); + if (psl) + kfree_rcu(psl, rcu); psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ @@ -2507,11 +2508,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) /* decrease mem now to avoid the memleak warning */ atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); - } else + } else { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); + } rcu_assign_pointer(pmc->sflist, newpsl); + if (psl) + kfree_rcu(psl, rcu); pmc->sfmode = msf->imsf_fmode; err = 0; done: @@ -2836,7 +2839,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "Idx\tDevice : Count Querier\tGroup Users Timer\tReporter\n"); else { - struct ip_mc_list *im = (struct ip_mc_list *)v; + struct ip_mc_list *im = v; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); char *querier; long delta; @@ -2980,7 +2983,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) static int igmp_mcf_seq_show(struct seq_file *seq, void *v) { - struct ip_sf_list *psf = (struct ip_sf_list *)v; + struct ip_sf_list *psf = v; struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); if (v == SEQ_START_TOKEN) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1e5b53c2bb26..c0b7e6c21360 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -117,6 +117,32 @@ bool inet_rcv_saddr_any(const struct sock *sk) return !sk->sk_rcv_saddr; } +static bool use_bhash2_on_bind(const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + int addr_type; + + if (sk->sk_family == AF_INET6) { + addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + return addr_type != IPV6_ADDR_ANY && + addr_type != IPV6_ADDR_MAPPED; + } +#endif + return sk->sk_rcv_saddr != htonl(INADDR_ANY); +} + +static u32 get_bhash2_nulladdr_hash(const struct sock *sk, struct net *net, + int port) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr nulladdr = {}; + + if (sk->sk_family == AF_INET6) + return ipv6_portaddr_hash(net, &nulladdr, port); +#endif + return ipv4_portaddr_hash(net, 0, port); +} + void inet_get_local_port_range(struct net *net, int *low, int *high) { unsigned int seq; @@ -130,16 +156,71 @@ void inet_get_local_port_range(struct net *net, int *low, int *high) } EXPORT_SYMBOL(inet_get_local_port_range); -static int inet_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, - bool relax, bool reuseport_ok) +static bool bind_conflict_exist(const struct sock *sk, struct sock *sk2, + kuid_t sk_uid, bool relax, + bool reuseport_cb_ok, bool reuseport_ok) +{ + int bound_dev_if2; + + if (sk == sk2) + return false; + + bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); + + if (!sk->sk_bound_dev_if || !bound_dev_if2 || + sk->sk_bound_dev_if == bound_dev_if2) { + if (sk->sk_reuse && sk2->sk_reuse && + sk2->sk_state != TCP_LISTEN) { + if (!relax || (!reuseport_ok && sk->sk_reuseport && + sk2->sk_reuseport && reuseport_cb_ok && + (sk2->sk_state == TCP_TIME_WAIT || + uid_eq(sk_uid, sock_i_uid(sk2))))) + return true; + } else if (!reuseport_ok || !sk->sk_reuseport || + !sk2->sk_reuseport || !reuseport_cb_ok || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(sk_uid, sock_i_uid(sk2)))) { + return true; + } + } + return false; +} + +static bool check_bhash2_conflict(const struct sock *sk, + struct inet_bind2_bucket *tb2, kuid_t sk_uid, + bool relax, bool reuseport_cb_ok, + bool reuseport_ok) { struct sock *sk2; - bool reuseport_cb_ok; - bool reuse = sk->sk_reuse; - bool reuseport = !!sk->sk_reuseport; - struct sock_reuseport *reuseport_cb; + + sk_for_each_bound_bhash2(sk2, &tb2->owners) { + if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) + continue; + + if (bind_conflict_exist(sk, sk2, sk_uid, relax, + reuseport_cb_ok, reuseport_ok)) + return true; + } + return false; +} + +/* This should be called only when the corresponding inet_bind_bucket spinlock + * is held + */ +static int inet_csk_bind_conflict(const struct sock *sk, int port, + struct inet_bind_bucket *tb, + struct inet_bind2_bucket *tb2, /* may be null */ + bool relax, bool reuseport_ok) +{ + struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; kuid_t uid = sock_i_uid((struct sock *)sk); + struct sock_reuseport *reuseport_cb; + struct inet_bind2_hashbucket *head2; + bool reuseport_cb_ok; + struct sock *sk2; + struct net *net; + int l3mdev; + u32 hash; rcu_read_lock(); reuseport_cb = rcu_dereference(sk->sk_reuseport_cb); @@ -150,36 +231,42 @@ static int inet_csk_bind_conflict(const struct sock *sk, /* * Unlike other sk lookup places we do not check * for sk_net here, since _all_ the socks listed - * in tb->owners list belong to the same net - the - * one this bucket belongs to. + * in tb->owners and tb2->owners list belong + * to the same net */ - sk_for_each_bound(sk2, &tb->owners) { - if (sk != sk2 && - (!sk->sk_bound_dev_if || - !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if (reuse && sk2->sk_reuse && - sk2->sk_state != TCP_LISTEN) { - if ((!relax || - (!reuseport_ok && - reuseport && sk2->sk_reuseport && - reuseport_cb_ok && - (sk2->sk_state == TCP_TIME_WAIT || - uid_eq(uid, sock_i_uid(sk2))))) && - inet_rcv_saddr_equal(sk, sk2, true)) - break; - } else if (!reuseport_ok || - !reuseport || !sk2->sk_reuseport || - !reuseport_cb_ok || - (sk2->sk_state != TCP_TIME_WAIT && - !uid_eq(uid, sock_i_uid(sk2)))) { - if (inet_rcv_saddr_equal(sk, sk2, true)) - break; - } - } + if (!use_bhash2_on_bind(sk)) { + sk_for_each_bound(sk2, &tb->owners) + if (bind_conflict_exist(sk, sk2, uid, relax, + reuseport_cb_ok, reuseport_ok) && + inet_rcv_saddr_equal(sk, sk2, true)) + return true; + + return false; } - return sk2 != NULL; + + if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, + reuseport_ok)) + return true; + + net = sock_net(sk); + + /* check there's no conflict with an existing IPV6_ADDR_ANY (if ipv6) or + * INADDR_ANY (if ipv4) socket. + */ + hash = get_bhash2_nulladdr_hash(sk, net, port); + head2 = &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; + + l3mdev = inet_sk_bound_l3mdev(sk); + inet_bind_bucket_for_each(tb2, &head2->chain) + if (check_bind2_bucket_match_nulladdr(tb2, net, port, l3mdev, sk)) + break; + + if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, + reuseport_ok)) + return true; + + return false; } /* @@ -187,16 +274,20 @@ static int inet_csk_bind_conflict(const struct sock *sk, * inet_bind_hashbucket lock held. */ static struct inet_bind_hashbucket * -inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret) +inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, + struct inet_bind2_bucket **tb2_ret, + struct inet_bind2_hashbucket **head2_ret, int *port_ret) { struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; - int port = 0; + struct inet_bind2_hashbucket *head2; struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); - bool relax = false; int i, low, high, attempt_half; + struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; u32 remaining, offset; + bool relax = false; + int port = 0; int l3mdev; l3mdev = inet_sk_bound_l3mdev(sk); @@ -235,10 +326,12 @@ other_parity_scan: head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock_bh(&head->lock); + tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, + &head2); inet_bind_bucket_for_each(tb, &head->chain) - if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && - tb->port == port) { - if (!inet_csk_bind_conflict(sk, tb, relax, false)) + if (check_bind_bucket_match(tb, net, port, l3mdev)) { + if (!inet_csk_bind_conflict(sk, port, tb, tb2, + relax, false)) goto success; goto next_port; } @@ -268,6 +361,8 @@ next_port: success: *port_ret = port; *tb_ret = tb; + *tb2_ret = tb2; + *head2_ret = head2; return head; } @@ -363,54 +458,81 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) { bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; - int ret = 1, port = snum; + bool bhash_created = false, bhash2_created = false; + struct inet_bind2_bucket *tb2 = NULL; + struct inet_bind2_hashbucket *head2; + struct inet_bind_bucket *tb = NULL; struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); - struct inet_bind_bucket *tb = NULL; + int ret = 1, port = snum; + bool found_port = false; int l3mdev; l3mdev = inet_sk_bound_l3mdev(sk); if (!port) { - head = inet_csk_find_open_port(sk, &tb, &port); + head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port); if (!head) return ret; + if (tb && tb2) + goto success; + found_port = true; + } else { + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; + spin_lock_bh(&head->lock); + inet_bind_bucket_for_each(tb, &head->chain) + if (check_bind_bucket_match(tb, net, port, l3mdev)) + break; + + tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, + &head2); + } + + if (!tb) { + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net, + head, port, l3mdev); if (!tb) - goto tb_not_found; - goto success; + goto fail_unlock; + bhash_created = true; } - head = &hinfo->bhash[inet_bhashfn(net, port, - hinfo->bhash_size)]; - spin_lock_bh(&head->lock); - inet_bind_bucket_for_each(tb, &head->chain) - if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && - tb->port == port) - goto tb_found; -tb_not_found: - tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, - net, head, port, l3mdev); - if (!tb) - goto fail_unlock; -tb_found: - if (!hlist_empty(&tb->owners)) { + + if (!tb2) { + tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, + net, head2, port, l3mdev, sk); + if (!tb2) + goto fail_unlock; + bhash2_created = true; + } + + /* If we had to find an open port, we already checked for conflicts */ + if (!found_port && !hlist_empty(&tb->owners)) { if (sk->sk_reuse == SK_FORCE_REUSE) goto success; if ((tb->fastreuse > 0 && reuse) || sk_reuseport_match(tb, sk)) goto success; - if (inet_csk_bind_conflict(sk, tb, true, true)) + if (inet_csk_bind_conflict(sk, port, tb, tb2, true, true)) goto fail_unlock; } success: inet_csk_update_fastreuse(tb, sk); if (!inet_csk(sk)->icsk_bind_hash) - inet_bind_hash(sk, tb, port); + inet_bind_hash(sk, tb, tb2, port); WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); + WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2); ret = 0; fail_unlock: + if (ret) { + if (bhash_created) + inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); + if (bhash2_created) + inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, + tb2); + } spin_unlock_bh(&head->lock); return ret; } @@ -957,6 +1079,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, inet_sk_set_state(newsk, TCP_SYN_RECV); newicsk->icsk_bind_hash = NULL; + newicsk->icsk_bind2_hash = NULL; inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 581b5b2d72a5..b812eb36f0e3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1028,12 +1028,13 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport) goto skip_listen_ht; - for (i = s_i; i < INET_LHTABLE_SIZE; i++) { + for (i = s_i; i <= hashinfo->lhash2_mask; i++) { struct inet_listen_hashbucket *ilb; struct hlist_nulls_node *node; num = 0; - ilb = &hashinfo->listening_hash[i]; + ilb = &hashinfo->lhash2[i]; + spin_lock(&ilb->lock); sk_nulls_for_each(sk, node, &ilb->nulls_head) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 63948f6aeca0..c9f9ac5013a7 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -510,7 +510,7 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { - struct sk_buff **nextp = (struct sk_buff **)reasm_data; + struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; int sum_truesize; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 17440840a791..e8de5e699b3f 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -81,6 +81,41 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, return tb; } +struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep, + struct net *net, + struct inet_bind2_hashbucket *head, + const unsigned short port, + int l3mdev, + const struct sock *sk) +{ + struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); + + if (tb) { + write_pnet(&tb->ib_net, net); + tb->l3mdev = l3mdev; + tb->port = port; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr; + else +#endif + tb->rcv_saddr = sk->sk_rcv_saddr; + INIT_HLIST_HEAD(&tb->owners); + hlist_add_head(&tb->node, &head->chain); + } + return tb; +} + +static bool bind2_bucket_addr_match(struct inet_bind2_bucket *tb2, struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + return ipv6_addr_equal(&tb2->v6_rcv_saddr, + &sk->sk_v6_rcv_saddr); +#endif + return tb2->rcv_saddr == sk->sk_rcv_saddr; +} + /* * Caller must hold hashbucket lock for this tb with local BH disabled */ @@ -92,12 +127,25 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket } } +/* Caller must hold the lock for the corresponding hashbucket in the bhash table + * with local BH disabled + */ +void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb) +{ + if (hlist_empty(&tb->owners)) { + __hlist_del(&tb->node); + kmem_cache_free(cachep, tb); + } +} + void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - const unsigned short snum) + struct inet_bind2_bucket *tb2, const unsigned short snum) { inet_sk(sk)->inet_num = snum; sk_add_bind_node(sk, &tb->owners); inet_csk(sk)->icsk_bind_hash = tb; + sk_add_bind2_node(sk, &tb2->owners); + inet_csk(sk)->icsk_bind2_hash = tb2; } /* @@ -109,6 +157,7 @@ static void __inet_put_port(struct sock *sk) const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num, hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; + struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; spin_lock(&head->lock); @@ -117,6 +166,13 @@ static void __inet_put_port(struct sock *sk) inet_csk(sk)->icsk_bind_hash = NULL; inet_sk(sk)->inet_num = 0; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + + if (inet_csk(sk)->icsk_bind2_hash) { + tb2 = inet_csk(sk)->icsk_bind2_hash; + __sk_del_bind2_node(sk); + inet_csk(sk)->icsk_bind2_hash = NULL; + inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); + } spin_unlock(&head->lock); } @@ -133,14 +189,19 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; unsigned short port = inet_sk(child)->inet_num; const int bhash = inet_bhashfn(sock_net(sk), port, - table->bhash_size); + table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; + struct inet_bind2_hashbucket *head_bhash2; + bool created_inet_bind_bucket = false; + struct net *net = sock_net(sk); + struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; int l3mdev; spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; - if (unlikely(!tb)) { + tb2 = inet_csk(sk)->icsk_bind2_hash; + if (unlikely(!tb || !tb2)) { spin_unlock(&head->lock); return -ENOENT; } @@ -153,25 +214,45 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) * as that of the child socket. We have to look up or * create a new bind bucket for the child here. */ inet_bind_bucket_for_each(tb, &head->chain) { - if (net_eq(ib_net(tb), sock_net(sk)) && - tb->l3mdev == l3mdev && tb->port == port) + if (check_bind_bucket_match(tb, net, port, l3mdev)) break; } if (!tb) { tb = inet_bind_bucket_create(table->bind_bucket_cachep, - sock_net(sk), head, port, - l3mdev); + net, head, port, l3mdev); if (!tb) { spin_unlock(&head->lock); return -ENOMEM; } + created_inet_bind_bucket = true; } inet_csk_update_fastreuse(tb, child); + + goto bhash2_find; + } else if (!bind2_bucket_addr_match(tb2, child)) { + l3mdev = inet_sk_bound_l3mdev(sk); + +bhash2_find: + tb2 = inet_bind2_bucket_find(table, net, port, l3mdev, child, + &head_bhash2); + if (!tb2) { + tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep, + net, head_bhash2, port, + l3mdev, child); + if (!tb2) + goto error; + } } - inet_bind_hash(child, tb, port); + inet_bind_hash(child, tb, tb2, port); spin_unlock(&head->lock); return 0; + +error: + if (created_inet_bind_bucket) + inet_bind_bucket_destroy(table->bind_bucket_cachep, tb); + spin_unlock(&head->lock); + return -ENOMEM; } EXPORT_SYMBOL_GPL(__inet_inherit_port); @@ -193,42 +274,6 @@ inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk) return inet_lhash2_bucket(h, hash); } -static void inet_hash2(struct inet_hashinfo *h, struct sock *sk) -{ - struct inet_listen_hashbucket *ilb2; - - if (!h->lhash2) - return; - - ilb2 = inet_lhash2_bucket_sk(h, sk); - - spin_lock(&ilb2->lock); - if (sk->sk_reuseport && sk->sk_family == AF_INET6) - hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node, - &ilb2->head); - else - hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node, - &ilb2->head); - ilb2->count++; - spin_unlock(&ilb2->lock); -} - -static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk) -{ - struct inet_listen_hashbucket *ilb2; - - if (!h->lhash2 || - WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node))) - return; - - ilb2 = inet_lhash2_bucket_sk(h, sk); - - spin_lock(&ilb2->lock); - hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node); - ilb2->count--; - spin_unlock(&ilb2->lock); -} - static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const __be32 daddr, const int dif, const int sdif) @@ -282,12 +327,11 @@ static struct sock *inet_lhash2_lookup(struct net *net, const __be32 daddr, const unsigned short hnum, const int dif, const int sdif) { - struct inet_connection_sock *icsk; struct sock *sk, *result = NULL; + struct hlist_nulls_node *node; int score, hiscore = 0; - inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { - sk = (struct sock *)icsk; + sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif); if (score > hiscore) { result = lookup_reuseport(net, sk, skb, doff, @@ -410,13 +454,11 @@ begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; - if (likely(INET_MATCH(sk, net, acookie, - saddr, daddr, ports, dif, sdif))) { + if (likely(inet_match(net, sk, acookie, ports, dif, sdif))) { if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; - if (unlikely(!INET_MATCH(sk, net, acookie, - saddr, daddr, ports, - dif, sdif))) { + if (unlikely(!inet_match(net, sk, acookie, + ports, dif, sdif))) { sock_gen_put(sk); goto begin; } @@ -465,8 +507,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, if (sk2->sk_hash != hash) continue; - if (likely(INET_MATCH(sk2, net, acookie, - saddr, daddr, ports, dif, sdif))) { + if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (twsk_unique(sk, sk2, twp)) @@ -504,7 +545,7 @@ not_unique: return -EADDRNOTAVAIL; } -static u32 inet_sk_port_offset(const struct sock *sk) +static u64 inet_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -532,16 +573,14 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk, if (esk->sk_hash != sk->sk_hash) continue; if (sk->sk_family == AF_INET) { - if (unlikely(INET_MATCH(esk, net, acookie, - sk->sk_daddr, - sk->sk_rcv_saddr, + if (unlikely(inet_match(net, esk, acookie, ports, dif, sdif))) { return true; } } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - if (unlikely(INET6_MATCH(esk, net, + if (unlikely(inet6_match(net, esk, &sk->sk_v6_daddr, &sk->sk_v6_rcv_saddr, ports, dif, sdif))) { @@ -633,7 +672,7 @@ static int inet_reuseport_add_sock(struct sock *sk, int __inet_hash(struct sock *sk, struct sock *osk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct inet_listen_hashbucket *ilb; + struct inet_listen_hashbucket *ilb2; int err = 0; if (sk->sk_state != TCP_LISTEN) { @@ -643,25 +682,23 @@ int __inet_hash(struct sock *sk, struct sock *osk) return 0; } WARN_ON(!sk_unhashed(sk)); - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + ilb2 = inet_lhash2_bucket_sk(hashinfo, sk); - spin_lock(&ilb->lock); + spin_lock(&ilb2->lock); if (sk->sk_reuseport) { - err = inet_reuseport_add_sock(sk, ilb); + err = inet_reuseport_add_sock(sk, ilb2); if (err) goto unlock; } if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) - __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head); + __sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head); else - __sk_nulls_add_node_rcu(sk, &ilb->nulls_head); - inet_hash2(hashinfo, sk); - ilb->count++; + __sk_nulls_add_node_rcu(sk, &ilb2->nulls_head); sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: - spin_unlock(&ilb->lock); + spin_unlock(&ilb2->lock); return err; } @@ -678,23 +715,6 @@ int inet_hash(struct sock *sk) } EXPORT_SYMBOL_GPL(inet_hash); -static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb) -{ - if (sk_unhashed(sk)) - return; - - if (rcu_access_pointer(sk->sk_reuseport_cb)) - reuseport_stop_listen_sock(sk); - if (ilb) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - - inet_unhash2(hashinfo, sk); - ilb->count--; - } - __sk_nulls_del_node_init_rcu(sk); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); -} - void inet_unhash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; @@ -703,47 +723,136 @@ void inet_unhash(struct sock *sk) return; if (sk->sk_state == TCP_LISTEN) { - struct inet_listen_hashbucket *ilb; + struct inet_listen_hashbucket *ilb2; - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + ilb2 = inet_lhash2_bucket_sk(hashinfo, sk); /* Don't disable bottom halves while acquiring the lock to * avoid circular locking dependency on PREEMPT_RT. */ - spin_lock(&ilb->lock); - __inet_unhash(sk, ilb); - spin_unlock(&ilb->lock); + spin_lock(&ilb2->lock); + if (sk_unhashed(sk)) { + spin_unlock(&ilb2->lock); + return; + } + + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_stop_listen_sock(sk); + + __sk_nulls_del_node_init_rcu(sk); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + spin_unlock(&ilb2->lock); } else { spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock_bh(lock); - __inet_unhash(sk, NULL); + if (sk_unhashed(sk)) { + spin_unlock_bh(lock); + return; + } + __sk_nulls_del_node_init_rcu(sk); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); } } EXPORT_SYMBOL_GPL(inet_unhash); +static bool check_bind2_bucket_match(struct inet_bind2_bucket *tb, + struct net *net, unsigned short port, + int l3mdev, struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + return net_eq(ib2_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev && + ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); + else +#endif + return net_eq(ib2_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr; +} + +bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb, + struct net *net, const unsigned short port, + int l3mdev, const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr nulladdr = {}; + + if (sk->sk_family == AF_INET6) + return net_eq(ib2_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev && + ipv6_addr_equal(&tb->v6_rcv_saddr, &nulladdr); + else +#endif + return net_eq(ib2_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev && tb->rcv_saddr == 0; +} + +static struct inet_bind2_hashbucket * +inet_bhashfn_portaddr(struct inet_hashinfo *hinfo, const struct sock *sk, + const struct net *net, unsigned short port) +{ + u32 hash; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port); + else +#endif + hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port); + return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; +} + +/* This should only be called when the spinlock for the socket's corresponding + * bind_hashbucket is held + */ +struct inet_bind2_bucket * +inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net, + const unsigned short port, int l3mdev, struct sock *sk, + struct inet_bind2_hashbucket **head) +{ + struct inet_bind2_bucket *bhash2 = NULL; + struct inet_bind2_hashbucket *h; + + h = inet_bhashfn_portaddr(hinfo, sk, net, port); + inet_bind_bucket_for_each(bhash2, &h->chain) { + if (check_bind2_bucket_match(bhash2, net, port, l3mdev, sk)) + break; + } + + if (head) + *head = h; + + return bhash2; +} + /* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this * property might be used by clever attacker. - * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, - * we use 256 instead to really give more isolation and - * privacy, this only consumes 1 KB of kernel memory. + * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though + * attacks were since demonstrated, thus we use 65536 instead to really + * give more isolation and privacy, at the expense of 256kB of kernel + * memory. */ -#define INET_TABLE_PERTURB_SHIFT 8 -static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; +#define INET_TABLE_PERTURB_SHIFT 16 +#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) +static u32 *table_perturb; int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_timewait_sock *tw = NULL; + struct inet_bind2_hashbucket *head2; struct inet_bind_hashbucket *head; int port = inet_sk(sk)->inet_num; struct net *net = sock_net(sk); + struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; + bool tb_created = false; u32 remaining, offset; int ret, i, low, high; int l3mdev; @@ -774,10 +883,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (likely(remaining > 1)) remaining &= ~1U; - net_get_random_once(table_perturb, sizeof(table_perturb)); - index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); + net_get_random_once(table_perturb, + INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); + index = port_offset & (INET_TABLE_PERTURB_SIZE - 1); + + offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); + offset %= remaining; - offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining; /* In first pass we try ports of @low parity. * inet_csk_get_port() does the opposite choice. */ @@ -797,8 +909,7 @@ other_parity_scan: * the established check is already unique enough. */ inet_bind_bucket_for_each(tb, &head->chain) { - if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && - tb->port == port) { + if (check_bind_bucket_match(tb, net, port, l3mdev)) { if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) goto next_port; @@ -816,6 +927,7 @@ other_parity_scan: spin_unlock_bh(&head->lock); return -ENOMEM; } + tb_created = true; tb->fastreuse = -1; tb->fastreuseport = -1; goto ok; @@ -831,15 +943,27 @@ next_port: return -EADDRNOTAVAIL; ok: - /* If our first attempt found a candidate, skip next candidate - * in 1/16 of cases to add some noise. + /* Find the corresponding tb2 bucket since we need to + * add the socket to the bhash2 table as well + */ + tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, &head2); + if (!tb2) { + tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net, + head2, port, l3mdev, sk); + if (!tb2) + goto error; + } + + /* Here we want to add a little bit of randomness to the next source + * port that will be chosen. We use a max() with a random here so that + * on low contention the randomness is maximal and on high contention + * it may be inexistent. */ - if (!i && !(prandom_u32() % 16)) - i = 2; + i = max_t(int, i, (prandom_u32() & 7) * 2); WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); + inet_bind_hash(sk, tb, tb2, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); inet_ehash_nolisten(sk, (struct sock *)tw, NULL); @@ -851,6 +975,12 @@ ok: inet_twsk_deschedule_put(tw); local_bh_enable(); return 0; + +error: + if (tb_created) + inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); + spin_unlock_bh(&head->lock); + return -ENOMEM; } /* @@ -859,7 +989,7 @@ ok: int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); @@ -868,29 +998,14 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, } EXPORT_SYMBOL_GPL(inet_hash_connect); -void inet_hashinfo_init(struct inet_hashinfo *h) -{ - int i; - - for (i = 0; i < INET_LHTABLE_SIZE; i++) { - spin_lock_init(&h->listening_hash[i].lock); - INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head, - i + LISTENING_NULLS_BASE); - h->listening_hash[i].count = 0; - } - - h->lhash2 = NULL; -} -EXPORT_SYMBOL_GPL(inet_hashinfo_init); - static void init_hashinfo_lhash2(struct inet_hashinfo *h) { int i; for (i = 0; i <= h->lhash2_mask; i++) { spin_lock_init(&h->lhash2[i].lock); - INIT_HLIST_HEAD(&h->lhash2[i].head); - h->lhash2[i].count = 0; + INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head, + i + LISTENING_NULLS_BASE); } } @@ -909,6 +1024,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, low_limit, high_limit); init_hashinfo_lhash2(h); + + /* this one is used for source ports of outgoing connections */ + table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE, + sizeof(*table_perturb), GFP_KERNEL); + if (!table_perturb) + panic("TCP: failed to alloc table_perturb"); } int inet_hashinfo2_init_mod(struct inet_hashinfo *h) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 9e0bbd026560..0ec501845cb3 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -52,7 +52,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) spin_unlock(lock); /* Disassociate with bind bucket. */ - bhead = &hashinfo->bhash[tw->tw_bslot]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); inet_twsk_bind_unhash(tw, hashinfo); @@ -111,12 +112,8 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - /* Cache inet_bhashfn(), because 'struct net' might be no longer - * available later in inet_twsk_kill(). - */ - tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num, - hashinfo->bhash_size); - bhead = &hashinfo->bhash[tw->tw_bslot]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); @@ -257,3 +254,50 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) } } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); + +void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) +{ + struct inet_timewait_sock *tw; + struct sock *sk; + struct hlist_nulls_node *node; + unsigned int slot; + + for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; +restart_rcu: + cond_resched(); + rcu_read_lock(); +restart: + sk_nulls_for_each_rcu(sk, node, &head->chain) { + if (sk->sk_state != TCP_TIME_WAIT) + continue; + tw = inet_twsk(sk); + if ((tw->tw_family != family) || + refcount_read(&twsk_net(tw)->ns.count)) + continue; + + if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) + continue; + + if (unlikely((tw->tw_family != family) || + refcount_read(&twsk_net(tw)->ns.count))) { + inet_twsk_put(tw); + goto restart; + } + + rcu_read_unlock(); + local_bh_disable(); + inet_twsk_deschedule_put(tw); + local_bh_enable(); + goto restart_rcu; + } + /* If the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto restart; + rcu_read_unlock(); + } +} +EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 365caebf51ab..7e474a85deaf 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -459,14 +459,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); - - if (tunnel->parms.o_flags & TUNNEL_SEQ) - tunnel->o_seqno++; + __be16 flags = tunnel->parms.o_flags; /* Push GRE header. */ gre_build_header(skb, tunnel->tun_hlen, - tunnel->parms.o_flags, proto, tunnel->parms.o_key, - htonl(tunnel->o_seqno)); + flags, proto, tunnel->parms.o_key, + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } @@ -504,7 +502,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); gre_build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); @@ -581,7 +579,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) } gre_build_header(skb, 8, TUNNEL_SEQ, - proto, 0, htonl(tunnel->o_seqno++)); + proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno))); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); @@ -750,6 +748,7 @@ free_skb: static void ipgre_link_update(struct net_device *dev, bool set_mtu) { struct ip_tunnel *tunnel = netdev_priv(dev); + __be16 flags; int len; len = tunnel->tun_hlen; @@ -765,19 +764,15 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) if (set_mtu) dev->mtu = max_t(int, dev->mtu - len, 68); - if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { - if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || - tunnel->encap.type == TUNNEL_ENCAP_NONE) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } else { - dev->features &= ~NETIF_F_GSO_SOFTWARE; - dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; - } - dev->features |= NETIF_F_LLTX; - } else { + flags = tunnel->parms.o_flags; + + if (flags & TUNNEL_SEQ || + (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) { + dev->features &= ~NETIF_F_GSO_SOFTWARE; dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; - dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE); + } else { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; } } @@ -951,6 +946,7 @@ static void ipgre_tunnel_setup(struct net_device *dev) static void __gre_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel; + __be16 flags; tunnel = netdev_priv(dev); tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); @@ -959,25 +955,21 @@ static void __gre_tunnel_init(struct net_device *dev) tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph); - dev->features |= GRE_FEATURES; + dev->features |= GRE_FEATURES | NETIF_F_LLTX; dev->hw_features |= GRE_FEATURES; - if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { - /* TCP offload with GRE SEQ is not supported, nor - * can we support 2 levels of outer headers requiring - * an update. - */ - if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || - (tunnel->encap.type == TUNNEL_ENCAP_NONE)) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } + flags = tunnel->parms.o_flags; - /* Can use a lockless transmit, unless we generate - * output sequences - */ - dev->features |= NETIF_F_LLTX; - } + /* TCP offload with GRE SEQ is not supported, nor can we support 2 + * levels of outer headers requiring an update. + */ + if (flags & TUNNEL_SEQ) + return; + if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE) + return; + + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; } static int ipgre_tunnel_init(struct net_device *dev) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c860519d57ee..13e6329784fb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -356,7 +356,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct mfc_cache_cmp_arg *cmparg = arg->key; - struct mfc_cache *c = (struct mfc_cache *)ptr; + const struct mfc_cache *c = ptr; return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || cmparg->mfc_origin != c->mfc_origin; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index aff707988e23..bd135165482a 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -45,8 +45,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un fl4.saddr = saddr; fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0; - if (!fl4.flowi4_oif) - fl4.flowi4_oif = l3mdev_master_ifindex(dev); + fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev); fl4.flowi4_mark = skb->mark; fl4.flowi4_flags = flags; fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys); diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ /dev/null diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 4eed5afca392..918c61fda0f3 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -80,6 +80,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, struct iphdr *niph; struct icmphdr *icmph; unsigned int len; + int dataoff; __wsum csum; u8 proto; @@ -99,10 +100,11 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len))) return NULL; + dataoff = ip_hdrlen(oldskb); proto = ip_hdr(oldskb)->protocol; if (!skb_csum_unnecessary(oldskb) && - nf_reject_verify_csum(proto) && + nf_reject_verify_csum(oldskb, dataoff, proto) && nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto)) return NULL; @@ -311,6 +313,7 @@ EXPORT_SYMBOL_GPL(nf_send_reset); void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) { struct iphdr *iph = ip_hdr(skb_in); + int dataoff = ip_hdrlen(skb_in); u8 proto = iph->protocol; if (iph->frag_off & htons(IP_OFFSET)) @@ -320,12 +323,13 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) nf_reject_fill_skb_dst(skb_in) < 0) return; - if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) { + if (skb_csum_unnecessary(skb_in) || + !nf_reject_verify_csum(skb_in, dataoff, proto)) { icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); return; } - if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0) + if (nf_ip_checksum(skb_in, hook, dataoff, proto) == 0) icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); } EXPORT_SYMBOL_GPL(nf_send_unreach); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 319c181bfbb6..1a43ca73f94d 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -305,6 +305,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, struct net *net = sock_net(sk); if (sk->sk_family == AF_INET) { struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + u32 tb_id = RT_TABLE_LOCAL; int chk_addr_ret; if (addr_len < sizeof(*addr)) @@ -318,7 +319,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); - chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id; + chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk), addr->sin_addr.s_addr, @@ -355,6 +357,14 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, return -ENODEV; } } + + if (!dev && sk->sk_bound_dev_if) { + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + } has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, scoped); rcu_read_unlock(); @@ -590,7 +600,7 @@ EXPORT_SYMBOL_GPL(ping_err); int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *skb) { - struct pingfakehdr *pfh = (struct pingfakehdr *)from; + struct pingfakehdr *pfh = from; if (offset == 0) { fraglen -= sizeof(struct icmphdr); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4056b0da85ea..bbd717805b10 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -783,7 +783,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (err) goto done; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (sin) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ffbe2e4f8c89..356f535f3443 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1727,6 +1727,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct in_device *in_dev = __in_dev_get_rcu(dev); unsigned int flags = RTCF_MULTICAST; struct rtable *rth; + bool no_policy; u32 itag = 0; int err; @@ -1737,8 +1738,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (our) flags |= RTCF_LOCAL; + no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY); + if (no_policy) + IPCB(skb)->flags |= IPSKB_NOPOLICY; + rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, - IN_DEV_ORCONF(in_dev, NOPOLICY), false); + no_policy, false); if (!rth) return -ENOBUFS; @@ -1754,6 +1759,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif RT_CACHE_STAT_INC(in_slow_mc); + skb_dst_drop(skb); skb_dst_set(skb, &rth->dst); return 0; } @@ -1796,7 +1802,7 @@ static int __mkroute_input(struct sk_buff *skb, struct rtable *rth; int err; struct in_device *out_dev; - bool do_cache; + bool do_cache, no_policy; u32 itag = 0; /* get a working reference to the output device */ @@ -1841,6 +1847,10 @@ static int __mkroute_input(struct sk_buff *skb, } } + no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY); + if (no_policy) + IPCB(skb)->flags |= IPSKB_NOPOLICY; + fnhe = find_exception(nhc, daddr); if (do_cache) { if (fnhe) @@ -1853,8 +1863,7 @@ static int __mkroute_input(struct sk_buff *skb, } } - rth = rt_dst_alloc(out_dev->dev, 0, res->type, - IN_DEV_ORCONF(in_dev, NOPOLICY), + rth = rt_dst_alloc(out_dev->dev, 0, res->type, no_policy, IN_DEV_ORCONF(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; @@ -2229,6 +2238,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct rtable *rth; struct flowi4 fl4; bool do_cache = true; + bool no_policy; /* IP on this device is disabled. */ @@ -2347,6 +2357,10 @@ brd_input: RT_CACHE_STAT_INC(in_brd); local_input: + no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY); + if (no_policy) + IPCB(skb)->flags |= IPSKB_NOPOLICY; + do_cache &= res->fi && !itag; if (do_cache) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); @@ -2361,7 +2375,7 @@ local_input: rth = rt_dst_alloc(ip_rt_get_dev(net, res), flags | RTCF_LOCAL, res->type, - IN_DEV_ORCONF(in_dev, NOPOLICY), false); + no_policy, false); if (!rth) goto e_nobufs; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 2cb3b852d148..f33c31dd7366 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -281,6 +281,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, EXPORT_SYMBOL(cookie_ecn_ok); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, + const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) { @@ -297,6 +298,10 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, return NULL; treq = tcp_rsk(req); + + /* treq->af_specific might be used to perform TCP_MD5 lookup */ + treq->af_specific = af_ops; + treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); @@ -364,7 +369,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb); + req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, skb); if (!req) goto out; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ad80d180b60b..cd448cdd3b38 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -20,10 +20,6 @@ #include <net/protocol.h> #include <net/netevent.h> -static int two = 2; -static int three __maybe_unused = 3; -static int four = 4; -static int thousand = 1000; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -1006,7 +1002,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "tcp_max_syn_backlog", @@ -1059,7 +1055,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_fib_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = &three, + .extra2 = SYSCTL_THREE, }, { .procname = "fib_multipath_hash_fields", @@ -1117,7 +1113,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &four, + .extra2 = SYSCTL_FOUR, }, { .procname = "tcp_recovery", @@ -1310,7 +1306,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &thousand, + .extra2 = SYSCTL_ONE_THOUSAND, }, { .procname = "tcp_pacing_ca_ratio", @@ -1319,7 +1315,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &thousand, + .extra2 = SYSCTL_ONE_THOUSAND, }, { .procname = "tcp_wmem", @@ -1391,7 +1387,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index db55af9eb37b..9984d23a7f3e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2314,8 +2314,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, if (sk->sk_state == TCP_LISTEN) goto out; - if (tp->recvmsg_inq) + if (tp->recvmsg_inq) { *cmsg_flags = TCP_CMSG_INQ; + msg->msg_get_inq = 1; + } timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); /* Urgent data needs to be handled specially. */ @@ -2537,7 +2539,7 @@ recv_sndq: int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { - int cmsg_flags = 0, ret, inq; + int cmsg_flags = 0, ret; struct scm_timestamping_internal tss; if (unlikely(flags & MSG_ERRQUEUE)) @@ -2552,12 +2554,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags); release_sock(sk); - if (cmsg_flags && ret >= 0) { + if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) { if (cmsg_flags & TCP_CMSG_TS) tcp_recv_timestamp(msg, sk, &tss); - if (cmsg_flags & TCP_CMSG_INQ) { - inq = tcp_inq_hint(sk); - put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); + if (msg->msg_get_inq) { + msg->msg_inq = tcp_inq_hint(sk); + if (cmsg_flags & TCP_CMSG_INQ) + put_cmsg(msg, SOL_TCP, TCP_CM_INQ, + sizeof(msg->msg_inq), &msg->msg_inq); } } return ret; @@ -4591,7 +4595,6 @@ void __init tcp_init(void) timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE); mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD); - inet_hashinfo_init(&tcp_hashinfo); inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash", thash_entries, 21, /* one slot per 2 MB*/ 0, 64 * 1024); @@ -4601,6 +4604,12 @@ void __init tcp_init(void) SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL); + tcp_hashinfo.bind2_bucket_cachep = + kmem_cache_create("tcp_bind2_bucket", + sizeof(struct inet_bind2_bucket), 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC | + SLAB_ACCOUNT, + NULL); /* Size and allocate the main established and bind bucket * hash tables. @@ -4623,8 +4632,9 @@ void __init tcp_init(void) if (inet_ehash_locks_alloc(&tcp_hashinfo)) panic("TCP: failed to alloc ehash_locks"); tcp_hashinfo.bhash = - alloc_large_system_hash("TCP bind", - sizeof(struct inet_bind_hashbucket), + alloc_large_system_hash("TCP bind bhash tables", + sizeof(struct inet_bind_hashbucket) + + sizeof(struct inet_bind2_hashbucket), tcp_hashinfo.ehash_mask + 1, 17, /* one slot per 128 KB of memory */ 0, @@ -4633,9 +4643,12 @@ void __init tcp_init(void) 0, 64 * 1024); tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size; + tcp_hashinfo.bhash2 = + (struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size); for (i = 0; i < tcp_hashinfo.bhash_size; i++) { spin_lock_init(&tcp_hashinfo.bhash[i].lock); INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); + INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain); } diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index c7d30a3bbd81..075e744bfb48 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -310,7 +310,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk) */ bytes = min_t(unsigned long, sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), - GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); + GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); return min(segs, 0x7FU); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index b0918839bee7..68178e7280ce 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -372,7 +372,7 @@ static void cubictcp_state(struct sock *sk, u8 new_state) * We apply another 100% factor because @rate is doubled at this point. * We cap the cushion to 1ms. */ -static u32 hystart_ack_delay(struct sock *sk) +static u32 hystart_ack_delay(const struct sock *sk) { unsigned long rate; @@ -380,7 +380,7 @@ static u32 hystart_ack_delay(struct sock *sk) if (!rate) return 0; return min_t(u64, USEC_PER_MSEC, - div64_ul((u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate)); + div64_ul((u64)sk->sk_gso_max_size * 4 * USEC_PER_SEC, rate)); } static void hystart_update(struct sock *sk, u32 delay) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index daff631b9486..3231af73e430 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2620,12 +2620,12 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + tp->prior_cwnd - 1; sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; - } else if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost) { - sndcnt = min_t(int, delta, - max_t(int, tp->prr_delivered - tp->prr_out, - newly_acked_sacked) + 1); } else { - sndcnt = min(delta, newly_acked_sacked); + sndcnt = max_t(int, tp->prr_delivered - tp->prr_out, + newly_acked_sacked); + if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost) + sndcnt++; + sndcnt = min(delta, sndcnt); } /* Force a fast retransmit upon entering fast recovery */ sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1)); @@ -3867,7 +3867,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_process_tlp_ack(sk, ack, flag); if (tcp_ack_is_dubious(sk, flag)) { - if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) { + if (!(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DSACKING_ACK))) { num_dupack = 1; /* Consider if pure acks were aggregated in tcp_add_backlog() */ if (!(flag & FLAG_DATA)) @@ -5450,7 +5451,17 @@ static void tcp_new_space(struct sock *sk) INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk); } -static void tcp_check_space(struct sock *sk) +/* Caller made space either from: + * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced) + * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt) + * + * We might be able to generate EPOLLOUT to the application if: + * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2 + * 2) notsent amount (tp->write_seq - tp->snd_nxt) became + * small enough that tcp_stream_memory_free() decides it + * is time to generate EPOLLOUT. + */ +void tcp_check_space(struct sock *sk) { /* pairs with tcp_poll() */ smp_mb(); @@ -5917,6 +5928,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ + skb_dst_drop(skb); __skb_pull(skb, tcp_header_len); eaten = tcp_queue_rcv(sk, skb, &fragstolen); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 918816ec5dd4..dac2650f3863 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2101,6 +2101,7 @@ bad_packet: } discard_it: + SKB_DR_OR(drop_reason, NOT_SPECIFIED); /* Discard frame. */ kfree_skb_reason(skb, drop_reason); return 0; @@ -2283,16 +2284,15 @@ static void *listening_get_first(struct seq_file *seq) st->offset = 0; for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) { struct inet_listen_hashbucket *ilb2; - struct inet_connection_sock *icsk; + struct hlist_nulls_node *node; struct sock *sk; ilb2 = &tcp_hashinfo.lhash2[st->bucket]; - if (hlist_empty(&ilb2->head)) + if (hlist_nulls_empty(&ilb2->nulls_head)) continue; spin_lock(&ilb2->lock); - inet_lhash2_for_each_icsk(icsk, &ilb2->head) { - sk = (struct sock *)icsk; + sk_nulls_for_each(sk, node, &ilb2->nulls_head) { if (seq_sk_match(seq, sk)) return sk; } @@ -2311,15 +2311,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur) { struct tcp_iter_state *st = seq->private; struct inet_listen_hashbucket *ilb2; - struct inet_connection_sock *icsk; + struct hlist_nulls_node *node; struct sock *sk = cur; ++st->num; ++st->offset; - icsk = inet_csk(sk); - inet_lhash2_for_each_icsk_continue(icsk) { - sk = (struct sock *)icsk; + sk = sk_nulls_next(sk); + sk_nulls_for_each_from(sk, node) { if (seq_sk_match(seq, sk)) return sk; } @@ -2728,16 +2727,15 @@ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq, { struct bpf_tcp_iter_state *iter = seq->private; struct tcp_iter_state *st = &iter->state; - struct inet_connection_sock *icsk; + struct hlist_nulls_node *node; unsigned int expected = 1; struct sock *sk; sock_hold(start_sk); iter->batch[iter->end_sk++] = start_sk; - icsk = inet_csk(start_sk); - inet_lhash2_for_each_icsk_continue(icsk) { - sk = (struct sock *)icsk; + sk = sk_nulls_next(start_sk); + sk_nulls_for_each_from(sk, node) { if (seq_sk_match(seq, sk)) { if (iter->end_sk < iter->max_sk) { sock_hold(sk); @@ -3171,6 +3169,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { struct net *net; + inet_twsk_purge(&tcp_hashinfo, AF_INET); + list_for_each_entry(net, net_exit_list, exit_list) tcp_fastopen_ctx_destroy(net); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6366df7aaf2a..6854bb1fb32b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -531,7 +531,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ - if (newtp->af_specific->md5_lookup(sk, newsk)) + if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req))) newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c221f3bce975..b4b2284ed4a2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -82,6 +82,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT, tcp_skb_pcount(skb)); + tcp_check_space(sk); } /* SND.NXT, if window was not shrunk or the amount of shrunk was less than one @@ -444,12 +445,13 @@ struct tcp_out_options { struct mptcp_out_options mptcp; }; -static void mptcp_options_write(__be32 *ptr, const struct tcp_sock *tp, +static void mptcp_options_write(struct tcphdr *th, __be32 *ptr, + struct tcp_sock *tp, struct tcp_out_options *opts) { #if IS_ENABLED(CONFIG_MPTCP) if (unlikely(OPTION_MPTCP & opts->options)) - mptcp_write_options(ptr, tp, &opts->mptcp); + mptcp_write_options(th, ptr, tp, &opts->mptcp); #endif } @@ -605,9 +607,10 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, * At least SACK_PERM as the first option is known to lead to a disaster * (but it may well be that other scenarios fail similarly). */ -static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, +static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, struct tcp_out_options *opts) { + __be32 *ptr = (__be32 *)(th + 1); u16 options = opts->options; /* mungable copy */ if (unlikely(OPTION_MD5 & options)) { @@ -701,7 +704,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, smc_options_write(ptr, &options); - mptcp_options_write(ptr, tp, opts); + mptcp_options_write(th, ptr, tp, opts); } static void smc_set_option(const struct tcp_sock *tp, @@ -1354,7 +1357,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, th->window = htons(min(tp->rcv_wnd, 65535U)); } - tcp_options_write((__be32 *)(th + 1), tp, &opts); + tcp_options_write(th, tp, &opts); #ifdef CONFIG_TCP_MD5SIG /* Calculate the MD5 hash, as we have all we need now */ @@ -1550,7 +1553,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, * SO_SNDBUF values. * Also allow first and last skb in retransmit queue to be split. */ - limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE); + limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_LEGACY_MAX_SIZE); if (unlikely((sk->sk_wmem_queued >> 1) > limit && tcp_queue != TCP_FRAG_IN_WRITE_QUEUE && skb != tcp_rtx_queue_head(sk) && @@ -3590,7 +3593,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rsk_rcv_wnd, 65535U)); - tcp_options_write((__be32 *)(th + 1), NULL, &opts); + tcp_options_write(th, NULL, &opts); th->doff = (tcp_header_size >> 2); __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 617b8187c03d..a8f6d9d06f2e 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -74,27 +74,32 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) * * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is * called multiple times. We favor the information from the most recently - * sent skb, i.e., the skb with the highest prior_delivered count. + * sent skb, i.e., the skb with the most recently sent time and the highest + * sequence. */ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + u64 tx_tstamp; if (!scb->tx.delivered_mstamp) return; + tx_tstamp = tcp_skb_timestamp_us(skb); if (!rs->prior_delivered || - after(scb->tx.delivered, rs->prior_delivered)) { + tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, + scb->end_seq, rs->last_end_seq)) { rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; + rs->last_end_seq = scb->end_seq; /* Record send time of most recently ACKed packet: */ - tp->first_tx_mstamp = tcp_skb_timestamp_us(skb); + tp->first_tx_mstamp = tx_tstamp; /* Find the duration of the "send phase" of this window: */ rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, scb->tx.first_tx_mstamp); diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index fd113f6226ef..48f30e7209f2 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -2,11 +2,6 @@ #include <linux/tcp.h> #include <net/tcp.h> -static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) -{ - return t1 > t2 || (t1 == t2 && after(seq1, seq2)); -} - static u32 tcp_rack_reo_wnd(const struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -77,9 +72,9 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) !(scb->sacked & TCPCB_SACKED_RETRANS)) continue; - if (!tcp_rack_sent_after(tp->rack.mstamp, - tcp_skb_timestamp_us(skb), - tp->rack.end_seq, scb->end_seq)) + if (!tcp_skb_sent_after(tp->rack.mstamp, + tcp_skb_timestamp_us(skb), + tp->rack.end_seq, scb->end_seq)) break; /* A packet is lost if it has not been s/acked beyond @@ -140,8 +135,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, } tp->rack.advanced = 1; tp->rack.rtt_us = rtt_us; - if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp, - end_seq, tp->rack.end_seq)) { + if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp, + end_seq, tp->rack.end_seq)) { tp->rack.mstamp = xmit_time; tp->rack.end_seq = end_seq; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index aa8545ca6964..aa9f2ec3dc46 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1909,7 +1909,7 @@ try_again: UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (sin) { @@ -2563,8 +2563,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, struct sock *sk; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - if (INET_MATCH(sk, net, acookie, rmt_addr, - loc_addr, ports, dif, sdif)) + if (inet_match(net, sk, acookie, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f01b8a3e1952..cde242dca530 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -335,7 +335,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) { int i; - idev->stats.ipv6 = alloc_percpu(struct ipstats_mib); + idev->stats.ipv6 = alloc_percpu_gfp(struct ipstats_mib, GFP_KERNEL_ACCOUNT); if (!idev->stats.ipv6) goto err_ip; @@ -351,7 +351,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) if (!idev->stats.icmpv6dev) goto err_icmp; idev->stats.icmpv6msgdev = kzalloc(sizeof(struct icmpv6msg_mib_device), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!idev->stats.icmpv6msgdev) goto err_icmpmsg; @@ -375,7 +375,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev) return ERR_PTR(-EINVAL); - ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); + ndev = kzalloc(sizeof(*ndev), GFP_KERNEL_ACCOUNT); if (!ndev) return ERR_PTR(err); @@ -4219,7 +4219,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, send_rs = send_mld && ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits != 0 && - (dev->flags&IFF_LOOPBACK) == 0; + (dev->flags & IFF_LOOPBACK) == 0 && + (dev->type != ARPHRD_TUNNEL); read_unlock_bh(&ifp->idev->lock); /* While dad is in progress mld report's source address is in6_addrany. @@ -7059,7 +7060,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, struct ctl_table *table; char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; - table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL); + table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL_ACCOUNT); if (!table) goto out; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 39b2327edc4e..df665d4e8f0f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -218,11 +218,11 @@ ipv4_connected: err = -EINVAL; goto out; } - sk->sk_bound_dev_if = usin->sin6_scope_id; + WRITE_ONCE(sk->sk_bound_dev_if, usin->sin6_scope_id); } if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST)) - sk->sk_bound_dev_if = np->mcast_oif; + WRITE_ONCE(sk->sk_bound_dev_if, np->mcast_oif); /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) { @@ -798,7 +798,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, if (src_idx) { if (fl6->flowi6_oif && src_idx != fl6->flowi6_oif && - (sk->sk_bound_dev_if != fl6->flowi6_oif || + (READ_ONCE(sk->sk_bound_dev_if) != fl6->flowi6_oif || !sk_dev_equal_l3scope(sk, src_idx))) return -EINVAL; fl6->flowi6_oif = src_idx; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index f2120e92caf1..36e1d0f8dd06 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -741,7 +741,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) static inline int esp_remove_trailer(struct sk_buff *skb) { struct xfrm_state *x = xfrm_input_state(skb); - struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; int alen, hlen, elen; int padlen, trimlen; @@ -753,11 +752,6 @@ static inline int esp_remove_trailer(struct sk_buff *skb) hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); elen = skb->len - hlen; - if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) { - ret = xo->proto; - goto out; - } - ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2); BUG_ON(ret); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 4740afecf7c6..7d53d62783b1 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -71,12 +71,12 @@ begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; - if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif)) + if (!inet6_match(net, sk, saddr, daddr, ports, dif, sdif)) continue; if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; - if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) { + if (unlikely(!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))) { sock_gen_put(sk); goto begin; } @@ -138,12 +138,11 @@ static struct sock *inet6_lhash2_lookup(struct net *net, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif) { - struct inet_connection_sock *icsk; struct sock *sk, *result = NULL; + struct hlist_nulls_node *node; int score, hiscore = 0; - inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { - sk = (struct sock *)icsk; + sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif); if (score > hiscore) { result = lookup_reuseport(net, sk, skb, doff, @@ -269,7 +268,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, if (sk2->sk_hash != hash) continue; - if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, + if (likely(inet6_match(net, sk2, saddr, daddr, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); @@ -308,7 +307,7 @@ not_unique: return -EADDRNOTAVAIL; } -static u32 inet6_sk_port_offset(const struct sock *sk) +static u64 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -320,7 +319,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk) int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 976236736146..4e37f7c29900 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -382,11 +382,6 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, goto failed_free; ip6gre_tnl_link_config(nt, 1); - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & TUNNEL_SEQ)) - dev->features |= NETIF_F_LLTX; - ip6gre_tunnel_link(ign, nt); return nt; @@ -724,6 +719,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, { struct ip6_tnl *tunnel = netdev_priv(dev); __be16 protocol; + __be16 flags; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -739,7 +735,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, if (tunnel->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; - __be16 flags; int tun_hlen; tun_info = skb_tunnel_info_txcheck(skb); @@ -766,19 +761,19 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, gre_build_header(skb, tun_hlen, flags, protocol, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); } else { - if (tunnel->parms.o_flags & TUNNEL_SEQ) - tunnel->o_seqno++; - if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) return -ENOMEM; - gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, + flags = tunnel->parms.o_flags; + + gre_build_header(skb, tunnel->tun_hlen, flags, protocol, tunnel->parms.o_key, - htonl(tunnel->o_seqno)); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) + : 0); } return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, @@ -1056,7 +1051,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, /* Push GRE header. */ proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN) : htons(ETH_P_ERSPAN2); - gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++)); + gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno))); /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) @@ -1445,26 +1440,23 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static void ip6gre_tnl_init_features(struct net_device *dev) { struct ip6_tnl *nt = netdev_priv(dev); + __be16 flags; - dev->features |= GRE6_FEATURES; + dev->features |= GRE6_FEATURES | NETIF_F_LLTX; dev->hw_features |= GRE6_FEATURES; - if (!(nt->parms.o_flags & TUNNEL_SEQ)) { - /* TCP offload with GRE SEQ is not supported, nor - * can we support 2 levels of outer headers requiring - * an update. - */ - if (!(nt->parms.o_flags & TUNNEL_CSUM) || - nt->encap.type == TUNNEL_ENCAP_NONE) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } + flags = nt->parms.o_flags; - /* Can use a lockless transmit, unless we generate - * output sequences - */ - dev->features |= NETIF_F_LLTX; - } + /* TCP offload with GRE SEQ is not supported, nor can we support 2 + * levels of outer headers requiring an update. + */ + if (flags & TUNNEL_SEQ) + return; + if (flags & TUNNEL_CSUM && nt->encap.type != TUNNEL_ENCAP_NONE) + return; + + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; } static int ip6gre_tunnel_init_common(struct net_device *dev) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index c4fc03c1ac99..d12dba2dd535 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -77,7 +77,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); struct ipv6hdr *ipv6h; const struct net_offload *ops; - int proto; + int proto, nexthdr; struct frag_hdr *fptr; unsigned int payload_len; u8 *prevhdr; @@ -87,6 +87,28 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, bool gso_partial; skb_reset_network_header(skb); + nexthdr = ipv6_has_hopopt_jumbo(skb); + if (nexthdr) { + const int hophdr_len = sizeof(struct hop_jumbo_hdr); + int err; + + err = skb_cow_head(skb, 0); + if (err < 0) + return ERR_PTR(err); + + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + memmove(skb_mac_header(skb) + hophdr_len, + skb_mac_header(skb), + ETH_HLEN + sizeof(struct ipv6hdr)); + skb->data += hophdr_len; + skb->len -= hophdr_len; + skb->network_header += hophdr_len; + skb->mac_header += hophdr_len; + ipv6h = (struct ipv6hdr *)skb->data; + ipv6h->nexthdr = nexthdr; + } nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) goto out; @@ -320,15 +342,43 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head, INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; - struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); + struct ipv6hdr *iph; int err = -ENOSYS; + u32 payload_len; if (skb->encapsulation) { skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6)); skb_set_inner_network_header(skb, nhoff); } - iph->payload_len = htons(skb->len - nhoff - sizeof(*iph)); + payload_len = skb->len - nhoff - sizeof(*iph); + if (unlikely(payload_len > IPV6_MAXPLEN)) { + struct hop_jumbo_hdr *hop_jumbo; + int hoplen = sizeof(*hop_jumbo); + + /* Move network header left */ + memmove(skb_mac_header(skb) - hoplen, skb_mac_header(skb), + skb->transport_header - skb->mac_header); + skb->data -= hoplen; + skb->len += hoplen; + skb->mac_header -= hoplen; + skb->network_header -= hoplen; + iph = (struct ipv6hdr *)(skb->data + nhoff); + hop_jumbo = (struct hop_jumbo_hdr *)(iph + 1); + + /* Build hop-by-hop options */ + hop_jumbo->nexthdr = iph->nexthdr; + hop_jumbo->hdrlen = 0; + hop_jumbo->tlv_type = IPV6_TLV_JUMBO; + hop_jumbo->tlv_len = 4; + hop_jumbo->jumbo_payload_len = htonl(payload_len + hoplen); + + iph->nexthdr = NEXTHDR_HOP; + iph->payload_len = 0; + } else { + iph = (struct ipv6hdr *)(skb->data + nhoff); + iph->payload_len = htons(payload_len); + } nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1f3d777e7694..4081b12a01ff 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -119,19 +119,21 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * rcu_read_lock_bh(); nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); - if (unlikely(!neigh)) - neigh = __neigh_create(&nd_tbl, nexthop, dev, false); - if (!IS_ERR(neigh)) { - sock_confirm_neigh(skb, neigh); - ret = neigh_output(neigh, skb, false); - rcu_read_unlock_bh(); - return ret; + + if (unlikely(IS_ERR_OR_NULL(neigh))) { + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dev, false); + if (IS_ERR(neigh)) { + rcu_read_unlock_bh(); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); + kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); + return -EINVAL; + } } + sock_confirm_neigh(skb, neigh); + ret = neigh_output(neigh, skb, false); rcu_read_unlock_bh(); - - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); - kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); - return -EINVAL; + return ret; } static int @@ -180,7 +182,9 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff #endif mtu = ip6_skb_dst_mtu(skb); - if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)) + if (skb_is_gso(skb) && + !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && + !skb_gso_validate_network_len(skb, mtu)) return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); if ((skb->len > mtu && !skb_is_gso(skb)) || @@ -198,7 +202,6 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); switch (ret) { case NET_XMIT_SUCCESS: - return __ip6_finish_output(net, sk, skb); case NET_XMIT_CN: return __ip6_finish_output(net, sk, skb) ? : ret; default: @@ -251,6 +254,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct inet6_dev *idev = ip6_dst_idev(dst); + struct hop_jumbo_hdr *hop_jumbo; + int hoplen = sizeof(*hop_jumbo); unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; @@ -258,7 +263,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, int hlimit = -1; u32 mtu; - head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev); + head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev); if (opt) head_room += opt->opt_nflen + opt->opt_flen; @@ -281,6 +286,20 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, &fl6->saddr); } + if (unlikely(seg_len > IPV6_MAXPLEN)) { + hop_jumbo = skb_push(skb, hoplen); + + hop_jumbo->nexthdr = proto; + hop_jumbo->hdrlen = 0; + hop_jumbo->tlv_type = IPV6_TLV_JUMBO; + hop_jumbo->tlv_len = 4; + hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen); + + proto = IPPROTO_HOPOPTS; + seg_len = 0; + IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO; + } + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 909f937befd7..7f695c39d9a8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -460,10 +460,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, newpsl->sl_addr[i] = psl->sl_addr[i]; atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); } + rcu_assign_pointer(pmc->sflist, newpsl); + kfree_rcu(psl, rcu); psl = newpsl; - rcu_assign_pointer(pmc->sflist, psl); } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i = 0; i < psl->sl_count; i++) { @@ -565,12 +565,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, psl->sl_count, psl->sl_addr, 0); atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); } - mutex_unlock(&idev->mc_lock); rcu_assign_pointer(pmc->sflist, newpsl); + mutex_unlock(&idev->mc_lock); + kfree_rcu(psl, rcu); pmc->sfmode = gsf->gf_fmode; err = 0; done: diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 1da332450d98..857713d7a38a 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -24,14 +24,14 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff { const struct ipv6hdr *iph = ipv6_hdr(skb); struct sock *sk = sk_to_full_sk(sk_partial); + struct net_device *dev = skb_dst(skb)->dev; struct flow_keys flkeys; unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct flowi6 fl6 = { - .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : - strict ? skb_dst(skb)->dev->ifindex : 0, + .flowi6_l3mdev = l3mdev_master_ifindex(dev), .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, @@ -39,6 +39,11 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff }; int err; + if (sk && sk->sk_bound_dev_if) + fl6.flowi6_oif = sk->sk_bound_dev_if; + else if (strict) + fl6.flowi6_oif = dev->ifindex; + fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys); dst = ip6_route_output(net, sk, &fl6); err = dst->error; diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index dffeaaaadcde..f61d4f18e1cf 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -31,7 +31,7 @@ static bool nf_reject_v6_csum_ok(struct sk_buff *skb, int hook) if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; - if (!nf_reject_verify_csum(proto)) + if (!nf_reject_verify_csum(skb, thoff, proto)) return true; return nf_ip6_checksum(skb, hook, thoff, proto) == 0; @@ -388,7 +388,7 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook) if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; - if (!nf_reject_verify_csum(proto)) + if (!nf_reject_verify_csum(skb, thoff, proto)) return true; return nf_ip6_checksum(skb, hook, thoff, proto) == 0; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0d7c13d33d1a..3b7cbd522b54 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -512,7 +512,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, *addr_len = sizeof(*sin6); } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (np->rxopt.all) ip6_datagram_recv_ctl(sk, msg, skb); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index d1b61d00368e..9cc123f000fb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -170,7 +170,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb); + req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, skb); if (!req) goto out; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index d53dd142bf87..94a0a294c6a1 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -23,8 +23,6 @@ #endif #include <linux/ioam6.h> -static int two = 2; -static int three = 3; static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static u32 rt6_multipath_hash_fields_all_mask = @@ -172,7 +170,7 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_rt6_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = &three, + .extra2 = SYSCTL_THREE, }, { .procname = "fib_multipath_hash_fields", @@ -197,7 +195,7 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "ioam6_id", diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 60bdec257ba7..f37dd4aa91c6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1762,6 +1762,7 @@ bad_packet: } discard_it: + SKB_DR_OR(drop_reason, NOT_SPECIFIED); kfree_skb_reason(skb, drop_reason); return 0; @@ -2206,9 +2207,15 @@ static void __net_exit tcpv6_net_exit(struct net *net) inet_ctl_sock_destroy(net->ipv6.tcp_sk); } +static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&tcp_hashinfo, AF_INET6); +} + static struct pernet_operations tcpv6_net_ops = { .init = tcpv6_net_init, .exit = tcpv6_net_exit, + .exit_batch = tcpv6_net_exit_batch, }; int __init tcpv6_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 688af6f809fe..55afd7f39c04 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -105,7 +105,7 @@ static int compute_score(struct sock *sk, struct net *net, const struct in6_addr *daddr, unsigned short hnum, int dif, int sdif) { - int score; + int bound_dev_if, score; struct inet_sock *inet; bool dev_match; @@ -132,10 +132,11 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif); + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + dev_match = udp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif); if (!dev_match) return -1; - if (sk->sk_bound_dev_if) + if (bound_dev_if) score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) @@ -391,7 +392,7 @@ try_again: if (!peeking) SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS); - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (msg->msg_name) { @@ -789,7 +790,7 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, (inet->inet_dport && inet->inet_dport != rmt_port) || (!ipv6_addr_any(&sk->sk_v6_daddr) && !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || - !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif) || + !udp_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif) || (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))) return false; @@ -1043,7 +1044,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (sk->sk_state == TCP_ESTABLISHED && - INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif)) + inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; @@ -1433,7 +1434,7 @@ do_udp_sendmsg: } if (!fl6->flowi6_oif) - fl6->flowi6_oif = sk->sk_bound_dev_if; + fl6->flowi6_oif = READ_ONCE(sk->sk_bound_dev_if); if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; diff --git a/net/key/af_key.c b/net/key/af_key.c index d09ec26b1081..11e1a3a3e442 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2826,8 +2826,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, - BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); + err = pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, + BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); + if (err) + return err; memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); @@ -2898,7 +2900,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg)) + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2916,7 +2918,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg))) + if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; for (k = 1; ; k++) { @@ -2927,7 +2929,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg)) + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } } @@ -3711,7 +3713,7 @@ static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (err) goto out_free; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); err = (flags & MSG_TRUNC) ? skb->len : copied; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 6af09e188e52..4db5a554bdbd 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -50,11 +50,13 @@ static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr, sk_for_each_bound(sk, &l2tp_ip_bind_table) { const struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk); const struct inet_sock *inet = inet_sk(sk); + int bound_dev_if; if (!net_eq(sock_net(sk), net)) continue; - if (sk->sk_bound_dev_if && dif && sk->sk_bound_dev_if != dif) + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && dif && bound_dev_if != dif) continue; if (inet->inet_rcv_saddr && laddr && diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 217c7192691e..c6ff8bf9b55f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -62,11 +62,13 @@ static struct sock *__l2tp_ip6_bind_lookup(const struct net *net, const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk); const struct in6_addr *sk_raddr = &sk->sk_v6_daddr; const struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); + int bound_dev_if; if (!net_eq(sock_net(sk), net)) continue; - if (sk->sk_bound_dev_if && dif && sk->sk_bound_dev_if != dif) + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && dif && bound_dev_if != dif) continue; if (sk_laddr && !ipv6_addr_any(sk_laddr) && @@ -445,7 +447,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, lsa->l2tp_conn_id = lsk->conn_id; } if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) - lsa->l2tp_scope_id = sk->sk_bound_dev_if; + lsa->l2tp_scope_id = READ_ONCE(sk->sk_bound_dev_if); return sizeof(*lsa); } @@ -560,7 +562,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } if (fl6.flowi6_oif == 0) - fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_oif = READ_ONCE(sk->sk_bound_dev_if); if (msg->msg_controllen) { opt = &opt_space; diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 218cdc554d71..bfab39320004 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -263,7 +263,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); - if (sta->sta.he_cap.has_he && addbaext) + if (sta->sta.deflink.he_cap.has_he && addbaext) ieee80211_add_addbaext(sdata, skb, addbaext, buf_size); ieee80211_tx_skb(sdata, skb); @@ -296,7 +296,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } - if (!sta->sta.ht_cap.ht_supported && + if (!sta->sta.deflink.ht_cap.ht_supported && sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o QoS\n", @@ -312,9 +312,9 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } - if (sta->sta.eht_cap.has_eht) + if (sta->sta.deflink.eht_cap.has_eht) max_buf_size = IEEE80211_MAX_AMPDU_BUF_EHT; - else if (sta->sta.he_cap.has_he) + else if (sta->sta.deflink.he_cap.has_he) max_buf_size = IEEE80211_MAX_AMPDU_BUF_HE; else max_buf_size = IEEE80211_MAX_AMPDU_BUF_HT; @@ -324,7 +324,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, * and if buffer size does not exceeds max value */ /* XXX: check own ht delayed BA capability?? */ if (((ba_policy != 1) && - (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || + (!(sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || (buf_size > max_buf_size)) { status = WLAN_STATUS_INVALID_QOS_PARAM; ht_dbg_ratelimited(sta->sdata, @@ -507,7 +507,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, goto free; } - if (sta->sta.eht_cap.has_eht && elems && elems->addba_ext_ie) { + if (sta->sta.deflink.eht_cap.has_eht && elems && elems->addba_ext_ie) { u8 buf_size_1k = u8_get_bits(elems->addba_ext_ie->data, IEEE80211_ADDBA_EXT_BUF_SIZE_MASK); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 1deb3d874a4b..91878ed5ec46 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -467,7 +467,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, sta->ampdu_mlme.addba_req_num[tid]++; spin_unlock_bh(&sta->lock); - if (sta->sta.he_cap.has_he) { + if (sta->sta.deflink.he_cap.has_he) { buf_size = local->hw.max_tx_aggregation_subframes; } else { /* @@ -594,7 +594,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, "Requested to start BA session on reserved tid=%d", tid)) return -EINVAL; - if (!pubsta->ht_cap.ht_supported && + if (!pubsta->deflink.ht_cap.ht_supported && sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) return -EINVAL; @@ -647,7 +647,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, * is set when we receive a bss info from a probe response or a beacon. */ if (sta->sdata->vif.type == NL80211_IFTYPE_ADHOC && - !sta->sta.ht_cap.ht_supported) { + !sta->sta.deflink.ht_cap.ht_supported) { ht_dbg(sdata, "BA request denied - IBSS STA %pM does not advertise HT support\n", pubsta->addr); diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 2619e12c8bda..4bab1683652d 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -647,8 +647,8 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_rx_status stat; - struct ieee80211_tx_rate *tx_rate = &sta->tx_stats.last_rate; - struct rate_info *ri = &sta->tx_stats.last_rate_info; + struct ieee80211_tx_rate *tx_rate = &sta->deflink.tx_stats.last_rate; + struct rate_info *ri = &sta->deflink.tx_stats.last_rate_info; u32 duration, overhead; u8 agg_shift; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ba752539d1d9..f7896f257e1b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -570,7 +570,8 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, if (pairwise) key = key_mtx_dereference(local, sta->ptk[key_idx]); else - key = key_mtx_dereference(local, sta->gtk[key_idx]); + key = key_mtx_dereference(local, + sta->deflink.gtk[key_idx]); } else key = key_mtx_dereference(local, sdata->keys[key_idx]); @@ -620,7 +621,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, else if (!pairwise && key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) - key = rcu_dereference(sta->gtk[key_idx]); + key = rcu_dereference(sta->deflink.gtk[key_idx]); } else key = rcu_dereference(sdata->keys[key_idx]); @@ -1173,7 +1174,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK); changed |= BSS_CHANGED_HE_OBSS_PD; - if (params->he_bss_color.enabled) + if (params->beacon.he_bss_color.enabled) changed |= BSS_CHANGED_HE_BSS_COLOR; } @@ -1230,7 +1231,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p; sdata->vif.bss_conf.twt_responder = params->twt_responder; sdata->vif.bss_conf.he_obss_pd = params->he_obss_pd; - sdata->vif.bss_conf.he_bss_color = params->he_bss_color; + sdata->vif.bss_conf.he_bss_color = params->beacon.he_bss_color; sdata->vif.bss_conf.s1g = params->chandef.chan->band == NL80211_BAND_S1GHZ; @@ -1315,6 +1316,7 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_beacon_data *params) { struct ieee80211_sub_if_data *sdata; + struct ieee80211_bss_conf *bss_conf; struct beacon_data *old; int err; @@ -1334,10 +1336,28 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, err = ieee80211_assign_beacon(sdata, params, NULL, NULL); if (err < 0) return err; + + bss_conf = &sdata->vif.bss_conf; + if (params->he_bss_color_valid && + params->he_bss_color.enabled != bss_conf->he_bss_color.enabled) { + bss_conf->he_bss_color.enabled = params->he_bss_color.enabled; + err |= BSS_CHANGED_HE_BSS_COLOR; + } + ieee80211_bss_info_change_notify(sdata, err); return 0; } +static void ieee80211_free_next_beacon(struct ieee80211_sub_if_data *sdata) +{ + if (!sdata->u.ap.next_beacon) + return; + + kfree(sdata->u.ap.next_beacon->mbssid_ies); + kfree(sdata->u.ap.next_beacon); + sdata->u.ap.next_beacon = NULL; +} + static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1372,11 +1392,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) mutex_unlock(&local->mtx); - if (sdata->u.ap.next_beacon) { - kfree(sdata->u.ap.next_beacon->mbssid_ies); - kfree(sdata->u.ap.next_beacon); - sdata->u.ap.next_beacon = NULL; - } + ieee80211_free_next_beacon(sdata); /* turn off carrier for this interface and dependent VLANs */ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) @@ -1728,9 +1744,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta->listen_interval = params->listen_interval; if (params->sta_modify_mask & STATION_PARAM_APPLY_STA_TXPOWER) { - sta->sta.txpwr.type = params->txpwr.type; + sta->sta.deflink.txpwr.type = params->txpwr.type; if (params->txpwr.type == NL80211_TX_POWER_LIMITED) - sta->sta.txpwr.power = params->txpwr.power; + sta->sta.deflink.txpwr.power = params->txpwr.power; ret = drv_sta_set_txpwr(local, sdata, sta); if (ret) return ret; @@ -1740,7 +1756,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef, sband, params->supported_rates, params->supported_rates_len, - &sta->sta.supp_rates[sband->band]); + &sta->sta.deflink.supp_rates[sband->band]); } if (params->ht_capa) @@ -2927,7 +2943,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) return 0; - ap = sdata->u.mgd.associated->bssid; + ap = sdata->u.mgd.bssid; rcu_read_lock(); list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { @@ -3306,13 +3322,12 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP: + if (!sdata->u.ap.next_beacon) + return -EINVAL; + err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon, NULL, NULL); - if (sdata->u.ap.next_beacon) { - kfree(sdata->u.ap.next_beacon->mbssid_ies); - kfree(sdata->u.ap.next_beacon); - sdata->u.ap.next_beacon = NULL; - } + ieee80211_free_next_beacon(sdata); if (err < 0) return err; @@ -3468,9 +3483,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, IEEE80211_MAX_CNTDWN_COUNTERS_NUM) || (params->n_counter_offsets_presp > IEEE80211_MAX_CNTDWN_COUNTERS_NUM)) { - kfree(sdata->u.ap.next_beacon->mbssid_ies); - kfree(sdata->u.ap.next_beacon); - sdata->u.ap.next_beacon = NULL; + ieee80211_free_next_beacon(sdata); return -EINVAL; } @@ -3482,9 +3495,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, err = ieee80211_assign_beacon(sdata, ¶ms->beacon_csa, &csa, NULL); if (err < 0) { - kfree(sdata->u.ap.next_beacon->mbssid_ies); - kfree(sdata->u.ap.next_beacon); - sdata->u.ap.next_beacon = NULL; + ieee80211_free_next_beacon(sdata); return err; } *changed |= err; @@ -3574,11 +3585,8 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, static void ieee80211_color_change_abort(struct ieee80211_sub_if_data *sdata) { sdata->vif.color_change_active = false; - if (sdata->u.ap.next_beacon) { - kfree(sdata->u.ap.next_beacon->mbssid_ies); - kfree(sdata->u.ap.next_beacon); - sdata->u.ap.next_beacon = NULL; - } + + ieee80211_free_next_beacon(sdata); cfg80211_color_change_aborted_notify(sdata->dev); } @@ -4314,13 +4322,12 @@ ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: { int ret; + if (!sdata->u.ap.next_beacon) + return -EINVAL; + ret = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon, NULL, NULL); - if (sdata->u.ap.next_beacon) { - kfree(sdata->u.ap.next_beacon->mbssid_ies); - kfree(sdata->u.ap.next_beacon); - sdata->u.ap.next_beacon = NULL; - } + ieee80211_free_next_beacon(sdata); if (ret < 0) return ret; @@ -4363,11 +4370,7 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata, err = ieee80211_assign_beacon(sdata, ¶ms->beacon_color_change, NULL, &color_change); if (err < 0) { - if (sdata->u.ap.next_beacon) { - kfree(sdata->u.ap.next_beacon->mbssid_ies); - kfree(sdata->u.ap.next_beacon); - sdata->u.ap.next_beacon = NULL; - } + ieee80211_free_next_beacon(sdata); return err; } *changed |= err; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index e26d42de14ec..e3452445b363 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -199,7 +199,7 @@ static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta) switch (width) { case IEEE80211_STA_RX_BW_20: - if (sta->sta.ht_cap.ht_supported) + if (sta->sta.deflink.ht_cap.ht_supported) return NL80211_CHAN_WIDTH_20; else return NL80211_CHAN_WIDTH_20_NOHT; @@ -375,15 +375,15 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local, new_sta_bw = ieee80211_sta_cur_vht_bw(sta); /* nothing change */ - if (new_sta_bw == sta->sta.bandwidth) + if (new_sta_bw == sta->sta.deflink.bandwidth) continue; /* vif changed to narrow BW and narrow BW for station wasn't * requested or vise versa */ - if ((new_sta_bw < sta->sta.bandwidth) == !narrowed) + if ((new_sta_bw < sta->sta.deflink.bandwidth) == !narrowed) continue; - sta->sta.bandwidth = new_sta_bw; + sta->sta.deflink.bandwidth = new_sta_bw; rate_control_rate_update(local, sband, sta, IEEE80211_RC_BW_CHANGED); } diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index f4c9a92f50f9..1fe43b264d75 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -504,6 +504,7 @@ static const char *hw_flag_names[] = { FLAG(SUPPORTS_TX_ENCAP_OFFLOAD), FLAG(SUPPORTS_RX_DECAP_OFFLOAD), FLAG(SUPPORTS_CONC_MON_RX_DECAP), + FLAG(DETECTS_COLOR_COLLISION), #undef FLAG }; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index e490c3da3aca..cf71484658c6 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -337,7 +337,7 @@ static ssize_t ieee80211_if_parse_tkip_mic_test( dev_kfree_skb(skb); return -ENOTCONN; } - memcpy(hdr->addr1, sdata->u.mgd.associated->bssid, ETH_ALEN); + memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr->addr3, addr, ETH_ALEN); sdata_unlock(sdata); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 88d9cc945a21..182094be9001 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -447,7 +447,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, int i; ssize_t bufsz = 512; struct sta_info *sta = file->private_data; - struct ieee80211_sta_ht_cap *htc = &sta->sta.ht_cap; + struct ieee80211_sta_ht_cap *htc = &sta->sta.deflink.ht_cap; ssize_t ret; buf = kzalloc(bufsz, GFP_KERNEL); @@ -531,7 +531,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, { char *buf, *p; struct sta_info *sta = file->private_data; - struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap; + struct ieee80211_sta_vht_cap *vhtc = &sta->sta.deflink.vht_cap; ssize_t ret; ssize_t bufsz = 512; @@ -646,7 +646,7 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf, char *buf, *p; size_t buf_sz = PAGE_SIZE; struct sta_info *sta = file->private_data; - struct ieee80211_sta_he_cap *hec = &sta->sta.he_cap; + struct ieee80211_sta_he_cap *hec = &sta->sta.deflink.he_cap; struct ieee80211_he_mcs_nss_supp *nss = &hec->he_mcs_nss_supp; u8 ppe_size; u8 *cap; @@ -1052,9 +1052,9 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(vht_capa); DEBUGFS_ADD(he_capa); - DEBUGFS_ADD_COUNTER(rx_duplicates, rx_stats.num_duplicates); - DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments); - DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered); + DEBUGFS_ADD_COUNTER(rx_duplicates, deflink.rx_stats.num_duplicates); + DEBUGFS_ADD_COUNTER(rx_fragments, deflink.rx_stats.fragments); + DEBUGFS_ADD_COUNTER(tx_filtered, deflink.status_stats.filtered); if (local->ops->wake_tx_queue) { DEBUGFS_ADD(aqm); diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index 364ad0ef7692..96c9486bf2fe 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c @@ -14,7 +14,7 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_eht_cap_elem *eht_cap_ie_elem, u8 eht_cap_len, struct sta_info *sta) { - struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.eht_cap; + struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.deflink.eht_cap; struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie; u8 eht_ppe_size = 0; u8 mcs_nss_size; @@ -71,6 +71,6 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, eht_cap->has_eht = true; - sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta); - sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); + sta->deflink.cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta); + sta->sta.deflink.bandwidth = ieee80211_sta_cur_vht_bw(sta); } diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index b2253df54413..31cd3c1ac07f 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -114,7 +114,7 @@ static void ieee80211_get_stats(struct net_device *dev, sta_set_sinfo(sta, &sinfo, false); i = 0; - ADD_STA_STATS(sta); + ADD_STA_STATS(sta->link[0]); data[i++] = sta->sta_state; @@ -140,7 +140,7 @@ static void ieee80211_get_stats(struct net_device *dev, memset(&sinfo, 0, sizeof(sinfo)); sta_set_sinfo(sta, &sinfo, false); i = 0; - ADD_STA_STATS(sta); + ADD_STA_STATS(sta->link[0]); } } diff --git a/net/mac80211/he.c b/net/mac80211/he.c index c05af7018f79..1a61f7552edd 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -49,7 +49,7 @@ ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_ break; } - sta->sta.he_6ghz_capa = *he_6ghz_capa; + sta->sta.deflink.he_6ghz_capa = *he_6ghz_capa; } static void ieee80211_he_mcs_disable(__le16 *he_mcs) @@ -110,7 +110,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_he_6ghz_capa *he_6ghz_capa, struct sta_info *sta) { - struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; + struct ieee80211_sta_he_cap *he_cap = &sta->sta.deflink.he_cap; struct ieee80211_sta_he_cap own_he_cap; struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie; u8 he_ppe_size; @@ -153,8 +153,8 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, he_cap->has_he = true; - sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta); - sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); + sta->deflink.cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta); + sta->sta.deflink.bandwidth = ieee80211_sta_cur_vht_bw(sta); if (sband->band == NL80211_BAND_6GHZ && he_6ghz_capa) ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, sta); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 2eb7641f5556..171bd16b13f3 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -243,9 +243,9 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839; apply: - changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); + changed = memcmp(&sta->sta.deflink.ht_cap, &ht_cap, sizeof(ht_cap)); - memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); + memcpy(&sta->sta.deflink.ht_cap, &ht_cap, sizeof(ht_cap)); switch (sdata->vif.bss_conf.chandef.width) { default: @@ -264,9 +264,9 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, break; } - sta->sta.bandwidth = bw; + sta->sta.deflink.bandwidth = bw; - sta->cur_max_bandwidth = + sta->deflink.cur_max_bandwidth = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 0416c4d22292..14c04fd48b7a 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -637,7 +637,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; - sta->sta.supp_rates[band] = supp_rates | + sta->sta.deflink.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(sband, scan_width); return ieee80211_ibss_finish_sta(sta); @@ -1005,7 +1005,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, if (sta) { u32 prev_rates; - prev_rates = sta->sta.supp_rates[band]; + prev_rates = sta->sta.deflink.supp_rates[band]; /* make sure mandatory rates are always added */ scan_width = NL80211_BSS_CHAN_WIDTH_20; if (rx_status->bw == RATE_INFO_BW_5) @@ -1013,13 +1013,13 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, else if (rx_status->bw == RATE_INFO_BW_10) scan_width = NL80211_BSS_CHAN_WIDTH_10; - sta->sta.supp_rates[band] = supp_rates | + sta->sta.deflink.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(sband, scan_width); - if (sta->sta.supp_rates[band] != prev_rates) { + if (sta->sta.deflink.supp_rates[band] != prev_rates) { ibss_dbg(sdata, "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", sta->sta.addr, prev_rates, - sta->sta.supp_rates[band]); + sta->sta.deflink.supp_rates[band]); rates_updated = true; } } else { @@ -1043,7 +1043,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, /* we both use HT */ struct ieee80211_ht_cap htcap_ie; struct cfg80211_chan_def chandef; - enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth; + enum ieee80211_sta_rx_bandwidth bw = sta->sta.deflink.bandwidth; cfg80211_chandef_create(&chandef, channel, NL80211_CHAN_NO_HT); ieee80211_chandef_ht_oper(elems->ht_operation, &chandef); @@ -1058,7 +1058,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_40) { /* we both use VHT */ struct ieee80211_vht_cap cap_ie; - struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap; + struct ieee80211_sta_vht_cap cap = sta->sta.deflink.vht_cap; u32 vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info); @@ -1069,11 +1069,11 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie)); ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, &cap_ie, sta); - if (memcmp(&cap, &sta->sta.vht_cap, sizeof(cap))) + if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap))) rates_updated |= true; } - if (bw != sta->sta.bandwidth) + if (bw != sta->sta.deflink.bandwidth) rates_updated |= true; if (!cfg80211_chandef_compatible(&sdata->u.ibss.chandef, @@ -1083,12 +1083,12 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, if (sta && rates_updated) { u32 changed = IEEE80211_RC_SUPP_RATES_CHANGED; - u8 rx_nss = sta->sta.rx_nss; + u8 rx_nss = sta->sta.deflink.rx_nss; /* Force rx_nss recalculation */ - sta->sta.rx_nss = 0; + sta->sta.deflink.rx_nss = 0; rate_control_rate_init(sta); - if (sta->sta.rx_nss != rx_nss) + if (sta->sta.deflink.rx_nss != rx_nss) changed |= IEEE80211_RC_NSS_CHANGED; drv_sta_rc_update(local, sdata, &sta->sta, changed); @@ -1235,7 +1235,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; - sta->sta.supp_rates[band] = supp_rates | + sta->sta.deflink.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(sband, scan_width); spin_lock(&ifibss->incomplete_lock); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d4a7ba4a8202..86ef0a46a68c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -453,9 +453,10 @@ struct ieee80211_if_managed { bool nullfunc_failed; u8 connection_loss:1, driver_disconnect:1, - reconnect:1; + reconnect:1, + associated:1; - struct cfg80211_bss *associated; + struct cfg80211_bss *assoc_bss; struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_mgd_assoc_data *assoc_data; @@ -1148,6 +1149,9 @@ struct tpt_led_trigger { * a scan complete for an aborted scan. * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being * cancelled. + * @SCAN_BEACON_WAIT: Set whenever we're passive scanning because of radar/no-IR + * and could send a probe request after receiving a beacon. + * @SCAN_BEACON_DONE: Beacon received, we can now send a probe request */ enum { SCAN_SW_SCANNING, @@ -1156,6 +1160,8 @@ enum { SCAN_COMPLETED, SCAN_ABORTED, SCAN_HW_CANCELLED, + SCAN_BEACON_WAIT, + SCAN_BEACON_DONE, }; /** @@ -1854,7 +1860,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, - u8 *bssid, u8 reason, bool tx); + u8 reason, bool tx); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index f695fc80088b..0fcf8aebedc4 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -476,7 +476,7 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, !(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX)) _ieee80211_set_tx_key(new, true); } else { - rcu_assign_pointer(sta->gtk[idx], new); + rcu_assign_pointer(sta->deflink.gtk[idx], new); } /* Only needed for transition from no key -> key. * Still triggers unnecessary when using Extended Key ID @@ -826,7 +826,8 @@ int ieee80211_key_link(struct ieee80211_key *key, (old_key && old_key->conf.cipher != key->conf.cipher)) goto out; } else if (sta) { - old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]); + old_key = key_mtx_dereference(sdata->local, + sta->deflink.gtk[idx]); } else { old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]); } @@ -1076,8 +1077,8 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, int i; mutex_lock(&local->key_mtx); - for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) { - key = key_mtx_dereference(local, sta->gtk[i]); + for (i = 0; i < ARRAY_SIZE(sta->deflink.gtk); i++) { + key = key_mtx_dereference(local, sta->deflink.gtk[i]); if (!key) continue; ieee80211_key_replace(key->sdata, key->sta, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a48a32f87897..5a385d4146b9 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -287,8 +287,8 @@ static void ieee80211_restart_work(struct work_struct *work) if (sdata->vif.csa_active) { sdata_lock(sdata); ieee80211_sta_connection_lost(sdata, - sdata->u.mgd.associated->bssid, - WLAN_REASON_UNSPECIFIED, false); + WLAN_REASON_UNSPECIFIED, + false); sdata_unlock(sdata); } } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 44a6fdb6efbd..58ebdcd69d05 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -310,7 +310,7 @@ void ieee80211s_update_metric(struct ieee80211_local *local, LINK_FAIL_THRESH) mesh_plink_broken(sta); - sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); + sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate, &rinfo); ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg, cfg80211_calculate_bitrate(&rinfo)); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index a829470dd59e..42ba7424589e 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -61,8 +61,8 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata, s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold; return rssi_threshold == 0 || (sta && - (s8)-ewma_signal_read(&sta->rx_stats_avg.signal) > - rssi_threshold); + (s8)-ewma_signal_read(&sta->deflink.rx_stats_avg.signal) > + rssi_threshold); } /** @@ -125,7 +125,7 @@ static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) continue; short_slot = false; - if (erp_rates & sta->sta.supp_rates[sband->band]) + if (erp_rates & sta->sta.deflink.supp_rates[sband->band]) short_slot = true; else break; @@ -175,10 +175,10 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) sta->mesh->plink_state != NL80211_PLINK_ESTAB) continue; - if (sta->sta.bandwidth > IEEE80211_STA_RX_BW_20) + if (sta->sta.deflink.bandwidth > IEEE80211_STA_RX_BW_20) continue; - if (!sta->sta.ht_cap.ht_supported) { + if (!sta->sta.deflink.ht_cap.ht_supported) { mpl_dbg(sdata, "nonHT sta (%pM) is present\n", sta->sta.addr); non_ht_sta = true; @@ -415,7 +415,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; u32 rates, basic_rates = 0, changed = 0; - enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth; + enum ieee80211_sta_rx_bandwidth bw = sta->sta.deflink.bandwidth; sband = ieee80211_get_sband(sdata); if (!sband) @@ -425,7 +425,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, &basic_rates); spin_lock_bh(&sta->mesh->plink_lock); - sta->rx_stats.last_rx = jiffies; + sta->deflink.rx_stats.last_rx = jiffies; /* rates and capabilities don't change during peering */ if (sta->mesh->plink_state == NL80211_PLINK_ESTAB && @@ -433,9 +433,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, goto out; sta->mesh->processed_beacon = true; - if (sta->sta.supp_rates[sband->band] != rates) + if (sta->sta.deflink.supp_rates[sband->band] != rates) changed |= IEEE80211_RC_SUPP_RATES_CHANGED; - sta->sta.supp_rates[sband->band] = rates; + sta->sta.deflink.supp_rates[sband->band] = rates; if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, elems->ht_cap_elem, sta)) @@ -449,16 +449,16 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, elems->he_6ghz_capa, sta); - if (bw != sta->sta.bandwidth) + if (bw != sta->sta.deflink.bandwidth) changed |= IEEE80211_RC_BW_CHANGED; /* HT peer is operating 20MHz-only */ if (elems->ht_operation && !(elems->ht_operation->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) { - if (sta->sta.bandwidth != IEEE80211_STA_RX_BW_20) + if (sta->sta.deflink.bandwidth != IEEE80211_STA_RX_BW_20) changed |= IEEE80211_RC_BW_CHANGED; - sta->sta.bandwidth = IEEE80211_STA_RX_BW_20; + sta->sta.deflink.bandwidth = IEEE80211_STA_RX_BW_20; } if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1b30c724ca8d..58d48dcae030 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1376,7 +1376,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct cfg80211_bss *cbss = ifmgd->associated; + struct cfg80211_bss *cbss = ifmgd->assoc_bss; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; enum nl80211_band current_band; @@ -1398,7 +1398,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band, bss->vht_cap_info, ifmgd->flags, - ifmgd->associated->bssid, &csa_ie); + ifmgd->bssid, &csa_ie); if (!res) { ch_switch.timestamp = timestamp; @@ -1427,7 +1427,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, csa_ie.chandef.chan->band) { sdata_info(sdata, "AP %pM switches to different band (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", - ifmgd->associated->bssid, + ifmgd->bssid, csa_ie.chandef.chan->center_freq, csa_ie.chandef.width, csa_ie.chandef.center_freq1, csa_ie.chandef.center_freq2); @@ -1440,7 +1440,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, "AP %pM switches to unsupported channel " "(%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), " "disconnecting\n", - ifmgd->associated->bssid, + ifmgd->bssid, csa_ie.chandef.chan->center_freq, csa_ie.chandef.chan->freq_offset, csa_ie.chandef.width, csa_ie.chandef.center_freq1, @@ -1456,7 +1456,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; sdata_info(sdata, "AP %pM tries to chanswitch to same channel, ignore\n", - ifmgd->associated->bssid); + ifmgd->bssid); ifmgd->csa_ignored_same_chan = true; return; } @@ -2266,7 +2266,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.beacon_timeout = usecs_to_jiffies(ieee80211_tu_to_usec( beacon_loss_count * bss_conf->beacon_int)); - sdata->u.mgd.associated = cbss; + sdata->u.mgd.associated = true; + sdata->u.mgd.assoc_bss = cbss; memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN); ieee80211_check_rate_mask(sdata); @@ -2361,7 +2362,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_stop_poll(sdata); - ifmgd->associated = NULL; + ifmgd->associated = false; + ifmgd->assoc_bss = NULL; netif_carrier_off(sdata->dev); /* @@ -2608,8 +2610,7 @@ static void ieee80211_mlme_send_probe_req(struct ieee80211_sub_if_data *sdata, static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - const struct element *ssid; - u8 *dst = ifmgd->associated->bssid; + u8 *dst = ifmgd->bssid; u8 unicast_limit = max(1, max_probe_tries - 3); struct sta_info *sta; @@ -2642,19 +2643,10 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ifmgd->nullfunc_failed = false; ieee80211_send_nullfunc(sdata->local, sdata, false); } else { - int ssid_len; - - rcu_read_lock(); - ssid = ieee80211_bss_get_elem(ifmgd->associated, WLAN_EID_SSID); - if (WARN_ON_ONCE(ssid == NULL)) - ssid_len = 0; - else - ssid_len = ssid->datalen; - ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst, - ssid->data, ssid_len, - ifmgd->associated->channel); - rcu_read_unlock(); + sdata->vif.bss_conf.ssid, + sdata->vif.bss_conf.ssid_len, + ifmgd->assoc_bss->channel); } ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); @@ -2744,7 +2736,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, sdata_assert_lock(sdata); if (ifmgd->associated) - cbss = ifmgd->associated; + cbss = ifmgd->assoc_bss; else if (ifmgd->auth_data) cbss = ifmgd->auth_data->bss; else if (ifmgd->assoc_data) @@ -2809,7 +2801,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) * AP is probably out of range (or not reachable for another * reason) so remove the bss struct for that AP. */ - cfg80211_unlink_bss(local->hw.wiphy, ifmgd->associated); + cfg80211_unlink_bss(local->hw.wiphy, ifmgd->assoc_bss); } ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, @@ -3219,8 +3211,8 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, } if (ifmgd->associated && - ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) { - const u8 *bssid = ifmgd->associated->bssid; + ether_addr_equal(mgmt->bssid, ifmgd->bssid)) { + const u8 *bssid = ifmgd->bssid; sdata_info(sdata, "deauthenticated from %pM (Reason: %u=%s)\n", bssid, reason_code, @@ -3262,7 +3254,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, return; if (!ifmgd->associated || - !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) + !ether_addr_equal(mgmt->bssid, ifmgd->bssid)) return; reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); @@ -3342,7 +3334,7 @@ static bool ieee80211_twt_req_supported(const struct sta_info *sta, if (!(elems->ext_capab[9] & WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT)) return false; - return sta->sta.he_cap.he_cap_elem.mac_cap_info[0] & + return sta->sta.deflink.he_cap.he_cap_elem.mac_cap_info[0] & IEEE80211_HE_MAC_CAP0_TWT_RES; } @@ -3369,7 +3361,7 @@ static bool ieee80211_twt_bcast_support(struct ieee80211_sub_if_data *sdata, ieee80211_vif_type_p2p(&sdata->vif)); return bss_conf->he_support && - (sta->sta.he_cap.he_cap_elem.mac_cap_info[2] & + (sta->sta.deflink.he_cap.he_cap_elem.mac_cap_info[2] & IEEE80211_HE_MAC_CAP2_BCAST_TWT) && own_he_cap && (own_he_cap->he_cap_elem.mac_cap_info[2] & @@ -3587,7 +3579,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, elems->he_6ghz_capa, sta); - bss_conf->he_support = sta->sta.he_cap.has_he; + bss_conf->he_support = sta->sta.deflink.he_cap.has_he; if (elems->rsnx && elems->rsnx_len && (elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) && wiphy_ext_feature_isset(local->hw.wiphy, @@ -3607,7 +3599,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, elems->eht_cap_len, sta); - bss_conf->eht_support = sta->sta.eht_cap.has_eht; + bss_conf->eht_support = sta->sta.deflink.eht_cap.has_eht; } else { bss_conf->eht_support = false; } @@ -3657,6 +3649,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, cbss->transmitted_bss->bssid); bss_conf->bssid_indicator = cbss->max_bssid_indicator; bss_conf->bssid_index = cbss->bssid_index; + } else { + bss_conf->nontransmitted = false; + memset(bss_conf->transmitter_bssid, 0, + sizeof(bss_conf->transmitter_bssid)); + bss_conf->bssid_indicator = 0; + bss_conf->bssid_index = 0; } /* @@ -3678,7 +3676,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, nss = *elems->opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK; nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; nss += 1; - sta->sta.rx_nss = nss; + sta->sta.deflink.rx_nss = nss; } rate_control_rate_init(sta); @@ -3966,7 +3964,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status); if (ifmgd->associated && - ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) + ether_addr_equal(mgmt->bssid, ifmgd->bssid)) ieee80211_reset_ap_probe(sdata); } @@ -4195,9 +4193,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if (!ifmgd->associated || - !ieee80211_rx_our_beacon(bssid, ifmgd->associated)) + !ieee80211_rx_our_beacon(bssid, ifmgd->assoc_bss)) return; - bssid = ifmgd->associated->bssid; + bssid = ifmgd->bssid; if (!(rx_status->flag & RX_FLAG_NO_SIGNAL_VAL)) ieee80211_handle_beacon_sig(sdata, ifmgd, bss_conf, @@ -4513,7 +4511,7 @@ static void ieee80211_sta_timer(struct timer_list *t) } void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, - u8 *bssid, u8 reason, bool tx) + u8 reason, bool tx) { u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -4744,11 +4742,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL && ifmgd->associated) { - u8 bssid[ETH_ALEN]; + u8 *bssid = ifmgd->bssid; int max_tries; - memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) max_tries = max_nullfunc_tries; else @@ -4768,7 +4764,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) mlme_dbg(sdata, "No ack for nullfunc frame to AP %pM, disconnecting.\n", bssid); - ieee80211_sta_connection_lost(sdata, bssid, + ieee80211_sta_connection_lost(sdata, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); } @@ -4778,7 +4774,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) mlme_dbg(sdata, "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", bssid, probe_wait_ms); - ieee80211_sta_connection_lost(sdata, bssid, + ieee80211_sta_connection_lost(sdata, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); } else if (ifmgd->probe_send_count < max_tries) { mlme_dbg(sdata, @@ -4795,7 +4791,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "No probe response from AP %pM after %dms, disconnecting.\n", bssid, probe_wait_ms); - ieee80211_sta_connection_lost(sdata, bssid, + ieee80211_sta_connection_lost(sdata, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); } } @@ -4836,9 +4832,9 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t) if (!sta) return; - timeout = sta->status_stats.last_ack; - if (time_before(sta->status_stats.last_ack, sta->rx_stats.last_rx)) - timeout = sta->rx_stats.last_rx; + timeout = sta->deflink.status_stats.last_ack; + if (time_before(sta->deflink.status_stats.last_ack, sta->deflink.rx_stats.last_rx)) + timeout = sta->deflink.rx_stats.last_rx; timeout += IEEE80211_CONNECTION_IDLE_TIME; /* If timeout is after now, then update timer to fire at @@ -4928,7 +4924,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) .bssid = bssid, }; - memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); + memcpy(bssid, ifmgd->bssid, ETH_ALEN); ieee80211_mgd_deauth(sdata, &req); } @@ -4950,7 +4946,6 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; mlme_dbg(sdata, "driver requested disconnect after resume\n"); ieee80211_sta_connection_lost(sdata, - ifmgd->associated->bssid, WLAN_REASON_UNSPECIFIED, true); sdata_unlock(sdata); @@ -4961,7 +4956,6 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_HW_RESTART; mlme_dbg(sdata, "driver requested disconnect after hardware restart\n"); ieee80211_sta_connection_lost(sdata, - ifmgd->associated->bssid, WLAN_REASON_UNSPECIFIED, true); sdata_unlock(sdata); @@ -5638,7 +5632,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, } if (rates) - new_sta->sta.supp_rates[cbss->channel->band] = rates; + new_sta->sta.deflink.supp_rates[cbss->channel->band] = rates; else sdata_info(sdata, "No rates found, keeping mandatory only\n"); @@ -5836,7 +5830,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "disconnect from AP %pM for new auth to %pM\n", - ifmgd->associated->bssid, req->bss->bssid); + ifmgd->bssid, req->bss->bssid); ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_UNSPECIFIED, false, frame_buf); @@ -5912,7 +5906,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "disconnect from AP %pM for new assoc to %pM\n", - ifmgd->associated->bssid, req->bss->bssid); + ifmgd->bssid, req->bss->bssid); ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_UNSPECIFIED, false, frame_buf); @@ -6126,6 +6120,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_EHT; } + if (req->flags & ASSOC_REQ_DISABLE_EHT) + ifmgd->flags |= IEEE80211_STA_DISABLE_EHT; + err = ieee80211_prep_connection(sdata, req->bss, true, override); if (err) goto err_clear; @@ -6267,7 +6264,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, } if (ifmgd->associated && - ether_addr_equal(ifmgd->associated->bssid, req->bssid)) { + ether_addr_equal(ifmgd->bssid, req->bssid)) { sdata_info(sdata, "deauthenticating from %pM by local choice (Reason: %u=%s)\n", req->bssid, req->reason_code, @@ -6298,7 +6295,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, * to cfg80211 while that's in a locked section already * trying to tell us that the user wants to disconnect. */ - if (ifmgd->associated != req->bss) + if (ifmgd->assoc_bss != req->bss) return -ENOLINK; sdata_info(sdata, @@ -6376,3 +6373,43 @@ void ieee80211_cqm_beacon_loss_notify(struct ieee80211_vif *vif, gfp_t gfp) cfg80211_cqm_beacon_loss_notify(sdata->dev, gfp); } EXPORT_SYMBOL(ieee80211_cqm_beacon_loss_notify); + +static void _ieee80211_enable_rssi_reports(struct ieee80211_sub_if_data *sdata, + int rssi_min_thold, + int rssi_max_thold) +{ + trace_api_enable_rssi_reports(sdata, rssi_min_thold, rssi_max_thold); + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) + return; + + /* + * Scale up threshold values before storing it, as the RSSI averaging + * algorithm uses a scaled up value as well. Change this scaling + * factor if the RSSI averaging algorithm changes. + */ + sdata->u.mgd.rssi_min_thold = rssi_min_thold*16; + sdata->u.mgd.rssi_max_thold = rssi_max_thold*16; +} + +void ieee80211_enable_rssi_reports(struct ieee80211_vif *vif, + int rssi_min_thold, + int rssi_max_thold) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + WARN_ON(rssi_min_thold == rssi_max_thold || + rssi_min_thold > rssi_max_thold); + + _ieee80211_enable_rssi_reports(sdata, rssi_min_thold, + rssi_max_thold); +} +EXPORT_SYMBOL(ieee80211_enable_rssi_reports); + +void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + _ieee80211_enable_rssi_reports(sdata, 0, 0); +} +EXPORT_SYMBOL(ieee80211_disable_rssi_reports); diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 7c1a735b9eee..f97cb4c453d3 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -74,7 +74,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, /* Add only mandatory rates for now */ sband = local->hw.wiphy->bands[band]; - sta->sta.supp_rates[band] = + sta->sta.deflink.supp_rates[band] = ieee80211_mandatory_rates(sband, scan_width); spin_lock(&ifocb->incomplete_lock); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 853c9a369d72..c5d2ab9df1e7 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -819,7 +819,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, if (!sdata->u.mgd.associated || (params->offchan && params->wait && local->ops->remain_on_channel && - memcmp(sdata->u.mgd.associated->bssid, + memcmp(sdata->u.mgd.bssid, mgmt->bssid, ETH_ALEN))) need_offchan = true; sdata_unlock(sdata); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 8c6416129d5b..ae9700e0a1a5 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -371,7 +371,7 @@ static void __rate_control_send_low(struct ieee80211_hw *hw, WARN_ONCE(i == sband->n_bitrates, "no supported rates for sta %pM (0x%x, band %d) in rate_mask 0x%x with flags 0x%x\n", sta ? sta->addr : NULL, - sta ? sta->supp_rates[sband->band] : -1, + sta ? sta->deflink.supp_rates[sband->band] : -1, sband->band, rate_mask, rate_flags); @@ -781,11 +781,11 @@ static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata, u16 sta_vht_mask[NL80211_VHT_NSS_MAX]; /* Filter out rates that the STA does not support */ - *mask &= sta->supp_rates[sband->band]; + *mask &= sta->deflink.supp_rates[sband->band]; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) - mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i]; + mcs_mask[i] &= sta->deflink.ht_cap.mcs.rx_mask[i]; - sta_vht_cap = sta->vht_cap.vht_mcs.rx_mcs_map; + sta_vht_cap = sta->deflink.vht_cap.vht_mcs.rx_mcs_map; ieee80211_get_vht_mask_from_cap(sta_vht_cap, sta_vht_mask); for (i = 0; i < NL80211_VHT_NSS_MAX; i++) vht_mask[i] &= sta_vht_mask[i]; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 9c6ace858107..5f27e6746762 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -333,6 +333,17 @@ minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)); } +/* + * Look up an MCS group index based on new cfg80211 rate_info. + */ +static int +minstrel_ht_ri_get_group_idx(struct rate_info *rate) +{ + return GROUP_IDX((rate->mcs / 8) + 1, + !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI), + !!(rate->bw & RATE_INFO_BW_40)); +} + static int minstrel_vht_get_group_idx(struct ieee80211_tx_rate *rate) { @@ -342,6 +353,18 @@ minstrel_vht_get_group_idx(struct ieee80211_tx_rate *rate) 2*!!(rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH)); } +/* + * Look up an MCS group index based on new cfg80211 rate_info. + */ +static int +minstrel_vht_ri_get_group_idx(struct rate_info *rate) +{ + return VHT_GROUP_IDX(rate->nss, + !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI), + !!(rate->bw & RATE_INFO_BW_40) + + 2*!!(rate->bw & RATE_INFO_BW_80)); +} + static struct minstrel_rate_stats * minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_tx_rate *rate) @@ -362,6 +385,9 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, group = MINSTREL_CCK_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) { + if (!(mi->supported[group] & BIT(idx))) + continue; + if (rate->idx != mp->cck_rates[idx]) continue; @@ -382,6 +408,50 @@ out: return &mi->groups[group].rates[idx]; } +/* + * Get the minstrel rate statistics for specified STA and rate info. + */ +static struct minstrel_rate_stats * +minstrel_ht_ri_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_rate_status *rate_status) +{ + int group, idx; + struct rate_info *rate = &rate_status->rate_idx; + + if (rate->flags & RATE_INFO_FLAGS_MCS) { + group = minstrel_ht_ri_get_group_idx(rate); + idx = rate->mcs % 8; + goto out; + } + + if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) { + group = minstrel_vht_ri_get_group_idx(rate); + idx = rate->mcs; + goto out; + } + + group = MINSTREL_CCK_GROUP; + for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) { + if (rate->legacy != minstrel_cck_bitrates[ mp->cck_rates[idx] ]) + continue; + + /* short preamble */ + if ((mi->supported[group] & BIT(idx + 4)) && + mi->use_short_preamble) + idx += 4; + goto out; + } + + group = MINSTREL_OFDM_GROUP; + for (idx = 0; idx < ARRAY_SIZE(mp->ofdm_rates[0]); idx++) + if (rate->legacy == minstrel_ofdm_bitrates[ mp->ofdm_rates[mi->band][idx] ]) + goto out; + + idx = 0; +out: + return &mi->groups[group].rates[idx]; +} + static inline struct minstrel_rate_stats * minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) { @@ -603,7 +673,7 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) int tmp_max_streams, group, tmp_idx, tmp_prob; int tmp_tp = 0; - if (!mi->sta->ht_cap.ht_supported) + if (!mi->sta->deflink.ht_cap.ht_supported) return; group = MI_RATE_GROUP(mi->max_tp_rate[0]); @@ -993,7 +1063,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES]; u16 tmp_legacy_tp_rate[MAX_THR_RATES], tmp_max_prob_rate; u16 index; - bool ht_supported = mi->sta->ht_cap.ht_supported; + bool ht_supported = mi->sta->deflink.ht_cap.ht_supported; if (mi->ampdu_packets > 0) { if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN)) @@ -1149,6 +1219,40 @@ minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, return false; } +/* + * Check whether rate_status contains valid information. + */ +static bool +minstrel_ht_ri_txstat_valid(struct minstrel_priv *mp, + struct minstrel_ht_sta *mi, + struct ieee80211_rate_status *rate_status) +{ + int i; + + if (!rate_status) + return false; + if (!rate_status->try_count) + return false; + + if (rate_status->rate_idx.flags & RATE_INFO_FLAGS_MCS || + rate_status->rate_idx.flags & RATE_INFO_FLAGS_VHT_MCS) + return true; + + for (i = 0; i < ARRAY_SIZE(mp->cck_rates); i++) { + if (rate_status->rate_idx.legacy == + minstrel_cck_bitrates[ mp->cck_rates[i] ]) + return true; + } + + for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates); i++) { + if (rate_status->rate_idx.legacy == + minstrel_ofdm_bitrates[ mp->ofdm_rates[mi->band][i] ]) + return true; + } + + return false; +} + static void minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary) { @@ -1214,16 +1318,34 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->ampdu_packets++; mi->ampdu_len += info->status.ampdu_len; - last = !minstrel_ht_txstat_valid(mp, mi, &ar[0]); - for (i = 0; !last; i++) { - last = (i == IEEE80211_TX_MAX_RATES - 1) || - !minstrel_ht_txstat_valid(mp, mi, &ar[i + 1]); + if (st->rates && st->n_rates) { + last = !minstrel_ht_ri_txstat_valid(mp, mi, &(st->rates[0])); + for (i = 0; !last; i++) { + last = (i == st->n_rates - 1) || + !minstrel_ht_ri_txstat_valid(mp, mi, + &(st->rates[i + 1])); + + rate = minstrel_ht_ri_get_stats(mp, mi, + &(st->rates[i])); - rate = minstrel_ht_get_stats(mp, mi, &ar[i]); - if (last) - rate->success += info->status.ampdu_ack_len; + if (last) + rate->success += info->status.ampdu_ack_len; - rate->attempts += ar[i].count * info->status.ampdu_len; + rate->attempts += st->rates[i].try_count * + info->status.ampdu_len; + } + } else { + last = !minstrel_ht_txstat_valid(mp, mi, &ar[0]); + for (i = 0; !last; i++) { + last = (i == IEEE80211_TX_MAX_RATES - 1) || + !minstrel_ht_txstat_valid(mp, mi, &ar[i + 1]); + + rate = minstrel_ht_get_stats(mp, mi, &ar[i]); + if (last) + rate->success += info->status.ampdu_ack_len; + + rate->attempts += ar[i].count * info->status.ampdu_len; + } } if (mp->hw->max_rates > 1) { @@ -1416,7 +1538,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi) * the limit here to avoid the complexity of having to de-aggregate * packets in the queue. */ - if (!mi->sta->vht_cap.vht_supported) + if (!mi->sta->deflink.vht_cap.vht_supported) return IEEE80211_MAX_MPDU_LEN_HT_BA; /* unlimited */ @@ -1436,17 +1558,17 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) /* Start with max_tp_rate[0] */ minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]); - if (mp->hw->max_rates >= 3) { - /* At least 3 tx rates supported, use max_tp_rate[1] next */ - minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[1]); - } + /* Fill up remaining, keep one entry for max_probe_rate */ + for (; i < (mp->hw->max_rates - 1); i++) + minstrel_ht_set_rate(mp, mi, rates, i, mi->max_tp_rate[i]); - if (mp->hw->max_rates >= 2) { + if (i < mp->hw->max_rates) minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate); - } + + if (i < IEEE80211_TX_RATE_TABLE_SIZE) + rates->rate[i].idx = -1; mi->sta->max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi); - rates->rate[i].idx = -1; rate_control_set_rates(mp->hw, mi->sta, rates); } @@ -1533,7 +1655,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (sband->band != NL80211_BAND_2GHZ) return; - if (sta->ht_cap.ht_supported && + if (sta->deflink.ht_cap.ht_supported && !ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES)) return; @@ -1556,7 +1678,7 @@ minstrel_ht_update_ofdm(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, const u8 *rates; int i; - if (sta->ht_cap.ht_supported) + if (sta->deflink.ht_cap.ht_supported) return; rates = mp->ofdm_rates[sband->band]; @@ -1576,10 +1698,11 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, { struct minstrel_priv *mp = priv; struct minstrel_ht_sta *mi = priv_sta; - struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; - u16 ht_cap = sta->ht_cap.cap; - struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap; + struct ieee80211_mcs_info *mcs = &sta->deflink.ht_cap.mcs; + u16 ht_cap = sta->deflink.ht_cap.cap; + struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; const struct ieee80211_rate *ctl_rate; + struct sta_info *sta_info; bool ldpc, erp; int use_vht; int n_supported = 0; @@ -1650,7 +1773,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, } if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH && - sta->bandwidth < IEEE80211_STA_RX_BW_40) + sta->deflink.bandwidth < IEEE80211_STA_RX_BW_40) continue; nss = minstrel_mcs_groups[i].streams; @@ -1677,7 +1800,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, continue; if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) { - if (sta->bandwidth < IEEE80211_STA_RX_BW_80 || + if (sta->deflink.bandwidth < IEEE80211_STA_RX_BW_80 || ((gflags & IEEE80211_TX_RC_SHORT_GI) && !(vht_cap->cap & IEEE80211_VHT_CAP_SHORT_GI_80))) { continue; @@ -1698,6 +1821,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, n_supported++; } + sta_info = container_of(sta, struct sta_info, sta); + mi->use_short_preamble = test_sta_flag(sta_info, WLAN_STA_SHORT_PREAMBLE) && + sta_info->sdata->vif.bss_conf.use_short_preamble; + minstrel_ht_update_cck(mp, mi, sband, sta); minstrel_ht_update_ofdm(mp, mi, sband, sta); diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 06e7126727ad..1766ff0c78d3 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -180,7 +180,7 @@ struct minstrel_ht_sta { /* tx flags to add for frames for this sta */ u32 tx_flags; - + bool use_short_preamble; u8 band; u8 sample_seq; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index beb6b92eb780..3c08ae04ddbc 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -221,7 +221,7 @@ static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata, skb_queue_tail(&sdata->skb_queue, skb); ieee80211_queue_work(&sdata->local->hw, &sdata->work); if (sta) - sta->rx_stats.packets++; + sta->deflink.rx_stats.packets++; } static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata, @@ -1405,8 +1405,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, goto dont_reorder; /* not part of a BA session */ - if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && - ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL) + if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK) goto dont_reorder; /* new, potentially un-ordered, ampdu frame - process it */ @@ -1465,7 +1464,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); - rx->sta->rx_stats.num_duplicates++; + rx->sta->deflink.rx_stats.num_duplicates++; return RX_DROP_UNUSABLE; } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; @@ -1761,46 +1760,47 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) NL80211_IFTYPE_ADHOC); if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { - sta->rx_stats.last_rx = jiffies; + sta->deflink.rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) - sta->rx_stats.last_rate = + sta->deflink.rx_stats.last_rate = sta_stats_encode_rate(status); } } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { - sta->rx_stats.last_rx = jiffies; + sta->deflink.rx_stats.last_rx = jiffies; } else if (!ieee80211_is_s1g_beacon(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to * match the current local configuration when processed. */ - sta->rx_stats.last_rx = jiffies; + sta->deflink.rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control)) - sta->rx_stats.last_rate = sta_stats_encode_rate(status); + sta->deflink.rx_stats.last_rate = sta_stats_encode_rate(status); } - sta->rx_stats.fragments++; + sta->deflink.rx_stats.fragments++; - u64_stats_update_begin(&rx->sta->rx_stats.syncp); - sta->rx_stats.bytes += rx->skb->len; - u64_stats_update_end(&rx->sta->rx_stats.syncp); + u64_stats_update_begin(&rx->sta->deflink.rx_stats.syncp); + sta->deflink.rx_stats.bytes += rx->skb->len; + u64_stats_update_end(&rx->sta->deflink.rx_stats.syncp); if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { - sta->rx_stats.last_signal = status->signal; - ewma_signal_add(&sta->rx_stats_avg.signal, -status->signal); + sta->deflink.rx_stats.last_signal = status->signal; + ewma_signal_add(&sta->deflink.rx_stats_avg.signal, + -status->signal); } if (status->chains) { - sta->rx_stats.chains = status->chains; + sta->deflink.rx_stats.chains = status->chains; for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { int signal = status->chain_signal[i]; if (!(status->chains & BIT(i))) continue; - sta->rx_stats.chain_signal_last[i] = signal; - ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], + sta->deflink.rx_stats.chain_signal_last[i] = signal; + ewma_signal_add(&sta->deflink.rx_stats_avg.chain_signal[i], -signal); } } @@ -1861,7 +1861,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Update counter and free packet here to avoid * counting this as a dropped packed. */ - sta->rx_stats.packets++; + sta->deflink.rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -1893,11 +1893,11 @@ ieee80211_rx_get_bigtk(struct ieee80211_rx_data *rx, int idx) } if (rx->sta) - key = rcu_dereference(rx->sta->gtk[idx]); + key = rcu_dereference(rx->sta->deflink.gtk[idx]); if (!key) key = rcu_dereference(sdata->keys[idx]); if (!key && rx->sta) - key = rcu_dereference(rx->sta->gtk[idx2]); + key = rcu_dereference(rx->sta->deflink.gtk[idx2]); if (!key) key = rcu_dereference(sdata->keys[idx2]); @@ -2012,7 +2012,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) test_sta_flag(rx->sta, WLAN_STA_MFP)) return RX_DROP_MONITOR; - rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]); + rx->key = rcu_dereference(rx->sta->deflink.gtk[mmie_keyidx]); } if (!rx->key) rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); @@ -2035,7 +2035,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) } else { if (rx->sta) { for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - key = rcu_dereference(rx->sta->gtk[i]); + key = rcu_dereference(rx->sta->deflink.gtk[i]); if (key) break; } @@ -2072,7 +2072,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* check per-station GTK first, if multicast packet */ if (is_multicast_ether_addr(hdr->addr1) && rx->sta) - rx->key = rcu_dereference(rx->sta->gtk[keyidx]); + rx->key = rcu_dereference(rx->sta->deflink.gtk[keyidx]); /* if not found, try default key */ if (!rx->key) { @@ -2398,7 +2398,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) out: ieee80211_led_rx(rx->local); if (rx->sta) - rx->sta->rx_stats.packets++; + rx->sta->deflink.rx_stats.packets++; return RX_CONTINUE; } @@ -2645,9 +2645,9 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) * for non-QoS-data frames. Here we know it's a data * frame, so count MSDUs. */ - u64_stats_update_begin(&rx->sta->rx_stats.syncp); - rx->sta->rx_stats.msdu[rx->seqno_idx]++; - u64_stats_update_end(&rx->sta->rx_stats.syncp); + u64_stats_update_begin(&rx->sta->deflink.rx_stats.syncp); + rx->sta->deflink.rx_stats.msdu[rx->seqno_idx]++; + u64_stats_update_end(&rx->sta->deflink.rx_stats.syncp); } if ((sdata->vif.type == NL80211_IFTYPE_AP || @@ -3178,6 +3178,49 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } +static void +ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx) +{ + struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; + const struct element *ie; + size_t baselen; + + if (!wiphy_ext_feature_isset(rx->local->hw.wiphy, + NL80211_EXT_FEATURE_BSS_COLOR)) + return; + + if (ieee80211_hw_check(&rx->local->hw, DETECTS_COLOR_COLLISION)) + return; + + if (rx->sdata->vif.csa_active) + return; + + baselen = mgmt->u.beacon.variable - rx->skb->data; + if (baselen > rx->skb->len) + return; + + ie = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, + mgmt->u.beacon.variable, + rx->skb->len - baselen); + if (ie && ie->datalen >= sizeof(struct ieee80211_he_operation) && + ie->datalen >= ieee80211_he_oper_size(ie->data + 1)) { + struct ieee80211_bss_conf *bss_conf = &rx->sdata->vif.bss_conf; + const struct ieee80211_he_operation *he_oper; + u8 color; + + he_oper = (void *)(ie->data + 1); + if (le32_get_bits(he_oper->he_oper_params, + IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED)) + return; + + color = le32_get_bits(he_oper->he_oper_params, + IEEE80211_HE_OPERATION_BSS_COLOR_MASK); + if (color == bss_conf->he_bss_color.color) + ieeee80211_obss_color_collision_notify(&rx->sdata->vif, + BIT_ULL(color)); + } +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) { @@ -3203,6 +3246,9 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) !(rx->flags & IEEE80211_RX_BEACON_REPORTED)) { int sig = 0; + /* sw bss color collision detection */ + ieee80211_rx_check_bss_color_collision(rx); + if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM) && !(status->flag & RX_FLAG_NO_SIGNAL_VAL)) sig = status->signal; @@ -3296,7 +3342,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) switch (mgmt->u.action.category) { case WLAN_CATEGORY_HT: /* reject HT action frames from stations not supporting HT */ - if (!rx->sta->sta.ht_cap.ht_supported) + if (!rx->sta->sta.deflink.ht_cap.ht_supported) goto invalid; if (sdata->vif.type != NL80211_IFTYPE_STATION && @@ -3360,7 +3406,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) struct sta_opmode_info sta_opmode = {}; /* If it doesn't support 40 MHz it can't change ... */ - if (!(rx->sta->sta.ht_cap.cap & + if (!(rx->sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) goto handled; @@ -3370,13 +3416,13 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) max_bw = ieee80211_sta_cap_rx_bw(rx->sta); /* set cur_max_bandwidth and recalc sta bw */ - rx->sta->cur_max_bandwidth = max_bw; + rx->sta->deflink.cur_max_bandwidth = max_bw; new_bw = ieee80211_sta_cur_vht_bw(rx->sta); - if (rx->sta->sta.bandwidth == new_bw) + if (rx->sta->sta.deflink.bandwidth == new_bw) goto handled; - rx->sta->sta.bandwidth = new_bw; + rx->sta->sta.deflink.bandwidth = new_bw; sband = rx->local->hw.wiphy->bands[status->band]; sta_opmode.bw = ieee80211_sta_rx_bw_to_chan_width(rx->sta); @@ -3573,7 +3619,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) handled: if (rx->sta) - rx->sta->rx_stats.packets++; + rx->sta->deflink.rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; @@ -3607,7 +3653,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) ieee80211_rx_status_to_khz(status), sig, rx->skb->data, rx->skb->len, 0)) { if (rx->sta) - rx->sta->rx_stats.packets++; + rx->sta->deflink.rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -3645,7 +3691,7 @@ ieee80211_rx_h_action_post_userspace(struct ieee80211_rx_data *rx) handled: if (rx->sta) - rx->sta->rx_stats.packets++; + rx->sta->deflink.rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -3865,7 +3911,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, case RX_DROP_MONITOR: I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); if (rx->sta) - rx->sta->rx_stats.dropped++; + rx->sta->deflink.rx_stats.dropped++; fallthrough; case RX_CONTINUE: { struct ieee80211_rate *rate = NULL; @@ -3884,7 +3930,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, case RX_DROP_UNUSABLE: I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); if (rx->sta) - rx->sta->rx_stats.dropped++; + rx->sta->deflink.rx_stats.dropped++; dev_kfree_skb(rx->skb); break; case RX_QUEUED: @@ -4436,15 +4482,15 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, void *sa = skb->data + ETH_ALEN; void *da = skb->data; - stats = &sta->rx_stats; + stats = &sta->deflink.rx_stats; if (fast_rx->uses_rss) - stats = this_cpu_ptr(sta->pcpu_rx_stats); + stats = this_cpu_ptr(sta->deflink.pcpu_rx_stats); /* statistics part of ieee80211_rx_h_sta_process() */ if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { stats->last_signal = status->signal; if (!fast_rx->uses_rss) - ewma_signal_add(&sta->rx_stats_avg.signal, + ewma_signal_add(&sta->deflink.rx_stats_avg.signal, -status->signal); } @@ -4460,7 +4506,7 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, stats->chain_signal_last[i] = signal; if (!fast_rx->uses_rss) - ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], + ewma_signal_add(&sta->deflink.rx_stats_avg.chain_signal[i], -signal); } } @@ -4536,7 +4582,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; } addrs __aligned(2); - struct ieee80211_sta_rx_stats *stats = &sta->rx_stats; + struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats; /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write * to a common data structure; drivers can implement that per queue @@ -4638,7 +4684,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, drop: dev_kfree_skb(skb); if (fast_rx->uses_rss) - stats = this_cpu_ptr(sta->pcpu_rx_stats); + stats = this_cpu_ptr(sta->deflink.pcpu_rx_stats); stats->dropped++; return true; diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c index 4141bc80cdfd..8ca7d45d6daa 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -11,8 +11,8 @@ void ieee80211_s1g_sta_rate_init(struct sta_info *sta) { /* avoid indicating legacy bitrates for S1G STAs */ - sta->tx_stats.last_rate.flags |= IEEE80211_TX_RC_S1G_MCS; - sta->rx_stats.last_rate = + sta->deflink.tx_stats.last_rate.flags |= IEEE80211_TX_RC_S1G_MCS; + sta->deflink.rx_stats.last_rate = STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_S1G); } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5e6b275afc9e..b698756887eb 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -281,6 +281,16 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) if (likely(!sdata1 && !sdata2)) return; + if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) { + /* + * we were passive scanning because of radar/no-IR, but + * the beacon/proberesp rx gives us an opportunity to upgrade + * to active scan + */ + set_bit(SCAN_BEACON_DONE, &local->scanning); + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); + } + if (ieee80211_is_probe_resp(mgmt->frame_control)) { struct cfg80211_scan_request *scan_req; struct cfg80211_sched_scan_request *sched_scan_req; @@ -787,6 +797,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, IEEE80211_CHAN_RADAR)) || !req->n_ssids) { next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; + if (req->n_ssids) + set_bit(SCAN_BEACON_WAIT, &local->scanning); } else { ieee80211_scan_state_send_probe(local, &next_delay); next_delay = IEEE80211_CHANNEL_TIME; @@ -998,6 +1010,8 @@ set_channel: !scan_req->n_ssids) { *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; local->next_scan_state = SCAN_DECISION; + if (scan_req->n_ssids) + set_bit(SCAN_BEACON_WAIT, &local->scanning); return; } @@ -1090,6 +1104,8 @@ void ieee80211_scan_work(struct work_struct *work) goto out; } + clear_bit(SCAN_BEACON_WAIT, &local->scanning); + /* * as long as no delay is required advance immediately * without scheduling a new work @@ -1100,6 +1116,10 @@ void ieee80211_scan_work(struct work_struct *work) goto out_complete; } + if (test_and_clear_bit(SCAN_BEACON_DONE, &local->scanning) && + local->next_scan_state == SCAN_DECISION) + local->next_scan_state = SCAN_SEND_PROBE; + switch (local->next_scan_state) { case SCAN_DECISION: /* if no more bands/channels left, complete scan */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 91fbb1ee5c38..e04a0905e941 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -287,7 +287,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif - free_percpu(sta->pcpu_rx_stats); + free_percpu(sta->deflink.pcpu_rx_stats); kfree(sta); } @@ -346,9 +346,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, return NULL; if (ieee80211_hw_check(hw, USES_RSS)) { - sta->pcpu_rx_stats = + sta->deflink.pcpu_rx_stats = alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp); - if (!sta->pcpu_rx_stats) + if (!sta->deflink.pcpu_rx_stats) goto free; } @@ -376,6 +376,14 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sta.max_rx_aggregation_subframes = local->hw.max_rx_aggregation_subframes; + /* TODO link specific alloc and assignments for MLO Link STA */ + + /* For non MLO STA, link info can be accessed either via deflink + * or link[0] + */ + sta->link[0] = &sta->deflink; + sta->sta.link[0] = &sta->sta.deflink; + /* Extended Key ID needs to install keys for keyid 0 and 1 Rx-only. * The Tx path starts to use a key as soon as the key slot ptk_idx * references to is not NULL. To not use the initial Rx-only key @@ -387,9 +395,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->local = local; sta->sdata = sdata; - sta->rx_stats.last_rx = jiffies; + sta->deflink.rx_stats.last_rx = jiffies; - u64_stats_init(&sta->rx_stats.syncp); + u64_stats_init(&sta->deflink.rx_stats.syncp); ieee80211_init_frag_cache(&sta->frags); @@ -399,10 +407,10 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->reserved_tid = IEEE80211_TID_UNRESERVED; sta->last_connected = ktime_get_seconds(); - ewma_signal_init(&sta->rx_stats_avg.signal); - ewma_avg_signal_init(&sta->status_stats.avg_ack_signal); - for (i = 0; i < ARRAY_SIZE(sta->rx_stats_avg.chain_signal); i++) - ewma_signal_init(&sta->rx_stats_avg.chain_signal[i]); + ewma_signal_init(&sta->deflink.rx_stats_avg.signal); + ewma_avg_signal_init(&sta->deflink.status_stats.avg_ack_signal); + for (i = 0; i < ARRAY_SIZE(sta->deflink.rx_stats_avg.chain_signal); i++) + ewma_signal_init(&sta->deflink.rx_stats_avg.chain_signal[i]); if (local->ops->wake_tx_queue) { void *txq_data; @@ -472,7 +480,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (!(rate->flags & mandatory)) continue; - sta->sta.supp_rates[i] |= BIT(r); + sta->sta.deflink.supp_rates[i] |= BIT(r); } } @@ -524,7 +532,7 @@ free_txq: if (sta->sta.txq[0]) kfree(to_txq_info(sta->sta.txq[0])); free: - free_percpu(sta->pcpu_rx_stats); + free_percpu(sta->deflink.pcpu_rx_stats); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif @@ -2087,16 +2095,16 @@ int sta_info_move_state(struct sta_info *sta, u8 sta_info_tx_streams(struct sta_info *sta) { - struct ieee80211_sta_ht_cap *ht_cap = &sta->sta.ht_cap; + struct ieee80211_sta_ht_cap *ht_cap = &sta->sta.deflink.ht_cap; u8 rx_streams; - if (!sta->sta.ht_cap.ht_supported) + if (!sta->sta.deflink.ht_cap.ht_supported) return 1; - if (sta->sta.vht_cap.vht_supported) { + if (sta->sta.deflink.vht_cap.vht_supported) { int i; u16 tx_mcs_map = - le16_to_cpu(sta->sta.vht_cap.vht_mcs.tx_mcs_map); + le16_to_cpu(sta->sta.deflink.vht_cap.vht_mcs.tx_mcs_map); for (i = 7; i >= 0; i--) if ((tx_mcs_map & (0x3 << (i * 2))) != @@ -2123,16 +2131,16 @@ u8 sta_info_tx_streams(struct sta_info *sta) static struct ieee80211_sta_rx_stats * sta_get_last_rx_stats(struct sta_info *sta) { - struct ieee80211_sta_rx_stats *stats = &sta->rx_stats; + struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats; int cpu; - if (!sta->pcpu_rx_stats) + if (!sta->deflink.pcpu_rx_stats) return stats; for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpustats; - cpustats = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + cpustats = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); if (time_after(cpustats->last_rx, stats->last_rx)) stats = cpustats; @@ -2226,13 +2234,15 @@ static void sta_set_tidstats(struct sta_info *sta, int cpu; if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { - tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->rx_stats, tid); + tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->deflink.rx_stats, + tid); - if (sta->pcpu_rx_stats) { + if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; - cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, + cpu); tidstats->rx_msdu += sta_get_tidstats_msdu(cpurxs, tid); } @@ -2243,19 +2253,19 @@ static void sta_set_tidstats(struct sta_info *sta, if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); - tidstats->tx_msdu = sta->tx_stats.msdu[tid]; + tidstats->tx_msdu = sta->deflink.tx_stats.msdu[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); - tidstats->tx_msdu_retries = sta->status_stats.msdu_retries[tid]; + tidstats->tx_msdu_retries = sta->deflink.status_stats.msdu_retries[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED); - tidstats->tx_msdu_failed = sta->status_stats.msdu_failed[tid]; + tidstats->tx_msdu_failed = sta->deflink.status_stats.msdu_failed[tid]; } if (local->ops->wake_tx_queue && tid < IEEE80211_NUM_TIDS) { @@ -2326,26 +2336,27 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) { sinfo->tx_bytes = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) - sinfo->tx_bytes += sta->tx_stats.bytes[ac]; + sinfo->tx_bytes += sta->deflink.tx_stats.bytes[ac]; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) { sinfo->tx_packets = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) - sinfo->tx_packets += sta->tx_stats.packets[ac]; + sinfo->tx_packets += sta->deflink.tx_stats.packets[ac]; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS); } if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) | BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) { - sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats); + sinfo->rx_bytes += sta_get_stats_bytes(&sta->deflink.rx_stats); - if (sta->pcpu_rx_stats) { + if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; - cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, + cpu); sinfo->rx_bytes += sta_get_stats_bytes(cpurxs); } } @@ -2354,12 +2365,13 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) { - sinfo->rx_packets = sta->rx_stats.packets; - if (sta->pcpu_rx_stats) { + sinfo->rx_packets = sta->deflink.rx_stats.packets; + if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; - cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, + cpu); sinfo->rx_packets += cpurxs->packets; } } @@ -2367,12 +2379,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) { - sinfo->tx_retries = sta->status_stats.retry_count; + sinfo->tx_retries = sta->deflink.status_stats.retry_count; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) { - sinfo->tx_failed = sta->status_stats.retry_failed; + sinfo->tx_failed = sta->deflink.status_stats.retry_failed; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); } @@ -2393,12 +2405,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT); } - sinfo->rx_dropped_misc = sta->rx_stats.dropped; - if (sta->pcpu_rx_stats) { + sinfo->rx_dropped_misc = sta->deflink.rx_stats.dropped; + if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; - cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); sinfo->rx_dropped_misc += cpurxs->dropped; } } @@ -2417,10 +2429,10 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL); } - if (!sta->pcpu_rx_stats && + if (!sta->deflink.pcpu_rx_stats && !(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = - -ewma_signal_read(&sta->rx_stats_avg.signal); + -ewma_signal_read(&sta->deflink.rx_stats_avg.signal); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG); } } @@ -2433,7 +2445,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, !(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) | BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); - if (!sta->pcpu_rx_stats) + if (!sta->deflink.pcpu_rx_stats) sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); sinfo->chains = last_rxstats->chains; @@ -2442,12 +2454,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->chain_signal[i] = last_rxstats->chain_signal_last[i]; sinfo->chain_signal_avg[i] = - -ewma_signal_read(&sta->rx_stats_avg.chain_signal[i]); + -ewma_signal_read(&sta->deflink.rx_stats_avg.chain_signal[i]); } } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) { - sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, + sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate, &sinfo->txrate); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); } @@ -2529,16 +2541,16 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) && - sta->status_stats.ack_signal_filled) { - sinfo->ack_signal = sta->status_stats.last_ack_signal; + sta->deflink.status_stats.ack_signal_filled) { + sinfo->ack_signal = sta->deflink.status_stats.last_ack_signal; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG)) && - sta->status_stats.ack_signal_filled) { + sta->deflink.status_stats.ack_signal_filled) { sinfo->avg_ack_signal = -(s8)ewma_avg_signal_read( - &sta->status_stats.avg_ack_signal); + &sta->deflink.status_stats.avg_ack_signal); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG); } @@ -2573,10 +2585,10 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); - if (!sta->status_stats.last_ack || - time_after(stats->last_rx, sta->status_stats.last_ack)) + if (!sta->deflink.status_stats.last_ack || + time_after(stats->last_rx, sta->deflink.status_stats.last_ack)) return stats->last_rx; - return sta->status_stats.last_ack; + return sta->deflink.status_stats.last_ack; } static void sta_update_codel_params(struct sta_info *sta, u32 thr) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 379fd367197f..35c390bedfba 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -484,6 +484,86 @@ struct ieee80211_fragment_cache { #define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */ /** + * struct link_sta_info - Link STA information + * All link specific sta info are stored here for reference. This can be + * a single entry for non-MLD STA or multiple entries for MLD STA + * @addr: Link MAC address - Can be same as MLD STA mac address and is always + * same for non-MLD STA. This is used as key for searching link STA + * @link_id: Link ID uniquely identifying the link STA. This is 0 for non-MLD + * and set to the corresponding vif LinkId for MLD STA + * @sta: Points to the STA info + * @gtk: group keys negotiated with this station, if any + * @tx_stats: TX statistics + * @tx_stats.packets: # of packets transmitted + * @tx_stats.bytes: # of bytes in all packets transmitted + * @tx_stats.last_rate: last TX rate + * @tx_stats.msdu: # of transmitted MSDUs per TID + * @rx_stats: RX statistics + * @rx_stats_avg: averaged RX statistics + * @rx_stats_avg.signal: averaged signal + * @rx_stats_avg.chain_signal: averaged per-chain signal + * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs + * this (by advertising the USES_RSS hw flag) + * @status_stats: TX status statistics + * @status_stats.filtered: # of filtered frames + * @status_stats.retry_failed: # of frames that failed after retry + * @status_stats.retry_count: # of retries attempted + * @status_stats.lost_packets: # of lost packets + * @status_stats.last_pkt_time: timestamp of last ACKed packet + * @status_stats.msdu_retries: # of MSDU retries + * @status_stats.msdu_failed: # of failed MSDUs + * @status_stats.last_ack: last ack timestamp (jiffies) + * @status_stats.last_ack_signal: last ACK signal + * @status_stats.ack_signal_filled: last ACK signal validity + * @status_stats.avg_ack_signal: average ACK signal + * TODO Move other link params from sta_info as required for MLD operation + */ +struct link_sta_info { + u8 addr[ETH_ALEN]; + u8 link_id; + + /* TODO rhash head/node for finding link_sta based on addr */ + + struct sta_info *sta; + struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + + NUM_DEFAULT_MGMT_KEYS + + NUM_DEFAULT_BEACON_KEYS]; + struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats; + + /* Updated from RX path only, no locking requirements */ + struct ieee80211_sta_rx_stats rx_stats; + struct { + struct ewma_signal signal; + struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS]; + } rx_stats_avg; + + /* Updated from TX status path only, no locking requirements */ + struct { + unsigned long filtered; + unsigned long retry_failed, retry_count; + unsigned int lost_packets; + unsigned long last_pkt_time; + u64 msdu_retries[IEEE80211_NUM_TIDS + 1]; + u64 msdu_failed[IEEE80211_NUM_TIDS + 1]; + unsigned long last_ack; + s8 last_ack_signal; + bool ack_signal_filled; + struct ewma_avg_signal avg_ack_signal; + } status_stats; + + /* Updated from TX path only, no locking requirements */ + struct { + u64 packets[IEEE80211_NUM_ACS]; + u64 bytes[IEEE80211_NUM_ACS]; + struct ieee80211_tx_rate last_rate; + struct rate_info last_rate_info; + u64 msdu[IEEE80211_NUM_TIDS + 1]; + } tx_stats; + + enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; +}; + +/** * struct sta_info - STA information * * This structure collects information about a station that @@ -498,7 +578,6 @@ struct ieee80211_fragment_cache { * @sdata: virtual interface this station belongs to * @ptk: peer keys negotiated with this station, if any * @ptk_idx: last installed peer key index - * @gtk: group keys negotiated with this station, if any * @rate_ctrl: rate control algorithm reference * @rate_ctrl_lock: spinlock used to protect rate control data * (data inside the algorithm, so serializes calls there) @@ -544,30 +623,19 @@ struct ieee80211_fragment_cache { * @fast_rx: RX fastpath information * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to * the BSS one. - * @tx_stats: TX statistics - * @tx_stats.packets: # of packets transmitted - * @tx_stats.bytes: # of bytes in all packets transmitted - * @tx_stats.last_rate: last TX rate - * @tx_stats.msdu: # of transmitted MSDUs per TID - * @rx_stats: RX statistics - * @rx_stats_avg: averaged RX statistics - * @rx_stats_avg.signal: averaged signal - * @rx_stats_avg.chain_signal: averaged per-chain signal - * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs - * this (by advertising the USES_RSS hw flag) - * @status_stats: TX status statistics - * @status_stats.filtered: # of filtered frames - * @status_stats.retry_failed: # of frames that failed after retry - * @status_stats.retry_count: # of retries attempted - * @status_stats.lost_packets: # of lost packets - * @status_stats.last_pkt_time: timestamp of last ACKed packet - * @status_stats.msdu_retries: # of MSDU retries - * @status_stats.msdu_failed: # of failed MSDUs - * @status_stats.last_ack: last ack timestamp (jiffies) - * @status_stats.last_ack_signal: last ACK signal - * @status_stats.ack_signal_filled: last ACK signal validity - * @status_stats.avg_ack_signal: average ACK signal * @frags: fragment cache + * @multi_link_sta: Identifies if this sta is a MLD STA or regular STA + * @deflink: This is the default link STA information, for non MLO STA all link + * specific STA information is accessed through @deflink or through + * link[0] which points to address of @deflink. For MLO Link STA + * the first added link STA will point to deflink. + * @link: reference to Link Sta entries. For Non MLO STA, except 1st link, + * i.e link[0] all links would be assigned to NULL by default and + * would access link information via @deflink or link[0]. For MLO + * STA, first link STA being added will point its link pointer to + * @deflink address and remaining would be allocated and the address + * would be assigned to link[link_id] where link_id is the id assigned + * by the AP. */ struct sta_info { /* General information, mostly static */ @@ -577,9 +645,6 @@ struct sta_info { u8 addr[ETH_ALEN]; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; - struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + - NUM_DEFAULT_MGMT_KEYS + - NUM_DEFAULT_BEACON_KEYS]; struct ieee80211_key __rcu *ptk[NUM_DEFAULT_KEYS]; u8 ptk_idx; struct rate_control_ref *rate_ctrl; @@ -589,7 +654,6 @@ struct sta_info { struct ieee80211_fast_tx __rcu *fast_tx; struct ieee80211_fast_rx __rcu *fast_rx; - struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats; #ifdef CONFIG_MAC80211_MESH struct mesh_sta *mesh; @@ -619,38 +683,9 @@ struct sta_info { u64 assoc_at; long last_connected; - /* Updated from RX path only, no locking requirements */ - struct ieee80211_sta_rx_stats rx_stats; - struct { - struct ewma_signal signal; - struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS]; - } rx_stats_avg; - /* Plus 1 for non-QoS frames */ __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; - /* Updated from TX status path only, no locking requirements */ - struct { - unsigned long filtered; - unsigned long retry_failed, retry_count; - unsigned int lost_packets; - unsigned long last_pkt_time; - u64 msdu_retries[IEEE80211_NUM_TIDS + 1]; - u64 msdu_failed[IEEE80211_NUM_TIDS + 1]; - unsigned long last_ack; - s8 last_ack_signal; - bool ack_signal_filled; - struct ewma_avg_signal avg_ack_signal; - } status_stats; - - /* Updated from TX path only, no locking requirements */ - struct { - u64 packets[IEEE80211_NUM_ACS]; - u64 bytes[IEEE80211_NUM_ACS]; - struct ieee80211_tx_rate last_rate; - struct rate_info last_rate_info; - u64 msdu[IEEE80211_NUM_TIDS + 1]; - } tx_stats; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; struct airtime_info airtime[IEEE80211_NUM_ACS]; @@ -664,8 +699,6 @@ struct sta_info { struct dentry *debugfs_dir; #endif - enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; - enum ieee80211_smps_mode known_smps_mode; const struct ieee80211_cipher_scheme *cipher_scheme; @@ -677,6 +710,10 @@ struct sta_info { struct ieee80211_fragment_cache frags; + bool multi_link_sta; + struct link_sta_info deflink; + struct link_sta_info *link[MAX_STA_LINKS]; + /* keep last! */ struct ieee80211_sta sta; }; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index e81e8a5bb774..e69272139437 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -72,7 +72,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, info->flags |= IEEE80211_TX_INTFL_RETRANSMISSION; info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; - sta->status_stats.filtered++; + sta->deflink.status_stats.filtered++; /* * Clear more-data bit on filtered frames, it might be set @@ -247,15 +247,19 @@ static void ieee80211_set_bar_pending(struct sta_info *sta, u8 tid, u16 ssn) static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info, struct ieee80211_tx_status *status) { + struct ieee80211_rate_status *status_rate = NULL; int len = sizeof(struct ieee80211_radiotap_header); + if (status && status->n_rates) + status_rate = &status->rates[status->n_rates - 1]; + /* IEEE80211_RADIOTAP_RATE rate */ - if (status && status->rate && !(status->rate->flags & - (RATE_INFO_FLAGS_MCS | - RATE_INFO_FLAGS_DMG | - RATE_INFO_FLAGS_EDMG | - RATE_INFO_FLAGS_VHT_MCS | - RATE_INFO_FLAGS_HE_MCS))) + if (status_rate && !(status_rate->rate_idx.flags & + (RATE_INFO_FLAGS_MCS | + RATE_INFO_FLAGS_DMG | + RATE_INFO_FLAGS_EDMG | + RATE_INFO_FLAGS_VHT_MCS | + RATE_INFO_FLAGS_HE_MCS))) len += 2; else if (info->status.rates[0].idx >= 0 && !(info->status.rates[0].flags & @@ -270,12 +274,12 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info, /* IEEE80211_RADIOTAP_MCS * IEEE80211_RADIOTAP_VHT */ - if (status && status->rate) { - if (status->rate->flags & RATE_INFO_FLAGS_MCS) + if (status_rate) { + if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_MCS) len += 3; - else if (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS) + else if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_VHT_MCS) len = ALIGN(len, 2) + 12; - else if (status->rate->flags & RATE_INFO_FLAGS_HE_MCS) + else if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_HE_MCS) len = ALIGN(len, 2) + 12; } else if (info->status.rates[0].idx >= 0) { if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS) @@ -297,10 +301,14 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_radiotap_header *rthdr; + struct ieee80211_rate_status *status_rate = NULL; unsigned char *pos; u16 legacy_rate = 0; u16 txflags; + if (status && status->n_rates) + status_rate = &status->rates[status->n_rates - 1]; + rthdr = skb_push(skb, rtap_len); memset(rthdr, 0, rtap_len); @@ -318,13 +326,14 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_RATE */ - if (status && status->rate) { - if (!(status->rate->flags & (RATE_INFO_FLAGS_MCS | - RATE_INFO_FLAGS_DMG | - RATE_INFO_FLAGS_EDMG | - RATE_INFO_FLAGS_VHT_MCS | - RATE_INFO_FLAGS_HE_MCS))) - legacy_rate = status->rate->legacy; + if (status_rate) { + if (!(status_rate->rate_idx.flags & + (RATE_INFO_FLAGS_MCS | + RATE_INFO_FLAGS_DMG | + RATE_INFO_FLAGS_EDMG | + RATE_INFO_FLAGS_VHT_MCS | + RATE_INFO_FLAGS_HE_MCS))) + legacy_rate = status_rate->rate_idx.legacy; } else if (info->status.rates[0].idx >= 0 && !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS | IEEE80211_TX_RC_VHT_MCS))) @@ -357,20 +366,21 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, *pos = retry_count; pos++; - if (status && status->rate && - (status->rate->flags & RATE_INFO_FLAGS_MCS)) { + if (status_rate && (status_rate->rate_idx.flags & RATE_INFO_FLAGS_MCS)) + { rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS)); pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS | IEEE80211_RADIOTAP_MCS_HAVE_GI | IEEE80211_RADIOTAP_MCS_HAVE_BW; - if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI) + if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_SHORT_GI) pos[1] |= IEEE80211_RADIOTAP_MCS_SGI; - if (status->rate->bw == RATE_INFO_BW_40) + if (status_rate->rate_idx.bw == RATE_INFO_BW_40) pos[1] |= IEEE80211_RADIOTAP_MCS_BW_40; - pos[2] = status->rate->mcs; + pos[2] = status_rate->rate_idx.mcs; pos += 3; - } else if (status && status->rate && - (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS)) { + } else if (status_rate && (status_rate->rate_idx.flags & + RATE_INFO_FLAGS_VHT_MCS)) + { u16 known = local->hw.radiotap_vht_details & (IEEE80211_RADIOTAP_VHT_KNOWN_GI | IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH); @@ -385,12 +395,12 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, pos += 2; /* u8 flags - IEEE80211_RADIOTAP_VHT_FLAG_* */ - if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI) + if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_SHORT_GI) *pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI; pos++; /* u8 bandwidth */ - switch (status->rate->bw) { + switch (status_rate->rate_idx.bw) { case RATE_INFO_BW_160: *pos = 11; break; @@ -407,7 +417,8 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, pos++; /* u8 mcs_nss[4] */ - *pos = (status->rate->mcs << 4) | status->rate->nss; + *pos = (status_rate->rate_idx.mcs << 4) | + status_rate->rate_idx.nss; pos += 4; /* u8 coding */ @@ -416,8 +427,9 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, pos++; /* u16 partial_aid */ pos += 2; - } else if (status && status->rate && - (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)) { + } else if (status_rate && (status_rate->rate_idx.flags & + RATE_INFO_FLAGS_HE_MCS)) + { struct ieee80211_radiotap_he *he; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE)); @@ -435,7 +447,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, #define HE_PREP(f, val) le16_encode_bits(val, IEEE80211_RADIOTAP_HE_##f) - he->data6 |= HE_PREP(DATA6_NSTS, status->rate->nss); + he->data6 |= HE_PREP(DATA6_NSTS, status_rate->rate_idx.nss); #define CHECK_GI(s) \ BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \ @@ -445,12 +457,12 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, CHECK_GI(1_6); CHECK_GI(3_2); - he->data3 |= HE_PREP(DATA3_DATA_MCS, status->rate->mcs); - he->data3 |= HE_PREP(DATA3_DATA_DCM, status->rate->he_dcm); + he->data3 |= HE_PREP(DATA3_DATA_MCS, status_rate->rate_idx.mcs); + he->data3 |= HE_PREP(DATA3_DATA_DCM, status_rate->rate_idx.he_dcm); - he->data5 |= HE_PREP(DATA5_GI, status->rate->he_gi); + he->data5 |= HE_PREP(DATA5_GI, status_rate->rate_idx.he_gi); - switch (status->rate->bw) { + switch (status_rate->rate_idx.bw) { case RATE_INFO_BW_20: he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ); @@ -481,16 +493,16 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, CHECK_RU_ALLOC(2x996); he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, - status->rate->he_ru_alloc + 4); + status_rate->rate_idx.he_ru_alloc + 4); break; default: - WARN_ONCE(1, "Invalid SU BW %d\n", status->rate->bw); + WARN_ONCE(1, "Invalid SU BW %d\n", status_rate->rate_idx.bw); } pos += sizeof(struct ieee80211_radiotap_he); } - if ((status && status->rate) || info->status.rates[0].idx < 0) + if (status_rate || info->status.rates[0].idx < 0) return; /* IEEE80211_RADIOTAP_MCS @@ -776,7 +788,7 @@ static void ieee80211_lost_packet(struct sta_info *sta, !(info->flags & IEEE80211_TX_STAT_AMPDU)) return; - sta->status_stats.lost_packets++; + sta->deflink.status_stats.lost_packets++; if (sta->sta.tdls) { pkt_time = STA_LOST_TDLS_PKT_TIME; pkt_thr = STA_LOST_PKT_THRESHOLD; @@ -789,13 +801,14 @@ static void ieee80211_lost_packet(struct sta_info *sta, * mechanism. * For non-TDLS, use STA_LOST_PKT_THRESHOLD and STA_LOST_PKT_TIME */ - if (sta->status_stats.lost_packets < pkt_thr || - !time_after(jiffies, sta->status_stats.last_pkt_time + pkt_time)) + if (sta->deflink.status_stats.lost_packets < pkt_thr || + !time_after(jiffies, sta->deflink.status_stats.last_pkt_time + pkt_time)) return; cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr, - sta->status_stats.lost_packets, GFP_ATOMIC); - sta->status_stats.lost_packets = 0; + sta->deflink.status_stats.lost_packets, + GFP_ATOMIC); + sta->deflink.status_stats.lost_packets = 0; } static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, @@ -930,7 +943,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) && (ieee80211_is_data(hdr->frame_control)) && (rates_idx != -1)) - sta->tx_stats.last_rate = + sta->deflink.tx_stats.last_rate = info->status.rates[rates_idx]; if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && @@ -976,9 +989,9 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, return; } else if (ieee80211_is_data_present(fc)) { if (!acked && !noack_success) - sta->status_stats.msdu_failed[tid]++; + sta->deflink.status_stats.msdu_failed[tid]++; - sta->status_stats.msdu_retries[tid] += + sta->deflink.status_stats.msdu_retries[tid] += retry_count; } @@ -1110,8 +1123,9 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, if (pubsta) { sta = container_of(pubsta, struct sta_info, sta); - if (status->rate) - sta->tx_stats.last_rate_info = *status->rate; + if (status->n_rates) + sta->deflink.tx_stats.last_rate_info = + status->rates[status->n_rates - 1].rate_idx; } if (skb && (tx_time_est = @@ -1142,8 +1156,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata = sta->sdata; if (!acked && !noack_success) - sta->status_stats.retry_failed++; - sta->status_stats.retry_count += retry_count; + sta->deflink.status_stats.retry_failed++; + sta->deflink.status_stats.retry_count += retry_count; if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { if (sdata->vif.type == NL80211_IFTYPE_STATION && @@ -1152,13 +1166,13 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, acked, info->status.tx_time); if (acked) { - sta->status_stats.last_ack = jiffies; + sta->deflink.status_stats.last_ack = jiffies; - if (sta->status_stats.lost_packets) - sta->status_stats.lost_packets = 0; + if (sta->deflink.status_stats.lost_packets) + sta->deflink.status_stats.lost_packets = 0; /* Track when last packet was ACKed */ - sta->status_stats.last_pkt_time = jiffies; + sta->deflink.status_stats.last_pkt_time = jiffies; /* Reset connection monitor */ if (sdata->vif.type == NL80211_IFTYPE_STATION && @@ -1166,10 +1180,10 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, sdata->u.mgd.probe_send_count = 0; if (ack_signal_valid) { - sta->status_stats.last_ack_signal = + sta->deflink.status_stats.last_ack_signal = (s8)info->status.ack_signal; - sta->status_stats.ack_signal_filled = true; - ewma_avg_signal_add(&sta->status_stats.avg_ack_signal, + sta->deflink.status_stats.ack_signal_filled = true; + ewma_avg_signal_add(&sta->deflink.status_stats.avg_ack_signal, -info->status.ack_signal); } } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) { @@ -1235,7 +1249,7 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw, rate_control_tx_status(local, sband, &status); if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) - sta->tx_stats.last_rate = info->status.rates[0]; + sta->deflink.tx_stats.last_rate = info->status.rates[0]; } EXPORT_SYMBOL(ieee80211_tx_rate_update); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 137be9ec94af..4e2d22e47429 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -459,9 +459,9 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); } else if (action_code == WLAN_TDLS_SETUP_RESPONSE && - ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { + ht_cap.ht_supported && sta->sta.deflink.ht_cap.ht_supported) { /* the peer caps are already intersected with our own */ - memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap)); + memcpy(&ht_cap, &sta->sta.deflink.ht_cap, sizeof(ht_cap)); pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); @@ -510,9 +510,9 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); ieee80211_ie_build_vht_cap(pos, &vht_cap, vht_cap.cap); } else if (action_code == WLAN_TDLS_SETUP_RESPONSE && - vht_cap.vht_supported && sta->sta.vht_cap.vht_supported) { + vht_cap.vht_supported && sta->sta.deflink.vht_cap.vht_supported) { /* the peer caps are already intersected with our own */ - memcpy(&vht_cap, &sta->sta.vht_cap, sizeof(vht_cap)); + memcpy(&vht_cap, &sta->sta.deflink.vht_cap, sizeof(vht_cap)); /* the AID is present only when VHT is implemented */ ieee80211_tdls_add_aid(sdata, skb); @@ -603,13 +603,13 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, * if HT support is only added in TDLS, we need an HT-operation IE. * add the IE as required by IEEE802.11-2012 9.23.3.2. */ - if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { + if (!ap_sta->sta.deflink.ht_cap.ht_supported && sta->sta.deflink.ht_cap.ht_supported) { u16 prot = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED | IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT | IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT; pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); - ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap, + ieee80211_ie_build_ht_oper(pos, &sta->sta.deflink.ht_cap, &sdata->vif.bss_conf.chandef, prot, true); } @@ -618,7 +618,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, /* only include VHT-operation if not on the 2.4GHz band */ if (sband->band != NL80211_BAND_2GHZ && - sta->sta.vht_cap.vht_supported) { + sta->sta.deflink.vht_cap.vht_supported) { /* * if both peers support WIDER_BW, we can expand the chandef to * a wider compatible one, up to 80MHz @@ -627,7 +627,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, ieee80211_tdls_chandef_vht_upgrade(sdata, sta); pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation)); - ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap, + ieee80211_ie_build_vht_oper(pos, &sta->sta.deflink.vht_cap, &sta->tdls_chandef); } @@ -1269,8 +1269,8 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, bw = ieee80211_chan_width_to_rx_bw(conf->def.width); bw = min(bw, ieee80211_sta_cap_rx_bw(sta)); - if (bw != sta->sta.bandwidth) { - sta->sta.bandwidth = bw; + if (bw != sta->sta.deflink.bandwidth) { + sta->sta.deflink.bandwidth = bw; rate_control_rate_update(local, sband, sta, IEEE80211_RC_BW_CHANGED); /* @@ -1296,7 +1296,7 @@ static int iee80211_tdls_have_ht_peers(struct ieee80211_sub_if_data *sdata) if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded || !test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH) || - !sta->sta.ht_cap.ht_supported) + !sta->sta.deflink.ht_cap.ht_supported) continue; result = true; break; @@ -1321,7 +1321,7 @@ iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata, if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) return; - tdls_ht = (sta && sta->sta.ht_cap.ht_supported) || + tdls_ht = (sta && sta->sta.deflink.ht_cap.ht_supported) || iee80211_tdls_have_ht_peers(sdata); opmode = sdata->vif.bss_conf.ht_operation_mode; @@ -1900,7 +1900,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, } /* peer should have known better */ - if (!sta->sta.ht_cap.ht_supported && elems->sec_chan_offs && + if (!sta->sta.deflink.ht_cap.ht_supported && elems->sec_chan_offs && elems->sec_chan_offs->sec_chan_offs) { tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n"); ret = -ENOTSUPP; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index d91498f77796..743adfbb9b15 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -860,8 +860,8 @@ TRACE_EVENT(drv_sta_set_txpwr, LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->txpwr = sta->txpwr.power; - __entry->type = sta->txpwr.type; + __entry->txpwr = sta->deflink.txpwr.power; + __entry->type = sta->deflink.txpwr.type; ), TP_printk( diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b6b20f38de0e..0e4efc08c762 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -768,9 +768,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) if (txrc.reported_rate.idx < 0) { txrc.reported_rate = tx->rate; if (tx->sta && ieee80211_is_tx_data(tx->skb)) - tx->sta->tx_stats.last_rate = txrc.reported_rate; + tx->sta->deflink.tx_stats.last_rate = txrc.reported_rate; } else if (tx->sta) - tx->sta->tx_stats.last_rate = txrc.reported_rate; + tx->sta->deflink.tx_stats.last_rate = txrc.reported_rate; if (ratetbl) return TX_CONTINUE; @@ -837,7 +837,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number); tx->sdata->sequence_number += 0x10; if (tx->sta) - tx->sta->tx_stats.msdu[IEEE80211_NUM_TIDS]++; + tx->sta->deflink.tx_stats.msdu[IEEE80211_NUM_TIDS]++; return TX_CONTINUE; } @@ -851,7 +851,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) /* include per-STA, per-TID sequence counter */ tid = ieee80211_get_tid(hdr); - tx->sta->tx_stats.msdu[tid]++; + tx->sta->deflink.tx_stats.msdu[tid]++; hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); @@ -1004,10 +1004,10 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) skb_queue_walk(&tx->skbs, skb) { ac = skb_get_queue_mapping(skb); - tx->sta->tx_stats.bytes[ac] += skb->len; + tx->sta->deflink.tx_stats.bytes[ac] += skb->len; } if (ac >= 0) - tx->sta->tx_stats.packets[ac]++; + tx->sta->deflink.tx_stats.packets[ac]++; return TX_CONTINUE; } @@ -1159,7 +1159,7 @@ ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) return; - if (!sta || !sta->sta.ht_cap.ht_supported || + if (!sta || !sta->sta.deflink.ht_cap.ht_supported || !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || skb->protocol == sdata->control_port_protocol) return; @@ -3150,8 +3150,6 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) fast_tx = kmemdup(&build, sizeof(build), GFP_ATOMIC); /* if the kmemdup fails, continue w/o fast_tx */ - if (!fast_tx) - goto out; out: /* we might have raced against another call to this function */ @@ -3462,18 +3460,18 @@ ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, } if (skb_shinfo(skb)->gso_size) - sta->tx_stats.msdu[tid] += + sta->deflink.tx_stats.msdu[tid] += DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); else - sta->tx_stats.msdu[tid]++; + sta->deflink.tx_stats.msdu[tid]++; info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; /* statistics normally done by ieee80211_tx_h_stats (but that * has to consider fragmentation, so is more complex) */ - sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; - sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + sta->deflink.tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->deflink.tx_stats.packets[skb_get_queue_mapping(skb)]++; if (pn_offs) { u64 pn; @@ -4481,8 +4479,8 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, dev_sw_netstats_tx_add(dev, 1, skb->len); - sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; - sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + sta->deflink.tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->deflink.tx_stats.packets[skb_get_queue_mapping(skb)]++; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 682a164f795a..1e26b5235add 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2854,46 +2854,6 @@ size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset) return pos; } -static void _ieee80211_enable_rssi_reports(struct ieee80211_sub_if_data *sdata, - int rssi_min_thold, - int rssi_max_thold) -{ - trace_api_enable_rssi_reports(sdata, rssi_min_thold, rssi_max_thold); - - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) - return; - - /* - * Scale up threshold values before storing it, as the RSSI averaging - * algorithm uses a scaled up value as well. Change this scaling - * factor if the RSSI averaging algorithm changes. - */ - sdata->u.mgd.rssi_min_thold = rssi_min_thold*16; - sdata->u.mgd.rssi_max_thold = rssi_max_thold*16; -} - -void ieee80211_enable_rssi_reports(struct ieee80211_vif *vif, - int rssi_min_thold, - int rssi_max_thold) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - - WARN_ON(rssi_min_thold == rssi_max_thold || - rssi_min_thold > rssi_max_thold); - - _ieee80211_enable_rssi_reports(sdata, rssi_min_thold, - rssi_max_thold); -} -EXPORT_SYMBOL(ieee80211_enable_rssi_reports); - -void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - - _ieee80211_enable_rssi_reports(sdata, 0, 0); -} -EXPORT_SYMBOL(ieee80211_disable_rssi_reports); - u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u16 cap) { diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 8f16aa9c725d..ff26e0c4787b 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -118,14 +118,14 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_vht_cap *vht_cap_ie, struct sta_info *sta) { - struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap; struct ieee80211_sta_vht_cap own_cap; u32 cap_info, i; bool have_80mhz; memset(vht_cap, 0, sizeof(*vht_cap)); - if (!sta->sta.ht_cap.ht_supported) + if (!sta->sta.deflink.ht_cap.ht_supported) return; if (!vht_cap_ie || !sband->vht_cap.vht_supported) @@ -295,10 +295,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160; break; default: - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_80; if (!(vht_cap->vht_mcs.tx_highest & cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE))) @@ -310,10 +310,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, * above) between 160 and 80+80 yet. */ if (cap_info & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160; } - sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); + sta->sta.deflink.bandwidth = ieee80211_sta_cur_vht_bw(sta); switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: @@ -332,9 +332,9 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, /* FIXME: move this to some better location - parses HE/EHT now */ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) { - struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; - struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; - struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.eht_cap; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap; + struct ieee80211_sta_he_cap *he_cap = &sta->sta.deflink.he_cap; + struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.deflink.eht_cap; u32 cap_width; if (he_cap->has_he) { @@ -369,7 +369,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) } if (!vht_cap->vht_supported) - return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + return sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; @@ -392,14 +392,14 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta) { - struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap; u32 cap_width; if (!vht_cap->vht_supported) { - if (!sta->sta.ht_cap.ht_supported) + if (!sta->sta.deflink.ht_cap.ht_supported) return NL80211_CHAN_WIDTH_20_NOHT; - return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + return sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? NL80211_CHAN_WIDTH_40 : NL80211_CHAN_WIDTH_20; } @@ -416,13 +416,13 @@ enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta) enum nl80211_chan_width ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta) { - enum ieee80211_sta_rx_bandwidth cur_bw = sta->sta.bandwidth; - struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + enum ieee80211_sta_rx_bandwidth cur_bw = sta->sta.deflink.bandwidth; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap; u32 cap_width; switch (cur_bw) { case IEEE80211_STA_RX_BW_20: - if (!sta->sta.ht_cap.ht_supported) + if (!sta->sta.deflink.ht_cap.ht_supported) return NL80211_CHAN_WIDTH_20_NOHT; else return NL80211_CHAN_WIDTH_20; @@ -473,7 +473,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) enum nl80211_chan_width bss_width = sdata->vif.bss_conf.chandef.width; bw = ieee80211_sta_cap_rx_bw(sta); - bw = min(bw, sta->cur_max_bandwidth); + bw = min(bw, sta->deflink.cur_max_bandwidth); /* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of * IEEE80211-2016 specification makes higher bandwidth operation @@ -501,12 +501,12 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) bool support_160; /* if we received a notification already don't overwrite it */ - if (sta->sta.rx_nss) + if (sta->sta.deflink.rx_nss) return; - if (sta->sta.eht_cap.has_eht) { + if (sta->sta.deflink.eht_cap.has_eht) { int i; - const u8 *rx_nss_mcs = (void *)&sta->sta.eht_cap.eht_mcs_nss_supp; + const u8 *rx_nss_mcs = (void *)&sta->sta.deflink.eht_cap.eht_mcs_nss_supp; /* get the max nss for EHT over all possible bandwidths and mcs */ for (i = 0; i < sizeof(struct ieee80211_eht_mcs_nss_supp); i++) @@ -515,10 +515,10 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) IEEE80211_EHT_MCS_NSS_RX)); } - if (sta->sta.he_cap.has_he) { + if (sta->sta.deflink.he_cap.has_he) { int i; u8 rx_mcs_80 = 0, rx_mcs_160 = 0; - const struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; + const struct ieee80211_sta_he_cap *he_cap = &sta->sta.deflink.he_cap; u16 mcs_160_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160); u16 mcs_80_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80); @@ -549,23 +549,23 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) he_rx_nss = rx_mcs_80; } - if (sta->sta.ht_cap.ht_supported) { - if (sta->sta.ht_cap.mcs.rx_mask[0]) + if (sta->sta.deflink.ht_cap.ht_supported) { + if (sta->sta.deflink.ht_cap.mcs.rx_mask[0]) ht_rx_nss++; - if (sta->sta.ht_cap.mcs.rx_mask[1]) + if (sta->sta.deflink.ht_cap.mcs.rx_mask[1]) ht_rx_nss++; - if (sta->sta.ht_cap.mcs.rx_mask[2]) + if (sta->sta.deflink.ht_cap.mcs.rx_mask[2]) ht_rx_nss++; - if (sta->sta.ht_cap.mcs.rx_mask[3]) + if (sta->sta.deflink.ht_cap.mcs.rx_mask[3]) ht_rx_nss++; /* FIXME: consider rx_highest? */ } - if (sta->sta.vht_cap.vht_supported) { + if (sta->sta.deflink.vht_cap.vht_supported) { int i; u16 rx_mcs_map; - rx_mcs_map = le16_to_cpu(sta->sta.vht_cap.vht_mcs.rx_mcs_map); + rx_mcs_map = le16_to_cpu(sta->sta.deflink.vht_cap.vht_mcs.rx_mcs_map); for (i = 7; i >= 0; i--) { u8 mcs = (rx_mcs_map >> (2 * i)) & 3; @@ -581,7 +581,7 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) rx_nss = max(vht_rx_nss, ht_rx_nss); rx_nss = max(he_rx_nss, rx_nss); rx_nss = max(eht_rx_nss, rx_nss); - sta->sta.rx_nss = max_t(u8, 1, rx_nss); + sta->sta.deflink.rx_nss = max_t(u8, 1, rx_nss); } u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, @@ -601,8 +601,8 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; nss += 1; - if (sta->sta.rx_nss != nss) { - sta->sta.rx_nss = nss; + if (sta->sta.deflink.rx_nss != nss) { + sta->sta.deflink.rx_nss = nss; sta_opmode.rx_nss = nss; changed |= IEEE80211_RC_NSS_CHANGED; sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED; @@ -611,27 +611,27 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) { case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: /* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */ - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_20; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: /* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */ - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_40; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: if (opmode & IEEE80211_OPMODE_NOTIF_BW_160_80P80) - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160; else - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_80; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: /* legacy only, no longer used by newer spec */ - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160; break; } new_bw = ieee80211_sta_cur_vht_bw(sta); - if (new_bw != sta->sta.bandwidth) { - sta->sta.bandwidth = new_bw; + if (new_bw != sta->sta.deflink.bandwidth) { + sta->sta.deflink.bandwidth = new_bw; sta_opmode.bw = ieee80211_sta_rx_bw_to_chan_width(sta); changed |= IEEE80211_RC_BW_CHANGED; sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 7ed0d268aff2..5fd8a3e8b5b4 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -311,19 +311,21 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } - -static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad) +/* + * Calculate AAD for CCMP/GCMP, returning qos_tid since we + * need that in CCMP also for b_0. + */ +static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad) { + struct ieee80211_hdr *hdr = (void *)skb->data; __le16 mask_fc; int a4_included, mgmt; u8 qos_tid; - u16 len_a; - unsigned int hdrlen; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + u16 len_a = 22; /* * Mask FC: zero subtype b4 b5 b6 (if not mgmt) - * Retry, PwrMgt, MoreData; set Protected + * Retry, PwrMgt, MoreData, Order (if Qos Data); set Protected */ mgmt = ieee80211_is_mgmt(hdr->frame_control); mask_fc = hdr->frame_control; @@ -333,30 +335,17 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad) mask_fc &= ~cpu_to_le16(0x0070); mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - hdrlen = ieee80211_hdrlen(hdr->frame_control); - len_a = hdrlen - 2; a4_included = ieee80211_has_a4(hdr->frame_control); + if (a4_included) + len_a += 6; - if (ieee80211_is_data_qos(hdr->frame_control)) + if (ieee80211_is_data_qos(hdr->frame_control)) { qos_tid = ieee80211_get_tid(hdr); - else + mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_ORDER); + len_a += 2; + } else { qos_tid = 0; - - /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC - * mode authentication are not allowed to collide, yet both are derived - * from this vector b_0. We only set L := 1 here to indicate that the - * data size can be represented in (L+1) bytes. The CCM layer will take - * care of storing the data length in the top (L+1) bytes and setting - * and clearing the other bits as is required to derive the two IVs. - */ - b_0[0] = 0x1; - - /* Nonce: Nonce Flags | A2 | PN - * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7) - */ - b_0[1] = qos_tid | (mgmt << 4); - memcpy(&b_0[2], hdr->addr2, ETH_ALEN); - memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN); + } /* AAD (extra authenticate-only data) / masked 802.11 header * FC | A1 | A2 | A3 | SC | [A4] | [QC] */ @@ -376,8 +365,31 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad) memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN); aad[24] = qos_tid; } + + return qos_tid; } +static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + u8 qos_tid = ccmp_gcmp_aad(skb, aad); + + /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC + * mode authentication are not allowed to collide, yet both are derived + * from this vector b_0. We only set L := 1 here to indicate that the + * data size can be represented in (L+1) bytes. The CCM layer will take + * care of storing the data length in the top (L+1) bytes and setting + * and clearing the other bits as is required to derive the two IVs. + */ + b_0[0] = 0x1; + + /* Nonce: Nonce Flags | A2 | PN + * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7) + */ + b_0[1] = qos_tid | (ieee80211_is_mgmt(hdr->frame_control) << 4); + memcpy(&b_0[2], hdr->addr2, ETH_ALEN); + memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN); +} static inline void ccmp_pn2hdr(u8 *hdr, u8 *pn, int key_id) { @@ -571,9 +583,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad) { - __le16 mask_fc; - u8 qos_tid; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_hdr *hdr = (void *)skb->data; memcpy(j_0, hdr->addr2, ETH_ALEN); memcpy(&j_0[ETH_ALEN], pn, IEEE80211_GCMP_PN_LEN); @@ -581,40 +591,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad) j_0[14] = 0; j_0[AES_BLOCK_SIZE - 1] = 0x01; - /* AAD (extra authenticate-only data) / masked 802.11 header - * FC | A1 | A2 | A3 | SC | [A4] | [QC] - */ - put_unaligned_be16(ieee80211_hdrlen(hdr->frame_control) - 2, &aad[0]); - /* Mask FC: zero subtype b4 b5 b6 (if not mgmt) - * Retry, PwrMgt, MoreData; set Protected - */ - mask_fc = hdr->frame_control; - mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY | - IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); - if (!ieee80211_is_mgmt(hdr->frame_control)) - mask_fc &= ~cpu_to_le16(0x0070); - mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - - put_unaligned(mask_fc, (__le16 *)&aad[2]); - memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN); - - /* Mask Seq#, leave Frag# */ - aad[22] = *((u8 *)&hdr->seq_ctrl) & 0x0f; - aad[23] = 0; - - if (ieee80211_is_data_qos(hdr->frame_control)) - qos_tid = ieee80211_get_tid(hdr); - else - qos_tid = 0; - - if (ieee80211_has_a4(hdr->frame_control)) { - memcpy(&aad[24], hdr->addr4, ETH_ALEN); - aad[30] = qos_tid; - aad[31] = 0; - } else { - memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN); - aad[24] = qos_tid; - } + ccmp_gcmp_aad(skb, aad); } static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id) diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index fbeebe3bc31d..1e4a9f74ed43 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -118,6 +118,7 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel) if (!ret) { wpan_phy->current_page = page; wpan_phy->current_channel = channel; + ieee802154_configure_durations(wpan_phy); } return ret; diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 702560acc8ce..1381e6a5e180 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -56,6 +56,8 @@ struct ieee802154_local { struct sk_buff *tx_skb; struct work_struct tx_work; + /* A negative Linux error code or a null/positive MLME error status */ + int tx_result; }; enum { diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 520cedc594e1..bd7bdb1219dd 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -113,6 +113,50 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) } EXPORT_SYMBOL(ieee802154_alloc_hw); +void ieee802154_configure_durations(struct wpan_phy *phy) +{ + u32 duration = 0; + + switch (phy->current_page) { + case 0: + if (BIT(phy->current_channel) & 0x1) + /* 868 MHz BPSK 802.15.4-2003: 20 ksym/s */ + duration = 50 * NSEC_PER_USEC; + else if (BIT(phy->current_channel) & 0x7FE) + /* 915 MHz BPSK 802.15.4-2003: 40 ksym/s */ + duration = 25 * NSEC_PER_USEC; + else if (BIT(phy->current_channel) & 0x7FFF800) + /* 2400 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */ + duration = 16 * NSEC_PER_USEC; + break; + case 2: + if (BIT(phy->current_channel) & 0x1) + /* 868 MHz O-QPSK 802.15.4-2006: 25 ksym/s */ + duration = 40 * NSEC_PER_USEC; + else if (BIT(phy->current_channel) & 0x7FE) + /* 915 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */ + duration = 16 * NSEC_PER_USEC; + break; + case 3: + if (BIT(phy->current_channel) & 0x3FFF) + /* 2.4 GHz CSS 802.15.4a-2007: 1/6 Msym/s */ + duration = 6 * NSEC_PER_USEC; + break; + default: + break; + } + + if (!duration) { + pr_debug("Unknown PHY symbol duration\n"); + return; + } + + phy->symbol_duration = duration; + phy->lifs_period = (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC; + phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC; +} +EXPORT_SYMBOL(ieee802154_configure_durations); + void ieee802154_free_hw(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); @@ -131,10 +175,10 @@ static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy) * Should be done when all drivers sets this value. */ - wpan_phy->lifs_period = IEEE802154_LIFS_PERIOD * - wpan_phy->symbol_duration; - wpan_phy->sifs_period = IEEE802154_SIFS_PERIOD * - wpan_phy->symbol_duration; + wpan_phy->lifs_period = + (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / 1000; + wpan_phy->sifs_period = + (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / 1000; } int ieee802154_register_hw(struct ieee802154_hw *hw) @@ -157,6 +201,8 @@ int ieee802154_register_hw(struct ieee802154_hw *hw) ieee802154_setup_wpan_phy_pib(local->phy); + ieee802154_configure_durations(local->phy); + if (!(hw->flags & IEEE802154_HW_CSMA_PARAMS)) { local->phy->supported.min_csma_backoffs = 4; local->phy->supported.max_csma_backoffs = 4; diff --git a/net/mac802154/util.c b/net/mac802154/util.c index f2078238718b..9f024d85563b 100644 --- a/net/mac802154/util.c +++ b/net/mac802154/util.c @@ -58,8 +58,11 @@ enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer) void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, bool ifs_handling) { + struct ieee802154_local *local = hw_to_local(hw); + + local->tx_result = IEEE802154_SUCCESS; + if (ifs_handling) { - struct ieee802154_local *local = hw_to_local(hw); u8 max_sifs_size; /* If transceiver sets CRC on his own we need to use lifs @@ -88,6 +91,23 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, } EXPORT_SYMBOL(ieee802154_xmit_complete); +void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb, + int reason) +{ + struct ieee802154_local *local = hw_to_local(hw); + + local->tx_result = reason; + ieee802154_wake_queue(hw); + dev_kfree_skb_any(skb); +} +EXPORT_SYMBOL(ieee802154_xmit_error); + +void ieee802154_xmit_hw_error(struct ieee802154_hw *hw, struct sk_buff *skb) +{ + ieee802154_xmit_error(hw, skb, IEEE802154_SYSTEM_ERROR); +} +EXPORT_SYMBOL(ieee802154_xmit_hw_error); + void ieee802154_stop_device(struct ieee802154_local *local) { flush_workqueue(local->workqueue); diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 221863afc4b1..c2fc2a7b2528 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -238,7 +238,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (rc < 0) goto out_free; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (addr) { struct mctp_skb_cb *cb = mctp_cb(skb); diff --git a/net/mctp/device.c b/net/mctp/device.c index f49be882e98e..99a3bda8852f 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -313,6 +313,7 @@ void mctp_dev_hold(struct mctp_dev *mdev) void mctp_dev_put(struct mctp_dev *mdev) { if (mdev && refcount_dec_and_test(&mdev->refs)) { + kfree(mdev->addrs); dev_put(mdev->dev); kfree_rcu(mdev, rcu); } @@ -441,7 +442,6 @@ static void mctp_unregister(struct net_device *dev) mctp_route_remove_dev(mdev); mctp_neigh_remove_dev(mdev); - kfree(mdev->addrs); mctp_dev_put(mdev); } diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index 99dddf08ca73..6e7df47c9584 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ - mib.o pm_netlink.o sockopt.o + mib.o pm_netlink.o sockopt.o pm_userspace.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 8b235468c88f..ae20b7d92e28 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -16,6 +16,11 @@ #define MPTCP_SYSCTL_PATH "net/mptcp" static int mptcp_pernet_id; + +#ifdef CONFIG_SYSCTL +static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX; +#endif + struct mptcp_pernet { #ifdef CONFIG_SYSCTL struct ctl_table_header *ctl_table_hdr; @@ -26,6 +31,7 @@ struct mptcp_pernet { u8 mptcp_enabled; u8 checksum_enabled; u8 allow_join_initial_addr_port; + u8 pm_type; }; static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) @@ -58,6 +64,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net) return mptcp_get_pernet(net)->stale_loss_cnt; } +int mptcp_get_pm_type(const struct net *net) +{ + return mptcp_get_pernet(net)->pm_type; +} + static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; @@ -65,6 +76,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) pernet->checksum_enabled = 0; pernet->allow_join_initial_addr_port = 1; pernet->stale_loss_cnt = 4; + pernet->pm_type = MPTCP_PM_TYPE_KERNEL; } #ifdef CONFIG_SYSCTL @@ -108,6 +120,14 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_douintvec_minmax, }, + { + .procname = "pm_type", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &mptcp_pm_type_max + }, {} }; @@ -128,6 +148,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[2].data = &pernet->checksum_enabled; table[3].data = &pernet->allow_join_initial_addr_port; table[4].data = &pernet->stale_loss_cnt; + table[5].data = &pernet->pm_type; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index d93a8c9996fd..0dac2863c6e1 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -56,6 +56,10 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE), SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER), + SNMP_MIB_ITEM("SndWndShared", MPTCP_MIB_SNDWNDSHARED), + SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED), + SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), + SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 529d07af9e14..2be3596374f4 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -49,6 +49,12 @@ enum linux_mptcp_mib_field { MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */ MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */ MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */ + MPTCP_MIB_SNDWNDSHARED, /* Subflow snd wnd is overridden by msk's one */ + MPTCP_MIB_RCVWNDSHARED, /* Subflow rcv wnd is overridden by msk's one */ + MPTCP_MIB_RCVWNDCONFLICTUPDATE, /* subflow rcv wnd is overridden by msk's one due to + * conflict with another subflow while updating msk rcv wnd + */ + MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index dbb6d876a203..7f9a71780437 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -83,13 +83,13 @@ static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callba struct net *net = sock_net(skb->sk); int i; - for (i = diag_ctx->l_slot; i < INET_LHTABLE_SIZE; i++) { + for (i = diag_ctx->l_slot; i <= tcp_hashinfo.lhash2_mask; i++) { struct inet_listen_hashbucket *ilb; struct hlist_nulls_node *node; struct sock *sk; int num = 0; - ilb = &tcp_hashinfo.listening_hash[i]; + ilb = &tcp_hashinfo.lhash2[i]; rcu_read_lock(); spin_lock(&ilb->lock); diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 88f4ebbd6515..be3b918a6d15 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -107,7 +107,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 2; } if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) { - mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum = get_unaligned((__force __sum16 *)ptr); mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; ptr += 2; } @@ -221,7 +221,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) { mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; - mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum = get_unaligned((__force __sum16 *)ptr); ptr += 2; } @@ -931,7 +931,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 && TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq && subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) && - READ_ONCE(msk->pm.server_side)) + !subflow->request_join) tcp_send_ack(ssk); goto fully_established; } @@ -1133,7 +1133,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && add_addr_hmac_valid(msk, &mp_opt)) { if (!mp_opt.echo) { - mptcp_pm_add_addr_received(msk, &mp_opt.addr); + mptcp_pm_add_addr_received(sk, &mp_opt.addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); } else { mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); @@ -1224,23 +1224,65 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) return true; } -static void mptcp_set_rwin(const struct tcp_sock *tp) +static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) { const struct sock *ssk = (const struct sock *)tp; - const struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow; + u64 ack_seq, rcv_wnd_old, rcv_wnd_new; struct mptcp_sock *msk; - u64 ack_seq; + u32 new_win; + u64 win; subflow = mptcp_subflow_ctx(ssk); msk = mptcp_sk(subflow->conn); - ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd; + ack_seq = READ_ONCE(msk->ack_seq); + rcv_wnd_new = ack_seq + tp->rcv_wnd; + + rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent); + if (after64(rcv_wnd_new, rcv_wnd_old)) { + u64 rcv_wnd; + + for (;;) { + rcv_wnd = atomic64_cmpxchg(&msk->rcv_wnd_sent, rcv_wnd_old, rcv_wnd_new); + + if (rcv_wnd == rcv_wnd_old) + break; + if (before64(rcv_wnd_new, rcv_wnd)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE); + goto raise_win; + } + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT); + rcv_wnd_old = rcv_wnd; + } + return; + } + + if (rcv_wnd_new != rcv_wnd_old) { +raise_win: + win = rcv_wnd_old - ack_seq; + tp->rcv_wnd = min_t(u64, win, U32_MAX); + new_win = tp->rcv_wnd; - if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent))) - WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); + /* Make sure we do not exceed the maximum possible + * scaled window. + */ + if (unlikely(th->syn)) + new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale; + if (!tp->rx_opt.rcv_wscale && + sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows) + new_win = min(new_win, MAX_TCP_WINDOW); + else + new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); + + /* RFC1323 scaling applied */ + new_win >>= tp->rx_opt.rcv_wscale; + th->window = htons(new_win); + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED); + } } -u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) +__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) { struct csum_pseudo_header header; __wsum csum; @@ -1256,16 +1298,26 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) header.csum = 0; csum = csum_partial(&header, sizeof(header), sum); - return (__force u16)csum_fold(csum); + return csum_fold(csum); } -static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +static __sum16 mptcp_make_csum(const struct mptcp_ext *mpext) { return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len, ~csum_unfold(mpext->csum)); } -void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, +static void put_len_csum(u16 len, __sum16 csum, void *data) +{ + __sum16 *sumptr = data + 2; + __be16 *ptr = data; + + put_unaligned_be16(len, ptr); + + put_unaligned(csum, sumptr); +} + +void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp, struct mptcp_out_options *opts) { const struct sock *ssk = (const struct sock *)tp; @@ -1343,9 +1395,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, /* data_len == 0 is reserved for the infinite mapping, * the checksum will also be set to 0. */ - put_unaligned_be32(mpext->data_len << 16 | - (mpext->data_len ? mptcp_make_csum(mpext) : 0), - ptr); + put_len_csum(mpext->data_len, + (mpext->data_len ? mptcp_make_csum(mpext) : 0), + ptr); } else { put_unaligned_be32(mpext->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); @@ -1396,11 +1448,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, goto mp_capable_done; if (opts->csum_reqd) { - put_unaligned_be32(opts->data_len << 16 | - __mptcp_make_csum(opts->data_seq, - opts->subflow_seq, - opts->data_len, - ~csum_unfold(opts->csum)), ptr); + put_len_csum(opts->data_len, + __mptcp_make_csum(opts->data_seq, + opts->subflow_seq, + opts->data_len, + ~csum_unfold(opts->csum)), + ptr); } else { put_unaligned_be32(opts->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); @@ -1554,7 +1607,7 @@ mp_capable_done: } if (tp) - mptcp_set_rwin(tp); + mptcp_set_rwin(tp, th); } __be32 mptcp_get_reset_option(const struct sk_buff *skb) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 14f448d82bb2..59a85220edc9 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -87,6 +87,9 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) unsigned int subflows_max; int ret = 0; + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_active(msk); + subflows_max = mptcp_pm_get_subflows_max(msk); pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, @@ -178,14 +181,14 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, struct mptcp_pm_data *pm = &msk->pm; bool update_subflows; - update_subflows = (ssk->sk_state == TCP_CLOSE) && - (subflow->request_join || subflow->mp_join); + update_subflows = (subflow->request_join || subflow->mp_join) && + mptcp_pm_is_kernel(msk); if (!READ_ONCE(pm->work_pending) && !update_subflows) return; spin_lock_bh(&pm->lock); if (update_subflows) - pm->subflows--; + __mptcp_pm_close_subflow(msk); /* Even if this subflow is not really established, tell the PM to try * to pick the next ones, if possible. @@ -196,19 +199,28 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, spin_unlock_bh(&pm->lock); } -void mptcp_pm_add_addr_received(struct mptcp_sock *msk, +void mptcp_pm_add_addr_received(const struct sock *ssk, const struct mptcp_addr_info *addr) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_pm_data *pm = &msk->pm; pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, READ_ONCE(pm->accept_addr)); - mptcp_event_addr_announced(msk, addr); + mptcp_event_addr_announced(ssk, addr); spin_lock_bh(&pm->lock); - if (!READ_ONCE(pm->accept_addr)) { + if (mptcp_pm_is_userspace(msk)) { + if (mptcp_userspace_pm_active(msk)) { + mptcp_pm_announce_addr(msk, addr, true); + mptcp_pm_add_addr_send_ack(msk); + } else { + __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); + } + } else if (!READ_ONCE(pm->accept_addr)) { mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_add_addr_send_ack(msk); } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { @@ -291,7 +303,7 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) pr_debug("fail_seq=%llu", fail_seq); - if (mptcp_has_another_subflow(sk) || !READ_ONCE(msk->allow_infinite_fallback)) + if (!READ_ONCE(msk->allow_infinite_fallback)) return; if (!READ_ONCE(subflow->mp_fail_response_expect)) { @@ -300,13 +312,10 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) subflow->send_mp_fail = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); subflow->send_infinite_map = 1; - } else if (s && inet_sk_state_load(s) != TCP_CLOSE) { + } else if (!sock_flag(sk, SOCK_DEAD)) { pr_debug("MP_FAIL response received"); - mptcp_data_lock(s); - if (inet_sk_state_load(s) != TCP_CLOSE) - sk_stop_timer(s, &s->sk_timer); - mptcp_data_unlock(s); + sk_stop_timer(s, &s->sk_timer); } } @@ -415,27 +424,48 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) void mptcp_pm_data_reset(struct mptcp_sock *msk) { - msk->pm.add_addr_signaled = 0; - msk->pm.add_addr_accepted = 0; - msk->pm.local_addr_used = 0; - msk->pm.subflows = 0; - msk->pm.rm_list_tx.nr = 0; - msk->pm.rm_list_rx.nr = 0; - WRITE_ONCE(msk->pm.work_pending, false); - WRITE_ONCE(msk->pm.addr_signal, 0); - WRITE_ONCE(msk->pm.accept_addr, false); - WRITE_ONCE(msk->pm.accept_subflow, false); - WRITE_ONCE(msk->pm.remote_deny_join_id0, false); - msk->pm.status = 0; - bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk)); + struct mptcp_pm_data *pm = &msk->pm; - mptcp_pm_nl_data_init(msk); + pm->add_addr_signaled = 0; + pm->add_addr_accepted = 0; + pm->local_addr_used = 0; + pm->subflows = 0; + pm->rm_list_tx.nr = 0; + pm->rm_list_rx.nr = 0; + WRITE_ONCE(pm->pm_type, pm_type); + + if (pm_type == MPTCP_PM_TYPE_KERNEL) { + bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk); + + /* pm->work_pending must be only be set to 'true' when + * pm->pm_type is set to MPTCP_PM_TYPE_KERNEL + */ + WRITE_ONCE(pm->work_pending, + (!!mptcp_pm_get_local_addr_max(msk) && + subflows_allowed) || + !!mptcp_pm_get_add_addr_signal_max(msk)); + WRITE_ONCE(pm->accept_addr, + !!mptcp_pm_get_add_addr_accept_max(msk) && + subflows_allowed); + WRITE_ONCE(pm->accept_subflow, subflows_allowed); + } else { + WRITE_ONCE(pm->work_pending, 0); + WRITE_ONCE(pm->accept_addr, 0); + WRITE_ONCE(pm->accept_subflow, 0); + } + + WRITE_ONCE(pm->addr_signal, 0); + WRITE_ONCE(pm->remote_deny_join_id0, false); + pm->status = 0; + bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); } void mptcp_pm_data_init(struct mptcp_sock *msk) { spin_lock_init(&msk->pm.lock); INIT_LIST_HEAD(&msk->pm.anno_list); + INIT_LIST_HEAD(&msk->pm.userspace_pm_local_addr_list); mptcp_pm_data_reset(msk); } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c20261b612e9..e099f2a12504 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -22,14 +22,6 @@ static struct genl_family mptcp_genl_family; static int pm_nl_pernet_id; -struct mptcp_pm_addr_entry { - struct list_head list; - struct mptcp_addr_info addr; - u8 flags; - int ifindex; - struct socket *lsk; -}; - struct mptcp_pm_add_entry { struct list_head list; struct mptcp_addr_info addr; @@ -66,8 +58,8 @@ pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) return pm_nl_get_pernet(sock_net((struct sock *)msk)); } -static bool addresses_equal(const struct mptcp_addr_info *a, - const struct mptcp_addr_info *b, bool use_port) +bool mptcp_addresses_equal(const struct mptcp_addr_info *a, + const struct mptcp_addr_info *b, bool use_port) { bool addr_equals = false; @@ -131,7 +123,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list, skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); local_address(skc, &cur); - if (addresses_equal(&cur, saddr, saddr->port)) + if (mptcp_addresses_equal(&cur, saddr, saddr->port)) return true; } @@ -149,7 +141,7 @@ static bool lookup_subflow_by_daddr(const struct list_head *list, skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); remote_address(skc, &cur); - if (addresses_equal(&cur, daddr, daddr->port)) + if (mptcp_addresses_equal(&cur, daddr, daddr->port)) return true; } @@ -269,7 +261,7 @@ mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, lockdep_assert_held(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.anno_list, list) { - if (addresses_equal(&entry->addr, addr, true)) + if (mptcp_addresses_equal(&entry->addr, addr, true)) return entry; } @@ -286,7 +278,7 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) spin_lock_bh(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.anno_list, list) { - if (addresses_equal(&entry->addr, &saddr, true)) { + if (mptcp_addresses_equal(&entry->addr, &saddr, true)) { ret = true; goto out; } @@ -360,8 +352,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, return entry; } -static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, - const struct mptcp_pm_addr_entry *entry) +bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, + const struct mptcp_pm_addr_entry *entry) { struct mptcp_pm_add_entry *add_entry = NULL; struct sock *sk = (struct sock *)msk; @@ -369,8 +361,16 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, lockdep_assert_held(&msk->pm.lock); - if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr)) - return false; + add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr); + + if (add_entry) { + if (mptcp_pm_is_kernel(msk)) + return false; + + sk_reset_timer(sk, &add_entry->add_timer, + jiffies + mptcp_get_add_addr_timeout(net)); + return true; + } add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); if (!add_entry) @@ -413,7 +413,7 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int i; for (i = 0; i < nr; i++) { - if (addresses_equal(&addrs[i], addr, addr->port)) + if (mptcp_addresses_equal(&addrs[i], addr, addr->port)) return true; } @@ -449,7 +449,7 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - if (deny_id0 && addresses_equal(&addrs[i], &remote, false)) + if (deny_id0 && mptcp_addresses_equal(&addrs[i], &remote, false)) continue; if (!lookup_address_in_vec(addrs, i, &addrs[i]) && @@ -482,7 +482,7 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, &pernet->local_addr_list, list) { - if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) || + if ((!lookup_by_id && mptcp_addresses_equal(&entry->addr, info, true)) || (lookup_by_id && entry->addr.id == info->id)) return entry; } @@ -497,7 +497,7 @@ lookup_id_by_addr(const struct pm_nl_pernet *pernet, const struct mptcp_addr_inf rcu_read_lock(); list_for_each_entry(entry, &pernet->local_addr_list, list) { - if (addresses_equal(&entry->addr, addr, entry->addr.port)) { + if (mptcp_addresses_equal(&entry->addr, addr, entry->addr.port)) { ret = entry->addr.id; break; } @@ -731,7 +731,7 @@ static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info local; local_address((struct sock_common *)ssk, &local); - if (!addresses_equal(&local, addr, addr->port)) + if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; if (subflow->backup != bkup) @@ -805,6 +805,9 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, if (!removed) continue; + if (!mptcp_pm_is_kernel(msk)) + continue; + if (rm_type == MPTCP_MIB_RMADDR) { msk->pm.add_addr_accepted--; WRITE_ONCE(msk->pm.accept_addr, true); @@ -898,9 +901,9 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, * singled addresses */ list_for_each_entry(cur, &pernet->local_addr_list, list) { - if (addresses_equal(&cur->addr, &entry->addr, - address_use_port(entry) && - address_use_port(cur))) { + if (mptcp_addresses_equal(&cur->addr, &entry->addr, + address_use_port(entry) && + address_use_port(cur))) { /* allow replacing the exiting endpoint only if such * endpoint is an implicit one and the user-space * did not provide an endpoint id @@ -1027,14 +1030,17 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) */ local_address((struct sock_common *)msk, &msk_local); local_address((struct sock_common *)skc, &skc_local); - if (addresses_equal(&msk_local, &skc_local, false)) + if (mptcp_addresses_equal(&msk_local, &skc_local, false)) return 0; + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_get_local_id(msk, &skc_local); + pernet = pm_nl_get_pernet_from_msk(msk); rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (addresses_equal(&entry->addr, &skc_local, entry->addr.port)) { + if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) { ret = entry->addr.id; break; } @@ -1061,18 +1067,6 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return ret; } -void mptcp_pm_nl_data_init(struct mptcp_sock *msk) -{ - struct mptcp_pm_data *pm = &msk->pm; - bool subflows; - - subflows = !!mptcp_pm_get_subflows_max(msk); - WRITE_ONCE(pm->work_pending, (!!mptcp_pm_get_local_addr_max(msk) && subflows) || - !!mptcp_pm_get_add_addr_signal_max(msk)); - WRITE_ONCE(pm->accept_addr, !!mptcp_pm_get_add_addr_accept_max(msk) && subflows); - WRITE_ONCE(pm->accept_subflow, subflows); -} - #define MPTCP_PM_CMD_GRP_OFFSET 0 #define MPTCP_PM_EV_GRP_OFFSET 1 @@ -1100,6 +1094,10 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { NLA_POLICY_NESTED(mptcp_pm_addr_policy), [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, + [MPTCP_PM_ATTR_ADDR_REMOTE] = + NLA_POLICY_NESTED(mptcp_pm_addr_policy), }; void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) @@ -1148,11 +1146,12 @@ static int mptcp_pm_family_to_addr(int family) return MPTCP_PM_ADDR_ATTR_ADDR4; } -static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, - bool require_family, - struct mptcp_pm_addr_entry *entry) +static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], + const struct nlattr *attr, + struct genl_info *info, + struct mptcp_addr_info *addr, + bool require_family) { - struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; int err, addr_addr; if (!attr) { @@ -1166,27 +1165,29 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, if (err) return err; - memset(entry, 0, sizeof(*entry)); + if (tb[MPTCP_PM_ADDR_ATTR_ID]) + addr->id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]); + if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) { if (!require_family) - goto skip_family; + return err; NL_SET_ERR_MSG_ATTR(info->extack, attr, "missing family"); return -EINVAL; } - entry->addr.family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]); - if (entry->addr.family != AF_INET + addr->family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]); + if (addr->family != AF_INET #if IS_ENABLED(CONFIG_MPTCP_IPV6) - && entry->addr.family != AF_INET6 + && addr->family != AF_INET6 #endif ) { NL_SET_ERR_MSG_ATTR(info->extack, attr, "unknown address family"); return -EINVAL; } - addr_addr = mptcp_pm_family_to_addr(entry->addr.family); + addr_addr = mptcp_pm_family_to_addr(addr->family); if (!tb[addr_addr]) { NL_SET_ERR_MSG_ATTR(info->extack, attr, "missing address data"); @@ -1194,22 +1195,47 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, } #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (entry->addr.family == AF_INET6) - entry->addr.addr6 = nla_get_in6_addr(tb[addr_addr]); + if (addr->family == AF_INET6) + addr->addr6 = nla_get_in6_addr(tb[addr_addr]); else #endif - entry->addr.addr.s_addr = nla_get_in_addr(tb[addr_addr]); + addr->addr.s_addr = nla_get_in_addr(tb[addr_addr]); + + if (tb[MPTCP_PM_ADDR_ATTR_PORT]) + addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); + + return err; +} + +int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, + struct mptcp_addr_info *addr) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + memset(addr, 0, sizeof(*addr)); + + return mptcp_pm_parse_pm_addr_attr(tb, attr, info, addr, true); +} + +int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, + bool require_family, + struct mptcp_pm_addr_entry *entry) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + int err; + + memset(entry, 0, sizeof(*entry)); + + err = mptcp_pm_parse_pm_addr_attr(tb, attr, info, &entry->addr, require_family); + if (err) + return err; -skip_family: if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) { u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]); entry->ifindex = val; } - if (tb[MPTCP_PM_ADDR_ATTR_ID]) - entry->addr.id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]); - if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); @@ -1232,7 +1258,8 @@ static int mptcp_nl_add_subflow_or_signal_addr(struct net *net) while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; - if (!READ_ONCE(msk->fully_established)) + if (!READ_ONCE(msk->fully_established) || + mptcp_pm_is_userspace(msk)) goto next; lock_sock(sk); @@ -1256,7 +1283,7 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) struct mptcp_pm_addr_entry addr, *entry; int ret; - ret = mptcp_pm_parse_addr(attr, info, true, &addr); + ret = mptcp_pm_parse_entry(attr, info, true, &addr); if (ret < 0) return ret; @@ -1305,15 +1332,23 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) return 0; } -int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, +int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) { struct mptcp_pm_addr_entry *entry; + struct sock *sk = (struct sock *)msk; + struct net *net = sock_net(sk); *flags = 0; *ifindex = 0; if (id) { + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, + id, + flags, + ifindex); + rcu_read_lock(); entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); if (entry) { @@ -1375,6 +1410,9 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, struct sock *sk = (struct sock *)msk; bool remove_subflow; + if (mptcp_pm_is_userspace(msk)) + goto next; + if (list_empty(&msk->conn_list)) { mptcp_pm_remove_anno_addr(msk, addr, false); goto next; @@ -1409,11 +1447,11 @@ static int mptcp_nl_remove_id_zero_address(struct net *net, struct sock *sk = (struct sock *)msk; struct mptcp_addr_info msk_local; - if (list_empty(&msk->conn_list)) + if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) goto next; local_address((struct sock_common *)msk, &msk_local); - if (!addresses_equal(&msk_local, addr, addr->port)) + if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) goto next; lock_sock(sk); @@ -1439,7 +1477,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) unsigned int addr_max; int ret; - ret = mptcp_pm_parse_addr(attr, info, false, &addr); + ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; @@ -1479,8 +1517,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) return ret; } -static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list) +void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; @@ -1516,9 +1554,11 @@ static void mptcp_nl_remove_addrs_list(struct net *net, while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; - lock_sock(sk); - mptcp_pm_remove_addrs_and_subflows(msk, rm_list); - release_sock(sk); + if (!mptcp_pm_is_userspace(msk)) { + lock_sock(sk); + mptcp_pm_remove_addrs_and_subflows(msk, rm_list); + release_sock(sk); + } sock_put(sk); cond_resched(); @@ -1611,7 +1651,7 @@ static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info) void *reply; int ret; - ret = mptcp_pm_parse_addr(attr, info, false, &addr); + ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; @@ -1791,7 +1831,7 @@ static int mptcp_nl_set_flags(struct net *net, while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; - if (list_empty(&msk->conn_list)) + if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) goto next; lock_sock(sk); @@ -1822,7 +1862,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) u8 bkup = 0, lookup_by_id = 0; int ret; - ret = mptcp_pm_parse_addr(attr, info, false, &addr); + ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; @@ -1861,6 +1901,13 @@ static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gf nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); } +bool mptcp_userspace_pm_active(const struct mptcp_sock *msk) +{ + return genl_has_listeners(&mptcp_genl_family, + sock_net((const struct sock *)msk), + MPTCP_PM_EV_GRP_OFFSET); +} + static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) { const struct inet_sock *issk = inet_sk(ssk); @@ -1981,6 +2028,9 @@ static int mptcp_event_created(struct sk_buff *skb, if (err) return err; + if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side))) + return -EMSGSIZE; + return mptcp_event_add_subflow(skb, ssk); } @@ -2015,10 +2065,12 @@ nla_put_failure: kfree_skb(skb); } -void mptcp_event_addr_announced(const struct mptcp_sock *msk, +void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info) { - struct net *net = sock_net((const struct sock *)msk); + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + struct net *net = sock_net(ssk); struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2040,7 +2092,10 @@ void mptcp_event_addr_announced(const struct mptcp_sock *msk, if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) goto nla_put_failure; - if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port)) + if (nla_put_be16(skb, MPTCP_ATTR_DPORT, + info->port == 0 ? + inet_sk(ssk)->inet_dport : + info->port)) goto nla_put_failure; switch (info->family) { @@ -2157,6 +2212,26 @@ static const struct genl_small_ops mptcp_pm_ops[] = { .doit = mptcp_nl_cmd_set_flags, .flags = GENL_ADMIN_PERM, }, + { + .cmd = MPTCP_PM_CMD_ANNOUNCE, + .doit = mptcp_nl_cmd_announce, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_REMOVE, + .doit = mptcp_nl_cmd_remove, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, + .doit = mptcp_nl_cmd_sf_create, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, + .doit = mptcp_nl_cmd_sf_destroy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family mptcp_genl_family __ro_after_init = { diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c new file mode 100644 index 000000000000..f56378e4f597 --- /dev/null +++ b/net/mptcp/pm_userspace.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Multipath TCP + * + * Copyright (c) 2022, Intel Corporation. + */ + +#include "protocol.h" + +void mptcp_free_local_addr_list(struct mptcp_sock *msk) +{ + struct mptcp_pm_addr_entry *entry, *tmp; + struct sock *sk = (struct sock *)msk; + LIST_HEAD(free_list); + + if (!mptcp_pm_is_userspace(msk)) + return; + + spin_lock_bh(&msk->pm.lock); + list_splice_init(&msk->pm.userspace_pm_local_addr_list, &free_list); + spin_unlock_bh(&msk->pm.lock); + + list_for_each_entry_safe(entry, tmp, &free_list, list) { + sock_kfree_s(sk, entry, sizeof(*entry)); + } +} + +int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry) +{ + DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + struct mptcp_pm_addr_entry *match = NULL; + struct sock *sk = (struct sock *)msk; + struct mptcp_pm_addr_entry *e; + bool addr_match = false; + bool id_match = false; + int ret = -EINVAL; + + bitmap_zero(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { + addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); + if (addr_match && entry->addr.id == 0) + entry->addr.id = e->addr.id; + id_match = (e->addr.id == entry->addr.id); + if (addr_match && id_match) { + match = e; + break; + } else if (addr_match || id_match) { + break; + } + __set_bit(e->addr.id, id_bitmap); + } + + if (!match && !addr_match && !id_match) { + /* Memory for the entry is allocated from the + * sock option buffer. + */ + e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC); + if (!e) { + spin_unlock_bh(&msk->pm.lock); + return -ENOMEM; + } + + *e = *entry; + if (!e->addr.id) + e->addr.id = find_next_zero_bit(id_bitmap, + MPTCP_PM_MAX_ADDR_ID + 1, + 1); + list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list); + ret = e->addr.id; + } else if (match) { + ret = entry->addr.id; + } + + spin_unlock_bh(&msk->pm.lock); + return ret; +} + +int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, + unsigned int id, + u8 *flags, int *ifindex) +{ + struct mptcp_pm_addr_entry *entry, *match = NULL; + + *flags = 0; + *ifindex = 0; + + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (id == entry->addr.id) { + match = entry; + break; + } + } + spin_unlock_bh(&msk->pm.lock); + if (match) { + *flags = match->flags; + *ifindex = match->ifindex; + } + + return 0; +} + +int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, + struct mptcp_addr_info *skc) +{ + struct mptcp_pm_addr_entry new_entry; + __be16 msk_sport = ((struct inet_sock *) + inet_sk((struct sock *)msk))->inet_sport; + + memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); + new_entry.addr = *skc; + new_entry.addr.id = 0; + new_entry.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; + + if (new_entry.addr.port == msk_sport) + new_entry.addr.port = 0; + + return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); +} + +int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_pm_addr_entry addr_val; + struct mptcp_sock *msk; + int err = -EINVAL; + u32 token_val; + + if (!addr || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(sock_net(skb->sk), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto announce_err; + } + + err = mptcp_pm_parse_entry(addr, info, true, &addr_val); + if (err < 0) { + GENL_SET_ERR_MSG(info, "error parsing local address"); + goto announce_err; + } + + if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + GENL_SET_ERR_MSG(info, "invalid addr id or flags"); + goto announce_err; + } + + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); + if (err < 0) { + GENL_SET_ERR_MSG(info, "did not match address and id"); + goto announce_err; + } + + lock_sock((struct sock *)msk); + spin_lock_bh(&msk->pm.lock); + + if (mptcp_pm_alloc_anno_list(msk, &addr_val)) { + mptcp_pm_announce_addr(msk, &addr_val.addr, false); + mptcp_pm_nl_addr_send_ack(msk); + } + + spin_unlock_bh(&msk->pm.lock); + release_sock((struct sock *)msk); + + err = 0; + announce_err: + sock_put((struct sock *)msk); + return err; +} + +int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; + struct mptcp_pm_addr_entry *match = NULL; + struct mptcp_pm_addr_entry *entry; + struct mptcp_sock *msk; + LIST_HEAD(free_list); + int err = -EINVAL; + u32 token_val; + u8 id_val; + + if (!id || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + id_val = nla_get_u8(id); + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(sock_net(skb->sk), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto remove_err; + } + + lock_sock((struct sock *)msk); + + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (entry->addr.id == id_val) { + match = entry; + break; + } + } + + if (!match) { + GENL_SET_ERR_MSG(info, "address with specified id not found"); + release_sock((struct sock *)msk); + goto remove_err; + } + + list_move(&match->list, &free_list); + + mptcp_pm_remove_addrs_and_subflows(msk, &free_list); + + release_sock((struct sock *)msk); + + list_for_each_entry_safe(match, entry, &free_list, list) { + sock_kfree_s((struct sock *)msk, match, sizeof(*match)); + } + + err = 0; + remove_err: + sock_put((struct sock *)msk); + return err; +} + +int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_addr_info addr_r; + struct mptcp_addr_info addr_l; + struct mptcp_sock *msk; + int err = -EINVAL; + struct sock *sk; + u32 token_val; + + if (!laddr || !raddr || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(genl_info_net(info), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto create_err; + } + + err = mptcp_pm_parse_addr(laddr, info, &addr_l); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + goto create_err; + } + + if (addr_l.id == 0) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id"); + goto create_err; + } + + err = mptcp_pm_parse_addr(raddr, info, &addr_r); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + goto create_err; + } + + sk = &msk->sk.icsk_inet.sk; + lock_sock(sk); + + err = __mptcp_subflow_connect(sk, &addr_l, &addr_r); + + release_sock(sk); + + create_err: + sock_put((struct sock *)msk); + return err; +} + +static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, + const struct mptcp_addr_info *local, + const struct mptcp_addr_info *remote) +{ + struct sock *sk = &msk->sk.icsk_inet.sk; + struct mptcp_subflow_context *subflow; + struct sock *found = NULL; + + if (local->family != remote->family) + return NULL; + + lock_sock(sk); + + mptcp_for_each_subflow(msk, subflow) { + const struct inet_sock *issk; + struct sock *ssk; + + ssk = mptcp_subflow_tcp_sock(subflow); + + if (local->family != ssk->sk_family) + continue; + + issk = inet_sk(ssk); + + switch (ssk->sk_family) { + case AF_INET: + if (issk->inet_saddr != local->addr.s_addr || + issk->inet_daddr != remote->addr.s_addr) + continue; + break; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + case AF_INET6: { + const struct ipv6_pinfo *pinfo = inet6_sk(ssk); + + if (!ipv6_addr_equal(&local->addr6, &pinfo->saddr) || + !ipv6_addr_equal(&remote->addr6, &ssk->sk_v6_daddr)) + continue; + break; + } +#endif + default: + continue; + } + + if (issk->inet_sport == local->port && + issk->inet_dport == remote->port) { + found = ssk; + goto found; + } + } + +found: + release_sock(sk); + + return found; +} + +int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_addr_info addr_l; + struct mptcp_addr_info addr_r; + struct mptcp_sock *msk; + struct sock *sk, *ssk; + int err = -EINVAL; + u32 token_val; + + if (!laddr || !raddr || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(genl_info_net(info), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto destroy_err; + } + + err = mptcp_pm_parse_addr(laddr, info, &addr_l); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + goto destroy_err; + } + + err = mptcp_pm_parse_addr(raddr, info, &addr_r); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + goto destroy_err; + } + + if (addr_l.family != addr_r.family) { + GENL_SET_ERR_MSG(info, "address families do not match"); + goto destroy_err; + } + + if (!addr_l.port || !addr_r.port) { + GENL_SET_ERR_MSG(info, "missing local or remote port"); + goto destroy_err; + } + + sk = &msk->sk.icsk_inet.sk; + ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r); + if (ssk) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); + mptcp_close_ssk(sk, ssk, subflow); + err = 0; + } else { + err = -ESRCH; + } + + destroy_err: + sock_put((struct sock *)msk); + return err; +} diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a5d466e6b538..17e13396024a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -216,7 +216,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb) seq = MPTCP_SKB_CB(skb)->map_seq; end_seq = MPTCP_SKB_CB(skb)->end_seq; - max_seq = READ_ONCE(msk->rcv_wnd_sent); + max_seq = atomic64_read(&msk->rcv_wnd_sent); pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq, RB_EMPTY_ROOT(&msk->out_of_order_queue)); @@ -225,7 +225,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb) mptcp_drop(sk, skb); pr_debug("oow by %lld, rcv_wnd_sent %llu\n", (unsigned long long)end_seq - (unsigned long)max_seq, - (unsigned long long)msk->rcv_wnd_sent); + (unsigned long long)atomic64_read(&msk->rcv_wnd_sent)); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW); return; } @@ -1141,18 +1141,21 @@ struct mptcp_sendmsg_info { bool data_lock_held; }; -static int mptcp_check_allowed_size(struct mptcp_sock *msk, u64 data_seq, - int avail_size) +static int mptcp_check_allowed_size(const struct mptcp_sock *msk, struct sock *ssk, + u64 data_seq, int avail_size) { u64 window_end = mptcp_wnd_end(msk); + u64 mptcp_snd_wnd; if (__mptcp_check_fallback(msk)) return avail_size; - if (!before64(data_seq + avail_size, window_end)) { - u64 allowed_size = window_end - data_seq; + mptcp_snd_wnd = window_end - data_seq; + avail_size = min_t(unsigned int, mptcp_snd_wnd, avail_size); - return min_t(unsigned int, allowed_size, avail_size); + if (unlikely(tcp_sk(ssk)->snd_wnd < mptcp_snd_wnd)) { + tcp_sk(ssk)->snd_wnd = min_t(u64, U32_MAX, mptcp_snd_wnd); + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_SNDWNDSHARED); } return avail_size; @@ -1305,7 +1308,7 @@ alloc_skb: } /* Zero window and all data acked? Probe. */ - copy = mptcp_check_allowed_size(msk, data_seq, copy); + copy = mptcp_check_allowed_size(msk, ssk, data_seq, copy); if (copy == 0) { u64 snd_una = READ_ONCE(msk->snd_una); @@ -1498,11 +1501,16 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) * to check that subflow has a non empty cwin. */ ssk = send_info[SSK_MODE_ACTIVE].ssk; - if (!ssk || !sk_stream_memory_free(ssk) || !tcp_sk(ssk)->snd_wnd) + if (!ssk || !sk_stream_memory_free(ssk)) return NULL; - burst = min_t(int, MPTCP_SEND_BURST_SIZE, tcp_sk(ssk)->snd_wnd); + burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt); wmem = READ_ONCE(ssk->sk_wmem_queued); + if (!burst) { + msk->last_snd = NULL; + return ssk; + } + subflow = mptcp_subflow_ctx(ssk); subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem + READ_ONCE(ssk->sk_pacing_rate) * burst, @@ -1605,10 +1613,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) out: /* ensure the rtx timer is running */ - mptcp_data_lock(sk); if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); - mptcp_data_unlock(sk); if (copied) __mptcp_check_send_data_fin(sk); } @@ -2184,23 +2190,10 @@ mp_fail_response_expect_subflow(struct mptcp_sock *msk) return ret; } -static void mptcp_check_mp_fail_response(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - - bh_lock_sock(sk); - subflow = mp_fail_response_expect_subflow(msk); - if (subflow) - __set_bit(MPTCP_FAIL_NO_RESPONSE, &msk->flags); - bh_unlock_sock(sk); -} - static void mptcp_timeout_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); - mptcp_check_mp_fail_response(mptcp_sk(sk)); mptcp_schedule_work(sk); sock_put(sk); } @@ -2521,10 +2514,8 @@ static void __mptcp_retrans(struct sock *sk) reset_timer: mptcp_check_and_set_pending(sk); - mptcp_data_lock(sk); if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); - mptcp_data_unlock(sk); } static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) @@ -2584,8 +2575,7 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) __mptcp_retrans(sk); - if (test_and_clear_bit(MPTCP_FAIL_NO_RESPONSE, &msk->flags)) - mptcp_mp_fail_no_response(msk); + mptcp_mp_fail_no_response(msk); unlock: release_sock(sk); @@ -2703,10 +2693,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); tcp_send_ack(ssk); - mptcp_data_lock(sk); if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); - mptcp_data_unlock(sk); } break; } @@ -2807,10 +2795,8 @@ static void __mptcp_destroy_sock(struct sock *sk) /* join list will be eventually flushed (with rst) at sock lock release time*/ list_splice_init(&msk->conn_list, &conn_list); - mptcp_data_lock(sk); mptcp_stop_timer(sk); sk_stop_timer(sk, &sk->sk_timer); - mptcp_data_unlock(sk); msk->pm.status = 0; /* clears msk->subflow, allowing the following loop to close @@ -2872,9 +2858,7 @@ cleanup: __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - mptcp_data_lock(sk); sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN); - mptcp_data_unlock(sk); } release_sock(sk); if (do_cancel_work) @@ -2919,10 +2903,8 @@ static int mptcp_disconnect(struct sock *sk, int flags) __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE); } - mptcp_data_lock(sk); mptcp_stop_timer(sk); sk_stop_timer(sk, &sk->sk_timer); - mptcp_data_unlock(sk); if (mptcp_sk(sk)->token) mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); @@ -2996,7 +2978,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); ack_seq++; WRITE_ONCE(msk->ack_seq, ack_seq); - WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); + atomic64_set(&msk->rcv_wnd_sent, ack_seq); } sock_reset_flag(nsk, SOCK_RCU_FREE); @@ -3097,6 +3079,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk) msk->rmem_fwd_alloc = 0; mptcp_token_destroy(msk); mptcp_pm_free_anno_list(msk); + mptcp_free_local_addr_list(msk); } static void mptcp_destroy(struct sock *sk) @@ -3288,9 +3271,9 @@ void mptcp_finish_connect(struct sock *ssk) WRITE_ONCE(msk->write_seq, subflow->idsn + 1); WRITE_ONCE(msk->snd_nxt, msk->write_seq); WRITE_ONCE(msk->ack_seq, ack_seq); - WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); WRITE_ONCE(msk->can_ack, 1); WRITE_ONCE(msk->snd_una, msk->write_seq); + atomic64_set(&msk->rcv_wnd_sent, ack_seq); mptcp_pm_new_connection(msk, ssk, 0); @@ -3321,15 +3304,12 @@ bool mptcp_finish_join(struct sock *ssk) return false; } - if (!msk->pm.server_side) + if (!list_empty(&subflow->node)) goto out; if (!mptcp_pm_allow_new_subflow(msk)) goto err_prohibited; - if (WARN_ON_ONCE(!list_empty(&subflow->node))) - goto err_prohibited; - /* active connections are already on conn_list. * If we can't acquire msk socket lock here, let the release callback * handle it @@ -3788,8 +3768,8 @@ void __init mptcp_proto_init(void) for_each_possible_cpu(cpu) { delegated = per_cpu_ptr(&mptcp_delegated_actions, cpu); INIT_LIST_HEAD(&delegated->head); - netif_tx_napi_add(&mptcp_napi_dev, &delegated->napi, mptcp_napi_poll, - NAPI_POLL_WEIGHT); + netif_napi_add_tx(&mptcp_napi_dev, &delegated->napi, + mptcp_napi_poll); napi_enable(&delegated->napi); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3a8740fef918..200f89f6d62f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -11,6 +11,7 @@ #include <net/tcp.h> #include <net/inet_connection_sock.h> #include <uapi/linux/mptcp.h> +#include <net/genetlink.h> #define MPTCP_SUPPORTED_VERSION 1 @@ -116,7 +117,6 @@ #define MPTCP_WORK_EOF 3 #define MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 -#define MPTCP_FAIL_NO_RESPONSE 6 /* MPTCP socket release cb flags */ #define MPTCP_PUSH_PENDING 1 @@ -184,6 +184,14 @@ enum mptcp_pm_status { */ }; +enum mptcp_pm_type { + MPTCP_PM_TYPE_KERNEL = 0, + MPTCP_PM_TYPE_USERSPACE, + + __MPTCP_PM_TYPE_NR, + __MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1, +}; + /* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */ #define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1) @@ -200,6 +208,7 @@ struct mptcp_pm_data { struct mptcp_addr_info local; struct mptcp_addr_info remote; struct list_head anno_list; + struct list_head userspace_pm_local_addr_list; spinlock_t lock; /*protects the whole PM data */ @@ -212,6 +221,7 @@ struct mptcp_pm_data { u8 add_addr_signaled; u8 add_addr_accepted; u8 local_addr_used; + u8 pm_type; u8 subflows; u8 status; DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); @@ -219,6 +229,14 @@ struct mptcp_pm_data { struct mptcp_rm_list rm_list_rx; }; +struct mptcp_pm_addr_entry { + struct list_head list; + struct mptcp_addr_info addr; + u8 flags; + int ifindex; + struct socket *lsk; +}; + struct mptcp_data_frag { struct list_head list; u64 data_seq; @@ -238,7 +256,7 @@ struct mptcp_sock { u64 write_seq; u64 snd_nxt; u64 ack_seq; - u64 rcv_wnd_sent; + atomic64_t rcv_wnd_sent; u64 rcv_data_fin_seq; int rmem_fwd_alloc; struct sock *last_snd; @@ -447,7 +465,8 @@ struct mptcp_subflow_context { can_ack : 1, /* only after processing the remote a key */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ - local_id_valid : 1; /* local_id is correctly initialized */ + local_id_valid : 1, /* local_id is correctly initialized */ + valid_csum_seen : 1; /* at least one csum validated */ enum mptcp_data_avail data_avail; bool mp_fail_response_expect; u32 remote_nonce; @@ -576,6 +595,7 @@ unsigned int mptcp_get_add_addr_timeout(const struct net *net); int mptcp_is_checksum_enabled(const struct net *net); int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); +int mptcp_get_pm_type(const struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); @@ -591,6 +611,9 @@ void mptcp_subflow_reset(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); +bool mptcp_addresses_equal(const struct mptcp_addr_info *a, + const struct mptcp_addr_info *b, bool use_port); + /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, const struct mptcp_addr_info *remote); @@ -626,19 +649,6 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk, inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; } -static inline bool mptcp_has_another_subflow(struct sock *ssk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp; - struct mptcp_sock *msk = mptcp_sk(subflow->conn); - - mptcp_for_each_subflow(msk, tmp) { - if (tmp != subflow) - return true; - } - - return false; -} - void __init mptcp_proto_init(void); #if IS_ENABLED(CONFIG_MPTCP_IPV6) int __init mptcp_proto_v6_init(void); @@ -728,11 +738,16 @@ void mptcp_token_destroy(struct mptcp_sock *msk); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); -u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); +__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_data_reset(struct mptcp_sock *msk); +int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, + struct mptcp_addr_info *addr); +int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, + bool require_family, + struct mptcp_pm_addr_entry *entry); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); @@ -743,7 +758,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk); bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk); void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, const struct mptcp_subflow_context *subflow); -void mptcp_pm_add_addr_received(struct mptcp_sock *msk, +void mptcp_pm_add_addr_received(const struct sock *ssk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); @@ -753,6 +768,8 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); +bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, + const struct mptcp_pm_addr_entry *entry); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * @@ -761,19 +778,34 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, const struct mptcp_addr_info *addr); -int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, +int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, + unsigned int id, u8 *flags, int *ifindex); +int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, + unsigned int id, + u8 *flags, int *ifindex); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); +void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list); + +int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry); +void mptcp_free_local_addr_list(struct mptcp_sock *msk); +int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info); +int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info); +int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info); +int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); -void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info); +void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); +bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { @@ -796,6 +828,16 @@ static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk) return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL); } +static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk) +{ + return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE; +} + +static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk) +{ + return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL; +} + static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) { u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; @@ -826,9 +868,9 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); +int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); void __init mptcp_pm_nl_init(void); -void mptcp_pm_nl_data_init(struct mptcp_sock *msk); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); @@ -838,6 +880,20 @@ unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk); +/* called under PM lock */ +static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk) +{ + if (--msk->pm.subflows < mptcp_pm_get_subflows_max(msk)) + WRITE_ONCE(msk->pm.accept_subflow, true); +} + +static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk) +{ + spin_lock_bh(&msk->pm.lock); + __mptcp_pm_close_subflow(msk); + spin_unlock_bh(&msk->pm.lock); +} + void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk); void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk); @@ -891,13 +947,17 @@ static inline bool mptcp_check_infinite_map(struct sk_buff *skb) return false; } +static inline bool is_active_ssk(struct mptcp_subflow_context *subflow) +{ + return (subflow->request_mptcp || subflow->request_join); +} + static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct sock *parent = subflow->conn; return sk->sk_state == TCP_ESTABLISHED && - !mptcp_sk(parent)->pm.server_side && + is_active_ssk(subflow) && !subflow->conn_finished; } diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 826b0c1dae98..423d3826ca1e 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -756,6 +756,18 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, return -EOPNOTSUPP; } +static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optval, + unsigned int optlen) +{ + struct socket *listener; + + listener = __mptcp_nmpc_socket(msk); + if (!listener) + return 0; /* TCP_DEFER_ACCEPT does not fail */ + + return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen); +} + static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { @@ -782,6 +794,8 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); case TCP_NODELAY: return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); + case TCP_DEFER_ACCEPT: + return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen); } return -EOPNOTSUPP; @@ -1142,6 +1156,7 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_CONGESTION: case TCP_INFO: case TCP_CC_INFO: + case TCP_DEFER_ACCEPT: return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); case TCP_INQ: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 75c824b67ca9..8841e8cd9ad8 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -62,7 +62,9 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2, static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk) { return mptcp_is_fully_established((void *)msk) && - READ_ONCE(msk->pm.accept_subflow); + ((mptcp_pm_is_userspace(msk) && + mptcp_userspace_pm_active(msk)) || + READ_ONCE(msk->pm.accept_subflow)); } /* validate received token and create truncated hmac and nonce for SYN-ACK */ @@ -441,6 +443,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; + subflow->remote_id = mp_opt.join_id; pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -888,7 +891,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); u32 offset, seq, delta; - u16 csum; + __sum16 csum; int len; if (!csum_reqd) @@ -955,11 +958,14 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * subflow->map_data_csum); if (unlikely(csum)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); - subflow->send_mp_fail = 1; - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); + if (subflow->mp_join || subflow->valid_csum_seen) { + subflow->send_mp_fail = 1; + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); + } return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; } + subflow->valid_csum_seen = 1; return MAPPING_OK; } @@ -1010,12 +1016,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk, pr_debug("infinite mapping received"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); subflow->map_data_len = 0; - if (sk && inet_sk_state_load(sk) != TCP_CLOSE) { - mptcp_data_lock(sk); - if (inet_sk_state_load(sk) != TCP_CLOSE) - sk_stop_timer(sk, &sk->sk_timer); - mptcp_data_unlock(sk); - } + if (!sock_flag(ssk, SOCK_DEAD)) + sk_stop_timer(sk, &sk->sk_timer); + return MAPPING_INVALID; } @@ -1150,6 +1153,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss } } +static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) +{ + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + + if (subflow->mp_join) + return false; + else if (READ_ONCE(msk->csum_enabled)) + return !subflow->valid_csum_seen; + else + return !subflow->fully_established; +} + static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -1215,8 +1230,7 @@ fallback: if (!__mptcp_check_fallback(msk)) { /* RFC 8684 section 3.7. */ if (subflow->send_mp_fail) { - if (mptcp_has_another_subflow(ssk) || - !READ_ONCE(msk->allow_infinite_fallback)) { + if (!READ_ONCE(msk->allow_infinite_fallback)) { ssk->sk_err = EBADMSG; tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; @@ -1224,9 +1238,8 @@ fallback: tcp_send_active_reset(ssk, GFP_ATOMIC); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); - } else { + } else if (!sock_flag(ssk, SOCK_DEAD)) { WRITE_ONCE(subflow->mp_fail_response_expect, true); - /* The data lock is acquired in __mptcp_move_skbs() */ sk_reset_timer((struct sock *)msk, &((struct sock *)msk)->sk_timer, jiffies + TCP_RTO_MAX); @@ -1235,7 +1248,7 @@ fallback: return true; } - if ((subflow->mp_join || subflow->fully_established) && subflow->map_data_len) { + if (!subflow_can_fallback(subflow) && subflow->map_data_len) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ @@ -1441,20 +1454,20 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, struct sockaddr_storage addr; int remote_id = remote->id; int local_id = loc->id; + int err = -ENOTCONN; struct socket *sf; struct sock *ssk; u32 remote_token; int addrlen; int ifindex; u8 flags; - int err; if (!mptcp_is_fully_established(sk)) - return -ENOTCONN; + goto err_out; err = mptcp_subflow_create_socket(sk, &sf); if (err) - return err; + goto err_out; ssk = sf->sk; subflow = mptcp_subflow_ctx(ssk); @@ -1465,7 +1478,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (local_id) subflow_set_local_id(subflow, local_id); - mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id, + mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id, &flags, &ifindex); subflow->remote_key = msk->remote_key; subflow->local_key = msk->local_key; @@ -1512,6 +1525,12 @@ failed_unlink: failed: subflow->disposable = 1; sock_release(sf); + +err_out: + /* we account subflows before the creation, and this failures will not + * be caught by sk_state_change() + */ + mptcp_pm_close_subflow(msk); return err; } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 2c467c422dc6..fb67f1ca2495 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1495,7 +1495,7 @@ int __init ip_vs_conn_init(void) pr_info("Connection hash table configured " "(size=%d, memory=%ldKbytes)\n", ip_vs_conn_tab_size, - (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024); + (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024); IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n", sizeof(struct ip_vs_conn)); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7f645328b47f..efab2b06d373 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1767,8 +1767,6 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) #ifdef CONFIG_SYSCTL -static int three = 3; - static int proc_do_defense_mode(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -1977,7 +1975,7 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &three, + .extra2 = SYSCTL_THREE, }, { .procname = "nat_icmp_send", diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 82f36beb2e76..5d8ed6c90b7e 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -132,6 +132,9 @@ static int __nf_conncount_add(struct net *net, struct nf_conn *found_ct; unsigned int collect = 0; + if (time_is_after_eq_jiffies((unsigned long)list->last_gc)) + goto add_new_node; + /* check the saved connections */ list_for_each_entry_safe(conn, conn_n, &list->head, node) { if (collect > CONNCOUNT_GC_MAX_NODES) @@ -177,6 +180,7 @@ static int __nf_conncount_add(struct net *net, nf_ct_put(found_ct); } +add_new_node: if (WARN_ON_ONCE(list->count > INT_MAX)) return -EOVERFLOW; @@ -190,6 +194,7 @@ static int __nf_conncount_add(struct net *net, conn->jiffies32 = (u32)jiffies; list_add_tail(&conn->node, &list->head); list->count++; + list->last_gc = (u32)jiffies; return 0; } @@ -214,6 +219,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list) spin_lock_init(&list->list_lock); INIT_LIST_HEAD(&list->head); list->count = 0; + list->last_gc = (u32)jiffies; } EXPORT_SYMBOL_GPL(nf_conncount_list_init); @@ -227,6 +233,10 @@ bool nf_conncount_gc_list(struct net *net, unsigned int collected = 0; bool ret = false; + /* don't bother if we just did GC */ + if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc))) + return false; + /* don't bother if other cpu is already doing GC */ if (!spin_trylock(&list->list_lock)) return false; @@ -258,6 +268,7 @@ bool nf_conncount_gc_list(struct net *net, if (!list->count) ret = true; + list->last_gc = (u32)jiffies; spin_unlock(&list->list_lock); return ret; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0164e5f522e8..082a2fd8d85b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -525,50 +525,6 @@ clean_from_lists(struct nf_conn *ct) nf_ct_remove_expectations(ct); } -/* must be called with local_bh_disable */ -static void nf_ct_add_to_dying_list(struct nf_conn *ct) -{ - struct ct_pcpu *pcpu; - - /* add this conntrack to the (per cpu) dying list */ - ct->cpu = smp_processor_id(); - pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); - - spin_lock(&pcpu->lock); - hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &pcpu->dying); - spin_unlock(&pcpu->lock); -} - -/* must be called with local_bh_disable */ -static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct) -{ - struct ct_pcpu *pcpu; - - /* add this conntrack to the (per cpu) unconfirmed list */ - ct->cpu = smp_processor_id(); - pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); - - spin_lock(&pcpu->lock); - hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &pcpu->unconfirmed); - spin_unlock(&pcpu->lock); -} - -/* must be called with local_bh_disable */ -static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) -{ - struct ct_pcpu *pcpu; - - /* We overload first tuple to link into unconfirmed or dying list.*/ - pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); - - spin_lock(&pcpu->lock); - BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); - hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); - spin_unlock(&pcpu->lock); -} - #define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) /* Released via nf_ct_destroy() */ @@ -640,7 +596,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct) if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE)) destroy_gre_conntrack(ct); - local_bh_disable(); /* Expectations will have been removed in clean_from_lists, * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, @@ -648,10 +603,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct) */ nf_ct_remove_expectations(ct); - nf_ct_del_from_dying_or_unconfirmed_list(ct); - - local_bh_enable(); - if (ct->master) nf_ct_put(ct->master); @@ -660,15 +611,12 @@ void nf_ct_destroy(struct nf_conntrack *nfct) } EXPORT_SYMBOL(nf_ct_destroy); -static void nf_ct_delete_from_lists(struct nf_conn *ct) +static void __nf_ct_delete_from_lists(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, reply_hash; unsigned int sequence; - nf_ct_helper_destroy(ct); - - local_bh_disable(); do { sequence = read_seqcount_begin(&nf_conntrack_generation); hash = hash_conntrack(net, @@ -681,12 +629,30 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) clean_from_lists(ct); nf_conntrack_double_unlock(hash, reply_hash); +} + +static void nf_ct_delete_from_lists(struct nf_conn *ct) +{ + nf_ct_helper_destroy(ct); + local_bh_disable(); - nf_ct_add_to_dying_list(ct); + __nf_ct_delete_from_lists(ct); local_bh_enable(); } +static void nf_ct_add_to_ecache_list(struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + struct nf_conntrack_net *cnet = nf_ct_pernet(nf_ct_net(ct)); + + spin_lock(&cnet->ecache.dying_lock); + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &cnet->ecache.dying_list); + spin_unlock(&cnet->ecache.dying_lock); +#endif +} + bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) { struct nf_conn_tstamp *tstamp; @@ -709,7 +675,12 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) /* destroy event was not delivered. nf_ct_put will * be done by event cache worker on redelivery. */ - nf_ct_delete_from_lists(ct); + nf_ct_helper_destroy(ct); + local_bh_disable(); + __nf_ct_delete_from_lists(ct); + nf_ct_add_to_ecache_list(ct); + local_bh_enable(); + nf_conntrack_ecache_work(nf_ct_net(ct), NFCT_ECACHE_DESTROY_FAIL); return false; } @@ -870,6 +841,33 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, &nf_conntrack_hash[reply_hash]); } +static bool nf_ct_ext_valid_pre(const struct nf_ct_ext *ext) +{ + /* if ext->gen_id is not equal to nf_conntrack_ext_genid, some extensions + * may contain stale pointers to e.g. helper that has been removed. + * + * The helper can't clear this because the nf_conn object isn't in + * any hash and synchronize_rcu() isn't enough because associated skb + * might sit in a queue. + */ + return !ext || ext->gen_id == atomic_read(&nf_conntrack_ext_genid); +} + +static bool nf_ct_ext_valid_post(struct nf_ct_ext *ext) +{ + if (!ext) + return true; + + if (ext->gen_id != atomic_read(&nf_conntrack_ext_genid)) + return false; + + /* inserted into conntrack table, nf_ct_iterate_cleanup() + * will find it. Disable nf_ct_ext_find() id check. + */ + WRITE_ONCE(ext->gen_id, 0); + return true; +} + int nf_conntrack_hash_check_insert(struct nf_conn *ct) { @@ -885,6 +883,11 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) zone = nf_ct_zone(ct); + if (!nf_ct_ext_valid_pre(ct->ext)) { + NF_CT_STAT_INC(net, insert_failed); + return -ETIMEDOUT; + } + local_bh_disable(); do { sequence = read_seqcount_begin(&nf_conntrack_generation); @@ -925,6 +928,13 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert); local_bh_enable(); + + if (!nf_ct_ext_valid_post(ct->ext)) { + nf_ct_kill(ct); + NF_CT_STAT_INC(net, drop); + return -ETIMEDOUT; + } + return 0; chaintoolong: NF_CT_STAT_INC(net, chaintoolong); @@ -972,7 +982,6 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct) struct nf_conn_tstamp *tstamp; refcount_inc(&ct->ct_general.use); - ct->status |= IPS_CONFIRMED; /* set conntrack timestamp, if enabled. */ tstamp = nf_conn_tstamp_find(ct); @@ -1001,7 +1010,6 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb, nf_conntrack_get(&ct->ct_general); nf_ct_acct_merge(ct, ctinfo, loser_ct); - nf_ct_add_to_dying_list(loser_ct); nf_ct_put(loser_ct); nf_ct_set(skb, ct, ctinfo); @@ -1134,7 +1142,6 @@ nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h, return ret; drop: - nf_ct_add_to_dying_list(loser_ct); NF_CT_STAT_INC(net, drop); NF_CT_STAT_INC(net, insert_failed); return NF_DROP; @@ -1195,16 +1202,20 @@ __nf_conntrack_confirm(struct sk_buff *skb) return NF_DROP; } + if (!nf_ct_ext_valid_pre(ct->ext)) { + NF_CT_STAT_INC(net, insert_failed); + goto dying; + } + pr_debug("Confirming conntrack %p\n", ct); /* We have to check the DYING flag after unlink to prevent * a race against nf_ct_get_next_corpse() possibly called from * user context, else we insert an already 'dead' hash, blocking * further use of that particular connection -JM. */ - nf_ct_del_from_dying_or_unconfirmed_list(ct); + ct->status |= IPS_CONFIRMED; if (unlikely(nf_ct_is_dying(ct))) { - nf_ct_add_to_dying_list(ct); NF_CT_STAT_INC(net, insert_failed); goto dying; } @@ -1228,7 +1239,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) goto out; if (chainlen++ > max_chainlen) { chaintoolong: - nf_ct_add_to_dying_list(ct); NF_CT_STAT_INC(net, chaintoolong); NF_CT_STAT_INC(net, insert_failed); ret = NF_DROP; @@ -1252,6 +1262,16 @@ chaintoolong: nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); + /* ext area is still valid (rcu read lock is held, + * but will go out of scope soon, we need to remove + * this conntrack again. + */ + if (!nf_ct_ext_valid_post(ct->ext)) { + nf_ct_kill(ct); + NF_CT_STAT_INC(net, drop); + return NF_DROP; + } + help = nfct_help(ct); if (help && help->helper) nf_conntrack_event_cache(IPCT_HELPER, ct); @@ -1678,7 +1698,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conn *ct; struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; +#ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; +#endif struct nf_conntrack_expect *exp = NULL; const struct nf_conntrack_zone *zone; struct nf_conn_timeout *timeout_ext; @@ -1711,15 +1733,21 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_labels_ext_add(ct); +#ifdef CONFIG_NF_CONNTRACK_EVENTS ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; - nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, - ecache ? ecache->expmask : 0, - GFP_ATOMIC); - local_bh_disable(); + if ((ecache || net->ct.sysctl_events) && + !nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, + ecache ? ecache->expmask : 0, + GFP_ATOMIC)) { + nf_conntrack_free(ct); + return ERR_PTR(-ENOMEM); + } +#endif + cnet = nf_ct_pernet(net); if (cnet->expect_count) { - spin_lock(&nf_conntrack_expect_lock); + spin_lock_bh(&nf_conntrack_expect_lock); exp = nf_ct_find_expectation(net, zone, tuple); if (exp) { pr_debug("expectation arrives ct=%p exp=%p\n", @@ -1742,16 +1770,13 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, #endif NF_CT_STAT_INC(net, expect_new); } - spin_unlock(&nf_conntrack_expect_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); } if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); - /* Now it is inserted into the unconfirmed list, set refcount to 1. */ + /* Now it is going to be associated with an sk_buff, set refcount to 1. */ refcount_set(&ct->ct_general.use, 1); - nf_ct_add_to_unconfirmed_list(ct); - - local_bh_enable(); if (exp) { if (exp->expectfn) @@ -2319,7 +2344,7 @@ static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, /* Bring out ya dead! */ static struct nf_conn * get_next_corpse(int (*iter)(struct nf_conn *i, void *data), - void *data, unsigned int *bucket) + const struct nf_ct_iter_data *iter_data, unsigned int *bucket) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; @@ -2350,7 +2375,12 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), * tuple while iterating. */ ct = nf_ct_tuplehash_to_ctrack(h); - if (iter(ct, data)) + + if (iter_data->net && + !net_eq(iter_data->net, nf_ct_net(ct))) + continue; + + if (iter(ct, iter_data->data)) goto found; } spin_unlock(lockp); @@ -2367,7 +2397,7 @@ found: } static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report) + const struct nf_ct_iter_data *iter_data) { unsigned int bucket = 0; struct nf_conn *ct; @@ -2375,91 +2405,28 @@ static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), might_sleep(); mutex_lock(&nf_conntrack_mutex); - while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + while ((ct = get_next_corpse(iter, iter_data, &bucket)) != NULL) { /* Time to push up daises... */ - nf_ct_delete(ct, portid, report); + nf_ct_delete(ct, iter_data->portid, iter_data->report); nf_ct_put(ct); cond_resched(); } mutex_unlock(&nf_conntrack_mutex); } -struct iter_data { - int (*iter)(struct nf_conn *i, void *data); - void *data; - struct net *net; -}; - -static int iter_net_only(struct nf_conn *i, void *data) -{ - struct iter_data *d = data; - - if (!net_eq(d->net, nf_ct_net(i))) - return 0; - - return d->iter(i, d->data); -} - -static void -__nf_ct_unconfirmed_destroy(struct net *net) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct nf_conntrack_tuple_hash *h; - struct hlist_nulls_node *n; - struct ct_pcpu *pcpu; - - pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - - spin_lock_bh(&pcpu->lock); - hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) { - struct nf_conn *ct; - - ct = nf_ct_tuplehash_to_ctrack(h); - - /* we cannot call iter() on unconfirmed list, the - * owning cpu can reallocate ct->ext at any time. - */ - set_bit(IPS_DYING_BIT, &ct->status); - } - spin_unlock_bh(&pcpu->lock); - cond_resched(); - } -} - -void nf_ct_unconfirmed_destroy(struct net *net) +void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data), + const struct nf_ct_iter_data *iter_data) { + struct net *net = iter_data->net; struct nf_conntrack_net *cnet = nf_ct_pernet(net); might_sleep(); - if (atomic_read(&cnet->count) > 0) { - __nf_ct_unconfirmed_destroy(net); - nf_queue_nf_hook_drop(net); - synchronize_net(); - } -} -EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy); - -void nf_ct_iterate_cleanup_net(struct net *net, - int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report) -{ - struct nf_conntrack_net *cnet = nf_ct_pernet(net); - struct iter_data d; - - might_sleep(); - if (atomic_read(&cnet->count) == 0) return; - d.iter = iter; - d.data = data; - d.net = net; - - nf_ct_iterate_cleanup(iter_net_only, &d, portid, report); + nf_ct_iterate_cleanup(iter, iter_data); } EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); @@ -2477,6 +2444,7 @@ EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) { + struct nf_ct_iter_data iter_data = {}; struct net *net; down_read(&net_rwsem); @@ -2485,31 +2453,41 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) if (atomic_read(&cnet->count) == 0) continue; - __nf_ct_unconfirmed_destroy(net); nf_queue_nf_hook_drop(net); } up_read(&net_rwsem); /* Need to wait for netns cleanup worker to finish, if its * running -- it might have deleted a net namespace from - * the global list, so our __nf_ct_unconfirmed_destroy() might - * not have affected all namespaces. + * the global list, so hook drop above might not have + * affected all namespaces. */ net_ns_barrier(); - /* a conntrack could have been unlinked from unconfirmed list - * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy(). + /* a skb w. unconfirmed conntrack could have been reinjected just + * before we called nf_queue_nf_hook_drop(). + * * This makes sure its inserted into conntrack table. */ synchronize_net(); - nf_ct_iterate_cleanup(iter, data, 0, 0); + nf_ct_ext_bump_genid(); + iter_data.data = data; + nf_ct_iterate_cleanup(iter, &iter_data); + + /* Another cpu might be in a rcu read section with + * rcu protected pointer cleared in iter callback + * or hidden via nf_ct_ext_bump_genid() above. + * + * Wait until those are done. + */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy); static int kill_all(struct nf_conn *i, void *data) { - return net_eq(nf_ct_net(i), data); + return 1; } void nf_conntrack_cleanup_start(void) @@ -2544,8 +2522,9 @@ void nf_conntrack_cleanup_net(struct net *net) void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) { - int busy; + struct nf_ct_iter_data iter_data = {}; struct net *net; + int busy; /* * This makes sure all current packets have passed through @@ -2558,7 +2537,8 @@ i_see_dead_people: list_for_each_entry(net, net_exit_list, exit_list) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); - nf_ct_iterate_cleanup(kill_all, net, 0, 0); + iter_data.net = net; + nf_ct_iterate_cleanup_net(kill_all, &iter_data); if (atomic_read(&cnet->count) != 0) busy = 1; } @@ -2571,7 +2551,6 @@ i_see_dead_people: nf_conntrack_ecache_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); - free_percpu(net->ct.pcpu_lists); } } @@ -2777,33 +2756,19 @@ void nf_conntrack_init_end(void) * We need to use special "null" values, not used in hash table */ #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) -#define DYING_NULLS_VAL ((1<<30)+1) int nf_conntrack_init_net(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); int ret = -ENOMEM; - int cpu; BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER); BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS); atomic_set(&cnet->count, 0); - net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); - if (!net->ct.pcpu_lists) - goto err_stat; - - for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - - spin_lock_init(&pcpu->lock); - INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); - INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); - } - net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) - goto err_pcpu_lists; + return ret; ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) @@ -2819,8 +2784,5 @@ int nf_conntrack_init_net(struct net *net) err_expect: free_percpu(net->ct.stat); -err_pcpu_lists: - free_percpu(net->ct.pcpu_lists); -err_stat: return ret; } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 0cb2da0a759a..8698b3424646 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -16,7 +16,6 @@ #include <linux/vmalloc.h> #include <linux/stddef.h> #include <linux/err.h> -#include <linux/percpu.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/slab.h> @@ -29,8 +28,9 @@ static DEFINE_MUTEX(nf_ct_ecache_mutex); -#define ECACHE_RETRY_WAIT (HZ/10) -#define ECACHE_STACK_ALLOC (256 / sizeof(void *)) +#define DYING_NULLS_VAL ((1 << 30) + 1) +#define ECACHE_MAX_JIFFIES msecs_to_jiffies(10) +#define ECACHE_RETRY_JIFFIES msecs_to_jiffies(10) enum retry_state { STATE_CONGESTED, @@ -38,58 +38,67 @@ enum retry_state { STATE_DONE, }; -static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) +struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net) { - struct nf_conn *refs[ECACHE_STACK_ALLOC]; + struct nf_conntrack_net *cnet = nf_ct_pernet(net); + + return &cnet->ecache; +} +#if IS_MODULE(CONFIG_NF_CT_NETLINK) +EXPORT_SYMBOL_GPL(nf_conn_pernet_ecache); +#endif + +static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet) +{ + unsigned long stop = jiffies + ECACHE_MAX_JIFFIES; + struct hlist_nulls_head evicted_list; enum retry_state ret = STATE_DONE; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int evicted = 0; + unsigned int sent; - spin_lock(&pcpu->lock); + INIT_HLIST_NULLS_HEAD(&evicted_list, DYING_NULLS_VAL); - hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { +next: + sent = 0; + spin_lock_bh(&cnet->ecache.dying_lock); + + hlist_nulls_for_each_entry_safe(h, n, &cnet->ecache.dying_list, hnnode) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - struct nf_conntrack_ecache *e; - - if (!nf_ct_is_confirmed(ct)) - continue; - - /* This ecache access is safe because the ct is on the - * pcpu dying list and we hold the spinlock -- the entry - * cannot be free'd until after the lock is released. - * - * This is true even if ct has a refcount of 0: the - * cpu that is about to free the entry must remove it - * from the dying list and needs the lock to do so. - */ - e = nf_ct_ecache_find(ct); - if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL) - continue; - /* ct is in NFCT_ECACHE_DESTROY_FAIL state, this means - * the worker owns this entry: the ct will remain valid - * until the worker puts its ct reference. + /* The worker owns all entries, ct remains valid until nf_ct_put + * in the loop below. */ if (nf_conntrack_event(IPCT_DESTROY, ct)) { ret = STATE_CONGESTED; break; } - e->state = NFCT_ECACHE_DESTROY_SENT; - refs[evicted] = ct; + hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); + hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &evicted_list); - if (++evicted >= ARRAY_SIZE(refs)) { + if (time_after(stop, jiffies)) { ret = STATE_RESTART; break; } + + if (sent++ > 16) { + spin_unlock_bh(&cnet->ecache.dying_lock); + cond_resched(); + goto next; + } } - spin_unlock(&pcpu->lock); + spin_unlock_bh(&cnet->ecache.dying_lock); - /* can't _put while holding lock */ - while (evicted) - nf_ct_put(refs[--evicted]); + hlist_nulls_for_each_entry_safe(h, n, &evicted_list, hnnode) { + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode); + nf_ct_put(ct); + + cond_resched(); + } return ret; } @@ -97,35 +106,20 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) static void ecache_work(struct work_struct *work) { struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work); - struct netns_ct *ctnet = cnet->ecache.ct_net; - int cpu, delay = -1; - struct ct_pcpu *pcpu; - - local_bh_disable(); - - for_each_possible_cpu(cpu) { - enum retry_state ret; - - pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu); - - ret = ecache_work_evict_list(pcpu); - - switch (ret) { - case STATE_CONGESTED: - delay = ECACHE_RETRY_WAIT; - goto out; - case STATE_RESTART: - delay = 0; - break; - case STATE_DONE: - break; - } + int ret, delay = -1; + + ret = ecache_work_evict_list(cnet); + switch (ret) { + case STATE_CONGESTED: + delay = ECACHE_RETRY_JIFFIES; + break; + case STATE_RESTART: + delay = 0; + break; + case STATE_DONE: + break; } - out: - local_bh_enable(); - - ctnet->ecache_dwork_pending = delay > 0; if (delay >= 0) schedule_delayed_work(&cnet->ecache.dwork, delay); } @@ -199,7 +193,6 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct, */ if (e->portid == 0 && portid != 0) e->portid = portid; - e->state = NFCT_ECACHE_DESTROY_FAIL; } return ret; @@ -297,12 +290,51 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) schedule_delayed_work(&cnet->ecache.dwork, HZ); net->ct.ecache_dwork_pending = true; } else if (state == NFCT_ECACHE_DESTROY_SENT) { - net->ct.ecache_dwork_pending = false; - mod_delayed_work(system_wq, &cnet->ecache.dwork, 0); + if (!hlist_nulls_empty(&cnet->ecache.dying_list)) + mod_delayed_work(system_wq, &cnet->ecache.dwork, 0); + else + net->ct.ecache_dwork_pending = false; } } -#define NF_CT_EVENTS_DEFAULT 1 +bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) +{ + struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *e; + + switch (net->ct.sysctl_events) { + case 0: + /* assignment via template / ruleset? ignore sysctl. */ + if (ctmask || expmask) + break; + return true; + case 2: /* autodetect: no event listener, don't allocate extension. */ + if (!READ_ONCE(net->ct.ctnetlink_has_listener)) + return true; + fallthrough; + case 1: + /* always allocate an extension. */ + if (!ctmask && !expmask) { + ctmask = ~0; + expmask = ~0; + } + break; + default: + WARN_ON_ONCE(1); + return true; + } + + e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); + if (e) { + e->ctmask = ctmask; + e->expmask = expmask; + } + + return e != NULL; +} +EXPORT_SYMBOL_GPL(nf_ct_ecache_ext_add); + +#define NF_CT_EVENTS_DEFAULT 2 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; void nf_conntrack_ecache_pernet_init(struct net *net) @@ -311,8 +343,9 @@ void nf_conntrack_ecache_pernet_init(struct net *net) net->ct.sysctl_events = nf_ct_events; - cnet->ecache.ct_net = &net->ct; INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work); + INIT_HLIST_NULLS_HEAD(&cnet->ecache.dying_list, DYING_NULLS_VAL); + spin_lock_init(&cnet->ecache.dying_lock); BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */ } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 1296fda54ac6..0b513f7bf9f3 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -27,6 +27,8 @@ #define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */ +atomic_t nf_conntrack_ext_genid __read_mostly = ATOMIC_INIT(1); + static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = { [NF_CT_EXT_HELPER] = sizeof(struct nf_conn_help), #if IS_ENABLED(CONFIG_NF_NAT) @@ -116,8 +118,10 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) if (!new) return NULL; - if (!ct->ext) + if (!ct->ext) { memset(new->offset, 0, sizeof(new->offset)); + new->gen_id = atomic_read(&nf_conntrack_ext_genid); + } new->offset[id] = newoff; new->len = newlen; @@ -127,3 +131,29 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) return (void *)new + newoff; } EXPORT_SYMBOL(nf_ct_ext_add); + +/* Use nf_ct_ext_find wrapper. This is only useful for unconfirmed entries. */ +void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id) +{ + unsigned int gen_id = atomic_read(&nf_conntrack_ext_genid); + unsigned int this_id = READ_ONCE(ext->gen_id); + + if (!__nf_ct_ext_exist(ext, id)) + return NULL; + + if (this_id == 0 || ext->gen_id == gen_id) + return (void *)ext + ext->offset[id]; + + return NULL; +} +EXPORT_SYMBOL(__nf_ct_ext_find); + +void nf_ct_ext_bump_genid(void) +{ + unsigned int value = atomic_inc_return(&nf_conntrack_ext_genid); + + if (value == UINT_MAX) + atomic_set(&nf_conntrack_ext_genid, 1); + + msleep(HZ); +} diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 8dec42ec603e..c12a87ebc3ee 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -468,11 +468,6 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); - - /* Maybe someone has gotten the helper already when unhelp above. - * So need to wait it. - */ - synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 924d766e6c53..722af5e309ba 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1559,6 +1559,11 @@ static int ctnetlink_flush_conntrack(struct net *net, u32 portid, int report, u8 family) { struct ctnetlink_filter *filter = NULL; + struct nf_ct_iter_data iter = { + .net = net, + .portid = portid, + .report = report, + }; if (ctnetlink_needs_filter(family, cda)) { if (cda[CTA_FILTER]) @@ -1567,10 +1572,11 @@ static int ctnetlink_flush_conntrack(struct net *net, filter = ctnetlink_alloc_filter(cda, family); if (IS_ERR(filter)) return PTR_ERR(filter); + + iter.data = filter; } - nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter, - portid, report); + nf_ct_iterate_cleanup_net(ctnetlink_flush_iterate, &iter); kfree(filter); return 0; @@ -1708,6 +1714,7 @@ static int ctnetlink_done_list(struct netlink_callback *cb) return 0; } +#ifdef CONFIG_NF_CONNTRACK_EVENTS static int ctnetlink_dump_one_entry(struct sk_buff *skb, struct netlink_callback *cb, struct nf_conn *ct, @@ -1748,63 +1755,62 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb, return res; } +#endif static int -ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying) +ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) +{ + return 0; +} + +static int +ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) { struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; - struct nf_conn *ct, *last; + struct nf_conn *last = ctx->last; +#ifdef CONFIG_NF_CONNTRACK_EVENTS + const struct net *net = sock_net(skb->sk); + struct nf_conntrack_net_ecache *ecache_net; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - struct hlist_nulls_head *list; - struct net *net = sock_net(skb->sk); - int res, cpu; +#endif if (ctx->done) return 0; - last = ctx->last; + ctx->last = NULL; - for (cpu = ctx->cpu; cpu < nr_cpu_ids; cpu++) { - struct ct_pcpu *pcpu; +#ifdef CONFIG_NF_CONNTRACK_EVENTS + ecache_net = nf_conn_pernet_ecache(net); + spin_lock_bh(&ecache_net->dying_lock); - if (!cpu_possible(cpu)) - continue; + hlist_nulls_for_each_entry(h, n, &ecache_net->dying_list, hnnode) { + struct nf_conn *ct; + int res; - pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - spin_lock_bh(&pcpu->lock); - list = dying ? &pcpu->dying : &pcpu->unconfirmed; -restart: - hlist_nulls_for_each_entry(h, n, list, hnnode) { - ct = nf_ct_tuplehash_to_ctrack(h); + ct = nf_ct_tuplehash_to_ctrack(h); + if (last && last != ct) + continue; - res = ctnetlink_dump_one_entry(skb, cb, ct, dying); - if (res < 0) { - ctx->cpu = cpu; - spin_unlock_bh(&pcpu->lock); - goto out; - } - } - if (ctx->last) { - ctx->last = NULL; - goto restart; + res = ctnetlink_dump_one_entry(skb, cb, ct, true); + if (res < 0) { + spin_unlock_bh(&ecache_net->dying_lock); + nf_ct_put(last); + return skb->len; } - spin_unlock_bh(&pcpu->lock); + + nf_ct_put(last); + last = NULL; } + + spin_unlock_bh(&ecache_net->dying_lock); +#endif ctx->done = true; -out: - if (last) - nf_ct_put(last); + nf_ct_put(last); return skb->len; } -static int -ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) -{ - return ctnetlink_dump_list(skb, cb, true); -} - static int ctnetlink_get_ct_dying(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) @@ -1820,12 +1826,6 @@ static int ctnetlink_get_ct_dying(struct sk_buff *skb, return -EOPNOTSUPP; } -static int -ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) -{ - return ctnetlink_dump_list(skb, cb, false); -} - static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index d1f2d3c8d2b1..895b09cbd7cf 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -538,9 +538,13 @@ retry: out_unlock: mutex_unlock(&nf_ct_proto_mutex); - if (fixup_needed) - nf_ct_iterate_cleanup_net(net, nf_ct_tcp_fixup, - (void *)(unsigned long)nfproto, 0, 0); + if (fixup_needed) { + struct nf_ct_iter_data iter_data = { + .net = net, + .data = (void *)(unsigned long)nfproto, + }; + nf_ct_iterate_cleanup_net(nf_ct_tcp_fixup, &iter_data); + } return err; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 8ec55cd72572..a63b51dceaf2 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -485,7 +485,6 @@ static bool tcp_in_window(struct nf_conn *ct, struct nf_tcp_net *tn = nf_tcp_pernet(net); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; - const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; u16 win_raw; s32 receiver_offset; @@ -508,18 +507,6 @@ static bool tcp_in_window(struct nf_conn *ct, ack -= receiver_offset; sack -= receiver_offset; - pr_debug("tcp_in_window: START\n"); - pr_debug("tcp_in_window: "); - nf_ct_dump_tuple(tuple); - pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", - seq, ack, receiver_offset, sack, receiver_offset, win, end); - pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); - if (sender->td_maxwin == 0) { /* * Initialize sender data. @@ -556,24 +543,14 @@ static bool tcp_in_window(struct nf_conn *ct, } } - } else if (((state->state == TCP_CONNTRACK_SYN_SENT - && dir == IP_CT_DIR_ORIGINAL) - || (state->state == TCP_CONNTRACK_SYN_RECV - && dir == IP_CT_DIR_REPLY)) - && after(end, sender->td_end)) { + } else if (tcph->syn && + after(end, sender->td_end) && + (state->state == TCP_CONNTRACK_SYN_SENT || + state->state == TCP_CONNTRACK_SYN_RECV)) { /* * RFC 793: "if a TCP is reinitialized ... then it need * not wait at all; it must only be sure to use sequence * numbers larger than those recently used." - */ - sender->td_end = - sender->td_maxend = end; - sender->td_maxwin = (win == 0 ? 1 : win); - - tcp_options(skb, dataoff, tcph, sender); - } else if (tcph->syn && dir == IP_CT_DIR_REPLY && - state->state == TCP_CONNTRACK_SYN_SENT) { - /* Retransmitted syn-ack, or syn (simultaneous open). * * Re-init state for this direction, just like for the first * syn(-ack) reply, it might differ in seq, ack or tcp options. @@ -581,7 +558,8 @@ static bool tcp_in_window(struct nf_conn *ct, tcp_init_sender(sender, receiver, skb, dataoff, tcph, end, win); - if (!tcph->ack) + + if (dir == IP_CT_DIR_REPLY && !tcph->ack) return true; } @@ -606,27 +584,10 @@ static bool tcp_in_window(struct nf_conn *ct, */ seq = end = sender->td_end; - pr_debug("tcp_in_window: "); - nf_ct_dump_tuple(tuple); - pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", - seq, ack, receiver_offset, sack, receiver_offset, win, end); - pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); - /* Is the ending sequence in the receive window (if available)? */ in_recv_win = !receiver->td_maxwin || after(end, sender->td_end - receiver->td_maxwin - 1); - pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", - before(seq, sender->td_maxend + 1), - (in_recv_win ? 1 : 0), - before(sack, receiver->td_end + 1), - after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); - if (before(seq, sender->td_maxend + 1) && in_recv_win && before(sack, receiver->td_end + 1) && @@ -707,11 +668,6 @@ static bool tcp_in_window(struct nf_conn *ct, } } - pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " - "receiver end=%u maxend=%u maxwin=%u\n", - res, sender->td_end, sender->td_maxend, sender->td_maxwin, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin); - return res; } @@ -781,8 +737,6 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, enum tcp_conntrack new_state; struct net *net = nf_ct_net(ct); const struct nf_tcp_net *tn = nf_tcp_pernet(net); - const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; - const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; /* Don't need lock here: this conntrack not in circulation yet */ new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; @@ -835,14 +789,6 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, /* tcp_packet will set them */ ct->proto.tcp.last_index = TCP_NONE_SET; - - pr_debug("%s: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - __func__, - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); return true; } @@ -1041,10 +987,11 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, } /* Invalid packet */ - pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", - dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, state, "invalid state"); + nf_ct_l4proto_log_invalid(skb, ct, state, + "packet (index %d) in dir %d invalid, state %s", + index, dir, + tcp_conntrack_names[old_state]); return -NF_ACCEPT; case TCP_CONNTRACK_TIME_WAIT: /* RFC5961 compliance cause stack to send "challenge-ACK" diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 3e1afd10a9b6..6ad7bbc90d38 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -693,7 +693,7 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, + .extra2 = SYSCTL_TWO, }, #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP @@ -823,7 +823,7 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, -#if IS_ENABLED(CONFIG_NFT_FLOW_OFFLOAD) +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = { .procname = "nf_flowtable_udp_timeout", .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index cec166ecba77..0f828d05ea60 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -38,7 +38,12 @@ static int untimeout(struct nf_conn *ct, void *timeout) void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout) { - nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0); + struct nf_ct_iter_data iter_data = { + .net = net, + .data = timeout, + }; + + nf_ct_iterate_cleanup_net(untimeout, &iter_data); } EXPORT_SYMBOL_GPL(nf_ct_untimeout); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 3db256da919b..f2def06d1070 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init); static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) { - tcp->state = TCP_CONNTRACK_ESTABLISHED; tcp->seen[0].td_maxwin = 0; tcp->seen[1].td_maxwin = 0; } -static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) +static void flow_offload_fixup_ct(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); int l4num = nf_ct_protonum(ct); @@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) if (l4num == IPPROTO_TCP) { struct nf_tcp_net *tn = nf_tcp_pernet(net); - timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + flow_offload_fixup_tcp(&ct->proto.tcp); + + timeout = tn->timeouts[ct->proto.tcp.state]; timeout -= tn->offload_timeout; } else if (l4num == IPPROTO_UDP) { struct nf_udp_net *tn = nf_udp_pernet(net); @@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); } -static void flow_offload_fixup_ct_state(struct nf_conn *ct) -{ - if (nf_ct_protonum(ct) == IPPROTO_TCP) - flow_offload_fixup_tcp(&ct->proto.tcp); -} - -static void flow_offload_fixup_ct(struct nf_conn *ct) -{ - flow_offload_fixup_ct_state(ct); - flow_offload_fixup_ct_timeout(ct); -} - static void flow_offload_route_release(struct flow_offload *flow) { nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); @@ -335,8 +324,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, u32 timeout; timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); - if (READ_ONCE(flow->timeout) != timeout) + if (timeout - READ_ONCE(flow->timeout) > HZ) WRITE_ONCE(flow->timeout, timeout); + else + return; if (likely(!nf_flowtable_hw_offload(flow_table))) return; @@ -359,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table, rhashtable_remove_fast(&flow_table->rhashtable, &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, nf_flow_offload_rhash_params); - - clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); - - if (nf_flow_has_expired(flow)) - flow_offload_fixup_ct(flow->ct); - else - flow_offload_fixup_ct_timeout(flow->ct); - flow_offload_free(flow); } void flow_offload_teardown(struct flow_offload *flow) { + clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); set_bit(NF_FLOW_TEARDOWN, &flow->flags); - - flow_offload_fixup_ct_state(flow->ct); + flow_offload_fixup_ct(flow->ct); } EXPORT_SYMBOL_GPL(flow_offload_teardown); @@ -438,33 +421,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, return err; } -static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple) -{ - struct dst_entry *dst; - - if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH || - tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) { - dst = tuple->dst_cache; - if (!dst_check(dst, tuple->dst_cookie)) - return true; - } - - return false; -} - -static bool nf_flow_has_stale_dst(struct flow_offload *flow) -{ - return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) || - flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple); -} - static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { if (nf_flow_has_expired(flow) || - nf_ct_is_dying(flow->ct) || - nf_flow_has_stale_dst(flow)) - set_bit(NF_FLOW_TEARDOWN, &flow->flags); + nf_ct_is_dying(flow->ct)) + flow_offload_teardown(flow); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { if (test_bit(NF_FLOW_HW, &flow->flags)) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 32c0eb1b4821..b350fe9d00b0 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -248,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) return true; } +static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple) +{ + if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH && + tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM) + return true; + + return dst_check(tuple->dst_cache, tuple->dst_cookie); +} + static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, const struct nf_hook_state *state, struct dst_entry *dst) @@ -367,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, iph->protocol, skb, thoff)) return NF_ACCEPT; + if (!nf_flow_dst_check(&tuplehash->tuple)) { + flow_offload_teardown(flow); + return NF_ACCEPT; + } + if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; @@ -624,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff)) return NF_ACCEPT; + if (!nf_flow_dst_check(&tuplehash->tuple)) { + flow_offload_teardown(flow); + return NF_ACCEPT; + } + if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index e32fac374608..1a506b0c6511 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -77,11 +77,14 @@ EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4); static void iterate_cleanup_work(struct work_struct *work) { + struct nf_ct_iter_data iter_data = {}; struct masq_dev_work *w; w = container_of(work, struct masq_dev_work, work); - nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0); + iter_data.net = w->net; + iter_data.data = (void *)w; + nf_ct_iterate_cleanup_net(w->iter, &iter_data); put_net_track(w->net, &w->ns_tracker); kfree(w); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f3ad02a399f8..12fc9cda4a2c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8342,16 +8342,7 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); static bool nft_expr_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { - if (!expr->ops->reduce) { - pr_warn_once("missing reduce for expression %s ", - expr->ops->type->name); - return false; - } - - if (nft_reduce_is_readonly(expr)) - return false; - - return expr->ops->reduce(track, expr); + return false; } static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 7e2c8dd01408..ad3bbe34ca88 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -45,6 +45,7 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket"); static unsigned int nfnetlink_pernet_id __read_mostly; struct nfnl_net { + unsigned int ctnetlink_listeners; struct sock *nfnl; }; @@ -654,7 +655,6 @@ static void nfnetlink_rcv(struct sk_buff *skb) netlink_rcv_skb(skb, nfnetlink_rcv_msg); } -#ifdef CONFIG_MODULES static int nfnetlink_bind(struct net *net, int group) { const struct nfnetlink_subsystem *ss; @@ -670,9 +670,44 @@ static int nfnetlink_bind(struct net *net, int group) rcu_read_unlock(); if (!ss) request_module_nowait("nfnetlink-subsys-%d", type); + +#ifdef CONFIG_NF_CONNTRACK_EVENTS + if (type == NFNL_SUBSYS_CTNETLINK) { + struct nfnl_net *nfnlnet = nfnl_pernet(net); + + nfnl_lock(NFNL_SUBSYS_CTNETLINK); + + if (WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == UINT_MAX)) { + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); + return -EOVERFLOW; + } + + nfnlnet->ctnetlink_listeners++; + if (nfnlnet->ctnetlink_listeners == 1) + WRITE_ONCE(net->ct.ctnetlink_has_listener, true); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); + } +#endif return 0; } + +static void nfnetlink_unbind(struct net *net, int group) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + int type = nfnl_group2type[group]; + + if (type == NFNL_SUBSYS_CTNETLINK) { + struct nfnl_net *nfnlnet = nfnl_pernet(net); + + nfnl_lock(NFNL_SUBSYS_CTNETLINK); + WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == 0); + nfnlnet->ctnetlink_listeners--; + if (nfnlnet->ctnetlink_listeners == 0) + WRITE_ONCE(net->ct.ctnetlink_has_listener, false); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); + } #endif +} static int __net_init nfnetlink_net_init(struct net *net) { @@ -680,9 +715,8 @@ static int __net_init nfnetlink_net_init(struct net *net) struct netlink_kernel_cfg cfg = { .groups = NFNLGRP_MAX, .input = nfnetlink_rcv, -#ifdef CONFIG_MODULES .bind = nfnetlink_bind, -#endif + .unbind = nfnetlink_unbind, }; nfnlnet->nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index eea486f32971..f069c24c6146 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -33,8 +33,19 @@ static unsigned int nfct_timeout_id __read_mostly; +struct ctnl_timeout { + struct list_head head; + struct rcu_head rcu_head; + refcount_t refcnt; + char name[CTNL_TIMEOUT_NAME_MAX]; + struct nf_ct_timeout timeout; + + struct list_head free_head; +}; + struct nfct_timeout_pernet { struct list_head nfct_timeout_list; + struct list_head nfct_timeout_freelist; }; MODULE_LICENSE("GPL"); @@ -574,20 +585,36 @@ static int __net_init cttimeout_net_init(struct net *net) struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); INIT_LIST_HEAD(&pernet->nfct_timeout_list); + INIT_LIST_HEAD(&pernet->nfct_timeout_freelist); return 0; } +static void __net_exit cttimeout_net_pre_exit(struct net *net) +{ + struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); + struct ctnl_timeout *cur, *tmp; + + list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) { + list_del_rcu(&cur->head); + list_add(&cur->free_head, &pernet->nfct_timeout_freelist); + } + + /* core calls synchronize_rcu() after this */ +} + static void __net_exit cttimeout_net_exit(struct net *net) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); struct ctnl_timeout *cur, *tmp; - nf_ct_unconfirmed_destroy(net); + if (list_empty(&pernet->nfct_timeout_freelist)) + return; + nf_ct_untimeout(net, NULL); - list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) { - list_del_rcu(&cur->head); + list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, head) { + list_del(&cur->free_head); if (refcount_dec_and_test(&cur->refcnt)) kfree_rcu(cur, rcu_head); @@ -596,6 +623,7 @@ static void __net_exit cttimeout_net_exit(struct net *net) static struct pernet_operations cttimeout_ops = { .init = cttimeout_net_init, + .pre_exit = cttimeout_net_pre_exit, .exit = cttimeout_net_exit, .id = &nfct_timeout_id, .size = sizeof(struct nfct_timeout_pernet), @@ -628,13 +656,24 @@ err_out: return ret; } +static int untimeout(struct nf_conn *ct, void *timeout) +{ + struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); + + if (timeout_ext) + RCU_INIT_POINTER(timeout_ext->timeout, NULL); + + return 0; +} + static void __exit cttimeout_exit(void) { nfnetlink_subsys_unregister(&cttimeout_subsys); unregister_pernet_subsys(&cttimeout_ops); RCU_INIT_POINTER(nf_ct_timeout_hook, NULL); - synchronize_rcu(); + + nf_ct_iterate_destroy(untimeout, NULL); } module_init(cttimeout_init); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 900d48c810a1..a16cf47199b7 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route, route->tuple[dir].xmit_type = nft_xmit_type(dst_cache); } +static bool nft_is_valid_ether_device(const struct net_device *dev) +{ + if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || + dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr)) + return false; + + return true; +} + static int nft_dev_fill_forward_path(const struct nf_flow_route *route, const struct dst_entry *dst_cache, const struct nf_conn *ct, @@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route, struct neighbour *n; u8 nud_state; + if (!nft_is_valid_ether_device(dev)) + goto out; + n = dst_neigh_lookup(dst_cache, daddr); if (!n) return -1; @@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route, if (!(nud_state & NUD_VALID)) return -1; +out: return dev_fill_forward_path(dev, ha, stack); } @@ -78,15 +91,6 @@ struct nft_forward_info { enum flow_offload_xmit_type xmit_type; }; -static bool nft_is_valid_ether_device(const struct net_device *dev) -{ - if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || - dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr)) - return false; - - return true; -} - static void nft_dev_path_info(const struct net_device_path_stack *stack, struct nft_forward_info *info, unsigned char *ha, struct nf_flowtable *flowtable) @@ -119,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack, info->indev = NULL; break; } - info->outdev = path->dev; + if (!info->outdev) + info->outdev = path->dev; info->encap[info->num_encaps].id = path->encap.id; info->encap[info->num_encaps].proto = path->encap.proto; info->num_encaps++; @@ -227,11 +232,19 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, switch (nft_pf(pkt)) { case NFPROTO_IPV4: fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; + fl.u.ip4.saddr = ct->tuplehash[dir].tuple.dst.u3.ip; fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; + fl.u.ip4.flowi4_iif = this_dst->dev->ifindex; + fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos); + fl.u.ip4.flowi4_mark = pkt->skb->mark; break; case NFPROTO_IPV6: fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; + fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6; fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex; + fl.u.ip6.flowi6_iif = this_dst->dev->ifindex; + fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb)); + fl.u.ip6.flowi6_mark = pkt->skb->mark; break; } @@ -293,7 +306,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, case IPPROTO_TCP: tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(_tcph), &_tcph); - if (unlikely(!tcph || tcph->fin || tcph->rst)) + if (unlikely(!tcph || tcph->fin || tcph->rst || + !nf_conntrack_tcp_established(ct))) goto out; break; case IPPROTO_UDP: diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index d600a566da32..7325bee7d144 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -349,7 +349,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, *ext = &rbe->ext; return -EEXIST; } else { - p = &parent->rb_left; + overlap = false; + if (nft_rbtree_interval_end(rbe)) + p = &parent->rb_left; + else + p = &parent->rb_right; } } diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 6d9e8e0a3a7d..05ae5a338b6f 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -54,6 +54,32 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo } #endif +static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) +{ + const struct net_device *indev = nft_in(pkt); + const struct sk_buff *skb = pkt->skb; + struct sock *sk = NULL; + + if (!indev) + return NULL; + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev); + break; +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + case NFPROTO_IPV6: + sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } + + return sk; +} + static void nft_socket_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -67,20 +93,7 @@ static void nft_socket_eval(const struct nft_expr *expr, sk = NULL; if (!sk) - switch(nft_pf(pkt)) { - case NFPROTO_IPV4: - sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt)); - break; -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) - case NFPROTO_IPV6: - sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt)); - break; -#endif - default: - WARN_ON_ONCE(1); - regs->verdict.code = NFT_BREAK; - return; - } + sk = nft_socket_do_lookup(pkt); if (!sk) { regs->verdict.code = NFT_BREAK; @@ -224,6 +237,16 @@ static bool nft_socket_reduce(struct nft_regs_track *track, return nft_expr_reduce_bitwise(track, expr); } +static int nft_socket_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT)); +} + static struct nft_expr_type nft_socket_type; static const struct nft_expr_ops nft_socket_ops = { .type = &nft_socket_type, @@ -231,6 +254,7 @@ static const struct nft_expr_ops nft_socket_ops = { .eval = nft_socket_eval, .init = nft_socket_init, .dump = nft_socket_dump, + .validate = nft_socket_validate, .reduce = nft_socket_reduce, }; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1b5a9c2e1c29..0cd91f813a3b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1974,7 +1974,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, copied = len; } - skb_reset_transport_header(data_skb); err = skb_copy_datagram_msg(data_skb, 0, msg, copied); if (msg->msg_name) { diff --git a/net/nfc/core.c b/net/nfc/core.c index 67524982b89b..6ff3e10ff8e3 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -38,7 +38,7 @@ int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -94,7 +94,7 @@ int nfc_dev_up(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -142,7 +142,7 @@ int nfc_dev_down(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -207,7 +207,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -246,7 +246,7 @@ int nfc_stop_poll(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -291,7 +291,7 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -335,7 +335,7 @@ int nfc_dep_link_down(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -401,7 +401,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -448,7 +448,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -495,7 +495,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; kfree_skb(skb); goto error; @@ -552,7 +552,7 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -601,7 +601,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -1134,6 +1134,7 @@ int nfc_register_device(struct nfc_dev *dev) dev->rfkill = NULL; } } + dev->shutting_down = false; device_unlock(&dev->dev); rc = nfc_genl_device_added(dev); @@ -1167,12 +1168,10 @@ void nfc_unregister_device(struct nfc_dev *dev) rfkill_destroy(dev->rfkill); dev->rfkill = NULL; } + dev->shutting_down = true; device_unlock(&dev->dev); if (dev->ops->check_presence) { - device_lock(&dev->dev); - dev->shutting_down = true; - device_unlock(&dev->dev); del_timer_sync(&dev->check_pres_timer); cancel_work_sync(&dev->check_pres_work); } diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 6055dc9a82aa..aa5e712adf07 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, skb_frag = nci_skb_alloc(ndev, (NCI_DATA_HDR_SIZE + frag_len), - GFP_KERNEL); + GFP_ATOMIC); if (skb_frag == NULL) { rc = -ENOMEM; goto free_exit; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 19703a649b5a..78c4b6addf15 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, i = 0; skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; @@ -184,7 +184,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, if (i < data_len) { skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f184b0db79d4..7c62417ccfd7 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1244,7 +1244,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; @@ -1260,7 +1260,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, genlmsg_end(msg, hdr); - genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fd31334cf688..677f9cfa9660 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3477,7 +3477,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sll->sll_protocol = skb->protocol; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { const size_t max_len = min(sizeof(skb->cb), diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 5327d130c4b5..73ee2771093d 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -487,14 +487,27 @@ struct rds_tcp_net { /* All module specific customizations to the RDS-TCP socket should be done in * rds_tcp_tune() and applied after socket creation. */ -void rds_tcp_tune(struct socket *sock) +bool rds_tcp_tune(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct rds_tcp_net *rtn; tcp_sock_set_nodelay(sock->sk); lock_sock(sk); + /* TCP timer functions might access net namespace even after + * a process which created this net namespace terminated. + */ + if (!sk->sk_net_refcnt) { + if (!maybe_get_net(net)) { + release_sock(sk); + return false; + } + sk->sk_net_refcnt = 1; + netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); + } + rtn = net_generic(net, rds_tcp_netid); if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size; sk->sk_userlocks |= SOCK_SNDBUF_LOCK; @@ -504,6 +517,7 @@ void rds_tcp_tune(struct socket *sock) sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } release_sock(sk); + return true; } static void rds_tcp_accept_worker(struct work_struct *work) diff --git a/net/rds/tcp.h b/net/rds/tcp.h index dc8d745d6857..f8b5930d7b34 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -49,7 +49,7 @@ struct rds_tcp_statistics { }; /* tcp.c */ -void rds_tcp_tune(struct socket *sock); +bool rds_tcp_tune(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 5461d77fff4f..f0c477c5d1db 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) if (ret < 0) goto out; - rds_tcp_tune(sock); + if (!rds_tcp_tune(sock)) { + ret = -EINVAL; + goto out; + } if (isv6) { sin6.sin6_family = AF_INET6; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 09cadd556d1e..7edf2e69d3fe 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock) __module_get(new_sock->ops->owner); rds_tcp_keepalive(new_sock); - rds_tcp_tune(new_sock); + if (!rds_tcp_tune(new_sock)) { + ret = -EINVAL; + goto out; + } inet = inet_sk(new_sock->sk); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index e2e6b6b78578..fee6409c2bb3 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -1128,22 +1128,15 @@ static int rose_node_show(struct seq_file *seq, void *v) seq_puts(seq, "address mask n neigh neigh neigh\n"); else { const struct rose_node *rose_node = v; - /* if (rose_node->loopback) { - seq_printf(seq, "%-10s %04d 1 loopback\n", - rose2asc(rsbuf, &rose_node->address), - rose_node->mask); - } else { */ - seq_printf(seq, "%-10s %04d %d", - rose2asc(rsbuf, &rose_node->address), - rose_node->mask, - rose_node->count); - - for (i = 0; i < rose_node->count; i++) - seq_printf(seq, " %05d", - rose_node->neighbour[i]->number); - - seq_puts(seq, "\n"); - /* } */ + seq_printf(seq, "%-10s %04d %d", + rose2asc(rsbuf, &rose_node->address), + rose_node->mask, + rose_node->count); + + for (i = 0; i < rose_node->count; i++) + seq_printf(seq, " %05d", rose_node->neighbour[i]->number); + + seq_puts(seq, "\n"); } return 0; } diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 2b5f89713e36..ceba28e9dce6 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -351,7 +351,7 @@ static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall, */ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { - _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); + _enter("%d{%d}", call->debug_id, refcount_read(&call->ref)); mutex_lock(&call->user_mutex); rxrpc_release_call(rxrpc_sk(sock->sk), call); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 969e532f77a9..dcc0ec0bf3de 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -15,14 +15,6 @@ #include <keys/rxrpc-type.h> #include "protocol.h" -#if 0 -#define CHECK_SLAB_OKAY(X) \ - BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \ - (POISON_FREE << 8 | POISON_FREE)) -#else -#define CHECK_SLAB_OKAY(X) do {} while (0) -#endif - #define FCRYPT_BSIZE 8 struct rxrpc_crypt { union { @@ -68,7 +60,7 @@ struct rxrpc_net { struct proc_dir_entry *proc_net; /* Subdir in /proc/net */ u32 epoch; /* Local epoch for detecting local-end reset */ struct list_head calls; /* List of calls active in this namespace */ - rwlock_t call_lock; /* Lock for ->calls */ + spinlock_t call_lock; /* Lock for ->calls */ atomic_t nr_calls; /* Count of allocated calls */ atomic_t nr_conns; @@ -88,7 +80,7 @@ struct rxrpc_net { struct work_struct client_conn_reaper; struct timer_list client_conn_reap_timer; - struct list_head local_endpoints; + struct hlist_head local_endpoints; struct mutex local_mutex; /* Lock for ->local_endpoints */ DECLARE_HASHTABLE (peer_hash, 10); @@ -279,9 +271,9 @@ struct rxrpc_security { struct rxrpc_local { struct rcu_head rcu; atomic_t active_users; /* Number of users of the local endpoint */ - atomic_t usage; /* Number of references to the structure */ + refcount_t ref; /* Number of references to the structure */ struct rxrpc_net *rxnet; /* The network ns in which this resides */ - struct list_head link; + struct hlist_node link; struct socket *socket; /* my UDP socket */ struct work_struct processor; struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ @@ -304,7 +296,7 @@ struct rxrpc_local { */ struct rxrpc_peer { struct rcu_head rcu; /* This must be first */ - atomic_t usage; + refcount_t ref; unsigned long hash_key; struct hlist_node hash_link; struct rxrpc_local *local; @@ -406,7 +398,7 @@ enum rxrpc_conn_proto_state { */ struct rxrpc_bundle { struct rxrpc_conn_parameters params; - atomic_t usage; + refcount_t ref; unsigned int debug_id; bool try_upgrade; /* True if the bundle is attempting upgrade */ bool alloc_conn; /* True if someone's getting a conn */ @@ -427,7 +419,7 @@ struct rxrpc_connection { struct rxrpc_conn_proto proto; struct rxrpc_conn_parameters params; - atomic_t usage; + refcount_t ref; struct rcu_head rcu; struct list_head cache_link; @@ -609,7 +601,7 @@ struct rxrpc_call { int error; /* Local error incurred */ enum rxrpc_call_state state; /* current state of call */ enum rxrpc_call_completion completion; /* Call completion condition */ - atomic_t usage; + refcount_t ref; u16 service_id; /* service ID */ u8 security_ix; /* Security type */ enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */ @@ -1014,6 +1006,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *); extern const struct seq_operations rxrpc_call_seq_ops; extern const struct seq_operations rxrpc_connection_seq_ops; extern const struct seq_operations rxrpc_peer_seq_ops; +extern const struct seq_operations rxrpc_local_seq_ops; /* * recvmsg.c diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 1ae90fb97936..99e10eea3732 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -91,7 +91,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, (head + 1) & (size - 1)); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, - atomic_read(&conn->usage), here); + refcount_read(&conn->ref), here); } /* Now it gets complicated, because calls get registered with the @@ -104,7 +104,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, call->state = RXRPC_CALL_SERVER_PREALLOC; trace_rxrpc_call(call->debug_id, rxrpc_call_new_service, - atomic_read(&call->usage), + refcount_read(&call->ref), here, (const void *)user_call_ID); write_lock(&rx->call_lock); @@ -140,9 +140,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxnet = call->rxnet; - write_lock(&rxnet->call_lock); - list_add_tail(&call->link, &rxnet->calls); - write_unlock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); + list_add_tail_rcu(&call->link, &rxnet->calls); + spin_unlock_bh(&rxnet->call_lock); b->call_backlog[call_head] = call; smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 22e05de5d1ca..e426f6831aab 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -377,9 +377,9 @@ recheck_state: if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && (int)call->conn->hi_serial - (int)call->rx_serial > 0) { trace_rxrpc_call_reset(call); - rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ECONNRESET); + rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET); } else { - rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); } set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 043508fd8d8a..84d0a4109645 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -112,7 +112,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, found_extant_call: rxrpc_get_call(call, rxrpc_call_got); read_unlock(&rx->call_lock); - _leave(" = %p [%d]", call, atomic_read(&call->usage)); + _leave(" = %p [%d]", call, refcount_read(&call->ref)); return call; } @@ -160,7 +160,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, spin_lock_init(&call->notify_lock); spin_lock_init(&call->input_lock); rwlock_init(&call->state_lock); - atomic_set(&call->usage, 1); + refcount_set(&call->ref, 1); call->debug_id = debug_id; call->tx_total_len = -1; call->next_rx_timo = 20 * HZ; @@ -299,7 +299,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, call->interruptibility = p->interruptibility; call->tx_total_len = p->tx_total_len; trace_rxrpc_call(call->debug_id, rxrpc_call_new_client, - atomic_read(&call->usage), + refcount_read(&call->ref), here, (const void *)p->user_call_ID); if (p->kernel) __set_bit(RXRPC_CALL_KERNEL, &call->flags); @@ -337,9 +337,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxnet = call->rxnet; - write_lock(&rxnet->call_lock); - list_add_tail(&call->link, &rxnet->calls); - write_unlock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); + list_add_tail_rcu(&call->link, &rxnet->calls); + spin_unlock_bh(&rxnet->call_lock); /* From this point on, the call is protected by its own lock. */ release_sock(&rx->sk); @@ -352,7 +352,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, goto error_attached_to_socket; trace_rxrpc_call(call->debug_id, rxrpc_call_connected, - atomic_read(&call->usage), here, NULL); + refcount_read(&call->ref), here, NULL); rxrpc_start_call_timer(call); @@ -372,7 +372,7 @@ error_dup_user_ID: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, -EEXIST); trace_rxrpc_call(call->debug_id, rxrpc_call_error, - atomic_read(&call->usage), here, ERR_PTR(-EEXIST)); + refcount_read(&call->ref), here, ERR_PTR(-EEXIST)); rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); @@ -386,7 +386,7 @@ error_dup_user_ID: */ error_attached_to_socket: trace_rxrpc_call(call->debug_id, rxrpc_call_error, - atomic_read(&call->usage), here, ERR_PTR(ret)); + refcount_read(&call->ref), here, ERR_PTR(ret)); set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, ret); @@ -442,8 +442,9 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, bool rxrpc_queue_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); - int n = atomic_fetch_add_unless(&call->usage, 1, 0); - if (n == 0) + int n; + + if (!__refcount_inc_not_zero(&call->ref, &n)) return false; if (rxrpc_queue_work(&call->processor)) trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1, @@ -459,7 +460,7 @@ bool rxrpc_queue_call(struct rxrpc_call *call) bool __rxrpc_queue_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); - int n = atomic_read(&call->usage); + int n = refcount_read(&call->ref); ASSERTCMP(n, >=, 1); if (rxrpc_queue_work(&call->processor)) trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n, @@ -476,7 +477,7 @@ void rxrpc_see_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); if (call) { - int n = atomic_read(&call->usage); + int n = refcount_read(&call->ref); trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n, here, NULL); @@ -486,11 +487,11 @@ void rxrpc_see_call(struct rxrpc_call *call) bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_fetch_add_unless(&call->usage, 1, 0); + int n; - if (n == 0) + if (!__refcount_inc_not_zero(&call->ref, &n)) return false; - trace_rxrpc_call(call->debug_id, op, n, here, NULL); + trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); return true; } @@ -500,9 +501,10 @@ bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&call->usage); + int n; - trace_rxrpc_call(call->debug_id, op, n, here, NULL); + __refcount_inc(&call->ref, &n); + trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); } /* @@ -527,10 +529,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) struct rxrpc_connection *conn = call->conn; bool put = false; - _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); + _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); trace_rxrpc_call(call->debug_id, rxrpc_call_release, - atomic_read(&call->usage), + refcount_read(&call->ref), here, (const void *)call->flags); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); @@ -619,21 +621,21 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) struct rxrpc_net *rxnet = call->rxnet; const void *here = __builtin_return_address(0); unsigned int debug_id = call->debug_id; + bool dead; int n; ASSERT(call != NULL); - n = atomic_dec_return(&call->usage); + dead = __refcount_dec_and_test(&call->ref, &n); trace_rxrpc_call(debug_id, op, n, here, NULL); - ASSERTCMP(n, >=, 0); - if (n == 0) { + if (dead) { _debug("call %d dead", call->debug_id); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); if (!list_empty(&call->link)) { - write_lock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); list_del_init(&call->link); - write_unlock(&rxnet->call_lock); + spin_unlock_bh(&rxnet->call_lock); } rxrpc_cleanup_call(call); @@ -705,7 +707,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); if (!list_empty(&rxnet->calls)) { - write_lock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); while (!list_empty(&rxnet->calls)) { call = list_entry(rxnet->calls.next, @@ -716,16 +718,16 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) list_del_init(&call->link); pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), + call, refcount_read(&call->ref), rxrpc_call_states[call->state], call->flags, call->events); - write_unlock(&rxnet->call_lock); + spin_unlock_bh(&rxnet->call_lock); cond_resched(); - write_lock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); } - write_unlock(&rxnet->call_lock); + spin_unlock_bh(&rxnet->call_lock); } atomic_dec(&rxnet->nr_calls); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 8120138dac01..3c9eeb5b750c 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -102,7 +102,7 @@ void rxrpc_destroy_client_conn_ids(void) if (!idr_is_empty(&rxrpc_client_conn_ids)) { idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) { pr_err("AF_RXRPC: Leaked client conn %p {%d}\n", - conn, atomic_read(&conn->usage)); + conn, refcount_read(&conn->ref)); } BUG(); } @@ -122,7 +122,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, if (bundle) { bundle->params = *cp; rxrpc_get_peer(bundle->params.peer); - atomic_set(&bundle->usage, 1); + refcount_set(&bundle->ref, 1); spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); } @@ -131,7 +131,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle) { - atomic_inc(&bundle->usage); + refcount_inc(&bundle->ref); return bundle; } @@ -144,10 +144,13 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) void rxrpc_put_bundle(struct rxrpc_bundle *bundle) { unsigned int d = bundle->debug_id; - unsigned int u = atomic_dec_return(&bundle->usage); + bool dead; + int r; - _debug("PUT B=%x %u", d, u); - if (u == 0) + dead = __refcount_dec_and_test(&bundle->ref, &r); + + _debug("PUT B=%x %d", d, r); + if (dead) rxrpc_free_bundle(bundle); } @@ -169,7 +172,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) return ERR_PTR(-ENOMEM); } - atomic_set(&conn->usage, 1); + refcount_set(&conn->ref, 1); conn->bundle = bundle; conn->params = bundle->params; conn->out_clientflag = RXRPC_CLIENT_INITIATED; @@ -195,7 +198,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) key_get(conn->params.key); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client, - atomic_read(&conn->usage), + refcount_read(&conn->ref), __builtin_return_address(0)); atomic_inc(&rxnet->nr_client_conns); @@ -966,14 +969,13 @@ void rxrpc_put_client_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); unsigned int debug_id = conn->debug_id; - int n; + bool dead; + int r; - n = atomic_dec_return(&conn->usage); - trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here); - if (n <= 0) { - ASSERTCMP(n, >=, 0); + dead = __refcount_dec_and_test(&conn->ref, &r); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here); + if (dead) rxrpc_kill_client_conn(conn); - } } /* diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index b2159dbf5412..22089e37e97f 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -104,7 +104,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, goto not_found; *_peer = peer; conn = rxrpc_find_service_conn_rcu(peer, skb); - if (!conn || atomic_read(&conn->usage) == 0) + if (!conn || refcount_read(&conn->ref) == 0) goto not_found; _leave(" = %p", conn); return conn; @@ -114,7 +114,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, */ conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); - if (!conn || atomic_read(&conn->usage) == 0) { + if (!conn || refcount_read(&conn->ref) == 0) { _debug("no conn"); goto not_found; } @@ -183,7 +183,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; default: - chan->last_abort = RX_USER_ABORT; + chan->last_abort = RX_CALL_DEAD; chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; } @@ -263,11 +263,12 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn) bool rxrpc_queue_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); - int n = atomic_fetch_add_unless(&conn->usage, 1, 0); - if (n == 0) + int r; + + if (!__refcount_inc_not_zero(&conn->ref, &r)) return false; if (rxrpc_queue_work(&conn->processor)) - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here); else rxrpc_put_connection(conn); return true; @@ -280,7 +281,7 @@ void rxrpc_see_connection(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); if (conn) { - int n = atomic_read(&conn->usage); + int n = refcount_read(&conn->ref); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here); } @@ -292,9 +293,10 @@ void rxrpc_see_connection(struct rxrpc_connection *conn) struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&conn->usage); + int r; - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here); + __refcount_inc(&conn->ref, &r); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here); return conn; } @@ -305,11 +307,11 @@ struct rxrpc_connection * rxrpc_get_connection_maybe(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); + int r; if (conn) { - int n = atomic_fetch_add_unless(&conn->usage, 1, 0); - if (n > 0) - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here); + if (__refcount_inc_not_zero(&conn->ref, &r)) + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here); else conn = NULL; } @@ -333,12 +335,11 @@ void rxrpc_put_service_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); unsigned int debug_id = conn->debug_id; - int n; + int r; - n = atomic_dec_return(&conn->usage); - trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here); - ASSERTCMP(n, >=, 0); - if (n == 1) + __refcount_dec(&conn->ref, &r); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here); + if (r - 1 == 1) rxrpc_set_service_reap_timer(conn->params.local->rxnet, jiffies + rxrpc_connection_expiry); } @@ -351,9 +352,9 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) struct rxrpc_connection *conn = container_of(rcu, struct rxrpc_connection, rcu); - _enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage)); + _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref)); - ASSERTCMP(atomic_read(&conn->usage), ==, 0); + ASSERTCMP(refcount_read(&conn->ref), ==, 0); _net("DESTROY CONN %d", conn->debug_id); @@ -392,8 +393,8 @@ void rxrpc_service_connection_reaper(struct work_struct *work) write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { - ASSERTCMP(atomic_read(&conn->usage), >, 0); - if (likely(atomic_read(&conn->usage) > 1)) + ASSERTCMP(refcount_read(&conn->ref), >, 0); + if (likely(refcount_read(&conn->ref) > 1)) continue; if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; @@ -405,7 +406,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; _debug("reap CONN %d { u=%d,t=%ld }", - conn->debug_id, atomic_read(&conn->usage), + conn->debug_id, refcount_read(&conn->ref), (long)expire_at - (long)now); if (time_before(now, expire_at)) { @@ -418,7 +419,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) /* The usage count sits at 1 whilst the object is unused on the * list; we reduce that to 0 to make the object unavailable. */ - if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) + if (!refcount_dec_if_one(&conn->ref)) continue; trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL); @@ -442,7 +443,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) link); list_del_init(&conn->link); - ASSERTCMP(atomic_read(&conn->usage), ==, 0); + ASSERTCMP(refcount_read(&conn->ref), ==, 0); rxrpc_kill_connection(conn); } @@ -470,7 +471,7 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { pr_err("AF_RXRPC: Leaked conn %p {%d}\n", - conn, atomic_read(&conn->usage)); + conn, refcount_read(&conn->ref)); leak = true; } write_unlock(&rxnet->conn_lock); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index e1966dfc9152..6e6aa02c6f9e 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -9,7 +9,7 @@ #include "ar-internal.h" static struct rxrpc_bundle rxrpc_service_dummy_bundle = { - .usage = ATOMIC_INIT(1), + .ref = REFCOUNT_INIT(1), .debug_id = UINT_MAX, .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock), }; @@ -99,7 +99,7 @@ conn_published: return; found_extant_conn: - if (atomic_read(&cursor->usage) == 0) + if (refcount_read(&cursor->ref) == 0) goto replace_old_connection; write_sequnlock_bh(&peer->service_conn_lock); /* We should not be able to get here. rxrpc_incoming_connection() is @@ -132,7 +132,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn * the rxrpc_connections list. */ conn->state = RXRPC_CONN_SERVICE_PREALLOC; - atomic_set(&conn->usage, 2); + refcount_set(&conn->ref, 2); conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle); atomic_inc(&rxnet->nr_conns); @@ -142,7 +142,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn write_unlock(&rxnet->conn_lock); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, - atomic_read(&conn->usage), + refcount_read(&conn->ref), __builtin_return_address(0)); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index dc201363f2c4..16c0af41c202 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -903,6 +903,33 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_respond_to_ack); } + /* If we get an EXCEEDS_WINDOW ACK from the server, it probably + * indicates that the client address changed due to NAT. The server + * lost the call because it switched to a different peer. + */ + if (unlikely(buf.ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) && + first_soft_ack == 1 && + prev_pkt == 0 && + rxrpc_is_client_call(call)) { + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + 0, -ENETRESET); + return; + } + + /* If we get an OUT_OF_SEQUENCE ACK from the server, that can also + * indicate a change of address. However, we can retransmit the call + * if we still have it buffered to the beginning. + */ + if (unlikely(buf.ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) && + first_soft_ack == 1 && + prev_pkt == 0 && + call->tx_hard_ack == 0 && + rxrpc_is_client_call(call)) { + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + 0, -ENETRESET); + return; + } + /* Discard any out-of-order or duplicate ACKs (outside lock). */ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, @@ -1154,8 +1181,6 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, */ static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) { - CHECK_SLAB_OKAY(&local->usage); - if (rxrpc_get_local_maybe(local)) { skb_queue_tail(&local->reject_queue, skb); rxrpc_queue_local(local); @@ -1413,7 +1438,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) } } - if (!call || atomic_read(&call->usage) == 0) { + if (!call || refcount_read(&call->ref) == 0) { if (rxrpc_to_client(sp) || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) goto bad_message; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index a4111408ffd0..96ecb7356c0f 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -79,10 +79,10 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); if (local) { - atomic_set(&local->usage, 1); + refcount_set(&local->ref, 1); atomic_set(&local->active_users, 1); local->rxnet = rxnet; - INIT_LIST_HEAD(&local->link); + INIT_HLIST_NODE(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); init_rwsem(&local->defrag_sem); skb_queue_head_init(&local->reject_queue); @@ -117,6 +117,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) local, srx->transport_type, srx->transport.family); udp_conf.family = srx->transport.family; + udp_conf.use_udp_checksums = true; if (udp_conf.family == AF_INET) { udp_conf.local_ip = srx->transport.sin.sin_addr; udp_conf.local_udp_port = srx->transport.sin.sin_port; @@ -124,6 +125,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } else { udp_conf.local_ip6 = srx->transport.sin6.sin6_addr; udp_conf.local_udp_port = srx->transport.sin6.sin6_port; + udp_conf.use_udp6_tx_checksums = true; + udp_conf.use_udp6_rx_checksums = true; #endif } ret = udp_sock_create(net, &udp_conf, &local->socket); @@ -177,7 +180,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, { struct rxrpc_local *local; struct rxrpc_net *rxnet = rxrpc_net(net); - struct list_head *cursor; + struct hlist_node *cursor; const char *age; long diff; int ret; @@ -187,16 +190,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, mutex_lock(&rxnet->local_mutex); - for (cursor = rxnet->local_endpoints.next; - cursor != &rxnet->local_endpoints; - cursor = cursor->next) { - local = list_entry(cursor, struct rxrpc_local, link); + hlist_for_each(cursor, &rxnet->local_endpoints) { + local = hlist_entry(cursor, struct rxrpc_local, link); diff = rxrpc_local_cmp_key(local, srx); - if (diff < 0) + if (diff != 0) continue; - if (diff > 0) - break; /* Services aren't allowed to share transport sockets, so * reject that here. It is possible that the object is dying - @@ -208,9 +207,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, goto addr_in_use; } - /* Found a match. We replace a dying object. Attempting to - * bind the transport socket may still fail if we're attempting - * to use a local address that the dying object is still using. + /* Found a match. We want to replace a dying object. + * Attempting to bind the transport socket may still fail if + * we're attempting to use a local address that the dying + * object is still using. */ if (!rxrpc_use_local(local)) break; @@ -227,10 +227,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, if (ret < 0) goto sock_error; - if (cursor != &rxnet->local_endpoints) - list_replace_init(cursor, &local->link); - else - list_add_tail(&local->link, cursor); + if (cursor) { + hlist_replace_rcu(cursor, &local->link); + cursor->pprev = NULL; + } else { + hlist_add_head_rcu(&local->link, &rxnet->local_endpoints); + } age = "new"; found: @@ -263,10 +265,10 @@ addr_in_use: struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); - int n; + int r; - n = atomic_inc_return(&local->usage); - trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here); + __refcount_inc(&local->ref, &r); + trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here); return local; } @@ -276,12 +278,12 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); + int r; if (local) { - int n = atomic_fetch_add_unless(&local->usage, 1, 0); - if (n > 0) + if (__refcount_inc_not_zero(&local->ref, &r)) trace_rxrpc_local(local->debug_id, rxrpc_local_got, - n + 1, here); + r + 1, here); else local = NULL; } @@ -295,10 +297,10 @@ void rxrpc_queue_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); unsigned int debug_id = local->debug_id; - int n = atomic_read(&local->usage); + int r = refcount_read(&local->ref); if (rxrpc_queue_work(&local->processor)) - trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here); + trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here); else rxrpc_put_local(local); } @@ -310,15 +312,16 @@ void rxrpc_put_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); unsigned int debug_id; - int n; + bool dead; + int r; if (local) { debug_id = local->debug_id; - n = atomic_dec_return(&local->usage); - trace_rxrpc_local(debug_id, rxrpc_local_put, n, here); + dead = __refcount_dec_and_test(&local->ref, &r); + trace_rxrpc_local(debug_id, rxrpc_local_put, r, here); - if (n == 0) + if (dead) call_rcu(&local->rcu, rxrpc_local_rcu); } } @@ -371,7 +374,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) local->dead = true; mutex_lock(&rxnet->local_mutex); - list_del_init(&local->link); + hlist_del_init_rcu(&local->link); mutex_unlock(&rxnet->local_mutex); rxrpc_clean_up_local_conns(local); @@ -403,7 +406,7 @@ static void rxrpc_local_processor(struct work_struct *work) bool again; trace_rxrpc_local(local->debug_id, rxrpc_local_processing, - atomic_read(&local->usage), NULL); + refcount_read(&local->ref), NULL); do { again = false; @@ -455,11 +458,11 @@ void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet) flush_workqueue(rxrpc_workqueue); - if (!list_empty(&rxnet->local_endpoints)) { + if (!hlist_empty(&rxnet->local_endpoints)) { mutex_lock(&rxnet->local_mutex); - list_for_each_entry(local, &rxnet->local_endpoints, link) { + hlist_for_each_entry(local, &rxnet->local_endpoints, link) { pr_err("AF_RXRPC: Leaked local %p {%d}\n", - local, atomic_read(&local->usage)); + local, refcount_read(&local->ref)); } mutex_unlock(&rxnet->local_mutex); BUG(); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index cc7e30733feb..bb4c25d6df64 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -50,7 +50,7 @@ static __net_init int rxrpc_init_net(struct net *net) rxnet->epoch |= RXRPC_RANDOM_EPOCH; INIT_LIST_HEAD(&rxnet->calls); - rwlock_init(&rxnet->call_lock); + spin_lock_init(&rxnet->call_lock); atomic_set(&rxnet->nr_calls, 1); atomic_set(&rxnet->nr_conns, 1); @@ -72,7 +72,7 @@ static __net_init int rxrpc_init_net(struct net *net) timer_setup(&rxnet->client_conn_reap_timer, rxrpc_client_conn_reap_timeout, 0); - INIT_LIST_HEAD(&rxnet->local_endpoints); + INIT_HLIST_HEAD(&rxnet->local_endpoints); mutex_init(&rxnet->local_mutex); hash_init(rxnet->peer_hash); @@ -98,6 +98,9 @@ static __net_init int rxrpc_init_net(struct net *net) proc_create_net("peers", 0444, rxnet->proc_net, &rxrpc_peer_seq_ops, sizeof(struct seq_net_private)); + proc_create_net("locals", 0444, rxnet->proc_net, + &rxrpc_local_seq_ops, + sizeof(struct seq_net_private)); return 0; err_proc: diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 0298fe2ad6d3..26d2ae9baaf2 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -121,7 +121,7 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu( hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) { if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 && - atomic_read(&peer->usage) > 0) + refcount_read(&peer->ref) > 0) return peer; } @@ -140,7 +140,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); if (peer) { _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); - _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); + _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); } return peer; } @@ -216,7 +216,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) peer = kzalloc(sizeof(struct rxrpc_peer), gfp); if (peer) { - atomic_set(&peer->usage, 1); + refcount_set(&peer->ref, 1); peer->local = rxrpc_get_local(local); INIT_HLIST_HEAD(&peer->error_targets); peer->service_conns = RB_ROOT; @@ -378,7 +378,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); - _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); + _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); return peer; } @@ -388,10 +388,10 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); - int n; + int r; - n = atomic_inc_return(&peer->usage); - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here); + __refcount_inc(&peer->ref, &r); + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); return peer; } @@ -401,11 +401,11 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); + int r; if (peer) { - int n = atomic_fetch_add_unless(&peer->usage, 1, 0); - if (n > 0) - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here); + if (__refcount_inc_not_zero(&peer->ref, &r)) + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); else peer = NULL; } @@ -436,13 +436,14 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); unsigned int debug_id; - int n; + bool dead; + int r; if (peer) { debug_id = peer->debug_id; - n = atomic_dec_return(&peer->usage); - trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); - if (n == 0) + dead = __refcount_dec_and_test(&peer->ref, &r); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + if (dead) __rxrpc_put_peer(peer); } } @@ -455,11 +456,12 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); unsigned int debug_id = peer->debug_id; - int n; + bool dead; + int r; - n = atomic_dec_return(&peer->usage); - trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); - if (n == 0) { + dead = __refcount_dec_and_test(&peer->ref, &r); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + if (dead) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); rxrpc_free_peer(peer); @@ -481,7 +483,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) { pr_err("Leaked peer %u {%u} %pISp\n", peer->debug_id, - atomic_read(&peer->usage), + refcount_read(&peer->ref), &peer->srx.transport); } } diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index e2f990754f88..245418943e01 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -26,29 +26,23 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { */ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) __acquires(rcu) - __acquires(rxnet->call_lock) { struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); rcu_read_lock(); - read_lock(&rxnet->call_lock); - return seq_list_start_head(&rxnet->calls, *_pos); + return seq_list_start_head_rcu(&rxnet->calls, *_pos); } static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); - return seq_list_next(v, &rxnet->calls, pos); + return seq_list_next_rcu(v, &rxnet->calls, pos); } static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) - __releases(rxnet->call_lock) __releases(rcu) { - struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); - - read_unlock(&rxnet->call_lock); rcu_read_unlock(); } @@ -107,7 +101,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call->cid, call->call_id, rxrpc_is_service_call(call) ? "Svc" : "Clt", - atomic_read(&call->usage), + refcount_read(&call->ref), rxrpc_call_states[call->state], call->abort_code, call->debug_id, @@ -189,7 +183,7 @@ print: conn->service_id, conn->proto.cid, rxrpc_conn_is_service(conn) ? "Svc" : "Clt", - atomic_read(&conn->usage), + refcount_read(&conn->ref), rxrpc_conn_states[conn->state], key_serial(conn->params.key), atomic_read(&conn->serial), @@ -239,7 +233,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) " %3u %5u %6llus %8u %8u\n", lbuff, rbuff, - atomic_read(&peer->usage), + refcount_read(&peer->ref), peer->cong_cwnd, peer->mtu, now - peer->last_tx_at, @@ -334,3 +328,72 @@ const struct seq_operations rxrpc_peer_seq_ops = { .stop = rxrpc_peer_seq_stop, .show = rxrpc_peer_seq_show, }; + +/* + * Generate a list of extant virtual local endpoints in /proc/net/rxrpc/locals + */ +static int rxrpc_local_seq_show(struct seq_file *seq, void *v) +{ + struct rxrpc_local *local; + char lbuff[50]; + + if (v == SEQ_START_TOKEN) { + seq_puts(seq, + "Proto Local " + " Use Act\n"); + return 0; + } + + local = hlist_entry(v, struct rxrpc_local, link); + + sprintf(lbuff, "%pISpc", &local->srx.transport); + + seq_printf(seq, + "UDP %-47.47s %3u %3u\n", + lbuff, + refcount_read(&local->ref), + atomic_read(&local->active_users)); + + return 0; +} + +static void *rxrpc_local_seq_start(struct seq_file *seq, loff_t *_pos) + __acquires(rcu) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + unsigned int n; + + rcu_read_lock(); + + if (*_pos >= UINT_MAX) + return NULL; + + n = *_pos; + if (n == 0) + return SEQ_START_TOKEN; + + return seq_hlist_start_rcu(&rxnet->local_endpoints, n - 1); +} + +static void *rxrpc_local_seq_next(struct seq_file *seq, void *v, loff_t *_pos) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + if (*_pos >= UINT_MAX) + return NULL; + + return seq_hlist_next_rcu(v, &rxnet->local_endpoints, _pos); +} + +static void rxrpc_local_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) +{ + rcu_read_unlock(); +} + +const struct seq_operations rxrpc_local_seq_ops = { + .start = rxrpc_local_seq_start, + .next = rxrpc_local_seq_next, + .stop = rxrpc_local_seq_stop, + .show = rxrpc_local_seq_show, +}; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index af8ad6c30b9f..1d38e279e2ef 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -444,6 +444,12 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, success: ret = copied; + if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) { + read_lock_bh(&call->state_lock); + if (call->error < 0) + ret = call->error; + read_unlock_bh(&call->state_lock); + } out: call->tx_pending = skb; _leave(" = %d", ret); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 0348d2bf6f7d..580a5acffee7 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -71,7 +71,6 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) const void *here = __builtin_return_address(0); if (skb) { int n; - CHECK_SLAB_OKAY(&skb->users); n = atomic_dec_return(select_skb_count(skb)); trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, rxrpc_skb(skb)->rx_flags, here); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index e01ef7f109f4..823ee643371c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -149,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *pattr; struct tcf_pedit *p; int ret = 0, err; - int ksize; + int i, ksize; u32 index; if (!nla) { @@ -228,6 +228,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p->tcfp_nkeys = parm->nkeys; } memcpy(p->tcfp_keys, parm->keys, ksize); + p->tcfp_off_max_hint = 0; + for (i = 0; i < p->tcfp_nkeys; ++i) { + u32 cur = p->tcfp_keys[i].off; + + /* sanitize the shift value for any later use */ + p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1, + p->tcfp_keys[i].shift); + + /* The AT option can read a single byte, we can bound the actual + * value with uchar max. + */ + cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift; + + /* Each key touches 4 bytes starting from the computed offset */ + p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4); + } p->tcfp_flags = parm->flags; goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); @@ -308,13 +324,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); + u32 max_offset; int i; - if (skb_unclone(skb, GFP_ATOMIC)) - return p->tcf_action; - spin_lock(&p->tcf_lock); + max_offset = (skb_transport_header_was_set(skb) ? + skb_transport_offset(skb) : + skb_network_offset(skb)) + + p->tcfp_off_max_hint; + if (skb_ensure_writable(skb, min(skb->len, max_offset))) + goto unlock; + tcf_lastuse_update(&p->tcf_tm); if (p->tcfp_nkeys > 0) { @@ -403,6 +424,7 @@ bad: p->tcf_qstats.overlimits++; done: bstats_update(&p->tcf_bstats, skb); +unlock: spin_unlock(&p->tcf_lock); return p->tcf_action; } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 0a04468b7314..49bae3d5006b 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -311,12 +311,15 @@ META_COLLECTOR(int_sk_bound_if) META_COLLECTOR(var_sk_bound_if) { + int bound_dev_if; + if (skip_nonlocal(skb)) { *err = -1; return; } - if (skb->sk->sk_bound_dev_if == 0) { + bound_dev_if = READ_ONCE(skb->sk->sk_bound_dev_if); + if (bound_dev_if == 0) { dst->value = (unsigned long) "any"; dst->len = 3; } else { @@ -324,7 +327,7 @@ META_COLLECTOR(var_sk_bound_if) rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(skb->sk), - skb->sk->sk_bound_dev_if); + bound_dev_if); *err = var_dev(dev, dst); rcu_read_unlock(); } diff --git a/net/sctp/input.c b/net/sctp/input.c index 90e12bafdd48..4f43afa8678f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -92,6 +92,7 @@ int sctp_rcv(struct sk_buff *skb) struct sctp_chunk *chunk; union sctp_addr src; union sctp_addr dest; + int bound_dev_if; int family; struct sctp_af *af; struct net *net = dev_net(skb->dev); @@ -169,7 +170,8 @@ int sctp_rcv(struct sk_buff *skb) * If a frame arrives on an interface and the receiving socket is * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ - if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && (bound_dev_if != af->skb_iif(skb))) { if (transport) { sctp_transport_put(transport); asoc = NULL; diff --git a/net/sctp/output.c b/net/sctp/output.c index 72fe6669c50d..a63df055ac57 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -134,7 +134,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, dst_hold(tp->dst); sk_setup_caps(sk, tp->dst); } - packet->max_size = sk_can_gso(sk) ? READ_ONCE(tp->dst->dev->gso_max_size) + packet->max_size = sk_can_gso(sk) ? min(READ_ONCE(tp->dst->dev->gso_max_size), + GSO_LEGACY_MAX_SIZE) : asoc->pathmtu; rcu_read_unlock(); } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index b3815b568e8e..463c4a58d2c3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -458,6 +458,10 @@ void sctp_generate_reconf_event(struct timer_list *t) goto out_unlock; } + /* This happens when the response arrives after the timer is triggered. */ + if (!asoc->strreset_chunk) + goto out_unlock; + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF), asoc->state, asoc->ep, asoc, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3e3fe923bed5..6d37d2dfb3da 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2128,7 +2128,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, head_skb = event->chunk->head_skb; else head_skb = skb; - sock_recv_ts_and_drops(msg, sk, head_skb); + sock_recv_cmsgs(msg, sk, head_skb); if (sctp_ulpevent_is_notification(event)) { msg->msg_flags |= MSG_NOTIFICATION; sp->pf->event_msgname(event, msg->msg_name, addr_len); diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 99e5f69fbb74..518b1b9bf89d 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -146,14 +146,11 @@ int sctp_sched_set_sched(struct sctp_association *asoc, /* Give the next scheduler a clean slate. */ for (i = 0; i < asoc->stream.outcnt; i++) { - void *p = SCTP_SO(&asoc->stream, i)->ext; + struct sctp_stream_out_ext *ext = SCTP_SO(&asoc->stream, i)->ext; - if (!p) + if (!ext) continue; - - p += offsetofend(struct sctp_stream_out_ext, outq); - memset(p, 0, sizeof(struct sctp_stream_out_ext) - - offsetofend(struct sctp_stream_out_ext, outq)); + memset_after(ext, 0, outq); } } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index fc7b6eb22143..5f70642a8044 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -243,11 +243,27 @@ struct proto smc_proto6 = { }; EXPORT_SYMBOL_GPL(smc_proto6); +static void smc_fback_restore_callbacks(struct smc_sock *smc) +{ + struct sock *clcsk = smc->clcsock->sk; + + write_lock_bh(&clcsk->sk_callback_lock); + clcsk->sk_user_data = NULL; + + smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change); + smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready); + smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space); + smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report); + + write_unlock_bh(&clcsk->sk_callback_lock); +} + static void smc_restore_fallback_changes(struct smc_sock *smc) { if (smc->clcsock->file) { /* non-accepted sockets have no file yet */ smc->clcsock->file->private_data = smc->sk.sk_socket; smc->clcsock->file = NULL; + smc_fback_restore_callbacks(smc); } } @@ -373,6 +389,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, sk->sk_prot->hash(sk); sk_refcnt_debug_inc(sk); mutex_init(&smc->clcsock_release_lock); + smc_init_saved_callbacks(smc); return sk; } @@ -744,47 +761,73 @@ out: static void smc_fback_state_change(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_state_change); + read_unlock_bh(&clcsk->sk_callback_lock); } static void smc_fback_data_ready(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_data_ready); + read_unlock_bh(&clcsk->sk_callback_lock); } static void smc_fback_write_space(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_write_space); + read_unlock_bh(&clcsk->sk_callback_lock); } static void smc_fback_error_report(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_error_report); + read_unlock_bh(&clcsk->sk_callback_lock); +} + +static void smc_fback_replace_callbacks(struct smc_sock *smc) +{ + struct sock *clcsk = smc->clcsock->sk; + + write_lock_bh(&clcsk->sk_callback_lock); + clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + + smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change, + &smc->clcsk_state_change); + smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready, + &smc->clcsk_data_ready); + smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space, + &smc->clcsk_write_space); + smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report, + &smc->clcsk_error_report); + + write_unlock_bh(&clcsk->sk_callback_lock); } static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) { - struct sock *clcsk; int rc = 0; mutex_lock(&smc->clcsock_release_lock); @@ -792,10 +835,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) rc = -EBADF; goto out; } - clcsk = smc->clcsock->sk; - if (smc->use_fallback) - goto out; smc->use_fallback = true; smc->fallback_rsn = reason_code; smc_stat_fallback(smc); @@ -810,18 +850,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) * in smc sk->sk_wq and they should be woken up * as clcsock's wait queue is woken up. */ - smc->clcsk_state_change = clcsk->sk_state_change; - smc->clcsk_data_ready = clcsk->sk_data_ready; - smc->clcsk_write_space = clcsk->sk_write_space; - smc->clcsk_error_report = clcsk->sk_error_report; - - clcsk->sk_state_change = smc_fback_state_change; - clcsk->sk_data_ready = smc_fback_data_ready; - clcsk->sk_write_space = smc_fback_write_space; - clcsk->sk_error_report = smc_fback_error_report; - - smc->clcsock->sk->sk_user_data = - (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + smc_fback_replace_callbacks(smc); } out: mutex_unlock(&smc->clcsock_release_lock); @@ -1475,6 +1504,8 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_state = SMC_CLOSED; if (rc == -EPIPE || rc == -EAGAIN) smc->sk.sk_err = EPIPE; + else if (rc == -ECONNREFUSED) + smc->sk.sk_err = ECONNREFUSED; else if (signal_pending(current)) smc->sk.sk_err = -sock_intr_errno(timeo); sock_put(&smc->sk); /* passive closing */ @@ -1513,9 +1544,29 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, goto out_err; lock_sock(sk); + switch (sock->state) { + default: + rc = -EINVAL; + goto out; + case SS_CONNECTED: + rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL; + goto out; + case SS_CONNECTING: + if (sk->sk_state == SMC_ACTIVE) + goto connected; + break; + case SS_UNCONNECTED: + sock->state = SS_CONNECTING; + break; + } + switch (sk->sk_state) { default: goto out; + case SMC_CLOSED: + rc = sock_error(sk) ? : -ECONNABORTED; + sock->state = SS_UNCONNECTED; + goto out; case SMC_ACTIVE: rc = -EISCONN; goto out; @@ -1534,20 +1585,24 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, goto out; sock_hold(&smc->sk); /* sock put in passive closing */ - if (smc->use_fallback) + if (smc->use_fallback) { + sock->state = rc ? SS_CONNECTING : SS_CONNECTED; goto out; + } if (flags & O_NONBLOCK) { if (queue_work(smc_hs_wq, &smc->connect_work)) smc->connect_nonblock = 1; rc = -EINPROGRESS; + goto out; } else { rc = __smc_connect(smc); if (rc < 0) goto out; - else - rc = 0; /* success cases including fallback */ } +connected: + rc = 0; + sock->state = SS_CONNECTED; out: release_sock(sk); out_err: @@ -1594,6 +1649,19 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) * function; switch it back to the original sk_data_ready function */ new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready; + + /* if new clcsock has also inherited the fallback-specific callback + * functions, switch them back to the original ones. + */ + if (lsmc->use_fallback) { + if (lsmc->clcsk_state_change) + new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change; + if (lsmc->clcsk_write_space) + new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space; + if (lsmc->clcsk_error_report) + new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report; + } + (*new_smc)->clcsock = new_clcsock; out: return rc; @@ -1649,6 +1717,7 @@ struct sock *smc_accept_dequeue(struct sock *parent, } if (new_sock) { sock_graft(new_sk, new_sock); + new_sock->state = SS_CONNECTED; if (isk->use_fallback) { smc_sk(new_sk)->clcsock->file = new_sock->file; isk->clcsock->file->private_data = isk->clcsock; @@ -2353,17 +2422,20 @@ out: static void smc_clcsock_data_ready(struct sock *listen_clcsock) { - struct smc_sock *lsmc = - smc_clcsock_user_data(listen_clcsock); + struct smc_sock *lsmc; + read_lock_bh(&listen_clcsock->sk_callback_lock); + lsmc = smc_clcsock_user_data(listen_clcsock); if (!lsmc) - return; + goto out; lsmc->clcsk_data_ready(listen_clcsock); if (lsmc->sk.sk_state == SMC_LISTEN) { sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */ if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work)) sock_put(&lsmc->sk); } +out: + read_unlock_bh(&listen_clcsock->sk_callback_lock); } static int smc_listen(struct socket *sock, int backlog) @@ -2377,7 +2449,7 @@ static int smc_listen(struct socket *sock, int backlog) rc = -EINVAL; if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) || - smc->connect_nonblock) + smc->connect_nonblock || sock->state != SS_UNCONNECTED) goto out; rc = 0; @@ -2395,10 +2467,12 @@ static int smc_listen(struct socket *sock, int backlog) /* save original sk_data_ready function and establish * smc-specific sk_data_ready function */ - smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready; - smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready; + write_lock_bh(&smc->clcsock->sk->sk_callback_lock); smc->clcsock->sk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready, + smc_clcsock_data_ready, &smc->clcsk_data_ready); + write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); /* save original ops */ smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops; @@ -2413,7 +2487,11 @@ static int smc_listen(struct socket *sock, int backlog) rc = kernel_listen(smc->clcsock, backlog); if (rc) { - smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; + write_lock_bh(&smc->clcsock->sk->sk_callback_lock); + smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, + &smc->clcsk_data_ready); + smc->clcsock->sk->sk_user_data = NULL; + write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); goto out; } sk->sk_max_ack_backlog = backlog; @@ -2663,6 +2741,17 @@ static int smc_shutdown(struct socket *sock, int how) lock_sock(sk); + if (sock->state == SS_CONNECTING) { + if (sk->sk_state == SMC_ACTIVE) + sock->state = SS_CONNECTED; + else if (sk->sk_state == SMC_PEERCLOSEWAIT1 || + sk->sk_state == SMC_PEERCLOSEWAIT2 || + sk->sk_state == SMC_APPCLOSEWAIT1 || + sk->sk_state == SMC_APPCLOSEWAIT2 || + sk->sk_state == SMC_APPFINCLOSEWAIT) + sock->state = SS_DISCONNECTING; + } + rc = -ENOTCONN; if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_PEERCLOSEWAIT1) && @@ -2676,6 +2765,7 @@ static int smc_shutdown(struct socket *sock, int how) sk->sk_shutdown = smc->clcsock->sk->sk_shutdown; if (sk->sk_shutdown == SHUTDOWN_MASK) { sk->sk_state = SMC_CLOSED; + sk->sk_socket->state = SS_UNCONNECTED; sock_put(sk); } goto out; @@ -2701,6 +2791,10 @@ static int smc_shutdown(struct socket *sock, int how) /* map sock_shutdown_cmd constants to sk_shutdown value range */ sk->sk_shutdown |= how + 1; + if (sk->sk_state == SMC_CLOSED) + sock->state = SS_UNCONNECTED; + else + sock->state = SS_DISCONNECTING; out: release_sock(sk); return rc ? rc : rc1; @@ -3086,6 +3180,7 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, rc = -ENOBUFS; sock->ops = &smc_sock_ops; + sock->state = SS_UNCONNECTED; sk = smc_sock_alloc(net, sock, protocol); if (!sk) goto out; diff --git a/net/smc/smc.h b/net/smc/smc.h index ea0620529ebe..5ed765ea0c73 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -288,12 +288,41 @@ static inline struct smc_sock *smc_sk(const struct sock *sk) return (struct smc_sock *)sk; } +static inline void smc_init_saved_callbacks(struct smc_sock *smc) +{ + smc->clcsk_state_change = NULL; + smc->clcsk_data_ready = NULL; + smc->clcsk_write_space = NULL; + smc->clcsk_error_report = NULL; +} + static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk) { return (struct smc_sock *) ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); } +/* save target_cb in saved_cb, and replace target_cb with new_cb */ +static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *), + void (*new_cb)(struct sock *), + void (**saved_cb)(struct sock *)) +{ + /* only save once */ + if (!*saved_cb) + *saved_cb = *target_cb; + *target_cb = new_cb; +} + +/* restore target_cb to saved_cb, and reset saved_cb to NULL */ +static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *), + void (**saved_cb)(struct sock *)) +{ + if (!*saved_cb) + return; + *target_cb = *saved_cb; + *saved_cb = NULL; +} + extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ extern struct workqueue_struct *smc_close_wq; /* wq for close work */ diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 676cb2333d3c..31db7438857c 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -214,8 +214,11 @@ again: sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); /* wake up accept */ if (smc->clcsock && smc->clcsock->sk) { - smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; + write_lock_bh(&smc->clcsock->sk->sk_callback_lock); + smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, + &smc->clcsk_data_ready); smc->clcsock->sk->sk_user_data = NULL; + write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); } smc_close_cleanup_listen(sk); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index a3e2d3b89568..dcda4165d107 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -671,6 +671,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, .max_recv_sge = sges_per_buf, + .max_inline_data = 0, }, .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_RC, diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 51e8eb2933ff..338b9ef806e8 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -355,12 +355,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, } break; } + if (!timeo) + return -EAGAIN; if (signal_pending(current)) { read_done = sock_intr_errno(timeo); break; } - if (!timeo) - return -EAGAIN; } if (!smc_rx_data_available(conn)) { diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 98ca9229fe87..805a546e8c04 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -391,12 +391,20 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, int rc; for (dstchunk = 0; dstchunk < 2; dstchunk++) { - struct ib_sge *sge = - wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list; + struct ib_rdma_wr *wr = &wr_rdma_buf->wr_tx_rdma[dstchunk]; + struct ib_sge *sge = wr->wr.sg_list; + u64 base_addr = dma_addr; + + if (dst_len < link->qp_attr.cap.max_inline_data) { + base_addr = (uintptr_t)conn->sndbuf_desc->cpu_addr; + wr->wr.send_flags |= IB_SEND_INLINE; + } else { + wr->wr.send_flags &= ~IB_SEND_INLINE; + } num_sges = 0; for (srcchunk = 0; srcchunk < 2; srcchunk++) { - sge[srcchunk].addr = dma_addr + src_off; + sge[srcchunk].addr = base_addr + src_off; sge[srcchunk].length = src_len; num_sges++; @@ -410,8 +418,7 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, src_len = dst_len - src_len; /* remainder */ src_len_sum += src_len; } - rc = smc_tx_rdma_write(conn, dst_off, num_sges, - &wr_rdma_buf->wr_tx_rdma[dstchunk]); + rc = smc_tx_rdma_write(conn, dst_off, num_sges, wr); if (rc) return rc; if (dst_len_sum == len) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 24be1d03fef9..26f8f240d9e8 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -554,10 +554,11 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk) static void smc_wr_init_sge(struct smc_link *lnk) { int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1; + bool send_inline = (lnk->qp_attr.cap.max_inline_data > SMC_WR_TX_SIZE); u32 i; for (i = 0; i < lnk->wr_tx_cnt; i++) { - lnk->wr_tx_sges[i].addr = + lnk->wr_tx_sges[i].addr = send_inline ? (uintptr_t)(&lnk->wr_tx_bufs[i]) : lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE; lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE; lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; @@ -575,6 +576,8 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_tx_ibs[i].opcode = IB_WR_SEND; lnk->wr_tx_ibs[i].send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; + if (send_inline) + lnk->wr_tx_ibs[i].send_flags |= IB_SEND_INLINE; lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE; lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE; lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list = diff --git a/net/socket.c b/net/socket.c index 6887840682bb..6ee634c01eca 100644 --- a/net/socket.c +++ b/net/socket.c @@ -683,9 +683,18 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) { u8 flags = *tx_flags; - if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) + if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) { flags |= SKBTX_HW_TSTAMP; + /* PTP hardware clocks can provide a free running cycle counter + * as a time base for virtual clocks. Tell driver to use the + * free running cycle counter for timestamp if socket is bound + * to virtual clock. + */ + if (tsflags & SOF_TIMESTAMPING_BIND_PHC) + flags |= SKBTX_HW_TSTAMP_USE_CYCLES; + } + if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) flags |= SKBTX_SW_TSTAMP; @@ -796,7 +805,28 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp) return skb->tstamp && !false_tstamp && skb_is_err_queue(skb); } -static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb) +static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index) +{ + bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC; + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + struct net_device *orig_dev; + ktime_t hwtstamp; + + rcu_read_lock(); + orig_dev = dev_get_by_napi_id(skb_napi_id(skb)); + if (orig_dev) { + *if_index = orig_dev->ifindex; + hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles); + } else { + hwtstamp = shhwtstamps->hwtstamp; + } + rcu_read_unlock(); + + return hwtstamp; +} + +static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb, + int if_index) { struct scm_ts_pktinfo ts_pktinfo; struct net_device *orig_dev; @@ -806,11 +836,14 @@ static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb) memset(&ts_pktinfo, 0, sizeof(ts_pktinfo)); - rcu_read_lock(); - orig_dev = dev_get_by_napi_id(skb_napi_id(skb)); - if (orig_dev) - ts_pktinfo.if_index = orig_dev->ifindex; - rcu_read_unlock(); + if (!if_index) { + rcu_read_lock(); + orig_dev = dev_get_by_napi_id(skb_napi_id(skb)); + if (orig_dev) + if_index = orig_dev->ifindex; + rcu_read_unlock(); + } + ts_pktinfo.if_index = if_index; ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb); put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO, @@ -830,6 +863,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + int if_index; ktime_t hwtstamp; /* Race occurred between timestamp enabling and packet @@ -878,18 +912,22 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, if (shhwtstamps && (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && !skb_is_swtx_tstamp(skb, false_tstamp)) { - if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC) - hwtstamp = ptp_convert_timestamp(shhwtstamps, - sk->sk_bind_phc); + if_index = 0; + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV) + hwtstamp = get_timestamp(sk, skb, &if_index); else hwtstamp = shhwtstamps->hwtstamp; + if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC) + hwtstamp = ptp_convert_timestamp(&hwtstamp, + sk->sk_bind_phc); + if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) { empty = 0; if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && !skb_is_err_queue(skb)) - put_ts_pktinfo(msg, skb); + put_ts_pktinfo(msg, skb, if_index); } } if (!empty) { @@ -930,13 +968,22 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount); } -void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb) +static void sock_recv_mark(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + if (sock_flag(sk, SOCK_RCVMARK) && skb) + put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), + &skb->mark); +} + +void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) { sock_recv_timestamp(msg, sk, skb); sock_recv_drops(msg, sk, skb); + sock_recv_mark(msg, sk, skb); } -EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); +EXPORT_SYMBOL_GPL(__sock_recv_cmsgs); INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *, size_t, int)); diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 61c276bddaf2..f549e4c05def 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -98,6 +98,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) * done without the correct namespace: */ .flags = RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_CONNECTED | RPC_CLNT_CREATE_NO_IDLE_TIMEOUT }; struct rpc_clnt *clnt; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index af0174d7ce5a..e2c6eca0271b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -76,6 +76,7 @@ static int rpc_encode_header(struct rpc_task *task, static int rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr); static int rpc_ping(struct rpc_clnt *clnt); +static int rpc_ping_noreply(struct rpc_clnt *clnt); static void rpc_check_timeout(struct rpc_task *task); static void rpc_register_client(struct rpc_clnt *clnt) @@ -483,6 +484,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, rpc_shutdown_client(clnt); return ERR_PTR(err); } + } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) { + int err = rpc_ping_noreply(clnt); + if (err != 0) { + rpc_shutdown_client(clnt); + return ERR_PTR(err); + } } clnt->cl_softrtry = 1; @@ -1065,10 +1072,13 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt) static void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) { - if (task->tk_xprt && - !(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) && - (task->tk_flags & RPC_TASK_MOVEABLE))) - return; + if (task->tk_xprt) { + if (!(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) && + (task->tk_flags & RPC_TASK_MOVEABLE))) + return; + xprt_release(task); + xprt_put(task->tk_xprt); + } if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) task->tk_xprt = rpc_task_get_first_xprt(clnt); else @@ -2706,6 +2716,10 @@ static const struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; +static const struct rpc_procinfo rpcproc_null_noreply = { + .p_encode = rpcproc_encode_null, +}; + static void rpc_null_call_prepare(struct rpc_task *task, void *data) { @@ -2759,6 +2773,28 @@ static int rpc_ping(struct rpc_clnt *clnt) return status; } +static int rpc_ping_noreply(struct rpc_clnt *clnt) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_null_noreply, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_message = &msg, + .callback_ops = &rpc_null_ops, + .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS, + }; + struct rpc_task *task; + int status; + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = task->tk_status; + rpc_put_task(task); + return status; +} + struct rpc_cb_add_xprt_calldata { struct rpc_xprt_switch *xps; struct rpc_xprt *xprt; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 5c91c5457197..fcdd0fca408e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1419,6 +1419,26 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt) #endif /* CONFIG_SUNRPC_BACKCHANNEL */ /** + * xs_local_state_change - callback to handle AF_LOCAL socket state changes + * @sk: socket whose state has changed + * + */ +static void xs_local_state_change(struct sock *sk) +{ + struct rpc_xprt *xprt; + struct sock_xprt *transport; + + if (!(xprt = xprt_from_sock(sk))) + return; + transport = container_of(xprt, struct sock_xprt, xprt); + if (sk->sk_shutdown & SHUTDOWN_MASK) { + clear_bit(XPRT_CONNECTED, &xprt->state); + /* Trigger the socket release */ + xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); + } +} + +/** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed * @@ -1866,6 +1886,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; + sk->sk_state_change = xs_local_state_change; sk->sk_error_report = xs_error_report; xprt_clear_connected(xprt); @@ -1950,6 +1971,9 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); int ret; + if (transport->file) + goto force_disconnect; + if (RPC_IS_ASYNC(task)) { /* * We want the AF_LOCAL connect to be resolved in the @@ -1962,11 +1986,17 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) */ task->tk_rpc_status = -ENOTCONN; rpc_exit(task, -ENOTCONN); - return; + goto out_wake; } ret = xs_local_setup_socket(transport); if (ret && !RPC_IS_SOFTCONN(task)) msleep_interruptible(15000); + return; +force_disconnect: + xprt_force_disconnect(xprt); +out_wake: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, -ENOTCONN); } #if IS_ENABLED(CONFIG_SUNRPC_SWAP) @@ -2845,9 +2875,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) } xprt_set_bound(xprt); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); - ret = ERR_PTR(xs_local_setup_socket(transport)); - if (ret) - goto out_err; break; default: ret = ERR_PTR(-EAFNOSUPPORT); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 5e4bd8ba48d3..ec6f4b699a2b 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -411,10 +411,16 @@ static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i) return 0; } +union tls_iter_offset { + struct iov_iter *msg_iter; + int offset; +}; + static int tls_push_data(struct sock *sk, - struct iov_iter *msg_iter, + union tls_iter_offset iter_offset, size_t size, int flags, - unsigned char record_type) + unsigned char record_type, + struct page *zc_page) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; @@ -480,14 +486,25 @@ handle_error: } record = ctx->open_record; - copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); - copy = min_t(size_t, copy, (max_open_record_len - record->len)); - rc = tls_device_copy_data(page_address(pfrag->page) + - pfrag->offset, copy, msg_iter); - if (rc) - goto handle_error; - tls_append_frag(record, pfrag, copy); + copy = min_t(size_t, size, max_open_record_len - record->len); + if (copy && zc_page) { + struct page_frag zc_pfrag; + + zc_pfrag.page = zc_page; + zc_pfrag.offset = iter_offset.offset; + zc_pfrag.size = copy; + tls_append_frag(record, &zc_pfrag, copy); + } else if (copy) { + copy = min_t(size_t, copy, pfrag->size - pfrag->offset); + + rc = tls_device_copy_data(page_address(pfrag->page) + + pfrag->offset, copy, + iter_offset.msg_iter); + if (rc) + goto handle_error; + tls_append_frag(record, pfrag, copy); + } size -= copy; if (!size) { @@ -538,6 +555,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { unsigned char record_type = TLS_RECORD_TYPE_DATA; struct tls_context *tls_ctx = tls_get_ctx(sk); + union tls_iter_offset iter; int rc; mutex_lock(&tls_ctx->tx_lock); @@ -549,8 +567,8 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) goto out; } - rc = tls_push_data(sk, &msg->msg_iter, size, - msg->msg_flags, record_type); + iter.msg_iter = &msg->msg_iter; + rc = tls_push_data(sk, iter, size, msg->msg_flags, record_type, NULL); out: release_sock(sk); @@ -562,7 +580,8 @@ int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct iov_iter msg_iter; + union tls_iter_offset iter_offset; + struct iov_iter msg_iter; char *kaddr; struct kvec iov; int rc; @@ -578,12 +597,20 @@ int tls_device_sendpage(struct sock *sk, struct page *page, goto out; } + if (tls_ctx->zerocopy_sendfile) { + iter_offset.offset = offset; + rc = tls_push_data(sk, iter_offset, size, + flags, TLS_RECORD_TYPE_DATA, page); + goto out; + } + kaddr = kmap(page); iov.iov_base = kaddr + offset; iov.iov_len = size; iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size); - rc = tls_push_data(sk, &msg_iter, size, - flags, TLS_RECORD_TYPE_DATA); + iter_offset.msg_iter = &msg_iter; + rc = tls_push_data(sk, iter_offset, size, flags, TLS_RECORD_TYPE_DATA, + NULL); kunmap(page); out: @@ -654,10 +681,12 @@ EXPORT_SYMBOL(tls_get_record); static int tls_device_push_pending_record(struct sock *sk, int flags) { - struct iov_iter msg_iter; + union tls_iter_offset iter; + struct iov_iter msg_iter; iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0); - return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA); + iter.msg_iter = &msg_iter; + return tls_push_data(sk, iter, 0, flags, TLS_RECORD_TYPE_DATA, NULL); } void tls_device_write_space(struct sock *sk, struct tls_context *ctx) @@ -1343,7 +1372,10 @@ static int tls_device_down(struct net_device *netdev) /* Device contexts for RX and TX will be freed in on sk_destruct * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. + * Now release the ref taken above. */ + if (refcount_dec_and_test(&ctx->refcount)) + tls_device_free_ctx(ctx); } up_write(&device_offload_lock); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 7b2b0e7ffee4..b91ddc110786 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -513,6 +513,26 @@ out: return rc; } +static int do_tls_getsockopt_tx_zc(struct sock *sk, char __user *optval, + int __user *optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + unsigned int value; + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + if (len != sizeof(value)) + return -EINVAL; + + value = ctx->zerocopy_sendfile; + if (copy_to_user(optval, &value, sizeof(value))) + return -EFAULT; + + return 0; +} + static int do_tls_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { @@ -524,6 +544,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname, rc = do_tls_getsockopt_conf(sk, optval, optlen, optname == TLS_TX); break; + case TLS_TX_ZEROCOPY_SENDFILE: + rc = do_tls_getsockopt_tx_zc(sk, optval, optlen); + break; default: rc = -ENOPROTOOPT; break; @@ -675,6 +698,26 @@ err_crypto_info: return rc; } +static int do_tls_setsockopt_tx_zc(struct sock *sk, sockptr_t optval, + unsigned int optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + unsigned int value; + + if (sockptr_is_null(optval) || optlen != sizeof(value)) + return -EINVAL; + + if (copy_from_sockptr(&value, optval, sizeof(value))) + return -EFAULT; + + if (value > 1) + return -EINVAL; + + ctx->zerocopy_sendfile = value; + + return 0; +} + static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { @@ -688,6 +731,11 @@ static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, optname == TLS_TX); release_sock(sk); break; + case TLS_TX_ZEROCOPY_SENDFILE: + lock_sock(sk); + rc = do_tls_setsockopt_tx_zc(sk, optval, optlen); + release_sock(sk); + break; default: rc = -ENOPROTOOPT; break; @@ -921,6 +969,12 @@ static int tls_get_info(const struct sock *sk, struct sk_buff *skb) if (err) goto nla_failure; + if (ctx->tx_conf == TLS_HW && ctx->zerocopy_sendfile) { + err = nla_put_flag(skb, TLS_INFO_ZC_SENDFILE); + if (err) + goto nla_failure; + } + rcu_read_unlock(); nla_nest_end(skb, start); return 0; @@ -940,6 +994,7 @@ static size_t tls_get_info_size(const struct sock *sk) nla_total_size(sizeof(u16)) + /* TLS_INFO_CIPHER */ nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */ nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */ + nla_total_size(0) + /* TLS_INFO_ZC_SENDFILE */ 0; return size; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 939d1673f508..0513f82b8537 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1837,15 +1837,17 @@ leave_on_list: bool partially_consumed = chunk > len; if (bpf_strp_enabled) { + /* BPF may try to queue the skb */ + __skb_unlink(skb, &ctx->rx_list); err = sk_psock_tls_strp_read(psock, skb); if (err != __SK_PASS) { rxm->offset = rxm->offset + rxm->full_len; rxm->full_len = 0; - __skb_unlink(skb, &ctx->rx_list); if (err == __SK_DROP) consume_skb(skb); continue; } + __skb_queue_tail(&ctx->rx_list, skb); } if (partially_consumed) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index ba1c8cc0c467..ad64f403536a 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -566,67 +566,28 @@ out: mutex_unlock(&vsock->rx_lock); } -static int virtio_vsock_probe(struct virtio_device *vdev) +static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) { - vq_callback_t *callbacks[] = { - virtio_vsock_rx_done, - virtio_vsock_tx_done, - virtio_vsock_event_done, - }; + struct virtio_device *vdev = vsock->vdev; static const char * const names[] = { "rx", "tx", "event", }; - struct virtio_vsock *vsock = NULL; + vq_callback_t *callbacks[] = { + virtio_vsock_rx_done, + virtio_vsock_tx_done, + virtio_vsock_event_done, + }; int ret; - ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); - if (ret) - return ret; - - /* Only one virtio-vsock device per guest is supported */ - if (rcu_dereference_protected(the_virtio_vsock, - lockdep_is_held(&the_virtio_vsock_mutex))) { - ret = -EBUSY; - goto out; - } - - vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); - if (!vsock) { - ret = -ENOMEM; - goto out; - } - - vsock->vdev = vdev; - - ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names, + ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, callbacks, names, NULL); if (ret < 0) - goto out; + return ret; virtio_vsock_update_guest_cid(vsock); - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - atomic_set(&vsock->queued_replies, 0); - - mutex_init(&vsock->tx_lock); - mutex_init(&vsock->rx_lock); - mutex_init(&vsock->event_lock); - spin_lock_init(&vsock->send_pkt_list_lock); - INIT_LIST_HEAD(&vsock->send_pkt_list); - INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); - INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); - INIT_WORK(&vsock->event_work, virtio_transport_event_work); - INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); - - if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET)) - vsock->seqpacket_allow = true; - - vdev->priv = vsock; - virtio_device_ready(vdev); mutex_lock(&vsock->tx_lock); @@ -643,30 +604,15 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->event_run = true; mutex_unlock(&vsock->event_lock); - rcu_assign_pointer(the_virtio_vsock, vsock); - - mutex_unlock(&the_virtio_vsock_mutex); - return 0; - -out: - kfree(vsock); - mutex_unlock(&the_virtio_vsock_mutex); - return ret; } -static void virtio_vsock_remove(struct virtio_device *vdev) +static void virtio_vsock_vqs_del(struct virtio_vsock *vsock) { - struct virtio_vsock *vsock = vdev->priv; + struct virtio_device *vdev = vsock->vdev; struct virtio_vsock_pkt *pkt; - mutex_lock(&the_virtio_vsock_mutex); - - vdev->priv = NULL; - rcu_assign_pointer(the_virtio_vsock, NULL); - synchronize_rcu(); - - /* Reset all connected sockets when the device disappear */ + /* Reset all connected sockets when the VQs disappear */ vsock_for_each_connected_socket(&virtio_transport.transport, virtio_vsock_reset_sock); @@ -711,6 +657,78 @@ static void virtio_vsock_remove(struct virtio_device *vdev) /* Delete virtqueues and flush outstanding callbacks if any */ vdev->config->del_vqs(vdev); +} + +static int virtio_vsock_probe(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = NULL; + int ret; + + ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); + if (ret) + return ret; + + /* Only one virtio-vsock device per guest is supported */ + if (rcu_dereference_protected(the_virtio_vsock, + lockdep_is_held(&the_virtio_vsock_mutex))) { + ret = -EBUSY; + goto out; + } + + vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); + if (!vsock) { + ret = -ENOMEM; + goto out; + } + + vsock->vdev = vdev; + + vsock->rx_buf_nr = 0; + vsock->rx_buf_max_nr = 0; + atomic_set(&vsock->queued_replies, 0); + + mutex_init(&vsock->tx_lock); + mutex_init(&vsock->rx_lock); + mutex_init(&vsock->event_lock); + spin_lock_init(&vsock->send_pkt_list_lock); + INIT_LIST_HEAD(&vsock->send_pkt_list); + INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); + INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); + INIT_WORK(&vsock->event_work, virtio_transport_event_work); + INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); + + if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET)) + vsock->seqpacket_allow = true; + + vdev->priv = vsock; + + ret = virtio_vsock_vqs_init(vsock); + if (ret < 0) + goto out; + + rcu_assign_pointer(the_virtio_vsock, vsock); + + mutex_unlock(&the_virtio_vsock_mutex); + + return 0; + +out: + kfree(vsock); + mutex_unlock(&the_virtio_vsock_mutex); + return ret; +} + +static void virtio_vsock_remove(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + + mutex_lock(&the_virtio_vsock_mutex); + + vdev->priv = NULL; + rcu_assign_pointer(the_virtio_vsock, NULL); + synchronize_rcu(); + + virtio_vsock_vqs_del(vsock); /* Other works can be queued before 'config->del_vqs()', so we flush * all works before to free the vsock object to avoid use after free. @@ -725,6 +743,49 @@ static void virtio_vsock_remove(struct virtio_device *vdev) kfree(vsock); } +#ifdef CONFIG_PM_SLEEP +static int virtio_vsock_freeze(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + + mutex_lock(&the_virtio_vsock_mutex); + + rcu_assign_pointer(the_virtio_vsock, NULL); + synchronize_rcu(); + + virtio_vsock_vqs_del(vsock); + + mutex_unlock(&the_virtio_vsock_mutex); + + return 0; +} + +static int virtio_vsock_restore(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + int ret; + + mutex_lock(&the_virtio_vsock_mutex); + + /* Only one virtio-vsock device per guest is supported */ + if (rcu_dereference_protected(the_virtio_vsock, + lockdep_is_held(&the_virtio_vsock_mutex))) { + ret = -EBUSY; + goto out; + } + + ret = virtio_vsock_vqs_init(vsock); + if (ret < 0) + goto out; + + rcu_assign_pointer(the_virtio_vsock, vsock); + +out: + mutex_unlock(&the_virtio_vsock_mutex); + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + static struct virtio_device_id id_table[] = { { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -742,6 +803,10 @@ static struct virtio_driver virtio_vsock_driver = { .id_table = id_table, .probe = virtio_vsock_probe, .remove = virtio_vsock_remove, +#ifdef CONFIG_PM_SLEEP + .freeze = virtio_vsock_freeze, + .restore = virtio_vsock_restore, +#endif }; static int __init virtio_vsock_init(void) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 8b7fb4a9e07b..f74f176e0d9d 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -6,7 +6,7 @@ * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2018-2021 Intel Corporation + * Copyright 2018-2022 Intel Corporation */ #include <linux/export.h> @@ -1344,97 +1344,6 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, return rdev_set_monitor_channel(rdev, chandef); } -void -cfg80211_get_chan_state(struct wireless_dev *wdev, - struct ieee80211_channel **chan, - enum cfg80211_chan_mode *chanmode, - u8 *radar_detect) -{ - int ret; - - *chan = NULL; - *chanmode = CHAN_MODE_UNDEFINED; - - ASSERT_WDEV_LOCK(wdev); - - if (wdev->netdev && !netif_running(wdev->netdev)) - return; - - switch (wdev->iftype) { - case NL80211_IFTYPE_ADHOC: - if (wdev->current_bss) { - *chan = wdev->current_bss->pub.channel; - *chanmode = (wdev->ibss_fixed && - !wdev->ibss_dfs_possible) - ? CHAN_MODE_SHARED - : CHAN_MODE_EXCLUSIVE; - - /* consider worst-case - IBSS can try to return to the - * original user-specified channel as creator */ - if (wdev->ibss_dfs_possible) - *radar_detect |= BIT(wdev->chandef.width); - return; - } - break; - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_P2P_CLIENT: - if (wdev->current_bss) { - *chan = wdev->current_bss->pub.channel; - *chanmode = CHAN_MODE_SHARED; - return; - } - break; - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_P2P_GO: - if (wdev->cac_started) { - *chan = wdev->chandef.chan; - *chanmode = CHAN_MODE_SHARED; - *radar_detect |= BIT(wdev->chandef.width); - } else if (wdev->beacon_interval) { - *chan = wdev->chandef.chan; - *chanmode = CHAN_MODE_SHARED; - - ret = cfg80211_chandef_dfs_required(wdev->wiphy, - &wdev->chandef, - wdev->iftype); - WARN_ON(ret < 0); - if (ret > 0) - *radar_detect |= BIT(wdev->chandef.width); - } - return; - case NL80211_IFTYPE_MESH_POINT: - if (wdev->mesh_id_len) { - *chan = wdev->chandef.chan; - *chanmode = CHAN_MODE_SHARED; - - ret = cfg80211_chandef_dfs_required(wdev->wiphy, - &wdev->chandef, - wdev->iftype); - WARN_ON(ret < 0); - if (ret > 0) - *radar_detect |= BIT(wdev->chandef.width); - } - return; - case NL80211_IFTYPE_OCB: - if (wdev->chandef.chan) { - *chan = wdev->chandef.chan; - *chanmode = CHAN_MODE_SHARED; - return; - } - break; - case NL80211_IFTYPE_MONITOR: - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: - /* these interface types don't really have a channel */ - return; - case NL80211_IFTYPE_UNSPECIFIED: - case NL80211_IFTYPE_WDS: - case NUM_NL80211_IFTYPES: - WARN_ON(1); - } -} - bool cfg80211_any_usable_channels(struct wiphy *wiphy, unsigned long sband_mask, u32 prohibited_flags) diff --git a/net/wireless/core.h b/net/wireless/core.h index 3a7dbd63d8c6..5436ada91b1a 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -3,7 +3,7 @@ * Wireless configuration interface internals. * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H @@ -281,12 +281,6 @@ struct cfg80211_cached_keys { int def; }; -enum cfg80211_chan_mode { - CHAN_MODE_UNDEFINED, - CHAN_MODE_SHARED, - CHAN_MODE_EXCLUSIVE, -}; - struct cfg80211_beacon_registration { struct list_head list; u32 nlportid; @@ -525,12 +519,6 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start) return jiffies_to_msecs(end + (ULONG_MAX - start) + 1); } -void -cfg80211_get_chan_state(struct wireless_dev *wdev, - struct ieee80211_channel **chan, - enum cfg80211_chan_mode *chanmode, - u8 *radar_detect); - int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 8f98e546becf..5d89eec2869a 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -3,7 +3,7 @@ * Some IBSS support code for cfg80211. * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation */ #include <linux/etherdevice.h> @@ -131,8 +131,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, kfree_sensitive(wdev->connect_keys); wdev->connect_keys = connkeys; - wdev->ibss_fixed = params->channel_fixed; - wdev->ibss_dfs_possible = params->userspace_handles_dfs; wdev->chandef = params->chandef; if (connkeys) { params->wep_keys = connkeys->params; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 21e808fcb676..740b29481bc6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -791,6 +791,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { NLA_POLICY_RANGE(NLA_BINARY, NL80211_EHT_MIN_CAPABILITY_LEN, NL80211_EHT_MAX_CAPABILITY_LEN), + [NL80211_ATTR_DISABLE_EHT] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -3173,6 +3174,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) { chandef->width = nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]); + if (chandef->chan->band == NL80211_BAND_S1GHZ) { + /* User input error for channel width doesn't match channel */ + if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) { + NL_SET_ERR_MSG_ATTR(extack, + attrs[NL80211_ATTR_CHANNEL_WIDTH], + "bad channel width"); + return -EINVAL; + } + } if (attrs[NL80211_ATTR_CENTER_FREQ1]) { chandef->center_freq1 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]); @@ -3710,6 +3720,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: if (wdev->ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) goto nla_put_failure_locked; @@ -5171,6 +5182,30 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs) return elems; } +static int nl80211_parse_he_bss_color(struct nlattr *attrs, + struct cfg80211_he_bss_color *he_bss_color) +{ + struct nlattr *tb[NL80211_HE_BSS_COLOR_ATTR_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NL80211_HE_BSS_COLOR_ATTR_MAX, attrs, + he_bss_color_policy, NULL); + if (err) + return err; + + if (!tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]) + return -EINVAL; + + he_bss_color->color = + nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]); + he_bss_color->enabled = + !nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]); + he_bss_color->partial = + nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]); + + return 0; +} + static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, struct nlattr *attrs[], struct cfg80211_beacon_data *bcn) @@ -5251,6 +5286,14 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, bcn->ftm_responder = -1; } + if (attrs[NL80211_ATTR_HE_BSS_COLOR]) { + err = nl80211_parse_he_bss_color(attrs[NL80211_ATTR_HE_BSS_COLOR], + &bcn->he_bss_color); + if (err) + return err; + bcn->he_bss_color_valid = true; + } + if (attrs[NL80211_ATTR_MBSSID_ELEMS]) { struct cfg80211_mbssid_elems *mbssid = nl80211_parse_mbssid_elems(&rdev->wiphy, @@ -5309,30 +5352,6 @@ static int nl80211_parse_he_obss_pd(struct nlattr *attrs, return 0; } -static int nl80211_parse_he_bss_color(struct nlattr *attrs, - struct cfg80211_he_bss_color *he_bss_color) -{ - struct nlattr *tb[NL80211_HE_BSS_COLOR_ATTR_MAX + 1]; - int err; - - err = nla_parse_nested(tb, NL80211_HE_BSS_COLOR_ATTR_MAX, attrs, - he_bss_color_policy, NULL); - if (err) - return err; - - if (!tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]) - return -EINVAL; - - he_bss_color->color = - nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]); - he_bss_color->enabled = - !nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]); - he_bss_color->partial = - nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]); - - return 0; -} - static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev, struct nlattr *attrs, struct cfg80211_ap_settings *params) @@ -5724,14 +5743,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out; } - if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) { - err = nl80211_parse_he_bss_color( - info->attrs[NL80211_ATTR_HE_BSS_COLOR], - ¶ms->he_bss_color); - if (err) - goto out; - } - if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) { err = nl80211_parse_fils_discovery(rdev, info->attrs[NL80211_ATTR_FILS_DISCOVERY], @@ -10377,6 +10388,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE])) req.flags |= ASSOC_REQ_DISABLE_HE; + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT])) + req.flags |= ASSOC_REQ_DISABLE_EHT; + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) memcpy(&req.vht_capa_mask, nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), @@ -11165,6 +11179,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE])) connect.flags |= ASSOC_REQ_DISABLE_HE; + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT])) + connect.flags |= ASSOC_REQ_DISABLE_EHT; + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) memcpy(&connect.vht_capa_mask, nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), @@ -11657,18 +11674,23 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct cfg80211_bitrate_mask mask; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; + wdev_lock(wdev); err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &mask, dev, true); if (err) - return err; + goto out; - return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); + err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask); +out: + wdev_unlock(wdev); + return err; } static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) @@ -15286,23 +15308,79 @@ static int nl80211_set_fils_aad(struct sk_buff *skb, #define NL80211_FLAG_CLEAR_SKB 0x20 #define NL80211_FLAG_NO_WIPHY_MTX 0x40 +#define INTERNAL_FLAG_SELECTORS(__sel) \ + SELECTOR(__sel, NONE, 0) /* must be first */ \ + SELECTOR(__sel, WIPHY, \ + NL80211_FLAG_NEED_WIPHY) \ + SELECTOR(__sel, WDEV, \ + NL80211_FLAG_NEED_WDEV) \ + SELECTOR(__sel, NETDEV, \ + NL80211_FLAG_NEED_NETDEV) \ + SELECTOR(__sel, WIPHY_RTNL, \ + NL80211_FLAG_NEED_WIPHY | \ + NL80211_FLAG_NEED_RTNL) \ + SELECTOR(__sel, WIPHY_RTNL_NOMTX, \ + NL80211_FLAG_NEED_WIPHY | \ + NL80211_FLAG_NEED_RTNL | \ + NL80211_FLAG_NO_WIPHY_MTX) \ + SELECTOR(__sel, WDEV_RTNL, \ + NL80211_FLAG_NEED_WDEV | \ + NL80211_FLAG_NEED_RTNL) \ + SELECTOR(__sel, NETDEV_RTNL, \ + NL80211_FLAG_NEED_NETDEV | \ + NL80211_FLAG_NEED_RTNL) \ + SELECTOR(__sel, NETDEV_UP, \ + NL80211_FLAG_NEED_NETDEV_UP) \ + SELECTOR(__sel, NETDEV_UP_NOTMX, \ + NL80211_FLAG_NEED_NETDEV_UP | \ + NL80211_FLAG_NO_WIPHY_MTX) \ + SELECTOR(__sel, NETDEV_UP_CLEAR, \ + NL80211_FLAG_NEED_NETDEV_UP | \ + NL80211_FLAG_CLEAR_SKB) \ + SELECTOR(__sel, WDEV_UP, \ + NL80211_FLAG_NEED_WDEV_UP) \ + SELECTOR(__sel, WDEV_UP_RTNL, \ + NL80211_FLAG_NEED_WDEV_UP | \ + NL80211_FLAG_NEED_RTNL) \ + SELECTOR(__sel, WIPHY_CLEAR, \ + NL80211_FLAG_NEED_WIPHY | \ + NL80211_FLAG_CLEAR_SKB) + +enum nl80211_internal_flags_selector { +#define SELECTOR(_, name, value) NL80211_IFL_SEL_##name, + INTERNAL_FLAG_SELECTORS(_) +#undef SELECTOR +}; + +static u32 nl80211_internal_flags[] = { +#define SELECTOR(_, name, value) [NL80211_IFL_SEL_##name] = value, + INTERNAL_FLAG_SELECTORS(_) +#undef SELECTOR +}; + static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = NULL; struct wireless_dev *wdev; struct net_device *dev; + u32 internal_flags; + + if (WARN_ON(ops->internal_flags >= ARRAY_SIZE(nl80211_internal_flags))) + return -EINVAL; + + internal_flags = nl80211_internal_flags[ops->internal_flags]; rtnl_lock(); - if (ops->internal_flags & NL80211_FLAG_NEED_WIPHY) { + if (internal_flags & NL80211_FLAG_NEED_WIPHY) { rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); if (IS_ERR(rdev)) { rtnl_unlock(); return PTR_ERR(rdev); } info->user_ptr[0] = rdev; - } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV || - ops->internal_flags & NL80211_FLAG_NEED_WDEV) { + } else if (internal_flags & NL80211_FLAG_NEED_NETDEV || + internal_flags & NL80211_FLAG_NEED_WDEV) { wdev = __cfg80211_wdev_from_attrs(NULL, genl_info_net(info), info->attrs); if (IS_ERR(wdev)) { @@ -15313,7 +15391,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, dev = wdev->netdev; rdev = wiphy_to_rdev(wdev->wiphy); - if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { + if (internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { rtnl_unlock(); return -EINVAL; @@ -15324,7 +15402,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[1] = wdev; } - if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && + if (internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && !wdev_running(wdev)) { rtnl_unlock(); return -ENETDOWN; @@ -15334,12 +15412,12 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[0] = rdev; } - if (rdev && !(ops->internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) { + if (rdev && !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) { wiphy_lock(&rdev->wiphy); /* we keep the mutex locked until post_doit */ __release(&rdev->wiphy.mtx); } - if (!(ops->internal_flags & NL80211_FLAG_NEED_RTNL)) + if (!(internal_flags & NL80211_FLAG_NEED_RTNL)) rtnl_unlock(); return 0; @@ -15348,8 +15426,10 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { + u32 internal_flags = nl80211_internal_flags[ops->internal_flags]; + if (info->user_ptr[1]) { - if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) { + if (internal_flags & NL80211_FLAG_NEED_WDEV) { struct wireless_dev *wdev = info->user_ptr[1]; dev_put(wdev->netdev); @@ -15359,7 +15439,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, } if (info->user_ptr[0] && - !(ops->internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) { + !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; /* we kept the mutex locked since pre_doit */ @@ -15367,7 +15447,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, wiphy_unlock(&rdev->wiphy); } - if (ops->internal_flags & NL80211_FLAG_NEED_RTNL) + if (internal_flags & NL80211_FLAG_NEED_RTNL) rtnl_unlock(); /* If needed, clear the netlink message payload from the SKB @@ -15375,7 +15455,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, * the heap after the SKB is freed. The netlink message header * is still needed for further processing, so leave it intact. */ - if (ops->internal_flags & NL80211_FLAG_CLEAR_SKB) { + if (internal_flags & NL80211_FLAG_CLEAR_SKB) { struct nlmsghdr *nlh = nlmsg_hdr(skb); memset(nlmsg_data(nlh), 0, nlmsg_len(nlh)); @@ -15485,6 +15565,11 @@ error: return err; } +#define SELECTOR(__sel, name, value) \ + ((__sel) == (value)) ? NL80211_IFL_SEL_##name : +int __missing_selector(void); +#define IFLAGS(__val) INTERNAL_FLAG_SELECTORS(__val) __missing_selector() + static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -15493,7 +15578,7 @@ static const struct genl_ops nl80211_ops[] = { .dumpit = nl80211_dump_wiphy, .done = nl80211_dump_wiphy_done, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WIPHY, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, }; @@ -15510,112 +15595,113 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_get_interface, .dumpit = nl80211_dump_interface, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV), }, { .cmd = NL80211_CMD_SET_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_interface, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_NEW_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_interface, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL | - /* we take the wiphy mutex later ourselves */ - NL80211_FLAG_NO_WIPHY_MTX, + .internal_flags = + IFLAGS(NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL | + /* we take the wiphy mutex later ourselves */ + NL80211_FLAG_NO_WIPHY_MTX), }, { .cmd = NL80211_CMD_DEL_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_interface, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_GET_KEY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_KEY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_CLEAR_SKB, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_NEW_KEY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_CLEAR_SKB, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_DEL_KEY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_BEACON, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_set_beacon, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_START_AP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_start_ap, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_STOP_AP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_stop_ap, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_GET_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_station, .dumpit = nl80211_dump_station, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_SET_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_station, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_NEW_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_station, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_DEL_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_station, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_GET_MPATH, @@ -15623,7 +15709,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_get_mpath, .dumpit = nl80211_dump_mpath, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_GET_MPP, @@ -15631,42 +15717,41 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_get_mpp, .dumpit = nl80211_dump_mpp, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_MPATH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mpath, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_NEW_MPATH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_mpath, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_DEL_MPATH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_mpath, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_BSS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_bss, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_GET_REG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_reg_do, .dumpit = nl80211_get_reg_dump, - .internal_flags = 0, /* can be retrieved by unprivileged users */ }, #ifdef CONFIG_CFG80211_CRDA_SUPPORT @@ -15675,7 +15760,6 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_reg, .flags = GENL_ADMIN_PERM, - .internal_flags = 0, }, #endif { @@ -15695,28 +15779,28 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_mesh_config, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_MESH_CONFIG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_mesh_config, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_TRIGGER_SCAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_trigger_scan, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_ABORT_SCAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_abort_scan, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_GET_SCAN, @@ -15728,60 +15812,58 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_sched_scan, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_STOP_SCHED_SCAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_sched_scan, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_AUTHENTICATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_authenticate, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - 0 | - NL80211_FLAG_CLEAR_SKB, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_ASSOCIATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_associate, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - 0 | - NL80211_FLAG_CLEAR_SKB, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_DEAUTHENTICATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_deauthenticate, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_DISASSOCIATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_disassociate, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_JOIN_IBSS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_ibss, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_LEAVE_IBSS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_ibss, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, #ifdef CONFIG_NL80211_TESTMODE { @@ -15790,7 +15872,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_testmode_do, .dumpit = nl80211_testmode_dump, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, #endif { @@ -15798,34 +15880,32 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_connect, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - 0 | - NL80211_FLAG_CLEAR_SKB, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_connect_params, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - 0 | - NL80211_FLAG_CLEAR_SKB, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_DISCONNECT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_disconnect, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_WIPHY_NETNS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_wiphy_netns, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL | - NL80211_FLAG_NO_WIPHY_MTX, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_NO_WIPHY_MTX), }, { .cmd = NL80211_CMD_GET_SURVEY, @@ -15837,121 +15917,120 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_setdel_pmksa, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - 0 | - NL80211_FLAG_CLEAR_SKB, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_DEL_PMKSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_setdel_pmksa, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_FLUSH_PMKSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_flush_pmksa, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_REMAIN_ON_CHANNEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_remain_on_channel, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_cancel_remain_on_channel, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_tx_bitrate_mask, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_REGISTER_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_mgmt, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV), }, { .cmd = NL80211_CMD_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_mgmt, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_FRAME_WAIT_CANCEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_mgmt_cancel_wait, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_SET_POWER_SAVE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_power_save, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_GET_POWER_SAVE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_power_save, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_SET_CQM, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_cqm, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_SET_CHANNEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_channel, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_JOIN_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_mesh, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_LEAVE_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_mesh, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_JOIN_OCB, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_ocb, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_LEAVE_OCB, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_ocb, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, #ifdef CONFIG_PM { @@ -15959,14 +16038,14 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_wowlan, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WIPHY, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, { .cmd = NL80211_CMD_SET_WOWLAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wowlan, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, #endif { @@ -15974,126 +16053,125 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_rekey_data, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - 0 | - NL80211_FLAG_CLEAR_SKB, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_TDLS_MGMT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_mgmt, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_TDLS_OPER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_oper, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_UNEXPECTED_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_unexpected_frame, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_PROBE_CLIENT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_probe_client, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_REGISTER_BEACONS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_beacons, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, { .cmd = NL80211_CMD_SET_NOACK_MAP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_noack_map, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_START_P2P_DEVICE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_p2p_device, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_STOP_P2P_DEVICE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_p2p_device, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_START_NAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_nan, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_STOP_NAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_nan, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_ADD_NAN_FUNCTION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_add_func, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_DEL_NAN_FUNCTION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_del_func, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_CHANGE_NAN_CONFIG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_change_config, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_SET_MCAST_RATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mcast_rate, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_SET_MAC_ACL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mac_acl, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_RADAR_DETECT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_radar_detection, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NO_WIPHY_MTX, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NO_WIPHY_MTX), }, { .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, @@ -16105,41 +16183,41 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_ft_ies, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_CRIT_PROTOCOL_START, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_crit_protocol_start, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_crit_protocol_stop, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_GET_COALESCE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_coalesce, - .internal_flags = NL80211_FLAG_NEED_WIPHY, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, { .cmd = NL80211_CMD_SET_COALESCE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_coalesce, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, { .cmd = NL80211_CMD_CHANNEL_SWITCH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_channel_switch, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_VENDOR, @@ -16147,140 +16225,137 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_vendor_cmd, .dumpit = nl80211_vendor_cmd_dump, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY | - 0 | - NL80211_FLAG_CLEAR_SKB, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_SET_QOS_MAP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_qos_map, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_ADD_TX_TS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_add_tx_ts, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_DEL_TX_TS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_tx_ts, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_channel_switch, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_cancel_channel_switch, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_multicast_to_unicast, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_SET_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_pmk, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - 0 | - NL80211_FLAG_CLEAR_SKB, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_DEL_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_pmk, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_EXTERNAL_AUTH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_external_auth, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_CONTROL_PORT_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_control_port, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_ftm_responder_stats, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_PEER_MEASUREMENT_START, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_pmsr_start, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_NOTIFY_RADAR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_notify_radar_detection, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_UPDATE_OWE_INFO, .doit = nl80211_update_owe_info, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_PROBE_MESH_LINK, .doit = nl80211_probe_mesh_link, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_TID_CONFIG, .doit = nl80211_set_tid_config, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_SET_SAR_SPECS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_sar_specs, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_COLOR_CHANGE_REQUEST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_color_change, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_FILS_AAD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_fils_aad, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, }; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c76cd973f06e..58e83ce642ad 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -807,6 +807,8 @@ static int __init load_builtin_regdb_keys(void) return 0; } +MODULE_FIRMWARE("regulatory.db.p7s"); + static bool regdb_has_valid_signature(const u8 *data, unsigned int size) { const struct firmware *sig; @@ -1078,6 +1080,8 @@ static void regdb_fw_cb(const struct firmware *fw, void *context) release_firmware(fw); } +MODULE_FIRMWARE("regulatory.db"); + static int query_regdb_file(const char *alpha2) { ASSERT_RTNL(); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 4a6d86432910..6d82bd9eaf8c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1829,7 +1829,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, if (tmp && tmp->datalen >= sizeof(struct ieee80211_s1g_oper_ie)) { struct ieee80211_s1g_oper_ie *s1gop = (void *)tmp->data; - return s1gop->primary_ch; + return s1gop->oper_ch; } } else { tmp = cfg80211_find_elem(WLAN_EID_DS_PARAMS, ie, ielen); diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 3bddcbdf2e40..0412814a2295 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -79,7 +79,6 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v) { struct sock *s; struct x25_sock *x25; - struct net_device *dev; const char *devname; if (v == SEQ_START_TOKEN) { @@ -91,7 +90,7 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v) s = sk_entry(v); x25 = x25_sk(s); - if (!x25->neighbour || (dev = x25->neighbour->dev) == NULL) + if (!x25->neighbour || !x25->neighbour->dev) devname = "???"; else devname = x25->neighbour->dev->name; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 040c73345b7c..e0a4526ab66b 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -639,7 +639,7 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len if (sk_can_busy_loop(sk)) sk_busy_loop(sk, 1); /* only support non-blocking sockets */ - if (xsk_no_wakeup(sk)) + if (xs->zc && xsk_no_wakeup(sk)) return 0; pool = xs->pool; @@ -967,6 +967,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xp_get_pool(umem_xs->pool); xs->pool = umem_xs->pool; + + /* If underlying shared umem was created without Tx + * ring, allocate Tx descs array that Tx batching API + * utilizes + */ + if (xs->tx && !xs->pool->tx_descs) { + err = xp_alloc_tx_descs(xs->pool, xs); + if (err) { + xp_put_pool(xs->pool); + sockfd_put(sock); + goto out_unlock; + } + } } xdp_get_umem(umem_xs->umem); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index af040ffa14ff..87bdd71c7bb6 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -42,6 +42,16 @@ void xp_destroy(struct xsk_buff_pool *pool) kvfree(pool); } +int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs) +{ + pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), + GFP_KERNEL); + if (!pool->tx_descs) + return -ENOMEM; + + return 0; +} + struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem) { @@ -59,11 +69,9 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, if (!pool->heads) goto out; - if (xs->tx) { - pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL); - if (!pool->tx_descs) + if (xs->tx) + if (xp_alloc_tx_descs(pool, xs)) goto out; - } pool->chunk_mask = ~((u64)umem->chunk_size - 1); pool->addrs_cnt = umem->size; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 36aa01d92b65..35c7e89b2e7d 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -117,7 +117,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur sp = skb_sec_path(skb); x = sp->xvec[sp->len - 1]; - if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND) + if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN) return skb; /* This skb was already validated on the upper/virtual dev */ @@ -212,7 +212,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, int err; struct dst_entry *dst; struct net_device *dev; - struct xfrm_state_offload *xso = &x->xso; + struct xfrm_dev_offload *xso = &x->xso; xfrm_address_t *saddr; xfrm_address_t *daddr; @@ -264,15 +264,16 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->dev = dev; netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC); xso->real_dev = dev; - xso->num_exthdrs = 1; - /* Don't forward bit that is not implemented */ - xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6; + + if (xuo->flags & XFRM_OFFLOAD_INBOUND) + xso->dir = XFRM_DEV_OFFLOAD_IN; + else + xso->dir = XFRM_DEV_OFFLOAD_OUT; err = dev->xfrmdev_ops->xdo_dev_state_add(x); if (err) { - xso->num_exthdrs = 0; - xso->flags = 0; xso->dev = NULL; + xso->dir = 0; xso->real_dev = NULL; dev_put_track(dev, &xso->dev_tracker); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 00bd0ecff5a1..f1876ea61fdc 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3744,7 +3744,7 @@ static int stale_bundle(struct dst_entry *dst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) { - dst->dev = dev_net(dev)->loopback_dev; + dst->dev = blackhole_netdev; dev_hold(dst->dev); dev_put(dev); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index b749935152ba..08564e0eef20 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -751,7 +751,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; - struct xfrm_state_offload *xso; + struct xfrm_dev_offload *xso; hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { xso = &x->xso; @@ -835,7 +835,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; - struct xfrm_state_offload *xso; + struct xfrm_dev_offload *xso; restart: hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { xso = &x->xso; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 64fa8fdd6bbd..6a58fec6a1fb 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -840,7 +840,7 @@ static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) return 0; } -static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb) +static int copy_user_offload(struct xfrm_dev_offload *xso, struct sk_buff *skb) { struct xfrm_user_offload *xuo; struct nlattr *attr; @@ -852,7 +852,8 @@ static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb xuo = nla_data(attr); memset(xuo, 0, sizeof(*xuo)); xuo->ifindex = xso->dev->ifindex; - xuo->flags = xso->flags; + if (xso->dir == XFRM_DEV_OFFLOAD_IN) + xuo->flags = XFRM_OFFLOAD_INBOUND; return 0; } |