diff options
Diffstat (limited to 'net')
216 files changed, 5708 insertions, 3196 deletions
diff --git a/net/Kconfig b/net/Kconfig index 9c456acc379e..8d955195c069 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -429,6 +429,10 @@ config GRO_CELLS config SOCK_VALIDATE_XMIT bool +config NET_SELFTESTS + def_tristate PHYLIB + depends on PHYLIB + config NET_SOCK_MSG bool default n diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a5e313cd6f44..789f257be24f 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -456,7 +456,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, * if: * * - the send time is within our MAX_AGGREGATION_MS time - * - the resulting packet wont be bigger than + * - the resulting packet won't be bigger than * MAX_AGGREGATION_BYTES * otherwise aggregation is not possible */ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index bcd543ce835b..7dc133cfc363 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -25,7 +25,6 @@ #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/netlink.h> -#include <linux/preempt.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index f8761281aab0..434b4f042909 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -890,6 +890,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); + tt_vlan->reserved = 0; tt_vlan++; } @@ -973,6 +974,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); + tt_vlan->reserved = 0; tt_vlan++; } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 7c0b475cc22a..2be5d4a712c5 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1659,19 +1659,19 @@ struct batadv_priv { /** @tp_list: list of tp sessions */ struct hlist_head tp_list; - /** @tp_num: number of currently active tp sessions */ + /** @orig_hash: hash table containing mesh participants (orig nodes) */ struct batadv_hashtable *orig_hash; - /** @orig_hash: hash table containing mesh participants (orig nodes) */ + /** @forw_bat_list_lock: lock protecting forw_bat_list */ spinlock_t forw_bat_list_lock; - /** @forw_bat_list_lock: lock protecting forw_bat_list */ + /** @forw_bcast_list_lock: lock protecting forw_bcast_list */ spinlock_t forw_bcast_list_lock; - /** @forw_bcast_list_lock: lock protecting forw_bcast_list */ + /** @tp_list_lock: spinlock protecting @tp_list */ spinlock_t tp_list_lock; - /** @tp_list_lock: spinlock protecting @tp_list */ + /** @tp_num: number of currently active tp sessions */ atomic_t tp_num; /** @orig_work: work queue callback item for orig node purging */ diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index cff4944d5b66..97617d02c8f9 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -205,8 +205,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev, } } - /* use the neighbour cache for matching addresses assigned by SLAAC - */ + /* use the neighbour cache for matching addresses assigned by SLAAC */ neigh = __ipv6_neigh_lookup(dev->netdev, nexthop); if (neigh) { list_for_each_entry_rcu(peer, &dev->peers, list) { @@ -841,8 +840,6 @@ static void chan_close_cb(struct l2cap_chan *chan) } else { spin_unlock(&devices_lock); } - - return; } static void chan_state_change_cb(struct l2cap_chan *chan, int state, int err) diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 400c5130dc0a..e0ab4cd7afc3 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -99,6 +99,13 @@ config BT_MSFTEXT This options enables support for the Microsoft defined HCI vendor extensions. +config BT_AOSPEXT + bool "Enable Android Open Source Project extensions" + depends on BT + help + This options enables support for the Android Open Source + Project defined HCI vendor extensions. + config BT_DEBUGFS bool "Export Bluetooth internals in debugfs" depends on BT && DEBUG_FS diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 1c645fba8c49..cc0995301f93 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -20,5 +20,6 @@ bluetooth-$(CONFIG_BT_BREDR) += sco.o bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o bluetooth-$(CONFIG_BT_LEDS) += leds.o bluetooth-$(CONFIG_BT_MSFTEXT) += msft.o +bluetooth-$(CONFIG_BT_AOSPEXT) += aosp.o bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o diff --git a/net/bluetooth/aosp.c b/net/bluetooth/aosp.c new file mode 100644 index 000000000000..a1b7762335a5 --- /dev/null +++ b/net/bluetooth/aosp.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Intel Corporation + */ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> + +#include "aosp.h" + +void aosp_do_open(struct hci_dev *hdev) +{ + struct sk_buff *skb; + + if (!hdev->aosp_capable) + return; + + bt_dev_dbg(hdev, "Initialize AOSP extension"); + + /* LE Get Vendor Capabilities Command */ + skb = __hci_cmd_sync(hdev, hci_opcode_pack(0x3f, 0x153), 0, NULL, + HCI_CMD_TIMEOUT); + if (IS_ERR(skb)) + return; + + kfree_skb(skb); +} + +void aosp_do_close(struct hci_dev *hdev) +{ + if (!hdev->aosp_capable) + return; + + bt_dev_dbg(hdev, "Cleanup of AOSP extension"); +} diff --git a/net/bluetooth/aosp.h b/net/bluetooth/aosp.h new file mode 100644 index 000000000000..328fc6d39f70 --- /dev/null +++ b/net/bluetooth/aosp.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Intel Corporation + */ + +#if IS_ENABLED(CONFIG_BT_AOSPEXT) + +void aosp_do_open(struct hci_dev *hdev); +void aosp_do_close(struct hci_dev *hdev); + +#else + +static inline void aosp_do_open(struct hci_dev *hdev) {} +static inline void aosp_do_close(struct hci_dev *hdev) {} + +#endif diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h index a6f8d03d4aaf..830723971cf8 100644 --- a/net/bluetooth/ecdh_helper.h +++ b/net/bluetooth/ecdh_helper.h @@ -25,6 +25,6 @@ int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pair_public_key[64], u8 secret[32]); -int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key); +int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32]); int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]); int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6ffa89e3ba0a..88ec08978ff4 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -772,6 +772,16 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) hci_conn_del(conn); + /* The suspend notifier is waiting for all devices to disconnect and an + * LE connect cancel will result in an hci_le_conn_failed. Once the last + * connection is deleted, we should also wake the suspend queue to + * complete suspend operations. + */ + if (list_empty(&hdev->conn_hash.list) && + test_and_clear_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks)) { + wake_up(&hdev->suspend_wait_q); + } + /* Since we may have temporarily stopped the background scanning in * favor of connection establishment, we should restart it. */ @@ -1830,8 +1840,6 @@ u32 hci_conn_get_phy(struct hci_conn *conn) { u32 phys = 0; - hci_dev_lock(conn->hdev); - /* BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 2, Part B page 471: * Table 6.2: Packets defined for synchronous, asynchronous, and * CSB logical transport types. @@ -1928,7 +1936,5 @@ u32 hci_conn_get_phy(struct hci_conn *conn) break; } - hci_dev_unlock(conn->hdev); - return phys; } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b0d9c36acc03..fd12f1652bdf 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -44,6 +44,7 @@ #include "smp.h" #include "leds.h" #include "msft.h" +#include "aosp.h" static void hci_rx_work(struct work_struct *work); static void hci_cmd_work(struct work_struct *work); @@ -1586,6 +1587,7 @@ setup_failed: ret = hdev->set_diag(hdev, true); msft_do_open(hdev); + aosp_do_open(hdev); clear_bit(HCI_INIT, &hdev->flags); @@ -1782,6 +1784,7 @@ int hci_dev_do_close(struct hci_dev *hdev) hci_sock_dev_event(hdev, HCI_DEV_DOWN); + aosp_do_close(hdev); msft_do_close(hdev); if (hdev->flush) @@ -3760,6 +3763,8 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_scan_window_suspend = 0x0012; hdev->le_scan_int_discovery = DISCOV_LE_SCAN_INT; hdev->le_scan_window_discovery = DISCOV_LE_SCAN_WIN; + hdev->le_scan_int_adv_monitor = 0x0060; + hdev->le_scan_window_adv_monitor = 0x0030; hdev->le_scan_int_connect = 0x0060; hdev->le_scan_window_connect = 0x0060; hdev->le_conn_min_interval = 0x0018; diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 1a0ab58bfad0..47f4f21fbc1a 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -274,7 +274,7 @@ static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS) ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -292,7 +292,7 @@ static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = hci_dev_test_flag(hdev, HCI_SC_ONLY) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_SC_ONLY) ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -428,7 +428,7 @@ static ssize_t ssp_debug_mode_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = hdev->ssp_debug_mode ? 'Y': 'N'; + buf[0] = hdev->ssp_debug_mode ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -742,7 +742,7 @@ static ssize_t force_static_address_read(struct file *file, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 67668be3461e..016b2999f219 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -395,6 +395,29 @@ done: hci_dev_unlock(hdev); } +static void hci_cc_set_event_filter(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *)skb->data); + struct hci_cp_set_event_filter *cp; + void *sent; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_SET_EVENT_FLT); + if (!sent) + return; + + cp = (struct hci_cp_set_event_filter *)sent; + + if (cp->flt_type == HCI_FLT_CLEAR_ALL) + hci_dev_clear_flag(hdev, HCI_EVENT_FILTER_CONFIGURED); + else + hci_dev_set_flag(hdev, HCI_EVENT_FILTER_CONFIGURED); +} + static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_read_class_of_dev *rp = (void *) skb->data; @@ -1189,12 +1212,11 @@ static void hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev, hci_dev_lock(hdev); - if (!hdev->cur_adv_instance) { + if (!cp->handle) { /* Store in hdev for instance 0 (Set adv and Directed advs) */ bacpy(&hdev->random_addr, &cp->bdaddr); } else { - adv_instance = hci_find_adv_instance(hdev, - hdev->cur_adv_instance); + adv_instance = hci_find_adv_instance(hdev, cp->handle); if (adv_instance) bacpy(&adv_instance->random_addr, &cp->bdaddr); } @@ -1755,17 +1777,16 @@ static void hci_cc_set_ext_adv_param(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); hdev->adv_addr_type = cp->own_addr_type; - if (!hdev->cur_adv_instance) { + if (!cp->handle) { /* Store in hdev for instance 0 */ hdev->adv_tx_power = rp->tx_power; } else { - adv_instance = hci_find_adv_instance(hdev, - hdev->cur_adv_instance); + adv_instance = hci_find_adv_instance(hdev, cp->handle); if (adv_instance) adv_instance->tx_power = rp->tx_power; } /* Update adv data as tx power is known now */ - hci_req_update_adv_data(hdev, hdev->cur_adv_instance); + hci_req_update_adv_data(hdev, cp->handle); hci_dev_unlock(hdev); } @@ -3328,6 +3349,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_cc_write_scan_enable(hdev, skb); break; + case HCI_OP_SET_EVENT_FLT: + hci_cc_set_event_filter(hdev, skb); + break; + case HCI_OP_READ_CLASS_OF_DEV: hci_cc_read_class_of_dev(hdev, skb); break; @@ -5005,6 +5030,7 @@ static void hci_loglink_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) return; hchan->handle = le16_to_cpu(ev->handle); + hchan->amp = true; BT_DBG("hcon %p mgr %p hchan %p", hcon, hcon->amp_mgr, hchan); @@ -5037,7 +5063,7 @@ static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev, hci_dev_lock(hdev); hchan = hci_chan_lookup_handle(hdev, le16_to_cpu(ev->handle)); - if (!hchan) + if (!hchan || !hchan->amp) goto unlock; amp_destroy_logical_link(hchan, ev->reason); @@ -5280,12 +5306,12 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM) return; - if (!hdev->cur_adv_instance) { + if (!ev->handle) { bacpy(&conn->resp_addr, &hdev->random_addr); return; } - adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); + adv_instance = hci_find_adv_instance(hdev, ev->handle); if (adv_instance) bacpy(&conn->resp_addr, &adv_instance->random_addr); } @@ -5863,7 +5889,7 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, params->conn_latency = latency; params->supervision_timeout = timeout; store_hint = 0x01; - } else{ + } else { store_hint = 0x00; } @@ -5911,7 +5937,7 @@ static void hci_le_phy_update_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); - if (!ev->status) + if (ev->status) return; hci_dev_lock(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e55976db4403..560b74d421a8 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -847,6 +847,10 @@ static u8 update_white_list(struct hci_request *req) */ bool allow_rpa = hdev->suspended; + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + allow_rpa = true; + /* Go through the current white list programmed into the * controller one by one and check if that address is still * in the list of pending connections or list of devices to @@ -1131,14 +1135,14 @@ static void hci_req_clear_event_filter(struct hci_request *req) { struct hci_cp_set_event_filter f; - memset(&f, 0, sizeof(f)); - f.flt_type = HCI_FLT_CLEAR_ALL; - hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &f); + if (!hci_dev_test_flag(req->hdev, HCI_BREDR_ENABLED)) + return; - /* Update page scan state (since we may have modified it when setting - * the event filter). - */ - __hci_req_update_scan(req); + if (hci_dev_test_flag(req->hdev, HCI_EVENT_FILTER_CONFIGURED)) { + memset(&f, 0, sizeof(f)); + f.flt_type = HCI_FLT_CLEAR_ALL; + hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &f); + } } static void hci_req_set_event_filter(struct hci_request *req) @@ -1147,6 +1151,10 @@ static void hci_req_set_event_filter(struct hci_request *req) struct hci_cp_set_event_filter f; struct hci_dev *hdev = req->hdev; u8 scan = SCAN_DISABLED; + bool scanning = test_bit(HCI_PSCAN, &hdev->flags); + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + return; /* Always clear event filter when starting */ hci_req_clear_event_filter(req); @@ -1167,12 +1175,13 @@ static void hci_req_set_event_filter(struct hci_request *req) scan = SCAN_PAGE; } - if (scan) + if (scan && !scanning) { set_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks); - else + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + } else if (!scan && scanning) { set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); - - hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + } } static void cancel_adv_timeout(struct hci_dev *hdev) @@ -1315,9 +1324,14 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) hdev->advertising_paused = true; hdev->advertising_old_state = old_state; - /* Disable page scan */ - page_scan = SCAN_DISABLED; - hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &page_scan); + + /* Disable page scan if enabled */ + if (test_bit(HCI_PSCAN, &hdev->flags)) { + page_scan = SCAN_DISABLED; + hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, + &page_scan); + set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); + } /* Disable LE passive scan if enabled */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { @@ -1328,9 +1342,6 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) /* Disable advertisement filters */ hci_req_add_set_adv_filter_enable(&req, false); - /* Mark task needing completion */ - set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); - /* Prevent disconnects from causing scanning to be re-enabled */ hdev->scanning_paused = true; @@ -1364,7 +1375,10 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) hdev->suspended = false; hdev->scanning_paused = false; + /* Clear any event filters and restore scan state */ hci_req_clear_event_filter(&req); + __hci_req_update_scan(&req); + /* Reset passive/background scanning to normal */ __hci_update_background_scan(&req); /* Enable all of the advertisement filters */ @@ -1637,9 +1651,8 @@ static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) { u8 scan_rsp_len = 0; - if (hdev->appearance) { + if (hdev->appearance) scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); - } return append_local_name(hdev, ptr, scan_rsp_len); } @@ -1657,9 +1670,8 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, instance_flags = adv_instance->flags; - if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) { + if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); - } memcpy(&ptr[scan_rsp_len], adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); @@ -2035,7 +2047,8 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, /* If Controller supports LL Privacy use own address type is * 0x03 */ - if (use_ll_privacy(hdev)) + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; else *own_addr_type = ADDR_LE_DEV_RANDOM; @@ -2170,7 +2183,8 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_CONN_IND); else cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND); - } else if (adv_instance_is_scannable(hdev, instance)) { + } else if (adv_instance_is_scannable(hdev, instance) || + (flags & MGMT_ADV_PARAM_SCAN_RSP)) { if (secondary_adv) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_SCAN_IND); else @@ -2508,7 +2522,8 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, /* If Controller supports LL Privacy use own address type is * 0x03 */ - if (use_ll_privacy(hdev)) + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; else *own_addr_type = ADDR_LE_DEV_RANDOM; @@ -2941,6 +2956,9 @@ static int bredr_inquiry(struct hci_request *req, unsigned long opt) const u8 liac[3] = { 0x00, 0x8b, 0x9e }; struct hci_cp_inquiry cp; + if (test_bit(HCI_INQUIRY, &req->hdev->flags)) + return 0; + bt_dev_dbg(req->hdev, ""); hci_dev_lock(req->hdev); @@ -3241,6 +3259,7 @@ bool hci_req_stop_discovery(struct hci_request *req) if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { cancel_delayed_work(&hdev->le_scan_disable); + cancel_delayed_work(&hdev->le_scan_restart); hci_req_add_le_scan_disable(req, false); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 72c2f5226d67..b6a88b8256c7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -451,6 +451,8 @@ struct l2cap_chan *l2cap_chan_create(void) if (!chan) return NULL; + skb_queue_head_init(&chan->tx_q); + skb_queue_head_init(&chan->srej_q); mutex_init(&chan->lock); /* Set default lock nesting level */ @@ -490,14 +492,14 @@ static void l2cap_chan_destroy(struct kref *kref) void l2cap_chan_hold(struct l2cap_chan *c) { - BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref)); + BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); kref_get(&c->kref); } void l2cap_chan_put(struct l2cap_chan *c) { - BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref)); + BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); kref_put(&c->kref, l2cap_chan_destroy); } @@ -516,7 +518,9 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan) chan->flush_to = L2CAP_DEFAULT_FLUSH_TO; chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO; chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; + chan->conf_state = 0; + set_bit(CONF_NOT_COMPLETE, &chan->conf_state); set_bit(FLAG_FORCE_ACTIVE, &chan->flags); } @@ -648,7 +652,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) if (test_bit(CONF_NOT_COMPLETE, &chan->conf_state)) return; - switch(chan->mode) { + switch (chan->mode) { case L2CAP_MODE_BASIC: break; @@ -672,8 +676,6 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) skb_queue_purge(&chan->tx_q); break; } - - return; } EXPORT_SYMBOL_GPL(l2cap_chan_del); @@ -1690,7 +1692,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) smp_conn_security(hcon, hcon->pending_sec_level); /* For LE slave connections, make sure the connection interval - * is in the range of the minium and maximum interval that has + * is in the range of the minimum and maximum interval that has * been configured for this connection. If not, then trigger * the connection update procedure. */ @@ -5921,7 +5923,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, struct l2cap_ecred_conn_req *req = (void *) data; struct { struct l2cap_ecred_conn_rsp rsp; - __le16 dcid[5]; + __le16 dcid[L2CAP_ECRED_MAX_CID]; } __packed pdu; struct l2cap_chan *chan, *pchan; u16 mtu, mps; @@ -5938,6 +5940,14 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, goto response; } + cmd_len -= sizeof(*req); + num_scid = cmd_len / sizeof(u16); + + if (num_scid > ARRAY_SIZE(pdu.dcid)) { + result = L2CAP_CR_LE_INVALID_PARAMS; + goto response; + } + mtu = __le16_to_cpu(req->mtu); mps = __le16_to_cpu(req->mps); @@ -5970,8 +5980,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, } result = L2CAP_CR_LE_SUCCESS; - cmd_len -= sizeof(*req); - num_scid = cmd_len / sizeof(u16); for (i = 0; i < num_scid; i++) { u16 scid = __le16_to_cpu(req->scid[i]); @@ -7253,7 +7261,7 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, L2CAP_TXSEQ_EXPECTED) { l2cap_pass_to_tx(chan, control); - BT_DBG("buffer_seq %d->%d", chan->buffer_seq, + BT_DBG("buffer_seq %u->%u", chan->buffer_seq, __next_seq(chan, chan->buffer_seq)); chan->buffer_seq = __next_seq(chan, chan->buffer_seq); @@ -7542,7 +7550,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, BT_DBG("chan %p, len %d", chan, skb->len); /* If we receive data on a fixed channel before the info req/rsp - * procdure is done simply assume that the channel is supported + * procedure is done simply assume that the channel is supported * and mark it as ready. */ if (chan->chan_type == L2CAP_CHAN_FIXED) @@ -7762,7 +7770,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) return conn; } -static bool is_valid_psm(u16 psm, u8 dst_type) { +static bool is_valid_psm(u16 psm, u8 dst_type) +{ if (!psm) return false; @@ -8356,7 +8365,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) if (!conn) goto drop; - BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags); + BT_DBG("conn %p len %u flags 0x%x", conn, skb->len, flags); switch (flags) { case ACL_START: @@ -8386,10 +8395,10 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) return; } - BT_DBG("Start: total len %d, frag len %d", len, skb->len); + BT_DBG("Start: total len %d, frag len %u", len, skb->len); if (skb->len > len) { - BT_ERR("Frame is too long (len %d, expected len %d)", + BT_ERR("Frame is too long (len %u, expected len %d)", skb->len, len); l2cap_conn_unreliable(conn, ECOMM); goto drop; @@ -8402,7 +8411,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) break; case ACL_CONT: - BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len); + BT_DBG("Cont: frag len %u (expecting %u)", skb->len, conn->rx_len); if (!conn->rx_skb) { BT_ERR("Unexpected continuation frame (len %d)", skb->len); @@ -8423,7 +8432,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) } if (skb->len > conn->rx_len) { - BT_ERR("Fragment is too long (len %d, expected %d)", + BT_ERR("Fragment is too long (len %u, expected %u)", skb->len, conn->rx_len); l2cap_recv_reset(conn); l2cap_conn_unreliable(conn, ECOMM); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f1b1edd0b697..c99d65ef13b1 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -179,9 +179,17 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct sockaddr_l2 la; int len, err = 0; + bool zapped; BT_DBG("sk %p", sk); + lock_sock(sk); + zapped = sock_flag(sk, SOCK_ZAPPED); + release_sock(sk); + + if (zapped) + return -EINVAL; + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 74971b4bd457..f9be7f9084d6 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -40,7 +40,7 @@ #include "msft.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 19 +#define MGMT_REVISION 20 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -108,6 +108,8 @@ static const u16 mgmt_commands[] = { MGMT_OP_START_LIMITED_DISCOVERY, MGMT_OP_READ_EXT_INFO, MGMT_OP_SET_APPEARANCE, + MGMT_OP_GET_PHY_CONFIGURATION, + MGMT_OP_SET_PHY_CONFIGURATION, MGMT_OP_SET_BLOCKED_KEYS, MGMT_OP_SET_WIDEBAND_SPEECH, MGMT_OP_READ_CONTROLLER_CAP, @@ -166,6 +168,8 @@ static const u16 mgmt_events[] = { MGMT_EV_PHY_CONFIGURATION_CHANGED, MGMT_EV_EXP_FEATURE_CHANGED, MGMT_EV_DEVICE_FLAGS_CHANGED, + MGMT_EV_ADV_MONITOR_ADDED, + MGMT_EV_ADV_MONITOR_REMOVED, MGMT_EV_CONTROLLER_SUSPEND, MGMT_EV_CONTROLLER_RESUME, }; @@ -196,8 +200,6 @@ static const u16 mgmt_untrusted_events[] = { MGMT_EV_EXT_INDEX_REMOVED, MGMT_EV_EXT_INFO_CHANGED, MGMT_EV_EXP_FEATURE_CHANGED, - MGMT_EV_ADV_MONITOR_ADDED, - MGMT_EV_ADV_MONITOR_REMOVED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -3728,8 +3730,11 @@ static int read_controller_cap(struct sock *sk, struct hci_dev *hdev, /* When the Read Simple Pairing Options command is supported, then * the remote public key validation is supported. + * + * Alternatively, when Microsoft extensions are available, they can + * indicate support for public key validation as well. */ - if (hdev->commands[41] & 0x08) + if ((hdev->commands[41] & 0x08) || msft_curve_validity(hdev)) flags |= 0x01; /* Remote public key validation (BR/EDR) */ flags |= 0x02; /* Remote public key validation (LE) */ @@ -3982,7 +3987,7 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, if (hdev_is_powered(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_NOT_POWERED); + MGMT_STATUS_REJECTED); /* Parameters are limited to a single octet */ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) @@ -7432,6 +7437,7 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev) flags |= MGMT_ADV_PARAM_TIMEOUT; flags |= MGMT_ADV_PARAM_INTERVALS; flags |= MGMT_ADV_PARAM_TX_POWER; + flags |= MGMT_ADV_PARAM_SCAN_RSP; /* In extended adv TX_POWER returned from Set Adv Param * will be always valid. @@ -7475,7 +7481,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, * advertising. */ if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) - return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES, MGMT_STATUS_NOT_SUPPORTED); hci_dev_lock(hdev); @@ -7976,7 +7982,6 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev, goto unlock; } - hdev->cur_adv_instance = cp->instance; /* Submit request for advertising params if ext adv available */ if (ext_adv_capable(hdev)) { hci_req_init(&req, hdev); diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index 47b104f318e9..e28f15439ce4 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -142,6 +142,9 @@ static bool read_supported_features(struct hci_dev *hdev, msft->evt_prefix_len = rp->evt_prefix_len; msft->features = __le64_to_cpu(rp->features); + if (msft->features & MSFT_FEATURE_MASK_CURVE_VALIDITY) + hdev->msft_curve_validity = true; + kfree_skb(skb); return true; @@ -605,3 +608,8 @@ int msft_set_filter_enable(struct hci_dev *hdev, bool enable) return err; } + +bool msft_curve_validity(struct hci_dev *hdev) +{ + return hdev->msft_curve_validity; +} diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h index 88ed613dfa08..6e56d94b88d8 100644 --- a/net/bluetooth/msft.h +++ b/net/bluetooth/msft.h @@ -22,6 +22,7 @@ int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, u16 handle); void msft_req_add_set_filter_enable(struct hci_request *req, bool enable); int msft_set_filter_enable(struct hci_dev *hdev, bool enable); +bool msft_curve_validity(struct hci_dev *hdev); #else @@ -54,4 +55,9 @@ static inline int msft_set_filter_enable(struct hci_dev *hdev, bool enable) return -EOPNOTSUPP; } +static inline bool msft_curve_validity(struct hci_dev *hdev) +{ + return false; +} + #endif diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 22a110f37abc..3bd41563f118 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -51,8 +51,8 @@ struct sco_conn { unsigned int mtu; }; -#define sco_conn_lock(c) spin_lock(&c->lock); -#define sco_conn_unlock(c) spin_unlock(&c->lock); +#define sco_conn_lock(c) spin_lock(&c->lock) +#define sco_conn_unlock(c) spin_unlock(&c->lock) static void sco_sock_close(struct sock *sk); static void sco_sock_kill(struct sock *sk); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index b0c1ee110eff..5c17acfb1645 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -54,7 +54,7 @@ #define SMP_ALLOW_CMD(smp, code) set_bit(code, &smp->allow_cmd) /* Keys which are not distributed with Secure Connections */ -#define SMP_SC_NO_DIST (SMP_DIST_ENC_KEY | SMP_DIST_LINK_KEY); +#define SMP_SC_NO_DIST (SMP_DIST_ENC_KEY | SMP_DIST_LINK_KEY) #define SMP_TIMEOUT msecs_to_jiffies(30000) @@ -398,7 +398,7 @@ static int smp_e(const u8 *k, u8 *r) SMP_DBG("r %16phN", r); - memzero_explicit(&ctx, sizeof (ctx)); + memzero_explicit(&ctx, sizeof(ctx)); return err; } @@ -595,7 +595,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data) if (!chan) return; - BT_DBG("code 0x%2.2x", code); + bt_dev_dbg(conn->hcon->hdev, "code 0x%2.2x", code); iv[0].iov_base = &code; iv[0].iov_len = 1; @@ -859,7 +859,8 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, memset(smp->tk, 0, sizeof(smp->tk)); clear_bit(SMP_FLAG_TK_VALID, &smp->flags); - BT_DBG("tk_request: auth:%d lcl:%d rem:%d", auth, local_io, remote_io); + bt_dev_dbg(hcon->hdev, "auth:%d lcl:%d rem:%d", auth, local_io, + remote_io); /* If neither side wants MITM, either "just" confirm an incoming * request or use just-works for outgoing ones. The JUST_CFM @@ -924,7 +925,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, get_random_bytes(&passkey, sizeof(passkey)); passkey %= 1000000; put_unaligned_le32(passkey, smp->tk); - BT_DBG("PassKey: %d", passkey); + bt_dev_dbg(hcon->hdev, "PassKey: %d", passkey); set_bit(SMP_FLAG_TK_VALID, &smp->flags); } @@ -949,7 +950,7 @@ static u8 smp_confirm(struct smp_chan *smp) struct smp_cmd_pairing_confirm cp; int ret; - BT_DBG("conn %p", conn); + bt_dev_dbg(conn->hcon->hdev, "conn %p", conn); ret = smp_c1(smp->tk, smp->prnd, smp->preq, smp->prsp, conn->hcon->init_addr_type, &conn->hcon->init_addr, @@ -977,7 +978,8 @@ static u8 smp_random(struct smp_chan *smp) u8 confirm[16]; int ret; - BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave"); + bt_dev_dbg(conn->hcon->hdev, "conn %p %s", conn, + conn->hcon->out ? "master" : "slave"); ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp, hcon->init_addr_type, &hcon->init_addr, @@ -1236,7 +1238,7 @@ static void smp_distribute_keys(struct smp_chan *smp) struct hci_dev *hdev = hcon->hdev; __u8 *keydist; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); rsp = (void *) &smp->prsp[1]; @@ -1266,7 +1268,7 @@ static void smp_distribute_keys(struct smp_chan *smp) *keydist &= ~SMP_SC_NO_DIST; } - BT_DBG("keydist 0x%x", *keydist); + bt_dev_dbg(hdev, "keydist 0x%x", *keydist); if (*keydist & SMP_DIST_ENC_KEY) { struct smp_cmd_encrypt_info enc; @@ -1366,13 +1368,14 @@ static void smp_timeout(struct work_struct *work) security_timer.work); struct l2cap_conn *conn = smp->conn; - BT_DBG("conn %p", conn); + bt_dev_dbg(conn->hcon->hdev, "conn %p", conn); hci_disconnect(conn->hcon, HCI_ERROR_REMOTE_USER_TERM); } static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) { + struct hci_conn *hcon = conn->hcon; struct l2cap_chan *chan = conn->smp; struct smp_chan *smp; @@ -1382,13 +1385,13 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0); if (IS_ERR(smp->tfm_cmac)) { - BT_ERR("Unable to create CMAC crypto context"); + bt_dev_err(hcon->hdev, "Unable to create CMAC crypto context"); goto zfree_smp; } smp->tfm_ecdh = crypto_alloc_kpp("ecdh", 0, 0); if (IS_ERR(smp->tfm_ecdh)) { - BT_ERR("Unable to create ECDH crypto context"); + bt_dev_err(hcon->hdev, "Unable to create ECDH crypto context"); goto free_shash; } @@ -1399,7 +1402,7 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) INIT_DELAYED_WORK(&smp->security_timer, smp_timeout); - hci_conn_hold(conn->hcon); + hci_conn_hold(hcon); return smp; @@ -1564,8 +1567,8 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) if (!hcon->out) return 0; - BT_DBG("%s Starting passkey round %u", hdev->name, - smp->passkey_round + 1); + bt_dev_dbg(hdev, "Starting passkey round %u", + smp->passkey_round + 1); SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); @@ -1625,11 +1628,11 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) u32 value; int err; - BT_DBG(""); - if (!conn) return -ENOTCONN; + bt_dev_dbg(conn->hcon->hdev, ""); + chan = conn->smp; if (!chan) return -ENOTCONN; @@ -1651,7 +1654,7 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) case MGMT_OP_USER_PASSKEY_REPLY: value = le32_to_cpu(passkey); memset(smp->tk, 0, sizeof(smp->tk)); - BT_DBG("PassKey: %d", value); + bt_dev_dbg(conn->hcon->hdev, "PassKey: %d", value); put_unaligned_le32(value, smp->tk); fallthrough; case MGMT_OP_USER_CONFIRM_REPLY: @@ -1733,7 +1736,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) u8 key_size, auth, sec_level; int ret; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); if (skb->len < sizeof(*req)) return SMP_INVALID_PARAMS; @@ -1887,7 +1890,7 @@ static u8 sc_send_public_key(struct smp_chan *smp) } if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { - BT_DBG("Using debug keys"); + bt_dev_dbg(hdev, "Using debug keys"); if (set_ecdh_privkey(smp->tfm_ecdh, debug_sk)) return SMP_UNSPECIFIED; memcpy(smp->local_pk, debug_pk, 64); @@ -1924,7 +1927,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) u8 key_size, auth; int ret; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); if (skb->len < sizeof(*rsp)) return SMP_INVALID_PARAMS; @@ -2019,7 +2022,7 @@ static u8 sc_check_confirm(struct smp_chan *smp) { struct l2cap_conn *conn = smp->conn; - BT_DBG(""); + bt_dev_dbg(conn->hcon->hdev, ""); if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) return sc_passkey_round(smp, SMP_CMD_PAIRING_CONFIRM); @@ -2078,8 +2081,10 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) { struct l2cap_chan *chan = conn->smp; struct smp_chan *smp = chan->data; + struct hci_conn *hcon = conn->hcon; + struct hci_dev *hdev = hcon->hdev; - BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave"); + bt_dev_dbg(hdev, "conn %p %s", conn, hcon->out ? "master" : "slave"); if (skb->len < sizeof(smp->pcnf)) return SMP_INVALID_PARAMS; @@ -2094,7 +2099,7 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) if (test_bit(SMP_FLAG_REMOTE_PK, &smp->flags)) return sc_check_confirm(smp); - BT_ERR("Unexpected SMP Pairing Confirm"); + bt_dev_err(hdev, "Unexpected SMP Pairing Confirm"); ret = fixup_sc_false_positive(smp); if (ret) @@ -2125,7 +2130,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) u32 passkey; int err; - BT_DBG("conn %p", conn); + bt_dev_dbg(hcon->hdev, "conn %p", conn); if (skb->len < sizeof(smp->rrnd)) return SMP_INVALID_PARAMS; @@ -2284,7 +2289,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) struct smp_chan *smp; u8 sec_level, auth; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); if (skb->len < sizeof(*rp)) return SMP_INVALID_PARAMS; @@ -2347,7 +2352,8 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) __u8 authreq; int ret; - BT_DBG("conn %p hcon %p level 0x%2.2x", conn, hcon, sec_level); + bt_dev_dbg(hcon->hdev, "conn %p hcon %p level 0x%2.2x", conn, hcon, + sec_level); /* This may be NULL if there's an unexpected disconnection */ if (!conn) @@ -2483,7 +2489,7 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) struct l2cap_chan *chan = conn->smp; struct smp_chan *smp = chan->data; - BT_DBG("conn %p", conn); + bt_dev_dbg(conn->hcon->hdev, "conn %p", conn); if (skb->len < sizeof(*rp)) return SMP_INVALID_PARAMS; @@ -2516,7 +2522,7 @@ static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb) struct smp_ltk *ltk; u8 authenticated; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); if (skb->len < sizeof(*rp)) return SMP_INVALID_PARAMS; @@ -2548,7 +2554,7 @@ static int smp_cmd_ident_info(struct l2cap_conn *conn, struct sk_buff *skb) struct l2cap_chan *chan = conn->smp; struct smp_chan *smp = chan->data; - BT_DBG(""); + bt_dev_dbg(conn->hcon->hdev, ""); if (skb->len < sizeof(*info)) return SMP_INVALID_PARAMS; @@ -2580,7 +2586,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, struct hci_conn *hcon = conn->hcon; bdaddr_t rpa; - BT_DBG(""); + bt_dev_dbg(hcon->hdev, ""); if (skb->len < sizeof(*info)) return SMP_INVALID_PARAMS; @@ -2647,7 +2653,7 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb) struct smp_chan *smp = chan->data; struct smp_csrk *csrk; - BT_DBG("conn %p", conn); + bt_dev_dbg(conn->hcon->hdev, "conn %p", conn); if (skb->len < sizeof(*rp)) return SMP_INVALID_PARAMS; @@ -2727,11 +2733,20 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) struct smp_cmd_pairing_confirm cfm; int err; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); if (skb->len < sizeof(*key)) return SMP_INVALID_PARAMS; + /* Check if remote and local public keys are the same and debug key is + * not in use. + */ + if (!test_bit(SMP_FLAG_DEBUG_KEY, &smp->flags) && + !crypto_memneq(key, smp->local_pk, 64)) { + bt_dev_err(hdev, "Remote and local public keys are identical"); + return SMP_UNSPECIFIED; + } + memcpy(smp->remote_pk, key, 64); if (test_bit(SMP_FLAG_REMOTE_OOB, &smp->flags)) { @@ -2782,7 +2797,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) smp->method = sc_select_method(smp); - BT_DBG("%s selected method 0x%02x", hdev->name, smp->method); + bt_dev_dbg(hdev, "selected method 0x%02x", smp->method); /* JUST_WORKS and JUST_CFM result in an unauthenticated key */ if (smp->method == JUST_WORKS || smp->method == JUST_CFM) @@ -2857,7 +2872,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) u8 io_cap[3], r[16], e[16]; int err; - BT_DBG("conn %p", conn); + bt_dev_dbg(hcon->hdev, "conn %p", conn); if (skb->len < sizeof(*check)) return SMP_INVALID_PARAMS; @@ -2917,7 +2932,7 @@ static int smp_cmd_keypress_notify(struct l2cap_conn *conn, { struct smp_cmd_keypress_notify *kp = (void *) skb->data; - BT_DBG("value 0x%02x", kp->value); + bt_dev_dbg(conn->hcon->hdev, "value 0x%02x", kp->value); return 0; } @@ -3014,7 +3029,7 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb) break; default: - BT_DBG("Unknown command code 0x%2.2x", code); + bt_dev_dbg(hcon->hdev, "Unknown command code 0x%2.2x", code); reason = SMP_CMD_NOTSUPP; goto done; } @@ -3039,7 +3054,7 @@ static void smp_teardown_cb(struct l2cap_chan *chan, int err) { struct l2cap_conn *conn = chan->conn; - BT_DBG("chan %p", chan); + bt_dev_dbg(conn->hcon->hdev, "chan %p", chan); if (chan->data) smp_chan_destroy(conn); @@ -3056,7 +3071,7 @@ static void bredr_pairing(struct l2cap_chan *chan) struct smp_cmd_pairing req; struct smp_chan *smp; - BT_DBG("chan %p", chan); + bt_dev_dbg(hdev, "chan %p", chan); /* Only new pairings are interesting */ if (!test_bit(HCI_CONN_NEW_LINK_KEY, &hcon->flags)) @@ -3103,7 +3118,7 @@ static void bredr_pairing(struct l2cap_chan *chan) set_bit(SMP_FLAG_SC, &smp->flags); - BT_DBG("%s starting SMP over BR/EDR", hdev->name); + bt_dev_dbg(hdev, "starting SMP over BR/EDR"); /* Prepare and send the BR/EDR SMP Pairing Request */ build_bredr_pairing_cmd(smp, &req, NULL); @@ -3121,7 +3136,7 @@ static void smp_resume_cb(struct l2cap_chan *chan) struct l2cap_conn *conn = chan->conn; struct hci_conn *hcon = conn->hcon; - BT_DBG("chan %p", chan); + bt_dev_dbg(hcon->hdev, "chan %p", chan); if (hcon->type == ACL_LINK) { bredr_pairing(chan); @@ -3144,7 +3159,7 @@ static void smp_ready_cb(struct l2cap_chan *chan) struct l2cap_conn *conn = chan->conn; struct hci_conn *hcon = conn->hcon; - BT_DBG("chan %p", chan); + bt_dev_dbg(hcon->hdev, "chan %p", chan); /* No need to call l2cap_chan_hold() here since we already own * the reference taken in smp_new_conn_cb(). This is just the @@ -3162,7 +3177,7 @@ static int smp_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { int err; - BT_DBG("chan %p", chan); + bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan); err = smp_sig_channel(chan, skb); if (err) { @@ -3214,7 +3229,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan) { struct l2cap_chan *chan; - BT_DBG("pchan %p", pchan); + bt_dev_dbg(pchan->conn->hcon->hdev, "pchan %p", pchan); chan = l2cap_chan_create(); if (!chan) @@ -3235,7 +3250,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan) */ atomic_set(&chan->nesting, L2CAP_NESTING_SMP); - BT_DBG("created chan %p", chan); + bt_dev_dbg(pchan->conn->hcon->hdev, "created chan %p", chan); return chan; } @@ -3276,14 +3291,14 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid) tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0); if (IS_ERR(tfm_cmac)) { - BT_ERR("Unable to create CMAC crypto context"); + bt_dev_err(hdev, "Unable to create CMAC crypto context"); kfree_sensitive(smp); return ERR_CAST(tfm_cmac); } tfm_ecdh = crypto_alloc_kpp("ecdh", 0, 0); if (IS_ERR(tfm_ecdh)) { - BT_ERR("Unable to create ECDH crypto context"); + bt_dev_err(hdev, "Unable to create ECDH crypto context"); crypto_free_shash(tfm_cmac); kfree_sensitive(smp); return ERR_CAST(tfm_ecdh); @@ -3339,7 +3354,7 @@ static void smp_del_chan(struct l2cap_chan *chan) { struct smp_dev *smp; - BT_DBG("chan %p", chan); + bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan); smp = chan->data; if (smp) { @@ -3382,7 +3397,7 @@ int smp_register(struct hci_dev *hdev) { struct l2cap_chan *chan; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, ""); /* If the controller does not support Low Energy operation, then * there is also no need to register any SMP channel. diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9d265447d654..2883601d5c8b 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1593,7 +1593,8 @@ out: spin_unlock(&br->multicast_lock); } -static void br_mc_disabled_update(struct net_device *dev, bool value) +static int br_mc_disabled_update(struct net_device *dev, bool value, + struct netlink_ext_ack *extack) { struct switchdev_attr attr = { .orig_dev = dev, @@ -1602,11 +1603,13 @@ static void br_mc_disabled_update(struct net_device *dev, bool value) .u.mc_disabled = !value, }; - switchdev_port_attr_set(dev, &attr, NULL); + return switchdev_port_attr_set(dev, &attr, extack); } int br_multicast_add_port(struct net_bridge_port *port) { + int err; + port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT; @@ -1618,8 +1621,12 @@ int br_multicast_add_port(struct net_bridge_port *port) timer_setup(&port->ip6_own_query.timer, br_ip6_multicast_port_query_expired, 0); #endif - br_mc_disabled_update(port->dev, - br_opt_get(port->br, BROPT_MULTICAST_ENABLED)); + err = br_mc_disabled_update(port->dev, + br_opt_get(port->br, + BROPT_MULTICAST_ENABLED), + NULL); + if (err && err != -EOPNOTSUPP) + return err; port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); if (!port->mcast_stats) @@ -3560,16 +3567,23 @@ static void br_multicast_start_querier(struct net_bridge *br, rcu_read_unlock(); } -int br_multicast_toggle(struct net_bridge *br, unsigned long val) +int br_multicast_toggle(struct net_bridge *br, unsigned long val, + struct netlink_ext_ack *extack) { struct net_bridge_port *port; bool change_snoopers = false; + int err = 0; spin_lock_bh(&br->multicast_lock); if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val) goto unlock; - br_mc_disabled_update(br->dev, val); + err = br_mc_disabled_update(br->dev, val, extack); + if (err == -EOPNOTSUPP) + err = 0; + if (err) + goto unlock; + br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { change_snoopers = true; @@ -3607,7 +3621,7 @@ unlock: br_multicast_leave_snoopers(br); } - return 0; + return err; } bool br_multicast_enabled(const struct net_device *dev) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f2b1343f8332..0456593aceec 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1293,7 +1293,9 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_SNOOPING]) { u8 mcast_snooping = nla_get_u8(data[IFLA_BR_MCAST_SNOOPING]); - br_multicast_toggle(br, mcast_snooping); + err = br_multicast_toggle(br, mcast_snooping, extack); + if (err) + return err; } if (data[IFLA_BR_MCAST_QUERY_USE_IFADDR]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 50747990188e..7ce8a77cc6b6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -810,7 +810,8 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, bool local_rcv, bool local_orig); int br_multicast_set_router(struct net_bridge *br, unsigned long val); int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); -int br_multicast_toggle(struct net_bridge *br, unsigned long val); +int br_multicast_toggle(struct net_bridge *br, unsigned long val, + struct netlink_ext_ack *extack); int br_multicast_set_querier(struct net_bridge *br, unsigned long val); int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 1e24d9a2c9a7..a5e601e41cb9 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -107,48 +107,28 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p, return 0; } -static void -br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, - u16 vid, struct net_device *dev, - bool added_by_user, bool offloaded) -{ - struct switchdev_notifier_fdb_info info; - unsigned long notifier_type; - - info.addr = mac; - info.vid = vid; - info.added_by_user = added_by_user; - info.offloaded = offloaded; - notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE; - call_switchdev_notifiers(notifier_type, dev, &info.info, NULL); -} - void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) { + struct switchdev_notifier_fdb_info info = { + .addr = fdb->key.addr.addr, + .vid = fdb->key.vlan_id, + .added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags), + .is_local = test_bit(BR_FDB_LOCAL, &fdb->flags), + .offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags), + }; + if (!fdb->dst) return; - if (test_bit(BR_FDB_LOCAL, &fdb->flags)) - return; switch (type) { case RTM_DELNEIGH: - br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr, - fdb->key.vlan_id, - fdb->dst->dev, - test_bit(BR_FDB_ADDED_BY_USER, - &fdb->flags), - test_bit(BR_FDB_OFFLOADED, - &fdb->flags)); + call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE, + fdb->dst->dev, &info.info, NULL); break; case RTM_NEWNEIGH: - br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr, - fdb->key.vlan_id, - fdb->dst->dev, - test_bit(BR_FDB_ADDED_BY_USER, - &fdb->flags), - test_bit(BR_FDB_OFFLOADED, - &fdb->flags)); + call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_DEVICE, + fdb->dst->dev, &info.info, NULL); break; } } diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 072e29840082..381467b691d5 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -409,17 +409,11 @@ static ssize_t multicast_snooping_show(struct device *d, return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_ENABLED)); } -static int toggle_multicast(struct net_bridge *br, unsigned long val, - struct netlink_ext_ack *extack) -{ - return br_multicast_toggle(br, val); -} - static ssize_t multicast_snooping_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { - return store_bridge_parm(d, buf, len, toggle_multicast); + return store_bridge_parm(d, buf, len, br_multicast_toggle); } static DEVICE_ATTR_RW(multicast_snooping); diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index ac5372121e60..7f304a19ac1b 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -23,10 +23,6 @@ config NFT_BRIDGE_REJECT help Add support to reject packets. -config NF_LOG_BRIDGE - tristate "Bridge packet logging" - select NF_LOG_COMMON - endif # NF_TABLES_BRIDGE config NF_CONNTRACK_BRIDGE diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 8e2c5759d964..1c9ce49ab651 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -9,9 +9,6 @@ obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o # connection tracking obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o -# packet logging -obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o - obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o # tables diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 66e7af165494..32bc2821027f 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -105,14 +105,20 @@ static int __net_init broute_net_init(struct net *net) &net->xt.broute_table); } +static void __net_exit broute_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "broute", &ebt_ops_broute); +} + static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.broute_table, &ebt_ops_broute); + ebt_unregister_table(net, net->xt.broute_table); } static struct pernet_operations broute_net_ops = { .init = broute_net_init, .exit = broute_net_exit, + .pre_exit = broute_net_pre_exit, }; static int __init ebtable_broute_init(void) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 78cb9b21022d..bcf982e12f16 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -99,14 +99,20 @@ static int __net_init frame_filter_net_init(struct net *net) &net->xt.frame_filter); } +static void __net_exit frame_filter_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "filter", ebt_ops_filter); +} + static void __net_exit frame_filter_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.frame_filter, ebt_ops_filter); + ebt_unregister_table(net, net->xt.frame_filter); } static struct pernet_operations frame_filter_net_ops = { .init = frame_filter_net_init, .exit = frame_filter_net_exit, + .pre_exit = frame_filter_net_pre_exit, }; static int __init ebtable_filter_init(void) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 0888936ef853..0d092773f816 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -99,14 +99,20 @@ static int __net_init frame_nat_net_init(struct net *net) &net->xt.frame_nat); } +static void __net_exit frame_nat_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "nat", ebt_ops_nat); +} + static void __net_exit frame_nat_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.frame_nat, ebt_ops_nat); + ebt_unregister_table(net, net->xt.frame_nat); } static struct pernet_operations frame_nat_net_ops = { .init = frame_nat_net_init, .exit = frame_nat_net_exit, + .pre_exit = frame_nat_net_pre_exit, }; static int __init ebtable_nat_init(void) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index ebe33b60efd6..96d789c8d1c7 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -24,6 +24,7 @@ #include <linux/cpumask.h> #include <linux/audit.h> #include <net/sock.h> +#include <net/netns/generic.h> /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" @@ -39,8 +40,11 @@ #define COUNTER_BASE(c, n, cpu) ((struct ebt_counter *)(((char *)c) + \ COUNTER_OFFSET(n) * cpu)) +struct ebt_pernet { + struct list_head tables; +}; - +static unsigned int ebt_pernet_id __read_mostly; static DEFINE_MUTEX(ebt_mutex); #ifdef CONFIG_COMPAT @@ -336,7 +340,9 @@ static inline struct ebt_table * find_table_lock(struct net *net, const char *name, int *error, struct mutex *mutex) { - return find_inlist_lock(&net->xt.tables[NFPROTO_BRIDGE], name, + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + + return find_inlist_lock(&ebt_net->tables, name, "ebtable_", error, mutex); } @@ -1136,6 +1142,7 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table) int ebt_register_table(struct net *net, const struct ebt_table *input_table, const struct nf_hook_ops *ops, struct ebt_table **res) { + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); struct ebt_table_info *newinfo; struct ebt_table *t, *table; struct ebt_replace_kernel *repl; @@ -1194,7 +1201,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, table->private = newinfo; rwlock_init(&table->lock); mutex_lock(&ebt_mutex); - list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { + list_for_each_entry(t, &ebt_net->tables, list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; goto free_unlock; @@ -1206,7 +1213,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, ret = -ENOENT; goto free_unlock; } - list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); + list_add(&table->list, &ebt_net->tables); mutex_unlock(&ebt_mutex); WRITE_ONCE(*res, table); @@ -1232,10 +1239,35 @@ out: return ret; } -void ebt_unregister_table(struct net *net, struct ebt_table *table, - const struct nf_hook_ops *ops) +static struct ebt_table *__ebt_find_table(struct net *net, const char *name) +{ + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + struct ebt_table *t; + + mutex_lock(&ebt_mutex); + + list_for_each_entry(t, &ebt_net->tables, list) { + if (strcmp(t->name, name) == 0) { + mutex_unlock(&ebt_mutex); + return t; + } + } + + mutex_unlock(&ebt_mutex); + return NULL; +} + +void ebt_unregister_table_pre_exit(struct net *net, const char *name, const struct nf_hook_ops *ops) +{ + struct ebt_table *table = __ebt_find_table(net, name); + + if (table) + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} +EXPORT_SYMBOL(ebt_unregister_table_pre_exit); + +void ebt_unregister_table(struct net *net, struct ebt_table *table) { - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); __ebt_unregister_table(net, table); } @@ -2412,6 +2444,20 @@ static struct nf_sockopt_ops ebt_sockopts = { .owner = THIS_MODULE, }; +static int __net_init ebt_pernet_init(struct net *net) +{ + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + + INIT_LIST_HEAD(&ebt_net->tables); + return 0; +} + +static struct pernet_operations ebt_net_ops = { + .init = ebt_pernet_init, + .id = &ebt_pernet_id, + .size = sizeof(struct ebt_pernet), +}; + static int __init ebtables_init(void) { int ret; @@ -2425,13 +2471,21 @@ static int __init ebtables_init(void) return ret; } + ret = register_pernet_subsys(&ebt_net_ops); + if (ret < 0) { + nf_unregister_sockopt(&ebt_sockopts); + xt_unregister_target(&ebt_standard_target); + return ret; + } + return 0; } -static void __exit ebtables_fini(void) +static void ebtables_fini(void) { nf_unregister_sockopt(&ebt_sockopts); xt_unregister_target(&ebt_standard_target); + unregister_pernet_subsys(&ebt_net_ops); } EXPORT_SYMBOL(ebt_register_table); diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c deleted file mode 100644 index 1ad61d1017b6..000000000000 --- a/net/bridge/netfilter/nf_log_bridge.c +++ /dev/null @@ -1,79 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org> - */ - -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/skbuff.h> -#include <linux/if_bridge.h> -#include <linux/ip.h> -#include <net/route.h> - -#include <linux/netfilter.h> -#include <net/netfilter/nf_log.h> - -static void nf_log_bridge_packet(struct net *net, u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - nf_log_l2packet(net, pf, eth_hdr(skb)->h_proto, hooknum, skb, - in, out, loginfo, prefix); -} - -static struct nf_logger nf_bridge_logger __read_mostly = { - .name = "nf_log_bridge", - .type = NF_LOG_TYPE_LOG, - .logfn = nf_log_bridge_packet, - .me = THIS_MODULE, -}; - -static int __net_init nf_log_bridge_net_init(struct net *net) -{ - return nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); -} - -static void __net_exit nf_log_bridge_net_exit(struct net *net) -{ - nf_log_unset(net, &nf_bridge_logger); -} - -static struct pernet_operations nf_log_bridge_net_ops = { - .init = nf_log_bridge_net_init, - .exit = nf_log_bridge_net_exit, -}; - -static int __init nf_log_bridge_init(void) -{ - int ret; - - /* Request to load the real packet loggers. */ - nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG); - nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG); - nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG); - - ret = register_pernet_subsys(&nf_log_bridge_net_ops); - if (ret < 0) - return ret; - - nf_log_register(NFPROTO_BRIDGE, &nf_bridge_logger); - return 0; -} - -static void __exit nf_log_bridge_exit(void) -{ - unregister_pernet_subsys(&nf_log_bridge_net_ops); - nf_log_unregister(&nf_bridge_logger); -} - -module_init(nf_log_bridge_init); -module_exit(nf_log_bridge_exit); - -MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); -MODULE_DESCRIPTION("Netfilter bridge packet logging"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 0); diff --git a/net/can/bcm.c b/net/can/bcm.c index 0e5c37be4a2b..909b9e684e04 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -86,6 +86,8 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); MODULE_ALIAS("can-proto-2"); +#define BCM_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_ifindex) + /* * easy access to the first 64 bit of can(fd)_frame payload. cp->data is * 64 bit aligned so the offset has to be multiples of 8 which is ensured @@ -1292,7 +1294,7 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) /* no bound device as default => check msg_name */ DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); - if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex)) + if (msg->msg_namelen < BCM_MIN_NAMELEN) return -EINVAL; if (addr->can_family != AF_CAN) @@ -1534,7 +1536,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, struct net *net = sock_net(sk); int ret = 0; - if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex)) + if (len < BCM_MIN_NAMELEN) return -EINVAL; lock_sock(sk); @@ -1616,8 +1618,8 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { - __sockaddr_check_size(sizeof(struct sockaddr_can)); - msg->msg_namelen = sizeof(struct sockaddr_can); + __sockaddr_check_size(BCM_MIN_NAMELEN); + msg->msg_namelen = BCM_MIN_NAMELEN; memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } diff --git a/net/can/isotp.c b/net/can/isotp.c index 15ea1234d457..9f94ad3caee9 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -77,6 +77,8 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp <socketcan@hartkopp.net>"); MODULE_ALIAS("can-proto-6"); +#define ISOTP_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.tp) + #define SINGLE_MASK(id) (((id) & CAN_EFF_FLAG) ? \ (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) @@ -986,7 +988,8 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, sock_recv_timestamp(msg, sk, skb); if (msg->msg_name) { - msg->msg_namelen = sizeof(struct sockaddr_can); + __sockaddr_check_size(ISOTP_MIN_NAMELEN); + msg->msg_namelen = ISOTP_MIN_NAMELEN; memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } @@ -1056,7 +1059,7 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) int notify_enetdown = 0; int do_rx_reg = 1; - if (len < CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.tp)) + if (len < ISOTP_MIN_NAMELEN) return -EINVAL; /* do not register frame reception for functional addressing */ @@ -1152,13 +1155,13 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer) if (peer) return -EOPNOTSUPP; - memset(addr, 0, sizeof(*addr)); + memset(addr, 0, ISOTP_MIN_NAMELEN); addr->can_family = AF_CAN; addr->can_ifindex = so->ifindex; addr->can_addr.tp.rx_id = so->rxid; addr->can_addr.tp.tx_id = so->txid; - return sizeof(*addr); + return ISOTP_MIN_NAMELEN; } static int isotp_setsockopt(struct socket *sock, int level, int optname, diff --git a/net/can/raw.c b/net/can/raw.c index 37b47a39a3ed..139d9471ddcf 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -60,6 +60,8 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>"); MODULE_ALIAS("can-proto-1"); +#define RAW_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_ifindex) + #define MASK_ALL 0 /* A raw socket has a list of can_filters attached to it, each receiving @@ -394,7 +396,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) int err = 0; int notify_enetdown = 0; - if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex)) + if (len < RAW_MIN_NAMELEN) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; @@ -475,11 +477,11 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr, if (peer) return -EOPNOTSUPP; - memset(addr, 0, sizeof(*addr)); + memset(addr, 0, RAW_MIN_NAMELEN); addr->can_family = AF_CAN; addr->can_ifindex = ro->ifindex; - return sizeof(*addr); + return RAW_MIN_NAMELEN; } static int raw_setsockopt(struct socket *sock, int level, int optname, @@ -739,7 +741,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); - if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex)) + if (msg->msg_namelen < RAW_MIN_NAMELEN) return -EINVAL; if (addr->can_family != AF_CAN) @@ -832,8 +834,8 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { - __sockaddr_check_size(sizeof(struct sockaddr_can)); - msg->msg_namelen = sizeof(struct sockaddr_can); + __sockaddr_check_size(RAW_MIN_NAMELEN); + msg->msg_namelen = RAW_MIN_NAMELEN; memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } diff --git a/net/core/Makefile b/net/core/Makefile index 0c2233c826fd..1a6168d8f23b 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -33,6 +33,7 @@ obj-$(CONFIG_NET_DEVLINK) += devlink.o obj-$(CONFIG_GRO_CELLS) += gro_cells.o obj-$(CONFIG_FAILOVER) += failover.o ifeq ($(CONFIG_INET),y) +obj-$(CONFIG_NET_SELFTESTS) += selftests.o obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o endif diff --git a/net/core/dev.c b/net/core/dev.c index 6a1ef7a15bed..eed028aec6a4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5976,7 +5976,8 @@ static void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0_len = 0; if (!skb_headlen(skb) && pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0))) { + !PageHighMem(skb_frag_page(frag0)) && + (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, skb_frag_size(frag0), @@ -7045,7 +7046,7 @@ static int napi_thread_wait(struct napi_struct *napi) set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop() && !napi_disable_pending(napi)) { + while (!kthread_should_stop()) { /* Testing SCHED_THREADED bit here to make sure the current * kthread owns this napi and could poll on this napi. * Testing SCHED bit is not enough because SCHED bit might be @@ -7063,6 +7064,7 @@ static int napi_thread_wait(struct napi_struct *napi) set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); + return -1; } @@ -11066,11 +11068,13 @@ void unregister_netdev(struct net_device *dev) EXPORT_SYMBOL(unregister_netdev); /** - * dev_change_net_namespace - move device to different nethost namespace + * __dev_change_net_namespace - move device to different nethost namespace * @dev: device * @net: network namespace * @pat: If not NULL name pattern to try if the current device name * is already taken in the destination network namespace. + * @new_ifindex: If not zero, specifies device index in the target + * namespace. * * This function shuts down a device interface and moves it * to a new network namespace. On success 0 is returned, on @@ -11079,10 +11083,11 @@ EXPORT_SYMBOL(unregister_netdev); * Callers must hold the rtnl semaphore. */ -int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) +int __dev_change_net_namespace(struct net_device *dev, struct net *net, + const char *pat, int new_ifindex) { struct net *net_old = dev_net(dev); - int err, new_nsid, new_ifindex; + int err, new_nsid; ASSERT_RTNL(); @@ -11113,6 +11118,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char goto out; } + /* Check that new_ifindex isn't used yet. */ + err = -EBUSY; + if (new_ifindex && __dev_get_by_index(net, new_ifindex)) + goto out; + /* * And now a mini version of register_netdevice unregister_netdevice. */ @@ -11140,10 +11150,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL); /* If there is an ifindex conflict assign a new one */ - if (__dev_get_by_index(net, dev->ifindex)) - new_ifindex = dev_new_index(net); - else - new_ifindex = dev->ifindex; + if (!new_ifindex) { + if (__dev_get_by_index(net, dev->ifindex)) + new_ifindex = dev_new_index(net); + else + new_ifindex = dev->ifindex; + } rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid, new_ifindex); @@ -11196,7 +11208,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char out: return err; } -EXPORT_SYMBOL_GPL(dev_change_net_namespace); +EXPORT_SYMBOL_GPL(__dev_change_net_namespace); static int dev_cpu_dead(unsigned int oldcpu) { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 5985029e43d4..3ed7c98a98e1 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -832,8 +832,10 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_addrs = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_IPV6_ADDRS, target_container); - memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src, - sizeof(key_addrs->v6addrs)); + memcpy(&key_addrs->v6addrs.src, &flow_keys->ipv6_src, + sizeof(key_addrs->v6addrs.src)); + memcpy(&key_addrs->v6addrs.dst, &flow_keys->ipv6_dst, + sizeof(key_addrs->v6addrs.dst)); key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e2982b3970b8..8379719d1dce 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1379,7 +1379,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, * we can reinject the packet there. */ n2 = NULL; - if (dst) { + if (dst && dst->obsolete != DST_OBSOLETE_DEAD) { n2 = dst_neigh_lookup_skb(dst, skb); if (n2) n1 = n2; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1bdcb33fb561..714d5fa38546 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1877,6 +1877,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { .len = ALTIFNAMSIZ - 1 }, [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT }, [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED }, + [IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2603,14 +2604,22 @@ static int do_setlink(const struct sk_buff *skb, return err; if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) { - struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev), - tb, CAP_NET_ADMIN); + struct net *net; + int new_ifindex; + + net = rtnl_link_get_net_capable(skb, dev_net(dev), + tb, CAP_NET_ADMIN); if (IS_ERR(net)) { err = PTR_ERR(net); goto errout; } - err = dev_change_net_namespace(dev, net, ifname); + if (tb[IFLA_NEW_IFINDEX]) + new_ifindex = nla_get_s32(tb[IFLA_NEW_IFINDEX]); + else + new_ifindex = 0; + + err = __dev_change_net_namespace(dev, net, ifname, new_ifindex); put_net(net); if (err) goto errout; @@ -2863,7 +2872,7 @@ static int do_setlink(const struct sk_buff *skb, BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af)))); - err = af_ops->set_link_af(dev, af); + err = af_ops->set_link_af(dev, af, extack); if (err < 0) { rcu_read_unlock(); goto errout; diff --git a/net/core/scm.c b/net/core/scm.c index 8156d4fb8a39..ae3085d9aae8 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -228,14 +228,16 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) if (msg->msg_control_is_user) { struct cmsghdr __user *cm = msg->msg_control_user; - struct cmsghdr cmhdr; - - cmhdr.cmsg_level = level; - cmhdr.cmsg_type = type; - cmhdr.cmsg_len = cmlen; - if (copy_to_user(cm, &cmhdr, sizeof cmhdr) || - copy_to_user(CMSG_USER_DATA(cm), data, cmlen - sizeof(*cm))) - return -EFAULT; + + if (!user_write_access_begin(cm, cmlen)) + goto efault; + + unsafe_put_user(cmlen, &cm->cmsg_len, efault_end); + unsafe_put_user(level, &cm->cmsg_level, efault_end); + unsafe_put_user(type, &cm->cmsg_type, efault_end); + unsafe_copy_to_user(CMSG_USER_DATA(cm), data, + cmlen - sizeof(*cm), efault_end); + user_write_access_end(); } else { struct cmsghdr *cm = msg->msg_control; @@ -249,6 +251,11 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) msg->msg_control += cmlen; msg->msg_controllen -= cmlen; return 0; + +efault_end: + user_write_access_end(); +efault: + return -EFAULT; } EXPORT_SYMBOL(put_cmsg); diff --git a/net/core/selftests.c b/net/core/selftests.c new file mode 100644 index 000000000000..ba7b0171974c --- /dev/null +++ b/net/core/selftests.c @@ -0,0 +1,400 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Synopsys, Inc. and/or its affiliates. + * stmmac Selftests Support + * + * Author: Jose Abreu <joabreu@synopsys.com> + * + * Ported from stmmac by: + * Copyright (C) 2021 Oleksij Rempel <o.rempel@pengutronix.de> + */ + +#include <linux/phy.h> +#include <net/selftests.h> +#include <net/tcp.h> +#include <net/udp.h> + +struct net_packet_attrs { + unsigned char *src; + unsigned char *dst; + u32 ip_src; + u32 ip_dst; + bool tcp; + u16 sport; + u16 dport; + int timeout; + int size; + int max_size; + u8 id; + u16 queue_mapping; +}; + +struct net_test_priv { + struct net_packet_attrs *packet; + struct packet_type pt; + struct completion comp; + int double_vlan; + int vlan_id; + int ok; +}; + +struct netsfhdr { + __be32 version; + __be64 magic; + u8 id; +} __packed; + +static u8 net_test_next_id; + +#define NET_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ + sizeof(struct netsfhdr)) +#define NET_TEST_PKT_MAGIC 0xdeadcafecafedeadULL +#define NET_LB_TIMEOUT msecs_to_jiffies(200) + +static struct sk_buff *net_test_get_skb(struct net_device *ndev, + struct net_packet_attrs *attr) +{ + struct sk_buff *skb = NULL; + struct udphdr *uhdr = NULL; + struct tcphdr *thdr = NULL; + struct netsfhdr *shdr; + struct ethhdr *ehdr; + struct iphdr *ihdr; + int iplen, size; + + size = attr->size + NET_TEST_PKT_SIZE; + + if (attr->tcp) + size += sizeof(struct tcphdr); + else + size += sizeof(struct udphdr); + + if (attr->max_size && attr->max_size > size) + size = attr->max_size; + + skb = netdev_alloc_skb(ndev, size); + if (!skb) + return NULL; + + prefetchw(skb->data); + + ehdr = skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + + skb_set_network_header(skb, skb->len); + ihdr = skb_put(skb, sizeof(*ihdr)); + + skb_set_transport_header(skb, skb->len); + if (attr->tcp) + thdr = skb_put(skb, sizeof(*thdr)); + else + uhdr = skb_put(skb, sizeof(*uhdr)); + + eth_zero_addr(ehdr->h_dest); + + if (attr->src) + ether_addr_copy(ehdr->h_source, attr->src); + if (attr->dst) + ether_addr_copy(ehdr->h_dest, attr->dst); + + ehdr->h_proto = htons(ETH_P_IP); + + if (attr->tcp) { + thdr->source = htons(attr->sport); + thdr->dest = htons(attr->dport); + thdr->doff = sizeof(struct tcphdr) / 4; + thdr->check = 0; + } else { + uhdr->source = htons(attr->sport); + uhdr->dest = htons(attr->dport); + uhdr->len = htons(sizeof(*shdr) + sizeof(*uhdr) + attr->size); + if (attr->max_size) + uhdr->len = htons(attr->max_size - + (sizeof(*ihdr) + sizeof(*ehdr))); + uhdr->check = 0; + } + + ihdr->ihl = 5; + ihdr->ttl = 32; + ihdr->version = 4; + if (attr->tcp) + ihdr->protocol = IPPROTO_TCP; + else + ihdr->protocol = IPPROTO_UDP; + iplen = sizeof(*ihdr) + sizeof(*shdr) + attr->size; + if (attr->tcp) + iplen += sizeof(*thdr); + else + iplen += sizeof(*uhdr); + + if (attr->max_size) + iplen = attr->max_size - sizeof(*ehdr); + + ihdr->tot_len = htons(iplen); + ihdr->frag_off = 0; + ihdr->saddr = htonl(attr->ip_src); + ihdr->daddr = htonl(attr->ip_dst); + ihdr->tos = 0; + ihdr->id = 0; + ip_send_check(ihdr); + + shdr = skb_put(skb, sizeof(*shdr)); + shdr->version = 0; + shdr->magic = cpu_to_be64(NET_TEST_PKT_MAGIC); + attr->id = net_test_next_id; + shdr->id = net_test_next_id++; + + if (attr->size) + skb_put(skb, attr->size); + if (attr->max_size && attr->max_size > skb->len) + skb_put(skb, attr->max_size - skb->len); + + skb->csum = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + if (attr->tcp) { + thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr, + ihdr->daddr, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + } else { + udp4_hwcsum(skb, ihdr->saddr, ihdr->daddr); + } + + skb->protocol = htons(ETH_P_IP); + skb->pkt_type = PACKET_HOST; + skb->dev = ndev; + + return skb; +} + +static int net_test_loopback_validate(struct sk_buff *skb, + struct net_device *ndev, + struct packet_type *pt, + struct net_device *orig_ndev) +{ + struct net_test_priv *tpriv = pt->af_packet_priv; + unsigned char *src = tpriv->packet->src; + unsigned char *dst = tpriv->packet->dst; + struct netsfhdr *shdr; + struct ethhdr *ehdr; + struct udphdr *uhdr; + struct tcphdr *thdr; + struct iphdr *ihdr; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (skb_linearize(skb)) + goto out; + if (skb_headlen(skb) < (NET_TEST_PKT_SIZE - ETH_HLEN)) + goto out; + + ehdr = (struct ethhdr *)skb_mac_header(skb); + if (dst) { + if (!ether_addr_equal_unaligned(ehdr->h_dest, dst)) + goto out; + } + + if (src) { + if (!ether_addr_equal_unaligned(ehdr->h_source, src)) + goto out; + } + + ihdr = ip_hdr(skb); + if (tpriv->double_vlan) + ihdr = (struct iphdr *)(skb_network_header(skb) + 4); + + if (tpriv->packet->tcp) { + if (ihdr->protocol != IPPROTO_TCP) + goto out; + + thdr = (struct tcphdr *)((u8 *)ihdr + 4 * ihdr->ihl); + if (thdr->dest != htons(tpriv->packet->dport)) + goto out; + + shdr = (struct netsfhdr *)((u8 *)thdr + sizeof(*thdr)); + } else { + if (ihdr->protocol != IPPROTO_UDP) + goto out; + + uhdr = (struct udphdr *)((u8 *)ihdr + 4 * ihdr->ihl); + if (uhdr->dest != htons(tpriv->packet->dport)) + goto out; + + shdr = (struct netsfhdr *)((u8 *)uhdr + sizeof(*uhdr)); + } + + if (shdr->magic != cpu_to_be64(NET_TEST_PKT_MAGIC)) + goto out; + if (tpriv->packet->id != shdr->id) + goto out; + + tpriv->ok = true; + complete(&tpriv->comp); +out: + kfree_skb(skb); + return 0; +} + +static int __net_test_loopback(struct net_device *ndev, + struct net_packet_attrs *attr) +{ + struct net_test_priv *tpriv; + struct sk_buff *skb = NULL; + int ret = 0; + + tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL); + if (!tpriv) + return -ENOMEM; + + tpriv->ok = false; + init_completion(&tpriv->comp); + + tpriv->pt.type = htons(ETH_P_IP); + tpriv->pt.func = net_test_loopback_validate; + tpriv->pt.dev = ndev; + tpriv->pt.af_packet_priv = tpriv; + tpriv->packet = attr; + dev_add_pack(&tpriv->pt); + + skb = net_test_get_skb(ndev, attr); + if (!skb) { + ret = -ENOMEM; + goto cleanup; + } + + ret = dev_direct_xmit(skb, attr->queue_mapping); + if (ret < 0) { + goto cleanup; + } else if (ret > 0) { + ret = -ENETUNREACH; + goto cleanup; + } + + if (!attr->timeout) + attr->timeout = NET_LB_TIMEOUT; + + wait_for_completion_timeout(&tpriv->comp, attr->timeout); + ret = tpriv->ok ? 0 : -ETIMEDOUT; + +cleanup: + dev_remove_pack(&tpriv->pt); + kfree(tpriv); + return ret; +} + +static int net_test_netif_carrier(struct net_device *ndev) +{ + return netif_carrier_ok(ndev) ? 0 : -ENOLINK; +} + +static int net_test_phy_phydev(struct net_device *ndev) +{ + return ndev->phydev ? 0 : -EOPNOTSUPP; +} + +static int net_test_phy_loopback_enable(struct net_device *ndev) +{ + if (!ndev->phydev) + return -EOPNOTSUPP; + + return phy_loopback(ndev->phydev, true); +} + +static int net_test_phy_loopback_disable(struct net_device *ndev) +{ + if (!ndev->phydev) + return -EOPNOTSUPP; + + return phy_loopback(ndev->phydev, false); +} + +static int net_test_phy_loopback_udp(struct net_device *ndev) +{ + struct net_packet_attrs attr = { }; + + attr.dst = ndev->dev_addr; + return __net_test_loopback(ndev, &attr); +} + +static int net_test_phy_loopback_tcp(struct net_device *ndev) +{ + struct net_packet_attrs attr = { }; + + attr.dst = ndev->dev_addr; + attr.tcp = true; + return __net_test_loopback(ndev, &attr); +} + +static const struct net_test { + char name[ETH_GSTRING_LEN]; + int (*fn)(struct net_device *ndev); +} net_selftests[] = { + { + .name = "Carrier ", + .fn = net_test_netif_carrier, + }, { + .name = "PHY dev is present ", + .fn = net_test_phy_phydev, + }, { + /* This test should be done before all PHY loopback test */ + .name = "PHY internal loopback, enable ", + .fn = net_test_phy_loopback_enable, + }, { + .name = "PHY internal loopback, UDP ", + .fn = net_test_phy_loopback_udp, + }, { + .name = "PHY internal loopback, TCP ", + .fn = net_test_phy_loopback_tcp, + }, { + /* This test should be done after all PHY loopback test */ + .name = "PHY internal loopback, disable", + .fn = net_test_phy_loopback_disable, + }, +}; + +void net_selftest(struct net_device *ndev, struct ethtool_test *etest, u64 *buf) +{ + int count = net_selftest_get_count(); + int i; + + memset(buf, 0, sizeof(*buf) * count); + net_test_next_id = 0; + + if (etest->flags != ETH_TEST_FL_OFFLINE) { + netdev_err(ndev, "Only offline tests are supported\n"); + etest->flags |= ETH_TEST_FL_FAILED; + return; + } + + + for (i = 0; i < count; i++) { + buf[i] = net_selftests[i].fn(ndev); + if (buf[i] && (buf[i] != -EOPNOTSUPP)) + etest->flags |= ETH_TEST_FL_FAILED; + } +} +EXPORT_SYMBOL_GPL(net_selftest); + +int net_selftest_get_count(void) +{ + return ARRAY_SIZE(net_selftests); +} +EXPORT_SYMBOL_GPL(net_selftest_get_count); + +void net_selftest_get_strings(u8 *data) +{ + u8 *p = data; + int i; + + for (i = 0; i < net_selftest_get_count(); i++) { + snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1, + net_selftests[i].name); + p += ETH_GSTRING_LEN; + } +} +EXPORT_SYMBOL_GPL(net_selftest_get_strings); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Oleksij Rempel <o.rempel@pengutronix.de>"); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3ad9e8425ab2..14010c0eec48 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3773,13 +3773,13 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int delta_truesize = 0; unsigned int delta_len = 0; + struct sk_buff *tail = NULL; struct sk_buff *nskb, *tmp; int err; skb_push(skb, -skb_network_offset(skb) + offset); skb_shinfo(skb)->frag_list = NULL; - skb->next = list_skb; do { nskb = list_skb; @@ -3797,8 +3797,17 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, } } - if (unlikely(err)) + if (!tail) + skb->next = nskb; + else + tail->next = nskb; + + if (unlikely(err)) { + nskb->next = list_skb; goto err_linearize; + } + + tail = nskb; delta_len += nskb->len; delta_truesize += nskb->truesize; @@ -3825,7 +3834,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, skb_gso_reset(skb); - skb->prev = nskb; + skb->prev = tail; if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 92a83c02562a..43ce17a6a585 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -586,6 +586,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb if (unlikely(!msg)) return -EAGAIN; sk_msg_init(msg); + skb_set_owner_r(skb, sk); return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); } @@ -884,7 +885,6 @@ static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int { switch (verdict) { case __SK_REDIRECT: - skb_set_owner_r(skb, sk); sk_psock_skb_redirect(skb); break; case __SK_PASS: @@ -902,10 +902,6 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb) rcu_read_lock(); prog = READ_ONCE(psock->progs.stream_verdict); if (likely(prog)) { - /* We skip full set_owner_r here because if we do a SK_PASS - * or SK_DROP we can skip skb memory accounting and use the - * TLS context. - */ skb->sk = psock->sk; skb_dst_drop(skb); skb_bpf_redirect_clear(skb); @@ -995,13 +991,14 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) kfree_skb(skb); goto out; } - skb_set_owner_r(skb, sk); prog = READ_ONCE(psock->progs.stream_verdict); if (likely(prog)) { + skb->sk = sk; skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); + skb->sk = NULL; } sk_psock_verdict_apply(psock, skb, ret); out: @@ -1115,15 +1112,16 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, kfree_skb(skb); goto out; } - skb_set_owner_r(skb, sk); prog = READ_ONCE(psock->progs.stream_verdict); if (!prog) prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { + skb->sk = sk; skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); + skb->sk = NULL; } sk_psock_verdict_apply(psock, skb, ret); out: diff --git a/net/core/sock.c b/net/core/sock.c index cc31b601ae10..c761c4a0b66b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2132,16 +2132,10 @@ void skb_orphan_partial(struct sk_buff *skb) if (skb_is_tcp_pure_ack(skb)) return; - if (can_skb_orphan_partial(skb)) { - struct sock *sk = skb->sk; - - if (refcount_inc_not_zero(&sk->sk_refcnt)) { - WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); - skb->destructor = sock_efree; - } - } else { + if (can_skb_orphan_partial(skb)) + skb_set_owner_sk_safe(skb, skb->sk); + else skb_orphan(skb); - } } EXPORT_SYMBOL(skb_orphan_partial); @@ -3537,7 +3531,7 @@ int proto_register(struct proto *prot, int alloc_slab) return ret; out_free_timewait_sock_slab: - if (alloc_slab && prot->twsk_prot) + if (alloc_slab) tw_prot_cleanup(prot->twsk_prot); out_free_request_sock_slab: if (alloc_slab) { diff --git a/net/core/xdp.c b/net/core/xdp.c index 05354976c1fc..858276e72c68 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -350,7 +350,8 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); page = virt_to_head_page(data); - napi_direct &= !xdp_return_frame_no_direct(); + if (napi_direct && xdp_return_frame_no_direct()) + napi_direct = false; page_pool_put_full_page(xa->page_pool, page, napi_direct); rcu_read_unlock(); break; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2455b0c0e486..ffc601a3b329 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -23,14 +23,21 @@ #include <net/tcp_states.h> #include <net/xfrm.h> #include <net/secure_seq.h> +#include <net/netns/generic.h> #include "ackvec.h" #include "ccid.h" #include "dccp.h" #include "feat.h" +struct dccp_v4_pernet { + struct sock *v4_ctl_sk; +}; + +static unsigned int dccp_v4_pernet_id __read_mostly; + /* - * The per-net dccp.v4_ctl_sk socket is used for responding to + * The per-net v4_ctl_sk socket is used for responding to * the Out-of-the-blue (OOTB) packets. A control sock will be created * for this socket at the initialization time. */ @@ -513,7 +520,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) struct sk_buff *skb; struct dst_entry *dst; struct net *net = dev_net(skb_dst(rxskb)->dev); - struct sock *ctl_sk = net->dccp.v4_ctl_sk; + struct dccp_v4_pernet *pn; + struct sock *ctl_sk; /* Never send a reset in response to a reset. */ if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) @@ -522,6 +530,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) if (skb_rtable(rxskb)->rt_type != RTN_LOCAL) return; + pn = net_generic(net, dccp_v4_pernet_id); + ctl_sk = pn->v4_ctl_sk; dst = dccp_v4_route_skb(net, ctl_sk, rxskb); if (dst == NULL) return; @@ -1005,16 +1015,20 @@ static struct inet_protosw dccp_v4_protosw = { static int __net_init dccp_v4_init_net(struct net *net) { + struct dccp_v4_pernet *pn = net_generic(net, dccp_v4_pernet_id); + if (dccp_hashinfo.bhash == NULL) return -ESOCKTNOSUPPORT; - return inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET, + return inet_ctl_sock_create(&pn->v4_ctl_sk, PF_INET, SOCK_DCCP, IPPROTO_DCCP, net); } static void __net_exit dccp_v4_exit_net(struct net *net) { - inet_ctl_sock_destroy(net->dccp.v4_ctl_sk); + struct dccp_v4_pernet *pn = net_generic(net, dccp_v4_pernet_id); + + inet_ctl_sock_destroy(pn->v4_ctl_sk); } static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list) @@ -1026,6 +1040,8 @@ static struct pernet_operations dccp_v4_ops = { .init = dccp_v4_init_net, .exit = dccp_v4_exit_net, .exit_batch = dccp_v4_exit_batch, + .id = &dccp_v4_pernet_id, + .size = sizeof(struct dccp_v4_pernet), }; static int __init dccp_v4_init(void) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 2be5c69824f9..6f5304db5a67 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -27,13 +27,20 @@ #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/secure_seq.h> +#include <net/netns/generic.h> #include <net/sock.h> #include "dccp.h" #include "ipv6.h" #include "feat.h" -/* The per-net dccp.v6_ctl_sk is used for sending RSTs and ACKs */ +struct dccp_v6_pernet { + struct sock *v6_ctl_sk; +}; + +static unsigned int dccp_v6_pernet_id __read_mostly; + +/* The per-net v6_ctl_sk is used for sending RSTs and ACKs */ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped; static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops; @@ -254,7 +261,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) struct sk_buff *skb; struct flowi6 fl6; struct net *net = dev_net(skb_dst(rxskb)->dev); - struct sock *ctl_sk = net->dccp.v6_ctl_sk; + struct dccp_v6_pernet *pn; + struct sock *ctl_sk; struct dst_entry *dst; if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) @@ -263,6 +271,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) if (!ipv6_unicast_destination(rxskb)) return; + pn = net_generic(net, dccp_v6_pernet_id); + ctl_sk = pn->v6_ctl_sk; skb = dccp_ctl_make_reset(ctl_sk, rxskb); if (skb == NULL) return; @@ -1089,16 +1099,20 @@ static struct inet_protosw dccp_v6_protosw = { static int __net_init dccp_v6_init_net(struct net *net) { + struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id); + if (dccp_hashinfo.bhash == NULL) return -ESOCKTNOSUPPORT; - return inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6, + return inet_ctl_sock_create(&pn->v6_ctl_sk, PF_INET6, SOCK_DCCP, IPPROTO_DCCP, net); } static void __net_exit dccp_v6_exit_net(struct net *net) { - inet_ctl_sock_destroy(net->dccp.v6_ctl_sk); + struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id); + + inet_ctl_sock_destroy(pn->v6_ctl_sk); } static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list) @@ -1110,6 +1124,8 @@ static struct pernet_operations dccp_v6_ops = { .init = dccp_v6_init_net, .exit = dccp_v6_exit_net, .exit_batch = dccp_v6_exit_batch, + .id = &dccp_v6_pernet_id, + .size = sizeof(struct dccp_v6_pernet), }; static int __init dccp_v6_init(void) diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 8746b07668ae..cbc2bd643ab2 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -9,6 +9,7 @@ menuconfig NET_DSA select NET_SWITCHDEV select PHYLINK select NET_DEVLINK + select NET_SELFTESTS help Say Y if you want to enable support for the hardware switches supported by the Distributed Switch Architecture. diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index d142eb2b288b..b71e87909f0e 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -392,7 +392,7 @@ static int dsa_port_setup(struct dsa_port *dp) break; case DSA_PORT_TYPE_USER: - dp->mac = of_get_mac_address(dp->dn); + of_get_mac_address(dp->dn, dp->mac); err = dsa_slave_create(dp); if (err) break; @@ -668,6 +668,30 @@ static const struct devlink_ops dsa_devlink_ops = { .sb_occ_tc_port_bind_get = dsa_devlink_sb_occ_tc_port_bind_get, }; +static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds) +{ + const struct dsa_device_ops *tag_ops = ds->dst->tag_ops; + struct dsa_switch_tree *dst = ds->dst; + int port, err; + + if (tag_ops->proto == dst->default_proto) + return 0; + + for (port = 0; port < ds->num_ports; port++) { + if (!dsa_is_cpu_port(ds, port)) + continue; + + err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto); + if (err) { + dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n", + tag_ops->name, ERR_PTR(err)); + return err; + } + } + + return 0; +} + static int dsa_switch_setup(struct dsa_switch *ds) { struct dsa_devlink_priv *dl_priv; @@ -718,6 +742,10 @@ static int dsa_switch_setup(struct dsa_switch *ds) if (err < 0) goto unregister_notifier; + err = dsa_switch_setup_tag_protocol(ds); + if (err) + goto teardown; + devlink_params_publish(ds->devlink); if (!ds->slave_mii_bus && ds->ops->phy_read) { @@ -795,8 +823,14 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) list_for_each_entry(dp, &dst->ports, list) { err = dsa_port_setup(dp); - if (err) + if (err) { + dsa_port_devlink_teardown(dp); + dp->type = DSA_PORT_TYPE_UNUSED; + err = dsa_port_devlink_setup(dp); + if (err) + goto teardown; continue; + } } return 0; @@ -1062,34 +1096,60 @@ static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp, return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol); } -static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) +static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, + const char *user_protocol) { struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst = ds->dst; const struct dsa_device_ops *tag_ops; - enum dsa_tag_protocol tag_protocol; + enum dsa_tag_protocol default_proto; + + /* Find out which protocol the switch would prefer. */ + default_proto = dsa_get_tag_protocol(dp, master); + if (dst->default_proto) { + if (dst->default_proto != default_proto) { + dev_err(ds->dev, + "A DSA switch tree can have only one tagging protocol\n"); + return -EINVAL; + } + } else { + dst->default_proto = default_proto; + } + + /* See if the user wants to override that preference. */ + if (user_protocol) { + if (!ds->ops->change_tag_protocol) { + dev_err(ds->dev, "Tag protocol cannot be modified\n"); + return -EINVAL; + } + + tag_ops = dsa_find_tagger_by_name(user_protocol); + } else { + tag_ops = dsa_tag_driver_get(default_proto); + } + + if (IS_ERR(tag_ops)) { + if (PTR_ERR(tag_ops) == -ENOPROTOOPT) + return -EPROBE_DEFER; + + dev_warn(ds->dev, "No tagger for this switch\n"); + return PTR_ERR(tag_ops); + } - tag_protocol = dsa_get_tag_protocol(dp, master); if (dst->tag_ops) { - if (dst->tag_ops->proto != tag_protocol) { + if (dst->tag_ops != tag_ops) { dev_err(ds->dev, "A DSA switch tree can have only one tagging protocol\n"); + + dsa_tag_driver_put(tag_ops); return -EINVAL; } + /* In the case of multiple CPU ports per switch, the tagging - * protocol is still reference-counted only per switch tree, so - * nothing to do here. + * protocol is still reference-counted only per switch tree. */ + dsa_tag_driver_put(tag_ops); } else { - tag_ops = dsa_tag_driver_get(tag_protocol); - if (IS_ERR(tag_ops)) { - if (PTR_ERR(tag_ops) == -ENOPROTOOPT) - return -EPROBE_DEFER; - dev_warn(ds->dev, "No tagger for this switch\n"); - dp->master = NULL; - return PTR_ERR(tag_ops); - } - dst->tag_ops = tag_ops; } @@ -1098,6 +1158,19 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) dsa_port_set_tag_protocol(dp, dst->tag_ops); dp->dst = dst; + /* At this point, the tree may be configured to use a different + * tagger than the one chosen by the switch driver during + * .setup, in the case when a user selects a custom protocol + * through the DT. + * + * This is resolved by syncing the driver with the tree in + * dsa_switch_setup_tag_protocol once .setup has run and the + * driver is ready to accept calls to .change_tag_protocol. If + * the driver does not support the custom protocol at that + * point, the tree is wholly rejected, thereby ensuring that the + * tree and driver are always in agreement on the protocol to + * use. + */ return 0; } @@ -1111,12 +1184,14 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) if (ethernet) { struct net_device *master; + const char *user_protocol; master = of_find_net_device_by_node(ethernet); if (!master) return -EPROBE_DEFER; - return dsa_port_parse_cpu(dp, master); + user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL); + return dsa_port_parse_cpu(dp, master, user_protocol); } if (link) @@ -1228,7 +1303,7 @@ static int dsa_port_parse(struct dsa_port *dp, const char *name, dev_put(master); - return dsa_port_parse_cpu(dp, master); + return dsa_port_parse_cpu(dp, master, NULL); } if (!strcmp(name, "dsa")) diff --git a/net/dsa/port.c b/net/dsa/port.c index 01e30264b25b..6379d66a6bb3 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -550,7 +550,7 @@ int dsa_port_bridge_flags(const struct dsa_port *dp, struct dsa_switch *ds = dp->ds; if (!ds->ops->port_bridge_flags) - return -EINVAL; + return -EOPNOTSUPP; return ds->ops->port_bridge_flags(ds, dp->index, flags, extack); } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 995e0e16f295..77b33bd161b8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -15,6 +15,7 @@ #include <linux/mdio.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> +#include <net/selftests.h> #include <net/tc_act/tc_mirred.h> #include <linux/if_bridge.h> #include <linux/if_hsr.h> @@ -748,7 +749,10 @@ static void dsa_slave_get_strings(struct net_device *dev, if (ds->ops->get_strings) ds->ops->get_strings(ds, dp->index, stringset, data + 4 * len); + } else if (stringset == ETH_SS_TEST) { + net_selftest_get_strings(data); } + } static void dsa_slave_get_ethtool_stats(struct net_device *dev, @@ -794,11 +798,27 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) count += ds->ops->get_sset_count(ds, dp->index, sset); return count; + } else if (sset == ETH_SS_TEST) { + return net_selftest_get_count(); } return -EOPNOTSUPP; } +static void dsa_slave_net_selftest(struct net_device *ndev, + struct ethtool_test *etest, u64 *buf) +{ + struct dsa_port *dp = dsa_slave_to_port(ndev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->self_test) { + ds->ops->self_test(ds, dp->index, etest, buf); + return; + } + + net_selftest(ndev, etest, buf); +} + static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) { struct dsa_port *dp = dsa_slave_to_port(dev); @@ -1630,6 +1650,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_rxnfc = dsa_slave_get_rxnfc, .set_rxnfc = dsa_slave_set_rxnfc, .get_ts_info = dsa_slave_get_ts_info, + .self_test = dsa_slave_net_selftest, }; /* legacy way, bypassing the bridge *****************************************/ @@ -1896,7 +1917,7 @@ int dsa_slave_create(struct dsa_port *port) slave_dev->hw_features |= NETIF_F_HW_TC; slave_dev->features |= NETIF_F_LLTX; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; - if (!IS_ERR_OR_NULL(port->mac)) + if (!is_zero_ether_addr(port->mac)) ether_addr_copy(slave_dev->dev_addr, port->mac); else eth_hw_addr_inherit(slave_dev, master); @@ -2329,7 +2350,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, fdb_info = ptr; if (dsa_slave_dev_check(dev)) { - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) return NOTIFY_OK; dp = dsa_slave_to_port(dev); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 4b5da89dc27a..9bf8e20ecdf3 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -107,7 +107,7 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, bool unset_vlan_filtering = br_vlan_enabled(info->br); struct dsa_switch_tree *dst = ds->dst; struct netlink_ext_ack extack = {0}; - int err, i; + int err, port; if (dst->index == info->tree_index && ds->index == info->sw_index && ds->ops->port_bridge_join) @@ -124,13 +124,16 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, * it. That is a good thing, because that lets us handle it and also * handle the case where the switch's vlan_filtering setting is global * (not per port). When that happens, the correct moment to trigger the - * vlan_filtering callback is only when the last port left this bridge. + * vlan_filtering callback is only when the last port leaves the last + * VLAN-aware bridge. */ if (unset_vlan_filtering && ds->vlan_filtering_is_global) { - for (i = 0; i < ds->num_ports; i++) { - if (i == info->port) - continue; - if (dsa_to_port(ds, i)->bridge_dev == info->br) { + for (port = 0; port < ds->num_ports; port++) { + struct net_device *bridge_dev; + + bridge_dev = dsa_to_port(ds, port)->bridge_dev; + + if (bridge_dev && br_vlan_enabled(bridge_dev)) { unset_vlan_filtering = false; break; } @@ -320,15 +323,6 @@ static int dsa_switch_vlan_del(struct dsa_switch *ds, return 0; } -static bool dsa_switch_tag_proto_match(struct dsa_switch *ds, int port, - struct dsa_notifier_tag_proto_info *info) -{ - if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) - return true; - - return false; -} - static int dsa_switch_change_tag_proto(struct dsa_switch *ds, struct dsa_notifier_tag_proto_info *info) { @@ -341,16 +335,14 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds, ASSERT_RTNL(); for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_tag_proto_match(ds, port, info)) { - err = ds->ops->change_tag_protocol(ds, port, - tag_ops->proto); - if (err) - return err; + if (!dsa_is_cpu_port(ds, port)) + continue; - if (dsa_is_cpu_port(ds, port)) - dsa_port_set_tag_protocol(dsa_to_port(ds, port), - tag_ops); - } + err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto); + if (err) + return err; + + dsa_port_set_tag_protocol(dsa_to_port(ds, port), tag_ops); } /* Now that changing the tag protocol can no longer fail, let's update diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 933b427122be..9cce612e8976 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -511,13 +511,14 @@ unsigned char * __weak arch_get_platform_mac_address(void) int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) { - const unsigned char *addr = NULL; + unsigned char *addr; + int ret; - if (dev->of_node) - addr = of_get_mac_address(dev->of_node); - if (IS_ERR_OR_NULL(addr)) - addr = arch_get_platform_mac_address(); + ret = of_get_mac_address(dev->of_node, mac_addr); + if (!ret) + return 0; + addr = arch_get_platform_mac_address(); if (!addr) return -ENODEV; diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index c2dc9033a8f7..723c9a8a8cdf 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ - tunnels.o fec.o + tunnels.o fec.o eeprom.o stats.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c index c6a383dfd6c2..f9dcbad84788 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -273,6 +273,7 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(10000, KR, Full), [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = { .speed = SPEED_10000, + .lanes = 1, .duplex = DUPLEX_FULL, }, __DEFINE_LINK_MODE_PARAMS(20000, MLD2, Full), @@ -562,3 +563,19 @@ void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops) rtnl_unlock(); } EXPORT_SYMBOL_GPL(ethtool_set_ethtool_phy_ops); + +void +ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, + enum ethtool_link_mode_bit_indices link_mode) +{ + const struct link_mode_info *link_info; + + if (WARN_ON_ONCE(link_mode >= __ETHTOOL_LINK_MODE_MASK_NBITS)) + return; + + link_info = &link_mode_params[link_mode]; + link_ksettings->base.speed = link_info->speed; + link_ksettings->lanes = link_info->lanes; + link_ksettings->base.duplex = link_info->duplex; +} +EXPORT_SYMBOL_GPL(ethtool_params_from_link_mode); diff --git a/net/ethtool/common.h b/net/ethtool/common.h index a9d071248698..2dc2b80aea5f 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -47,4 +47,9 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info); extern const struct ethtool_phy_ops *ethtool_phy_ops; +int ethtool_get_module_info_call(struct net_device *dev, + struct ethtool_modinfo *modinfo); +int ethtool_get_module_eeprom_call(struct net_device *dev, + struct ethtool_eeprom *ee, u8 *data); + #endif /* _ETHTOOL_COMMON_H */ diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c index 901b7de941ab..e10bfcc07853 100644 --- a/net/ethtool/eee.c +++ b/net/ethtool/eee.c @@ -169,8 +169,8 @@ int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info) ethnl_update_bool32(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod); ethnl_update_bool32(&eee.tx_lpi_enabled, tb[ETHTOOL_A_EEE_TX_LPI_ENABLED], &mod); - ethnl_update_bool32(&eee.tx_lpi_timer, tb[ETHTOOL_A_EEE_TX_LPI_TIMER], - &mod); + ethnl_update_u32(&eee.tx_lpi_timer, tb[ETHTOOL_A_EEE_TX_LPI_TIMER], + &mod); ret = 0; if (!mod) goto out_ops; diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c new file mode 100644 index 000000000000..2a6733a6449a --- /dev/null +++ b/net/ethtool/eeprom.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/ethtool.h> +#include <linux/sfp.h> +#include "netlink.h" +#include "common.h" + +struct eeprom_req_info { + struct ethnl_req_info base; + u32 offset; + u32 length; + u8 page; + u8 bank; + u8 i2c_address; +}; + +struct eeprom_reply_data { + struct ethnl_reply_data base; + u32 length; + u8 *data; +}; + +#define MODULE_EEPROM_REQINFO(__req_base) \ + container_of(__req_base, struct eeprom_req_info, base) + +#define MODULE_EEPROM_REPDATA(__reply_base) \ + container_of(__reply_base, struct eeprom_reply_data, base) + +static int fallback_set_params(struct eeprom_req_info *request, + struct ethtool_modinfo *modinfo, + struct ethtool_eeprom *eeprom) +{ + u32 offset = request->offset; + u32 length = request->length; + + if (request->page) + offset = request->page * ETH_MODULE_EEPROM_PAGE_LEN + offset; + + if (modinfo->type == ETH_MODULE_SFF_8079 && + request->i2c_address == 0x51) + offset += ETH_MODULE_EEPROM_PAGE_LEN * 2; + + if (offset >= modinfo->eeprom_len) + return -EINVAL; + + eeprom->cmd = ETHTOOL_GMODULEEEPROM; + eeprom->len = length; + eeprom->offset = offset; + + return 0; +} + +static int eeprom_fallback(struct eeprom_req_info *request, + struct eeprom_reply_data *reply, + struct genl_info *info) +{ + struct net_device *dev = reply->base.dev; + struct ethtool_modinfo modinfo = {0}; + struct ethtool_eeprom eeprom = {0}; + u8 *data; + int err; + + modinfo.cmd = ETHTOOL_GMODULEINFO; + err = ethtool_get_module_info_call(dev, &modinfo); + if (err < 0) + return err; + + err = fallback_set_params(request, &modinfo, &eeprom); + if (err < 0) + return err; + + data = kmalloc(eeprom.len, GFP_KERNEL); + if (!data) + return -ENOMEM; + err = ethtool_get_module_eeprom_call(dev, &eeprom, data); + if (err < 0) + goto err_out; + + reply->data = data; + reply->length = eeprom.len; + + return 0; + +err_out: + kfree(data); + return err; +} + +static int get_module_eeprom_by_page(struct net_device *dev, + struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (dev->sfp_bus) + return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack); + + if (ops->get_module_info) + return ops->get_module_eeprom_by_page(dev, page_data, extack); + + return -EOPNOTSUPP; +} + +static int eeprom_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base); + struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base); + struct ethtool_module_eeprom page_data = {0}; + struct net_device *dev = reply_base->dev; + int ret; + + page_data.offset = request->offset; + page_data.length = request->length; + page_data.i2c_address = request->i2c_address; + page_data.page = request->page; + page_data.bank = request->bank; + page_data.data = kmalloc(page_data.length, GFP_KERNEL); + if (!page_data.data) + return -ENOMEM; + + ret = ethnl_ops_begin(dev); + if (ret) + goto err_free; + + ret = get_module_eeprom_by_page(dev, &page_data, info->extack); + if (ret < 0) + goto err_ops; + + reply->length = ret; + reply->data = page_data.data; + + ethnl_ops_complete(dev); + return 0; + +err_ops: + ethnl_ops_complete(dev); +err_free: + kfree(page_data.data); + + if (ret == -EOPNOTSUPP) + return eeprom_fallback(request, reply, info); + return ret; +} + +static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_info); + + if (!tb[ETHTOOL_A_MODULE_EEPROM_OFFSET] || + !tb[ETHTOOL_A_MODULE_EEPROM_LENGTH] || + !tb[ETHTOOL_A_MODULE_EEPROM_PAGE] || + !tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]) + return -EINVAL; + + request->i2c_address = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]); + request->offset = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_OFFSET]); + request->length = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_LENGTH]); + + if (!request->length) + return -EINVAL; + + /* The following set of conditions limit the API to only dump 1/2 + * EEPROM page without crossing low page boundary located at offset 128. + * This means user may only request dumps of length limited to 128 from + * either low 128 bytes or high 128 bytes. + * For pages higher than 0 only high 128 bytes are accessible. + */ + request->page = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_PAGE]); + if (request->page && request->offset < ETH_MODULE_EEPROM_PAGE_LEN) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_PAGE], + "reading from lower half page is allowed for page 0 only"); + return -EINVAL; + } + + if (request->offset < ETH_MODULE_EEPROM_PAGE_LEN && + request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH], + "reading cross half page boundary is illegal"); + return -EINVAL; + } else if (request->offset >= ETH_MODULE_EEPROM_PAGE_LEN * 2) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_OFFSET], + "offset is out of bounds"); + return -EINVAL; + } else if (request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN * 2) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH], + "reading cross page boundary is illegal"); + return -EINVAL; + } + + if (tb[ETHTOOL_A_MODULE_EEPROM_BANK]) + request->bank = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_BANK]); + + return 0; +} + +static int eeprom_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base); + + return nla_total_size(sizeof(u8) * request->length); /* _EEPROM_DATA */ +} + +static int eeprom_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base); + + return nla_put(skb, ETHTOOL_A_MODULE_EEPROM_DATA, reply->length, reply->data); +} + +static void eeprom_cleanup_data(struct ethnl_reply_data *reply_base) +{ + struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base); + + kfree(reply->data); +} + +const struct ethnl_request_ops ethnl_module_eeprom_request_ops = { + .request_cmd = ETHTOOL_MSG_MODULE_EEPROM_GET, + .reply_cmd = ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, + .hdr_attr = ETHTOOL_A_MODULE_EEPROM_HEADER, + .req_info_size = sizeof(struct eeprom_req_info), + .reply_data_size = sizeof(struct eeprom_reply_data), + + .parse_request = eeprom_parse_request, + .prepare_data = eeprom_prepare_data, + .reply_size = eeprom_reply_size, + .fill_reply = eeprom_fill_reply, + .cleanup_data = eeprom_cleanup_data, +}; + +const struct nla_policy ethnl_module_eeprom_get_policy[] = { + [ETHTOOL_A_MODULE_EEPROM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_MODULE_EEPROM_OFFSET] = { .type = NLA_U32 }, + [ETHTOOL_A_MODULE_EEPROM_LENGTH] = { .type = NLA_U32 }, + [ETHTOOL_A_MODULE_EEPROM_PAGE] = { .type = NLA_U8 }, + [ETHTOOL_A_MODULE_EEPROM_BANK] = { .type = NLA_U8 }, + [ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS] = + NLA_POLICY_RANGE(NLA_U8, 0, ETH_MODULE_MAX_I2C_ADDRESS), +}; + diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c index 31454b9188bd..8738dafd5417 100644 --- a/net/ethtool/fec.c +++ b/net/ethtool/fec.c @@ -13,6 +13,10 @@ struct fec_reply_data { __ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes); u32 active_fec; u8 fec_auto; + struct fec_stat_grp { + u64 stats[1 + ETHTOOL_MAX_LANES]; + u8 cnt; + } corr, uncorr, corr_bits; }; #define FEC_REPDATA(__reply_base) \ @@ -21,7 +25,7 @@ struct fec_reply_data { #define ETHTOOL_FEC_MASK ((ETHTOOL_FEC_LLRS << 1) - 1) const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1] = { - [ETHTOOL_A_FEC_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_FEC_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), }; static void @@ -64,6 +68,28 @@ ethtool_link_modes_to_fecparam(struct ethtool_fecparam *fec, return 0; } +static void +fec_stats_recalc(struct fec_stat_grp *grp, struct ethtool_fec_stat *stats) +{ + int i; + + if (stats->lanes[0] == ETHTOOL_STAT_NOT_SET) { + grp->stats[0] = stats->total; + grp->cnt = stats->total != ETHTOOL_STAT_NOT_SET; + return; + } + + grp->cnt = 1; + grp->stats[0] = 0; + for (i = 0; i < ETHTOOL_MAX_LANES; i++) { + if (stats->lanes[i] == ETHTOOL_STAT_NOT_SET) + break; + + grp->stats[0] += stats->lanes[i]; + grp->stats[grp->cnt++] = stats->lanes[i]; + } +} + static int fec_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, struct genl_info *info) @@ -80,9 +106,19 @@ static int fec_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; ret = dev->ethtool_ops->get_fecparam(dev, &fec); - ethnl_ops_complete(dev); if (ret) - return ret; + goto out_complete; + if (req_base->flags & ETHTOOL_FLAG_STATS && + dev->ethtool_ops->get_fec_stats) { + struct ethtool_fec_stats stats; + + ethtool_stats_init((u64 *)&stats, sizeof(stats) / 8); + dev->ethtool_ops->get_fec_stats(dev, &stats); + + fec_stats_recalc(&data->corr, &stats.corrected_blocks); + fec_stats_recalc(&data->uncorr, &stats.uncorrectable_blocks); + fec_stats_recalc(&data->corr_bits, &stats.corrected_bits); + } WARN_ON_ONCE(fec.reserved); @@ -98,7 +134,9 @@ static int fec_prepare_data(const struct ethnl_req_info *req_base, if (data->active_fec == __ETHTOOL_LINK_MODE_MASK_NBITS) data->active_fec = 0; - return 0; +out_complete: + ethnl_ops_complete(dev); + return ret; } static int fec_reply_size(const struct ethnl_req_info *req_base, @@ -119,9 +157,40 @@ static int fec_reply_size(const struct ethnl_req_info *req_base, len += nla_total_size(sizeof(u8)) + /* _FEC_AUTO */ nla_total_size(sizeof(u32)); /* _FEC_ACTIVE */ + if (req_base->flags & ETHTOOL_FLAG_STATS) + len += 3 * nla_total_size_64bit(sizeof(u64) * + (1 + ETHTOOL_MAX_LANES)); + return len; } +static int fec_put_stats(struct sk_buff *skb, const struct fec_reply_data *data) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, ETHTOOL_A_FEC_STATS); + if (!nest) + return -EMSGSIZE; + + if (nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_CORRECTED, + sizeof(u64) * data->corr.cnt, + data->corr.stats, ETHTOOL_A_FEC_STAT_PAD) || + nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_UNCORR, + sizeof(u64) * data->uncorr.cnt, + data->uncorr.stats, ETHTOOL_A_FEC_STAT_PAD) || + nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_CORR_BITS, + sizeof(u64) * data->corr_bits.cnt, + data->corr_bits.stats, ETHTOOL_A_FEC_STAT_PAD)) + goto err_cancel; + + nla_nest_end(skb, nest); + return 0; + +err_cancel: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + static int fec_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) @@ -142,6 +211,9 @@ static int fec_fill_reply(struct sk_buff *skb, nla_put_u32(skb, ETHTOOL_A_FEC_ACTIVE, data->active_fec))) return -EMSGSIZE; + if (req_base->flags & ETHTOOL_FLAG_STATS && fec_put_stats(skb, data)) + return -EMSGSIZE; + return 0; } diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 26b3e7086075..3fa7a394eabf 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -426,29 +426,13 @@ struct ethtool_link_usettings { int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings) { - const struct link_mode_info *link_info; - int err; - ASSERT_RTNL(); if (!dev->ethtool_ops->get_link_ksettings) return -EOPNOTSUPP; memset(link_ksettings, 0, sizeof(*link_ksettings)); - - link_ksettings->link_mode = -1; - err = dev->ethtool_ops->get_link_ksettings(dev, link_ksettings); - if (err) - return err; - - if (link_ksettings->link_mode != -1) { - link_info = &link_mode_params[link_ksettings->link_mode]; - link_ksettings->base.speed = link_info->speed; - link_ksettings->lanes = link_info->lanes; - link_ksettings->base.duplex = link_info->duplex; - } - - return 0; + return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings); } EXPORT_SYMBOL(__ethtool_get_link_ksettings); @@ -505,7 +489,7 @@ store_link_ksettings_for_user(void __user *to, { struct ethtool_link_usettings link_usettings; - memcpy(&link_usettings.base, &from->base, sizeof(link_usettings)); + memcpy(&link_usettings, from, sizeof(link_usettings)); bitmap_to_arr32(link_usettings.link_modes.supported, from->link_modes.supported, __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -2204,8 +2188,8 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) return 0; } -static int __ethtool_get_module_info(struct net_device *dev, - struct ethtool_modinfo *modinfo) +int ethtool_get_module_info_call(struct net_device *dev, + struct ethtool_modinfo *modinfo) { const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; @@ -2231,7 +2215,7 @@ static int ethtool_get_module_info(struct net_device *dev, if (copy_from_user(&modinfo, useraddr, sizeof(modinfo))) return -EFAULT; - ret = __ethtool_get_module_info(dev, &modinfo); + ret = ethtool_get_module_info_call(dev, &modinfo); if (ret) return ret; @@ -2241,8 +2225,8 @@ static int ethtool_get_module_info(struct net_device *dev, return 0; } -static int __ethtool_get_module_eeprom(struct net_device *dev, - struct ethtool_eeprom *ee, u8 *data) +int ethtool_get_module_eeprom_call(struct net_device *dev, + struct ethtool_eeprom *ee, u8 *data) { const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; @@ -2265,12 +2249,12 @@ static int ethtool_get_module_eeprom(struct net_device *dev, int ret; struct ethtool_modinfo modinfo; - ret = __ethtool_get_module_info(dev, &modinfo); + ret = ethtool_get_module_info_call(dev, &modinfo); if (ret) return ret; return ethtool_get_any_eeprom(dev, useraddr, - __ethtool_get_module_eeprom, + ethtool_get_module_eeprom_call, modinfo.eeprom_len); } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 705a4b201564..290012d0d11d 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -246,6 +246,8 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_EEE_GET] = ðnl_eee_request_ops, [ETHTOOL_MSG_FEC_GET] = ðnl_fec_request_ops, [ETHTOOL_MSG_TSINFO_GET] = ðnl_tsinfo_request_ops, + [ETHTOOL_MSG_MODULE_EEPROM_GET] = ðnl_module_eeprom_request_ops, + [ETHTOOL_MSG_STATS_GET] = ðnl_stats_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -931,6 +933,25 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_fec_set_policy, .maxattr = ARRAY_SIZE(ethnl_fec_set_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_MODULE_EEPROM_GET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_module_eeprom_get_policy, + .maxattr = ARRAY_SIZE(ethnl_module_eeprom_get_policy) - 1, + }, + { + .cmd = ETHTOOL_MSG_STATS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_stats_get_policy, + .maxattr = ARRAY_SIZE(ethnl_stats_get_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 785f7ee45930..8abcbc10796c 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -36,9 +36,9 @@ static inline int ethnl_strz_size(const char *s) /** * ethnl_put_strz() - put string attribute with fixed size string - * @skb: skb with the message - * @attrype: attribute type - * @s: ETH_GSTRING_LEN sized string (may not be null terminated) + * @skb: skb with the message + * @attrtype: attribute type + * @s: ETH_GSTRING_LEN sized string (may not be null terminated) * * Puts an attribute with null terminated string from @s into the message. * @@ -345,6 +345,8 @@ extern const struct ethnl_request_ops ethnl_pause_request_ops; extern const struct ethnl_request_ops ethnl_eee_request_ops; extern const struct ethnl_request_ops ethnl_tsinfo_request_ops; extern const struct ethnl_request_ops ethnl_fec_request_ops; +extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops; +extern const struct ethnl_request_ops ethnl_stats_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -378,6 +380,8 @@ extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_T extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1]; extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1]; extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1]; +extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_DATA + 1]; +extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1]; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); @@ -397,4 +401,10 @@ int ethnl_tunnel_info_start(struct netlink_callback *cb); int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info); +extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN]; +extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN]; +extern const char stats_eth_mac_names[__ETHTOOL_A_STATS_ETH_MAC_CNT][ETH_GSTRING_LEN]; +extern const char stats_eth_ctrl_names[__ETHTOOL_A_STATS_ETH_CTRL_CNT][ETH_GSTRING_LEN]; +extern const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN]; + #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c index 09998dc5c185..9009f412151e 100644 --- a/net/ethtool/pause.c +++ b/net/ethtool/pause.c @@ -21,12 +21,6 @@ const struct nla_policy ethnl_pause_get_policy[] = { NLA_POLICY_NESTED(ethnl_header_policy_stats), }; -static void ethtool_stats_init(u64 *stats, unsigned int n) -{ - while (n--) - stats[n] = ETHTOOL_STAT_NOT_SET; -} - static int pause_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, struct genl_info *info) @@ -38,16 +32,16 @@ static int pause_prepare_data(const struct ethnl_req_info *req_base, if (!dev->ethtool_ops->get_pauseparam) return -EOPNOTSUPP; + ethtool_stats_init((u64 *)&data->pausestat, + sizeof(data->pausestat) / 8); + ret = ethnl_ops_begin(dev); if (ret < 0) return ret; dev->ethtool_ops->get_pauseparam(dev, &data->pauseparam); if (req_base->flags & ETHTOOL_FLAG_STATS && - dev->ethtool_ops->get_pause_stats) { - ethtool_stats_init((u64 *)&data->pausestat, - sizeof(data->pausestat) / 8); + dev->ethtool_ops->get_pause_stats) dev->ethtool_ops->get_pause_stats(dev, &data->pausestat); - } ethnl_ops_complete(dev); return 0; diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c new file mode 100644 index 000000000000..b7642dc96d50 --- /dev/null +++ b/net/ethtool/stats.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" +#include "bitset.h" + +struct stats_req_info { + struct ethnl_req_info base; + DECLARE_BITMAP(stat_mask, __ETHTOOL_STATS_CNT); +}; + +#define STATS_REQINFO(__req_base) \ + container_of(__req_base, struct stats_req_info, base) + +struct stats_reply_data { + struct ethnl_reply_data base; + struct ethtool_eth_phy_stats phy_stats; + struct ethtool_eth_mac_stats mac_stats; + struct ethtool_eth_ctrl_stats ctrl_stats; + struct ethtool_rmon_stats rmon_stats; + const struct ethtool_rmon_hist_range *rmon_ranges; +}; + +#define STATS_REPDATA(__reply_base) \ + container_of(__reply_base, struct stats_reply_data, base) + +const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN] = { + [ETHTOOL_STATS_ETH_PHY] = "eth-phy", + [ETHTOOL_STATS_ETH_MAC] = "eth-mac", + [ETHTOOL_STATS_ETH_CTRL] = "eth-ctrl", + [ETHTOOL_STATS_RMON] = "rmon", +}; + +const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN] = { + [ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR] = "SymbolErrorDuringCarrier", +}; + +const char stats_eth_mac_names[__ETHTOOL_A_STATS_ETH_MAC_CNT][ETH_GSTRING_LEN] = { + [ETHTOOL_A_STATS_ETH_MAC_2_TX_PKT] = "FramesTransmittedOK", + [ETHTOOL_A_STATS_ETH_MAC_3_SINGLE_COL] = "SingleCollisionFrames", + [ETHTOOL_A_STATS_ETH_MAC_4_MULTI_COL] = "MultipleCollisionFrames", + [ETHTOOL_A_STATS_ETH_MAC_5_RX_PKT] = "FramesReceivedOK", + [ETHTOOL_A_STATS_ETH_MAC_6_FCS_ERR] = "FrameCheckSequenceErrors", + [ETHTOOL_A_STATS_ETH_MAC_7_ALIGN_ERR] = "AlignmentErrors", + [ETHTOOL_A_STATS_ETH_MAC_8_TX_BYTES] = "OctetsTransmittedOK", + [ETHTOOL_A_STATS_ETH_MAC_9_TX_DEFER] = "FramesWithDeferredXmissions", + [ETHTOOL_A_STATS_ETH_MAC_10_LATE_COL] = "LateCollisions", + [ETHTOOL_A_STATS_ETH_MAC_11_XS_COL] = "FramesAbortedDueToXSColls", + [ETHTOOL_A_STATS_ETH_MAC_12_TX_INT_ERR] = "FramesLostDueToIntMACXmitError", + [ETHTOOL_A_STATS_ETH_MAC_13_CS_ERR] = "CarrierSenseErrors", + [ETHTOOL_A_STATS_ETH_MAC_14_RX_BYTES] = "OctetsReceivedOK", + [ETHTOOL_A_STATS_ETH_MAC_15_RX_INT_ERR] = "FramesLostDueToIntMACRcvError", + [ETHTOOL_A_STATS_ETH_MAC_18_TX_MCAST] = "MulticastFramesXmittedOK", + [ETHTOOL_A_STATS_ETH_MAC_19_TX_BCAST] = "BroadcastFramesXmittedOK", + [ETHTOOL_A_STATS_ETH_MAC_20_XS_DEFER] = "FramesWithExcessiveDeferral", + [ETHTOOL_A_STATS_ETH_MAC_21_RX_MCAST] = "MulticastFramesReceivedOK", + [ETHTOOL_A_STATS_ETH_MAC_22_RX_BCAST] = "BroadcastFramesReceivedOK", + [ETHTOOL_A_STATS_ETH_MAC_23_IR_LEN_ERR] = "InRangeLengthErrors", + [ETHTOOL_A_STATS_ETH_MAC_24_OOR_LEN] = "OutOfRangeLengthField", + [ETHTOOL_A_STATS_ETH_MAC_25_TOO_LONG_ERR] = "FrameTooLongErrors", +}; + +const char stats_eth_ctrl_names[__ETHTOOL_A_STATS_ETH_CTRL_CNT][ETH_GSTRING_LEN] = { + [ETHTOOL_A_STATS_ETH_CTRL_3_TX] = "MACControlFramesTransmitted", + [ETHTOOL_A_STATS_ETH_CTRL_4_RX] = "MACControlFramesReceived", + [ETHTOOL_A_STATS_ETH_CTRL_5_RX_UNSUP] = "UnsupportedOpcodesReceived", +}; + +const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN] = { + [ETHTOOL_A_STATS_RMON_UNDERSIZE] = "etherStatsUndersizePkts", + [ETHTOOL_A_STATS_RMON_OVERSIZE] = "etherStatsOversizePkts", + [ETHTOOL_A_STATS_RMON_FRAG] = "etherStatsFragments", + [ETHTOOL_A_STATS_RMON_JABBER] = "etherStatsJabbers", +}; + +const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1] = { + [ETHTOOL_A_STATS_HEADER] = + NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_STATS_GROUPS] = { .type = NLA_NESTED }, +}; + +static int stats_parse_request(struct ethnl_req_info *req_base, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct stats_req_info *req_info = STATS_REQINFO(req_base); + bool mod = false; + int err; + + err = ethnl_update_bitset(req_info->stat_mask, __ETHTOOL_STATS_CNT, + tb[ETHTOOL_A_STATS_GROUPS], stats_std_names, + extack, &mod); + if (err) + return err; + + if (!mod) { + NL_SET_ERR_MSG(extack, "no stats requested"); + return -EINVAL; + } + + return 0; +} + +static int stats_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + const struct stats_req_info *req_info = STATS_REQINFO(req_base); + struct stats_reply_data *data = STATS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + int ret; + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + + /* Mark all stats as unset (see ETHTOOL_STAT_NOT_SET) to prevent them + * from being reported to user space in case driver did not set them. + */ + memset(&data->phy_stats, 0xff, sizeof(data->phy_stats)); + memset(&data->mac_stats, 0xff, sizeof(data->mac_stats)); + memset(&data->ctrl_stats, 0xff, sizeof(data->mac_stats)); + memset(&data->rmon_stats, 0xff, sizeof(data->rmon_stats)); + + if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) && + dev->ethtool_ops->get_eth_phy_stats) + dev->ethtool_ops->get_eth_phy_stats(dev, &data->phy_stats); + if (test_bit(ETHTOOL_STATS_ETH_MAC, req_info->stat_mask) && + dev->ethtool_ops->get_eth_mac_stats) + dev->ethtool_ops->get_eth_mac_stats(dev, &data->mac_stats); + if (test_bit(ETHTOOL_STATS_ETH_CTRL, req_info->stat_mask) && + dev->ethtool_ops->get_eth_ctrl_stats) + dev->ethtool_ops->get_eth_ctrl_stats(dev, &data->ctrl_stats); + if (test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask) && + dev->ethtool_ops->get_rmon_stats) + dev->ethtool_ops->get_rmon_stats(dev, &data->rmon_stats, + &data->rmon_ranges); + + ethnl_ops_complete(dev); + return 0; +} + +static int stats_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct stats_req_info *req_info = STATS_REQINFO(req_base); + unsigned int n_grps = 0, n_stats = 0; + int len = 0; + + if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask)) { + n_stats += sizeof(struct ethtool_eth_phy_stats) / sizeof(u64); + n_grps++; + } + if (test_bit(ETHTOOL_STATS_ETH_MAC, req_info->stat_mask)) { + n_stats += sizeof(struct ethtool_eth_mac_stats) / sizeof(u64); + n_grps++; + } + if (test_bit(ETHTOOL_STATS_ETH_CTRL, req_info->stat_mask)) { + n_stats += sizeof(struct ethtool_eth_ctrl_stats) / sizeof(u64); + n_grps++; + } + if (test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask)) { + n_stats += sizeof(struct ethtool_rmon_stats) / sizeof(u64); + n_grps++; + /* Above includes the space for _A_STATS_GRP_HIST_VALs */ + + len += (nla_total_size(0) + /* _A_STATS_GRP_HIST */ + nla_total_size(4) + /* _A_STATS_GRP_HIST_BKT_LOW */ + nla_total_size(4)) * /* _A_STATS_GRP_HIST_BKT_HI */ + ETHTOOL_RMON_HIST_MAX * 2; + } + + len += n_grps * (nla_total_size(0) + /* _A_STATS_GRP */ + nla_total_size(4) + /* _A_STATS_GRP_ID */ + nla_total_size(4)); /* _A_STATS_GRP_SS_ID */ + len += n_stats * (nla_total_size(0) + /* _A_STATS_GRP_STAT */ + nla_total_size_64bit(sizeof(u64))); + + return len; +} + +static int stat_put(struct sk_buff *skb, u16 attrtype, u64 val) +{ + struct nlattr *nest; + int ret; + + if (val == ETHTOOL_STAT_NOT_SET) + return 0; + + /* We want to start stats attr types from 0, so we don't have a type + * for pad inside ETHTOOL_A_STATS_GRP_STAT. Pad things on the outside + * of ETHTOOL_A_STATS_GRP_STAT. Since we're one nest away from the + * actual attr we're 4B off - nla_need_padding_for_64bit() & co. + * can't be used. + */ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (!IS_ALIGNED((unsigned long)skb_tail_pointer(skb), 8)) + if (!nla_reserve(skb, ETHTOOL_A_STATS_GRP_PAD, 0)) + return -EMSGSIZE; +#endif + + nest = nla_nest_start(skb, ETHTOOL_A_STATS_GRP_STAT); + if (!nest) + return -EMSGSIZE; + + ret = nla_put_u64_64bit(skb, attrtype, val, -1 /* not used */); + if (ret) { + nla_nest_cancel(skb, nest); + return ret; + } + + nla_nest_end(skb, nest); + return 0; +} + +static int stats_put_phy_stats(struct sk_buff *skb, + const struct stats_reply_data *data) +{ + if (stat_put(skb, ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR, + data->phy_stats.SymbolErrorDuringCarrier)) + return -EMSGSIZE; + return 0; +} + +static int stats_put_mac_stats(struct sk_buff *skb, + const struct stats_reply_data *data) +{ + if (stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_2_TX_PKT, + data->mac_stats.FramesTransmittedOK) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_3_SINGLE_COL, + data->mac_stats.SingleCollisionFrames) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_4_MULTI_COL, + data->mac_stats.MultipleCollisionFrames) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_5_RX_PKT, + data->mac_stats.FramesReceivedOK) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_6_FCS_ERR, + data->mac_stats.FrameCheckSequenceErrors) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_7_ALIGN_ERR, + data->mac_stats.AlignmentErrors) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_8_TX_BYTES, + data->mac_stats.OctetsTransmittedOK) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_9_TX_DEFER, + data->mac_stats.FramesWithDeferredXmissions) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_10_LATE_COL, + data->mac_stats.LateCollisions) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_11_XS_COL, + data->mac_stats.FramesAbortedDueToXSColls) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_12_TX_INT_ERR, + data->mac_stats.FramesLostDueToIntMACXmitError) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_13_CS_ERR, + data->mac_stats.CarrierSenseErrors) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_14_RX_BYTES, + data->mac_stats.OctetsReceivedOK) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_15_RX_INT_ERR, + data->mac_stats.FramesLostDueToIntMACRcvError) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_18_TX_MCAST, + data->mac_stats.MulticastFramesXmittedOK) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_19_TX_BCAST, + data->mac_stats.BroadcastFramesXmittedOK) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_20_XS_DEFER, + data->mac_stats.FramesWithExcessiveDeferral) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_21_RX_MCAST, + data->mac_stats.MulticastFramesReceivedOK) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_22_RX_BCAST, + data->mac_stats.BroadcastFramesReceivedOK) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_23_IR_LEN_ERR, + data->mac_stats.InRangeLengthErrors) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_24_OOR_LEN, + data->mac_stats.OutOfRangeLengthField) || + stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_25_TOO_LONG_ERR, + data->mac_stats.FrameTooLongErrors)) + return -EMSGSIZE; + return 0; +} + +static int stats_put_ctrl_stats(struct sk_buff *skb, + const struct stats_reply_data *data) +{ + if (stat_put(skb, ETHTOOL_A_STATS_ETH_CTRL_3_TX, + data->ctrl_stats.MACControlFramesTransmitted) || + stat_put(skb, ETHTOOL_A_STATS_ETH_CTRL_4_RX, + data->ctrl_stats.MACControlFramesReceived) || + stat_put(skb, ETHTOOL_A_STATS_ETH_CTRL_5_RX_UNSUP, + data->ctrl_stats.UnsupportedOpcodesReceived)) + return -EMSGSIZE; + return 0; +} + +static int stats_put_rmon_hist(struct sk_buff *skb, u32 attr, const u64 *hist, + const struct ethtool_rmon_hist_range *ranges) +{ + struct nlattr *nest; + int i; + + if (!ranges) + return 0; + + for (i = 0; i < ETHTOOL_RMON_HIST_MAX; i++) { + if (!ranges[i].low && !ranges[i].high) + break; + if (hist[i] == ETHTOOL_STAT_NOT_SET) + continue; + + nest = nla_nest_start(skb, attr); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, + ranges[i].low) || + nla_put_u32(skb, ETHTOOL_A_STATS_GRP_HIST_BKT_HI, + ranges[i].high) || + nla_put_u64_64bit(skb, ETHTOOL_A_STATS_GRP_HIST_VAL, + hist[i], ETHTOOL_A_STATS_GRP_PAD)) + goto err_cancel_hist; + + nla_nest_end(skb, nest); + } + + return 0; + +err_cancel_hist: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int stats_put_rmon_stats(struct sk_buff *skb, + const struct stats_reply_data *data) +{ + if (stats_put_rmon_hist(skb, ETHTOOL_A_STATS_GRP_HIST_RX, + data->rmon_stats.hist, data->rmon_ranges) || + stats_put_rmon_hist(skb, ETHTOOL_A_STATS_GRP_HIST_TX, + data->rmon_stats.hist_tx, data->rmon_ranges)) + return -EMSGSIZE; + + if (stat_put(skb, ETHTOOL_A_STATS_RMON_UNDERSIZE, + data->rmon_stats.undersize_pkts) || + stat_put(skb, ETHTOOL_A_STATS_RMON_OVERSIZE, + data->rmon_stats.oversize_pkts) || + stat_put(skb, ETHTOOL_A_STATS_RMON_FRAG, + data->rmon_stats.fragments) || + stat_put(skb, ETHTOOL_A_STATS_RMON_JABBER, + data->rmon_stats.jabbers)) + return -EMSGSIZE; + + return 0; +} + +static int stats_put_stats(struct sk_buff *skb, + const struct stats_reply_data *data, + u32 id, u32 ss_id, + int (*cb)(struct sk_buff *skb, + const struct stats_reply_data *data)) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, ETHTOOL_A_STATS_GRP); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_STATS_GRP_ID, id) || + nla_put_u32(skb, ETHTOOL_A_STATS_GRP_SS_ID, ss_id)) + goto err_cancel; + + if (cb(skb, data)) + goto err_cancel; + + nla_nest_end(skb, nest); + return 0; + +err_cancel: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int stats_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct stats_req_info *req_info = STATS_REQINFO(req_base); + const struct stats_reply_data *data = STATS_REPDATA(reply_base); + int ret = 0; + + if (!ret && test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask)) + ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_PHY, + ETH_SS_STATS_ETH_PHY, + stats_put_phy_stats); + if (!ret && test_bit(ETHTOOL_STATS_ETH_MAC, req_info->stat_mask)) + ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_MAC, + ETH_SS_STATS_ETH_MAC, + stats_put_mac_stats); + if (!ret && test_bit(ETHTOOL_STATS_ETH_CTRL, req_info->stat_mask)) + ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_CTRL, + ETH_SS_STATS_ETH_CTRL, + stats_put_ctrl_stats); + if (!ret && test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask)) + ret = stats_put_stats(skb, data, ETHTOOL_STATS_RMON, + ETH_SS_STATS_RMON, stats_put_rmon_stats); + + return ret; +} + +const struct ethnl_request_ops ethnl_stats_request_ops = { + .request_cmd = ETHTOOL_MSG_STATS_GET, + .reply_cmd = ETHTOOL_MSG_STATS_GET_REPLY, + .hdr_attr = ETHTOOL_A_STATS_HEADER, + .req_info_size = sizeof(struct stats_req_info), + .reply_data_size = sizeof(struct stats_reply_data), + + .parse_request = stats_parse_request, + .prepare_data = stats_prepare_data, + .reply_size = stats_reply_size, + .fill_reply = stats_fill_reply, +}; diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index c3a5489964cd..b3029fff715d 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -80,6 +80,31 @@ static const struct strset_info info_template[] = { .count = __ETHTOOL_UDP_TUNNEL_TYPE_CNT, .strings = udp_tunnel_type_names, }, + [ETH_SS_STATS_STD] = { + .per_dev = false, + .count = __ETHTOOL_STATS_CNT, + .strings = stats_std_names, + }, + [ETH_SS_STATS_ETH_PHY] = { + .per_dev = false, + .count = __ETHTOOL_A_STATS_ETH_PHY_CNT, + .strings = stats_eth_phy_names, + }, + [ETH_SS_STATS_ETH_MAC] = { + .per_dev = false, + .count = __ETHTOOL_A_STATS_ETH_MAC_CNT, + .strings = stats_eth_mac_names, + }, + [ETH_SS_STATS_ETH_CTRL] = { + .per_dev = false, + .count = __ETHTOOL_A_STATS_ETH_CTRL_CNT, + .strings = stats_eth_ctrl_names, + }, + [ETH_SS_STATS_RMON] = { + .per_dev = false, + .count = __ETHTOOL_A_STATS_RMON_CNT, + .strings = stats_rmon_names, + }, }; struct strset_req_info { diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 7444ec6e298e..bfcdc75fc01e 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -217,6 +217,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (master) { skb->dev = master->dev; + skb_reset_mac_header(skb); hsr_forward_skb(skb, master); } else { atomic_long_inc(&dev->tx_dropped); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index ed82a470b6e1..b218e4594009 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -555,12 +555,6 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) { struct hsr_frame_info frame; - if (skb_mac_header(skb) != skb->data) { - WARN_ONCE(1, "%s:%d: Malformed frame (port_src %s)\n", - __FILE__, __LINE__, port->dev->name); - goto out_drop; - } - if (fill_frame_info(&frame, skb, port) < 0) goto out_drop; diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 9c640d670ffe..0c1b0770c59e 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -551,9 +551,7 @@ ieee802154_llsec_parse_key_id(struct genl_info *info, desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]); if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { - if (!info->attrs[IEEE802154_ATTR_PAN_ID] && - !(info->attrs[IEEE802154_ATTR_SHORT_ADDR] || - info->attrs[IEEE802154_ATTR_HW_ADDR])) + if (!info->attrs[IEEE802154_ATTR_PAN_ID]) return -EINVAL; desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]); @@ -562,6 +560,9 @@ ieee802154_llsec_parse_key_id(struct genl_info *info, desc->device_addr.mode = IEEE802154_ADDR_SHORT; desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]); } else { + if (!info->attrs[IEEE802154_ATTR_HW_ADDR]) + return -EINVAL; + desc->device_addr.mode = IEEE802154_ADDR_LONG; desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); } diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 7c5a1aa5adb4..05f6bd89a7dd 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -820,8 +820,13 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, goto nla_put_failure; #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) + goto out; + if (nl802154_get_llsec_params(msg, rdev, wpan_dev) < 0) goto nla_put_failure; + +out: #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ genlmsg_end(msg, hdr); @@ -1384,6 +1389,9 @@ static int nl802154_set_llsec_params(struct sk_buff *skb, u32 changed = 0; int ret; + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) + return -EOPNOTSUPP; + if (info->attrs[NL802154_ATTR_SEC_ENABLED]) { u8 enabled; @@ -1490,6 +1498,11 @@ nl802154_dump_llsec_key(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) { + err = skb->len; + goto out_err; + } + if (!wpan_dev->netdev) { err = -EINVAL; goto out_err; @@ -1544,7 +1557,11 @@ static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info) struct ieee802154_llsec_key_id id = { }; u32 commands[NL802154_CMD_FRAME_NR_IDS / 32] = { }; - if (nla_parse_nested_deprecated(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], nl802154_key_policy, info->extack)) + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) + return -EOPNOTSUPP; + + if (!info->attrs[NL802154_ATTR_SEC_KEY] || + nla_parse_nested_deprecated(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], nl802154_key_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] || @@ -1592,7 +1609,11 @@ static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info) struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1]; struct ieee802154_llsec_key_id id; - if (nla_parse_nested_deprecated(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], nl802154_key_policy, info->extack)) + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) + return -EOPNOTSUPP; + + if (!info->attrs[NL802154_ATTR_SEC_KEY] || + nla_parse_nested_deprecated(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], nl802154_key_policy, info->extack)) return -EINVAL; if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0) @@ -1656,6 +1677,11 @@ nl802154_dump_llsec_dev(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) { + err = skb->len; + goto out_err; + } + if (!wpan_dev->netdev) { err = -EINVAL; goto out_err; @@ -1742,6 +1768,9 @@ static int nl802154_add_llsec_dev(struct sk_buff *skb, struct genl_info *info) struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct ieee802154_llsec_device dev_desc; + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) + return -EOPNOTSUPP; + if (ieee802154_llsec_parse_device(info->attrs[NL802154_ATTR_SEC_DEVICE], &dev_desc) < 0) return -EINVAL; @@ -1757,7 +1786,11 @@ static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info) struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1]; __le64 extended_addr; - if (nla_parse_nested_deprecated(attrs, NL802154_DEV_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVICE], nl802154_dev_policy, info->extack)) + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) + return -EOPNOTSUPP; + + if (!info->attrs[NL802154_ATTR_SEC_DEVICE] || + nla_parse_nested_deprecated(attrs, NL802154_DEV_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVICE], nl802154_dev_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]) @@ -1825,6 +1858,11 @@ nl802154_dump_llsec_devkey(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) { + err = skb->len; + goto out_err; + } + if (!wpan_dev->netdev) { err = -EINVAL; goto out_err; @@ -1882,6 +1920,9 @@ static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info struct ieee802154_llsec_device_key key; __le64 extended_addr; + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) + return -EOPNOTSUPP; + if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] || nla_parse_nested_deprecated(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], nl802154_devkey_policy, info->extack) < 0) return -EINVAL; @@ -1913,7 +1954,11 @@ static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info struct ieee802154_llsec_device_key key; __le64 extended_addr; - if (nla_parse_nested_deprecated(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], nl802154_devkey_policy, info->extack)) + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) + return -EOPNOTSUPP; + + if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] || + nla_parse_nested_deprecated(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], nl802154_devkey_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]) @@ -1986,6 +2031,11 @@ nl802154_dump_llsec_seclevel(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) { + err = skb->len; + goto out_err; + } + if (!wpan_dev->netdev) { err = -EINVAL; goto out_err; @@ -2070,6 +2120,9 @@ static int nl802154_add_llsec_seclevel(struct sk_buff *skb, struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct ieee802154_llsec_seclevel sl; + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) + return -EOPNOTSUPP; + if (llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL], &sl) < 0) return -EINVAL; @@ -2085,6 +2138,9 @@ static int nl802154_del_llsec_seclevel(struct sk_buff *skb, struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct ieee802154_llsec_seclevel sl; + if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) + return -EOPNOTSUPP; + if (!info->attrs[NL802154_ATTR_SEC_LEVEL] || llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL], &sl) < 0) @@ -2098,11 +2154,7 @@ static int nl802154_del_llsec_seclevel(struct sk_buff *skb, #define NL802154_FLAG_NEED_NETDEV 0x02 #define NL802154_FLAG_NEED_RTNL 0x04 #define NL802154_FLAG_CHECK_NETDEV_UP 0x08 -#define NL802154_FLAG_NEED_NETDEV_UP (NL802154_FLAG_NEED_NETDEV |\ - NL802154_FLAG_CHECK_NETDEV_UP) #define NL802154_FLAG_NEED_WPAN_DEV 0x10 -#define NL802154_FLAG_NEED_WPAN_DEV_UP (NL802154_FLAG_NEED_WPAN_DEV |\ - NL802154_FLAG_CHECK_NETDEV_UP) static int nl802154_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index d99e1be94019..36ed85bf2ad5 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -141,7 +141,7 @@ static void ah_output_done(struct crypto_async_request *base, int err) } kfree(AH_SKB_CB(skb)->tmp); - xfrm_output_resume(skb, err); + xfrm_output_resume(skb->sk, skb, err); } static int ah_output(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 75f67994fc85..2e35f68da40a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1978,7 +1978,8 @@ static int inet_validate_link_af(const struct net_device *dev, return 0; } -static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) +static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct in_device *in_dev = __in_dev_get_rcu(dev); struct nlattr *a, *tb[IFLA_INET_MAX+1]; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1ae920b93f39..35803ab7ac80 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -279,7 +279,7 @@ static void esp_output_done(struct crypto_async_request *base, int err) x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb, err); + xfrm_output_resume(skb->sk, skb, err); } } @@ -754,7 +754,7 @@ int esp_input_done2(struct sk_buff *skb, int err) int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); int ihl; - if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) + if (!xo || !(xo->flags & CRYPTO_DONE)) kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 601f5fbfc63f..33687cf58286 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -217,10 +217,12 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, if ((!(skb->dev->gso_partial_features & NETIF_F_HW_ESP) && !(features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev) - esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK | + NETIF_F_SCTP_CRC); else if (!(features & NETIF_F_HW_ESP_TX_CSUM) && !(skb->dev->gso_partial_features & NETIF_F_HW_ESP_TX_CSUM)) - esp_features = features & ~NETIF_F_CSUM_MASK; + esp_features = features & ~(NETIF_F_CSUM_MASK | + NETIF_F_SCTP_CRC); xo->flags |= XFRM_GSO_SEGMENT; @@ -312,8 +314,17 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ ip_hdr(skb)->tot_len = htons(skb->len); ip_send_check(ip_hdr(skb)); - if (hw_offload) + if (hw_offload) { + if (!skb_ext_add(skb, SKB_EXT_SEC_PATH)) + return -ENOMEM; + + xo = xfrm_offload(skb); + if (!xo) + return -EINVAL; + + xo->flags |= XFRM_XMIT; return 0; + } err = esp_output_tail(x, skb, &esp); if (err) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 76990e13a2f9..8bd988fbcb31 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1196,6 +1196,11 @@ int icmp_rcv(struct sk_buff *skb) goto success_check; } + if (icmph->type == ICMP_EXT_ECHOREPLY) { + success = ping_rcv(skb); + goto success_check; + } + /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index eb207089ece0..31c6c6d99d5e 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -218,7 +218,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, } if (dst->flags & DST_XFRM_QUEUE) - goto queued; + goto xmit; if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) { dev->stats.tx_carrier_errors++; @@ -238,6 +238,8 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->len > mtu) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { + if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) + goto xmit; icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); } else { @@ -251,7 +253,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } -queued: +xmit: skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 7c841037c533..aff707988e23 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -25,6 +25,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un __be32 saddr = iph->saddr; __u8 flags; struct net_device *dev = skb_dst(skb)->dev; + struct flow_keys flkeys; unsigned int hh_len; sk = sk_to_full_sk(sk); @@ -48,6 +49,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un fl4.flowi4_oif = l3mdev_master_ifindex(dev); fl4.flowi4_mark = skb->mark; fl4.flowi4_flags = flags; + fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index a2f4f894be2b..63cb953bd019 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -76,12 +76,18 @@ config NF_DUP_IPV4 config NF_LOG_ARP tristate "ARP packet logging" default m if NETFILTER_ADVANCED=n - select NF_LOG_COMMON + select NF_LOG_SYSLOG + help + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects CONFIG_NF_LOG_SYSLOG. config NF_LOG_IPV4 tristate "IPv4 packet logging" default m if NETFILTER_ADVANCED=n - select NF_LOG_COMMON + select NF_LOG_SYSLOG + help + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects CONFIG_NF_LOG_SYSLOG. config NF_REJECT_IPV4 tristate "IPv4 packet rejection" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 7c497c78105f..f38fb1368ddb 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -9,10 +9,6 @@ obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o obj-$(CONFIG_NF_TPROXY_IPV4) += nf_tproxy_ipv4.o -# logging -obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o -obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o - # reject obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d1e04d2b5170..d6d45d820d79 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1193,6 +1193,8 @@ static int translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; + memset(newinfo->entries, 0, size); + newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; @@ -1539,10 +1541,15 @@ out_free: return ret; } -void arpt_unregister_table(struct net *net, struct xt_table *table, - const struct nf_hook_ops *ops) +void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) { nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} +EXPORT_SYMBOL(arpt_unregister_table_pre_exit); + +void arpt_unregister_table(struct net *net, struct xt_table *table) +{ __arpt_unregister_table(net, table); } diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index c216b9ad3bb2..6c300ba5634e 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -56,16 +56,24 @@ static int __net_init arptable_filter_table_init(struct net *net) return err; } +static void __net_exit arptable_filter_net_pre_exit(struct net *net) +{ + if (net->ipv4.arptable_filter) + arpt_unregister_table_pre_exit(net, net->ipv4.arptable_filter, + arpfilter_ops); +} + static void __net_exit arptable_filter_net_exit(struct net *net) { if (!net->ipv4.arptable_filter) return; - arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops); + arpt_unregister_table(net, net->ipv4.arptable_filter); net->ipv4.arptable_filter = NULL; } static struct pernet_operations arptable_filter_net_ops = { .exit = arptable_filter_net_exit, + .pre_exit = arptable_filter_net_pre_exit, }; static int __init arptable_filter_init(void) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f15bc21d7301..f77ea0dbe656 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1428,6 +1428,8 @@ translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; + memset(newinfo->entries, 0, size); + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 8115611aa47d..ffdcc2b9360f 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -20,8 +20,13 @@ #endif #include <net/netfilter/nf_conntrack_zones.h> +static unsigned int defrag4_pernet_id __read_mostly; static DEFINE_MUTEX(defrag4_mutex); +struct defrag4_pernet { + unsigned int users; +}; + static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, u_int32_t user) { @@ -106,15 +111,19 @@ static const struct nf_hook_ops ipv4_defrag_ops[] = { static void __net_exit defrag4_net_exit(struct net *net) { - if (net->nf.defrag_ipv4) { + struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id); + + if (nf_defrag->users) { nf_unregister_net_hooks(net, ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); - net->nf.defrag_ipv4 = false; + nf_defrag->users = 0; } } static struct pernet_operations defrag4_net_ops = { .exit = defrag4_net_exit, + .id = &defrag4_pernet_id, + .size = sizeof(struct defrag4_pernet), }; static int __init nf_defrag_init(void) @@ -129,21 +138,22 @@ static void __exit nf_defrag_fini(void) int nf_defrag_ipv4_enable(struct net *net) { + struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id); int err = 0; might_sleep(); - if (net->nf.defrag_ipv4) + if (nf_defrag->users) return 0; mutex_lock(&defrag4_mutex); - if (net->nf.defrag_ipv4) + if (nf_defrag->users) goto out_unlock; err = nf_register_net_hooks(net, ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); if (err == 0) - net->nf.defrag_ipv4 = true; + nf_defrag->users = 1; out_unlock: mutex_unlock(&defrag4_mutex); diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c deleted file mode 100644 index 136030ad2e54..000000000000 --- a/net/ipv4/netfilter/nf_log_arp.c +++ /dev/null @@ -1,172 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org> - * - * Based on code from ebt_log from: - * - * Bart De Schuymer <bdschuym@pandora.be> - * Harald Welte <laforge@netfilter.org> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/skbuff.h> -#include <linux/if_arp.h> -#include <linux/ip.h> -#include <net/route.h> - -#include <linux/netfilter.h> -#include <linux/netfilter/xt_LOG.h> -#include <net/netfilter/nf_log.h> - -static const struct nf_loginfo default_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_DEFAULT_MASK, - }, - }, -}; - -struct arppayload { - unsigned char mac_src[ETH_ALEN]; - unsigned char ip_src[4]; - unsigned char mac_dst[ETH_ALEN]; - unsigned char ip_dst[4]; -}; - -static void dump_arp_packet(struct nf_log_buf *m, - const struct nf_loginfo *info, - const struct sk_buff *skb, unsigned int nhoff) -{ - const struct arppayload *ap; - struct arppayload _arpp; - const struct arphdr *ah; - unsigned int logflags; - struct arphdr _arph; - - ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); - if (ah == NULL) { - nf_log_buf_add(m, "TRUNCATED"); - return; - } - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_DEFAULT_MASK; - - if (logflags & NF_LOG_MACDECODE) { - nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); - nf_log_dump_vlan(m, skb); - nf_log_buf_add(m, "MACPROTO=%04x ", - ntohs(eth_hdr(skb)->h_proto)); - } - - nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d", - ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op)); - - /* If it's for Ethernet and the lengths are OK, then log the ARP - * payload. - */ - if (ah->ar_hrd != htons(ARPHRD_ETHER) || - ah->ar_hln != ETH_ALEN || - ah->ar_pln != sizeof(__be32)) - return; - - ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); - if (ap == NULL) { - nf_log_buf_add(m, " INCOMPLETE [%zu bytes]", - skb->len - sizeof(_arph)); - return; - } - nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4", - ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); -} - -static void nf_log_arp_packet(struct net *net, u_int8_t pf, - unsigned int hooknum, const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - struct nf_log_buf *m; - - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) - return; - - m = nf_log_buf_open(); - - if (!loginfo) - loginfo = &default_loginfo; - - nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, - prefix); - dump_arp_packet(m, loginfo, skb, 0); - - nf_log_buf_close(m); -} - -static struct nf_logger nf_arp_logger __read_mostly = { - .name = "nf_log_arp", - .type = NF_LOG_TYPE_LOG, - .logfn = nf_log_arp_packet, - .me = THIS_MODULE, -}; - -static int __net_init nf_log_arp_net_init(struct net *net) -{ - return nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); -} - -static void __net_exit nf_log_arp_net_exit(struct net *net) -{ - nf_log_unset(net, &nf_arp_logger); -} - -static struct pernet_operations nf_log_arp_net_ops = { - .init = nf_log_arp_net_init, - .exit = nf_log_arp_net_exit, -}; - -static int __init nf_log_arp_init(void) -{ - int ret; - - ret = register_pernet_subsys(&nf_log_arp_net_ops); - if (ret < 0) - return ret; - - ret = nf_log_register(NFPROTO_ARP, &nf_arp_logger); - if (ret < 0) { - pr_err("failed to register logger\n"); - goto err1; - } - - return 0; - -err1: - unregister_pernet_subsys(&nf_log_arp_net_ops); - return ret; -} - -static void __exit nf_log_arp_exit(void) -{ - unregister_pernet_subsys(&nf_log_arp_net_ops); - nf_log_unregister(&nf_arp_logger); -} - -module_init(nf_log_arp_init); -module_exit(nf_log_arp_exit); - -MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); -MODULE_DESCRIPTION("Netfilter ARP packet logging"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NF_LOGGER(3, 0); diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c deleted file mode 100644 index d07583fac8f8..000000000000 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ /dev/null @@ -1,395 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/skbuff.h> -#include <linux/if_arp.h> -#include <linux/ip.h> -#include <net/ipv6.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/tcp.h> -#include <net/route.h> - -#include <linux/netfilter.h> -#include <linux/netfilter/xt_LOG.h> -#include <net/netfilter/nf_log.h> - -static const struct nf_loginfo default_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_DEFAULT_MASK, - }, - }, -}; - -/* One level of recursion won't kill us */ -static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m, - const struct nf_loginfo *info, - const struct sk_buff *skb, unsigned int iphoff) -{ - struct iphdr _iph; - const struct iphdr *ih; - unsigned int logflags; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_DEFAULT_MASK; - - ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); - if (ih == NULL) { - nf_log_buf_add(m, "TRUNCATED"); - return; - } - - /* Important fields: - * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ - /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ - nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr); - - /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ - nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", - ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, - ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); - - /* Max length: 6 "CE DF MF " */ - if (ntohs(ih->frag_off) & IP_CE) - nf_log_buf_add(m, "CE "); - if (ntohs(ih->frag_off) & IP_DF) - nf_log_buf_add(m, "DF "); - if (ntohs(ih->frag_off) & IP_MF) - nf_log_buf_add(m, "MF "); - - /* Max length: 11 "FRAG:65535 " */ - if (ntohs(ih->frag_off) & IP_OFFSET) - nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - - if ((logflags & NF_LOG_IPOPT) && - ih->ihl * 4 > sizeof(struct iphdr)) { - const unsigned char *op; - unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; - unsigned int i, optsize; - - optsize = ih->ihl * 4 - sizeof(struct iphdr); - op = skb_header_pointer(skb, iphoff+sizeof(_iph), - optsize, _opt); - if (op == NULL) { - nf_log_buf_add(m, "TRUNCATED"); - return; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - nf_log_buf_add(m, "OPT ("); - for (i = 0; i < optsize; i++) - nf_log_buf_add(m, "%02X", op[i]); - nf_log_buf_add(m, ") "); - } - - switch (ih->protocol) { - case IPPROTO_TCP: - if (nf_log_dump_tcp_header(m, skb, ih->protocol, - ntohs(ih->frag_off) & IP_OFFSET, - iphoff+ih->ihl*4, logflags)) - return; - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - if (nf_log_dump_udp_header(m, skb, ih->protocol, - ntohs(ih->frag_off) & IP_OFFSET, - iphoff+ih->ihl*4)) - return; - break; - case IPPROTO_ICMP: { - struct icmphdr _icmph; - const struct icmphdr *ich; - static const size_t required_len[NR_ICMP_TYPES+1] - = { [ICMP_ECHOREPLY] = 4, - [ICMP_DEST_UNREACH] - = 8 + sizeof(struct iphdr), - [ICMP_SOURCE_QUENCH] - = 8 + sizeof(struct iphdr), - [ICMP_REDIRECT] - = 8 + sizeof(struct iphdr), - [ICMP_ECHO] = 4, - [ICMP_TIME_EXCEEDED] - = 8 + sizeof(struct iphdr), - [ICMP_PARAMETERPROB] - = 8 + sizeof(struct iphdr), - [ICMP_TIMESTAMP] = 20, - [ICMP_TIMESTAMPREPLY] = 20, - [ICMP_ADDRESS] = 12, - [ICMP_ADDRESSREPLY] = 12 }; - - /* Max length: 11 "PROTO=ICMP " */ - nf_log_buf_add(m, "PROTO=ICMP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, - sizeof(_icmph), &_icmph); - if (ich == NULL) { - nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 18 "TYPE=255 CODE=255 " */ - nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (ich->type <= NR_ICMP_TYPES && - required_len[ich->type] && - skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { - nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - switch (ich->type) { - case ICMP_ECHOREPLY: - case ICMP_ECHO: - /* Max length: 19 "ID=65535 SEQ=65535 " */ - nf_log_buf_add(m, "ID=%u SEQ=%u ", - ntohs(ich->un.echo.id), - ntohs(ich->un.echo.sequence)); - break; - - case ICMP_PARAMETERPROB: - /* Max length: 14 "PARAMETER=255 " */ - nf_log_buf_add(m, "PARAMETER=%u ", - ntohl(ich->un.gateway) >> 24); - break; - case ICMP_REDIRECT: - /* Max length: 24 "GATEWAY=255.255.255.255 " */ - nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); - fallthrough; - case ICMP_DEST_UNREACH: - case ICMP_SOURCE_QUENCH: - case ICMP_TIME_EXCEEDED: - /* Max length: 3+maxlen */ - if (!iphoff) { /* Only recurse once. */ - nf_log_buf_add(m, "["); - dump_ipv4_packet(net, m, info, skb, - iphoff + ih->ihl*4+sizeof(_icmph)); - nf_log_buf_add(m, "] "); - } - - /* Max length: 10 "MTU=65535 " */ - if (ich->type == ICMP_DEST_UNREACH && - ich->code == ICMP_FRAG_NEEDED) { - nf_log_buf_add(m, "MTU=%u ", - ntohs(ich->un.frag.mtu)); - } - } - break; - } - /* Max Length */ - case IPPROTO_AH: { - struct ip_auth_hdr _ahdr; - const struct ip_auth_hdr *ah; - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 9 "PROTO=AH " */ - nf_log_buf_add(m, "PROTO=AH "); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ah = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_ahdr), &_ahdr); - if (ah == NULL) { - nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); - break; - } - case IPPROTO_ESP: { - struct ip_esp_hdr _esph; - const struct ip_esp_hdr *eh; - - /* Max length: 10 "PROTO=ESP " */ - nf_log_buf_add(m, "PROTO=ESP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - eh = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_esph), &_esph); - if (eh == NULL) { - nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi)); - break; - } - /* Max length: 10 "PROTO 255 " */ - default: - nf_log_buf_add(m, "PROTO=%u ", ih->protocol); - } - - /* Max length: 15 "UID=4294967295 " */ - if ((logflags & NF_LOG_UID) && !iphoff) - nf_log_dump_sk_uid_gid(net, m, skb->sk); - - /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (!iphoff && skb->mark) - nf_log_buf_add(m, "MARK=0x%x ", skb->mark); - - /* Proto Max log string length */ - /* IP: 40+46+6+11+127 = 230 */ - /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ - /* UDP: 10+max(25,20) = 35 */ - /* UDPLITE: 14+max(25,20) = 39 */ - /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ - /* ESP: 10+max(25)+15 = 50 */ - /* AH: 9+max(25)+15 = 49 */ - /* unknown: 10 */ - - /* (ICMP allows recursion one level deep) */ - /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ - /* maxlen = 230+ 91 + 230 + 252 = 803 */ -} - -static void dump_ipv4_mac_header(struct nf_log_buf *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & NF_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); - nf_log_dump_vlan(m, skb); - nf_log_buf_add(m, "MACPROTO=%04x ", - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - nf_log_buf_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int i; - - nf_log_buf_add(m, "%02x", *p++); - for (i = 1; i < dev->hard_header_len; i++, p++) - nf_log_buf_add(m, ":%02x", *p); - } - nf_log_buf_add(m, " "); -} - -static void nf_log_ip_packet(struct net *net, u_int8_t pf, - unsigned int hooknum, const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - struct nf_log_buf *m; - - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) - return; - - m = nf_log_buf_open(); - - if (!loginfo) - loginfo = &default_loginfo; - - nf_log_dump_packet_common(m, pf, hooknum, skb, in, - out, loginfo, prefix); - - if (in != NULL) - dump_ipv4_mac_header(m, loginfo, skb); - - dump_ipv4_packet(net, m, loginfo, skb, 0); - - nf_log_buf_close(m); -} - -static struct nf_logger nf_ip_logger __read_mostly = { - .name = "nf_log_ipv4", - .type = NF_LOG_TYPE_LOG, - .logfn = nf_log_ip_packet, - .me = THIS_MODULE, -}; - -static int __net_init nf_log_ipv4_net_init(struct net *net) -{ - return nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); -} - -static void __net_exit nf_log_ipv4_net_exit(struct net *net) -{ - nf_log_unset(net, &nf_ip_logger); -} - -static struct pernet_operations nf_log_ipv4_net_ops = { - .init = nf_log_ipv4_net_init, - .exit = nf_log_ipv4_net_exit, -}; - -static int __init nf_log_ipv4_init(void) -{ - int ret; - - ret = register_pernet_subsys(&nf_log_ipv4_net_ops); - if (ret < 0) - return ret; - - ret = nf_log_register(NFPROTO_IPV4, &nf_ip_logger); - if (ret < 0) { - pr_err("failed to register logger\n"); - goto err1; - } - - return 0; - -err1: - unregister_pernet_subsys(&nf_log_ipv4_net_ops); - return ret; -} - -static void __exit nf_log_ipv4_exit(void) -{ - unregister_pernet_subsys(&nf_log_ipv4_net_ops); - nf_log_unregister(&nf_ip_logger); -} - -module_init(nf_log_ipv4_init); -module_exit(nf_log_ipv4_exit); - -MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("Netfilter IPv4 packet logging"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NF_LOGGER(AF_INET, 0); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 5a2fc8798d20..4075230b14c6 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3140,26 +3140,24 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb, void *data) { struct rb_node *node; - int idx = 0, s_idx; + int s_idx; int err; s_idx = ctx->idx; for (node = rb_first(root); node; node = rb_next(node)) { struct nexthop *nh; - if (idx < s_idx) - goto cont; - nh = rb_entry(node, struct nexthop, rb_node); - ctx->idx = idx; + if (nh->id < s_idx) + continue; + + ctx->idx = nh->id; err = nh_cb(skb, cb, nh, data); if (err) return err; -cont: - idx++; } - ctx->idx = idx; + ctx->idx++; return 0; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a09e466ce11d..a62934b9f15a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1383,9 +1383,19 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!table) goto err_alloc; - /* Update the variables to point into the current struct net */ - for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) - table[i].data += (void *)net - (void *)&init_net; + for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) { + if (table[i].data) { + /* Update the variables to point into + * the current struct net + */ + table[i].data += (void *)net - (void *)&init_net; + } else { + /* Entries without data pointer are global; + * Make them read-only in non-init_net ns + */ + table[i].mode &= ~0222; + } + } } net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 4930bc8ab47e..ad9d17923fc5 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -506,6 +506,12 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) if (restore) { if (inet_csk_has_ulp(sk)) { + /* TLS does not have an unhash proto in SW cases, + * but we need to ensure we stop using the sock_map + * unhash routine because the associated psock is being + * removed. So use the original unhash handler. + */ + WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash); tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); } else { sk->sk_write_space = psock->saved_write_space; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bfcc7f1a8a7f..15f5504adf5b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2788,6 +2788,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, val = up->gso_size; break; + case UDP_GRO: + val = up->gro_enabled; + break; + /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 120073ffb666..b0ef65eb9bd2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4485,7 +4485,9 @@ restart: age >= ifp->valid_lft) { spin_unlock(&ifp->lock); in6_ifa_hold(ifp); + rcu_read_unlock_bh(); ipv6_del_addr(ifp); + rcu_read_lock_bh(); goto restart; } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { spin_unlock(&ifp->lock); @@ -5672,7 +5674,8 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev, return 0; } -static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) +static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token, + struct netlink_ext_ack *extack) { struct inet6_ifaddr *ifp; struct net_device *dev = idev->dev; @@ -5683,12 +5686,29 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) if (!token) return -EINVAL; - if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) + + if (dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG_MOD(extack, "Device is loopback"); return -EINVAL; - if (!ipv6_accept_ra(idev)) + } + + if (dev->flags & IFF_NOARP) { + NL_SET_ERR_MSG_MOD(extack, + "Device does not do neighbour discovery"); + return -EINVAL; + } + + if (!ipv6_accept_ra(idev)) { + NL_SET_ERR_MSG_MOD(extack, + "Router advertisement is disabled on device"); return -EINVAL; - if (idev->cnf.rtr_solicits == 0) + } + + if (idev->cnf.rtr_solicits == 0) { + NL_SET_ERR_MSG(extack, + "Router solicitation is disabled on device"); return -EINVAL; + } write_lock_bh(&idev->lock); @@ -5796,7 +5816,8 @@ static int inet6_validate_link_af(const struct net_device *dev, return 0; } -static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) +static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct inet6_dev *idev = __in6_dev_get(dev); struct nlattr *tb[IFLA_INET6_MAX + 1]; @@ -5809,7 +5830,8 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) BUG(); if (tb[IFLA_INET6_TOKEN]) { - err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]), + extack); if (err) return err; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 440080da805b..20d492da725a 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -316,7 +316,7 @@ static void ah6_output_done(struct crypto_async_request *base, int err) } kfree(AH_SKB_CB(skb)->tmp); - xfrm_output_resume(skb, err); + xfrm_output_resume(skb->sk, skb, err); } static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) @@ -705,7 +705,7 @@ static int ah6_init_state(struct xfrm_state *x) if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_ahash_digestsize(ahash)) { - pr_info("AH: %s digestsize %u != %hu\n", + pr_info("AH: %s digestsize %u != %u\n", x->aalg->alg_name, crypto_ahash_digestsize(ahash), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 153ad103ba74..393ae2b78e7d 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -314,7 +314,7 @@ static void esp_output_done(struct crypto_async_request *base, int err) x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb, err); + xfrm_output_resume(skb->sk, skb, err); } } @@ -1147,7 +1147,7 @@ static int esp_init_authenc(struct xfrm_state *x) err = -EINVAL; if (aalg_desc->uinfo.auth.icv_fullbits / 8 != crypto_aead_authsize(aead)) { - pr_info("ESP: %s digestsize %u != %hu\n", + pr_info("ESP: %s digestsize %u != %u\n", x->aalg->alg_name, crypto_aead_authsize(aead), aalg_desc->uinfo.auth.icv_fullbits / 8); diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 1ca516fb30e1..40ed4fcf1cf4 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -254,9 +254,11 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, skb->encap_hdr_csum = 1; if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) - esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK | + NETIF_F_SCTP_CRC); else if (!(features & NETIF_F_HW_ESP_TX_CSUM)) - esp_features = features & ~NETIF_F_CSUM_MASK; + esp_features = features & ~(NETIF_F_CSUM_MASK | + NETIF_F_SCTP_CRC); xo->flags |= XFRM_GSO_SEGMENT; @@ -316,7 +318,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features esp.plen = esp.clen - skb->len - esp.tfclen; esp.tailen = esp.tfclen + esp.plen + alen; - if (!hw_offload || (hw_offload && !skb_is_gso(skb))) { + if (!hw_offload || !skb_is_gso(skb)) { esp.nfrags = esp6_output_head(x, skb, &esp); if (esp.nfrags < 0) return esp.nfrags; @@ -346,8 +348,17 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features ipv6_hdr(skb)->payload_len = htons(len); - if (hw_offload) + if (hw_offload) { + if (!skb_ext_add(skb, SKB_EXT_SEC_PATH)) + return -ENOMEM; + + xo = xfrm_offload(skb); + if (!xo) + return -EINVAL; + + xo->flags |= XFRM_XMIT; return 0; + } err = esp6_output_tail(x, skb, &esp); if (err) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1bca2b09d77e..e8398ffb5e35 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -916,6 +916,10 @@ static int icmpv6_rcv(struct sk_buff *skb) success = ping_rcv(skb); break; + case ICMPV6_EXT_ECHO_REPLY: + success = ping_rcv(skb); + break; + case ICMPV6_PKT_TOOBIG: /* BUGGG_FUTURE: if packet contains rthdr, we cannot update standard destination cache. Seems, only "advanced" diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 07a0a06a9b52..288bafded998 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2243,6 +2243,16 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head t = rtnl_dereference(t->next); } } + + t = rtnl_dereference(ip6n->tnls_wc[0]); + while (t) { + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, list); + t = rtnl_dereference(t->next); + } } static int __net_init ip6_tnl_init_net(struct net *net) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 856e46ad0895..2d048e21abbb 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -493,7 +493,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) } if (dst->flags & DST_XFRM_QUEUE) - goto queued; + goto xmit; x = dst->xfrm; if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr)) @@ -522,6 +522,8 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } else { + if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) + goto xmit; icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); } @@ -530,7 +532,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) goto tx_err_dst_release; } -queued: +xmit: skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 49b0cebfdcdc..0d59efb6b49e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -75,9 +75,6 @@ static void igmp6_leave_group(struct ifmcaddr6 *ma); static void mld_mca_work(struct work_struct *work); static void mld_ifc_event(struct inet6_dev *idev); -static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); -static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); -static void mld_clear_delrec(struct inet6_dev *idev); static bool mld_in_v1_mode(const struct inet6_dev *idev); static int sf_setstate(struct ifmcaddr6 *pmc); static void sf_markstate(struct ifmcaddr6 *pmc); @@ -114,10 +111,13 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT; #define mc_dereference(e, idev) \ rcu_dereference_protected(e, lockdep_is_held(&(idev)->mc_lock)) -#define for_each_pmc_rtnl(np, pmc) \ - for (pmc = rtnl_dereference((np)->ipv6_mc_list); \ +#define sock_dereference(e, sk) \ + rcu_dereference_protected(e, lockdep_sock_is_held(sk)) + +#define for_each_pmc_socklock(np, sk, pmc) \ + for (pmc = sock_dereference((np)->ipv6_mc_list, sk); \ pmc; \ - pmc = rtnl_dereference(pmc->next)) + pmc = sock_dereference(pmc->next, sk)) #define for_each_pmc_rcu(np, pmc) \ for (pmc = rcu_dereference((np)->ipv6_mc_list); \ @@ -180,7 +180,7 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - for_each_pmc_rtnl(np, mc_lst) { + for_each_pmc_socklock(np, sk, mc_lst) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) return -EADDRINUSE; @@ -258,7 +258,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EINVAL; for (lnk = &np->ipv6_mc_list; - (mc_lst = rtnl_dereference(*lnk)) != NULL; + (mc_lst = sock_dereference(*lnk, sk)) != NULL; lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { @@ -323,7 +323,7 @@ void __ipv6_sock_mc_close(struct sock *sk) ASSERT_RTNL(); - while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) { + while ((mc_lst = sock_dereference(np->ipv6_mc_list, sk)) != NULL) { struct net_device *dev; np->ipv6_mc_list = mc_lst->next; @@ -350,8 +350,11 @@ void ipv6_sock_mc_close(struct sock *sk) if (!rcu_access_pointer(np->ipv6_mc_list)) return; + rtnl_lock(); + lock_sock(sk); __ipv6_sock_mc_close(sk); + release_sock(sk); rtnl_unlock(); } @@ -381,7 +384,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, err = -EADDRNOTAVAIL; mutex_lock(&idev->mc_lock); - for_each_pmc_rtnl(inet6, pmc) { + for_each_pmc_socklock(inet6, sk, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -404,7 +407,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, pmc->sfmode = omode; } - psl = rtnl_dereference(pmc->sflist); + psl = sock_dereference(pmc->sflist, sk); if (!add) { if (!psl) goto done; /* err = -EADDRNOTAVAIL */ @@ -511,7 +514,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, goto done; } - for_each_pmc_rtnl(inet6, pmc) { + for_each_pmc_socklock(inet6, sk, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -552,7 +555,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, } mutex_lock(&idev->mc_lock); - psl = rtnl_dereference(pmc->sflist); + psl = sock_dereference(pmc->sflist, sk); if (psl) { ip6_mc_del_src(idev, group, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); @@ -574,40 +577,32 @@ done: int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, struct sockaddr_storage __user *p) { - int err, i, count, copycount; + struct ipv6_pinfo *inet6 = inet6_sk(sk); const struct in6_addr *group; struct ipv6_mc_socklist *pmc; - struct inet6_dev *idev; - struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; - struct net *net = sock_net(sk); + int i, count, copycount; group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr; if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface); - if (!idev) - return -ENODEV; - - err = -EADDRNOTAVAIL; /* changes to the ipv6_mc_list require the socket lock and - * rtnl lock. We have the socket lock and rcu read lock, - * so reading the list is safe. + * rtnl lock. We have the socket lock, so reading the list is safe. */ - for_each_pmc_rtnl(inet6, pmc) { + for_each_pmc_socklock(inet6, sk, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(group, &pmc->addr)) break; } if (!pmc) /* must have a prior join */ - return err; + return -EADDRNOTAVAIL; gsf->gf_fmode = pmc->sfmode; - psl = rtnl_dereference(pmc->sflist); + psl = sock_dereference(pmc->sflist, sk); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; @@ -2600,7 +2595,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, struct ip6_sf_socklist *psl; int err; - psl = rtnl_dereference(iml->sflist); + psl = sock_dereference(iml->sflist, sk); if (idev) mutex_lock(&idev->mc_lock); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index ab9a279dd6d4..6ab710b5a1a8 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -24,6 +24,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff { const struct ipv6hdr *iph = ipv6_hdr(skb); struct sock *sk = sk_to_full_sk(sk_partial); + struct flow_keys flkeys; unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & @@ -38,6 +39,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff }; int err; + fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys); dst = ip6_route_output(net, sk, &fl6); err = dst->error; if (err) { diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 262bb51a2d99..f22233e44ee9 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -69,7 +69,10 @@ config NF_REJECT_IPV6 config NF_LOG_IPV6 tristate "IPv6 packet logging" default m if NETFILTER_ADVANCED=n - select NF_LOG_COMMON + select NF_LOG_SYSLOG + help + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects CONFIG_NF_LOG_SYSLOG. config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 731a74c60dca..b85383606df7 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -18,9 +18,6 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o obj-$(CONFIG_NF_TPROXY_IPV6) += nf_tproxy_ipv6.o -# logging -obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o - # reject obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e2119bfcf13..eb2b5404806c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1443,6 +1443,8 @@ translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; + memset(newinfo->entries, 0, size); + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c129ad334eb3..a0108415275f 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -15,28 +15,13 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/jiffies.h> #include <linux/net.h> -#include <linux/list.h> #include <linux/netdevice.h> -#include <linux/in6.h> #include <linux/ipv6.h> -#include <linux/icmpv6.h> -#include <linux/random.h> #include <linux/slab.h> -#include <net/sock.h> -#include <net/snmp.h> #include <net/ipv6_frag.h> -#include <net/protocol.h> -#include <net/transp_v6.h> -#include <net/rawv6.h> -#include <net/ndisc.h> -#include <net/addrconf.h> -#include <net/inet_ecn.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> #include <linux/sysctl.h> #include <linux/netfilter.h> @@ -44,11 +29,18 @@ #include <linux/kernel.h> #include <linux/module.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> +#include <net/netns/generic.h> static const char nf_frags_cache_name[] = "nf-frags"; +unsigned int nf_frag_pernet_id __read_mostly; static struct inet_frags nf_frags; +static struct nft_ct_frag6_pernet *nf_frag_pernet(struct net *net) +{ + return net_generic(net, nf_frag_pernet_id); +} + #ifdef CONFIG_SYSCTL static struct ctl_table nf_ct_frag6_sysctl_table[] = { @@ -75,6 +67,7 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { static int nf_ct_frag6_sysctl_register(struct net *net) { + struct nft_ct_frag6_pernet *nf_frag; struct ctl_table *table; struct ctl_table_header *hdr; @@ -86,18 +79,20 @@ static int nf_ct_frag6_sysctl_register(struct net *net) goto err_alloc; } - table[0].data = &net->nf_frag.fqdir->timeout; - table[1].data = &net->nf_frag.fqdir->low_thresh; - table[1].extra2 = &net->nf_frag.fqdir->high_thresh; - table[2].data = &net->nf_frag.fqdir->high_thresh; - table[2].extra1 = &net->nf_frag.fqdir->low_thresh; - table[2].extra2 = &init_net.nf_frag.fqdir->high_thresh; + nf_frag = nf_frag_pernet(net); + + table[0].data = &nf_frag->fqdir->timeout; + table[1].data = &nf_frag->fqdir->low_thresh; + table[1].extra2 = &nf_frag->fqdir->high_thresh; + table[2].data = &nf_frag->fqdir->high_thresh; + table[2].extra1 = &nf_frag->fqdir->low_thresh; + table[2].extra2 = &nf_frag->fqdir->high_thresh; hdr = register_net_sysctl(net, "net/netfilter", table); if (hdr == NULL) goto err_reg; - net->nf_frag_frags_hdr = hdr; + nf_frag->nf_frag_frags_hdr = hdr; return 0; err_reg: @@ -109,10 +104,11 @@ err_alloc: static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) { + struct nft_ct_frag6_pernet *nf_frag = nf_frag_pernet(net); struct ctl_table *table; - table = net->nf_frag_frags_hdr->ctl_table_arg; - unregister_net_sysctl_table(net->nf_frag_frags_hdr); + table = nf_frag->nf_frag_frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(nf_frag->nf_frag_frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } @@ -149,6 +145,7 @@ static void nf_ct_frag6_expire(struct timer_list *t) static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, const struct ipv6hdr *hdr, int iif) { + struct nft_ct_frag6_pernet *nf_frag = nf_frag_pernet(net); struct frag_v6_compare_key key = { .id = id, .saddr = hdr->saddr, @@ -158,7 +155,7 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, }; struct inet_frag_queue *q; - q = inet_frag_find(net->nf_frag.fqdir, &key); + q = inet_frag_find(nf_frag->fqdir, &key); if (!q) return NULL; @@ -495,37 +492,44 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); static int nf_ct_net_init(struct net *net) { + struct nft_ct_frag6_pernet *nf_frag = nf_frag_pernet(net); int res; - res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net); + res = fqdir_init(&nf_frag->fqdir, &nf_frags, net); if (res < 0) return res; - net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; - net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; - net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT; + nf_frag->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; + nf_frag->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; + nf_frag->fqdir->timeout = IPV6_FRAG_TIMEOUT; res = nf_ct_frag6_sysctl_register(net); if (res < 0) - fqdir_exit(net->nf_frag.fqdir); + fqdir_exit(nf_frag->fqdir); return res; } static void nf_ct_net_pre_exit(struct net *net) { - fqdir_pre_exit(net->nf_frag.fqdir); + struct nft_ct_frag6_pernet *nf_frag = nf_frag_pernet(net); + + fqdir_pre_exit(nf_frag->fqdir); } static void nf_ct_net_exit(struct net *net) { + struct nft_ct_frag6_pernet *nf_frag = nf_frag_pernet(net); + nf_ct_frags6_sysctl_unregister(net); - fqdir_exit(net->nf_frag.fqdir); + fqdir_exit(nf_frag->fqdir); } static struct pernet_operations nf_ct_net_ops = { .init = nf_ct_net_init, .pre_exit = nf_ct_net_pre_exit, .exit = nf_ct_net_exit, + .id = &nf_frag_pernet_id, + .size = sizeof(struct nft_ct_frag6_pernet), }; static const struct rhashtable_params nfct_rhash_params = { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 6646a87fb5dc..402dc4ca9504 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -25,6 +25,8 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> +extern unsigned int nf_frag_pernet_id; + static DEFINE_MUTEX(defrag6_mutex); static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, @@ -89,10 +91,12 @@ static const struct nf_hook_ops ipv6_defrag_ops[] = { static void __net_exit defrag6_net_exit(struct net *net) { - if (net->nf.defrag_ipv6) { + struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id); + + if (nf_frag->users) { nf_unregister_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); - net->nf.defrag_ipv6 = false; + nf_frag->users = 0; } } @@ -130,21 +134,22 @@ static void __exit nf_defrag_fini(void) int nf_defrag_ipv6_enable(struct net *net) { + struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id); int err = 0; might_sleep(); - if (net->nf.defrag_ipv6) + if (nf_frag->users) return 0; mutex_lock(&defrag6_mutex); - if (net->nf.defrag_ipv6) + if (nf_frag->users) goto out_unlock; err = nf_register_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); if (err == 0) - net->nf.defrag_ipv6 = true; + nf_frag->users = 1; out_unlock: mutex_unlock(&defrag6_mutex); diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c deleted file mode 100644 index 8210ff34ed9b..000000000000 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ /dev/null @@ -1,427 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/skbuff.h> -#include <linux/if_arp.h> -#include <linux/ip.h> -#include <net/ipv6.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/tcp.h> -#include <net/route.h> - -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <linux/netfilter/xt_LOG.h> -#include <net/netfilter/nf_log.h> - -static const struct nf_loginfo default_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_DEFAULT_MASK, - }, - }, -}; - -/* One level of recursion won't kill us */ -static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m, - const struct nf_loginfo *info, - const struct sk_buff *skb, unsigned int ip6hoff, - int recurse) -{ - u_int8_t currenthdr; - int fragment; - struct ipv6hdr _ip6h; - const struct ipv6hdr *ih; - unsigned int ptr; - unsigned int hdrlen = 0; - unsigned int logflags; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_DEFAULT_MASK; - - ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); - if (ih == NULL) { - nf_log_buf_add(m, "TRUNCATED"); - return; - } - - /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); - - /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ - nf_log_buf_add(m, "LEN=%zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", - ntohs(ih->payload_len) + sizeof(struct ipv6hdr), - (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, - ih->hop_limit, - (ntohl(*(__be32 *)ih) & 0x000fffff)); - - fragment = 0; - ptr = ip6hoff + sizeof(struct ipv6hdr); - currenthdr = ih->nexthdr; - while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) { - struct ipv6_opt_hdr _hdr; - const struct ipv6_opt_hdr *hp; - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - if (hp == NULL) { - nf_log_buf_add(m, "TRUNCATED"); - return; - } - - /* Max length: 48 "OPT (...) " */ - if (logflags & NF_LOG_IPOPT) - nf_log_buf_add(m, "OPT ( "); - - switch (currenthdr) { - case IPPROTO_FRAGMENT: { - struct frag_hdr _fhdr; - const struct frag_hdr *fh; - - nf_log_buf_add(m, "FRAG:"); - fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), - &_fhdr); - if (fh == NULL) { - nf_log_buf_add(m, "TRUNCATED "); - return; - } - - /* Max length: 6 "65535 " */ - nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); - - /* Max length: 11 "INCOMPLETE " */ - if (fh->frag_off & htons(0x0001)) - nf_log_buf_add(m, "INCOMPLETE "); - - nf_log_buf_add(m, "ID:%08x ", - ntohl(fh->identification)); - - if (ntohs(fh->frag_off) & 0xFFF8) - fragment = 1; - - hdrlen = 8; - - break; - } - case IPPROTO_DSTOPTS: - case IPPROTO_ROUTING: - case IPPROTO_HOPOPTS: - if (fragment) { - if (logflags & NF_LOG_IPOPT) - nf_log_buf_add(m, ")"); - return; - } - hdrlen = ipv6_optlen(hp); - break; - /* Max Length */ - case IPPROTO_AH: - if (logflags & NF_LOG_IPOPT) { - struct ip_auth_hdr _ahdr; - const struct ip_auth_hdr *ah; - - /* Max length: 3 "AH " */ - nf_log_buf_add(m, "AH "); - - if (fragment) { - nf_log_buf_add(m, ")"); - return; - } - - ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), - &_ahdr); - if (ah == NULL) { - /* - * Max length: 26 "INCOMPLETE [65535 - * bytes] )" - */ - nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", - skb->len - ptr); - return; - } - - /* Length: 15 "SPI=0xF1234567 */ - nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); - - } - - hdrlen = ipv6_authlen(hp); - break; - case IPPROTO_ESP: - if (logflags & NF_LOG_IPOPT) { - struct ip_esp_hdr _esph; - const struct ip_esp_hdr *eh; - - /* Max length: 4 "ESP " */ - nf_log_buf_add(m, "ESP "); - - if (fragment) { - nf_log_buf_add(m, ")"); - return; - } - - /* - * Max length: 26 "INCOMPLETE [65535 bytes] )" - */ - eh = skb_header_pointer(skb, ptr, sizeof(_esph), - &_esph); - if (eh == NULL) { - nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", - skb->len - ptr); - return; - } - - /* Length: 16 "SPI=0xF1234567 )" */ - nf_log_buf_add(m, "SPI=0x%x )", - ntohl(eh->spi)); - } - return; - default: - /* Max length: 20 "Unknown Ext Hdr 255" */ - nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr); - return; - } - if (logflags & NF_LOG_IPOPT) - nf_log_buf_add(m, ") "); - - currenthdr = hp->nexthdr; - ptr += hdrlen; - } - - switch (currenthdr) { - case IPPROTO_TCP: - if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment, - ptr, logflags)) - return; - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr)) - return; - break; - case IPPROTO_ICMPV6: { - struct icmp6hdr _icmp6h; - const struct icmp6hdr *ic; - - /* Max length: 13 "PROTO=ICMPv6 " */ - nf_log_buf_add(m, "PROTO=ICMPv6 "); - - if (fragment) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); - if (ic == NULL) { - nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", - skb->len - ptr); - return; - } - - /* Max length: 18 "TYPE=255 CODE=255 " */ - nf_log_buf_add(m, "TYPE=%u CODE=%u ", - ic->icmp6_type, ic->icmp6_code); - - switch (ic->icmp6_type) { - case ICMPV6_ECHO_REQUEST: - case ICMPV6_ECHO_REPLY: - /* Max length: 19 "ID=65535 SEQ=65535 " */ - nf_log_buf_add(m, "ID=%u SEQ=%u ", - ntohs(ic->icmp6_identifier), - ntohs(ic->icmp6_sequence)); - break; - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - break; - - case ICMPV6_PARAMPROB: - /* Max length: 17 "POINTER=ffffffff " */ - nf_log_buf_add(m, "POINTER=%08x ", - ntohl(ic->icmp6_pointer)); - fallthrough; - case ICMPV6_DEST_UNREACH: - case ICMPV6_PKT_TOOBIG: - case ICMPV6_TIME_EXCEED: - /* Max length: 3+maxlen */ - if (recurse) { - nf_log_buf_add(m, "["); - dump_ipv6_packet(net, m, info, skb, - ptr + sizeof(_icmp6h), 0); - nf_log_buf_add(m, "] "); - } - - /* Max length: 10 "MTU=65535 " */ - if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) { - nf_log_buf_add(m, "MTU=%u ", - ntohl(ic->icmp6_mtu)); - } - } - break; - } - /* Max length: 10 "PROTO=255 " */ - default: - nf_log_buf_add(m, "PROTO=%u ", currenthdr); - } - - /* Max length: 15 "UID=4294967295 " */ - if ((logflags & NF_LOG_UID) && recurse) - nf_log_dump_sk_uid_gid(net, m, skb->sk); - - /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (recurse && skb->mark) - nf_log_buf_add(m, "MARK=0x%x ", skb->mark); -} - -static void dump_ipv6_mac_header(struct nf_log_buf *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & NF_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); - nf_log_dump_vlan(m, skb); - nf_log_buf_add(m, "MACPROTO=%04x ", - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - nf_log_buf_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int len = dev->hard_header_len; - unsigned int i; - - if (dev->type == ARPHRD_SIT) { - p -= ETH_HLEN; - - if (p < skb->head) - p = NULL; - } - - if (p != NULL) { - nf_log_buf_add(m, "%02x", *p++); - for (i = 1; i < len; i++) - nf_log_buf_add(m, ":%02x", *p++); - } - nf_log_buf_add(m, " "); - - if (dev->type == ARPHRD_SIT) { - const struct iphdr *iph = - (struct iphdr *)skb_mac_header(skb); - nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, - &iph->daddr); - } - } else { - nf_log_buf_add(m, " "); - } -} - -static void nf_log_ip6_packet(struct net *net, u_int8_t pf, - unsigned int hooknum, const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - struct nf_log_buf *m; - - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) - return; - - m = nf_log_buf_open(); - - if (!loginfo) - loginfo = &default_loginfo; - - nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, - loginfo, prefix); - - if (in != NULL) - dump_ipv6_mac_header(m, loginfo, skb); - - dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1); - - nf_log_buf_close(m); -} - -static struct nf_logger nf_ip6_logger __read_mostly = { - .name = "nf_log_ipv6", - .type = NF_LOG_TYPE_LOG, - .logfn = nf_log_ip6_packet, - .me = THIS_MODULE, -}; - -static int __net_init nf_log_ipv6_net_init(struct net *net) -{ - return nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); -} - -static void __net_exit nf_log_ipv6_net_exit(struct net *net) -{ - nf_log_unset(net, &nf_ip6_logger); -} - -static struct pernet_operations nf_log_ipv6_net_ops = { - .init = nf_log_ipv6_net_init, - .exit = nf_log_ipv6_net_exit, -}; - -static int __init nf_log_ipv6_init(void) -{ - int ret; - - ret = register_pernet_subsys(&nf_log_ipv6_net_ops); - if (ret < 0) - return ret; - - ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); - if (ret < 0) { - pr_err("failed to register logger\n"); - goto err1; - } - - return 0; - -err1: - unregister_pernet_subsys(&nf_log_ipv6_net_ops); - return ret; -} - -static void __exit nf_log_ipv6_exit(void) -{ - unregister_pernet_subsys(&nf_log_ipv6_net_ops); - nf_log_unregister(&nf_ip6_logger); -} - -module_init(nf_log_ipv6_init); -module_exit(nf_log_ipv6_exit); - -MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("Netfilter IPv6 packet logging"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NF_LOGGER(AF_INET6, 0); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 1f56d9aae589..bf3646b57c68 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -298,7 +298,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST) && - !sock_net(sk)->ipv6.sysctl.ip_nonlocal_bind) { + !ipv6_can_nonlocal_bind(sock_net(sk), inet)) { err = -EADDRNOTAVAIL; if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, dev, 0)) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 28801ae80548..a22822bdbf39 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5206,9 +5206,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, * nexthops have been replaced by first new, the rest should * be added to it. */ - cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | - NLM_F_REPLACE); - cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; + if (cfg->fc_nlinfo.nlh) { + cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | + NLM_F_REPLACE); + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; + } nhn++; } diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 8936f48570fc..bd7140885e60 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -1475,7 +1475,7 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) /* Forcing the desc->optattrs *set* and the desc->attrs *set* to be * disjoined, this allow us to release acquired resources by optional * attributes and by required attributes independently from each other - * without any interfarence. + * without any interference. * In other terms, we are sure that we do not release some the acquired * resources twice. * diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ff2ca2e7c7f5..aa98294a3ad3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1864,9 +1864,9 @@ static void __net_exit sit_destroy_tunnels(struct net *net, if (dev->rtnl_link_ops == &sit_link_ops) unregister_netdevice_queue(dev, head); - for (prio = 1; prio < 4; prio++) { + for (prio = 0; prio < 4; prio++) { int h; - for (h = 0; h < IP6_SIT_HASH_SIZE; h++) { + for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) { struct ip_tunnel *t; t = rtnl_dereference(sitn->tunnels[prio][h]); diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index b31f1021ad9c..48c04f89de20 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -2,6 +2,7 @@ /* * AES-128-CMAC with TLen 16 for IEEE 802.11w BIP * Copyright 2008, Jouni Malinen <j@w1.fi> + * Copyright (C) 2020 Intel Corporation */ #include <linux/kernel.h> @@ -73,8 +74,14 @@ struct crypto_shash *ieee80211_aes_cmac_key_setup(const u8 key[], struct crypto_shash *tfm; tfm = crypto_alloc_shash("cmac(aes)", 0, 0); - if (!IS_ERR(tfm)) - crypto_shash_setkey(tfm, key, key_len); + if (!IS_ERR(tfm)) { + int err = crypto_shash_setkey(tfm, key, key_len); + + if (err) { + crypto_free_shash(tfm); + return ERR_PTR(err); + } + } return tfm; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a0a11624a5be..7a99892e5aba 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1788,8 +1788,10 @@ static int ieee80211_change_station(struct wiphy *wiphy, } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - sta->sdata->u.vlan.sta) + sta->sdata->u.vlan.sta) { + ieee80211_clear_fast_rx(sta); RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); + } if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 5296898875ff..9245c0421bda 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -456,6 +456,7 @@ static const char *hw_flag_names[] = { FLAG(AMPDU_KEYBORDER_SUPPORT), FLAG(SUPPORTS_TX_ENCAP_OFFLOAD), FLAG(SUPPORTS_RX_DECAP_OFFLOAD), + FLAG(SUPPORTS_CONC_MON_RX_DECAP), #undef FLAG }; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 5a27c61a7b38..936c9dfa86c8 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -711,17 +711,17 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf, PFLAG(MAC, 3, OFDMA_RA, "OFDMA-RA"); switch (cap[3] & IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK) { - case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_USE_VHT: - PRINT("MAX-AMPDU-LEN-EXP-USE-VHT"); + case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_0: + PRINT("MAX-AMPDU-LEN-EXP-USE-EXT-0"); break; - case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_1: - PRINT("MAX-AMPDU-LEN-EXP-VHT-1"); + case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_1: + PRINT("MAX-AMPDU-LEN-EXP-VHT-EXT-1"); break; - case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2: - PRINT("MAX-AMPDU-LEN-EXP-VHT-2"); + case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2: + PRINT("MAX-AMPDU-LEN-EXP-VHT-EXT-2"); break; - case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED: - PRINT("MAX-AMPDU-LEN-EXP-RESERVED"); + case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3: + PRINT("MAX-AMPDU-LEN-EXP-VHT-EXT-3"); break; } @@ -732,15 +732,15 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf, PFLAG(MAC, 4, BSRP_BQRP_A_MPDU_AGG, "BSRP-BQRP-A-MPDU-AGG"); PFLAG(MAC, 4, QTP, "QTP"); PFLAG(MAC, 4, BQR, "BQR"); - PFLAG(MAC, 4, SRP_RESP, "SRP-RESP"); + PFLAG(MAC, 4, PSR_RESP, "PSR-RESP"); PFLAG(MAC, 4, NDP_FB_REP, "NDP-FB-REP"); PFLAG(MAC, 4, OPS, "OPS"); - PFLAG(MAC, 4, AMDSU_IN_AMPDU, "AMSDU-IN-AMPDU"); + PFLAG(MAC, 4, AMSDU_IN_AMPDU, "AMSDU-IN-AMPDU"); PRINT("MULTI-TID-AGG-TX-QOS-%d", ((cap[5] << 1) | (cap[4] >> 7)) & 0x7); - PFLAG(MAC, 5, SUBCHAN_SELECVITE_TRANSMISSION, - "SUBCHAN-SELECVITE-TRANSMISSION"); + PFLAG(MAC, 5, SUBCHAN_SELECTIVE_TRANSMISSION, + "SUBCHAN-SELECTIVE-TRANSMISSION"); PFLAG(MAC, 5, UL_2x996_TONE_RU, "UL-2x996-TONE-RU"); PFLAG(MAC, 5, OM_CTRL_UL_MU_DATA_DIS_RX, "OM-CTRL-UL-MU-DATA-DIS-RX"); PFLAG(MAC, 5, HE_DYNAMIC_SM_PS, "HE-DYNAMIC-SM-PS"); @@ -832,8 +832,8 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf, PFLAG(PHY, 3, DCM_MAX_RX_NSS_1, "DCM-MAX-RX-NSS-1"); PFLAG(PHY, 3, DCM_MAX_RX_NSS_2, "DCM-MAX-RX-NSS-2"); - PFLAG(PHY, 3, RX_HE_MU_PPDU_FROM_NON_AP_STA, - "RX-HE-MU-PPDU-FROM-NON-AP-STA"); + PFLAG(PHY, 3, RX_PARTIAL_BW_SU_IN_20MHZ_MU, + "RX-PARTIAL-BW-SU-IN-20MHZ-MU"); PFLAG(PHY, 3, SU_BEAMFORMER, "SU-BEAMFORMER"); PFLAG(PHY, 4, SU_BEAMFORMEE, "SU-BEAMFORMEE"); @@ -853,16 +853,17 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf, PFLAG(PHY, 6, CODEBOOK_SIZE_42_SU, "CODEBOOK-SIZE-42-SU"); PFLAG(PHY, 6, CODEBOOK_SIZE_75_MU, "CODEBOOK-SIZE-75-MU"); - PFLAG(PHY, 6, TRIG_SU_BEAMFORMER_FB, "TRIG-SU-BEAMFORMER-FB"); - PFLAG(PHY, 6, TRIG_MU_BEAMFORMER_FB, "TRIG-MU-BEAMFORMER-FB"); + PFLAG(PHY, 6, TRIG_SU_BEAMFORMING_FB, "TRIG-SU-BEAMFORMING-FB"); + PFLAG(PHY, 6, TRIG_MU_BEAMFORMING_PARTIAL_BW_FB, + "MU-BEAMFORMING-PARTIAL-BW-FB"); PFLAG(PHY, 6, TRIG_CQI_FB, "TRIG-CQI-FB"); PFLAG(PHY, 6, PARTIAL_BW_EXT_RANGE, "PARTIAL-BW-EXT-RANGE"); PFLAG(PHY, 6, PARTIAL_BANDWIDTH_DL_MUMIMO, "PARTIAL-BANDWIDTH-DL-MUMIMO"); PFLAG(PHY, 6, PPE_THRESHOLD_PRESENT, "PPE-THRESHOLD-PRESENT"); - PFLAG(PHY, 7, SRP_BASED_SR, "SRP-BASED-SR"); - PFLAG(PHY, 7, POWER_BOOST_FACTOR_AR, "POWER-BOOST-FACTOR-AR"); + PFLAG(PHY, 7, PSR_BASED_SR, "PSR-BASED-SR"); + PFLAG(PHY, 7, POWER_BOOST_FACTOR_SUPP, "POWER-BOOST-FACTOR-SUPP"); PFLAG(PHY, 7, HE_SU_MU_PPDU_4XLTF_AND_08_US_GI, "HE-SU-MU-PPDU-4XLTF-AND-08-US-GI"); PFLAG_RANGE(PHY, 7, MAX_NC, 0, 1, 1, "MAX-NC-%d"); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ecda126a7026..8fcbaa1eedf3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1660,6 +1660,8 @@ void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata); +void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, + u8 *bssid, u8 reason, bool tx); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b80c9b016b2b..b1c170939e44 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -807,7 +807,8 @@ static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdat ieee80211_iftype_supports_hdr_offload(sdata->vif.type)) { flags |= IEEE80211_OFFLOAD_DECAP_ENABLED; - if (local->monitors) + if (local->monitors && + !ieee80211_hw_check(&local->hw, SUPPORTS_CONC_MON_RX_DECAP)) flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED; } else { flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1b9c82616606..62145e5f9628 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #include <net/mac80211.h> @@ -282,6 +282,13 @@ static void ieee80211_restart_work(struct work_struct *work) * Then we can have a race... */ cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work); + if (sdata->vif.csa_active) { + sdata_lock(sdata); + ieee80211_sta_connection_lost(sdata, + sdata->u.mgd.associated->bssid, + WLAN_REASON_UNSPECIFIED, false); + sdata_unlock(sdata); + } } flush_delayed_work(&sdata->dec_tailroom_needed_wk); } @@ -1141,8 +1148,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (local->hw.wiphy->max_scan_ie_len) local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len; - WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes, - local->hw.n_cipher_schemes)); + if (WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes, + local->hw.n_cipher_schemes))) { + result = -EINVAL; + goto fail_workqueue; + } result = ieee80211_init_cipher_suites(local); if (result < 0) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ce4e3855fec1..2480bd0577bb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1295,6 +1295,11 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) sdata->vif.csa_active = false; ifmgd->csa_waiting_bcn = false; + /* + * If the CSA IE is still present on the beacon after the switch, + * we need to consider it as a new CSA (possibly to self). + */ + ifmgd->beacon_crc_valid = false; ret = drv_post_channel_switch(sdata); if (ret) { @@ -1400,11 +1405,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, ch_switch.delay = csa_ie.max_switch_time; } - if (res < 0) { - ieee80211_queue_work(&local->hw, - &ifmgd->csa_connection_drop_work); - return; - } + if (res < 0) + goto lock_and_drop_connection; if (beacon && sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) { if (res) @@ -4382,8 +4384,8 @@ static void ieee80211_sta_timer(struct timer_list *t) ieee80211_queue_work(&sdata->local->hw, &sdata->work); } -static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, - u8 *bssid, u8 reason, bool tx) +void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, + u8 *bssid, u8 reason, bool tx) { u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -4707,7 +4709,10 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t) timeout = sta->rx_stats.last_rx; timeout += IEEE80211_CONNECTION_IDLE_TIME; - if (time_is_before_jiffies(timeout)) { + /* If timeout is after now, then update timer to fire at + * the later date, but do not actually probe at this time. + */ + if (time_is_after_jiffies(timeout)) { mod_timer(&ifmgd->conn_mon_timer, round_jiffies_up(timeout)); return; } diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index ecad9b10984f..6487b05da6fa 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -370,7 +370,7 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, /* short preamble */ if ((mi->supported[group] & BIT(idx + 4)) && (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)) - idx += 4; + idx += 4; goto out; } @@ -868,7 +868,6 @@ static u16 minstrel_ht_next_jump_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur, u32 slow_rate_dur, int *slow_rate_ofs) { - struct minstrel_mcs_group_data *mg; struct minstrel_rate_stats *mrs; u32 max_duration = slow_rate_dur; int i, index, offset; @@ -886,7 +885,6 @@ minstrel_ht_next_jump_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur, u8 type; group = (group + 1) % ARRAY_SIZE(minstrel_mcs_groups); - mg = &mi->groups[group]; supported = mi->supported[group]; if (!supported) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5d06de61047a..0b719f3d2dec 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * Transmit and frame generation functions. */ @@ -1388,8 +1388,20 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local, ieee80211_set_skb_enqueue_time(skb); spin_lock_bh(&fq->lock); - fq_tin_enqueue(fq, tin, flow_idx, skb, - fq_skb_free_func); + /* + * For management frames, don't really apply codel etc., + * we don't want to apply any shaping or anything we just + * want to simplify the driver API by having them on the + * txqi. + */ + if (unlikely(txqi->txq.tid == IEEE80211_NUM_TIDS)) { + IEEE80211_SKB_CB(skb)->control.flags |= + IEEE80211_TX_INTCFL_NEED_TXPROCESSING; + __skb_queue_tail(&txqi->frags, skb); + } else { + fq_tin_enqueue(fq, tin, flow_idx, skb, + fq_skb_free_func); + } spin_unlock_bh(&fq->lock); } @@ -1684,7 +1696,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata; struct ieee80211_vif *vif; struct sk_buff *skb; - bool result = true; + bool result; __le16 fc; if (WARN_ON(skb_queue_empty(skbs))) @@ -2267,17 +2279,6 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, payload[7]); } - /* Initialize skb->priority for QoS frames. If the DONT_REORDER flag - * is set, stick to the default value for skb->priority to assure - * frames injected with this flag are not reordered relative to each - * other. - */ - if (ieee80211_is_data_qos(hdr->frame_control) && - !(info->control.flags & IEEE80211_TX_CTRL_DONT_REORDER)) { - u8 *p = ieee80211_get_qos_ctl(hdr); - skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK; - } - rcu_read_lock(); /* @@ -2341,6 +2342,15 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, info->band = chandef->chan->band; + /* Initialize skb->priority according to frame type and TID class, + * with respect to the sub interface that the frame will actually + * be transmitted on. If the DONT_REORDER flag is set, the original + * skb-priority is preserved to assure frames injected with this + * flag are not reordered relative to each other. + */ + ieee80211_select_queue_80211(sdata, skb, hdr); + skb_set_queue_mapping(skb, ieee80211_ac_from_tid(skb->priority)); + /* remove the injection radiotap header */ skb_pull(skb, len_rthdr); @@ -3573,17 +3583,23 @@ begin: test_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags)) goto out; - if (vif->txqs_stopped[ieee80211_ac_from_tid(txq->tid)]) { + if (vif->txqs_stopped[txq->ac]) { set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags); goto out; } /* Make sure fragments stay together. */ skb = __skb_dequeue(&txqi->frags); - if (skb) - goto out; + if (unlikely(skb)) { + if (!(IEEE80211_SKB_CB(skb)->control.flags & + IEEE80211_TX_INTCFL_NEED_TXPROCESSING)) + goto out; + IEEE80211_SKB_CB(skb)->control.flags &= + ~IEEE80211_TX_INTCFL_NEED_TXPROCESSING; + } else { + skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); + } - skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); if (!skb) goto out; @@ -3835,6 +3851,9 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw, if (!txq->sta) return true; + if (unlikely(txq->tid == IEEE80211_NUM_TIDS)) + return true; + sta = container_of(txq->sta, struct sta_info, sta); if (atomic_read(&sta->airtime[txq->ac].aql_tx_pending) < sta->airtime[txq->ac].aql_limit_low) @@ -4150,6 +4169,9 @@ static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata, unsigned long flags; int q = info->hw_queue; + if (sta) + sk_pacing_shift_update(skb->sk, local->hw.tx_sk_pacing_shift); + if (ieee80211_queue_skb(local, sdata, sta, skb)) return true; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c0fa526a45b4..0a0481f5af48 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -888,18 +888,10 @@ EXPORT_SYMBOL_GPL(wdev_to_ieee80211_vif); struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif) { - struct ieee80211_sub_if_data *sdata; - if (!vif) return NULL; - sdata = vif_to_sdata(vif); - - if (!ieee80211_sdata_running(sdata) || - !(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) - return NULL; - - return &sdata->wdev; + return &vif_to_sdata(vif)->wdev; } EXPORT_SYMBOL_GPL(ieee80211_vif_to_wdev); diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 585d33144c33..55550ead2ced 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -152,7 +152,7 @@ err_tfm0: crypto_free_sync_skcipher(key->tfm0); err_tfm: for (i = 0; i < ARRAY_SIZE(key->tfm); i++) - if (key->tfm[i]) + if (!IS_ERR_OR_NULL(key->tfm[i])) crypto_free_aead(key->tfm[i]); kfree_sensitive(key); diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index a014149aa323..20328920f6ed 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -22,7 +22,7 @@ config MPTCP_IPV6 depends on IPV6=y default y -config MPTCP_KUNIT_TESTS +config MPTCP_KUNIT_TEST tristate "This builds the MPTCP KUnit tests" if !KUNIT_ALL_TESTS depends on KUNIT default KUNIT_ALL_TESTS diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index a611968be4d7..e54daceac58b 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -2,11 +2,11 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ - mib.o pm_netlink.o + mib.o pm_netlink.o sockopt.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o mptcp_crypto_test-objs := crypto_test.o mptcp_token_test-objs := token_test.o -obj-$(CONFIG_MPTCP_KUNIT_TESTS) += mptcp_crypto_test.o mptcp_token_test.o +obj-$(CONFIG_MPTCP_KUNIT_TEST) += mptcp_crypto_test.o mptcp_token_test.o diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c index b472dc149856..a8931349933c 100644 --- a/net/mptcp/crypto.c +++ b/net/mptcp/crypto.c @@ -78,6 +78,6 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) sha256(input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE, hmac); } -#if IS_MODULE(CONFIG_MPTCP_KUNIT_TESTS) +#if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST) EXPORT_SYMBOL_GPL(mptcp_crypto_hmac_sha); #endif diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 4b7119eb2c31..99fc21406168 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -13,6 +13,8 @@ #include "protocol.h" #include "mib.h" +#include <trace/events/mptcp.h> + static bool mptcp_cap_flag_sha256(u8 flags) { return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256; @@ -220,45 +222,45 @@ static void mptcp_parse_option(const struct sk_buff *skb, if (!mp_opt->echo) { if (opsize == TCPOLEN_MPTCP_ADD_ADDR || opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT) - mp_opt->family = MPTCP_ADDR_IPVERSION_4; + mp_opt->addr.family = AF_INET; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 || opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT) - mp_opt->family = MPTCP_ADDR_IPVERSION_6; + mp_opt->addr.family = AF_INET6; #endif else break; } else { if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE || opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) - mp_opt->family = MPTCP_ADDR_IPVERSION_4; + mp_opt->addr.family = AF_INET; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE || opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) - mp_opt->family = MPTCP_ADDR_IPVERSION_6; + mp_opt->addr.family = AF_INET6; #endif else break; } mp_opt->add_addr = 1; - mp_opt->addr_id = *ptr++; - if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) { - memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4); + mp_opt->addr.id = *ptr++; + if (mp_opt->addr.family == AF_INET) { + memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4); ptr += 4; if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT || opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) { - mp_opt->port = get_unaligned_be16(ptr); + mp_opt->addr.port = htons(get_unaligned_be16(ptr)); ptr += 2; } } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else { - memcpy(mp_opt->addr6.s6_addr, (u8 *)ptr, 16); + memcpy(mp_opt->addr.addr6.s6_addr, (u8 *)ptr, 16); ptr += 16; if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT || opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) { - mp_opt->port = get_unaligned_be16(ptr); + mp_opt->addr.port = htons(get_unaligned_be16(ptr)); ptr += 2; } } @@ -268,8 +270,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 8; } pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d", - (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "", - mp_opt->addr_id, mp_opt->ahmac, mp_opt->echo, mp_opt->port); + (mp_opt->addr.family == AF_INET6) ? "6" : "", + mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port)); break; case MPTCPOPT_RM_ADDR: @@ -335,7 +337,7 @@ void mptcp_get_options(const struct sk_buff *skb, mp_opt->add_addr = 0; mp_opt->ahmac = 0; mp_opt->fastclose = 0; - mp_opt->port = 0; + mp_opt->addr.port = 0; mp_opt->rm_addr = 0; mp_opt->dss = 0; mp_opt->mp_prio = 0; @@ -583,39 +585,32 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, return true; } -static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in_addr *addr, u16 port) -{ - u8 hmac[SHA256_DIGEST_SIZE]; - u8 msg[7]; - - msg[0] = addr_id; - memcpy(&msg[1], &addr->s_addr, 4); - msg[5] = port >> 8; - msg[6] = port & 0xFF; - - mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac); - - return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); -} - -#if IS_ENABLED(CONFIG_MPTCP_IPV6) -static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in6_addr *addr, u16 port) +static u64 add_addr_generate_hmac(u64 key1, u64 key2, + struct mptcp_addr_info *addr) { + u16 port = ntohs(addr->port); u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19]; + int i = 0; - msg[0] = addr_id; - memcpy(&msg[1], &addr->s6_addr, 16); - msg[17] = port >> 8; - msg[18] = port & 0xFF; + msg[i++] = addr->id; + if (addr->family == AF_INET) { + memcpy(&msg[i], &addr->addr.s_addr, 4); + i += 4; + } +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (addr->family == AF_INET6) { + memcpy(&msg[i], &addr->addr6.s6_addr, 16); + i += 16; + } +#endif + msg[i++] = port >> 8; + msg[i++] = port & 0xFF; - mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac); + mptcp_crypto_hmac_sha(key1, key2, msg, i, hmac); return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } -#endif static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb, unsigned int *size, @@ -626,13 +621,13 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * struct mptcp_sock *msk = mptcp_sk(subflow->conn); bool drop_other_suboptions = false; unsigned int opt_size = *size; - struct mptcp_addr_info saddr; bool echo; bool port; int len; if ((mptcp_pm_should_add_signal_ipv6(msk) || - mptcp_pm_should_add_signal_port(msk)) && + mptcp_pm_should_add_signal_port(msk) || + mptcp_pm_should_add_signal_echo(msk)) && skb && skb_is_tcp_pure_ack(skb)) { pr_debug("drop other suboptions"); opts->suboptions = 0; @@ -643,45 +638,24 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * } if (!mptcp_pm_should_add_signal(msk) || - !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo, &port))) + !(mptcp_pm_add_addr_signal(msk, remaining, &opts->addr, &echo, &port))) return false; - len = mptcp_add_addr_len(saddr.family, echo, port); + len = mptcp_add_addr_len(opts->addr.family, echo, port); if (remaining < len) return false; *size = len; if (drop_other_suboptions) *size -= opt_size; - opts->addr_id = saddr.id; - if (port) - opts->port = ntohs(saddr.port); - if (saddr.family == AF_INET) { - opts->suboptions |= OPTION_MPTCP_ADD_ADDR; - opts->addr = saddr.addr; - if (!echo) { - opts->ahmac = add_addr_generate_hmac(msk->local_key, - msk->remote_key, - opts->addr_id, - &opts->addr, - opts->port); - } + opts->suboptions |= OPTION_MPTCP_ADD_ADDR; + if (!echo) { + opts->ahmac = add_addr_generate_hmac(msk->local_key, + msk->remote_key, + &opts->addr); } -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (saddr.family == AF_INET6) { - opts->suboptions |= OPTION_MPTCP_ADD_ADDR6; - opts->addr6 = saddr.addr6; - if (!echo) { - opts->ahmac = add_addr6_generate_hmac(msk->local_key, - msk->remote_key, - opts->addr_id, - &opts->addr6, - opts->port); - } - } -#endif pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d", - opts->addr_id, opts->ahmac, echo, opts->port); + opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port)); return true; } @@ -971,6 +945,10 @@ static void ack_update_msk(struct mptcp_sock *msk, __mptcp_data_acked(sk); } mptcp_data_unlock(sk); + + trace_ack_update_msk(mp_opt->data_ack, + old_snd_una, new_snd_una, + new_wnd_end, msk->wnd_end); } bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit) @@ -998,18 +976,9 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, if (mp_opt->echo) return true; - if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) - hmac = add_addr_generate_hmac(msk->remote_key, - msk->local_key, - mp_opt->addr_id, &mp_opt->addr, - mp_opt->port); -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else - hmac = add_addr6_generate_hmac(msk->remote_key, - msk->local_key, - mp_opt->addr_id, &mp_opt->addr6, - mp_opt->port); -#endif + hmac = add_addr_generate_hmac(msk->remote_key, + msk->local_key, + &mp_opt->addr); pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n", msk, (unsigned long long)hmac, @@ -1050,30 +1019,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) } if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) { - struct mptcp_addr_info addr; - - addr.port = htons(mp_opt.port); - addr.id = mp_opt.addr_id; - if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) { - addr.family = AF_INET; - addr.addr = mp_opt.addr; - } -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) { - addr.family = AF_INET6; - addr.addr6 = mp_opt.addr6; - } -#endif if (!mp_opt.echo) { - mptcp_pm_add_addr_received(msk, &addr); + mptcp_pm_add_addr_received(msk, &mp_opt.addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); } else { - mptcp_pm_add_addr_echoed(msk, &addr); - mptcp_pm_del_add_timer(msk, &addr); + mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); + mptcp_pm_del_add_timer(msk, &mp_opt.addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); } - if (mp_opt.port) + if (mp_opt.addr.port) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD); mp_opt.add_addr = 0; @@ -1204,20 +1159,16 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, } mp_capable_done: - if ((OPTION_MPTCP_ADD_ADDR -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - | OPTION_MPTCP_ADD_ADDR6 -#endif - ) & opts->suboptions) { + if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) { u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; u8 echo = MPTCP_ADDR_ECHO; #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) + if (opts->addr.family == AF_INET6) len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; #endif - if (opts->port) + if (opts->addr.port) len += TCPOLEN_MPTCP_PORT_LEN; if (opts->ahmac) { @@ -1226,28 +1177,30 @@ mp_capable_done: } *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR, - len, echo, opts->addr_id); - if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) { - memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4); + len, echo, opts->addr.id); + if (opts->addr.family == AF_INET) { + memcpy((u8 *)ptr, (u8 *)&opts->addr.addr.s_addr, 4); ptr += 1; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) { - memcpy((u8 *)ptr, opts->addr6.s6_addr, 16); + else if (opts->addr.family == AF_INET6) { + memcpy((u8 *)ptr, opts->addr.addr6.s6_addr, 16); ptr += 4; } #endif - if (!opts->port) { + if (!opts->addr.port) { if (opts->ahmac) { put_unaligned_be64(opts->ahmac, ptr); ptr += 2; } } else { + u16 port = ntohs(opts->addr.port); + if (opts->ahmac) { u8 *bptr = (u8 *)ptr; - put_unaligned_be16(opts->port, bptr); + put_unaligned_be16(port, bptr); bptr += 2; put_unaligned_be64(opts->ahmac, bptr); bptr += 8; @@ -1256,7 +1209,7 @@ mp_capable_done: ptr += 3; } else { - put_unaligned_be32(opts->port << 16 | + put_unaligned_be32(port << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); ptr += 1; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 51be6c34b339..6ba040897738 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -25,6 +25,8 @@ static int pm_nl_pernet_id; struct mptcp_pm_addr_entry { struct list_head list; struct mptcp_addr_info addr; + u8 flags; + int ifindex; struct rcu_head rcu; struct socket *lsk; }; @@ -168,7 +170,7 @@ select_local_address(const struct pm_nl_pernet *pernet, rcu_read_lock(); __mptcp_flush_join_list(msk); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) continue; if (entry->addr.family != sk->sk_family) { @@ -206,7 +208,7 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos) * can lead to additional addresses not being announced. */ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) continue; if (i++ == pos) { ret = entry; @@ -459,7 +461,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) check_work_pending(msk); remote_address((struct sock_common *)sk, &remote); spin_unlock_bh(&msk->pm.lock); - __mptcp_subflow_connect(sk, &local->addr, &remote); + __mptcp_subflow_connect(sk, &local->addr, &remote, + local->flags, local->ifindex); spin_lock_bh(&msk->pm.lock); return; } @@ -514,7 +517,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) local.family = remote.family; spin_unlock_bh(&msk->pm.lock); - __mptcp_subflow_connect(sk, &local, &remote); + __mptcp_subflow_connect(sk, &local, &remote, 0, 0); spin_lock_bh(&msk->pm.lock); add_addr_echo: @@ -683,7 +686,7 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk) static bool address_use_port(struct mptcp_pm_addr_entry *entry) { - return (entry->addr.flags & + return (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == MPTCP_PM_ADDR_FLAG_SIGNAL; } @@ -735,11 +738,11 @@ find_next: if (entry->addr.id > pernet->next_id) pernet->next_id = entry->addr.id; - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { addr_max = pernet->add_addr_signal_max; WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); } - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { addr_max = pernet->local_addr_max; WRITE_ONCE(pernet->local_addr_max, addr_max + 1); } @@ -841,10 +844,10 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return -ENOMEM; entry->addr = skc_local; - entry->addr.ifindex = 0; - entry->addr.flags = 0; entry->addr.id = 0; entry->addr.port = 0; + entry->ifindex = 0; + entry->flags = 0; entry->lsk = NULL; ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); if (ret < 0) @@ -959,14 +962,14 @@ skip_family: if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) { u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]); - entry->addr.ifindex = val; + entry->ifindex = val; } if (tb[MPTCP_PM_ADDR_ATTR_ID]) entry->addr.id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]); if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) - entry->addr.flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); + entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); if (tb[MPTCP_PM_ADDR_ATTR_PORT]) entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); @@ -1218,11 +1221,11 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&pernet->lock); return -EINVAL; } - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { addr_max = pernet->add_addr_signal_max; WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); } - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { addr_max = pernet->local_addr_max; WRITE_ONCE(pernet->local_addr_max, addr_max - 1); } @@ -1338,10 +1341,10 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb, goto nla_put_failure; if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id)) goto nla_put_failure; - if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->addr.flags)) + if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->flags)) goto nla_put_failure; - if (entry->addr.ifindex && - nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->addr.ifindex)) + if (entry->ifindex && + nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex)) goto nla_put_failure; if (addr->family == AF_INET && @@ -1569,7 +1572,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; - if (addr.addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; list_for_each_entry(entry, &pernet->local_addr_list, list) { @@ -1579,9 +1582,9 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) return ret; if (bkup) - entry->addr.flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; else - entry->addr.flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e894345d10c1..8bf21996734d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -11,7 +11,6 @@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/atomic.h> -#include <linux/igmp.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -20,13 +19,15 @@ #include <net/tcp_states.h> #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include <net/transp_v6.h> -#include <net/addrconf.h> #endif #include <net/mptcp.h> #include <net/xfrm.h> #include "protocol.h" #include "mib.h" +#define CREATE_TRACE_POINTS +#include <trace/events/mptcp.h> + #if IS_ENABLED(CONFIG_MPTCP_IPV6) struct mptcp6_sock { struct mptcp_sock msk; @@ -92,16 +93,6 @@ static bool mptcp_is_tcpsk(struct sock *sk) return false; } -static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) -{ - sock_owned_by_me((const struct sock *)msk); - - if (likely(!__mptcp_check_fallback(msk))) - return NULL; - - return msk->first; -} - static int __mptcp_socket_create(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -411,18 +402,6 @@ static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk) mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN; } -static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) -{ - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */ - if (subflow->request_join && !subflow->fully_established) - return false; - - /* only send if our side has not closed yet */ - return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); -} - static bool tcp_can_send_ack(const struct sock *ssk) { return !((1 << inet_sk_state_load(ssk)) & @@ -742,18 +721,47 @@ wake: sk->sk_data_ready(sk); } -void __mptcp_flush_join_list(struct mptcp_sock *msk) +static bool mptcp_do_flush_join_list(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; + bool ret = false; if (likely(list_empty(&msk->join_list))) - return; + return false; spin_lock_bh(&msk->join_list_lock); - list_for_each_entry(subflow, &msk->join_list, node) + list_for_each_entry(subflow, &msk->join_list, node) { + u32 sseq = READ_ONCE(subflow->setsockopt_seq); + mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow)); + if (READ_ONCE(msk->setsockopt_seq) != sseq) + ret = true; + } list_splice_tail_init(&msk->join_list, &msk->conn_list); spin_unlock_bh(&msk->join_list_lock); + + return ret; +} + +void __mptcp_flush_join_list(struct mptcp_sock *msk) +{ + if (likely(!mptcp_do_flush_join_list(msk))) + return; + + if (!test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags)) + mptcp_schedule_work((struct sock *)msk); +} + +static void mptcp_flush_join_list(struct mptcp_sock *msk) +{ + bool sync_needed = test_and_clear_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags); + + might_sleep(); + + if (!mptcp_do_flush_join_list(msk) && !sync_needed) + return; + + mptcp_sockopt_sync_all(msk); } static bool mptcp_timer_pending(struct sock *sk) @@ -1277,7 +1285,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, int avail_size; size_t ret = 0; - pr_debug("msk=%p ssk=%p sending dfrag at seq=%lld len=%d already sent=%d", + pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u", msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent); /* compute send limit */ @@ -1405,6 +1413,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) send_info[i].ratio = -1; } mptcp_for_each_subflow(msk, subflow) { + trace_mptcp_subflow_get_send(subflow); ssk = mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; @@ -1425,10 +1434,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } } - pr_debug("msk=%p nr_active=%d ssk=%p:%lld backup=%p:%lld", - msk, nr_active, send_info[0].ssk, send_info[0].ratio, - send_info[1].ssk, send_info[1].ratio); - /* pick the best backup if no other subflow is active */ if (!nr_active) send_info[0].ssk = send_info[1].ssk; @@ -1469,7 +1474,7 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags) int ret = 0; prev_ssk = ssk; - __mptcp_flush_join_list(msk); + mptcp_flush_join_list(msk); ssk = mptcp_subflow_get_send(msk); /* try to keep the subflow socket lock across @@ -1609,9 +1614,13 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int ret = 0; long timeo; - if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) + /* we don't support FASTOPEN yet */ + if (msg->msg_flags & MSG_FASTOPEN) return -EOPNOTSUPP; + /* silently ignore everything else */ + msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL; + mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len))); timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -1695,7 +1704,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!msk->first_pending) WRITE_ONCE(msk->first_pending, dfrag); } - pr_debug("msk=%p dfrag at seq=%lld len=%d sent=%d new=%d", msk, + pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d", msk, dfrag->data_seq, dfrag->data_len, dfrag->already_sent, !dfrag_collapsed); @@ -1734,36 +1743,41 @@ static void mptcp_wait_data(struct sock *sk, long *timeo) static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, struct msghdr *msg, - size_t len) + size_t len, int flags) { - struct sk_buff *skb; + struct sk_buff *skb, *tmp; int copied = 0; - while ((skb = skb_peek(&msk->receive_queue)) != NULL) { + skb_queue_walk_safe(&msk->receive_queue, skb, tmp) { u32 offset = MPTCP_SKB_CB(skb)->offset; u32 data_len = skb->len - offset; u32 count = min_t(size_t, len - copied, data_len); int err; - err = skb_copy_datagram_msg(skb, offset, msg, count); - if (unlikely(err < 0)) { - if (!copied) - return err; - break; + if (!(flags & MSG_TRUNC)) { + err = skb_copy_datagram_msg(skb, offset, msg, count); + if (unlikely(err < 0)) { + if (!copied) + return err; + break; + } } copied += count; if (count < data_len) { - MPTCP_SKB_CB(skb)->offset += count; + if (!(flags & MSG_PEEK)) + MPTCP_SKB_CB(skb)->offset += count; break; } - /* we will bulk release the skb memory later */ - skb->destructor = NULL; - msk->rmem_released += skb->truesize; - __skb_unlink(skb, &msk->receive_queue); - __kfree_skb(skb); + if (!(flags & MSG_PEEK)) { + /* we will bulk release the skb memory later */ + skb->destructor = NULL; + msk->rmem_released += skb->truesize; + __skb_unlink(skb, &msk->receive_queue); + __kfree_skb(skb); + } if (copied >= len) break; @@ -1895,7 +1909,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) unsigned int moved = 0; bool ret, done; - __mptcp_flush_join_list(msk); + mptcp_flush_join_list(msk); do { struct sock *ssk = mptcp_subflow_recv_lookup(msk); bool slowpath; @@ -1940,8 +1954,9 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int target; long timeo; - if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT)) - return -EOPNOTSUPP; + /* MSG_ERRQUEUE is really a no-op till we support IP_RECVERR */ + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); mptcp_lock_sock(sk, __mptcp_splice_receive_queue(sk)); if (unlikely(sk->sk_state == TCP_LISTEN)) { @@ -1957,7 +1972,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, while (copied < len) { int bytes_read; - bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied); + bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; @@ -2041,7 +2056,8 @@ out_err: pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d", msk, test_bit(MPTCP_DATA_READY, &msk->flags), skb_queue_empty_lockless(&sk->sk_receive_queue), copied); - mptcp_rcv_space_adjust(msk, copied); + if (!(flags & MSG_PEEK)) + mptcp_rcv_space_adjust(msk, copied); release_sock(sk); return copied; @@ -2319,7 +2335,7 @@ static void mptcp_worker(struct work_struct *work) goto unlock; mptcp_check_data_fin_ack(sk); - __mptcp_flush_join_list(msk); + mptcp_flush_join_list(msk); mptcp_check_fastclose(msk); @@ -2382,6 +2398,9 @@ static int __mptcp_init_sock(struct sock *sk) /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); + + tcp_assign_congestion_control(sk); + return 0; } @@ -2519,7 +2538,7 @@ static void __mptcp_check_send_data_fin(struct sock *sk) } } - __mptcp_flush_join_list(msk); + mptcp_flush_join_list(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); @@ -2575,6 +2594,8 @@ static void __mptcp_destroy_sock(struct sock *sk) WARN_ON_ONCE(msk->rmem_released); sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); + + tcp_cleanup_congestion_control(sk); sk_refcnt_debug_release(sk); mptcp_dispose_initial_subflow(msk); sock_put(sk); @@ -2601,7 +2622,7 @@ static void mptcp_close(struct sock *sk, long timeout) cleanup: /* orphan all the subflows */ inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; - list_for_each_entry(subflow, &mptcp_sk(sk)->conn_list, node) { + mptcp_for_each_subflow(mptcp_sk(sk), subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); @@ -2656,7 +2677,8 @@ static int mptcp_disconnect(struct sock *sk, int flags) struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - __mptcp_flush_join_list(msk); + mptcp_do_flush_join_list(msk); + mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -2705,6 +2727,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->snd_nxt = msk->write_seq; msk->snd_una = msk->write_seq; msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; + msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; if (mp_opt->mp_capable) { msk->can_ack = true; @@ -2813,116 +2836,6 @@ static void mptcp_destroy(struct sock *sk) sk_sockets_allocated_dec(sk); } -static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) -{ - struct sock *sk = (struct sock *)msk; - struct socket *ssock; - int ret; - - switch (optname) { - case SO_REUSEPORT: - case SO_REUSEADDR: - lock_sock(sk); - ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { - release_sock(sk); - return -EINVAL; - } - - ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); - if (ret == 0) { - if (optname == SO_REUSEPORT) - sk->sk_reuseport = ssock->sk->sk_reuseport; - else if (optname == SO_REUSEADDR) - sk->sk_reuse = ssock->sk->sk_reuse; - } - release_sock(sk); - return ret; - } - - return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); -} - -static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) -{ - struct sock *sk = (struct sock *)msk; - int ret = -EOPNOTSUPP; - struct socket *ssock; - - switch (optname) { - case IPV6_V6ONLY: - lock_sock(sk); - ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { - release_sock(sk); - return -EINVAL; - } - - ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); - if (ret == 0) - sk->sk_ipv6only = ssock->sk->sk_ipv6only; - - release_sock(sk); - break; - } - - return ret; -} - -static int mptcp_setsockopt(struct sock *sk, int level, int optname, - sockptr_t optval, unsigned int optlen) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - struct sock *ssk; - - pr_debug("msk=%p", msk); - - if (level == SOL_SOCKET) - return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); - - /* @@ the meaning of setsockopt() when the socket is connected and - * there are multiple subflows is not yet defined. It is up to the - * MPTCP-level socket to configure the subflows until the subflow - * is in TCP fallback, when TCP socket options are passed through - * to the one remaining subflow. - */ - lock_sock(sk); - ssk = __mptcp_tcp_fallback(msk); - release_sock(sk); - if (ssk) - return tcp_setsockopt(ssk, level, optname, optval, optlen); - - if (level == SOL_IPV6) - return mptcp_setsockopt_v6(msk, optname, optval, optlen); - - return -EOPNOTSUPP; -} - -static int mptcp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *option) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - struct sock *ssk; - - pr_debug("msk=%p", msk); - - /* @@ the meaning of setsockopt() when the socket is connected and - * there are multiple subflows is not yet defined. It is up to the - * MPTCP-level socket to configure the subflows until the subflow - * is in TCP fallback, when socket options are passed through - * to the one remaining subflow. - */ - lock_sock(sk); - ssk = __mptcp_tcp_fallback(msk); - release_sock(sk); - if (ssk) - return tcp_getsockopt(ssk, level, optname, optval, option); - - return -EOPNOTSUPP; -} - void __mptcp_data_acked(struct sock *sk) { if (!sock_owned_by_user(sk)) @@ -3332,7 +3245,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ - __mptcp_flush_join_list(msk); + mptcp_flush_join_list(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -3409,34 +3322,10 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, return mask; } -static int mptcp_release(struct socket *sock) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = sock->sk; - struct mptcp_sock *msk; - - if (!sk) - return 0; - - lock_sock(sk); - - msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ip_mc_drop_socket(ssk); - } - - release_sock(sk); - - return inet_release(sock); -} - static const struct proto_ops mptcp_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, - .release = mptcp_release, + .release = inet_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, @@ -3528,35 +3417,10 @@ void __init mptcp_proto_init(void) } #if IS_ENABLED(CONFIG_MPTCP_IPV6) -static int mptcp6_release(struct socket *sock) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk; - struct sock *sk = sock->sk; - - if (!sk) - return 0; - - lock_sock(sk); - - msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ip_mc_drop_socket(ssk); - ipv6_sock_mc_close(ssk); - ipv6_sock_ac_close(ssk); - } - - release_sock(sk); - return inet6_release(sock); -} - static const struct proto_ops mptcp_v6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, - .release = mptcp6_release, + .release = inet6_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 40e9b05856cd..edc0128730df 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -22,11 +22,10 @@ #define OPTION_MPTCP_MPJ_SYNACK BIT(4) #define OPTION_MPTCP_MPJ_ACK BIT(5) #define OPTION_MPTCP_ADD_ADDR BIT(6) -#define OPTION_MPTCP_ADD_ADDR6 BIT(7) -#define OPTION_MPTCP_RM_ADDR BIT(8) -#define OPTION_MPTCP_FASTCLOSE BIT(9) -#define OPTION_MPTCP_PRIO BIT(10) -#define OPTION_MPTCP_RST BIT(11) +#define OPTION_MPTCP_RM_ADDR BIT(7) +#define OPTION_MPTCP_FASTCLOSE BIT(8) +#define OPTION_MPTCP_PRIO BIT(9) +#define OPTION_MPTCP_RST BIT(10) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 @@ -91,8 +90,6 @@ /* MPTCP ADD_ADDR flags */ #define MPTCP_ADDR_ECHO BIT(0) -#define MPTCP_ADDR_IPVERSION_4 4 -#define MPTCP_ADDR_IPVERSION_6 6 /* MPTCP MP_PRIO flags */ #define MPTCP_PRIO_BKUP BIT(0) @@ -111,6 +108,7 @@ #define MPTCP_CLEAN_UNA 7 #define MPTCP_ERROR_REPORT 8 #define MPTCP_RETRANSMIT 9 +#define MPTCP_WORK_SYNC_SETSOCKOPT 10 static inline bool before64(__u64 seq1, __u64 seq2) { @@ -134,7 +132,6 @@ struct mptcp_options_received { add_addr : 1, rm_addr : 1, mp_prio : 1, - family : 4, echo : 1, backup : 1; u32 token; @@ -149,16 +146,9 @@ struct mptcp_options_received { ack64:1, mpc_map:1, __unused:2; - u8 addr_id; + struct mptcp_addr_info addr; struct mptcp_rm_list rm_list; - union { - struct in_addr addr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - struct in6_addr addr6; -#endif - }; u64 ahmac; - u16 port; u8 reset_reason:4; u8 reset_transient:1; }; @@ -169,20 +159,6 @@ static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) ((nib & 0xF) << 8) | field); } -struct mptcp_addr_info { - sa_family_t family; - __be16 port; - u8 id; - u8 flags; - int ifindex; - union { - struct in_addr addr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - struct in6_addr addr6; -#endif - }; -}; - enum mptcp_pm_status { MPTCP_PM_ADD_ADDR_RECEIVED, MPTCP_PM_ADD_ADDR_SEND_ACK, @@ -280,6 +256,8 @@ struct mptcp_sock { u64 time; /* start time of measurement window */ u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; + + u32 setsockopt_seq; }; #define mptcp_lock_sock(___sk, cb) do { \ @@ -438,6 +416,8 @@ struct mptcp_subflow_context { long delegated_status; struct list_head delegated_node; /* link into delegated_action, protected by local BH */ + u32 setsockopt_seq; + struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ const struct inet_connection_sock_af_ops *icsk_af_ops; @@ -557,12 +537,25 @@ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *remote); + const struct mptcp_addr_info *remote, + u8 flags, int ifindex); int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); +static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) +{ + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */ + if (subflow->request_join && !subflow->fully_established) + return false; + + /* only send if our side has not closed yet */ + return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); +} + static inline void mptcp_subflow_tcp_fallback(struct sock *sk, struct mptcp_subflow_context *ctx) { @@ -595,6 +588,11 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); bool mptcp_schedule_work(struct sock *sk); +int mptcp_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen); +int mptcp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *option); + void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void __mptcp_error_report(struct sock *sk); @@ -754,6 +752,12 @@ unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk); unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk); +int mptcp_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen); + +void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk); +void mptcp_sockopt_sync_all(struct mptcp_sock *msk); + static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb) { return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c new file mode 100644 index 000000000000..00d941b66c1e --- /dev/null +++ b/net/mptcp/sockopt.c @@ -0,0 +1,756 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Multipath TCP + * + * Copyright (c) 2021, Red Hat. + */ + +#define pr_fmt(fmt) "MPTCP: " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <net/sock.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <net/mptcp.h> +#include "protocol.h" + +static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) +{ + sock_owned_by_me((const struct sock *)msk); + + if (likely(!__mptcp_check_fallback(msk))) + return NULL; + + return msk->first; +} + +static u32 sockopt_seq_reset(const struct sock *sk) +{ + sock_owned_by_me(sk); + + /* Highbits contain state. Allows to distinguish sockopt_seq + * of listener and established: + * s0 = new_listener() + * sockopt(s0) - seq is 1 + * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) + * sockopt(s0) - seq increments to 2 on s0 + * sockopt(s1) // seq increments to 2 on s1 (different option) + * new ssk completes join, inherits options from s0 // seq 2 + * Needs sync from mptcp join logic, but ssk->seq == msk->seq + * + * Set High order bits to sk_state so ssk->seq == msk->seq test + * will fail. + */ + + return (u32)sk->sk_state << 24u; +} + +static void sockopt_seq_inc(struct mptcp_sock *msk) +{ + u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; + + msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; +} + +static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, + unsigned int optlen, int *val) +{ + if (optlen < sizeof(int)) + return -EINVAL; + + if (copy_from_sockptr(val, optval, sizeof(*val))) + return -EFAULT; + + return 0; +} + +static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + + lock_sock(sk); + sockopt_seq_inc(msk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow = lock_sock_fast(ssk); + + switch (optname) { + case SO_DEBUG: + sock_valbool_flag(ssk, SOCK_DBG, !!val); + break; + case SO_KEEPALIVE: + if (ssk->sk_prot->keepalive) + ssk->sk_prot->keepalive(ssk, !!val); + sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); + break; + case SO_PRIORITY: + ssk->sk_priority = val; + break; + case SO_SNDBUF: + case SO_SNDBUFFORCE: + ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; + WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); + break; + case SO_RCVBUF: + case SO_RCVBUFFORCE: + ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; + WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); + break; + case SO_MARK: + if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { + ssk->sk_mark = sk->sk_mark; + sk_dst_reset(ssk); + } + break; + case SO_INCOMING_CPU: + WRITE_ONCE(ssk->sk_incoming_cpu, val); + break; + } + + subflow->setsockopt_seq = msk->setsockopt_seq; + unlock_sock_fast(ssk, slow); + } + + release_sock(sk); +} + +static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) +{ + sockptr_t optval = KERNEL_SOCKPTR(&val); + struct sock *sk = (struct sock *)msk; + int ret; + + ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, + optval, sizeof(val)); + if (ret) + return ret; + + mptcp_sol_socket_sync_intval(msk, optname, val); + return 0; +} + +static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) +{ + struct sock *sk = (struct sock *)msk; + + WRITE_ONCE(sk->sk_incoming_cpu, val); + + mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); +} + +static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) +{ + int val, ret; + + ret = mptcp_get_int_option(msk, optval, optlen, &val); + if (ret) + return ret; + + switch (optname) { + case SO_KEEPALIVE: + mptcp_sol_socket_sync_intval(msk, optname, val); + return 0; + case SO_DEBUG: + case SO_MARK: + case SO_PRIORITY: + case SO_SNDBUF: + case SO_SNDBUFFORCE: + case SO_RCVBUF: + case SO_RCVBUFFORCE: + return mptcp_sol_socket_intval(msk, optname, val); + case SO_INCOMING_CPU: + mptcp_so_incoming_cpu(msk, val); + return 0; + } + + return -ENOPROTOOPT; +} + +static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, + unsigned int optlen) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + struct linger ling; + sockptr_t kopt; + int ret; + + if (optlen < sizeof(ling)) + return -EINVAL; + + if (copy_from_sockptr(&ling, optval, sizeof(ling))) + return -EFAULT; + + kopt = KERNEL_SOCKPTR(&ling); + ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); + if (ret) + return ret; + + lock_sock(sk); + sockopt_seq_inc(msk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow = lock_sock_fast(ssk); + + if (!ling.l_onoff) { + sock_reset_flag(ssk, SOCK_LINGER); + } else { + ssk->sk_lingertime = sk->sk_lingertime; + sock_set_flag(ssk, SOCK_LINGER); + } + + subflow->setsockopt_seq = msk->setsockopt_seq; + unlock_sock_fast(ssk, slow); + } + + release_sock(sk); + return 0; +} + +static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct sock *sk = (struct sock *)msk; + struct socket *ssock; + int ret; + + switch (optname) { + case SO_REUSEPORT: + case SO_REUSEADDR: + case SO_BINDTODEVICE: + case SO_BINDTOIFINDEX: + lock_sock(sk); + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + release_sock(sk); + return -EINVAL; + } + + ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); + if (ret == 0) { + if (optname == SO_REUSEPORT) + sk->sk_reuseport = ssock->sk->sk_reuseport; + else if (optname == SO_REUSEADDR) + sk->sk_reuse = ssock->sk->sk_reuse; + else if (optname == SO_BINDTODEVICE) + sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; + else if (optname == SO_BINDTOIFINDEX) + sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; + } + release_sock(sk); + return ret; + case SO_KEEPALIVE: + case SO_PRIORITY: + case SO_SNDBUF: + case SO_SNDBUFFORCE: + case SO_RCVBUF: + case SO_RCVBUFFORCE: + case SO_MARK: + case SO_INCOMING_CPU: + case SO_DEBUG: + return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen); + case SO_LINGER: + return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); + case SO_NO_CHECK: + case SO_DONTROUTE: + case SO_BROADCAST: + case SO_BSDCOMPAT: + case SO_PASSCRED: + case SO_PASSSEC: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_NOFCS: + case SO_SELECT_ERR_QUEUE: + return 0; + } + + return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); +} + +static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct sock *sk = (struct sock *)msk; + int ret = -EOPNOTSUPP; + struct socket *ssock; + + switch (optname) { + case IPV6_V6ONLY: + lock_sock(sk); + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + release_sock(sk); + return -EINVAL; + } + + ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); + if (ret == 0) + sk->sk_ipv6only = ssock->sk->sk_ipv6only; + + release_sock(sk); + break; + } + + return ret; +} + +static bool mptcp_supported_sockopt(int level, int optname) +{ + if (level == SOL_SOCKET) { + switch (optname) { + case SO_DEBUG: + case SO_REUSEPORT: + case SO_REUSEADDR: + + /* the following ones need a better implementation, + * but are quite common we want to preserve them + */ + case SO_BINDTODEVICE: + case SO_SNDBUF: + case SO_SNDBUFFORCE: + case SO_RCVBUF: + case SO_RCVBUFFORCE: + case SO_KEEPALIVE: + case SO_PRIORITY: + case SO_LINGER: + case SO_TIMESTAMP_OLD: + case SO_TIMESTAMP_NEW: + case SO_TIMESTAMPNS_OLD: + case SO_TIMESTAMPNS_NEW: + case SO_TIMESTAMPING_OLD: + case SO_TIMESTAMPING_NEW: + case SO_RCVLOWAT: + case SO_RCVTIMEO_OLD: + case SO_RCVTIMEO_NEW: + case SO_SNDTIMEO_OLD: + case SO_SNDTIMEO_NEW: + case SO_MARK: + case SO_INCOMING_CPU: + case SO_BINDTOIFINDEX: + case SO_BUSY_POLL: + case SO_PREFER_BUSY_POLL: + case SO_BUSY_POLL_BUDGET: + + /* next ones are no-op for plain TCP */ + case SO_NO_CHECK: + case SO_DONTROUTE: + case SO_BROADCAST: + case SO_BSDCOMPAT: + case SO_PASSCRED: + case SO_PASSSEC: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_NOFCS: + case SO_SELECT_ERR_QUEUE: + return true; + } + + /* SO_OOBINLINE is not supported, let's avoid the related mess */ + /* SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, + * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, + * we must be careful with subflows + */ + /* SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks + * explicitly the sk_protocol field + */ + /* SO_PEEK_OFF is unsupported, as it is for plain TCP */ + /* SO_MAX_PACING_RATE is unsupported, we must be careful with subflows */ + /* SO_CNX_ADVICE is currently unsupported, could possibly be relevant, + * but likely needs careful design + */ + /* SO_ZEROCOPY is currently unsupported, TODO in sndmsg */ + /* SO_TXTIME is currently unsupported */ + return false; + } + if (level == SOL_IP) { + switch (optname) { + /* should work fine */ + case IP_FREEBIND: + case IP_TRANSPARENT: + + /* the following are control cmsg related */ + case IP_PKTINFO: + case IP_RECVTTL: + case IP_RECVTOS: + case IP_RECVOPTS: + case IP_RETOPTS: + case IP_PASSSEC: + case IP_RECVORIGDSTADDR: + case IP_CHECKSUM: + case IP_RECVFRAGSIZE: + + /* common stuff that need some love */ + case IP_TOS: + case IP_TTL: + case IP_BIND_ADDRESS_NO_PORT: + case IP_MTU_DISCOVER: + case IP_RECVERR: + + /* possibly less common may deserve some love */ + case IP_MINTTL: + + /* the following is apparently a no-op for plain TCP */ + case IP_RECVERR_RFC4884: + return true; + } + + /* IP_OPTIONS is not supported, needs subflow care */ + /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ + /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, + * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, + * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, + * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, + * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, + * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal + * with mcast stuff + */ + /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ + return false; + } + if (level == SOL_IPV6) { + switch (optname) { + case IPV6_V6ONLY: + + /* the following are control cmsg related */ + case IPV6_RECVPKTINFO: + case IPV6_2292PKTINFO: + case IPV6_RECVHOPLIMIT: + case IPV6_2292HOPLIMIT: + case IPV6_RECVRTHDR: + case IPV6_2292RTHDR: + case IPV6_RECVHOPOPTS: + case IPV6_2292HOPOPTS: + case IPV6_RECVDSTOPTS: + case IPV6_2292DSTOPTS: + case IPV6_RECVTCLASS: + case IPV6_FLOWINFO: + case IPV6_RECVPATHMTU: + case IPV6_RECVORIGDSTADDR: + case IPV6_RECVFRAGSIZE: + + /* the following ones need some love but are quite common */ + case IPV6_TCLASS: + case IPV6_TRANSPARENT: + case IPV6_FREEBIND: + case IPV6_PKTINFO: + case IPV6_2292PKTOPTIONS: + case IPV6_UNICAST_HOPS: + case IPV6_MTU_DISCOVER: + case IPV6_MTU: + case IPV6_RECVERR: + case IPV6_FLOWINFO_SEND: + case IPV6_FLOWLABEL_MGR: + case IPV6_MINHOPCOUNT: + case IPV6_DONTFRAG: + case IPV6_AUTOFLOWLABEL: + + /* the following one is a no-op for plain TCP */ + case IPV6_RECVERR_RFC4884: + return true; + } + + /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are + * not supported + */ + /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, + * IPV6_MULTICAST_IF, IPV6_ADDRFORM, + * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, + * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, + * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, + * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER + * are not supported better not deal with mcast + */ + /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ + + /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ + /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ + return false; + } + if (level == SOL_TCP) { + switch (optname) { + /* the following are no-op or should work just fine */ + case TCP_THIN_DUPACK: + case TCP_DEFER_ACCEPT: + + /* the following need some love */ + case TCP_MAXSEG: + case TCP_NODELAY: + case TCP_THIN_LINEAR_TIMEOUTS: + case TCP_CONGESTION: + case TCP_ULP: + case TCP_CORK: + case TCP_KEEPIDLE: + case TCP_KEEPINTVL: + case TCP_KEEPCNT: + case TCP_SYNCNT: + case TCP_SAVE_SYN: + case TCP_LINGER2: + case TCP_WINDOW_CLAMP: + case TCP_QUICKACK: + case TCP_USER_TIMEOUT: + case TCP_TIMESTAMP: + case TCP_NOTSENT_LOWAT: + case TCP_TX_DELAY: + return true; + } + + /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ + + /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, + * TCP_REPAIR_WINDOW are not supported, better avoid this mess + */ + /* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE, + * are not supported fastopen is currently unsupported + */ + /* TCP_INQ is currently unsupported, needs some recvmsg work */ + } + return false; +} + +static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, + unsigned int optlen) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + char name[TCP_CA_NAME_MAX]; + bool cap_net_admin; + int ret; + + if (optlen < 1) + return -EINVAL; + + ret = strncpy_from_sockptr(name, optval, + min_t(long, TCP_CA_NAME_MAX - 1, optlen)); + if (ret < 0) + return -EFAULT; + + name[ret] = 0; + + cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); + + ret = 0; + lock_sock(sk); + sockopt_seq_inc(msk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err; + + lock_sock(ssk); + err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); + if (err < 0 && ret == 0) + ret = err; + subflow->setsockopt_seq = msk->setsockopt_seq; + release_sock(ssk); + } + + if (ret == 0) + tcp_set_congestion_control(sk, name, false, cap_net_admin); + + release_sock(sk); + return ret; +} + +static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) +{ + switch (optname) { + case TCP_ULP: + return -EOPNOTSUPP; + case TCP_CONGESTION: + return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); + } + + return -EOPNOTSUPP; +} + +int mptcp_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct sock *ssk; + + pr_debug("msk=%p", msk); + + if (!mptcp_supported_sockopt(level, optname)) + return -ENOPROTOOPT; + + if (level == SOL_SOCKET) + return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); + + /* @@ the meaning of setsockopt() when the socket is connected and + * there are multiple subflows is not yet defined. It is up to the + * MPTCP-level socket to configure the subflows until the subflow + * is in TCP fallback, when TCP socket options are passed through + * to the one remaining subflow. + */ + lock_sock(sk); + ssk = __mptcp_tcp_fallback(msk); + release_sock(sk); + if (ssk) + return tcp_setsockopt(ssk, level, optname, optval, optlen); + + if (level == SOL_IPV6) + return mptcp_setsockopt_v6(msk, optname, optval, optlen); + + if (level == SOL_TCP) + return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); + + return -EOPNOTSUPP; +} + +static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = (struct sock *)msk; + struct socket *ssock; + int ret = -EINVAL; + struct sock *ssk; + + lock_sock(sk); + ssk = msk->first; + if (ssk) { + ret = tcp_getsockopt(ssk, level, optname, optval, optlen); + goto out; + } + + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) + goto out; + + ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); + +out: + release_sock(sk); + return ret; +} + +static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, + char __user *optval, int __user *optlen) +{ + switch (optname) { + case TCP_ULP: + case TCP_CONGESTION: + case TCP_INFO: + case TCP_CC_INFO: + return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, + optval, optlen); + } + return -EOPNOTSUPP; +} + +int mptcp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *option) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct sock *ssk; + + pr_debug("msk=%p", msk); + + /* @@ the meaning of setsockopt() when the socket is connected and + * there are multiple subflows is not yet defined. It is up to the + * MPTCP-level socket to configure the subflows until the subflow + * is in TCP fallback, when socket options are passed through + * to the one remaining subflow. + */ + lock_sock(sk); + ssk = __mptcp_tcp_fallback(msk); + release_sock(sk); + if (ssk) + return tcp_getsockopt(ssk, level, optname, optval, option); + + if (level == SOL_TCP) + return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); + return -EOPNOTSUPP; +} + +static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) +{ + static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; + struct sock *sk = (struct sock *)msk; + + if (ssk->sk_prot->keepalive) { + if (sock_flag(sk, SOCK_KEEPOPEN)) + ssk->sk_prot->keepalive(ssk, 1); + else + ssk->sk_prot->keepalive(ssk, 0); + } + + ssk->sk_priority = sk->sk_priority; + ssk->sk_bound_dev_if = sk->sk_bound_dev_if; + ssk->sk_incoming_cpu = sk->sk_incoming_cpu; + + if (sk->sk_userlocks & tx_rx_locks) { + ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; + if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) + WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) + WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); + } + + if (sock_flag(sk, SOCK_LINGER)) { + ssk->sk_lingertime = sk->sk_lingertime; + sock_set_flag(ssk, SOCK_LINGER); + } else { + sock_reset_flag(ssk, SOCK_LINGER); + } + + if (sk->sk_mark != ssk->sk_mark) { + ssk->sk_mark = sk->sk_mark; + sk_dst_reset(ssk); + } + + sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); + + if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) + tcp_set_congestion_control(ssk, inet_csk(sk)->icsk_ca_ops->name, false, true); +} + +static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) +{ + bool slow = lock_sock_fast(ssk); + + sync_socket_options(msk, ssk); + + unlock_sock_fast(ssk, slow); +} + +void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + msk_owned_by_me(msk); + + if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { + __mptcp_sockopt_sync(msk, ssk); + + subflow->setsockopt_seq = msk->setsockopt_seq; + } +} + +void mptcp_sockopt_sync_all(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + u32 seq; + + seq = sockopt_seq_reset(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u32 sseq = READ_ONCE(subflow->setsockopt_seq); + + if (sseq != msk->setsockopt_seq) { + __mptcp_sockopt_sync(msk, ssk); + WRITE_ONCE(subflow->setsockopt_seq, seq); + } else if (sseq != seq) { + WRITE_ONCE(subflow->setsockopt_seq, seq); + } + + cond_resched(); + } + + msk->setsockopt_seq = seq; +} diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 223d6be5fc3b..82e91b00ad39 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -25,6 +25,8 @@ #include "protocol.h" #include "mib.h" +#include <trace/events/mptcp.h> + static void mptcp_subflow_ops_undo_override(struct sock *ssk); static void SUBFLOW_REQ_INC_STATS(struct request_sock *req, @@ -679,6 +681,9 @@ create_child: goto out; } + /* ssk inherits options of listener sk */ + ctx->setsockopt_seq = listener->setsockopt_seq; + if (ctx->mp_capable) { /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space @@ -694,6 +699,7 @@ create_child: * created mptcp socket */ new_msk->sk_destruct = mptcp_sock_destruct; + mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq; mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); ctx->conn = new_msk; @@ -858,9 +864,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, goto validate_seq; } - pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d", - mpext->data_seq, mpext->dsn64, mpext->subflow_seq, - mpext->data_len, mpext->data_fin); + trace_get_mapping_status(mpext); data_len = mpext->data_len; if (data_len == 0) { @@ -998,8 +1002,6 @@ static bool subflow_check_data_avail(struct sock *ssk) struct mptcp_sock *msk; struct sk_buff *skb; - pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk, - subflow->data_avail, skb_peek(&ssk->sk_receive_queue)); if (!skb_peek(&ssk->sk_receive_queue)) subflow->data_avail = 0; if (subflow->data_avail) @@ -1011,7 +1013,7 @@ static bool subflow_check_data_avail(struct sock *ssk) u64 old_ack; status = get_mapping_status(ssk, msk); - pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status); + trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); if (status == MAPPING_INVALID) { ssk->sk_err = EBADMSG; goto fatal; @@ -1256,7 +1258,8 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, } int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *remote) + const struct mptcp_addr_info *remote, + u8 flags, int ifindex) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; @@ -1300,7 +1303,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif - ssk->sk_bound_dev_if = loc->ifindex; + ssk->sk_bound_dev_if = ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); if (err) goto failed; @@ -1312,10 +1315,11 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->local_id = local_id; subflow->remote_id = remote_id; subflow->request_join = 1; - subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); mptcp_info2sockaddr(remote, &addr, ssk->sk_family); mptcp_add_pending_subflow(msk, subflow); + mptcp_sockopt_sync(msk, ssk); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); if (err && err != -EINPROGRESS) goto failed_unlink; diff --git a/net/mptcp/token.c b/net/mptcp/token.c index feb4b9ffd462..8f0270a780ce 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -402,7 +402,7 @@ void __init mptcp_token_init(void) } } -#if IS_MODULE(CONFIG_MPTCP_KUNIT_TESTS) +#if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST) EXPORT_SYMBOL_GPL(mptcp_token_new_request); EXPORT_SYMBOL_GPL(mptcp_token_new_connect); EXPORT_SYMBOL_GPL(mptcp_token_accept); diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index a9cb355324d1..ffff8da707b8 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -105,13 +105,20 @@ static void ncsi_channel_monitor(struct timer_list *t) monitor_state = nc->monitor.state; spin_unlock_irqrestore(&nc->lock, flags); - if (!enabled || chained) { - ncsi_stop_channel_monitor(nc); - return; - } + if (!enabled) + return; /* expected race disabling timer */ + if (WARN_ON_ONCE(chained)) + goto bad_state; + if (state != NCSI_CHANNEL_INACTIVE && state != NCSI_CHANNEL_ACTIVE) { - ncsi_stop_channel_monitor(nc); +bad_state: + netdev_warn(ndp->ndev.dev, + "Bad NCSI monitor state channel %d 0x%x %s queue\n", + nc->id, state, chained ? "on" : "off"); + spin_lock_irqsave(&nc->lock, flags); + nc->monitor.enabled = false; + spin_unlock_irqrestore(&nc->lock, flags); return; } @@ -136,10 +143,9 @@ static void ncsi_channel_monitor(struct timer_list *t) ncsi_report_link(ndp, true); ndp->flags |= NCSI_DEV_RESHUFFLE; - ncsi_stop_channel_monitor(nc); - ncm = &nc->modes[NCSI_MODE_LINK]; spin_lock_irqsave(&nc->lock, flags); + nc->monitor.enabled = false; nc->state = NCSI_CHANNEL_INVISIBLE; ncm->data[2] &= ~0x1; spin_unlock_irqrestore(&nc->lock, flags); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 1a92063c73a4..fcd8682704c4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -71,12 +71,17 @@ config NF_CONNTRACK To compile it as a module, choose M here. If unsure, say N. -config NF_LOG_COMMON - tristate - -config NF_LOG_NETDEV - tristate "Netdev packet logging" - select NF_LOG_COMMON +config NF_LOG_SYSLOG + tristate "Syslog packet logging" + default m if NETFILTER_ADVANCED=n + help + This option enable support for packet logging via syslog. + It supports IPv4, IPV6, ARP and common transport protocols such + as TCP and UDP. + This is a simpler but less flexible logging method compared to + CONFIG_NETFILTER_NETLINK_LOG. + If both are enabled the backend to use can be configured at run-time + by means of per-address-family sysctl tunables. if NF_CONNTRACK config NETFILTER_CONNCOUNT @@ -922,8 +927,7 @@ config NETFILTER_XT_TARGET_LED config NETFILTER_XT_TARGET_LOG tristate "LOG target support" - select NF_LOG_COMMON - select NF_LOG_IPV4 + select NF_LOG_SYSLOG select NF_LOG_IPV6 if IP6_NF_IPTABLES default m if NETFILTER_ADVANCED=n help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 33da7bf1b68e..e80e010354b1 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -48,11 +48,7 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o nf_nat-y := nf_nat_core.o nf_nat_proto.o nf_nat_helper.o -# generic transport layer logging -obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o - -# packet logging for netdev family -obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o +obj-$(CONFIG_NF_LOG_SYSLOG) += nf_log_syslog.o obj-$(CONFIG_NF_NAT) += nf_nat.o nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 89009c82a6b2..359ff8ec236a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -963,20 +963,9 @@ static struct nlmsghdr * start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, enum ipset_cmd cmd) { - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; - - nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), - sizeof(*nfmsg), flags); - if (!nlh) - return NULL; - - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = NFPROTO_IPV4; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - - return nlh; + return nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags, + NFPROTO_IPV4, NFNETLINK_V0, 0); } /* Create a set */ diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index cf925906f59b..ef1f45e43b63 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -591,8 +591,6 @@ static int __net_init __ip_vs_ftp_init(struct net *net) ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]); if (ret) goto err_unreg; - pr_info("%s: loaded support on port[%d] = %u\n", - app->name, i, ports[i]); } return 0; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ff0168736f6e..e0befcf8113a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -55,6 +55,8 @@ #include "nf_internals.h" +extern unsigned int nf_conntrack_net_id; + __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; EXPORT_SYMBOL_GPL(nf_conntrack_locks); @@ -85,6 +87,8 @@ static __read_mostly bool nf_conntrack_locks_all; static struct conntrack_gc_work conntrack_gc_work; +extern unsigned int nf_conntrack_net_id; + void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) { /* 1) Acquire the lock */ @@ -656,6 +660,7 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) { struct nf_conn_tstamp *tstamp; + struct net *net; if (test_and_set_bit(IPS_DYING_BIT, &ct->status)) return false; @@ -670,11 +675,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) * be done by event cache worker on redelivery. */ nf_ct_delete_from_lists(ct); - nf_conntrack_ecache_delayed_work(nf_ct_net(ct)); + nf_conntrack_ecache_work(nf_ct_net(ct), NFCT_ECACHE_DESTROY_FAIL); return false; } - nf_conntrack_ecache_work(nf_ct_net(ct)); + net = nf_ct_net(ct); + if (nf_conntrack_ecache_dwork_pending(net)) + nf_conntrack_ecache_work(net, NFCT_ECACHE_DESTROY_SENT); nf_ct_delete_from_lists(ct); nf_ct_put(ct); return true; @@ -1376,6 +1383,7 @@ static void gc_worker(struct work_struct *work) i = 0; hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { + struct nf_conntrack_net *cnet; struct net *net; tmp = nf_ct_tuplehash_to_ctrack(h); @@ -1396,7 +1404,8 @@ static void gc_worker(struct work_struct *work) continue; net = nf_ct_net(tmp); - if (atomic_read(&net->ct.count) < nf_conntrack_max95) + cnet = net_generic(net, nf_conntrack_net_id); + if (atomic_read(&cnet->count) < nf_conntrack_max95) continue; /* need to take reference to avoid possible races */ @@ -1475,17 +1484,18 @@ __nf_conntrack_alloc(struct net *net, const struct nf_conntrack_tuple *repl, gfp_t gfp, u32 hash) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + unsigned int ct_count; struct nf_conn *ct; /* We don't want any race condition at early drop stage */ - atomic_inc(&net->ct.count); + ct_count = atomic_inc_return(&cnet->count); - if (nf_conntrack_max && - unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { + if (nf_conntrack_max && unlikely(ct_count > nf_conntrack_max)) { if (!early_drop(net, hash)) { if (!conntrack_gc_work.early_drop) conntrack_gc_work.early_drop = true; - atomic_dec(&net->ct.count); + atomic_dec(&cnet->count); net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); return ERR_PTR(-ENOMEM); } @@ -1520,7 +1530,7 @@ __nf_conntrack_alloc(struct net *net, atomic_set(&ct->ct_general.use, 0); return ct; out: - atomic_dec(&net->ct.count); + atomic_dec(&cnet->count); return ERR_PTR(-ENOMEM); } @@ -1537,6 +1547,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc); void nf_conntrack_free(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); + struct nf_conntrack_net *cnet; /* A freed object has refcnt == 0, that's * the golden rule for SLAB_TYPESAFE_BY_RCU @@ -1545,8 +1556,10 @@ void nf_conntrack_free(struct nf_conn *ct) nf_ct_ext_destroy(ct); kmem_cache_free(nf_conntrack_cachep, ct); + cnet = net_generic(net, nf_conntrack_net_id); + smp_mb__before_atomic(); - atomic_dec(&net->ct.count); + atomic_dec(&cnet->count); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -1567,6 +1580,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_zone *zone; struct nf_conn_timeout *timeout_ext; struct nf_conntrack_zone tmp; + struct nf_conntrack_net *cnet; if (!nf_ct_invert_tuple(&repl_tuple, tuple)) { pr_debug("Can't invert tuple.\n"); @@ -1600,7 +1614,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, GFP_ATOMIC); local_bh_disable(); - if (net->ct.expect_count) { + cnet = net_generic(net, nf_conntrack_net_id); + if (cnet->expect_count) { spin_lock(&nf_conntrack_expect_lock); exp = nf_ct_find_expectation(net, zone, tuple); if (exp) { @@ -2302,9 +2317,11 @@ __nf_ct_unconfirmed_destroy(struct net *net) void nf_ct_unconfirmed_destroy(struct net *net) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + might_sleep(); - if (atomic_read(&net->ct.count) > 0) { + if (atomic_read(&cnet->count) > 0) { __nf_ct_unconfirmed_destroy(net); nf_queue_nf_hook_drop(net); synchronize_net(); @@ -2316,11 +2333,12 @@ void nf_ct_iterate_cleanup_net(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, u32 portid, int report) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); struct iter_data d; might_sleep(); - if (atomic_read(&net->ct.count) == 0) + if (atomic_read(&cnet->count) == 0) return; d.iter = iter; @@ -2349,7 +2367,9 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) down_read(&net_rwsem); for_each_net(net) { - if (atomic_read(&net->ct.count) == 0) + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + + if (atomic_read(&cnet->count) == 0) continue; __nf_ct_unconfirmed_destroy(net); nf_queue_nf_hook_drop(net); @@ -2429,8 +2449,10 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) i_see_dead_people: busy = 0; list_for_each_entry(net, net_exit_list, exit_list) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + nf_ct_iterate_cleanup(kill_all, net, 0, 0); - if (atomic_read(&net->ct.count) != 0) + if (atomic_read(&cnet->count) != 0) busy = 1; } if (busy) { @@ -2711,12 +2733,13 @@ void nf_conntrack_init_end(void) int nf_conntrack_init_net(struct net *net) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); int ret = -ENOMEM; int cpu; BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER); BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS); - atomic_set(&net->ct.count, 0); + atomic_set(&cnet->count, 0); net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); if (!net->ct.pcpu_lists) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 7956c9f19899..759d87aef95f 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -27,6 +27,8 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_extend.h> +extern unsigned int nf_conntrack_net_id; + static DEFINE_MUTEX(nf_ct_ecache_mutex); #define ECACHE_RETRY_WAIT (HZ/10) @@ -96,8 +98,8 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) static void ecache_work(struct work_struct *work) { - struct netns_ct *ctnet = - container_of(work, struct netns_ct, ecache_dwork.work); + struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work); + struct netns_ct *ctnet = cnet->ct_net; int cpu, delay = -1; struct ct_pcpu *pcpu; @@ -127,7 +129,7 @@ static void ecache_work(struct work_struct *work) ctnet->ecache_dwork_pending = delay > 0; if (delay >= 0) - schedule_delayed_work(&ctnet->ecache_dwork, delay); + schedule_delayed_work(&cnet->ecache_dwork, delay); } int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, @@ -344,6 +346,20 @@ void nf_ct_expect_unregister_notifier(struct net *net, } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); +void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) +{ + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + + if (state == NFCT_ECACHE_DESTROY_FAIL && + !delayed_work_pending(&cnet->ecache_dwork)) { + schedule_delayed_work(&cnet->ecache_dwork, HZ); + net->ct.ecache_dwork_pending = true; + } else if (state == NFCT_ECACHE_DESTROY_SENT) { + net->ct.ecache_dwork_pending = false; + mod_delayed_work(system_wq, &cnet->ecache_dwork, 0); + } +} + #define NF_CT_EVENTS_DEFAULT 1 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; @@ -355,13 +371,18 @@ static const struct nf_ct_ext_type event_extend = { void nf_conntrack_ecache_pernet_init(struct net *net) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + net->ct.sysctl_events = nf_ct_events; - INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work); + cnet->ct_net = &net->ct; + INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work); } void nf_conntrack_ecache_pernet_fini(struct net *net) { - cancel_delayed_work_sync(&net->ct.ecache_dwork); + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + + cancel_delayed_work_sync(&cnet->ecache_dwork); } int nf_conntrack_ecache_init(void) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 42557d2b6a90..efdd391b3f72 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -43,18 +43,23 @@ unsigned int nf_ct_expect_max __read_mostly; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; static unsigned int nf_ct_expect_hashrnd __read_mostly; +extern unsigned int nf_conntrack_net_id; + /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, u32 portid, int report) { struct nf_conn_help *master_help = nfct_help(exp->master); struct net *net = nf_ct_exp_net(exp); + struct nf_conntrack_net *cnet; WARN_ON(!master_help); WARN_ON(timer_pending(&exp->timeout)); hlist_del_rcu(&exp->hnode); - net->ct.expect_count--; + + cnet = net_generic(net, nf_conntrack_net_id); + cnet->expect_count--; hlist_del_rcu(&exp->lnode); master_help->expecting[exp->class]--; @@ -118,10 +123,11 @@ __nf_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); struct nf_conntrack_expect *i; unsigned int h; - if (!net->ct.expect_count) + if (!cnet->expect_count) return NULL; h = nf_ct_expect_dst_hash(net, tuple); @@ -158,10 +164,11 @@ nf_ct_find_expectation(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); struct nf_conntrack_expect *i, *exp = NULL; unsigned int h; - if (!net->ct.expect_count) + if (!cnet->expect_count) return NULL; h = nf_ct_expect_dst_hash(net, tuple); @@ -368,6 +375,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put); static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) { + struct nf_conntrack_net *cnet; struct nf_conn_help *master_help = nfct_help(exp->master); struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(exp); @@ -389,7 +397,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) master_help->expecting[exp->class]++; hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); - net->ct.expect_count++; + cnet = net_generic(net, nf_conntrack_net_id); + cnet->expect_count++; NF_CT_STAT_INC(net, expect_create); } @@ -415,6 +424,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect, { const struct nf_conntrack_expect_policy *p; struct nf_conntrack_expect *i; + struct nf_conntrack_net *cnet; struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_helper *helper; @@ -458,7 +468,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect, } } - if (net->ct.expect_count >= nf_ct_expect_max) { + cnet = net_generic(net, nf_conntrack_net_id); + if (cnet->expect_count >= nf_ct_expect_max) { net_warn_ratelimited("nf_conntrack: expectation table full\n"); ret = -EMFILE; } @@ -686,7 +697,6 @@ module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); int nf_conntrack_expect_pernet_init(struct net *net) { - net->ct.expect_count = 0; return exp_proc_init(net); } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index b055187235f8..ac396cc8bfae 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -43,6 +43,8 @@ MODULE_PARM_DESC(nf_conntrack_helper, static DEFINE_MUTEX(nf_ct_nat_helpers_mutex); static struct list_head nf_ct_nat_helpers __read_mostly; +extern unsigned int nf_conntrack_net_id; + /* Stupid hash, but collision free for the default registrations of the * helpers currently in the kernel. */ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) @@ -212,8 +214,10 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); static struct nf_conntrack_helper * nf_ct_lookup_helper(struct nf_conn *ct, struct net *net) { - if (!net->ct.sysctl_auto_assign_helper) { - if (net->ct.auto_assign_helper_warned) + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + + if (!cnet->sysctl_auto_assign_helper) { + if (cnet->auto_assign_helper_warned) return NULL; if (!__nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)) return NULL; @@ -221,7 +225,7 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net) "has been turned off for security reasons and CT-based " "firewall rule not found. Use the iptables CT target " "to attach helpers instead.\n"); - net->ct.auto_assign_helper_warned = 1; + cnet->auto_assign_helper_warned = true; return NULL; } @@ -556,8 +560,9 @@ static const struct nf_ct_ext_type helper_extend = { void nf_conntrack_helper_pernet_init(struct net *net) { - net->ct.auto_assign_helper_warned = false; - net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper; + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + + cnet->sysctl_auto_assign_helper = nf_ct_auto_assign_helper; } int nf_conntrack_helper_init(void) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1d519b0e51a5..44e3cb80e2e0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -555,22 +555,17 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, { const struct nf_conntrack_zone *zone; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct nlattr *nest_parms; unsigned int event; if (portid) flags |= NLM_F_MULTI; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, nf_ct_l3num(ct), + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = nf_ct_l3num(ct); - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); @@ -713,7 +708,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) const struct nf_conntrack_zone *zone; struct net *net; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct nlattr *nest_parms; struct nf_conn *ct = item->ct; struct sk_buff *skb; @@ -743,15 +737,11 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto errout; type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type); - nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, nf_ct_l3num(ct), + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = nf_ct_l3num(ct); - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); @@ -2490,20 +2480,15 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, __u16 cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS_CPU); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, htons(cpu)); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) || @@ -2574,21 +2559,17 @@ static int ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct net *net) { - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; - unsigned int nr_conntracks = atomic_read(&net->ct.count); + unsigned int nr_conntracks; + struct nlmsghdr *nlh; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - + nr_conntracks = nf_conntrack_count(net); if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks))) goto nla_put_failure; @@ -3085,19 +3066,14 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct nf_conntrack_expect *exp) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, + exp->tuple.src.l3num, NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = exp->tuple.src.l3num; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; @@ -3117,7 +3093,6 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) struct nf_conntrack_expect *exp = item->exp; struct net *net = nf_ct_exp_net(exp); struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct sk_buff *skb; unsigned int type, group; int flags = 0; @@ -3140,15 +3115,11 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) goto errout; type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type); - nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, + exp->tuple.src.l3num, NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = exp->tuple.src.l3num; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; @@ -3716,20 +3687,15 @@ ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_EXP_GET_STATS_CPU); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, htons(cpu)); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_EXP_NEW, htonl(st->expect_new)) || nla_put_be32(skb, CTA_STATS_EXP_CREATE, htonl(st->expect_create)) || nla_put_be32(skb, CTA_STATS_EXP_DELETE, htonl(st->expect_delete))) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ec23330687a5..318b8f723349 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -31,20 +31,6 @@ #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> -/* "Be conservative in what you do, - be liberal in what you accept from others." - If it's non-zero, we mark only out of window RST segments as INVALID. */ -static int nf_ct_tcp_be_liberal __read_mostly = 0; - -/* If it is set to zero, we disable picking up already established - connections. */ -static int nf_ct_tcp_loose __read_mostly = 1; - -/* Max number of the retransmitted packets without receiving an (acceptable) - ACK from the destination. If this number is reached, a shorter timer - will be started. */ -static int nf_ct_tcp_max_retrans __read_mostly = 3; - /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR */ @@ -1436,9 +1422,23 @@ void nf_conntrack_tcp_init_net(struct net *net) * ->timeouts[0] contains 'new' timeout, like udp or icmp. */ tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT]; - tn->tcp_loose = nf_ct_tcp_loose; - tn->tcp_be_liberal = nf_ct_tcp_be_liberal; - tn->tcp_max_retrans = nf_ct_tcp_max_retrans; + + /* If it is set to zero, we disable picking up already established + * connections. + */ + tn->tcp_loose = 1; + + /* "Be conservative in what you do, + * be liberal in what you accept from others." + * If it's non-zero, we mark only out of window RST segments as INVALID. + */ + tn->tcp_be_liberal = 0; + + /* Max number of the retransmitted packets without receiving an (acceptable) + * ACK from the destination. If this number is reached, a shorter timer + * will be started. + */ + tn->tcp_max_retrans = 3; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp = diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0ee702d374b0..aaa55246d0ca 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -266,6 +266,7 @@ static const char* l4proto_name(u16 proto) case IPPROTO_GRE: return "gre"; case IPPROTO_SCTP: return "sctp"; case IPPROTO_UDPLITE: return "udplite"; + case IPPROTO_ICMPV6: return "icmpv6"; } return "unknown"; @@ -424,14 +425,16 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { struct net *net = seq_file_net(seq); - unsigned int nr_conntracks = atomic_read(&net->ct.count); const struct ip_conntrack_stat *st = v; + unsigned int nr_conntracks; if (v == SEQ_START_TOKEN) { seq_puts(seq, "entries clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); return 0; } + nr_conntracks = nf_conntrack_count(net); + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", nr_conntracks, @@ -507,13 +510,19 @@ static void nf_conntrack_standalone_fini_proc(struct net *net) } #endif /* CONFIG_NF_CONNTRACK_PROCFS */ +u32 nf_conntrack_count(const struct net *net) +{ + const struct nf_conntrack_net *cnet; + + cnet = net_generic(net, nf_conntrack_net_id); + + return atomic_read(&cnet->count); +} +EXPORT_SYMBOL_GPL(nf_conntrack_count); + /* Sysctl support */ #ifdef CONFIG_SYSCTL -/* Log invalid packets of a given protocol */ -static int log_invalid_proto_min __read_mostly; -static int log_invalid_proto_max __read_mostly = 255; - /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user __read_mostly; @@ -614,7 +623,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { }, [NF_SYSCTL_CT_COUNT] = { .procname = "nf_conntrack_count", - .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, @@ -629,20 +637,18 @@ static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_CHECKSUM] = { .procname = "nf_conntrack_checksum", .data = &init_net.ct.sysctl_checksum, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_LOG_INVALID] = { .procname = "nf_conntrack_log_invalid", .data = &init_net.ct.sysctl_log_invalid, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &log_invalid_proto_min, - .extra2 = &log_invalid_proto_max, + .proc_handler = proc_dou8vec_minmax, }, [NF_SYSCTL_CT_EXPECT_MAX] = { .procname = "nf_conntrack_expect_max", @@ -654,18 +660,17 @@ static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_ACCT] = { .procname = "nf_conntrack_acct", .data = &init_net.ct.sysctl_acct, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_HELPER] = { .procname = "nf_conntrack_helper", - .data = &init_net.ct.sysctl_auto_assign_helper, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -673,9 +678,9 @@ static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_EVENTS] = { .procname = "nf_conntrack_events", .data = &init_net.ct.sysctl_events, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -684,9 +689,9 @@ static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_TIMESTAMP] = { .procname = "nf_conntrack_timestamp", .data = &init_net.ct.sysctl_tstamp, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -759,25 +764,25 @@ static struct ctl_table nf_ct_sysctl_table[] = { }, [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = { .procname = "nf_conntrack_tcp_loose", - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = { .procname = "nf_conntrack_tcp_be_liberal", - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { .procname = "nf_conntrack_tcp_max_retrans", - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP] = { .procname = "nf_conntrack_udp_timeout", @@ -904,9 +909,9 @@ static struct ctl_table nf_ct_sysctl_table[] = { }, [NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = { .procname = "nf_conntrack_dccp_loose", - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -1027,6 +1032,7 @@ static void nf_conntrack_standalone_init_gre_sysctl(struct net *net, static int nf_conntrack_standalone_init_sysctl(struct net *net) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); struct nf_udp_net *un = nf_udp_pernet(net); struct ctl_table *table; @@ -1037,11 +1043,11 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) if (!table) return -ENOMEM; - table[NF_SYSCTL_CT_COUNT].data = &net->ct.count; + table[NF_SYSCTL_CT_COUNT].data = &cnet->count; table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum; table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid; table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct; - table[NF_SYSCTL_CT_HELPER].data = &net->ct.sysctl_auto_assign_helper; + table[NF_SYSCTL_CT_HELPER].data = &cnet->sysctl_auto_assign_helper; #ifdef CONFIG_NF_CONNTRACK_EVENTS table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events; #endif @@ -1059,21 +1065,15 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) nf_conntrack_standalone_init_dccp_sysctl(net, table); nf_conntrack_standalone_init_gre_sysctl(net, table); - /* Don't allow unprivileged users to alter certain sysctls */ - if (net->user_ns != &init_user_ns) { + /* Don't allow non-init_net ns to alter global sysctls */ + if (!net_eq(&init_net, net)) { table[NF_SYSCTL_CT_MAX].mode = 0444; table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444; - table[NF_SYSCTL_CT_HELPER].mode = 0444; -#ifdef CONFIG_NF_CONNTRACK_EVENTS - table[NF_SYSCTL_CT_EVENTS].mode = 0444; -#endif - table[NF_SYSCTL_CT_BUCKETS].mode = 0444; - } else if (!net_eq(&init_net, net)) { table[NF_SYSCTL_CT_BUCKETS].mode = 0444; } - net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); - if (!net->ct.sysctl_header) + cnet->sysctl_header = register_net_sysctl(net, "net/netfilter", table); + if (!cnet->sysctl_header) goto out_unregister_netfilter; return 0; @@ -1085,10 +1085,11 @@ out_unregister_netfilter: static void nf_conntrack_standalone_fini_sysctl(struct net *net) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); struct ctl_table *table; - table = net->ct.sysctl_header->ctl_table_arg; - unregister_net_sysctl_table(net->ct.sysctl_header); + table = cnet->sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(cnet->sysctl_header); kfree(table); } #else diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 1bce1d2805c4..39c02d1aeedf 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -74,6 +74,18 @@ err_ct_refcnt: } EXPORT_SYMBOL_GPL(flow_offload_alloc); +static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) +{ + const struct rt6_info *rt; + + if (flow_tuple->l3proto == NFPROTO_IPV6) { + rt = (const struct rt6_info *)flow_tuple->dst_cache; + return rt6_get_cookie(rt); + } + + return 0; +} + static int flow_offload_fill_route(struct flow_offload *flow, const struct nf_flow_route *route, enum flow_offload_tuple_dir dir) @@ -116,6 +128,10 @@ static int flow_offload_fill_route(struct flow_offload *flow, return -1; flow_tuple->dst_cache = dst; + flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple); + break; + default: + WARN_ON_ONCE(1); break; } flow_tuple->xmit_type = route->tuple[dir].xmit_type; @@ -390,11 +406,33 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, return err; } +static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple) +{ + struct dst_entry *dst; + + if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH || + tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) { + dst = tuple->dst_cache; + if (!dst_check(dst, tuple->dst_cookie)) + return true; + } + + return false; +} + +static bool nf_flow_has_stale_dst(struct flow_offload *flow) +{ + return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) || + flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple); +} + static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) { struct nf_flowtable *flow_table = data; - if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct)) + if (nf_flow_has_expired(flow) || + nf_ct_is_dying(flow->ct) || + nf_flow_has_stale_dst(flow)) set_bit(NF_FLOW_TEARDOWN, &flow->flags); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 12cb0cc6958c..889cf88d3dba 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -364,15 +364,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, iph->protocol, skb, thoff)) return NF_ACCEPT; - if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH || - tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) { - rt = (struct rtable *)tuplehash->tuple.dst_cache; - if (!dst_check(&rt->dst, 0)) { - flow_offload_teardown(flow); - return NF_ACCEPT; - } - } - if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; @@ -391,6 +382,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { + rt = (struct rtable *)tuplehash->tuple.dst_cache; memset(skb->cb, 0, sizeof(struct inet_skb_parm)); IPCB(skb)->iif = skb->dev->ifindex; IPCB(skb)->flags = IPSKB_FORWARDED; @@ -399,6 +391,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: + rt = (struct rtable *)tuplehash->tuple.dst_cache; outdev = rt->dst.dev; skb->dev = outdev; nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); @@ -607,15 +600,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff)) return NF_ACCEPT; - if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH || - tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) { - rt = (struct rt6_info *)tuplehash->tuple.dst_cache; - if (!dst_check(&rt->dst, 0)) { - flow_offload_teardown(flow); - return NF_ACCEPT; - } - } - if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; @@ -633,6 +617,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { + rt = (struct rt6_info *)tuplehash->tuple.dst_cache; memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); IP6CB(skb)->iif = skb->dev->ifindex; IP6CB(skb)->flags = IP6SKB_FORWARDED; @@ -641,6 +626,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: + rt = (struct rt6_info *)tuplehash->tuple.dst_cache; outdev = rt->dst.dev; skb->dev = outdev; nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 7d0d128407be..2af7bdb38407 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -78,6 +78,16 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match, match->dissector.used_keys |= enc_keys; } +static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key, + struct flow_dissector_key_vlan *mask, + u16 vlan_id, __be16 proto) +{ + key->vlan_id = vlan_id; + mask->vlan_id = VLAN_VID_MASK; + key->vlan_tpid = proto; + mask->vlan_tpid = 0xffff; +} + static int nf_flow_rule_match(struct nf_flow_match *match, const struct flow_offload_tuple *tuple, struct dst_entry *other_dst) @@ -85,6 +95,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match, struct nf_flow_key *mask = &match->mask; struct nf_flow_key *key = &match->key; struct ip_tunnel_info *tun_info; + bool vlan_encap = false; NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control); @@ -102,6 +113,32 @@ static int nf_flow_rule_match(struct nf_flow_match *match, key->meta.ingress_ifindex = tuple->iifidx; mask->meta.ingress_ifindex = 0xffffffff; + if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) && + tuple->encap[0].proto == htons(ETH_P_8021Q)) { + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan); + nf_flow_rule_vlan_match(&key->vlan, &mask->vlan, + tuple->encap[0].id, + tuple->encap[0].proto); + vlan_encap = true; + } + + if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) && + tuple->encap[1].proto == htons(ETH_P_8021Q)) { + if (vlan_encap) { + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN, + cvlan); + nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan, + tuple->encap[1].id, + tuple->encap[1].proto); + } else { + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, + vlan); + nf_flow_rule_vlan_match(&key->vlan, &mask->vlan, + tuple->encap[1].id, + tuple->encap[1].proto); + } + } + switch (tuple->l3proto) { case AF_INET: key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; @@ -336,12 +373,12 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, const __be32 *addr, const __be32 *mask) { struct flow_action_entry *entry; - int i; + int i, j; - for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32)) { + for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) { entry = flow_action_entry_next(flow_rule); flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, - offset + i, &addr[i], mask); + offset + i, &addr[j], mask); } } @@ -582,6 +619,7 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *other_tuple; + const struct flow_offload_tuple *tuple; int i; flow_offload_decap_tunnel(flow, dir, flow_rule); @@ -591,6 +629,20 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) return -1; + tuple = &flow->tuplehash[dir].tuple; + + for (i = 0; i < tuple->encap_num; i++) { + struct flow_action_entry *entry; + + if (tuple->in_vlan_ingress & BIT(i)) + continue; + + if (tuple->encap[i].proto == htons(ETH_P_8021Q)) { + entry = flow_action_entry_next(flow_rule); + entry->id = FLOW_ACTION_VLAN_POP; + } + } + other_tuple = &flow->tuplehash[!dir].tuple; for (i = 0; i < other_tuple->encap_num; i++) { diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 6cb9f9474b05..edee7fa944c1 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -151,13 +151,6 @@ void nf_log_unbind_pf(struct net *net, u_int8_t pf) } EXPORT_SYMBOL(nf_log_unbind_pf); -void nf_logger_request_module(int pf, enum nf_log_type type) -{ - if (loggers[pf][type] == NULL) - request_module("nf-logger-%u-%u", pf, type); -} -EXPORT_SYMBOL_GPL(nf_logger_request_module); - int nf_logger_find_get(int pf, enum nf_log_type type) { struct nf_logger *logger; @@ -177,9 +170,6 @@ int nf_logger_find_get(int pf, enum nf_log_type type) return 0; } - if (rcu_access_pointer(loggers[pf][type]) == NULL) - request_module("nf-logger-%u-%u", pf, type); - rcu_read_lock(); logger = rcu_dereference(loggers[pf][type]); if (logger == NULL) diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c deleted file mode 100644 index fd7c5f0f5c25..000000000000 --- a/net/netfilter/nf_log_common.c +++ /dev/null @@ -1,224 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - */ - -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/skbuff.h> -#include <linux/if_arp.h> -#include <linux/ip.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/tcp.h> -#include <net/route.h> - -#include <linux/netfilter.h> -#include <linux/netfilter_bridge.h> -#include <linux/netfilter/xt_LOG.h> -#include <net/netfilter/nf_log.h> - -int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb, - u8 proto, int fragment, unsigned int offset) -{ - struct udphdr _udph; - const struct udphdr *uh; - - if (proto == IPPROTO_UDP) - /* Max length: 10 "PROTO=UDP " */ - nf_log_buf_add(m, "PROTO=UDP "); - else /* Max length: 14 "PROTO=UDPLITE " */ - nf_log_buf_add(m, "PROTO=UDPLITE "); - - if (fragment) - goto out; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); - if (uh == NULL) { - nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); - - return 1; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - nf_log_buf_add(m, "SPT=%u DPT=%u LEN=%u ", - ntohs(uh->source), ntohs(uh->dest), ntohs(uh->len)); - -out: - return 0; -} -EXPORT_SYMBOL_GPL(nf_log_dump_udp_header); - -int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, - u8 proto, int fragment, unsigned int offset, - unsigned int logflags) -{ - struct tcphdr _tcph; - const struct tcphdr *th; - - /* Max length: 10 "PROTO=TCP " */ - nf_log_buf_add(m, "PROTO=TCP "); - - if (fragment) - return 0; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); - if (th == NULL) { - nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); - return 1; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - nf_log_buf_add(m, "SPT=%u DPT=%u ", - ntohs(th->source), ntohs(th->dest)); - /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (logflags & NF_LOG_TCPSEQ) { - nf_log_buf_add(m, "SEQ=%u ACK=%u ", - ntohl(th->seq), ntohl(th->ack_seq)); - } - - /* Max length: 13 "WINDOW=65535 " */ - nf_log_buf_add(m, "WINDOW=%u ", ntohs(th->window)); - /* Max length: 9 "RES=0x3C " */ - nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & - TCP_RESERVED_BITS) >> 22)); - /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ - if (th->cwr) - nf_log_buf_add(m, "CWR "); - if (th->ece) - nf_log_buf_add(m, "ECE "); - if (th->urg) - nf_log_buf_add(m, "URG "); - if (th->ack) - nf_log_buf_add(m, "ACK "); - if (th->psh) - nf_log_buf_add(m, "PSH "); - if (th->rst) - nf_log_buf_add(m, "RST "); - if (th->syn) - nf_log_buf_add(m, "SYN "); - if (th->fin) - nf_log_buf_add(m, "FIN "); - /* Max length: 11 "URGP=65535 " */ - nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr)); - - if ((logflags & NF_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { - u_int8_t _opt[60 - sizeof(struct tcphdr)]; - const u_int8_t *op; - unsigned int i; - unsigned int optsize = th->doff*4 - sizeof(struct tcphdr); - - op = skb_header_pointer(skb, offset + sizeof(struct tcphdr), - optsize, _opt); - if (op == NULL) { - nf_log_buf_add(m, "OPT (TRUNCATED)"); - return 1; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - nf_log_buf_add(m, "OPT ("); - for (i = 0; i < optsize; i++) - nf_log_buf_add(m, "%02X", op[i]); - - nf_log_buf_add(m, ") "); - } - - return 0; -} -EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header); - -void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m, - struct sock *sk) -{ - if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk))) - return; - - read_lock_bh(&sk->sk_callback_lock); - if (sk->sk_socket && sk->sk_socket->file) { - const struct cred *cred = sk->sk_socket->file->f_cred; - nf_log_buf_add(m, "UID=%u GID=%u ", - from_kuid_munged(&init_user_ns, cred->fsuid), - from_kgid_munged(&init_user_ns, cred->fsgid)); - } - read_unlock_bh(&sk->sk_callback_lock); -} -EXPORT_SYMBOL_GPL(nf_log_dump_sk_uid_gid); - -void -nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, - unsigned int hooknum, const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, const char *prefix) -{ - const struct net_device *physoutdev __maybe_unused; - const struct net_device *physindev __maybe_unused; - - nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", - '0' + loginfo->u.log.level, prefix, - in ? in->name : "", - out ? out->name : ""); -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - physindev = nf_bridge_get_physindev(skb); - if (physindev && in != physindev) - nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); - physoutdev = nf_bridge_get_physoutdev(skb); - if (physoutdev && out != physoutdev) - nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); -#endif -} -EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); - -void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb) -{ - u16 vid; - - if (!skb_vlan_tag_present(skb)) - return; - - vid = skb_vlan_tag_get(skb); - nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid); -} -EXPORT_SYMBOL_GPL(nf_log_dump_vlan); - -/* bridge and netdev logging families share this code. */ -void nf_log_l2packet(struct net *net, u_int8_t pf, - __be16 protocol, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - switch (protocol) { - case htons(ETH_P_IP): - nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_IPV6): - nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_ARP): - case htons(ETH_P_RARP): - nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - } -} -EXPORT_SYMBOL_GPL(nf_log_l2packet); - -static int __init nf_log_common_init(void) -{ - return 0; -} - -static void __exit nf_log_common_exit(void) {} - -module_init(nf_log_common_init); -module_exit(nf_log_common_exit); - -MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c deleted file mode 100644 index 968dafa684c9..000000000000 --- a/net/netfilter/nf_log_netdev.c +++ /dev/null @@ -1,78 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org> - */ - -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/route.h> - -#include <linux/netfilter.h> -#include <net/netfilter/nf_log.h> - -static void nf_log_netdev_packet(struct net *net, u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - nf_log_l2packet(net, pf, skb->protocol, hooknum, skb, in, out, - loginfo, prefix); -} - -static struct nf_logger nf_netdev_logger __read_mostly = { - .name = "nf_log_netdev", - .type = NF_LOG_TYPE_LOG, - .logfn = nf_log_netdev_packet, - .me = THIS_MODULE, -}; - -static int __net_init nf_log_netdev_net_init(struct net *net) -{ - return nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger); -} - -static void __net_exit nf_log_netdev_net_exit(struct net *net) -{ - nf_log_unset(net, &nf_netdev_logger); -} - -static struct pernet_operations nf_log_netdev_net_ops = { - .init = nf_log_netdev_net_init, - .exit = nf_log_netdev_net_exit, -}; - -static int __init nf_log_netdev_init(void) -{ - int ret; - - /* Request to load the real packet loggers. */ - nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG); - nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG); - nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG); - - ret = register_pernet_subsys(&nf_log_netdev_net_ops); - if (ret < 0) - return ret; - - nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger); - return 0; -} - -static void __exit nf_log_netdev_exit(void) -{ - unregister_pernet_subsys(&nf_log_netdev_net_ops); - nf_log_unregister(&nf_netdev_logger); -} - -module_init(nf_log_netdev_init); -module_exit(nf_log_netdev_exit); - -MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); -MODULE_DESCRIPTION("Netfilter netdev packet logging"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */ diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c new file mode 100644 index 000000000000..2518818ed479 --- /dev/null +++ b/net/netfilter/nf_log_syslog.c @@ -0,0 +1,1089 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/ip.h> +#include <net/ipv6.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/tcp.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <linux/netfilter_bridge.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/xt_LOG.h> +#include <net/netfilter/nf_log.h> + +static const struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = LOGLEVEL_NOTICE, + .logflags = NF_LOG_DEFAULT_MASK, + }, + }, +}; + +struct arppayload { + unsigned char mac_src[ETH_ALEN]; + unsigned char ip_src[4]; + unsigned char mac_dst[ETH_ALEN]; + unsigned char ip_dst[4]; +}; + +static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb) +{ + u16 vid; + + if (!skb_vlan_tag_present(skb)) + return; + + vid = skb_vlan_tag_get(skb); + nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid); +} +static void noinline_for_stack +dump_arp_packet(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, unsigned int nhoff) +{ + const struct arppayload *ap; + struct arppayload _arpp; + const struct arphdr *ah; + unsigned int logflags; + struct arphdr _arph; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (!ah) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_DEFAULT_MASK; + + if (logflags & NF_LOG_MACDECODE) { + nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); + nf_log_dump_vlan(m, skb); + nf_log_buf_add(m, "MACPROTO=%04x ", + ntohs(eth_hdr(skb)->h_proto)); + } + + nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d", + ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op)); + /* If it's for Ethernet and the lengths are OK, then log the ARP + * payload. + */ + if (ah->ar_hrd != htons(ARPHRD_ETHER) || + ah->ar_hln != ETH_ALEN || + ah->ar_pln != sizeof(__be32)) + return; + + ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); + if (!ap) { + nf_log_buf_add(m, " INCOMPLETE [%zu bytes]", + skb->len - sizeof(_arph)); + return; + } + nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4", + ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); +} + +static void +nf_log_dump_packet_common(struct nf_log_buf *m, u8 pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *prefix) +{ + const struct net_device *physoutdev __maybe_unused; + const struct net_device *physindev __maybe_unused; + + nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", + '0' + loginfo->u.log.level, prefix, + in ? in->name : "", + out ? out->name : ""); +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + physindev = nf_bridge_get_physindev(skb); + if (physindev && in != physindev) + nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); + physoutdev = nf_bridge_get_physoutdev(skb); + if (physoutdev && out != physoutdev) + nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); +#endif +} + +static void nf_log_arp_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct nf_log_buf *m; + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) + return; + + m = nf_log_buf_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, + prefix); + dump_arp_packet(m, loginfo, skb, 0); + + nf_log_buf_close(m); +} + +static struct nf_logger nf_arp_logger __read_mostly = { + .name = "nf_log_arp", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_arp_packet, + .me = THIS_MODULE, +}; + +static void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m, + struct sock *sk) +{ + if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk))) + return; + + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) { + const struct cred *cred = sk->sk_socket->file->f_cred; + + nf_log_buf_add(m, "UID=%u GID=%u ", + from_kuid_munged(&init_user_ns, cred->fsuid), + from_kgid_munged(&init_user_ns, cred->fsgid)); + } + read_unlock_bh(&sk->sk_callback_lock); +} + +static noinline_for_stack int +nf_log_dump_tcp_header(struct nf_log_buf *m, + const struct sk_buff *skb, + u8 proto, int fragment, + unsigned int offset, + unsigned int logflags) +{ + struct tcphdr _tcph; + const struct tcphdr *th; + + /* Max length: 10 "PROTO=TCP " */ + nf_log_buf_add(m, "PROTO=TCP "); + + if (fragment) + return 0; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); + if (!th) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); + return 1; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + nf_log_buf_add(m, "SPT=%u DPT=%u ", + ntohs(th->source), ntohs(th->dest)); + /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ + if (logflags & NF_LOG_TCPSEQ) { + nf_log_buf_add(m, "SEQ=%u ACK=%u ", + ntohl(th->seq), ntohl(th->ack_seq)); + } + + /* Max length: 13 "WINDOW=65535 " */ + nf_log_buf_add(m, "WINDOW=%u ", ntohs(th->window)); + /* Max length: 9 "RES=0x3C " */ + nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & + TCP_RESERVED_BITS) >> 22)); + /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ + if (th->cwr) + nf_log_buf_add(m, "CWR "); + if (th->ece) + nf_log_buf_add(m, "ECE "); + if (th->urg) + nf_log_buf_add(m, "URG "); + if (th->ack) + nf_log_buf_add(m, "ACK "); + if (th->psh) + nf_log_buf_add(m, "PSH "); + if (th->rst) + nf_log_buf_add(m, "RST "); + if (th->syn) + nf_log_buf_add(m, "SYN "); + if (th->fin) + nf_log_buf_add(m, "FIN "); + /* Max length: 11 "URGP=65535 " */ + nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr)); + + if ((logflags & NF_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { + unsigned int optsize = th->doff * 4 - sizeof(struct tcphdr); + u8 _opt[60 - sizeof(struct tcphdr)]; + unsigned int i; + const u8 *op; + + op = skb_header_pointer(skb, offset + sizeof(struct tcphdr), + optsize, _opt); + if (!op) { + nf_log_buf_add(m, "OPT (TRUNCATED)"); + return 1; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + nf_log_buf_add(m, "OPT ("); + for (i = 0; i < optsize; i++) + nf_log_buf_add(m, "%02X", op[i]); + + nf_log_buf_add(m, ") "); + } + + return 0; +} + +static noinline_for_stack int +nf_log_dump_udp_header(struct nf_log_buf *m, + const struct sk_buff *skb, + u8 proto, int fragment, + unsigned int offset) +{ + struct udphdr _udph; + const struct udphdr *uh; + + if (proto == IPPROTO_UDP) + /* Max length: 10 "PROTO=UDP " */ + nf_log_buf_add(m, "PROTO=UDP "); + else /* Max length: 14 "PROTO=UDPLITE " */ + nf_log_buf_add(m, "PROTO=UDPLITE "); + + if (fragment) + goto out; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); + if (!uh) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); + + return 1; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + nf_log_buf_add(m, "SPT=%u DPT=%u LEN=%u ", + ntohs(uh->source), ntohs(uh->dest), ntohs(uh->len)); + +out: + return 0; +} + +/* One level of recursion won't kill us */ +static noinline_for_stack void +dump_ipv4_packet(struct net *net, struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, unsigned int iphoff) +{ + const struct iphdr *ih; + unsigned int logflags; + struct iphdr _iph; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_DEFAULT_MASK; + + ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); + if (!ih) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Important fields: + * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. + * Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " + */ + nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr); + + /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ + nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", + ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, + ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); + + /* Max length: 6 "CE DF MF " */ + if (ntohs(ih->frag_off) & IP_CE) + nf_log_buf_add(m, "CE "); + if (ntohs(ih->frag_off) & IP_DF) + nf_log_buf_add(m, "DF "); + if (ntohs(ih->frag_off) & IP_MF) + nf_log_buf_add(m, "MF "); + + /* Max length: 11 "FRAG:65535 " */ + if (ntohs(ih->frag_off) & IP_OFFSET) + nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); + + if ((logflags & NF_LOG_IPOPT) && + ih->ihl * 4 > sizeof(struct iphdr)) { + unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; + const unsigned char *op; + unsigned int i, optsize; + + optsize = ih->ihl * 4 - sizeof(struct iphdr); + op = skb_header_pointer(skb, iphoff + sizeof(_iph), + optsize, _opt); + if (!op) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + nf_log_buf_add(m, "OPT ("); + for (i = 0; i < optsize; i++) + nf_log_buf_add(m, "%02X", op[i]); + nf_log_buf_add(m, ") "); + } + + switch (ih->protocol) { + case IPPROTO_TCP: + if (nf_log_dump_tcp_header(m, skb, ih->protocol, + ntohs(ih->frag_off) & IP_OFFSET, + iphoff + ih->ihl * 4, logflags)) + return; + break; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + if (nf_log_dump_udp_header(m, skb, ih->protocol, + ntohs(ih->frag_off) & IP_OFFSET, + iphoff + ih->ihl * 4)) + return; + break; + case IPPROTO_ICMP: { + static const size_t required_len[NR_ICMP_TYPES + 1] = { + [ICMP_ECHOREPLY] = 4, + [ICMP_DEST_UNREACH] = 8 + sizeof(struct iphdr), + [ICMP_SOURCE_QUENCH] = 8 + sizeof(struct iphdr), + [ICMP_REDIRECT] = 8 + sizeof(struct iphdr), + [ICMP_ECHO] = 4, + [ICMP_TIME_EXCEEDED] = 8 + sizeof(struct iphdr), + [ICMP_PARAMETERPROB] = 8 + sizeof(struct iphdr), + [ICMP_TIMESTAMP] = 20, + [ICMP_TIMESTAMPREPLY] = 20, + [ICMP_ADDRESS] = 12, + [ICMP_ADDRESSREPLY] = 12 }; + const struct icmphdr *ich; + struct icmphdr _icmph; + + /* Max length: 11 "PROTO=ICMP " */ + nf_log_buf_add(m, "PROTO=ICMP "); + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, + sizeof(_icmph), &_icmph); + if (!ich) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl * 4); + break; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (ich->type <= NR_ICMP_TYPES && + required_len[ich->type] && + skb->len - iphoff - ih->ihl * 4 < required_len[ich->type]) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl * 4); + break; + } + + switch (ich->type) { + case ICMP_ECHOREPLY: + case ICMP_ECHO: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + nf_log_buf_add(m, "ID=%u SEQ=%u ", + ntohs(ich->un.echo.id), + ntohs(ich->un.echo.sequence)); + break; + + case ICMP_PARAMETERPROB: + /* Max length: 14 "PARAMETER=255 " */ + nf_log_buf_add(m, "PARAMETER=%u ", + ntohl(ich->un.gateway) >> 24); + break; + case ICMP_REDIRECT: + /* Max length: 24 "GATEWAY=255.255.255.255 " */ + nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); + fallthrough; + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_TIME_EXCEEDED: + /* Max length: 3+maxlen */ + if (!iphoff) { /* Only recurse once. */ + nf_log_buf_add(m, "["); + dump_ipv4_packet(net, m, info, skb, + iphoff + ih->ihl * 4 + sizeof(_icmph)); + nf_log_buf_add(m, "] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (ich->type == ICMP_DEST_UNREACH && + ich->code == ICMP_FRAG_NEEDED) { + nf_log_buf_add(m, "MTU=%u ", + ntohs(ich->un.frag.mtu)); + } + } + break; + } + /* Max Length */ + case IPPROTO_AH: { + const struct ip_auth_hdr *ah; + struct ip_auth_hdr _ahdr; + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 9 "PROTO=AH " */ + nf_log_buf_add(m, "PROTO=AH "); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ah = skb_header_pointer(skb, iphoff + ih->ihl * 4, + sizeof(_ahdr), &_ahdr); + if (!ah) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl * 4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); + break; + } + case IPPROTO_ESP: { + const struct ip_esp_hdr *eh; + struct ip_esp_hdr _esph; + + /* Max length: 10 "PROTO=ESP " */ + nf_log_buf_add(m, "PROTO=ESP "); + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + eh = skb_header_pointer(skb, iphoff + ih->ihl * 4, + sizeof(_esph), &_esph); + if (!eh) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl * 4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi)); + break; + } + /* Max length: 10 "PROTO 255 " */ + default: + nf_log_buf_add(m, "PROTO=%u ", ih->protocol); + } + + /* Max length: 15 "UID=4294967295 " */ + if ((logflags & NF_LOG_UID) && !iphoff) + nf_log_dump_sk_uid_gid(net, m, skb->sk); + + /* Max length: 16 "MARK=0xFFFFFFFF " */ + if (!iphoff && skb->mark) + nf_log_buf_add(m, "MARK=0x%x ", skb->mark); + + /* Proto Max log string length */ + /* IP: 40+46+6+11+127 = 230 */ + /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ + /* UDP: 10+max(25,20) = 35 */ + /* UDPLITE: 14+max(25,20) = 39 */ + /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ + /* ESP: 10+max(25)+15 = 50 */ + /* AH: 9+max(25)+15 = 49 */ + /* unknown: 10 */ + + /* (ICMP allows recursion one level deep) */ + /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ + /* maxlen = 230+ 91 + 230 + 252 = 803 */ +} + +static noinline_for_stack void +dump_ipv6_packet(struct net *net, struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, unsigned int ip6hoff, + int recurse) +{ + const struct ipv6hdr *ih; + unsigned int hdrlen = 0; + unsigned int logflags; + struct ipv6hdr _ip6h; + unsigned int ptr; + u8 currenthdr; + int fragment; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_DEFAULT_MASK; + + ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); + if (!ih) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ + nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); + + /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ + nf_log_buf_add(m, "LEN=%zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", + ntohs(ih->payload_len) + sizeof(struct ipv6hdr), + (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, + ih->hop_limit, + (ntohl(*(__be32 *)ih) & 0x000fffff)); + + fragment = 0; + ptr = ip6hoff + sizeof(struct ipv6hdr); + currenthdr = ih->nexthdr; + while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) { + struct ipv6_opt_hdr _hdr; + const struct ipv6_opt_hdr *hp; + + hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); + if (!hp) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Max length: 48 "OPT (...) " */ + if (logflags & NF_LOG_IPOPT) + nf_log_buf_add(m, "OPT ( "); + + switch (currenthdr) { + case IPPROTO_FRAGMENT: { + struct frag_hdr _fhdr; + const struct frag_hdr *fh; + + nf_log_buf_add(m, "FRAG:"); + fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), + &_fhdr); + if (!fh) { + nf_log_buf_add(m, "TRUNCATED "); + return; + } + + /* Max length: 6 "65535 " */ + nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); + + /* Max length: 11 "INCOMPLETE " */ + if (fh->frag_off & htons(0x0001)) + nf_log_buf_add(m, "INCOMPLETE "); + + nf_log_buf_add(m, "ID:%08x ", + ntohl(fh->identification)); + + if (ntohs(fh->frag_off) & 0xFFF8) + fragment = 1; + + hdrlen = 8; + break; + } + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + case IPPROTO_HOPOPTS: + if (fragment) { + if (logflags & NF_LOG_IPOPT) + nf_log_buf_add(m, ")"); + return; + } + hdrlen = ipv6_optlen(hp); + break; + /* Max Length */ + case IPPROTO_AH: + if (logflags & NF_LOG_IPOPT) { + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + + /* Max length: 3 "AH " */ + nf_log_buf_add(m, "AH "); + + if (fragment) { + nf_log_buf_add(m, ")"); + return; + } + + ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), + &_ahdr); + if (!ah) { + /* Max length: 26 "INCOMPLETE [65535 bytes] )" */ + nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", + skb->len - ptr); + return; + } + + /* Length: 15 "SPI=0xF1234567 */ + nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); + } + + hdrlen = ipv6_authlen(hp); + break; + case IPPROTO_ESP: + if (logflags & NF_LOG_IPOPT) { + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; + + /* Max length: 4 "ESP " */ + nf_log_buf_add(m, "ESP "); + + if (fragment) { + nf_log_buf_add(m, ")"); + return; + } + + /* Max length: 26 "INCOMPLETE [65535 bytes] )" */ + eh = skb_header_pointer(skb, ptr, sizeof(_esph), + &_esph); + if (!eh) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", + skb->len - ptr); + return; + } + + /* Length: 16 "SPI=0xF1234567 )" */ + nf_log_buf_add(m, "SPI=0x%x )", + ntohl(eh->spi)); + } + return; + default: + /* Max length: 20 "Unknown Ext Hdr 255" */ + nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr); + return; + } + if (logflags & NF_LOG_IPOPT) + nf_log_buf_add(m, ") "); + + currenthdr = hp->nexthdr; + ptr += hdrlen; + } + + switch (currenthdr) { + case IPPROTO_TCP: + if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment, + ptr, logflags)) + return; + break; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr)) + return; + break; + case IPPROTO_ICMPV6: { + struct icmp6hdr _icmp6h; + const struct icmp6hdr *ic; + + /* Max length: 13 "PROTO=ICMPv6 " */ + nf_log_buf_add(m, "PROTO=ICMPv6 "); + + if (fragment) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); + if (!ic) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - ptr); + return; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + nf_log_buf_add(m, "TYPE=%u CODE=%u ", + ic->icmp6_type, ic->icmp6_code); + + switch (ic->icmp6_type) { + case ICMPV6_ECHO_REQUEST: + case ICMPV6_ECHO_REPLY: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + nf_log_buf_add(m, "ID=%u SEQ=%u ", + ntohs(ic->icmp6_identifier), + ntohs(ic->icmp6_sequence)); + break; + case ICMPV6_MGM_QUERY: + case ICMPV6_MGM_REPORT: + case ICMPV6_MGM_REDUCTION: + break; + + case ICMPV6_PARAMPROB: + /* Max length: 17 "POINTER=ffffffff " */ + nf_log_buf_add(m, "POINTER=%08x ", + ntohl(ic->icmp6_pointer)); + fallthrough; + case ICMPV6_DEST_UNREACH: + case ICMPV6_PKT_TOOBIG: + case ICMPV6_TIME_EXCEED: + /* Max length: 3+maxlen */ + if (recurse) { + nf_log_buf_add(m, "["); + dump_ipv6_packet(net, m, info, skb, + ptr + sizeof(_icmp6h), 0); + nf_log_buf_add(m, "] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) { + nf_log_buf_add(m, "MTU=%u ", + ntohl(ic->icmp6_mtu)); + } + } + break; + } + /* Max length: 10 "PROTO=255 " */ + default: + nf_log_buf_add(m, "PROTO=%u ", currenthdr); + } + + /* Max length: 15 "UID=4294967295 " */ + if ((logflags & NF_LOG_UID) && recurse) + nf_log_dump_sk_uid_gid(net, m, skb->sk); + + /* Max length: 16 "MARK=0xFFFFFFFF " */ + if (recurse && skb->mark) + nf_log_buf_add(m, "MARK=0x%x ", skb->mark); +} + +static void dump_ipv4_mac_header(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & NF_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); + nf_log_dump_vlan(m, skb); + nf_log_buf_add(m, "MACPROTO=%04x ", + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + nf_log_buf_add(m, "MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int i; + + nf_log_buf_add(m, "%02x", *p++); + for (i = 1; i < dev->hard_header_len; i++, p++) + nf_log_buf_add(m, ":%02x", *p); + } + nf_log_buf_add(m, " "); +} + +static void nf_log_ip_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct nf_log_buf *m; + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) + return; + + m = nf_log_buf_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + nf_log_dump_packet_common(m, pf, hooknum, skb, in, + out, loginfo, prefix); + + if (in) + dump_ipv4_mac_header(m, loginfo, skb); + + dump_ipv4_packet(net, m, loginfo, skb, 0); + + nf_log_buf_close(m); +} + +static struct nf_logger nf_ip_logger __read_mostly = { + .name = "nf_log_ipv4", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_ip_packet, + .me = THIS_MODULE, +}; + +static void dump_ipv6_mac_header(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & NF_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); + nf_log_dump_vlan(m, skb); + nf_log_buf_add(m, "MACPROTO=%04x ", + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + nf_log_buf_add(m, "MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int len = dev->hard_header_len; + unsigned int i; + + if (dev->type == ARPHRD_SIT) { + p -= ETH_HLEN; + + if (p < skb->head) + p = NULL; + } + + if (p) { + nf_log_buf_add(m, "%02x", *p++); + for (i = 1; i < len; i++) + nf_log_buf_add(m, ":%02x", *p++); + } + nf_log_buf_add(m, " "); + + if (dev->type == ARPHRD_SIT) { + const struct iphdr *iph = + (struct iphdr *)skb_mac_header(skb); + nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, + &iph->daddr); + } + } else { + nf_log_buf_add(m, " "); + } +} + +static void nf_log_ip6_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct nf_log_buf *m; + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) + return; + + m = nf_log_buf_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, + loginfo, prefix); + + if (in) + dump_ipv6_mac_header(m, loginfo, skb); + + dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1); + + nf_log_buf_close(m); +} + +static struct nf_logger nf_ip6_logger __read_mostly = { + .name = "nf_log_ipv6", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_ip6_packet, + .me = THIS_MODULE, +}; + +static void nf_log_netdev_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + nf_log_ip_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); + break; + case htons(ETH_P_IPV6): + nf_log_ip6_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); + break; + case htons(ETH_P_ARP): + case htons(ETH_P_RARP): + nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); + break; + } +} + +static struct nf_logger nf_netdev_logger __read_mostly = { + .name = "nf_log_netdev", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_netdev_packet, + .me = THIS_MODULE, +}; + +static struct nf_logger nf_bridge_logger __read_mostly = { + .name = "nf_log_bridge", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_netdev_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_syslog_net_init(struct net *net) +{ + int ret = nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); + + if (ret) + return ret; + + ret = nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); + if (ret) + goto err1; + + ret = nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); + if (ret) + goto err2; + + ret = nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger); + if (ret) + goto err3; + + ret = nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); + if (ret) + goto err4; + return 0; +err4: + nf_log_unset(net, &nf_netdev_logger); +err3: + nf_log_unset(net, &nf_ip6_logger); +err2: + nf_log_unset(net, &nf_arp_logger); +err1: + nf_log_unset(net, &nf_ip_logger); + return ret; +} + +static void __net_exit nf_log_syslog_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_ip_logger); + nf_log_unset(net, &nf_arp_logger); + nf_log_unset(net, &nf_ip6_logger); + nf_log_unset(net, &nf_netdev_logger); +} + +static struct pernet_operations nf_log_syslog_net_ops = { + .init = nf_log_syslog_net_init, + .exit = nf_log_syslog_net_exit, +}; + +static int __init nf_log_syslog_init(void) +{ + int ret; + + ret = register_pernet_subsys(&nf_log_syslog_net_ops); + if (ret < 0) + return ret; + + ret = nf_log_register(NFPROTO_IPV4, &nf_ip_logger); + if (ret < 0) + goto err1; + + ret = nf_log_register(NFPROTO_ARP, &nf_arp_logger); + if (ret < 0) + goto err2; + + ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + if (ret < 0) + goto err3; + + ret = nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger); + if (ret < 0) + goto err4; + + ret = nf_log_register(NFPROTO_BRIDGE, &nf_bridge_logger); + if (ret < 0) + goto err5; + + return 0; +err5: + nf_log_unregister(&nf_netdev_logger); +err4: + nf_log_unregister(&nf_ip6_logger); +err3: + nf_log_unregister(&nf_arp_logger); +err2: + nf_log_unregister(&nf_ip_logger); +err1: + pr_err("failed to register logger\n"); + unregister_pernet_subsys(&nf_log_syslog_net_ops); + return ret; +} + +static void __exit nf_log_syslog_exit(void) +{ + unregister_pernet_subsys(&nf_log_syslog_net_ops); + nf_log_unregister(&nf_ip_logger); + nf_log_unregister(&nf_arp_logger); + nf_log_unregister(&nf_ip6_logger); + nf_log_unregister(&nf_netdev_logger); + nf_log_unregister(&nf_bridge_logger); +} + +module_init(nf_log_syslog_init); +module_exit(nf_log_syslog_exit); + +MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter syslog packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf_log_arp"); +MODULE_ALIAS("nf_log_bridge"); +MODULE_ALIAS("nf_log_ipv4"); +MODULE_ALIAS("nf_log_ipv6"); +MODULE_ALIAS("nf_log_netdev"); +MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 0); +MODULE_ALIAS_NF_LOGGER(AF_INET, 0); +MODULE_ALIAS_NF_LOGGER(3, 0); +MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */ +MODULE_ALIAS_NF_LOGGER(AF_INET6, 0); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fc2526b8bd55..357443b3c0e4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -21,10 +21,13 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_offload.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/sock.h> #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) +unsigned int nf_tables_net_id __read_mostly; + static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); @@ -66,9 +69,46 @@ static const struct rhashtable_params nft_objname_ht_params = { .automatic_shrinking = true, }; +struct nft_audit_data { + struct nft_table *table; + int entries; + int op; + struct list_head list; +}; + +static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types + [NFT_MSG_NEWTABLE] = AUDIT_NFT_OP_TABLE_REGISTER, + [NFT_MSG_GETTABLE] = AUDIT_NFT_OP_INVALID, + [NFT_MSG_DELTABLE] = AUDIT_NFT_OP_TABLE_UNREGISTER, + [NFT_MSG_NEWCHAIN] = AUDIT_NFT_OP_CHAIN_REGISTER, + [NFT_MSG_GETCHAIN] = AUDIT_NFT_OP_INVALID, + [NFT_MSG_DELCHAIN] = AUDIT_NFT_OP_CHAIN_UNREGISTER, + [NFT_MSG_NEWRULE] = AUDIT_NFT_OP_RULE_REGISTER, + [NFT_MSG_GETRULE] = AUDIT_NFT_OP_INVALID, + [NFT_MSG_DELRULE] = AUDIT_NFT_OP_RULE_UNREGISTER, + [NFT_MSG_NEWSET] = AUDIT_NFT_OP_SET_REGISTER, + [NFT_MSG_GETSET] = AUDIT_NFT_OP_INVALID, + [NFT_MSG_DELSET] = AUDIT_NFT_OP_SET_UNREGISTER, + [NFT_MSG_NEWSETELEM] = AUDIT_NFT_OP_SETELEM_REGISTER, + [NFT_MSG_GETSETELEM] = AUDIT_NFT_OP_INVALID, + [NFT_MSG_DELSETELEM] = AUDIT_NFT_OP_SETELEM_UNREGISTER, + [NFT_MSG_NEWGEN] = AUDIT_NFT_OP_GEN_REGISTER, + [NFT_MSG_GETGEN] = AUDIT_NFT_OP_INVALID, + [NFT_MSG_TRACE] = AUDIT_NFT_OP_INVALID, + [NFT_MSG_NEWOBJ] = AUDIT_NFT_OP_OBJ_REGISTER, + [NFT_MSG_GETOBJ] = AUDIT_NFT_OP_INVALID, + [NFT_MSG_DELOBJ] = AUDIT_NFT_OP_OBJ_UNREGISTER, + [NFT_MSG_GETOBJ_RESET] = AUDIT_NFT_OP_OBJ_RESET, + [NFT_MSG_NEWFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_REGISTER, + [NFT_MSG_GETFLOWTABLE] = AUDIT_NFT_OP_INVALID, + [NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, +}; + static void nft_validate_state_update(struct net *net, u8 new_validate_state) { - switch (net->nft.validate_state) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + switch (nft_net->validate_state) { case NFT_VALIDATE_SKIP: WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO); break; @@ -79,7 +119,7 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state) return; } - net->nft.validate_state = new_validate_state; + nft_net->validate_state = new_validate_state; } static void nf_tables_trans_destroy_work(struct work_struct *w); static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); @@ -134,13 +174,15 @@ static void nft_trans_destroy(struct nft_trans *trans) static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) { + struct nftables_pernet *nft_net; struct net *net = ctx->net; struct nft_trans *trans; if (!nft_set_is_anonymous(set)) return; - list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWSET: if (nft_trans_set(trans) == set) @@ -234,6 +276,14 @@ static void nf_tables_unregister_hook(struct net *net, nf_unregister_net_hook(net, &basechain->ops); } +static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) +{ + struct nftables_pernet *nft_net; + + nft_net = net_generic(net, nf_tables_net_id); + list_add_tail(&trans->list, &nft_net->commit_list); +} + static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) { struct nft_trans *trans; @@ -245,7 +295,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) if (msg_type == NFT_MSG_NEWTABLE) nft_activate_next(ctx->net, ctx->table); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -278,7 +328,7 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) } } - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return trans; } @@ -351,7 +401,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); } nft_trans_rule(trans) = rule; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return trans; } @@ -417,7 +467,7 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -449,7 +499,7 @@ static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type, nft_activate_next(ctx->net, obj); nft_trans_obj(trans) = obj; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -482,7 +532,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, nft_activate_next(ctx->net, flowtable); nft_trans_flowtable(trans) = flowtable; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -510,13 +560,15 @@ static struct nft_table *nft_table_lookup(const struct net *net, const struct nlattr *nla, u8 family, u8 genmask, u32 nlpid) { + struct nftables_pernet *nft_net; struct nft_table *table; if (nla == NULL) return ERR_PTR(-EINVAL); - list_for_each_entry_rcu(table, &net->nft.tables, list, - lockdep_is_held(&net->nft.commit_mutex)) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_rcu(table, &nft_net->tables, list, + lockdep_is_held(&nft_net->commit_mutex)) { if (!nla_strcmp(nla, table->name) && table->family == family && nft_active_genmask(table, genmask)) { @@ -535,9 +587,11 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net, const struct nlattr *nla, u8 genmask) { + struct nftables_pernet *nft_net; struct nft_table *table; - list_for_each_entry(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry(table, &nft_net->tables, list) { if (be64_to_cpu(nla_get_be64(nla)) == table->handle && nft_active_genmask(table, genmask)) return table; @@ -586,10 +640,11 @@ struct nft_module_request { }; #ifdef CONFIG_MODULES -static __printf(2, 3) int nft_request_module(struct net *net, const char *fmt, - ...) +__printf(2, 3) int nft_request_module(struct net *net, const char *fmt, + ...) { char module_name[MODULE_NAME_LEN]; + struct nftables_pernet *nft_net; struct nft_module_request *req; va_list args; int ret; @@ -600,7 +655,8 @@ static __printf(2, 3) int nft_request_module(struct net *net, const char *fmt, if (ret >= MODULE_NAME_LEN) return 0; - list_for_each_entry(req, &net->nft.module_list, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry(req, &nft_net->module_list, list) { if (!strcmp(req->module, module_name)) { if (req->done) return 0; @@ -616,10 +672,11 @@ static __printf(2, 3) int nft_request_module(struct net *net, const char *fmt, req->done = false; strlcpy(req->module, module_name, MODULE_NAME_LEN); - list_add_tail(&req->list, &net->nft.module_list); + list_add_tail(&req->list, &nft_net->module_list); return -EAGAIN; } +EXPORT_SYMBOL_GPL(nft_request_module); #endif static void lockdep_nfnl_nft_mutex_not_held(void) @@ -652,6 +709,13 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla, return ERR_PTR(-ENOENT); } +static __be16 nft_base_seq(const struct net *net) +{ + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + return htons(nft_net->base_seq & 0xffff); +} + static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, @@ -666,18 +730,13 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, int family, const struct nft_table *table) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || @@ -715,19 +774,9 @@ static void nft_notify_enqueue(struct sk_buff *skb, bool report, static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { + struct nftables_pernet *nft_net; struct sk_buff *skb; int err; - char *buf = kasprintf(GFP_KERNEL, "%s:%llu;?:0", - ctx->table->name, ctx->table->handle); - - audit_log_nfcfg(buf, - ctx->family, - ctx->table->use, - event == NFT_MSG_NEWTABLE ? - AUDIT_NFT_OP_TABLE_REGISTER : - AUDIT_NFT_OP_TABLE_UNREGISTER, - GFP_KERNEL); - kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -744,7 +793,8 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_net = net_generic(ctx->net, nf_tables_net_id); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -754,15 +804,17 @@ static int nf_tables_dump_tables(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nftables_pernet *nft_net; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -947,7 +999,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) nft_trans_table_flags(trans) = flags; nft_trans_table_update(trans) = true; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err: nft_trans_destroy(trans); @@ -1010,6 +1062,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; @@ -1019,7 +1072,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, u32 flags = 0; int err; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); attr = nla[NFTA_TABLE_NAME]; table = nft_table_lookup(net, attr, family, genmask, NETLINK_CB(skb).portid); @@ -1080,7 +1133,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (err < 0) goto err_trans; - list_add_tail_rcu(&table->list, &net->nft.tables); + list_add_tail_rcu(&table->list, &nft_net->tables); return 0; err_trans: rhltable_destroy(&table->chains_ht); @@ -1168,11 +1221,12 @@ out: static int nft_flush(struct nft_ctx *ctx, int family) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_table *table, *nt; const struct nlattr * const *nla = ctx->nla; int err = 0; - list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) { + list_for_each_entry_safe(table, nt, &nft_net->tables, list) { if (family != AF_UNSPEC && table->family != family) continue; @@ -1291,7 +1345,9 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask) static bool lockdep_commit_lock_is_held(const struct net *net) { #ifdef CONFIG_PROVE_LOCKING - return lockdep_is_held(&net->nft.commit_mutex); + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + return lockdep_is_held(&nft_net->commit_mutex); #else return true; #endif @@ -1438,18 +1494,13 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, const struct nft_chain *chain) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) goto nla_put_failure; if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle), @@ -1499,20 +1550,9 @@ nla_put_failure: static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { + struct nftables_pernet *nft_net; struct sk_buff *skb; int err; - char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", - ctx->table->name, ctx->table->handle, - ctx->chain->name, ctx->chain->handle); - - audit_log_nfcfg(buf, - ctx->family, - ctx->chain->use, - event == NFT_MSG_NEWCHAIN ? - AUDIT_NFT_OP_CHAIN_REGISTER : - AUDIT_NFT_OP_CHAIN_UNREGISTER, - GFP_KERNEL); - kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -1530,7 +1570,8 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_net = net_generic(ctx->net, nf_tables_net_id); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -1545,11 +1586,13 @@ static int nf_tables_dump_chains(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -1865,11 +1908,12 @@ static int nft_chain_parse_hook(struct net *net, struct nft_chain_hook *hook, u8 family, bool autoload) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nlattr *ha[NFTA_HOOK_MAX + 1]; const struct nft_chain_type *type; int err; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX, @@ -2258,6 +2302,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, if (nla[NFTA_CHAIN_HANDLE] && nla[NFTA_CHAIN_NAME]) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_trans *tmp; char *name; @@ -2267,7 +2312,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, goto err; err = -EEXIST; - list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) { + list_for_each_entry(tmp, &nft_net->commit_list, list) { if (tmp->msg_type == NFT_MSG_NEWCHAIN && tmp->ctx.table == table && nft_trans_chain_update(tmp) && @@ -2281,7 +2326,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, nft_trans_chain_name(trans) = name; } - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err: @@ -2293,10 +2338,11 @@ err: static struct nft_chain *nft_chain_lookup_byid(const struct net *net, const struct nlattr *nla) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u32 id = ntohl(nla_get_be32(nla)); struct nft_trans *trans; - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { struct nft_chain *chain = trans->ctx.chain; if (trans->msg_type == NFT_MSG_NEWCHAIN && @@ -2311,6 +2357,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; @@ -2322,7 +2369,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, u64 handle = 0; u32 flags = 0; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask, NETLINK_CB(skb).portid); @@ -2807,20 +2854,15 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, const struct nft_rule *prule) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; const struct nft_expr *expr, *next; struct nlattr *list; u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0, + nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_RULE_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name)) @@ -2836,6 +2878,9 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; } + if (chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_stats(chain, rule); + list = nla_nest_start_noflag(skb, NFTA_RULE_EXPRESSIONS); if (list == NULL) goto nla_put_failure; @@ -2863,20 +2908,9 @@ nla_put_failure: static void nf_tables_rule_notify(const struct nft_ctx *ctx, const struct nft_rule *rule, int event) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct sk_buff *skb; int err; - char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", - ctx->table->name, ctx->table->handle, - ctx->chain->name, ctx->chain->handle); - - audit_log_nfcfg(buf, - ctx->family, - rule->handle, - event == NFT_MSG_NEWRULE ? - AUDIT_NFT_OP_RULE_REGISTER : - AUDIT_NFT_OP_RULE_UNREGISTER, - GFP_KERNEL); - kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -2894,7 +2928,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -2952,11 +2986,13 @@ static int nf_tables_dump_rules(struct sk_buff *skb, unsigned int idx = 0; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -3187,6 +3223,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); struct nft_expr_info *info = NULL; @@ -3204,7 +3241,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, int err, rem; u64 handle, pos_handle; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, NETLINK_CB(skb).portid); @@ -3376,7 +3413,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, kvfree(info); chain->use++; - if (net->nft.validate_state == NFT_VALIDATE_DO) + if (nft_net->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, table); if (chain->flags & NFT_CHAIN_HW_OFFLOAD) { @@ -3405,10 +3442,11 @@ err1: static struct nft_rule *nft_rule_lookup_byid(const struct net *net, const struct nlattr *nla) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u32 id = ntohl(nla_get_be32(nla)); struct nft_trans *trans; - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { struct nft_rule *rule = nft_trans_rule(trans); if (trans->msg_type == NFT_MSG_NEWRULE && @@ -3521,13 +3559,14 @@ nft_select_set_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, enum nft_set_policies policy) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); const struct nft_set_ops *ops, *bops; struct nft_set_estimate est, best; const struct nft_set_type *type; u32 flags = 0; int i; - lockdep_assert_held(&ctx->net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); if (nla[NFTA_SET_FLAGS] != NULL) @@ -3665,10 +3704,11 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table, static struct nft_set *nft_set_lookup_byid(const struct net *net, const struct nlattr *nla, u8 genmask) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWSET) { struct nft_set *set = nft_trans_set(trans); @@ -3803,7 +3843,6 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb, static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, const struct nft_set *set, u16 event, u16 flags) { - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; u32 portid = ctx->portid; struct nlattr *nest; @@ -3811,16 +3850,11 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, int i; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), - flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, + NFNETLINK_V0, nft_base_seq(ctx->net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_NAME, set->name)) @@ -3908,21 +3942,10 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, const struct nft_set *set, int event, gfp_t gfp_flags) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct sk_buff *skb; u32 portid = ctx->portid; int err; - char *buf = kasprintf(gfp_flags, "%s:%llu;%s:%llu", - ctx->table->name, ctx->table->handle, - set->name, set->handle); - - audit_log_nfcfg(buf, - ctx->family, - set->field_count, - event == NFT_MSG_NEWSET ? - AUDIT_NFT_OP_SET_REGISTER : - AUDIT_NFT_OP_SET_UNREGISTER, - gfp_flags); - kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -3938,7 +3961,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -3951,14 +3974,16 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); struct nft_ctx *ctx = cb->data, ctx_set; + struct nftables_pernet *nft_net; if (cb->args[1]) return skb->len; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (ctx->family != NFPROTO_UNSPEC && ctx->family != table->family) continue; @@ -4797,18 +4822,19 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); + struct nftables_pernet *nft_net; struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; bool set_found = false; - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *nest; u32 portid, seq; int event; rcu_read_lock(); - list_for_each_entry_rcu(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && dump_ctx->ctx.family != table->family) continue; @@ -4834,16 +4860,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), - NLM_F_MULTI); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI, + table->family, NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = table->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) @@ -4900,22 +4921,16 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *nest; int err; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), - flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, + NFNETLINK_V0, nft_base_seq(ctx->net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_NAME, set->name)) @@ -5103,22 +5118,11 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set_elem *elem, int event, u16 flags) { + struct nftables_pernet *nft_net; struct net *net = ctx->net; u32 portid = ctx->portid; struct sk_buff *skb; int err; - char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", - ctx->table->name, ctx->table->handle, - set->name, set->handle); - - audit_log_nfcfg(buf, - ctx->family, - set->handle, - event == NFT_MSG_NEWSETELEM ? - AUDIT_NFT_OP_SETELEM_REGISTER : - AUDIT_NFT_OP_SETELEM_UNREGISTER, - GFP_KERNEL); - kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return; @@ -5134,7 +5138,8 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_net = net_generic(net, nf_tables_net_id); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -5299,16 +5304,35 @@ err_expr: return -ENOMEM; } -static void nft_set_elem_expr_setup(const struct nft_set_ext *ext, int i, - struct nft_expr *expr_array[]) +static int nft_set_elem_expr_setup(struct nft_ctx *ctx, + const struct nft_set_ext *ext, + struct nft_expr *expr_array[], + u32 num_exprs) { struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext); - struct nft_expr *expr = nft_setelem_expr_at(elem_expr, elem_expr->size); + struct nft_expr *expr; + int i, err; + + for (i = 0; i < num_exprs; i++) { + expr = nft_setelem_expr_at(elem_expr, elem_expr->size); + err = nft_expr_clone(expr, expr_array[i]); + if (err < 0) + goto err_elem_expr_setup; + + elem_expr->size += expr_array[i]->ops->size; + nft_expr_destroy(ctx, expr_array[i]); + expr_array[i] = NULL; + } - memcpy(expr, expr_array[i], expr_array[i]->ops->size); - elem_expr->size += expr_array[i]->ops->size; - kfree(expr_array[i]); - expr_array[i] = NULL; + return 0; + +err_elem_expr_setup: + for (; i < num_exprs; i++) { + nft_expr_destroy(ctx, expr_array[i]); + expr_array[i] = NULL; + } + + return -ENOMEM; } static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, @@ -5560,12 +5584,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, *nft_set_ext_obj(ext) = obj; obj->use++; } - for (i = 0; i < num_exprs; i++) - nft_set_elem_expr_setup(ext, i, expr_array); + err = nft_set_elem_expr_setup(ctx, ext, expr_array, num_exprs); + if (err < 0) + goto err_elem_expr; trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); - if (trans == NULL) - goto err_trans; + if (trans == NULL) { + err = -ENOMEM; + goto err_elem_expr; + } ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; err = set->ops->insert(ctx->net, set, &elem, &ext2); @@ -5602,14 +5629,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } nft_trans_elem(trans) = elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err_set_full: set->ops->remove(ctx->net, set, &elem); err_element_clash: kfree(trans); -err_trans: +err_elem_expr: if (obj) obj->use--; @@ -5633,6 +5660,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u8 genmask = nft_genmask_next(net); const struct nlattr *attr; struct nft_set *set; @@ -5661,7 +5689,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, return err; } - if (net->nft.validate_state == NFT_VALIDATE_DO) + if (nft_net->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, ctx.table); return 0; @@ -5797,7 +5825,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, nft_set_elem_deactivate(ctx->net, set, &elem); nft_trans_elem(trans) = elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; fail_ops: @@ -5831,7 +5859,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, nft_set_elem_deactivate(ctx->net, set, elem); nft_trans_elem_set(trans) = set; nft_trans_elem(trans) = *elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err1: @@ -6125,7 +6153,7 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, nft_trans_obj(trans) = obj; nft_trans_obj_update(trans) = true; nft_trans_obj_newobj(trans) = newobj; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -6245,19 +6273,14 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, int family, const struct nft_table *table, struct nft_object *obj, bool reset) { - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) || nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || @@ -6292,6 +6315,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) struct nft_obj_filter *filter = cb->data; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; struct nft_object *obj; bool reset = false; @@ -6299,9 +6323,10 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) reset = true; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -6320,12 +6345,11 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) filter->type != NFT_OBJECT_UNSPEC && obj->ops->type->type != filter->type) goto cont; - if (reset) { char *buf = kasprintf(GFP_ATOMIC, - "%s:%llu;?:0", + "%s:%u", table->name, - table->handle); + nft_net->base_seq); audit_log_nfcfg(buf, family, @@ -6446,8 +6470,11 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, reset = true; if (reset) { - char *buf = kasprintf(GFP_ATOMIC, "%s:%llu;?:0", - table->name, table->handle); + const struct nftables_pernet *nft_net; + char *buf; + + nft_net = net_generic(net, nf_tables_net_id); + buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq); audit_log_nfcfg(buf, family, @@ -6533,17 +6560,18 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct sk_buff *skb; int err; - char *buf = kasprintf(gfp, "%s:%llu;?:0", - table->name, table->handle); + char *buf = kasprintf(gfp, "%s:%u", + table->name, nft_net->base_seq); audit_log_nfcfg(buf, family, obj->handle, event == NFT_MSG_NEWOBJ ? - AUDIT_NFT_OP_OBJ_REGISTER : - AUDIT_NFT_OP_OBJ_UNREGISTER, + AUDIT_NFT_OP_OBJ_REGISTER : + AUDIT_NFT_OP_OBJ_UNREGISTER, gfp); kfree(buf); @@ -6562,7 +6590,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, goto err; } - nft_notify_enqueue(skb, report, &net->nft.notify_list); + nft_notify_enqueue(skb, report, &nft_net->notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -6894,7 +6922,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); list_splice(&flowtable_hook.list, &nft_trans_flowtable_hooks(trans)); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -7082,7 +7110,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); nft_flowtable_hook_release(&flowtable_hook); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -7158,20 +7186,15 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, struct list_head *hook_list) { struct nlattr *nest, *nest_devs; - struct nfgenmsg *nfmsg; struct nft_hook *hook; struct nlmsghdr *nlh; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) || nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || @@ -7219,12 +7242,14 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nft_flowtable *flowtable; + struct nftables_pernet *nft_net; const struct nft_table *table; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -7359,20 +7384,9 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, struct list_head *hook_list, int event) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct sk_buff *skb; int err; - char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", - flowtable->table->name, flowtable->table->handle, - flowtable->name, flowtable->handle); - - audit_log_nfcfg(buf, - ctx->family, - flowtable->hooknum, - event == NFT_MSG_NEWFLOWTABLE ? - AUDIT_NFT_OP_FLOWTABLE_REGISTER : - AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, - GFP_KERNEL); - kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -7390,7 +7404,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7415,21 +7429,17 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; char buf[TASK_COMM_LEN]; int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - - if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) || + if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) || nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) || nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current))) goto nla_put_failure; @@ -7464,6 +7474,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nft_flowtable *flowtable; + struct nftables_pernet *nft_net; struct nft_table *table; struct net *net; @@ -7471,13 +7482,14 @@ static int nf_tables_flowtable_event(struct notifier_block *this, return 0; net = dev_net(dev); - mutex_lock(&net->nft.commit_mutex); - list_for_each_entry(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(flowtable, &table->flowtables, list) { nft_flowtable_event(event, dev, flowtable); } } - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } @@ -7493,9 +7505,6 @@ static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, struct sk_buff *skb2; int err; - audit_log_nfcfg("?:0;?:0", 0, net->nft.base_seq, - AUDIT_NFT_OP_GEN_REGISTER, GFP_KERNEL); - if (!nlmsg_report(nlh) && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return; @@ -7661,16 +7670,17 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { static int nf_tables_validate(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_table *table; - switch (net->nft.validate_state) { + switch (nft_net->validate_state) { case NFT_VALIDATE_SKIP: break; case NFT_VALIDATE_NEED: nft_validate_state_update(net, NFT_VALIDATE_DO); fallthrough; case NFT_VALIDATE_DO: - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_validate(net, table) < 0) return -EAGAIN; } @@ -7845,9 +7855,10 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha static void nf_tables_commit_chain_prepare_cancel(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { struct nft_chain *chain = trans->ctx.chain; if (trans->msg_type == NFT_MSG_NEWRULE || @@ -7956,10 +7967,11 @@ static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable, static void nf_tables_module_autoload_cleanup(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_module_request *req, *next; - WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); - list_for_each_entry_safe(req, next, &net->nft.module_list, list) { + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + list_for_each_entry_safe(req, next, &nft_net->module_list, list) { WARN_ON_ONCE(!req->done); list_del(&req->list); kfree(req); @@ -7968,6 +7980,7 @@ static void nf_tables_module_autoload_cleanup(struct net *net) static void nf_tables_commit_release(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; /* all side effects have to be made visible. @@ -7977,35 +7990,36 @@ static void nf_tables_commit_release(struct net *net) * Memory reclaim happens asynchronously from work queue * to prevent expensive synchronize_rcu() in commit phase. */ - if (list_empty(&net->nft.commit_list)) { + if (list_empty(&nft_net->commit_list)) { nf_tables_module_autoload_cleanup(net); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return; } - trans = list_last_entry(&net->nft.commit_list, + trans = list_last_entry(&nft_net->commit_list, struct nft_trans, list); get_net(trans->ctx.net); WARN_ON_ONCE(trans->put_net); trans->put_net = true; spin_lock(&nf_tables_destroy_list_lock); - list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list); + list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list); spin_unlock(&nf_tables_destroy_list_lock); nf_tables_module_autoload_cleanup(net); schedule_work(&trans_destroy_work); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); } static void nft_commit_notify(struct net *net, u32 portid) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct sk_buff *batch_skb = NULL, *nskb, *skb; unsigned char *data; int len; - list_for_each_entry_safe(skb, nskb, &net->nft.notify_list, list) { + list_for_each_entry_safe(skb, nskb, &nft_net->notify_list, list) { if (!batch_skb) { new_batch: batch_skb = skb; @@ -8031,19 +8045,72 @@ new_batch: NFT_CB(batch_skb).report, GFP_KERNEL); } - WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); + WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); +} + +static int nf_tables_commit_audit_alloc(struct list_head *adl, + struct nft_table *table) +{ + struct nft_audit_data *adp; + + list_for_each_entry(adp, adl, list) { + if (adp->table == table) + return 0; + } + adp = kzalloc(sizeof(*adp), GFP_KERNEL); + if (!adp) + return -ENOMEM; + adp->table = table; + list_add(&adp->list, adl); + return 0; +} + +static void nf_tables_commit_audit_collect(struct list_head *adl, + struct nft_table *table, u32 op) +{ + struct nft_audit_data *adp; + + list_for_each_entry(adp, adl, list) { + if (adp->table == table) + goto found; + } + WARN_ONCE(1, "table=%s not expected in commit list", table->name); + return; +found: + adp->entries++; + if (!adp->op || adp->op > op) + adp->op = op; +} + +#define AUNFTABLENAMELEN (NFT_TABLE_MAXNAMELEN + 22) + +static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation) +{ + struct nft_audit_data *adp, *adn; + char aubuf[AUNFTABLENAMELEN]; + + list_for_each_entry_safe(adp, adn, adl, list) { + snprintf(aubuf, AUNFTABLENAMELEN, "%s:%u", adp->table->name, + generation); + audit_log_nfcfg(aubuf, adp->table->family, adp->entries, + nft2audit_op[adp->op], GFP_KERNEL); + list_del(&adp->list); + kfree(adp); + } } static int nf_tables_commit(struct net *net, struct sk_buff *skb) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; + LIST_HEAD(adl); int err; - if (list_empty(&net->nft.commit_list)) { - mutex_unlock(&net->nft.commit_mutex); + if (list_empty(&nft_net->commit_list)) { + mutex_unlock(&nft_net->commit_mutex); return 0; } @@ -8056,9 +8123,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return err; /* 1. Allocate space for next generation rules_gen_X[] */ - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { int ret; + ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table); + if (ret) { + nf_tables_commit_chain_prepare_cancel(net); + return ret; + } if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { chain = trans->ctx.chain; @@ -8072,7 +8144,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } /* step 2. Make rules_gen_X visible to packet path */ - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(chain, &table->chains, list) nf_tables_commit_chain(net, chain); } @@ -8081,12 +8153,15 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) * Bump generation counter, invalidate any dump in progress. * Cannot fail after this point. */ - while (++net->nft.base_seq == 0); + while (++nft_net->base_seq == 0) + ; /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { + nf_tables_commit_audit_collect(&adl, trans->ctx.table, + trans->msg_type); switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { @@ -8239,6 +8314,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); + nf_tables_commit_audit_log(&adl, nft_net->base_seq); nf_tables_commit_release(net); return 0; @@ -8246,17 +8322,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) static void nf_tables_module_autoload(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_module_request *req, *next; LIST_HEAD(module_list); - list_splice_init(&net->nft.module_list, &module_list); - mutex_unlock(&net->nft.commit_mutex); + list_splice_init(&nft_net->module_list, &module_list); + mutex_unlock(&nft_net->commit_mutex); list_for_each_entry_safe(req, next, &module_list, list) { request_module("%s", req->module); req->done = true; } - mutex_lock(&net->nft.commit_mutex); - list_splice(&module_list, &net->nft.module_list); + mutex_lock(&nft_net->commit_mutex); + list_splice(&module_list, &nft_net->module_list); } static void nf_tables_abort_release(struct nft_trans *trans) @@ -8293,6 +8370,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; struct nft_trans_elem *te; struct nft_hook *hook; @@ -8301,7 +8379,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nf_tables_validate(net) < 0) return -EAGAIN; - list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, + list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWTABLE: @@ -8425,7 +8503,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, - &net->nft.commit_list, list) { + &nft_net->commit_list, list) { list_del(&trans->list); nf_tables_abort_release(trans); } @@ -8446,22 +8524,24 @@ static void nf_tables_cleanup(struct net *net) static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); int ret = __nf_tables_abort(net, action); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return ret; } static bool nf_tables_valid_genid(struct net *net, u32 genid) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); bool genid_ok; - mutex_lock(&net->nft.commit_mutex); + mutex_lock(&nft_net->commit_mutex); - genid_ok = genid == 0 || net->nft.base_seq == genid; + genid_ok = genid == 0 || nft_net->base_seq == genid; if (!genid_ok) - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); /* else, commit mutex has to be released by commit or abort function */ return genid_ok; @@ -8637,15 +8717,6 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) } EXPORT_SYMBOL_GPL(nft_parse_u32_check); -/** - * nft_parse_register - parse a register value from a netlink attribute - * - * @attr: netlink attribute - * - * Parse and translate a register value from a netlink attribute. - * Registers used to be 128 bit wide, these register numbers will be - * mapped to the corresponding 32 bit register numbers. - */ static unsigned int nft_parse_register(const struct nlattr *attr) { unsigned int reg; @@ -8681,15 +8752,6 @@ int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg) } EXPORT_SYMBOL_GPL(nft_dump_register); -/** - * nft_validate_register_load - validate a load from a register - * - * @reg: the register number - * @len: the length of the data - * - * Validate that the input register is one of the general purpose - * registers and that the length of the load is within the bounds. - */ static int nft_validate_register_load(enum nft_registers reg, unsigned int len) { if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) @@ -8717,20 +8779,6 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) } EXPORT_SYMBOL_GPL(nft_parse_register_load); -/** - * nft_validate_register_store - validate an expressions' register store - * - * @ctx: context of the expression performing the load - * @reg: the destination register number - * @data: the data to load - * @type: the data type - * @len: the length of the data - * - * Validate that a data load uses the appropriate data type for - * the destination register and the length is within the bounds. - * A value of NULL for the data means that its runtime gathered - * data. - */ static int nft_validate_register_store(const struct nft_ctx *ctx, enum nft_registers reg, const struct nft_data *data, @@ -9048,9 +9096,10 @@ static void __nft_release_hook(struct net *net, struct nft_table *table) static void __nft_release_hooks(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_table *table; - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table)) continue; @@ -9107,9 +9156,10 @@ static void __nft_release_table(struct net *net, struct nft_table *table) static void __nft_release_tables(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_table *table, *nt; - list_for_each_entry_safe(table, nt, &net->nft.tables, list) { + list_for_each_entry_safe(table, nt, &nft_net->tables, list) { if (nft_table_has_owner(table)) continue; @@ -9120,6 +9170,7 @@ static void __nft_release_tables(struct net *net) static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct nftables_pernet *nft_net; struct netlink_notify *n = ptr; struct nft_table *table, *nt; struct net *net = n->net; @@ -9128,8 +9179,9 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) return NOTIFY_DONE; - mutex_lock(&net->nft.commit_mutex); - list_for_each_entry(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && n->portid == table->nlpid) { __nft_release_hook(net, table); @@ -9138,13 +9190,13 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, } if (release) { synchronize_rcu(); - list_for_each_entry_safe(table, nt, &net->nft.tables, list) { + list_for_each_entry_safe(table, nt, &nft_net->tables, list) { if (nft_table_has_owner(table) && n->portid == table->nlpid) __nft_release_table(net, table); } } - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } @@ -9155,13 +9207,15 @@ static struct notifier_block nft_nl_notifier = { static int __net_init nf_tables_init_net(struct net *net) { - INIT_LIST_HEAD(&net->nft.tables); - INIT_LIST_HEAD(&net->nft.commit_list); - INIT_LIST_HEAD(&net->nft.module_list); - INIT_LIST_HEAD(&net->nft.notify_list); - mutex_init(&net->nft.commit_mutex); - net->nft.base_seq = 1; - net->nft.validate_state = NFT_VALIDATE_SKIP; + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + INIT_LIST_HEAD(&nft_net->tables); + INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->module_list); + INIT_LIST_HEAD(&nft_net->notify_list); + mutex_init(&nft_net->commit_mutex); + nft_net->base_seq = 1; + nft_net->validate_state = NFT_VALIDATE_SKIP; return 0; } @@ -9173,27 +9227,30 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net) static void __net_exit nf_tables_exit_net(struct net *net) { - mutex_lock(&net->nft.commit_mutex); - if (!list_empty(&net->nft.commit_list)) + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + mutex_lock(&nft_net->commit_mutex); + if (!list_empty(&nft_net->commit_list)) __nf_tables_abort(net, NFNL_ABORT_NONE); __nft_release_tables(net); - mutex_unlock(&net->nft.commit_mutex); - WARN_ON_ONCE(!list_empty(&net->nft.tables)); - WARN_ON_ONCE(!list_empty(&net->nft.module_list)); - WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); + mutex_unlock(&nft_net->commit_mutex); + WARN_ON_ONCE(!list_empty(&nft_net->tables)); + WARN_ON_ONCE(!list_empty(&nft_net->module_list)); + WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); } static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .pre_exit = nf_tables_pre_exit_net, .exit = nf_tables_exit_net, + .id = &nf_tables_net_id, + .size = sizeof(struct nftables_pernet), }; static int __init nf_tables_module_init(void) { int err; - spin_lock_init(&nf_tables_destroy_list_lock); err = register_pernet_subsys(&nf_tables_net_ops); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 9ae14270c543..19215e81dd66 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -7,6 +7,8 @@ #include <net/netfilter/nf_tables_offload.h> #include <net/pkt_cls.h> +extern unsigned int nf_tables_net_id; + static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions) { struct nft_flow_rule *flow; @@ -45,6 +47,48 @@ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, offsetof(struct nft_flow_key, control); } +struct nft_offload_ethertype { + __be16 value; + __be16 mask; +}; + +static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow) +{ + struct nft_flow_match *match = &flow->match; + struct nft_offload_ethertype ethertype; + + if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL) && + match->key.basic.n_proto != htons(ETH_P_8021Q) && + match->key.basic.n_proto != htons(ETH_P_8021AD)) + return; + + ethertype.value = match->key.basic.n_proto; + ethertype.mask = match->mask.basic.n_proto; + + if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) && + (match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) || + match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) { + match->key.basic.n_proto = match->key.cvlan.vlan_tpid; + match->mask.basic.n_proto = match->mask.cvlan.vlan_tpid; + match->key.cvlan.vlan_tpid = match->key.vlan.vlan_tpid; + match->mask.cvlan.vlan_tpid = match->mask.vlan.vlan_tpid; + match->key.vlan.vlan_tpid = ethertype.value; + match->mask.vlan.vlan_tpid = ethertype.mask; + match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] = + offsetof(struct nft_flow_key, cvlan); + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN); + } else { + match->key.basic.n_proto = match->key.vlan.vlan_tpid; + match->mask.basic.n_proto = match->mask.vlan.vlan_tpid; + match->key.vlan.vlan_tpid = ethertype.value; + match->mask.vlan.vlan_tpid = ethertype.mask; + match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] = + offsetof(struct nft_flow_key, vlan); + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_VLAN); + } +} + struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule) { @@ -89,6 +133,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, expr = nft_expr_next(expr); } + nft_flow_rule_transfer_vlan(ctx, flow); + flow->proto = ctx->dep.l3num; kfree(ctx); @@ -197,26 +243,56 @@ static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow, cls_flow->rule = flow->rule; } -static int nft_flow_offload_rule(struct nft_chain *chain, - struct nft_rule *rule, - struct nft_flow_rule *flow, - enum flow_cls_command command) +static int nft_flow_offload_cmd(const struct nft_chain *chain, + const struct nft_rule *rule, + struct nft_flow_rule *flow, + enum flow_cls_command command, + struct flow_cls_offload *cls_flow) { struct netlink_ext_ack extack = {}; - struct flow_cls_offload cls_flow; struct nft_base_chain *basechain; if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; basechain = nft_base_chain(chain); - nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, &extack, + nft_flow_cls_offload_setup(cls_flow, basechain, rule, flow, &extack, command); - return nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, + return nft_setup_cb_call(TC_SETUP_CLSFLOWER, cls_flow, &basechain->flow_block.cb_list); } +static int nft_flow_offload_rule(const struct nft_chain *chain, + struct nft_rule *rule, + struct nft_flow_rule *flow, + enum flow_cls_command command) +{ + struct flow_cls_offload cls_flow; + + return nft_flow_offload_cmd(chain, rule, flow, command, &cls_flow); +} + +int nft_flow_rule_stats(const struct nft_chain *chain, + const struct nft_rule *rule) +{ + struct flow_cls_offload cls_flow = {}; + struct nft_expr *expr, *next; + int err; + + err = nft_flow_offload_cmd(chain, rule, NULL, FLOW_CLS_STATS, + &cls_flow); + if (err < 0) + return err; + + nft_rule_for_each_expr(expr, next, rule) { + if (expr->ops->offload_stats) + expr->ops->offload_stats(expr, &cls_flow.stats); + } + + return 0; +} + static int nft_flow_offload_bind(struct flow_block_offload *bo, struct nft_base_chain *basechain) { @@ -307,16 +383,18 @@ static void nft_indr_block_cleanup(struct flow_block_cb *block_cb) struct nft_base_chain *basechain = block_cb->indr.data; struct net_device *dev = block_cb->indr.dev; struct netlink_ext_ack extack = {}; + struct nftables_pernet *nft_net; struct net *net = dev_net(dev); struct flow_block_offload bo; nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND, basechain, &extack); - mutex_lock(&net->nft.commit_mutex); + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); nft_flow_offload_unbind(&bo, basechain); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); } static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain, @@ -412,9 +490,10 @@ static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy, static void nft_flow_rule_offload_abort(struct net *net, struct nft_trans *trans) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); int err = 0; - list_for_each_entry_continue_reverse(trans, &net->nft.commit_list, list) { + list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) { if (trans->ctx.family != NFPROTO_NETDEV) continue; @@ -460,11 +539,12 @@ static void nft_flow_rule_offload_abort(struct net *net, int nft_flow_rule_offload_commit(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; int err = 0; u8 policy; - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->ctx.family != NFPROTO_NETDEV) continue; @@ -516,7 +596,7 @@ int nft_flow_rule_offload_commit(struct net *net) } } - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->ctx.family != NFPROTO_NETDEV) continue; @@ -536,15 +616,15 @@ int nft_flow_rule_offload_commit(struct net *net) return err; } -static struct nft_chain *__nft_offload_get_chain(struct net_device *dev) +static struct nft_chain *__nft_offload_get_chain(const struct nftables_pernet *nft_net, + struct net_device *dev) { struct nft_base_chain *basechain; - struct net *net = dev_net(dev); struct nft_hook *hook, *found; const struct nft_table *table; struct nft_chain *chain; - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { if (table->family != NFPROTO_NETDEV) continue; @@ -576,19 +656,21 @@ static int nft_offload_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nftables_pernet *nft_net; struct net *net = dev_net(dev); struct nft_chain *chain; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - mutex_lock(&net->nft.commit_mutex); - chain = __nft_offload_get_chain(dev); + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + chain = __nft_offload_get_chain(nft_net, dev); if (chain) nft_flow_block_chain(nft_base_chain(chain), dev, FLOW_BLOCK_UNBIND); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 87b36da5cd98..0cf3278007ba 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -183,7 +183,6 @@ static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info) void nft_trace_notify(struct nft_traceinfo *info) { const struct nft_pktinfo *pkt = info->pkt; - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; @@ -219,15 +218,11 @@ void nft_trace_notify(struct nft_traceinfo *info) return; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE); - nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0); + nlh = nfnl_msg_put(skb, 0, 0, event, 0, info->basechain->type->family, + NFNETLINK_V0, 0); if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = info->basechain->type->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt)))) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index d3df66a39b5e..06f5886f652e 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -28,6 +28,7 @@ #include <linux/sched/signal.h> #include <net/netlink.h> +#include <net/netns/generic.h> #include <linux/netfilter/nfnetlink.h> MODULE_LICENSE("GPL"); @@ -41,6 +42,12 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket"); #define NFNL_MAX_ATTR_COUNT 32 +static unsigned int nfnetlink_pernet_id __read_mostly; + +struct nfnl_net { + struct sock *nfnl; +}; + static struct { struct mutex mutex; const struct nfnetlink_subsystem __rcu *subsys; @@ -75,6 +82,11 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, }; +static struct nfnl_net *nfnl_pernet(struct net *net) +{ + return net_generic(net, nfnetlink_pernet_id); +} + void nfnl_lock(__u8 subsys_id) { mutex_lock(&table[subsys_id].mutex); @@ -149,28 +161,35 @@ nfnetlink_find_client(u16 type, const struct nfnetlink_subsystem *ss) int nfnetlink_has_listeners(struct net *net, unsigned int group) { - return netlink_has_listeners(net->nfnl, group); + struct nfnl_net *nfnlnet = nfnl_pernet(net); + + return netlink_has_listeners(nfnlnet->nfnl, group); } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { - return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags); + struct nfnl_net *nfnlnet = nfnl_pernet(net); + + return nlmsg_notify(nfnlnet->nfnl, skb, portid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error) { - return netlink_set_err(net->nfnl, portid, group, error); + struct nfnl_net *nfnlnet = nfnl_pernet(net); + + return netlink_set_err(nfnlnet->nfnl, portid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid) { + struct nfnl_net *nfnlnet = nfnl_pernet(net); int err; - err = nlmsg_unicast(net->nfnl, skb, portid); + err = nlmsg_unicast(nfnlnet->nfnl, skb, portid); if (err == -EAGAIN) err = -ENOBUFS; @@ -178,6 +197,15 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid) } EXPORT_SYMBOL_GPL(nfnetlink_unicast); +void nfnetlink_broadcast(struct net *net, struct sk_buff *skb, __u32 portid, + __u32 group, gfp_t allocation) +{ + struct nfnl_net *nfnlnet = nfnl_pernet(net); + + netlink_broadcast(nfnlnet->nfnl, skb, portid, group, allocation); +} +EXPORT_SYMBOL_GPL(nfnetlink_broadcast); + /* Process one complete nfnetlink message. */ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) @@ -194,6 +222,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, type = nlh->nlmsg_type; replay: rcu_read_lock(); + ss = nfnetlink_get_subsys(type); if (!ss) { #ifdef CONFIG_MODULES @@ -217,6 +246,7 @@ replay: { int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); + struct nfnl_net *nfnlnet = nfnl_pernet(net); u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1]; struct nlattr *attr = (void *)nlh + min_len; @@ -238,7 +268,7 @@ replay: } if (nc->call_rcu) { - err = nc->call_rcu(net, net->nfnl, skb, nlh, + err = nc->call_rcu(net, nfnlnet->nfnl, skb, nlh, (const struct nlattr **)cda, extack); rcu_read_unlock(); @@ -249,7 +279,7 @@ replay: nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) - err = nc->call(net, net->nfnl, skb, nlh, + err = nc->call(net, nfnlnet->nfnl, skb, nlh, (const struct nlattr **)cda, extack); else @@ -453,7 +483,9 @@ replay_abort: goto ack; if (nc->call_batch) { - err = nc->call_batch(net, net->nfnl, skb, nlh, + struct nfnl_net *nfnlnet = nfnl_pernet(net); + + err = nc->call_batch(net, nfnlnet->nfnl, skb, nlh, (const struct nlattr **)cda, &extack); } @@ -622,7 +654,7 @@ static int nfnetlink_bind(struct net *net, int group) static int __net_init nfnetlink_net_init(struct net *net) { - struct sock *nfnl; + struct nfnl_net *nfnlnet = nfnl_pernet(net); struct netlink_kernel_cfg cfg = { .groups = NFNLGRP_MAX, .input = nfnetlink_rcv, @@ -631,28 +663,29 @@ static int __net_init nfnetlink_net_init(struct net *net) #endif }; - nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg); - if (!nfnl) + nfnlnet->nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg); + if (!nfnlnet->nfnl) return -ENOMEM; - net->nfnl_stash = nfnl; - rcu_assign_pointer(net->nfnl, nfnl); return 0; } static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) { + struct nfnl_net *nfnlnet; struct net *net; - list_for_each_entry(net, net_exit_list, exit_list) - RCU_INIT_POINTER(net->nfnl, NULL); - synchronize_net(); - list_for_each_entry(net, net_exit_list, exit_list) - netlink_kernel_release(net->nfnl_stash); + list_for_each_entry(net, net_exit_list, exit_list) { + nfnlnet = nfnl_pernet(net); + + netlink_kernel_release(nfnlnet->nfnl); + } } static struct pernet_operations nfnetlink_net_ops = { .init = nfnetlink_net_init, .exit_batch = nfnetlink_net_exit_batch, + .id = &nfnetlink_pernet_id, + .size = sizeof(struct nfnl_net), }; static int __init nfnetlink_init(void) diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 0fa1653b5f19..6895f31c5fbb 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -145,21 +145,16 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_acct *acct) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; u64 pkts, bytes; u32 old_flags; event = nfnl_msg_type(NFNL_SUBSYS_ACCT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, NFACCT_NAME, acct->name)) goto nla_put_failure; @@ -474,8 +469,7 @@ static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct) kfree_skb(skb); return; } - netlink_broadcast(net->nfnl, skb, 0, NFNLGRP_ACCT_QUOTA, - GFP_ATOMIC); + nfnetlink_broadcast(net, skb, 0, NFNLGRP_ACCT_QUOTA, GFP_ATOMIC); } int nfnl_acct_overquota(struct net *net, struct nf_acct *nfacct) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 0f94fce1d3ed..22f6f7fcc724 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -526,20 +526,15 @@ nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_conntrack_helper *helper) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; int status; event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, NFCTH_NAME, helper->name)) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 89a381f7f945..46da5548d0b3 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -20,6 +20,7 @@ #include <linux/netfilter.h> #include <net/netlink.h> +#include <net/netns/generic.h> #include <net/sock.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> @@ -30,6 +31,12 @@ #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> +static unsigned int nfct_timeout_id __read_mostly; + +struct nfct_timeout_pernet { + struct list_head nfct_timeout_list; +}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); @@ -42,6 +49,11 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { [CTA_TIMEOUT_DATA] = { .type = NLA_NESTED }, }; +static struct nfct_timeout_pernet *nfct_timeout_pernet(struct net *net) +{ + return net_generic(net, nfct_timeout_id); +} + static int ctnl_timeout_parse_policy(void *timeout, const struct nf_conntrack_l4proto *l4proto, @@ -77,6 +89,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, const struct nlattr * const cda[], struct netlink_ext_ack *extack) { + struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); __u16 l3num; __u8 l4num; const struct nf_conntrack_l4proto *l4proto; @@ -94,7 +107,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); - list_for_each_entry(timeout, &net->nfct_timeout_list, head) { + list_for_each_entry(timeout, &pernet->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -146,7 +159,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, timeout->timeout.l3num = l3num; timeout->timeout.l4proto = l4proto; refcount_set(&timeout->refcnt, 1); - list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list); + list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list); return 0; err: @@ -160,22 +173,17 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct ctnl_timeout *timeout) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto; struct nlattr *nest_parms; int ret; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) || nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->timeout.l3num)) || @@ -206,6 +214,7 @@ nla_put_failure: static int ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct nfct_timeout_pernet *pernet; struct net *net = sock_net(skb->sk); struct ctnl_timeout *cur, *last; @@ -217,7 +226,8 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = 0; rcu_read_lock(); - list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) { + pernet = nfct_timeout_pernet(net); + list_for_each_entry_rcu(cur, &pernet->nfct_timeout_list, head) { if (last) { if (cur != last) continue; @@ -244,6 +254,7 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl, const struct nlattr * const cda[], struct netlink_ext_ack *extack) { + struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; @@ -259,7 +270,7 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl, return -EINVAL; name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &net->nfct_timeout_list, head) { + list_for_each_entry(cur, &pernet->nfct_timeout_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) @@ -315,12 +326,13 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl, const struct nlattr * const cda[], struct netlink_ext_ack *extack) { + struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); struct ctnl_timeout *cur, *tmp; int ret = -ENOENT; char *name; if (!cda[CTA_TIMEOUT_NAME]) { - list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, + list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) ctnl_timeout_try_del(net, cur); @@ -328,7 +340,7 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl, } name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &net->nfct_timeout_list, head) { + list_for_each_entry(cur, &pernet->nfct_timeout_list, head) { if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -382,21 +394,16 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, const unsigned int *timeouts) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; struct nlattr *nest_parms; int ret; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l3num)) || nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) goto nla_put_failure; @@ -513,9 +520,10 @@ err: static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net, const char *name) { + struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); struct ctnl_timeout *timeout, *matching = NULL; - list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) { + list_for_each_entry_rcu(timeout, &pernet->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -573,19 +581,22 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); static int __net_init cttimeout_net_init(struct net *net) { - INIT_LIST_HEAD(&net->nfct_timeout_list); + struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); + + INIT_LIST_HEAD(&pernet->nfct_timeout_list); return 0; } static void __net_exit cttimeout_net_exit(struct net *net) { + struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); struct ctnl_timeout *cur, *tmp; nf_ct_unconfirmed_destroy(net); nf_ct_untimeout(net, NULL); - list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { + list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) { list_del_rcu(&cur->head); if (refcount_dec_and_test(&cur->refcnt)) @@ -596,6 +607,8 @@ static void __net_exit cttimeout_net_exit(struct net *net) static struct pernet_operations cttimeout_ops = { .init = cttimeout_net_init, .exit = cttimeout_net_exit, + .id = &nfct_timeout_id, + .size = sizeof(struct nfct_timeout_pernet), }; static int __init cttimeout_init(void) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 26776b88a539..d5f458d0ff3d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -456,20 +456,15 @@ __build_packet_message(struct nfnl_log_net *log, { struct nfulnl_msg_packet_hdr pmsg; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; sk_buff_data_t old_tail = inst->skb->tail; struct sock *sk; const unsigned char *hwhdrp; - nlh = nlmsg_put(inst->skb, 0, 0, - nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET), - sizeof(struct nfgenmsg), 0); + nlh = nfnl_msg_put(inst->skb, 0, 0, + nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET), + 0, pf, NFNETLINK_V0, htons(inst->group_num)); if (!nlh) return -1; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = pf; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(inst->group_num); memset(&pmsg, 0, sizeof(pmsg)); pmsg.hw_protocol = skb->protocol; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 48a07914fd94..37e81d895e61 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -383,7 +383,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct sk_buff *entskb = entry->skb; struct net_device *indev; struct net_device *outdev; @@ -471,18 +470,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nlmsg_failure; } - nlh = nlmsg_put(skb, 0, 0, - nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET), - sizeof(struct nfgenmsg), 0); + nlh = nfnl_msg_put(skb, 0, 0, + nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET), + 0, entry->state.pf, NFNETLINK_V0, + htons(queue->queue_num)); if (!nlh) { skb_tx_error(entskb); kfree_skb(skb); goto nlmsg_failure; } - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = entry->state.pf; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(queue->queue_num); nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg)); pmsg = nla_data(nla); diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index ff8528ad3dc6..7a9aa57b195b 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -2,6 +2,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/netfilter/nf_tables.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> @@ -10,6 +11,8 @@ #include <net/netfilter/nf_tables_ipv4.h> #include <net/netfilter/nf_tables_ipv6.h> +extern unsigned int nf_tables_net_id; + #ifdef CONFIG_NF_TABLES_IPV4 static unsigned int nft_do_chain_ipv4(void *priv, struct sk_buff *skb, @@ -355,6 +358,7 @@ static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nftables_pernet *nft_net; struct nft_table *table; struct nft_chain *chain, *nr; struct nft_ctx ctx = { @@ -365,8 +369,9 @@ static int nf_tables_netdev_event(struct notifier_block *this, event != NETDEV_CHANGENAME) return NOTIFY_DONE; - mutex_lock(&ctx.net->nft.commit_mutex); - list_for_each_entry(table, &ctx.net->nft.tables, list) { + nft_net = net_generic(ctx.net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { if (table->family != NFPROTO_NETDEV) continue; @@ -380,7 +385,7 @@ static int nf_tables_netdev_event(struct notifier_block *this, nft_netdev_event(event, dev, &ctx); } } - mutex_unlock(&ctx.net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index eb6a43a180bb..47b6d05f1ae6 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -114,19 +114,56 @@ nla_put_failure: return -1; } +union nft_cmp_offload_data { + u16 val16; + u32 val32; + u64 val64; +}; + +static void nft_payload_n2h(union nft_cmp_offload_data *data, + const u8 *val, u32 len) +{ + switch (len) { + case 2: + data->val16 = ntohs(*((u16 *)val)); + break; + case 4: + data->val32 = ntohl(*((u32 *)val)); + break; + case 8: + data->val64 = be64_to_cpu(*((u64 *)val)); + break; + default: + WARN_ON_ONCE(1); + break; + } +} + static int __nft_cmp_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_cmp_expr *priv) { struct nft_offload_reg *reg = &ctx->regs[priv->sreg]; + union nft_cmp_offload_data _data, _datamask; u8 *mask = (u8 *)&flow->match.mask; u8 *key = (u8 *)&flow->match.key; + u8 *data, *datamask; if (priv->op != NFT_CMP_EQ || priv->len > reg->len) return -EOPNOTSUPP; - memcpy(key + reg->offset, &priv->data, reg->len); - memcpy(mask + reg->offset, ®->mask, reg->len); + if (reg->flags & NFT_OFFLOAD_F_NETWORK2HOST) { + nft_payload_n2h(&_data, (u8 *)&priv->data, reg->len); + nft_payload_n2h(&_datamask, (u8 *)®->mask, reg->len); + data = (u8 *)&_data; + datamask = (u8 *)&_datamask; + } else { + data = (u8 *)&priv->data; + datamask = (u8 *)®->mask; + } + + memcpy(key + reg->offset, data, reg->len); + memcpy(mask + reg->offset, datamask, reg->len); flow->match.dissector.used_keys |= BIT(reg->key); flow->match.dissector.offset[reg->key] = reg->base_offset; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 8e56f353ff35..b8dbd20a6a4c 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -591,19 +591,14 @@ nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int rev, int target) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; event = nfnl_msg_type(NFNL_SUBSYS_NFT_COMPAT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, NFTA_COMPAT_NAME, name) || nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) || nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target))) diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 85ed461ec24e..8edd3b3c173d 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -13,6 +13,7 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_offload.h> struct nft_counter { s64 bytes; @@ -248,6 +249,32 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src) return 0; } +static int nft_counter_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + /* No specific offload action is needed, but report success. */ + return 0; +} + +static void nft_counter_offload_stats(struct nft_expr *expr, + const struct flow_stats *stats) +{ + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + struct nft_counter *this_cpu; + seqcount_t *myseq; + + preempt_disable(); + this_cpu = this_cpu_ptr(priv->counter); + myseq = this_cpu_ptr(&nft_counter_seq); + + write_seqcount_begin(myseq); + this_cpu->packets += stats->pkts; + this_cpu->bytes += stats->bytes; + write_seqcount_end(myseq); + preempt_enable(); +} + static struct nft_expr_type nft_counter_type; static const struct nft_expr_ops nft_counter_ops = { .type = &nft_counter_type, @@ -258,6 +285,8 @@ static const struct nft_expr_ops nft_counter_ops = { .destroy_clone = nft_counter_destroy, .dump = nft_counter_dump, .clone = nft_counter_clone, + .offload = nft_counter_offload, + .offload_stats = nft_counter_offload_stats, }; static struct nft_expr_type nft_counter_type __read_mostly = { diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index d44a70c11b3f..f9437a0dcfef 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -11,6 +11,9 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> +#include <net/netns/generic.h> + +extern unsigned int nf_tables_net_id; struct nft_dynset { struct nft_set *set; @@ -161,13 +164,14 @@ static int nft_dynset_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_dynset *priv = nft_expr_priv(expr); u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; u64 timeout; int err, i; - lockdep_assert_held(&ctx->net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); if (tb[NFTA_DYNSET_SET_NAME] == NULL || tb[NFTA_DYNSET_OP] == NULL || diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 0e2c315c3b5e..82ec27bdf941 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -76,13 +76,13 @@ static int nft_limit_init(struct nft_limit *limit, return -EOVERFLOW; if (pkts) { - tokens = div_u64(limit->nsecs, limit->rate) * limit->burst; + tokens = div64_u64(limit->nsecs, limit->rate) * limit->burst; } else { /* The token bucket size limits the number of tokens can be * accumulated. tokens_max specifies the bucket size. * tokens_max = unit * (rate + burst) / rate. */ - tokens = div_u64(limit->nsecs * (limit->rate + limit->burst), + tokens = div64_u64(limit->nsecs * (limit->rate + limit->burst), limit->rate); } diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index a06a46b039c5..54f6c2035e84 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -128,6 +128,20 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { [NFTA_LOG_FLAGS] = { .type = NLA_U32 }, }; +static int nft_log_modprobe(struct net *net, enum nf_log_type t) +{ + switch (t) { + case NF_LOG_TYPE_LOG: + return nft_request_module(net, "%s", "nf_log_syslog"); + case NF_LOG_TYPE_ULOG: + return nft_request_module(net, "%s", "nfnetlink_log"); + case NF_LOG_TYPE_MAX: + break; + } + + return -ENOENT; +} + static int nft_log_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -197,8 +211,12 @@ static int nft_log_init(const struct nft_ctx *ctx, return 0; err = nf_logger_find_get(ctx->family, li->type); - if (err < 0) + if (err < 0) { + if (nft_log_modprobe(ctx->net, li->type) == -EAGAIN) + err = -EAGAIN; + goto err1; + } return 0; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index cb1c8c231880..501c5b24cc39 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -226,8 +226,9 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan, - vlan_tci, sizeof(__be16), reg); + NFT_OFFLOAD_MATCH_FLAGS(FLOW_DISSECTOR_KEY_VLAN, vlan, + vlan_tci, sizeof(__be16), reg, + NFT_OFFLOAD_F_NETWORK2HOST); break; case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto): if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) @@ -241,16 +242,18 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan, - vlan_tci, sizeof(__be16), reg); + NFT_OFFLOAD_MATCH_FLAGS(FLOW_DISSECTOR_KEY_CVLAN, cvlan, + vlan_tci, sizeof(__be16), reg, + NFT_OFFLOAD_F_NETWORK2HOST); break; case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) + sizeof(struct vlan_hdr): if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan, + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, cvlan, vlan_tpid, sizeof(__be16), reg); + nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; default: return -EOPNOTSUPP; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 6bd31a7a27fc..b7f8d2ed3cc2 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -24,6 +24,7 @@ #include <linux/audit.h> #include <linux/user_namespace.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp.h> @@ -38,6 +39,10 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); #define XT_PCPU_BLOCK_SIZE 4096 #define XT_MAX_TABLE_SIZE (512 * 1024 * 1024) +struct xt_pernet { + struct list_head tables[NFPROTO_NUMPROTO]; +}; + struct compat_delta { unsigned int offset; /* offset in kernel */ int delta; /* delta in 32bit user land */ @@ -55,7 +60,8 @@ struct xt_af { #endif }; -static struct xt_af *xt; +static unsigned int xt_pernet_id __read_mostly; +static struct xt_af *xt __read_mostly; static const char *const xt_prefix[NFPROTO_NUMPROTO] = { [NFPROTO_UNSPEC] = "x", @@ -733,7 +739,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; - int pad, off = xt_compat_match_offset(match); + int off = xt_compat_match_offset(match); u_int16_t msize = cm->u.user.match_size; char name[sizeof(m->u.user.name)]; @@ -743,9 +749,6 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, match->compat_from_user(m->data, cm->data); else memcpy(m->data, cm->data, msize - sizeof(*cm)); - pad = XT_ALIGN(match->matchsize) - match->matchsize; - if (pad > 0) - memset(m->data + match->matchsize, 0, pad); msize += off; m->u.user.match_size = msize; @@ -1116,7 +1119,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, { const struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; - int pad, off = xt_compat_target_offset(target); + int off = xt_compat_target_offset(target); u_int16_t tsize = ct->u.user.target_size; char name[sizeof(t->u.user.name)]; @@ -1126,9 +1129,6 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, target->compat_from_user(t->data, ct->data); else memcpy(t->data, ct->data, tsize - sizeof(*ct)); - pad = XT_ALIGN(target->targetsize) - target->targetsize; - if (pad > 0) - memset(t->data + target->targetsize, 0, pad); tsize += off; t->u.user.target_size = tsize; @@ -1203,10 +1203,11 @@ EXPORT_SYMBOL(xt_free_table_info); struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) { + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); struct xt_table *t, *found = NULL; mutex_lock(&xt[af].mutex); - list_for_each_entry(t, &net->xt.tables[af], list) + list_for_each_entry(t, &xt_net->tables[af], list) if (strcmp(t->name, name) == 0 && try_module_get(t->me)) return t; @@ -1214,7 +1215,8 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, goto out; /* Table doesn't exist in this netns, re-try init */ - list_for_each_entry(t, &init_net.xt.tables[af], list) { + xt_net = net_generic(&init_net, xt_pernet_id); + list_for_each_entry(t, &xt_net->tables[af], list) { int err; if (strcmp(t->name, name)) @@ -1237,8 +1239,9 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, if (!found) goto out; + xt_net = net_generic(net, xt_pernet_id); /* and once again: */ - list_for_each_entry(t, &net->xt.tables[af], list) + list_for_each_entry(t, &xt_net->tables[af], list) if (strcmp(t->name, name) == 0) return t; @@ -1423,9 +1426,10 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { - int ret; + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); struct xt_table_info *private; struct xt_table *t, *table; + int ret; /* Don't add one object to multiple lists. */ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); @@ -1436,7 +1440,7 @@ struct xt_table *xt_register_table(struct net *net, mutex_lock(&xt[table->af].mutex); /* Don't autoload: we'd eat our tail... */ - list_for_each_entry(t, &net->xt.tables[table->af], list) { + list_for_each_entry(t, &xt_net->tables[table->af], list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; goto unlock; @@ -1455,7 +1459,7 @@ struct xt_table *xt_register_table(struct net *net, /* save number of initial entries */ private->initial_entries = private->number; - list_add(&table->list, &net->xt.tables[table->af]); + list_add(&table->list, &xt_net->tables[table->af]); mutex_unlock(&xt[table->af].mutex); return table; @@ -1486,19 +1490,25 @@ EXPORT_SYMBOL_GPL(xt_unregister_table); #ifdef CONFIG_PROC_FS static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) { + u8 af = (unsigned long)PDE_DATA(file_inode(seq->file)); struct net *net = seq_file_net(seq); - u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file)); + struct xt_pernet *xt_net; + + xt_net = net_generic(net, xt_pernet_id); mutex_lock(&xt[af].mutex); - return seq_list_start(&net->xt.tables[af], *pos); + return seq_list_start(&xt_net->tables[af], *pos); } static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + u8 af = (unsigned long)PDE_DATA(file_inode(seq->file)); struct net *net = seq_file_net(seq); - u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file)); + struct xt_pernet *xt_net; + + xt_net = net_generic(net, xt_pernet_id); - return seq_list_next(v, &net->xt.tables[af], pos); + return seq_list_next(v, &xt_net->tables[af], pos); } static void xt_table_seq_stop(struct seq_file *seq, void *v) @@ -1864,24 +1874,28 @@ EXPORT_SYMBOL_GPL(xt_percpu_counter_free); static int __net_init xt_net_init(struct net *net) { + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; for (i = 0; i < NFPROTO_NUMPROTO; i++) - INIT_LIST_HEAD(&net->xt.tables[i]); + INIT_LIST_HEAD(&xt_net->tables[i]); return 0; } static void __net_exit xt_net_exit(struct net *net) { + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; for (i = 0; i < NFPROTO_NUMPROTO; i++) - WARN_ON_ONCE(!list_empty(&net->xt.tables[i])); + WARN_ON_ONCE(!list_empty(&xt_net->tables[i])); } static struct pernet_operations xt_net_ops = { .init = xt_net_init, .exit = xt_net_exit, + .id = &xt_pernet_id, + .size = sizeof(struct xt_pernet), }; static int __init xt_init(void) diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index a1e79b517c01..2ff75f7637b0 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -108,3 +108,4 @@ MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>"); MODULE_DESCRIPTION("Xtables: IPv4/IPv6 packet logging"); MODULE_ALIAS("ipt_LOG"); MODULE_ALIAS("ip6t_LOG"); +MODULE_SOFTDEP("pre: nf_log_syslog"); diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 6e83ce3000db..fb5793208059 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -79,3 +79,4 @@ static void __exit nflog_tg_exit(void) module_init(nflog_tg_init); module_exit(nflog_tg_exit); +MODULE_SOFTDEP("pre: nfnetlink_log"); diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index 349ab5609b1b..5582dce98cae 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -52,3 +52,4 @@ static void __exit trace_tg_exit(void) module_init(trace_tg_init); module_exit(trace_tg_exit); +MODULE_SOFTDEP("pre: nf_log_syslog"); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index dd488938447f..3a62f97acf39 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1019,7 +1019,6 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; } - netlink_lock_table(); if (nlk->netlink_bind && groups) { int group; @@ -1031,13 +1030,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (!err) continue; netlink_undo_bind(group, groups, sk); - goto unlock; + return err; } } /* No need for barriers here as we return to user-space without * using any of the bound attributes. */ + netlink_lock_table(); if (!bound) { err = nladdr->nl_pid ? netlink_insert(sk, nladdr->nl_pid) : diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c index 5971fb6f51cc..1150731126e2 100644 --- a/net/nfc/digital_dep.c +++ b/net/nfc/digital_dep.c @@ -1217,7 +1217,7 @@ static void digital_tg_recv_dep_req(struct nfc_digital_dev *ddev, void *arg, /* ACK */ if (ddev->atn_count) { - /* The target has previously recevied one or more ATN + /* The target has previously received one or more ATN * PDUs. */ ddev->atn_count = 0; diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index d257ed3b732a..a3b46f888803 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -108,11 +108,13 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) llcp_sock->service_name_len, GFP_KERNEL); if (!llcp_sock->service_name) { + nfc_llcp_local_put(llcp_sock->local); ret = -ENOMEM; goto put_dev; } llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock); if (llcp_sock->ssap == LLCP_SAP_MAX) { + nfc_llcp_local_put(llcp_sock->local); kfree(llcp_sock->service_name); llcp_sock->service_name = NULL; ret = -EADDRINUSE; @@ -671,6 +673,10 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, ret = -EISCONN; goto error; } + if (sk->sk_state == LLCP_CONNECTING) { + ret = -EINPROGRESS; + goto error; + } dev = nfc_get_device(addr->dev_idx); if (dev == NULL) { @@ -702,6 +708,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, llcp_sock->local = nfc_llcp_local_get(local); llcp_sock->ssap = nfc_llcp_get_local_ssap(local); if (llcp_sock->ssap == LLCP_SAP_MAX) { + nfc_llcp_local_put(llcp_sock->local); ret = -ENOMEM; goto put_dev; } @@ -743,9 +750,12 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, sock_unlink: nfc_llcp_sock_unlink(&local->connecting_sockets, sk); + kfree(llcp_sock->service_name); + llcp_sock->service_name = NULL; sock_llcp_release: nfc_llcp_put_ssap(local, llcp_sock->ssap); + nfc_llcp_local_put(llcp_sock->local); put_dev: nfc_put_device(dev); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 59257400697d..9a585332ea84 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1507,7 +1507,7 @@ static void nci_rx_work(struct work_struct *work) } } - /* check if a data exchange timout has occurred */ + /* check if a data exchange timeout has occurred */ if (test_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags)) { /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index 1204c438e87d..6af5752cde09 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -234,7 +234,7 @@ static void nci_uart_tty_wakeup(struct tty_struct *tty) * Called by tty low level driver when receive data is * available. * - * Arguments: tty pointer to tty isntance data + * Arguments: tty pointer to tty instance data * data pointer to received data * flags pointer to flags for data * count count of received data in bytes @@ -374,7 +374,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data, data += chunk_len; count -= chunk_len; - /* Chcek if packet is fully received */ + /* Check if packet is fully received */ if (nu->rx_packet_len == nu->rx_skb->len) { /* Pass RX packet to driver */ if (nu->ops.recv(nu, nu->rx_skb) != 0) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 71cec03e8612..cadb6a29b285 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -809,8 +809,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, err = nf_nat_packet(ct, ctinfo, hooknum, skb); push: - skb_push(skb, nh_off); - skb_postpush_rcsum(skb, skb->data, nh_off); + skb_push_rcsum(skb, nh_off); return err; } @@ -1322,8 +1321,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, else err = ovs_ct_lookup(net, key, info, skb); - skb_push(skb, nh_ofs); - skb_postpush_rcsum(skb, skb->data, nh_ofs); + skb_push_rcsum(skb, nh_ofs); if (err) kfree_skb(skb); return err; @@ -2034,10 +2032,10 @@ static int ovs_ct_limit_del_zone_limit(struct nlattr *nla_zone_limit, static int ovs_ct_limit_get_default_limit(struct ovs_ct_limit_info *info, struct sk_buff *reply) { - struct ovs_zone_limit zone_limit; - - zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE; - zone_limit.limit = info->default_limit; + struct ovs_zone_limit zone_limit = { + .zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE, + .limit = info->default_limit, + }; return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit); } diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 57d6436e6f6a..8e1a88f13622 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -44,10 +44,9 @@ static void netdev_port_receive(struct sk_buff *skb) if (unlikely(!skb)) return; - if (skb->dev->type == ARPHRD_ETHER) { - skb_push(skb, ETH_HLEN); - skb_postpush_rcsum(skb, skb->data, ETH_HLEN); - } + if (skb->dev->type == ARPHRD_ETHER) + skb_push_rcsum(skb, ETH_HLEN); + ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); return; error: diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 118d585337d7..ba96db1880ea 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1359,7 +1359,7 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, struct packet_sock *po, *po_next, *po_skip = NULL; unsigned int i, j, room = ROOM_NONE; - po = pkt_sk(f->arr[idx]); + po = pkt_sk(rcu_dereference(f->arr[idx])); if (try_self) { room = packet_rcv_has_room(po, skb); @@ -1371,7 +1371,7 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, i = j = min_t(int, po->rollover->sock, num - 1); do { - po_next = pkt_sk(f->arr[i]); + po_next = pkt_sk(rcu_dereference(f->arr[i])); if (po_next != po_skip && !READ_ONCE(po_next->pressure) && packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { if (i != j) @@ -1466,7 +1466,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) idx = fanout_demux_rollover(f, skb, idx, true, num); - po = pkt_sk(f->arr[idx]); + po = pkt_sk(rcu_dereference(f->arr[idx])); return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } @@ -1480,7 +1480,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po) struct packet_fanout *f = po->fanout; spin_lock(&f->lock); - f->arr[f->num_members] = sk; + rcu_assign_pointer(f->arr[f->num_members], sk); smp_wmb(); f->num_members++; if (f->num_members == 1) @@ -1495,11 +1495,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_lock(&f->lock); for (i = 0; i < f->num_members; i++) { - if (f->arr[i] == sk) + if (rcu_dereference_protected(f->arr[i], + lockdep_is_held(&f->lock)) == sk) break; } BUG_ON(i >= f->num_members); - f->arr[i] = f->arr[f->num_members - 1]; + rcu_assign_pointer(f->arr[i], + rcu_dereference_protected(f->arr[f->num_members - 1], + lockdep_is_held(&f->lock))); f->num_members--; if (f->num_members == 0) __dev_remove_pack(&f->prot_hook); diff --git a/net/packet/internal.h b/net/packet/internal.h index 5f61e59ebbff..48af35b1aed2 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -94,7 +94,7 @@ struct packet_fanout { spinlock_t lock; refcount_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; - struct sock *arr[]; + struct sock __rcu *arr[]; }; struct packet_rollover { diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 4b46c69e14ab..c0477bec09bd 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -272,7 +272,10 @@ static int qrtr_tx_wait(struct qrtr_node *node, int dest_node, int dest_port, flow = kzalloc(sizeof(*flow), GFP_KERNEL); if (flow) { init_waitqueue_head(&flow->resume_tx); - radix_tree_insert(&node->qrtr_tx_flow, key, flow); + if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) { + kfree(flow); + flow = NULL; + } } } mutex_unlock(&node->qrtr_tx_lock); diff --git a/net/rds/message.c b/net/rds/message.c index 071a261fdaab..4fc66ff0f1ec 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -180,6 +180,7 @@ void rds_message_put(struct rds_message *rm) rds_message_purge(rm); kfree(rm); + rm = NULL; } } EXPORT_SYMBOL_GPL(rds_message_put); @@ -347,8 +348,9 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE); rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); if (IS_ERR(rm->data.op_sg)) { + void *err = ERR_CAST(rm->data.op_sg); rds_message_put(rm); - return ERR_CAST(rm->data.op_sg); + return err; } for (i = 0; i < rm->data.op_nents; ++i) { diff --git a/net/rds/send.c b/net/rds/send.c index 53444397de66..ee7214ea0fdb 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -665,7 +665,7 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status) unlock_and_drop: spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); - if (was_on_sock) + if (was_on_sock && rm) rds_message_put(rm); } diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 68d6ef9e59fc..ac15a944573f 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -69,7 +69,7 @@ struct rfkill { struct rfkill_int_event { struct list_head list; - struct rfkill_event ev; + struct rfkill_event_ext ev; }; struct rfkill_data { @@ -253,7 +253,8 @@ static void rfkill_global_led_trigger_unregister(void) } #endif /* CONFIG_RFKILL_LEDS */ -static void rfkill_fill_event(struct rfkill_event *ev, struct rfkill *rfkill, +static void rfkill_fill_event(struct rfkill_event_ext *ev, + struct rfkill *rfkill, enum rfkill_operation op) { unsigned long flags; @@ -1237,7 +1238,7 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { struct rfkill *rfkill; - struct rfkill_event ev; + struct rfkill_event_ext ev; int ret; /* we don't need the 'hard' variable but accept it */ diff --git a/net/rfkill/input.c b/net/rfkill/input.c index 4b01baea1d4a..598d0a61bda7 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -36,7 +36,7 @@ module_param_named(master_switch_mode, rfkill_master_switch_mode, uint, 0); MODULE_PARM_DESC(master_switch_mode, "SW_RFKILL_ALL ON should: 0=do nothing (only unlock); 1=restore; 2=unblock all"); -static spinlock_t rfkill_op_lock; +static DEFINE_SPINLOCK(rfkill_op_lock); static bool rfkill_op_pending; static unsigned long rfkill_sw_pending[BITS_TO_LONGS(NUM_RFKILL_TYPES)]; static unsigned long rfkill_sw_state[BITS_TO_LONGS(NUM_RFKILL_TYPES)]; @@ -330,8 +330,6 @@ int __init rfkill_handler_init(void) return -EINVAL; } - spin_lock_init(&rfkill_op_lock); - /* Avoid delay at first schedule */ rfkill_last_scheduled = jiffies - msecs_to_jiffies(RFKILL_OPS_DELAY) - 1; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index b919826939e0..f6d5755d669e 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -158,7 +158,7 @@ static int __tcf_action_put(struct tc_action *p, bool bind) return 0; } -int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) +static int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) { int ret = 0; @@ -184,7 +184,18 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) return ret; } -EXPORT_SYMBOL(__tcf_idr_release); + +int tcf_idr_release(struct tc_action *a, bool bind) +{ + const struct tc_action_ops *ops = a->ops; + int ret; + + ret = __tcf_idr_release(a, bind, false); + if (ret == ACT_P_DELETED) + module_put(ops->owner); + return ret; +} +EXPORT_SYMBOL(tcf_idr_release); static size_t tcf_action_shared_attrs_size(const struct tc_action *act) { @@ -493,6 +504,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, } p->idrinfo = idrinfo; + __module_get(ops->owner); p->ops = ops; *a = p; return 0; @@ -992,7 +1004,8 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct tc_action_ops *a_o, bool rtnl_held, + struct tc_action_ops *a_o, int *init_res, + bool rtnl_held, struct netlink_ext_ack *extack) { struct nla_bitfield32 flags = { 0, 0 }; @@ -1028,6 +1041,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, } if (err < 0) goto err_out; + *init_res = err; if (!name && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->act_cookie, cookie); @@ -1035,13 +1049,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (!name) a->hw_stats = hw_stats; - /* module count goes up only when brand new policy is created - * if it exists and is only bound to in a_o->init() then - * ACT_P_CREATED is not returned (a zero is). - */ - if (err != ACT_P_CREATED) - module_put(a_o->owner); - return a; err_out: @@ -1056,7 +1063,7 @@ err_out: int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct tc_action *actions[], size_t *attr_size, + struct tc_action *actions[], int init_res[], size_t *attr_size, bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; @@ -1084,7 +1091,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind, - ops[i - 1], rtnl_held, extack); + ops[i - 1], &init_res[i - 1], rtnl_held, + extack); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; @@ -1100,7 +1108,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, tcf_idr_insert_many(actions); *attr_size = tcf_action_full_attrs_size(sz); - return i - 1; + err = i - 1; + goto err_mod; err: tcf_action_destroy(actions, bind); @@ -1497,12 +1506,13 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, struct netlink_ext_ack *extack) { size_t attr_size = 0; - int loop, ret; + int loop, ret, i; struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; + int init_res[TCA_ACT_MAX_PRIO] = {}; for (loop = 0; loop < 10; loop++) { ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, - actions, &attr_size, true, extack); + actions, init_res, &attr_size, true, extack); if (ret != -EAGAIN) break; } @@ -1510,8 +1520,12 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, if (ret < 0) return ret; ret = tcf_add_notify(net, n, actions, portid, attr_size, extack); - if (ovr) - tcf_action_put_many(actions); + + /* only put existing actions */ + for (i = 0; i < TCA_ACT_MAX_PRIO; i++) + if (init_res[i] == ACT_P_CREATED) + actions[i] = NULL; + tcf_action_put_many(actions); return ret; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d3db70865d66..40fbea626dfd 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -646,7 +646,7 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb) struct net_device *dev = block_cb->indr.dev; struct Qdisc *sch = block_cb->indr.sch; struct netlink_ext_ack extack = {}; - struct flow_block_offload bo; + struct flow_block_offload bo = {}; tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND, block_cb->indr.binder_type, @@ -3040,6 +3040,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, { #ifdef CONFIG_NET_CLS_ACT { + int init_res[TCA_ACT_MAX_PRIO] = {}; struct tc_action *act; size_t attr_size = 0; @@ -3051,12 +3052,11 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, return PTR_ERR(a_o); act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, "police", ovr, - TCA_ACT_BIND, a_o, rtnl_held, - extack); - if (IS_ERR(act)) { - module_put(a_o->owner); + TCA_ACT_BIND, a_o, init_res, + rtnl_held, extack); + module_put(a_o->owner); + if (IS_ERR(act)) return PTR_ERR(act); - } act->type = exts->type = TCA_OLD_COMPAT; exts->actions[0] = act; @@ -3067,8 +3067,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL, ovr, TCA_ACT_BIND, - exts->actions, &attr_size, - rtnl_held, extack); + exts->actions, init_res, + &attr_size, rtnl_held, extack); if (err < 0) return err; exts->nr_actions = err; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 62e12cb41a3e..081c11d5717c 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1675,9 +1675,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, cl->parent->common.classid, NULL); if (q->offload) { - if (new_q) + if (new_q) { htb_set_lockdep_class_child(new_q); - htb_parent_to_leaf_offload(sch, dev_queue, new_q); + htb_parent_to_leaf_offload(sch, dev_queue, new_q); + } } } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 2f1f0a378408..6af6b95bdb67 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -134,6 +134,9 @@ teql_destroy(struct Qdisc *sch) struct teql_sched_data *dat = qdisc_priv(sch); struct teql_master *master = dat->m; + if (!master) + return; + prev = master->slaves; if (prev) { do { diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index c3e89c776e66..bd08807c9e44 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -664,8 +664,8 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) if (!(type & IPV6_ADDR_UNICAST)) return 0; - return sp->inet.freebind || net->ipv6.sysctl.ip_nonlocal_bind || - ipv6_chk_addr(net, in6, NULL, 0); + return ipv6_can_nonlocal_bind(net, &sp->inet) || + ipv6_chk_addr(net, in6, NULL, 0); } /* This function checks if the address is a valid address to be used for @@ -954,8 +954,7 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) net = sock_net(&opt->inet.sk); rcu_read_lock(); dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); - if (!dev || !(opt->inet.freebind || - net->ipv6.sysctl.ip_nonlocal_bind || + if (!dev || !(ipv6_can_nonlocal_bind(net, &opt->inet) || ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0))) { rcu_read_unlock(); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 54e6a708d06e..5f9a7c028274 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3147,7 +3147,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, * primary. */ if (af->is_any(&addr)) - memcpy(&addr.v4, sctp_source(asconf), sizeof(addr)); + memcpy(&addr, sctp_source(asconf), sizeof(addr)); if (security_sctp_bind_connect(asoc->ep->base.sk, SCTP_PARAM_SET_PRIMARY, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 76a388b5021c..b7b90135c36a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1520,11 +1520,9 @@ static void sctp_close(struct sock *sk, long timeout) /* Supposedly, no process has access to the socket, but * the net layers still may. - * Also, sctp_destroy_sock() needs to be called with addr_wq_lock - * held and that should be grabbed before socket lock. */ - spin_lock_bh(&net->sctp.addr_wq_lock); - bh_lock_sock_nested(sk); + local_bh_disable(); + bh_lock_sock(sk); /* Hold the sock, since sk_common_release() will put sock_put() * and we have just a little more cleanup. @@ -1533,7 +1531,7 @@ static void sctp_close(struct sock *sk, long timeout) sk_common_release(sk); bh_unlock_sock(sk); - spin_unlock_bh(&net->sctp.addr_wq_lock); + local_bh_enable(); sock_put(sk); @@ -4993,9 +4991,6 @@ static int sctp_init_sock(struct sock *sk) sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1); - /* Nothing can fail after this block, otherwise - * sctp_destroy_sock() will be called without addr_wq_lock held - */ if (net->sctp.default_auto_asconf) { spin_lock(&sock_net(sk)->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, @@ -5030,7 +5025,9 @@ static void sctp_destroy_sock(struct sock *sk) if (sp->do_auto_asconf) { sp->do_auto_asconf = 0; + spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock); list_del(&sp->auto_asconf_list); + spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock); } sctp_endpoint_free(sp->ep); local_bh_disable(); diff --git a/net/sysctl_net.c b/net/sysctl_net.c index d14dab8b6774..f6cb0d4d114c 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -115,9 +115,57 @@ out1: goto out; } +/* Verify that sysctls for non-init netns are safe by either: + * 1) being read-only, or + * 2) having a data pointer which points outside of the global kernel/module + * data segment, and rather into the heap where a per-net object was + * allocated. + */ +static void ensure_safe_net_sysctl(struct net *net, const char *path, + struct ctl_table *table) +{ + struct ctl_table *ent; + + pr_debug("Registering net sysctl (net %p): %s\n", net, path); + for (ent = table; ent->procname; ent++) { + unsigned long addr; + const char *where; + + pr_debug(" procname=%s mode=%o proc_handler=%ps data=%p\n", + ent->procname, ent->mode, ent->proc_handler, ent->data); + + /* If it's not writable inside the netns, then it can't hurt. */ + if ((ent->mode & 0222) == 0) { + pr_debug(" Not writable by anyone\n"); + continue; + } + + /* Where does data point? */ + addr = (unsigned long)ent->data; + if (is_module_address(addr)) + where = "module"; + else if (core_kernel_data(addr)) + where = "kernel"; + else + continue; + + /* If it is writable and points to kernel/module global + * data, then it's probably a netns leak. + */ + WARN(1, "sysctl %s/%s: data points to %s global data: %ps\n", + path, ent->procname, where, ent->data); + + /* Make it "safe" by dropping writable perms */ + ent->mode &= ~0222; + } +} + struct ctl_table_header *register_net_sysctl(struct net *net, const char *path, struct ctl_table *table) { + if (!net_eq(net, &init_net)) + ensure_safe_net_sysctl(net, path, table); + return __register_sysctl_table(&net->sysctls, path, table); } EXPORT_SYMBOL_GPL(register_net_sysctl); diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 76b8428c94a7..e5c43d4d5a75 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1943,12 +1943,13 @@ static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead, goto rcv; if (tipc_aead_clone(&tmp, aead) < 0) goto rcv; + WARN_ON(!refcount_inc_not_zero(&tmp->refcnt)); if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) { tipc_aead_free(&tmp->rcu); goto rcv; } tipc_aead_put(aead); - aead = tipc_aead_get((struct tipc_aead __force __rcu *)tmp); + aead = tmp; } if (unlikely(err)) { diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 6db9f9e7c0ac..b4017945d3e5 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -553,7 +553,9 @@ exit: * * On entry, a non-zero 'sk->node' indicates the node where we want lookup to be * performed, which may not be this one. + * * On exit: + * * - If lookup is deferred to another node, leave 'sk->node' unchanged and * return 'true'. * - If lookup is successful, set the 'sk->node' and 'sk->ref' (== portid) which diff --git a/net/tipc/net.c b/net/tipc/net.c index 3f927949bb23..a130195af188 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -89,7 +89,7 @@ * - A spin lock to protect the registry of kernel/driver users (reg.c) * - A global spin_lock (tipc_port_lock), which only task is to ensure * consistency where more than one port is involved in an operation, - * i.e., whe a port is part of a linked list of ports. + * i.e., when a port is part of a linked list of ports. * There are two such lists; 'port_list', which is used for management, * and 'wait_list', which is used to queue ports during congestion. * diff --git a/net/tipc/node.c b/net/tipc/node.c index 707d0dc71fad..8217905348f4 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1739,7 +1739,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, } /* tipc_node_xmit_skb(): send single buffer to destination - * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE + * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE * messages, which will not be rejected * The only exception is datagram messages rerouted after secondary * lookup, which are rare and safe to dispose of anyway. diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f21162aa0cf7..58935cd0d068 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1262,7 +1262,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { skb_queue_splice_tail_init(&tmpq, inputq); - kfree_skb(__skb_dequeue(arrvq)); + __skb_dequeue(arrvq); } spin_unlock_bh(&inputq->lock); __skb_queue_purge(&tmpq); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 21e75e28e86a..e556d2cdc064 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -414,8 +414,10 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) err = ip_mc_join_group(sk, &mreqn); #if IS_ENABLED(CONFIG_IPV6) } else { + lock_sock(sk); err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); + release_sock(sk); #endif } return err; diff --git a/net/wireless/core.c b/net/wireless/core.c index a2785379df6e..adfbcb33fb8f 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1092,14 +1092,15 @@ void wiphy_free(struct wiphy *wiphy) } EXPORT_SYMBOL(wiphy_free); -void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) +void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked, + enum rfkill_hard_block_reasons reason) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - if (rfkill_set_hw_state(rdev->rfkill, blocked)) + if (rfkill_set_hw_state_reason(rdev->rfkill, blocked, reason)) schedule_work(&rdev->rfkill_block); } -EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); +EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason); void cfg80211_cqm_config_free(struct wireless_dev *wdev) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 034af85f79d8..7e811a3b0987 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #include <linux/if.h> @@ -229,9 +229,13 @@ static int validate_beacon_head(const struct nlattr *attr, unsigned int len = nla_len(attr); const struct element *elem; const struct ieee80211_mgmt *mgmt = (void *)data; - bool s1g_bcn = ieee80211_is_s1g_beacon(mgmt->frame_control); unsigned int fixedlen, hdrlen; + bool s1g_bcn; + if (len < offsetofend(typeof(*mgmt), frame_control)) + goto err; + + s1g_bcn = ieee80211_is_s1g_beacon(mgmt->frame_control); if (s1g_bcn) { fixedlen = offsetof(struct ieee80211_ext, u.s1g_beacon.variable); @@ -309,6 +313,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = { [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED] = { .type = NLA_FLAG }, + [NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK] = { .type = NLA_FLAG }, }; static const struct nla_policy @@ -407,9 +412,10 @@ static const struct nla_policy nl80211_fils_discovery_policy[NL80211_FILS_DISCOVERY_ATTR_MAX + 1] = { [NL80211_FILS_DISCOVERY_ATTR_INT_MIN] = NLA_POLICY_MAX(NLA_U32, 10000), [NL80211_FILS_DISCOVERY_ATTR_INT_MAX] = NLA_POLICY_MAX(NLA_U32, 10000), - NLA_POLICY_RANGE(NLA_BINARY, - NL80211_FILS_DISCOVERY_TMPL_MIN_LEN, - IEEE80211_MAX_DATA_LEN), + [NL80211_FILS_DISCOVERY_ATTR_TMPL] = + NLA_POLICY_RANGE(NLA_BINARY, + NL80211_FILS_DISCOVERY_TMPL_MIN_LEN, + IEEE80211_MAX_DATA_LEN), }; static const struct nla_policy @@ -5485,7 +5491,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP], ¶ms); if (err) - return err; + goto out; } nl80211_calculate_ap_params(¶ms); diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index a95c79d18349..6bdd96408022 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018 - 2021 Intel Corporation */ #ifndef __PMSR_H #define __PMSR_H @@ -158,6 +158,16 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, return -EINVAL; } + out->ftm.lmr_feedback = + !!tb[NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK]; + if (!out->ftm.trigger_based && !out->ftm.non_trigger_based && + out->ftm.lmr_feedback) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK], + "FTM: LMR feedback set for EDCA based ranging"); + return -EINVAL; + } + return 0; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 68db914df642..0406ce7334fa 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -126,7 +126,7 @@ static int reg_num_devs_support_basehint; * is relevant for all registered devices. */ static bool reg_is_indoor; -static spinlock_t reg_indoor_lock; +static DEFINE_SPINLOCK(reg_indoor_lock); /* Used to track the userspace process controlling the indoor setting */ static u32 reg_is_indoor_portid; @@ -210,11 +210,11 @@ static struct regulatory_request *get_last_request(void) /* Used to queue up regulatory hints */ static LIST_HEAD(reg_requests_list); -static spinlock_t reg_requests_lock; +static DEFINE_SPINLOCK(reg_requests_lock); /* Used to queue up beacon hints for review */ static LIST_HEAD(reg_pending_beacons); -static spinlock_t reg_pending_beacons_lock; +static DEFINE_SPINLOCK(reg_pending_beacons_lock); /* Used to keep track of processed beacon hints */ static LIST_HEAD(reg_beacon_list); @@ -4262,10 +4262,6 @@ int __init regulatory_init(void) if (IS_ERR(reg_pdev)) return PTR_ERR(reg_pdev); - spin_lock_init(®_requests_lock); - spin_lock_init(®_pending_beacons_lock); - spin_lock_init(®_indoor_lock); - rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom); user_alpha2[0] = '9'; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 019952d4fc7d..4f06c1825029 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -589,7 +589,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, elem = cfg80211_find_elem(WLAN_EID_REDUCED_NEIGHBOR_REPORT, ies->data, ies->len); - if (!elem || elem->datalen > IEEE80211_MAX_SSID_LEN) + if (!elem) return 0; pos = elem->data; @@ -1751,6 +1751,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, if (rdev->bss_entries >= bss_entries_limit && !cfg80211_bss_expire_oldest(rdev)) { + if (!list_empty(&new->hidden_list)) + list_del(&new->hidden_list); kfree(new); goto drop; } @@ -2352,14 +2354,16 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, return NULL; if (ext) { - struct ieee80211_s1g_bcn_compat_ie *compat; - u8 *ie; + const struct ieee80211_s1g_bcn_compat_ie *compat; + const struct element *elem; - ie = (void *)cfg80211_find_ie(WLAN_EID_S1G_BCN_COMPAT, - variable, ielen); - if (!ie) + elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT, + variable, ielen); + if (!elem) + return NULL; + if (elem->datalen < sizeof(*compat)) return NULL; - compat = (void *)(ie + 2); + compat = (void *)elem->data; bssid = ext->u.s1g_beacon.sa; capability = le16_to_cpu(compat->compat_info); beacon_int = le16_to_cpu(compat->beacon_int); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 07756ca5e3b5..08a70b4f090c 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -529,7 +529,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, cfg80211_sme_free(wdev); } - if (WARN_ON(wdev->conn)) + if (wdev->conn) return -EINPROGRESS; wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); diff --git a/net/wireless/util.c b/net/wireless/util.c index 1bf0200f562a..382c5262d997 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -24,7 +24,7 @@ #include "rdev-ops.h" -struct ieee80211_rate * +const struct ieee80211_rate * ieee80211_get_response_rate(struct ieee80211_supported_band *sband, u32 basic_rates, int bitrate) { diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index d8e8a11ca845..a20aec9d7393 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -216,7 +216,7 @@ static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, case XFRM_MSG_GETSADINFO: case XFRM_MSG_GETSPDINFO: default: - WARN_ONCE(1, "unsupported nlmsg_type %d", nlh_src->nlmsg_type); + pr_warn_once("unsupported nlmsg_type %d\n", nlh_src->nlmsg_type); return ERR_PTR(-EOPNOTSUPP); } @@ -277,7 +277,7 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) return xfrm_nla_cpy(dst, src, nla_len(src)); default: BUILD_BUG_ON(XFRMA_MAX != XFRMA_IF_ID); - WARN_ONCE(1, "unsupported nla_type %d", src->nla_type); + pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } } @@ -315,8 +315,10 @@ static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src struct sk_buff *new = NULL; int err; - if (WARN_ON_ONCE(type >= ARRAY_SIZE(xfrm_msg_min))) + if (type >= ARRAY_SIZE(xfrm_msg_min)) { + pr_warn_once("unsupported nlmsg_type %d\n", nlh_src->nlmsg_type); return -EOPNOTSUPP; + } if (skb_shinfo(skb)->frag_list == NULL) { new = alloc_skb(skb->len + skb_tailroom(skb), GFP_ATOMIC); @@ -378,6 +380,10 @@ static int xfrm_attr_cpy32(void *dst, size_t *pos, const struct nlattr *src, struct nlmsghdr *nlmsg = dst; struct nlattr *nla; + /* xfrm_user_rcv_msg_compat() relies on fact that 32-bit messages + * have the same len or shorted than 64-bit ones. + * 32-bit translation that is bigger than 64-bit original is unexpected. + */ if (WARN_ON_ONCE(copy_len > payload)) copy_len = payload; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index edf11893dbe8..6d6917b68856 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -134,8 +134,6 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } - xo->flags |= XFRM_XMIT; - if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) { struct sk_buff *segs; diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 495b1f5c979b..8831f5a9e992 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -306,6 +306,8 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } else { + if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) + goto xmit; icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); } @@ -314,6 +316,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) return -EMSGSIZE; } +xmit: xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = tdev; diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 4d422447aadc..2e8afe078d61 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -41,19 +41,16 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) const int plen = skb->len; int dlen = IPCOMP_SCRATCH_SIZE; const u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + u8 *scratch = *this_cpu_ptr(ipcomp_scratches); + struct crypto_comp *tfm = *this_cpu_ptr(ipcd->tfms); int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); int len; if (err) - goto out; + return err; - if (dlen < (plen + sizeof(struct ip_comp_hdr))) { - err = -EINVAL; - goto out; - } + if (dlen < (plen + sizeof(struct ip_comp_hdr))) + return -EINVAL; len = dlen - plen; if (len > skb_tailroom(skb)) @@ -68,16 +65,14 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) skb_frag_t *frag; struct page *page; - err = -EMSGSIZE; if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) - goto out; + return -EMSGSIZE; frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; page = alloc_page(GFP_ATOMIC); - err = -ENOMEM; if (!page) - goto out; + return -ENOMEM; __skb_frag_set_page(frag, page); @@ -96,11 +91,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) skb_shinfo(skb)->nr_frags++; } - err = 0; - -out: - put_cpu(); - return err; + return 0; } int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index a7ab19353313..e4cb0ff4dcf4 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -503,22 +503,22 @@ out: return err; } -int xfrm_output_resume(struct sk_buff *skb, int err) +int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err) { struct net *net = xs_net(skb_dst(skb)->xfrm); while (likely((err = xfrm_output_one(skb, err)) == 0)) { nf_reset_ct(skb); - err = skb_dst(skb)->ops->local_out(net, skb->sk, skb); + err = skb_dst(skb)->ops->local_out(net, sk, skb); if (unlikely(err != 1)) goto out; if (!skb_dst(skb)->xfrm) - return dst_output(net, skb->sk, skb); + return dst_output(net, sk, skb); err = nf_hook(skb_dst(skb)->ops->family, - NF_INET_POST_ROUTING, net, skb->sk, skb, + NF_INET_POST_ROUTING, net, sk, skb, NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; @@ -534,7 +534,7 @@ EXPORT_SYMBOL_GPL(xfrm_output_resume); static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { - return xfrm_output_resume(skb, 1); + return xfrm_output_resume(sk, skb, 1); } static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -660,6 +660,12 @@ static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) { int err; + if (x->outer_mode.encap == XFRM_MODE_BEET && + ip_is_fragment(ip_hdr(skb))) { + net_warn_ratelimited("BEET mode doesn't support inner IPv4 fragments\n"); + return -EAFNOSUPPORT; + } + err = xfrm4_tunnel_check_size(skb); if (err) return err; @@ -705,8 +711,15 @@ out: static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) + unsigned int ptr = 0; int err; + if (x->outer_mode.encap == XFRM_MODE_BEET && + ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL) >= 0) { + net_warn_ratelimited("BEET mode doesn't support inner IPv6 fragments\n"); + return -EAFNOSUPPORT; + } + err = xfrm6_tunnel_check_size(skb); if (err) return err; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 156347fd7e2e..ce500f847b99 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3326,39 +3326,6 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) fl4->fl4_icmp_code = icmp[1]; } break; - case IPPROTO_ESP: - if (xprth + 4 < skb->data || - pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be32 *ehdr; - - xprth = skb_network_header(skb) + ihl * 4; - ehdr = (__be32 *)xprth; - - fl4->fl4_ipsec_spi = ehdr[0]; - } - break; - case IPPROTO_AH: - if (xprth + 8 < skb->data || - pskb_may_pull(skb, xprth + 8 - skb->data)) { - __be32 *ah_hdr; - - xprth = skb_network_header(skb) + ihl * 4; - ah_hdr = (__be32 *)xprth; - - fl4->fl4_ipsec_spi = ah_hdr[1]; - } - break; - case IPPROTO_COMP: - if (xprth + 4 < skb->data || - pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ipcomp_hdr; - - xprth = skb_network_header(skb) + ihl * 4; - ipcomp_hdr = (__be16 *)xprth; - - fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); - } - break; case IPPROTO_GRE: if (xprth + 12 < skb->data || pskb_may_pull(skb, xprth + 12 - skb->data)) { @@ -3377,7 +3344,6 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) } break; default: - fl4->fl4_ipsec_spi = 0; break; } } @@ -3470,12 +3436,7 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) fl6->flowi6_proto = nexthdr; return; #endif - /* XXX Why are there these headers? */ - case IPPROTO_AH: - case IPPROTO_ESP: - case IPPROTO_COMP: default: - fl6->fl6_ipsec_spi = 0; fl6->flowi6_proto = nexthdr; return; } @@ -4173,9 +4134,6 @@ void __init xfrm_init(void) #ifdef CONFIG_XFRM_ESPINTCP espintcp_init(); #endif - - RCU_INIT_POINTER(xfrm_if_cb, NULL); - synchronize_rcu(); } #ifdef CONFIG_AUDITSYSCALL diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d01ca1a18418..4496f7efa220 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -44,7 +44,6 @@ static void xfrm_state_gc_task(struct work_struct *work); */ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); static struct kmem_cache *xfrm_state_cache __ro_after_init; static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); @@ -140,7 +139,7 @@ static void xfrm_hash_resize(struct work_struct *work) } spin_lock_bh(&net->xfrm.xfrm_state_lock); - write_seqcount_begin(&xfrm_state_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net); @@ -156,7 +155,7 @@ static void xfrm_hash_resize(struct work_struct *work) rcu_assign_pointer(net->xfrm.state_byspi, nspi); net->xfrm.state_hmask = nhashmask; - write_seqcount_end(&xfrm_state_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_state_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); @@ -1063,7 +1062,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, to_put = NULL; - sequence = read_seqcount_begin(&xfrm_state_hash_generation); + sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); rcu_read_lock(); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); @@ -1176,7 +1175,7 @@ out: if (to_put) xfrm_state_put(to_put); - if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) { + if (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)) { *err = -EAGAIN; if (x) { xfrm_state_put(x); @@ -2666,6 +2665,8 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_num = 0; INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); spin_lock_init(&net->xfrm.xfrm_state_lock); + seqcount_spinlock_init(&net->xfrm.xfrm_state_hash_generation, + &net->xfrm.xfrm_state_lock); return 0; out_byspi: diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index df8bc8fc724c..f0aecee4d539 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3480,18 +3480,22 @@ static int __net_init xfrm_user_net_init(struct net *net) return 0; } +static void __net_exit xfrm_user_net_pre_exit(struct net *net) +{ + RCU_INIT_POINTER(net->xfrm.nlsk, NULL); +} + static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list) { struct net *net; - list_for_each_entry(net, net_exit_list, exit_list) - RCU_INIT_POINTER(net->xfrm.nlsk, NULL); - synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) netlink_kernel_release(net->xfrm.nlsk_stash); } static struct pernet_operations xfrm_user_net_ops = { .init = xfrm_user_net_init, + .pre_exit = xfrm_user_net_pre_exit, .exit_batch = xfrm_user_net_exit, }; |