From 68ca5d4eebb8c4de246ee5f634eee26bc689562d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 19 Mar 2024 16:38:50 -0700 Subject: bpf: support BPF cookie in raw tracepoint (raw_tp, tp_btf) programs Wire up BPF cookie for raw tracepoint programs (both BTF and non-BTF aware variants). This brings them up to part w.r.t. BPF cookie usage with classic tracepoint and fentry/fexit programs. Acked-by: Stanislav Fomichev Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Message-ID: <20240319233852.1977493-4-andrii@kernel.org> Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3c42b9f1bada..9585f5345353 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1662,8 +1662,10 @@ union bpf_attr { } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ - __u64 name; - __u32 prog_fd; + __u64 name; + __u32 prog_fd; + __u32 :32; + __aligned_u64 cookie; } raw_tracepoint; struct { /* anonymous struct for BPF_BTF_LOAD */ -- cgit v1.2.3 From 1c7b963c20650f1ffd787a088680c9a086ee349f Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 19 Mar 2024 11:26:23 -0700 Subject: wifi: nl80211: rename enum plink_actions kernel-doc flagged the following issue: include/uapi/linux/nl80211.h:6081: warning: expecting prototype for enum nl80211_plink_action. Prototype was for enum plink_actions instead This is because the documentation doesn't match the code. Normally the correct fix for such an issue is to modify the documentation to match the code. However, in this case, since the actual name plink_actions is not referenced by any code, rename it to nl80211_plink_action to give it a proper prefix and match the documentation. Signed-off-by: Jeff Johnson Link: https://msgid.link/20240319-kdoc-nl80211-v1-1-549e09d52866@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f23ecbdd84a2..9397c61a48dc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6077,7 +6077,7 @@ enum nl80211_plink_state { * @NL80211_PLINK_ACTION_BLOCK: block traffic from this mesh peer * @NUM_NL80211_PLINK_ACTIONS: number of possible actions */ -enum plink_actions { +enum nl80211_plink_action { NL80211_PLINK_ACTION_NO_ACTION, NL80211_PLINK_ACTION_OPEN, NL80211_PLINK_ACTION_BLOCK, -- cgit v1.2.3 From ec3a97d93b88602fffd84ce30a461f6eaeb6f067 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 19 Mar 2024 11:26:24 -0700 Subject: wifi: nl80211: fix nl80211 uapi comment style issues Currently kernel-doc raises "warning: bad line:" for several comments that have invalid multi-line comment style; they are missing the leading '*'. And checkpatch.pl raises "WARNING: please, no space before tabs" for a large number of comments which have space then tab after the leading '*'. Fix those issues. Signed-off-by: Jeff Johnson Link: https://msgid.link/20240319-kdoc-nl80211-v1-2-549e09d52866@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 166 +++++++++++++++++++++---------------------- 1 file changed, 83 insertions(+), 83 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9397c61a48dc..4de84fbfd4fa 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -442,11 +442,11 @@ * stations connected and using at least that link as one of its links. * * @NL80211_CMD_GET_MPATH: Get mesh path attributes for mesh path to - * destination %NL80211_ATTR_MAC on the interface identified by - * %NL80211_ATTR_IFINDEX. + * destination %NL80211_ATTR_MAC on the interface identified by + * %NL80211_ATTR_IFINDEX. * @NL80211_CMD_SET_MPATH: Set mesh path attributes for mesh path to - * destination %NL80211_ATTR_MAC on the interface identified by - * %NL80211_ATTR_IFINDEX. + * destination %NL80211_ATTR_MAC on the interface identified by + * %NL80211_ATTR_IFINDEX. * @NL80211_CMD_NEW_MPATH: Create a new mesh path for the destination given by * %NL80211_ATTR_MAC via %NL80211_ATTR_MPATH_NEXT_HOP. * @NL80211_CMD_DEL_MPATH: Delete a mesh path to the destination given by @@ -476,15 +476,15 @@ * after being queried by the kernel. CRDA replies by sending a regulatory * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our * current alpha2 if it found a match. It also provides - * NL80211_ATTR_REG_RULE_FLAGS, and a set of regulatory rules. Each - * regulatory rule is a nested set of attributes given by - * %NL80211_ATTR_REG_RULE_FREQ_[START|END] and - * %NL80211_ATTR_FREQ_RANGE_MAX_BW with an attached power rule given by - * %NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and - * %NL80211_ATTR_REG_RULE_POWER_MAX_EIRP. + * NL80211_ATTR_REG_RULE_FLAGS, and a set of regulatory rules. Each + * regulatory rule is a nested set of attributes given by + * %NL80211_ATTR_REG_RULE_FREQ_[START|END] and + * %NL80211_ATTR_FREQ_RANGE_MAX_BW with an attached power rule given by + * %NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and + * %NL80211_ATTR_REG_RULE_POWER_MAX_EIRP. * @NL80211_CMD_REQ_SET_REG: ask the wireless core to set the regulatory domain - * to the specified ISO/IEC 3166-1 alpha2 country code. The core will - * store this as a valid request and then query userspace for it. + * to the specified ISO/IEC 3166-1 alpha2 country code. The core will + * store this as a valid request and then query userspace for it. * * @NL80211_CMD_GET_MESH_CONFIG: Get mesh networking properties for the * interface identified by %NL80211_ATTR_IFINDEX @@ -574,31 +574,31 @@ * @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries. * * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain - * has been changed and provides details of the request information - * that caused the change such as who initiated the regulatory request - * (%NL80211_ATTR_REG_INITIATOR), the wiphy_idx - * (%NL80211_ATTR_REG_ALPHA2) on which the request was made from if - * the initiator was %NL80211_REGDOM_SET_BY_COUNTRY_IE or - * %NL80211_REGDOM_SET_BY_DRIVER, the type of regulatory domain - * set (%NL80211_ATTR_REG_TYPE), if the type of regulatory domain is - * %NL80211_REG_TYPE_COUNTRY the alpha2 to which we have moved on - * to (%NL80211_ATTR_REG_ALPHA2). + * has been changed and provides details of the request information + * that caused the change such as who initiated the regulatory request + * (%NL80211_ATTR_REG_INITIATOR), the wiphy_idx + * (%NL80211_ATTR_REG_ALPHA2) on which the request was made from if + * the initiator was %NL80211_REGDOM_SET_BY_COUNTRY_IE or + * %NL80211_REGDOM_SET_BY_DRIVER, the type of regulatory domain + * set (%NL80211_ATTR_REG_TYPE), if the type of regulatory domain is + * %NL80211_REG_TYPE_COUNTRY the alpha2 to which we have moved on + * to (%NL80211_ATTR_REG_ALPHA2). * @NL80211_CMD_REG_BEACON_HINT: indicates to userspace that an AP beacon - * has been found while world roaming thus enabling active scan or - * any mode of operation that initiates TX (beacons) on a channel - * where we would not have been able to do either before. As an example - * if you are world roaming (regulatory domain set to world or if your - * driver is using a custom world roaming regulatory domain) and while - * doing a passive scan on the 5 GHz band you find an AP there (if not - * on a DFS channel) you will now be able to actively scan for that AP - * or use AP mode on your card on that same channel. Note that this will - * never be used for channels 1-11 on the 2 GHz band as they are always - * enabled world wide. This beacon hint is only sent if your device had - * either disabled active scanning or beaconing on a channel. We send to - * userspace the wiphy on which we removed a restriction from - * (%NL80211_ATTR_WIPHY) and the channel on which this occurred - * before (%NL80211_ATTR_FREQ_BEFORE) and after (%NL80211_ATTR_FREQ_AFTER) - * the beacon hint was processed. + * has been found while world roaming thus enabling active scan or + * any mode of operation that initiates TX (beacons) on a channel + * where we would not have been able to do either before. As an example + * if you are world roaming (regulatory domain set to world or if your + * driver is using a custom world roaming regulatory domain) and while + * doing a passive scan on the 5 GHz band you find an AP there (if not + * on a DFS channel) you will now be able to actively scan for that AP + * or use AP mode on your card on that same channel. Note that this will + * never be used for channels 1-11 on the 2 GHz band as they are always + * enabled world wide. This beacon hint is only sent if your device had + * either disabled active scanning or beaconing on a channel. We send to + * userspace the wiphy on which we removed a restriction from + * (%NL80211_ATTR_WIPHY) and the channel on which this occurred + * before (%NL80211_ATTR_FREQ_BEFORE) and after (%NL80211_ATTR_FREQ_AFTER) + * the beacon hint was processed. * * @NL80211_CMD_AUTHENTICATE: authentication request and notification. * This command is used both as a command (request to authenticate) and @@ -1715,21 +1715,21 @@ enum nl80211_commands { * (see &enum nl80211_plink_action). * @NL80211_ATTR_MPATH_NEXT_HOP: MAC address of the next hop for a mesh path. * @NL80211_ATTR_MPATH_INFO: information about a mesh_path, part of mesh path - * info given for %NL80211_CMD_GET_MPATH, nested attribute described at + * info given for %NL80211_CMD_GET_MPATH, nested attribute described at * &enum nl80211_mpath_info. * * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of * &enum nl80211_mntr_flags. * * @NL80211_ATTR_REG_ALPHA2: an ISO-3166-alpha2 country code for which the - * current regulatory domain should be set to or is already set to. - * For example, 'CR', for Costa Rica. This attribute is used by the kernel - * to query the CRDA to retrieve one regulatory domain. This attribute can - * also be used by userspace to query the kernel for the currently set - * regulatory domain. We chose an alpha2 as that is also used by the - * IEEE-802.11 country information element to identify a country. - * Users can also simply ask the wireless core to set regulatory domain - * to a specific alpha2. + * current regulatory domain should be set to or is already set to. + * For example, 'CR', for Costa Rica. This attribute is used by the kernel + * to query the CRDA to retrieve one regulatory domain. This attribute can + * also be used by userspace to query the kernel for the currently set + * regulatory domain. We chose an alpha2 as that is also used by the + * IEEE-802.11 country information element to identify a country. + * Users can also simply ask the wireless core to set regulatory domain + * to a specific alpha2. * @NL80211_ATTR_REG_RULES: a nested array of regulatory domain regulatory * rules. * @@ -1772,9 +1772,9 @@ enum nl80211_commands { * @NL80211_ATTR_BSS: scan result BSS * * @NL80211_ATTR_REG_INITIATOR: indicates who requested the regulatory domain - * currently in effect. This could be any of the %NL80211_REGDOM_SET_BY_* + * currently in effect. This could be any of the %NL80211_REGDOM_SET_BY_* * @NL80211_ATTR_REG_TYPE: indicates the type of the regulatory domain currently - * set. This can be one of the nl80211_reg_type (%NL80211_REGDOM_TYPE_*) + * set. This can be one of the nl80211_reg_type (%NL80211_REGDOM_TYPE_*) * * @NL80211_ATTR_SUPPORTED_COMMANDS: wiphy attribute that specifies * an array of command numbers (i.e. a mapping index to command number) @@ -1793,15 +1793,15 @@ enum nl80211_commands { * a u32 * * @NL80211_ATTR_FREQ_BEFORE: A channel which has suffered a regulatory change - * due to considerations from a beacon hint. This attribute reflects - * the state of the channel _before_ the beacon hint processing. This - * attributes consists of a nested attribute containing - * NL80211_FREQUENCY_ATTR_* + * due to considerations from a beacon hint. This attribute reflects + * the state of the channel _before_ the beacon hint processing. This + * attributes consists of a nested attribute containing + * NL80211_FREQUENCY_ATTR_* * @NL80211_ATTR_FREQ_AFTER: A channel which has suffered a regulatory change - * due to considerations from a beacon hint. This attribute reflects - * the state of the channel _after_ the beacon hint processing. This - * attributes consists of a nested attribute containing - * NL80211_FREQUENCY_ATTR_* + * due to considerations from a beacon hint. This attribute reflects + * the state of the channel _after_ the beacon hint processing. This + * attributes consists of a nested attribute containing + * NL80211_FREQUENCY_ATTR_* * * @NL80211_ATTR_CIPHER_SUITES: a set of u32 values indicating the supported * cipher suites @@ -2416,7 +2416,7 @@ enum nl80211_commands { * scheduled scan is started. Or the delay before a WoWLAN * net-detect scan is started, counting from the moment the * system is suspended. This value is a u32, in seconds. - + * * @NL80211_ATTR_REG_INDOOR: flag attribute, if set indicates that the device * is operating in an indoor environment. * @@ -3823,7 +3823,7 @@ enum nl80211_sta_bss_param { * (u64, to this station) * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute - * containing info as possible, see &enum nl80211_rate_info + * containing info as possible, see &enum nl80211_rate_info * @NL80211_STA_INFO_RX_PACKETS: total received packet (MSDUs and MMPDUs) * (u32, from this station) * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (MSDUs and MMPDUs) @@ -4039,7 +4039,7 @@ enum nl80211_mpath_flags { * @NL80211_MPATH_INFO_METRIC: metric (cost) of this mesh path * @NL80211_MPATH_INFO_EXPTIME: expiration time for the path, in msec from now * @NL80211_MPATH_INFO_FLAGS: mesh path flags, enumerated in - * &enum nl80211_mpath_flags; + * &enum nl80211_mpath_flags; * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries * @NL80211_MPATH_INFO_HOP_COUNT: hop count to destination @@ -4201,8 +4201,8 @@ enum nl80211_wmm_rule { * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled in current * regulatory domain. * @NL80211_FREQUENCY_ATTR_NO_IR: no mechanisms that initiate radiation - * are permitted on this channel, this includes sending probe - * requests, or modes of operation that require beaconing. + * are permitted on this channel, this includes sending probe + * requests, or modes of operation that require beaconing. * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm @@ -4359,14 +4359,14 @@ enum nl80211_bitrate_attr { /** * enum nl80211_initiator - Indicates the initiator of a reg domain request * @NL80211_REGDOM_SET_BY_CORE: Core queried CRDA for a dynamic world - * regulatory domain. + * regulatory domain. * @NL80211_REGDOM_SET_BY_USER: User asked the wireless core to set the - * regulatory domain. + * regulatory domain. * @NL80211_REGDOM_SET_BY_DRIVER: a wireless drivers has hinted to the - * wireless core it thinks its knows the regulatory domain we should be in. + * wireless core it thinks its knows the regulatory domain we should be in. * @NL80211_REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an - * 802.11 country information element with regulatory information it - * thinks we should consider. cfg80211 only processes the country + * 802.11 country information element with regulatory information it + * thinks we should consider. cfg80211 only processes the country * code from the IE, and relies on the regulatory domain information * structure passed by userspace (CRDA) from our wireless-regdb. * If a channel is enabled but the country code indicates it should @@ -4385,11 +4385,11 @@ enum nl80211_reg_initiator { * to a specific country. When this is set you can count on the * ISO / IEC 3166 alpha2 country code being valid. * @NL80211_REGDOM_TYPE_WORLD: the regulatory set domain is the world regulatory - * domain. + * domain. * @NL80211_REGDOM_TYPE_CUSTOM_WORLD: the regulatory domain set is a custom - * driver specific world regulatory domain. These do not apply system-wide - * and are only applicable to the individual devices which have requested - * them to be applied. + * driver specific world regulatory domain. These do not apply system-wide + * and are only applicable to the individual devices which have requested + * them to be applied. * @NL80211_REGDOM_TYPE_INTERSECTION: the regulatory domain set is the product * of an intersection between two regulatory domains -- the previously * set regulatory domain on the system and the last accepted regulatory @@ -4406,21 +4406,21 @@ enum nl80211_reg_type { * enum nl80211_reg_rule_attr - regulatory rule attributes * @__NL80211_REG_RULE_ATTR_INVALID: attribute number 0 is reserved * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional - * considerations for a given frequency range. These are the - * &enum nl80211_reg_rule_flags. + * considerations for a given frequency range. These are the + * &enum nl80211_reg_rule_flags. * @NL80211_ATTR_FREQ_RANGE_START: starting frequencry for the regulatory - * rule in KHz. This is not a center of frequency but an actual regulatory - * band edge. + * rule in KHz. This is not a center of frequency but an actual regulatory + * band edge. * @NL80211_ATTR_FREQ_RANGE_END: ending frequency for the regulatory rule - * in KHz. This is not a center a frequency but an actual regulatory - * band edge. + * in KHz. This is not a center a frequency but an actual regulatory + * band edge. * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this * frequency range, in KHz. * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain - * for a given frequency range. The value is in mBi (100 * dBi). - * If you don't have one then don't send this. + * for a given frequency range. The value is in mBi (100 * dBi). + * If you don't have one then don't send this. * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for - * a given frequency range. The value is in mBm (100 * dBm). + * a given frequency range. The value is in mBm (100 * dBm). * @NL80211_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds. * If not present or 0 default CAC time will be used. * @NL80211_ATTR_POWER_RULE_PSD: power spectral density (in dBm). @@ -4507,8 +4507,8 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_PTP_ONLY: this is only for Point To Point links * @NL80211_RRF_PTMP_ONLY: this is only for Point To Multi Point links * @NL80211_RRF_NO_IR: no mechanisms that initiate radiation are allowed, - * this includes probe requests or modes of operation that require - * beaconing. + * this includes probe requests or modes of operation that require + * beaconing. * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated * base on contiguous rules and wider channels will be allowed to cross * multiple contiguous/overlapping frequency ranges. @@ -4522,9 +4522,9 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_NO_EHT: EHT operation not allowed * @NL80211_RRF_PSD: Ruleset has power spectral density value * @NL80211_RRF_DFS_CONCURRENT: Operation on this channel is allowed for - peer-to-peer or adhoc communication under the control of a DFS master - which operates on the same channel (FCC-594280 D01 Section B.3). - Should be used together with %NL80211_RRF_DFS only. + * peer-to-peer or adhoc communication under the control of a DFS master + * which operates on the same channel (FCC-594280 D01 Section B.3). + * Should be used together with %NL80211_RRF_DFS only. * @NL80211_RRF_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP not allowed * @NL80211_RRF_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP not allowed */ -- cgit v1.2.3 From b8cf4f4d700acaa7b3b40eb877a226b9a95164df Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 19 Mar 2024 11:26:25 -0700 Subject: wifi: nl80211: cleanup nl80211.h kernel-doc Fix all of the kernel-doc issues in nl80211.h. Signed-off-by: Jeff Johnson Link: https://msgid.link/20240319-kdoc-nl80211-v1-3-549e09d52866@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 68 ++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 28 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4de84fbfd4fa..f917bc6c9b6f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -413,8 +413,8 @@ * are like for %NL80211_CMD_SET_BEACON, and additionally parameters that * do not change are used, these include %NL80211_ATTR_BEACON_INTERVAL, * %NL80211_ATTR_DTIM_PERIOD, %NL80211_ATTR_SSID, - * %NL80211_ATTR_HIDDEN_SSID, %NL80211_ATTR_CIPHERS_PAIRWISE, - * %NL80211_ATTR_CIPHER_GROUP, %NL80211_ATTR_WPA_VERSIONS, + * %NL80211_ATTR_HIDDEN_SSID, %NL80211_ATTR_CIPHER_SUITES_PAIRWISE, + * %NL80211_ATTR_CIPHER_SUITE_GROUP, %NL80211_ATTR_WPA_VERSIONS, * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY, * %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_INACTIVITY_TIMEOUT, * %NL80211_ATTR_ACL_POLICY and %NL80211_ATTR_MAC_ADDRS. @@ -451,11 +451,6 @@ * %NL80211_ATTR_MAC via %NL80211_ATTR_MPATH_NEXT_HOP. * @NL80211_CMD_DEL_MPATH: Delete a mesh path to the destination given by * %NL80211_ATTR_MAC. - * @NL80211_CMD_NEW_PATH: Add a mesh path with given attributes to the - * interface identified by %NL80211_ATTR_IFINDEX. - * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC - * or, if no MAC address given, all mesh paths, on the interface identified - * by %NL80211_ATTR_IFINDEX. * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by * %NL80211_ATTR_IFINDEX. * @@ -1120,7 +1115,7 @@ * current configuration is not changed. If it is present but * set to zero, the configuration is changed to don't-care * (i.e. the device can decide what to do). - * @NL80211_CMD_NAN_FUNC_MATCH: Notification sent when a match is reported. + * @NL80211_CMD_NAN_MATCH: Notification sent when a match is reported. * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and * %NL80211_ATTR_COOKIE. * @@ -1862,12 +1857,6 @@ enum nl80211_commands { * that protected APs should be used. This is also used with NEW_BEACON to * indicate that the BSS is to use protection. * - * @NL80211_ATTR_CIPHERS_PAIRWISE: Used with CONNECT, ASSOCIATE, and NEW_BEACON - * to indicate which unicast key ciphers will be used with the connection - * (an array of u32). - * @NL80211_ATTR_CIPHER_GROUP: Used with CONNECT, ASSOCIATE, and NEW_BEACON to - * indicate which group key cipher will be used with the connection (a - * u32). * @NL80211_ATTR_WPA_VERSIONS: Used with CONNECT, ASSOCIATE, and NEW_BEACON to * indicate which WPA version(s) the AP we want to associate with is using * (a u32 with flags from &enum nl80211_wpa_versions). @@ -1898,6 +1887,7 @@ enum nl80211_commands { * with %NL80211_KEY_* sub-attributes * * @NL80211_ATTR_PID: Process ID of a network namespace. + * @NL80211_ATTR_NETNS_FD: File descriptor of a network namespace. * * @NL80211_ATTR_GENERATION: Used to indicate consistent snapshots for * dumps. This number increases whenever the object list being @@ -1952,6 +1942,7 @@ enum nl80211_commands { * * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was * acknowledged by the recipient. + * @NL80211_ATTR_ACK_SIGNAL: Station's ack signal strength (s32) * * @NL80211_ATTR_PS_STATE: powersave state, using &enum nl80211_ps_state values. * @@ -2149,6 +2140,9 @@ enum nl80211_commands { * @NL80211_ATTR_DISABLE_HE: Force HE capable interfaces to disable * this feature during association. This is a flag attribute. * Currently only supported in mac80211 drivers. + * @NL80211_ATTR_DISABLE_EHT: Force EHT capable interfaces to disable + * this feature during association. This is a flag attribute. + * Currently only supported in mac80211 drivers. * @NL80211_ATTR_HT_CAPABILITY_MASK: Specify which bits of the * ATTR_HT_CAPABILITY to which attention should be paid. * Currently, only mac80211 NICs support this feature. @@ -2158,6 +2152,12 @@ enum nl80211_commands { * All values are treated as suggestions and may be ignored * by the driver as required. The actual values may be seen in * the station debugfs ht_caps file. + * @NL80211_ATTR_VHT_CAPABILITY_MASK: Specify which bits of the + * ATTR_VHT_CAPABILITY to which attention should be paid. + * Currently, only mac80211 NICs support this feature. + * All values are treated as suggestions and may be ignored + * by the driver as required. The actual values may be seen in + * the station debugfs vht_caps file. * * @NL80211_ATTR_DFS_REGION: region for regulatory rules which this country * abides to when initiating radiation on DFS channels. A country maps @@ -3565,7 +3565,7 @@ enum nl80211_sta_flags { * enum nl80211_sta_p2p_ps_status - station support of P2P PS * * @NL80211_P2P_PS_UNSUPPORTED: station doesn't support P2P PS mechanism - * @@NL80211_P2P_PS_SUPPORTED: station supports P2P PS mechanism + * @NL80211_P2P_PS_SUPPORTED: station supports P2P PS mechanism * @NUM_NL80211_P2P_PS_STATUS: number of values */ enum nl80211_sta_p2p_ps_status { @@ -3603,9 +3603,9 @@ enum nl80211_he_gi { /** * enum nl80211_he_ltf - HE long training field - * @NL80211_RATE_INFO_HE_1xLTF: 3.2 usec - * @NL80211_RATE_INFO_HE_2xLTF: 6.4 usec - * @NL80211_RATE_INFO_HE_4xLTF: 12.8 usec + * @NL80211_RATE_INFO_HE_1XLTF: 3.2 usec + * @NL80211_RATE_INFO_HE_2XLTF: 6.4 usec + * @NL80211_RATE_INFO_HE_4XLTF: 12.8 usec */ enum nl80211_he_ltf { NL80211_RATE_INFO_HE_1XLTF, @@ -3720,7 +3720,7 @@ enum nl80211_eht_ru_alloc { * @NL80211_RATE_INFO_HE_GI: HE guard interval identifier * (u8, see &enum nl80211_he_gi) * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1) - * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then + * @NL80211_RATE_INFO_HE_RU_ALLOC: HE RU allocation, if not present then * non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc) * @NL80211_RATE_INFO_320_MHZ_WIDTH: 320 MHz bitrate * @NL80211_RATE_INFO_EHT_MCS: EHT MCS index (u8, 0-15) @@ -3852,8 +3852,8 @@ enum nl80211_sta_bss_param { * Contains a nested array of signal strength attributes (u8, dBm) * @NL80211_STA_INFO_CHAIN_SIGNAL_AVG: per-chain signal strength average * Same format as NL80211_STA_INFO_CHAIN_SIGNAL. - * @NL80211_STA_EXPECTED_THROUGHPUT: expected throughput considering also the - * 802.11 header (u32, kbps) + * @NL80211_STA_INFO_EXPECTED_THROUGHPUT: expected throughput considering also + * the 802.11 header (u32, kbps) * @NL80211_STA_INFO_RX_DROP_MISC: RX packets dropped for unspecified reasons * (u64) * @NL80211_STA_INFO_BEACON_RX: number of beacons received from this peer (u64) @@ -4179,7 +4179,7 @@ enum nl80211_band_attr { * @NL80211_WMMR_CW_MAX: Maximum contention window slot. * @NL80211_WMMR_AIFSN: Arbitration Inter Frame Space. * @NL80211_WMMR_TXOP: Maximum allowed tx operation time. - * @nl80211_WMMR_MAX: highest possible wmm rule. + * @NL80211_WMMR_MAX: highest possible wmm rule. * @__NL80211_WMMR_LAST: Internal use. */ enum nl80211_wmm_rule { @@ -4203,6 +4203,7 @@ enum nl80211_wmm_rule { * @NL80211_FREQUENCY_ATTR_NO_IR: no mechanisms that initiate radiation * are permitted on this channel, this includes sending probe * requests, or modes of operation that require beaconing. + * @__NL80211_FREQUENCY_ATTR_NO_IBSS: obsolete, same as _NO_IR * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm @@ -4357,7 +4358,7 @@ enum nl80211_bitrate_attr { }; /** - * enum nl80211_initiator - Indicates the initiator of a reg domain request + * enum nl80211_reg_initiator - Indicates the initiator of a reg domain request * @NL80211_REGDOM_SET_BY_CORE: Core queried CRDA for a dynamic world * regulatory domain. * @NL80211_REGDOM_SET_BY_USER: User asked the wireless core to set the @@ -4509,6 +4510,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_NO_IR: no mechanisms that initiate radiation are allowed, * this includes probe requests or modes of operation that require * beaconing. + * @__NL80211_RRF_NO_IBSS: obsolete, same as NO_IR * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated * base on contiguous rules and wider channels will be allowed to cross * multiple contiguous/overlapping frequency ranges. @@ -4707,8 +4709,8 @@ enum nl80211_mntr_flags { * alternate between Active and Doze states, but may not wake up * for neighbor's beacons. * - * @__NL80211_MESH_POWER_AFTER_LAST - internal use - * @NL80211_MESH_POWER_MAX - highest possible power save level + * @__NL80211_MESH_POWER_AFTER_LAST: internal use + * @NL80211_MESH_POWER_MAX: highest possible power save level */ enum nl80211_mesh_power_mode { @@ -5728,7 +5730,7 @@ struct nl80211_pattern_support { * "TCP connection wakeup" for more details. This is a nested attribute * containing the exact information for establishing and keeping alive * the TCP connection. - * @NL80211_WOWLAN_TRIG_TCP_WAKEUP_MATCH: For wakeup reporting only, the + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH: For wakeup reporting only, the * wakeup packet was received on the TCP connection * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST: For wakeup reporting only, the * TCP connection was lost or failed to be established @@ -6404,6 +6406,7 @@ enum nl80211_feature_flags { * receiving control port frames over nl80211 instead of the netdevice. * @NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT: This driver/device supports * (average) ACK signal strength reporting. + * @NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT: Backward-compatible ID * @NL80211_EXT_FEATURE_TXQS: Driver supports FQ-CoDel-enabled intermediate * TXQs. * @NL80211_EXT_FEATURE_SCAN_RANDOM_SN: Driver/device supports randomizing the @@ -6787,6 +6790,8 @@ enum nl80211_acl_policy { * @NL80211_SMPS_STATIC: static SMPS (use a single antenna) * @NL80211_SMPS_DYNAMIC: dynamic smps (start with a single antenna and * turn on other antennas after CTS/RTS). + * @__NL80211_SMPS_AFTER_LAST: internal + * @NL80211_SMPS_MAX: highest used enumeration */ enum nl80211_smps_mode { NL80211_SMPS_OFF, @@ -7008,6 +7013,8 @@ enum nl80211_bss_select_attr { * @NL80211_NAN_FUNC_PUBLISH: function is publish * @NL80211_NAN_FUNC_SUBSCRIBE: function is subscribe * @NL80211_NAN_FUNC_FOLLOW_UP: function is follow-up + * @__NL80211_NAN_FUNC_TYPE_AFTER_LAST: internal use + * @NL80211_NAN_FUNC_MAX_TYPE: internal use */ enum nl80211_nan_function_type { NL80211_NAN_FUNC_PUBLISH, @@ -7168,7 +7175,7 @@ enum nl80211_nan_match_attributes { }; /** - * nl80211_external_auth_action - Action to perform with external + * enum nl80211_external_auth_action - Action to perform with external * authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION. * @NL80211_EXTERNAL_AUTH_START: Start the authentication. * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication. @@ -7186,7 +7193,7 @@ enum nl80211_external_auth_action { * @NL80211_FTM_RESP_ATTR_LCI: The content of Measurement Report Element * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10), * i.e. starting with the measurement token - * @NL80211_FTM_RESP_ATTR_CIVIC: The content of Measurement Report Element + * @NL80211_FTM_RESP_ATTR_CIVICLOC: The content of Measurement Report Element * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13), * i.e. starting with the measurement token * @__NL80211_FTM_RESP_ATTR_LAST: Internal @@ -7829,6 +7836,7 @@ enum nl80211_sae_pwe_mechanism { * * @NL80211_SAR_TYPE_POWER: power limitation specified in 0.25dBm unit * + * @NUM_NL80211_SAR_TYPE: internal */ enum nl80211_sar_type { NL80211_SAR_TYPE_POWER, @@ -7842,6 +7850,8 @@ enum nl80211_sar_type { /** * enum nl80211_sar_attrs - Attributes for SAR spec * + * @__NL80211_SAR_ATTR_INVALID: Invalid + * * @NL80211_SAR_ATTR_TYPE: the SAR type as defined in &enum nl80211_sar_type. * * @NL80211_SAR_ATTR_SPECS: Nested array of SAR power @@ -7873,6 +7883,8 @@ enum nl80211_sar_attrs { /** * enum nl80211_sar_specs_attrs - Attributes for SAR power limit specs * + * @__NL80211_SAR_ATTR_SPECS_INVALID: Invalid + * * @NL80211_SAR_ATTR_SPECS_POWER: Required (s32)value to specify the actual * power limit value in units of 0.25 dBm if type is * NL80211_SAR_TYPE_POWER. (i.e., a value of 44 represents 11 dBm). -- cgit v1.2.3 From 5311591fbb349fe9f5c555dcba3b13a5831aa72d Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Tue, 26 Mar 2024 10:17:40 +0000 Subject: bpf: Add support for passing mark with bpf_fib_lookup Extend the bpf_fib_lookup() helper by making it to utilize mark if the BPF_FIB_LOOKUP_MARK flag is set. In order to pass the mark the four bytes of struct bpf_fib_lookup are used, shared with the output-only smac/dmac fields. Signed-off-by: Anton Protopopov Signed-off-by: Daniel Borkmann Reviewed-by: David Ahern Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240326101742.17421-2-aspsk@isovalent.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 20 ++++++++++++++++++-- net/core/filter.c | 12 +++++++++--- tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++-- 3 files changed, 45 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9585f5345353..96d57e483133 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3394,6 +3394,10 @@ union bpf_attr { * for the nexthop. If the src addr cannot be derived, * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this * case, *params*->dmac and *params*->smac are not set either. + * **BPF_FIB_LOOKUP_MARK** + * Use the mark present in *params*->mark for the fib lookup. + * This option should not be used with BPF_FIB_LOOKUP_DIRECT, + * as it only has meaning for full lookups. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -7120,6 +7124,7 @@ enum { BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), BPF_FIB_LOOKUP_SRC = (1U << 4), + BPF_FIB_LOOKUP_MARK = (1U << 5), }; enum { @@ -7197,8 +7202,19 @@ struct bpf_fib_lookup { __u32 tbid; }; - __u8 smac[6]; /* ETH_ALEN */ - __u8 dmac[6]; /* ETH_ALEN */ + union { + /* input */ + struct { + __u32 mark; /* policy routing */ + /* 2 4-byte holes for input */ + }; + + /* output: source and dest mac */ + struct { + __u8 smac[6]; /* ETH_ALEN */ + __u8 dmac[6]; /* ETH_ALEN */ + }; + }; }; struct bpf_redir_neigh { diff --git a/net/core/filter.c b/net/core/filter.c index 0c66e4a3fc5b..1205dd777dc2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5884,7 +5884,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); } else { - fl4.flowi4_mark = 0; + if (flags & BPF_FIB_LOOKUP_MARK) + fl4.flowi4_mark = params->mark; + else + fl4.flowi4_mark = 0; fl4.flowi4_secid = 0; fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_uid = sock_net_uid(net, NULL); @@ -6027,7 +6030,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res, strict); } else { - fl6.flowi6_mark = 0; + if (flags & BPF_FIB_LOOKUP_MARK) + fl6.flowi6_mark = params->mark; + else + fl6.flowi6_mark = 0; fl6.flowi6_secid = 0; fl6.flowi6_tun_key.tun_id = 0; fl6.flowi6_uid = sock_net_uid(net, NULL); @@ -6105,7 +6111,7 @@ set_fwd_params: #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ - BPF_FIB_LOOKUP_SRC) + BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9585f5345353..96d57e483133 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3394,6 +3394,10 @@ union bpf_attr { * for the nexthop. If the src addr cannot be derived, * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this * case, *params*->dmac and *params*->smac are not set either. + * **BPF_FIB_LOOKUP_MARK** + * Use the mark present in *params*->mark for the fib lookup. + * This option should not be used with BPF_FIB_LOOKUP_DIRECT, + * as it only has meaning for full lookups. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -7120,6 +7124,7 @@ enum { BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), BPF_FIB_LOOKUP_SRC = (1U << 4), + BPF_FIB_LOOKUP_MARK = (1U << 5), }; enum { @@ -7197,8 +7202,19 @@ struct bpf_fib_lookup { __u32 tbid; }; - __u8 smac[6]; /* ETH_ALEN */ - __u8 dmac[6]; /* ETH_ALEN */ + union { + /* input */ + struct { + __u32 mark; /* policy routing */ + /* 2 4-byte holes for input */ + }; + + /* output: source and dest mac */ + struct { + __u8 smac[6]; /* ETH_ALEN */ + __u8 dmac[6]; /* ETH_ALEN */ + }; + }; }; struct bpf_redir_neigh { -- cgit v1.2.3 From 5832c4a77d6931cebf9ba737129ae8f14b66ee1d Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 27 Mar 2024 16:23:53 +0100 Subject: ip_tunnel: convert __be16 tunnel flags to bitmaps Historically, tunnel flags like TUNNEL_CSUM or TUNNEL_ERSPAN_OPT have been defined as __be16. Now all of those 16 bits are occupied and there's no more free space for new flags. It can't be simply switched to a bigger container with no adjustments to the values, since it's an explicit Endian storage, and on LE systems (__be16)0x0001 equals to (__be64)0x0001000000000000. We could probably define new 64-bit flags depending on the Endianness, i.e. (__be64)0x0001 on BE and (__be64)0x00010000... on LE, but that would introduce an Endianness dependency and spawn a ton of Sparse warnings. To mitigate them, all of those places which were adjusted with this change would be touched anyway, so why not define stuff properly if there's no choice. Define IP_TUNNEL_*_BIT counterparts as a bit number instead of the value already coded and a fistful of <16 <-> bitmap> converters and helpers. The two flags which have a different bit position are SIT_ISATAP_BIT and VTI_ISVTI_BIT, as they were defined not as __cpu_to_be16(), but as (__force __be16), i.e. had different positions on LE and BE. Now they both have strongly defined places. Change all __be16 fields which were used to store those flags, to IP_TUNNEL_DECLARE_FLAGS() -> DECLARE_BITMAP(__IP_TUNNEL_FLAG_NUM) -> unsigned long[1] for now, and replace all TUNNEL_* occurrences to their bitmap counterparts. Use the converters in the places which talk to the userspace, hardware (NFP) or other hosts (GRE header). The rest must explicitly use the new flags only. This must be done at once, otherwise there will be too many conversions throughout the code in the intermediate commits. Finally, disable the old __be16 flags for use in the kernel code (except for the two 'irregular' flags mentioned above), to prevent any accidental (mis)use of them. For the userspace, nothing is changed, only additions were made. Most noticeable bloat-o-meter difference (.text): vmlinux: 307/-1 (306) gre.ko: 62/0 (62) ip_gre.ko: 941/-217 (724) [*] ip_tunnel.ko: 390/-900 (-510) [**] ip_vti.ko: 138/0 (138) ip6_gre.ko: 534/-18 (516) [*] ip6_tunnel.ko: 118/-10 (108) [*] gre_flags_to_tnl_flags() grew, but still is inlined [**] ip_tunnel_find() got uninlined, hence such decrease The average code size increase in non-extreme case is 100-200 bytes per module, mostly due to sizeof(long) > sizeof(__be16), as %__IP_TUNNEL_FLAG_NUM is less than %BITS_PER_LONG and the compilers are able to expand the majority of bitmap_*() calls here into direct operations on scalars. Reviewed-by: Simon Horman Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- drivers/net/bareudp.c | 19 ++- .../net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 2 +- .../ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 6 +- .../ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c | 12 +- .../ethernet/mellanox/mlx5/core/en/tc_tun_gre.c | 8 +- .../ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c | 9 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 16 ++- .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c | 26 +++-- .../net/ethernet/mellanox/mlxsw/spectrum_span.c | 6 +- drivers/net/ethernet/netronome/nfp/flower/action.c | 27 ++++- drivers/net/geneve.c | 44 ++++--- drivers/net/vxlan/vxlan_core.c | 14 ++- include/net/dst_metadata.h | 10 +- include/net/flow_dissector.h | 2 +- include/net/gre.h | 70 ++++++------ include/net/ip6_tunnel.h | 4 +- include/net/ip_tunnels.h | 119 ++++++++++++++++--- include/net/udp_tunnel.h | 4 +- include/uapi/linux/if_tunnel.h | 33 ++++++ net/bridge/br_vlan_tunnel.c | 9 +- net/core/filter.c | 26 +++-- net/core/flow_dissector.c | 20 +++- net/ipv4/fou_bpf.c | 2 +- net/ipv4/gre_demux.c | 2 +- net/ipv4/ip_gre.c | 127 +++++++++++++-------- net/ipv4/ip_tunnel.c | 54 +++++---- net/ipv4/ip_tunnel_core.c | 80 ++++++++----- net/ipv4/ip_vti.c | 29 +++-- net/ipv4/ipip.c | 21 +++- net/ipv4/udp_tunnel_core.c | 5 +- net/ipv6/ip6_gre.c | 85 ++++++++------ net/ipv6/ip6_tunnel.c | 14 +-- net/ipv6/sit.c | 5 +- net/netfilter/ipvs/ip_vs_core.c | 6 +- net/netfilter/ipvs/ip_vs_xmit.c | 20 ++-- net/netfilter/nft_tunnel.c | 44 ++++--- net/openvswitch/flow_netlink.c | 61 ++++++---- net/psample/psample.c | 26 +++-- net/sched/act_tunnel_key.c | 36 +++--- net/sched/cls_flower.c | 27 ++--- 40 files changed, 715 insertions(+), 415 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 339db6e4a1d5..d5c56ca91b77 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -61,6 +61,7 @@ struct bareudp_dev { static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct metadata_dst *tun_dst = NULL; + IP_TUNNEL_DECLARE_FLAGS(key) = { }; struct bareudp_dev *bareudp; unsigned short family; unsigned int len; @@ -137,7 +138,10 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) bareudp->dev->stats.rx_dropped++; goto drop; } - tun_dst = udp_tun_rx_dst(skb, family, TUNNEL_KEY, 0, 0); + + __set_bit(IP_TUNNEL_KEY_BIT, key); + + tun_dst = udp_tun_rx_dst(skb, family, key, 0, 0); if (!tun_dst) { bareudp->dev->stats.rx_dropped++; goto drop; @@ -285,10 +289,10 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct bareudp_dev *bareudp, const struct ip_tunnel_info *info) { + bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct socket *sock = rcu_dereference(bareudp->sock); - bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); const struct ip_tunnel_key *key = &info->key; struct rtable *rt; __be16 sport, df; @@ -316,7 +320,8 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; - df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? + htons(IP_DF) : 0; skb_scrub_packet(skb, xnet); err = -ENOSPC; @@ -338,7 +343,8 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, udp_tunnel_xmit_skb(rt, sock->sk, skb, saddr, info->key.u.ipv4.dst, tos, ttl, df, sport, bareudp->port, !net_eq(bareudp->net, dev_net(bareudp->dev)), - !(info->key.tun_flags & TUNNEL_CSUM)); + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags)); return 0; free_dst: @@ -350,10 +356,10 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct bareudp_dev *bareudp, const struct ip_tunnel_info *info) { + bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct socket *sock = rcu_dereference(bareudp->sock); - bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); const struct ip_tunnel_key *key = &info->key; struct dst_entry *dst = NULL; struct in6_addr saddr, daddr; @@ -402,7 +408,8 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, udp_tunnel6_xmit_skb(dst, sock->sk, skb, dev, &saddr, &daddr, prio, ttl, info->key.label, sport, bareudp->port, - !(info->key.tun_flags & TUNNEL_CSUM)); + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags)); return 0; free_dst: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 92065568bb19..6873c1201803 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -117,7 +117,7 @@ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b, - __be16 tun_flags); + u32 tun_type); #endif /* CONFIG_MLX5_ESWITCH */ #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index f1d1e1542e81..878cbdbf5ec8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -587,7 +587,7 @@ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b, - __be16 tun_flags) + u32 tun_type) { struct ip_tunnel_info *a_info; struct ip_tunnel_info *b_info; @@ -596,8 +596,8 @@ bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, if (!mlx5e_tc_tun_encap_info_equal_generic(a, b)) return false; - a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags); - b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags); + a_has_opts = test_bit(tun_type, a->ip_tun_key->tun_flags); + b_has_opts = test_bit(tun_type, b->ip_tun_key->tun_flags); /* keys are equal when both don't have any options attached */ if (!a_has_opts && !b_has_opts) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c index 2bcd10b6d653..bf969212cc77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -106,12 +106,13 @@ static int mlx5e_gen_ip_tunnel_header_geneve(char buf[], memset(geneveh, 0, sizeof(*geneveh)); geneveh->ver = MLX5E_GENEVE_VER; geneveh->opt_len = tun_info->options_len / 4; - geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM); - geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT); + geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, tun_info->key.tun_flags); + geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, + tun_info->key.tun_flags); mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni); geneveh->proto_type = htons(ETH_P_TEB); - if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_info->key.tun_flags)) { if (!geneveh->opt_len) return -EOPNOTSUPP; ip_tunnel_info_opts_get(geneveh->options, tun_info); @@ -188,7 +189,7 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, /* make sure that we're talking about GENEVE options */ - if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) { + if (enc_opts.key->dst_opt_type != IP_TUNNEL_GENEVE_OPT_BIT) { NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options: option type is not GENEVE"); netdev_warn(priv->netdev, @@ -337,7 +338,8 @@ static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b) { - return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_GENEVE_OPT); + return mlx5e_tc_tun_encap_info_equal_options(a, b, + IP_TUNNEL_GENEVE_OPT_BIT); } struct mlx5e_tc_tunnel geneve_tunnel = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c index ada14f0574dc..579eda89fc76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -31,12 +31,16 @@ static int mlx5e_gen_ip_tunnel_header_gretap(char buf[], const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + IP_TUNNEL_DECLARE_FLAGS(unsupp) = { }; int hdr_len; *ip_proto = IPPROTO_GRE; /* the HW does not calculate GRE csum or sequences */ - if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + __set_bit(IP_TUNNEL_CSUM_BIT, unsupp); + __set_bit(IP_TUNNEL_SEQ_BIT, unsupp); + + if (ip_tunnel_flags_intersect(tun_key->tun_flags, unsupp)) return -EOPNOTSUPP; greh->protocol = htons(ETH_P_TEB); @@ -44,7 +48,7 @@ static int mlx5e_gen_ip_tunnel_header_gretap(char buf[], /* GRE key */ hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e); greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); - if (tun_key->tun_flags & TUNNEL_KEY) { + if (test_bit(IP_TUNNEL_KEY_BIT, tun_key->tun_flags)) { __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); *ptr = tun_id; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index a184d739d5f8..e4e487c8431b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -90,7 +90,7 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], const struct vxlan_metadata *md; struct vxlanhdr *vxh; - if ((tun_key->tun_flags & TUNNEL_VXLAN_OPT) && + if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_key->tun_flags) && e->tun_info->options_len != sizeof(*md)) return -EOPNOTSUPP; vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); @@ -99,7 +99,7 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], udp->dest = tun_key->tp_dst; vxh->vx_flags = VXLAN_HF_VNI; vxh->vx_vni = vxlan_vni_field(tun_id); - if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) { + if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_key->tun_flags)) { md = ip_tunnel_info_opts(e->tun_info); vxlan_build_gbp_hdr(vxh, md); } @@ -125,7 +125,7 @@ static int mlx5e_tc_tun_parse_vxlan_gbp_option(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (enc_opts.key->dst_opt_type != TUNNEL_VXLAN_OPT) { + if (enc_opts.key->dst_opt_type != IP_TUNNEL_VXLAN_OPT_BIT) { NL_SET_ERR_MSG_MOD(extack, "Wrong VxLAN option type: not GBP"); return -EOPNOTSUPP; } @@ -208,7 +208,8 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, static bool mlx5e_tc_tun_encap_info_equal_vxlan(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b) { - return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_VXLAN_OPT); + return mlx5e_tc_tun_encap_info_equal_options(a, b, + IP_TUNNEL_VXLAN_OPT_BIT); } static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 31ed26cac9bf..f2f19fa37d91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5464,6 +5464,7 @@ static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct tunnel_match_enc_opts enc_opts = {}; struct mlx5_rep_uplink_priv *uplink_priv; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct mlx5e_rep_priv *uplink_rpriv; struct metadata_dst *tun_dst; struct tunnel_match_key key; @@ -5471,6 +5472,8 @@ static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb struct net_device *dev; int err; + __set_bit(IP_TUNNEL_KEY_BIT, flags); + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; tun_id = tunnel_id >> ENC_OPTS_BITS; @@ -5503,14 +5506,14 @@ static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb case FLOW_DISSECTOR_KEY_IPV4_ADDRS: tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst, key.enc_ip.tos, key.enc_ip.ttl, - key.enc_tp.dst, TUNNEL_KEY, + key.enc_tp.dst, flags, key32_to_tunnel_id(key.enc_key_id.keyid), enc_opts.key.len); break; case FLOW_DISSECTOR_KEY_IPV6_ADDRS: tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst, key.enc_ip.tos, key.enc_ip.ttl, - key.enc_tp.dst, 0, TUNNEL_KEY, + key.enc_tp.dst, 0, flags, key32_to_tunnel_id(key.enc_key_id.keyid), enc_opts.key.len); break; @@ -5528,11 +5531,16 @@ static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb tun_dst->u.tun_info.key.tp_src = key.enc_tp.src; - if (enc_opts.key.len) + if (enc_opts.key.len) { + ip_tunnel_flags_zero(flags); + if (enc_opts.key.dst_opt_type) + __set_bit(enc_opts.key.dst_opt_type, flags); + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.key.data, enc_opts.key.len, - enc_opts.key.dst_opt_type); + flags); + } skb_dst_set(skb, (struct dst_entry *)tun_dst); dev = dev_get_by_index(&init_net, key.filter_ifindex); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index d67df358a52f..d761a1235994 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -27,23 +27,23 @@ mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev) static bool mlxsw_sp_ipip_parms4_has_ikey(const struct ip_tunnel_parm_kern *parms) { - return !!(parms->i_flags & TUNNEL_KEY); + return test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags); } static bool mlxsw_sp_ipip_parms6_has_ikey(const struct __ip6_tnl_parm *parms) { - return !!(parms->i_flags & TUNNEL_KEY); + return test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags); } static bool mlxsw_sp_ipip_parms4_has_okey(const struct ip_tunnel_parm_kern *parms) { - return !!(parms->o_flags & TUNNEL_KEY); + return test_bit(IP_TUNNEL_KEY_BIT, parms->o_flags); } static bool mlxsw_sp_ipip_parms6_has_okey(const struct __ip6_tnl_parm *parms) { - return !!(parms->o_flags & TUNNEL_KEY); + return test_bit(IP_TUNNEL_KEY_BIT, parms->o_flags); } static u32 mlxsw_sp_ipip_parms4_ikey(const struct ip_tunnel_parm_kern *parms) @@ -242,12 +242,15 @@ static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp, const struct net_device *ol_dev) { struct ip_tunnel *tunnel = netdev_priv(ol_dev); - __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */ bool inherit_ttl = tunnel->parms.iph.ttl == 0; bool inherit_tos = tunnel->parms.iph.tos & 0x1; + IP_TUNNEL_DECLARE_FLAGS(okflags) = { }; - return (tunnel->parms.i_flags & ~okflags) == 0 && - (tunnel->parms.o_flags & ~okflags) == 0 && + /* We can't offload any other features. */ + __set_bit(IP_TUNNEL_KEY_BIT, okflags); + + return ip_tunnel_flags_subset(tunnel->parms.i_flags, okflags) && + ip_tunnel_flags_subset(tunnel->parms.o_flags, okflags) && inherit_ttl && inherit_tos && mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev); } @@ -443,10 +446,13 @@ static bool mlxsw_sp_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(ol_dev); bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS; bool inherit_ttl = tparm.hop_limit == 0; - __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */ + IP_TUNNEL_DECLARE_FLAGS(okflags) = { }; + + /* We can't offload any other features. */ + __set_bit(IP_TUNNEL_KEY_BIT, okflags); - return (tparm.i_flags & ~okflags) == 0 && - (tparm.o_flags & ~okflags) == 0 && + return ip_tunnel_flags_subset(tparm.i_flags, okflags) && + ip_tunnel_flags_subset(tparm.o_flags, okflags) && inherit_ttl && inherit_tos && mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV6, ol_dev); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index f0cbc6b9b041..3de69b2eb5c4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -461,7 +461,8 @@ mlxsw_sp_span_entry_gretap4_parms(struct mlxsw_sp *mlxsw_sp, if (!(to_dev->flags & IFF_UP) || /* Reject tunnels with GRE keys, checksums, etc. */ - tparm.i_flags || tparm.o_flags || + !ip_tunnel_flags_empty(tparm.i_flags) || + !ip_tunnel_flags_empty(tparm.o_flags) || /* Require a fixed TTL and a TOS copied from the mirrored packet. */ inherit_ttl || !inherit_tos || /* A destination address may not be "any". */ @@ -565,7 +566,8 @@ mlxsw_sp_span_entry_gretap6_parms(struct mlxsw_sp *mlxsw_sp, if (!(to_dev->flags & IFF_UP) || /* Reject tunnels with GRE keys, checksums, etc. */ - tparm.i_flags || tparm.o_flags || + !ip_tunnel_flags_empty(tparm.i_flags) || + !ip_tunnel_flags_empty(tparm.o_flags) || /* Require a fixed TTL and a TOS copied from the mirrored packet. */ inherit_ttl || !inherit_tos || /* A destination address may not be "any". */ diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 2c3f62907958..aca2a7417af3 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -396,6 +396,17 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, return 0; } +#define NFP_FL_CHECK(flag) ({ \ + IP_TUNNEL_DECLARE_FLAGS(__check) = { }; \ + __be16 __res; \ + \ + __set_bit(IP_TUNNEL_##flag##_BIT, __check); \ + __res = ip_tunnel_flags_to_be16(__check); \ + \ + BUILD_BUG_ON(__builtin_constant_p(__res) && \ + NFP_FL_TUNNEL_##flag != __res); \ +}) + static int nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, const struct flow_action_entry *act, @@ -410,6 +421,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, u32 tmp_set_ip_tun_type_index = 0; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; + __be16 tun_flags; if (!IS_ENABLED(CONFIG_IPV6) && ipv6) return -EOPNOTSUPP; @@ -417,9 +429,10 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, if (ipv6 && !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) return -EOPNOTSUPP; - BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || - NFP_FL_TUNNEL_KEY != TUNNEL_KEY || - NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); + NFP_FL_CHECK(CSUM); + NFP_FL_CHECK(KEY); + NFP_FL_CHECK(GENEVE_OPT); + if (ip_tun->options_len && (tun_type != NFP_FL_TUNNEL_GENEVE || !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) { @@ -427,7 +440,9 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, return -EOPNOTSUPP; } - if (ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS) { + tun_flags = ip_tunnel_flags_to_be16(ip_tun->key.tun_flags); + if (!ip_tunnel_flags_is_be16_compat(ip_tun->key.tun_flags) || + (tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support tunnel flag offload"); return -EOPNOTSUPP; @@ -442,7 +457,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, FIELD_PREP(NFP_FL_PRE_TUN_INDEX, pretun_idx); set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); - if (ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) + if (tun_flags & NFP_FL_TUNNEL_KEY) set_tun->tun_id = ip_tun->key.tun_id; if (ip_tun->key.ttl) { @@ -486,7 +501,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, } set_tun->tos = ip_tun->key.tos; - set_tun->tun_flags = ip_tun->key.tun_flags; + set_tun->tun_flags = tun_flags; if (tun_type == NFP_FL_TUNNEL_GENEVE) { set_tun->tun_proto = htons(ETH_P_TEB); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 2f6739fe78af..163f94a5a58f 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -225,10 +225,11 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { - __be16 flags; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; - flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) | - (gnvh->critical ? TUNNEL_CRIT_OPT : 0); + __set_bit(IP_TUNNEL_KEY_BIT, flags); + __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam); + __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical); tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, vni_to_tunnel_id(gnvh->vni), @@ -238,9 +239,11 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, goto drop; } /* Update tunnel dst according to Geneve options. */ + ip_tunnel_flags_zero(flags); + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags); ip_tunnel_info_opts_set(&tun_dst->u.tun_info, gnvh->options, gnvh->opt_len * 4, - TUNNEL_GENEVE_OPT); + flags); } else { /* Drop packets w/ critical options, * since we don't support any... @@ -745,14 +748,15 @@ static void geneve_build_header(struct genevehdr *geneveh, { geneveh->ver = GENEVE_VER; geneveh->opt_len = info->options_len / 4; - geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM); - geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT); + geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags); + geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, + info->key.tun_flags); geneveh->rsvd1 = 0; tunnel_id_to_vni(info->key.tun_id, geneveh->vni); geneveh->proto_type = inner_proto; geneveh->rsvd2 = 0; - if (info->key.tun_flags & TUNNEL_GENEVE_OPT) + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) ip_tunnel_info_opts_get(geneveh->options, info); } @@ -761,7 +765,7 @@ static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, bool xnet, int ip_hdr_len, bool inner_proto_inherit) { - bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); + bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); struct genevehdr *gnvh; __be16 inner_proto; int min_headroom; @@ -878,7 +882,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (geneve->cfg.collect_md) { ttl = key->ttl; - df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? + htons(IP_DF) : 0; } else { if (geneve->cfg.ttl_inherit) ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); @@ -910,7 +915,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst, tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, !net_eq(geneve->net, dev_net(geneve->dev)), - !(info->key.tun_flags & TUNNEL_CSUM)); + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags)); return 0; } @@ -998,7 +1004,8 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, &saddr, &key->u.ipv6.dst, prio, ttl, info->key.label, sport, geneve->cfg.info.key.tp_dst, - !(info->key.tun_flags & TUNNEL_CSUM)); + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags)); return 0; } #endif @@ -1297,7 +1304,8 @@ static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, static bool is_tnl_info_zero(const struct ip_tunnel_info *info) { - return !(info->key.tun_id || info->key.tun_flags || info->key.tos || + return !(info->key.tun_id || info->key.tos || + !ip_tunnel_flags_empty(info->key.tun_flags) || info->key.ttl || info->key.label || info->key.tp_src || memchr_inv(&info->key.u, 0, sizeof(info->key.u))); } @@ -1435,7 +1443,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], "Remote IPv6 address cannot be Multicast"); return -EINVAL; } - info->key.tun_flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); cfg->use_udp6_rx_checksums = true; #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], @@ -1510,7 +1518,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) - info->key.tun_flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); } if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) { @@ -1520,7 +1528,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) - info->key.tun_flags &= ~TUNNEL_CSUM; + __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX], "IPv6 support not enabled in the kernel"); @@ -1753,7 +1761,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) info->key.u.ipv4.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, - !!(info->key.tun_flags & TUNNEL_CSUM))) + test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags))) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) @@ -1762,7 +1771,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) &info->key.u.ipv6.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, - !(info->key.tun_flags & TUNNEL_CSUM))) + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags))) goto nla_put_failure; #endif } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 3495591a5c29..72ecf6cf809c 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1584,7 +1584,8 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, tun_dst = (struct metadata_dst *)skb_dst(skb); if (tun_dst) { - tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, + tun_dst->u.tun_info.key.tun_flags); tun_dst->u.tun_info.options_len = sizeof(*md); } if (gbp->dont_learn) @@ -1716,9 +1717,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) goto drop; if (vxlan_collect_metadata(vs)) { + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *tun_dst; - tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, + __set_bit(IP_TUNNEL_KEY_BIT, flags); + tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), flags, key32_to_tunnel_id(vni), sizeof(*md)); if (!tun_dst) @@ -2403,14 +2406,14 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vni = tunnel_id_to_key32(info->key.tun_id); ifindex = 0; dst_cache = &info->dst_cache; - if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { + if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) { if (info->options_len < sizeof(*md)) goto drop; md = ip_tunnel_info_opts(info); } ttl = info->key.ttl; tos = info->key.tos; - udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); + udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); } src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, vxlan->cfg.port_max, true); @@ -2451,7 +2454,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, old_iph->frag_off & htons(IP_DF))) df = htons(IP_DF); } - } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) { + } else if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, + info->key.tun_flags)) { df = htons(IP_DF); } diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 1b7fae4c6b24..4160731dcb6e 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -198,7 +198,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, __be16 tp_dst, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { @@ -215,7 +215,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, } static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { @@ -230,7 +230,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad __u8 tos, __u8 ttl, __be16 tp_dst, __be32 label, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { @@ -243,7 +243,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad info = &tun_dst->u.tun_info; info->mode = IP_TUNNEL_INFO_IPV6; - info->key.tun_flags = flags; + ip_tunnel_flags_copy(info->key.tun_flags, flags); info->key.tun_id = tunnel_id; info->key.tp_src = 0; info->key.tp_dst = tp_dst; @@ -259,7 +259,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad } static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 1a7131d6cb0e..9ab376d1a677 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -97,7 +97,7 @@ struct flow_dissector_key_enc_opts { * here but seems difficult to #include */ u8 len; - __be16 dst_opt_type; + u32 dst_opt_type; }; struct flow_dissector_key_keyid { diff --git a/include/net/gre.h b/include/net/gre.h index 4e209708b754..ccd293203284 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -49,67 +49,61 @@ static inline bool netif_is_ip6gretap(const struct net_device *dev) !strcmp(dev->rtnl_link_ops->kind, "ip6gretap"); } -static inline int gre_calc_hlen(__be16 o_flags) +static inline int gre_calc_hlen(const unsigned long *o_flags) { int addend = 4; - if (o_flags & TUNNEL_CSUM) + if (test_bit(IP_TUNNEL_CSUM_BIT, o_flags)) addend += 4; - if (o_flags & TUNNEL_KEY) + if (test_bit(IP_TUNNEL_KEY_BIT, o_flags)) addend += 4; - if (o_flags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, o_flags)) addend += 4; return addend; } -static inline __be16 gre_flags_to_tnl_flags(__be16 flags) +static inline void gre_flags_to_tnl_flags(unsigned long *dst, __be16 flags) { - __be16 tflags = 0; - - if (flags & GRE_CSUM) - tflags |= TUNNEL_CSUM; - if (flags & GRE_ROUTING) - tflags |= TUNNEL_ROUTING; - if (flags & GRE_KEY) - tflags |= TUNNEL_KEY; - if (flags & GRE_SEQ) - tflags |= TUNNEL_SEQ; - if (flags & GRE_STRICT) - tflags |= TUNNEL_STRICT; - if (flags & GRE_REC) - tflags |= TUNNEL_REC; - if (flags & GRE_VERSION) - tflags |= TUNNEL_VERSION; - - return tflags; + IP_TUNNEL_DECLARE_FLAGS(res) = { }; + + __assign_bit(IP_TUNNEL_CSUM_BIT, res, flags & GRE_CSUM); + __assign_bit(IP_TUNNEL_ROUTING_BIT, res, flags & GRE_ROUTING); + __assign_bit(IP_TUNNEL_KEY_BIT, res, flags & GRE_KEY); + __assign_bit(IP_TUNNEL_SEQ_BIT, res, flags & GRE_SEQ); + __assign_bit(IP_TUNNEL_STRICT_BIT, res, flags & GRE_STRICT); + __assign_bit(IP_TUNNEL_REC_BIT, res, flags & GRE_REC); + __assign_bit(IP_TUNNEL_VERSION_BIT, res, flags & GRE_VERSION); + + ip_tunnel_flags_copy(dst, res); } -static inline __be16 gre_tnl_flags_to_gre_flags(__be16 tflags) +static inline __be16 gre_tnl_flags_to_gre_flags(const unsigned long *tflags) { __be16 flags = 0; - if (tflags & TUNNEL_CSUM) + if (test_bit(IP_TUNNEL_CSUM_BIT, tflags)) flags |= GRE_CSUM; - if (tflags & TUNNEL_ROUTING) + if (test_bit(IP_TUNNEL_ROUTING_BIT, tflags)) flags |= GRE_ROUTING; - if (tflags & TUNNEL_KEY) + if (test_bit(IP_TUNNEL_KEY_BIT, tflags)) flags |= GRE_KEY; - if (tflags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, tflags)) flags |= GRE_SEQ; - if (tflags & TUNNEL_STRICT) + if (test_bit(IP_TUNNEL_STRICT_BIT, tflags)) flags |= GRE_STRICT; - if (tflags & TUNNEL_REC) + if (test_bit(IP_TUNNEL_REC_BIT, tflags)) flags |= GRE_REC; - if (tflags & TUNNEL_VERSION) + if (test_bit(IP_TUNNEL_VERSION_BIT, tflags)) flags |= GRE_VERSION; return flags; } static inline void gre_build_header(struct sk_buff *skb, int hdr_len, - __be16 flags, __be16 proto, + const unsigned long *flags, __be16 proto, __be32 key, __be32 seq) { + IP_TUNNEL_DECLARE_FLAGS(cond) = { }; struct gre_base_hdr *greh; skb_push(skb, hdr_len); @@ -120,18 +114,22 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len, greh->flags = gre_tnl_flags_to_gre_flags(flags); greh->protocol = proto; - if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) { + __set_bit(IP_TUNNEL_KEY_BIT, cond); + __set_bit(IP_TUNNEL_CSUM_BIT, cond); + __set_bit(IP_TUNNEL_SEQ_BIT, cond); + + if (ip_tunnel_flags_intersect(flags, cond)) { __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - if (flags & TUNNEL_SEQ) { + if (test_bit(IP_TUNNEL_SEQ_BIT, flags)) { *ptr = seq; ptr--; } - if (flags & TUNNEL_KEY) { + if (test_bit(IP_TUNNEL_KEY_BIT, flags)) { *ptr = key; ptr--; } - if (flags & TUNNEL_CSUM && + if (test_bit(IP_TUNNEL_CSUM_BIT, flags) && !(skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { *ptr = 0; diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 74b369bddf49..399592405c72 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -30,8 +30,8 @@ struct __ip6_tnl_parm { struct in6_addr laddr; /* local tunnel end-point address */ struct in6_addr raddr; /* remote tunnel end-point address */ - __be16 i_flags; - __be16 o_flags; + IP_TUNNEL_DECLARE_FLAGS(i_flags); + IP_TUNNEL_DECLARE_FLAGS(o_flags); __be32 i_key; __be32 o_key; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 20f0319ab149..ed8e48cc9054 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -36,6 +36,24 @@ (sizeof_field(struct ip_tunnel_key, u) - \ sizeof_field(struct ip_tunnel_key, u.ipv4)) +#define __ipt_flag_op(op, ...) \ + op(__VA_ARGS__, __IP_TUNNEL_FLAG_NUM) + +#define IP_TUNNEL_DECLARE_FLAGS(...) \ + __ipt_flag_op(DECLARE_BITMAP, __VA_ARGS__) + +#define ip_tunnel_flags_zero(...) __ipt_flag_op(bitmap_zero, __VA_ARGS__) +#define ip_tunnel_flags_copy(...) __ipt_flag_op(bitmap_copy, __VA_ARGS__) +#define ip_tunnel_flags_and(...) __ipt_flag_op(bitmap_and, __VA_ARGS__) +#define ip_tunnel_flags_or(...) __ipt_flag_op(bitmap_or, __VA_ARGS__) + +#define ip_tunnel_flags_empty(...) \ + __ipt_flag_op(bitmap_empty, __VA_ARGS__) +#define ip_tunnel_flags_intersect(...) \ + __ipt_flag_op(bitmap_intersects, __VA_ARGS__) +#define ip_tunnel_flags_subset(...) \ + __ipt_flag_op(bitmap_subset, __VA_ARGS__) + struct ip_tunnel_key { __be64 tun_id; union { @@ -48,11 +66,11 @@ struct ip_tunnel_key { struct in6_addr dst; } ipv6; } u; - __be16 tun_flags; - u8 tos; /* TOS for IPv4, TC for IPv6 */ - u8 ttl; /* TTL for IPv4, HL for IPv6 */ + IP_TUNNEL_DECLARE_FLAGS(tun_flags); __be32 label; /* Flow Label for IPv6 */ u32 nhid; + u8 tos; /* TOS for IPv4, TC for IPv6 */ + u8 ttl; /* TTL for IPv4, HL for IPv6 */ __be16 tp_src; __be16 tp_dst; __u8 flow_flags; @@ -110,14 +128,14 @@ struct ip_tunnel_prl_entry { struct metadata_dst; -/* Kernel-side copy of ip_tunnel_parm */ +/* Kernel-side variant of ip_tunnel_parm */ struct ip_tunnel_parm_kern { char name[IFNAMSIZ]; - int link; - __be16 i_flags; - __be16 o_flags; + IP_TUNNEL_DECLARE_FLAGS(i_flags); + IP_TUNNEL_DECLARE_FLAGS(o_flags); __be32 i_key; __be32 o_key; + int link; struct iphdr iph; }; @@ -168,7 +186,7 @@ struct ip_tunnel { }; struct tnl_ptk_info { - __be16 flags; + IP_TUNNEL_DECLARE_FLAGS(flags); __be16 proto; __be32 key; __be32 seq; @@ -190,11 +208,77 @@ struct ip_tunnel_net { int type; }; +static inline void ip_tunnel_set_options_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + + ip_tunnel_flags_or(flags, flags, present); +} + +static inline void ip_tunnel_clear_options_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + + __ipt_flag_op(bitmap_andnot, flags, flags, present); +} + +static inline bool ip_tunnel_is_options_present(const unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + + return ip_tunnel_flags_intersect(flags, present); +} + +static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(supp) = { }; + + bitmap_set(supp, 0, BITS_PER_TYPE(__be16)); + __set_bit(IP_TUNNEL_VTI_BIT, supp); + + return ip_tunnel_flags_subset(flags, supp); +} + +static inline void ip_tunnel_flags_from_be16(unsigned long *dst, __be16 flags) +{ + ip_tunnel_flags_zero(dst); + + bitmap_write(dst, be16_to_cpu(flags), 0, BITS_PER_TYPE(__be16)); + __assign_bit(IP_TUNNEL_VTI_BIT, dst, flags & VTI_ISVTI); +} + +static inline __be16 ip_tunnel_flags_to_be16(const unsigned long *flags) +{ + __be16 ret; + + ret = cpu_to_be16(bitmap_read(flags, 0, BITS_PER_TYPE(__be16))); + if (test_bit(IP_TUNNEL_VTI_BIT, flags)) + ret |= VTI_ISVTI; + + return ret; +} + static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, u8 tos, u8 ttl, __be32 label, __be16 tp_src, __be16 tp_dst, - __be64 tun_id, __be16 tun_flags) + __be64 tun_id, + const unsigned long *tun_flags) { key->tun_id = tun_id; key->u.ipv4.src = saddr; @@ -204,7 +288,7 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, key->tos = tos; key->ttl = ttl; key->label = label; - key->tun_flags = tun_flags; + ip_tunnel_flags_copy(key->tun_flags, tun_flags); /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of * the upper tunnel are used. @@ -225,12 +309,8 @@ ip_tunnel_dst_cache_usable(const struct sk_buff *skb, { if (skb->mark) return false; - if (!info) - return true; - if (info->key.tun_flags & TUNNEL_NOCACHE) - return false; - return true; + return !info || !test_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags); } static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info @@ -313,7 +393,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, - int link, __be16 flags, + int link, const unsigned long *flags, __be32 remote, __be32 local, __be32 key); @@ -529,12 +609,13 @@ static inline void ip_tunnel_info_opts_get(void *to, static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, - __be16 flags) + const unsigned long *flags) { info->options_len = len; if (len > 0) { memcpy(ip_tunnel_info_opts(info), from, len); - info->key.tun_flags |= flags; + ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags, + flags); } } @@ -578,7 +659,7 @@ static inline void ip_tunnel_info_opts_get(void *to, static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, - __be16 flags) + const unsigned long *flags) { info->options_len = 0; } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index d716214fe03d..a93dc51f6323 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -179,8 +179,8 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct dst_cache *dst_cache); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, - __be16 flags, __be64 tunnel_id, - int md_size); + const unsigned long *flags, + __be64 tunnel_id, int md_size); #ifdef CONFIG_INET static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 102119628ff5..838927dd73a4 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -161,6 +161,14 @@ enum { #define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1) +#ifndef __KERNEL__ +/* Historically, tunnel flags have been defined as __be16 and now there are + * no free bits left. It is strongly advised to switch the already existing + * userspace code to the new *_BIT definitions from down below, as __be16 + * can't be simply cast to a wider type on LE systems. All new flags and + * code must use *_BIT only. + */ + #define TUNNEL_CSUM __cpu_to_be16(0x01) #define TUNNEL_ROUTING __cpu_to_be16(0x02) #define TUNNEL_KEY __cpu_to_be16(0x04) @@ -181,5 +189,30 @@ enum { #define TUNNEL_OPTIONS_PRESENT \ (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT | \ TUNNEL_GTP_OPT) +#endif + +enum { + IP_TUNNEL_CSUM_BIT = 0U, + IP_TUNNEL_ROUTING_BIT, + IP_TUNNEL_KEY_BIT, + IP_TUNNEL_SEQ_BIT, + IP_TUNNEL_STRICT_BIT, + IP_TUNNEL_REC_BIT, + IP_TUNNEL_VERSION_BIT, + IP_TUNNEL_NO_KEY_BIT, + IP_TUNNEL_DONT_FRAGMENT_BIT, + IP_TUNNEL_OAM_BIT, + IP_TUNNEL_CRIT_OPT_BIT, + IP_TUNNEL_GENEVE_OPT_BIT, /* OPTIONS_PRESENT */ + IP_TUNNEL_VXLAN_OPT_BIT, /* OPTIONS_PRESENT */ + IP_TUNNEL_NOCACHE_BIT, + IP_TUNNEL_ERSPAN_OPT_BIT, /* OPTIONS_PRESENT */ + IP_TUNNEL_GTP_OPT_BIT, /* OPTIONS_PRESENT */ + + IP_TUNNEL_VTI_BIT, + IP_TUNNEL_SIT_ISATAP_BIT = IP_TUNNEL_VTI_BIT, + + __IP_TUNNEL_FLAG_NUM, +}; #endif /* _UAPI_IF_TUNNEL_H_ */ diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index 81833ca7a2c7..a966a6ec8263 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -65,13 +65,14 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, { struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst); __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id)); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; int err; if (metadata) return -EEXIST; - metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, - key, 0); + __set_bit(IP_TUNNEL_KEY_BIT, flags); + metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, flags, key, 0); if (!metadata) return -EINVAL; @@ -185,6 +186,7 @@ void br_handle_ingress_vlan_tunnel(struct sk_buff *skb, int br_handle_egress_vlan_tunnel(struct sk_buff *skb, struct net_bridge_vlan *vlan) { + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *tunnel_dst; __be64 tunnel_id; int err; @@ -202,7 +204,8 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb, return err; if (BR_INPUT_SKB_CB(skb)->backup_nhid) { - tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, + __set_bit(IP_TUNNEL_KEY_BIT, flags); + tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, flags, tunnel_id, 0); if (!tunnel_dst) return -ENOMEM; diff --git a/net/core/filter.c b/net/core/filter.c index 0c66e4a3fc5b..294670d3850d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4662,7 +4662,7 @@ set_compat: to->tunnel_tos = info->key.tos; to->tunnel_ttl = info->key.ttl; if (flags & BPF_F_TUNINFO_FLAGS) - to->tunnel_flags = info->key.tun_flags; + to->tunnel_flags = ip_tunnel_flags_to_be16(info->key.tun_flags); else to->tunnel_ext = 0; @@ -4705,7 +4705,7 @@ BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size) int err; if (unlikely(!info || - !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) { + !ip_tunnel_is_options_present(info->key.tun_flags))) { err = -ENOENT; goto err_clear; } @@ -4775,15 +4775,15 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, memset(info, 0, sizeof(*info)); info->mode = IP_TUNNEL_INFO_TX; - info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; - if (flags & BPF_F_DONT_FRAGMENT) - info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; - if (flags & BPF_F_ZERO_CSUM_TX) - info->key.tun_flags &= ~TUNNEL_CSUM; - if (flags & BPF_F_SEQ_NUMBER) - info->key.tun_flags |= TUNNEL_SEQ; - if (flags & BPF_F_NO_TUNNEL_KEY) - info->key.tun_flags &= ~TUNNEL_KEY; + __set_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags); + __assign_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, info->key.tun_flags, + flags & BPF_F_DONT_FRAGMENT); + __assign_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags, + !(flags & BPF_F_ZERO_CSUM_TX)); + __assign_bit(IP_TUNNEL_SEQ_BIT, info->key.tun_flags, + flags & BPF_F_SEQ_NUMBER); + __assign_bit(IP_TUNNEL_KEY_BIT, info->key.tun_flags, + !(flags & BPF_F_NO_TUNNEL_KEY)); info->key.tun_id = cpu_to_be64(from->tunnel_id); info->key.tos = from->tunnel_tos; @@ -4821,13 +4821,15 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, { struct ip_tunnel_info *info = skb_tunnel_info(skb); const struct metadata_dst *md = this_cpu_ptr(md_dst); + IP_TUNNEL_DECLARE_FLAGS(present) = { }; if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1)))) return -EINVAL; if (unlikely(size > IP_TUNNEL_OPTS_MAX)) return -ENOMEM; - ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT); + ip_tunnel_set_options_present(present); + ip_tunnel_info_opts_set(info, from, size, present); return 0; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 272f09251343..f82e9a7d3b37 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -455,17 +455,25 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) { struct flow_dissector_key_enc_opts *enc_opt; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; + u32 val; enc_opt = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS, target_container); - if (info->options_len) { - enc_opt->len = info->options_len; - ip_tunnel_info_opts_get(enc_opt->data, info); - enc_opt->dst_opt_type = info->key.tun_flags & - TUNNEL_OPTIONS_PRESENT; - } + if (!info->options_len) + return; + + enc_opt->len = info->options_len; + ip_tunnel_info_opts_get(enc_opt->data, info); + + ip_tunnel_set_options_present(flags); + ip_tunnel_flags_and(flags, info->key.tun_flags, flags); + + val = find_next_bit(flags, __IP_TUNNEL_FLAG_NUM, + IP_TUNNEL_GENEVE_OPT_BIT); + enc_opt->dst_opt_type = val < __IP_TUNNEL_FLAG_NUM ? val : 0; } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c index 06e5572f296f..54984f3170a8 100644 --- a/net/ipv4/fou_bpf.c +++ b/net/ipv4/fou_bpf.c @@ -64,7 +64,7 @@ __bpf_kfunc int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, info->encap.type = TUNNEL_ENCAP_NONE; } - if (info->key.tun_flags & TUNNEL_CSUM) + if (test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags)) info->encap.flags |= TUNNEL_ENCAP_FLAG_CSUM; info->encap.sport = encap->sport; diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 3757fd93523f..6701a98d9a9f 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -73,7 +73,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; - tpi->flags = gre_flags_to_tnl_flags(greh->flags); + gre_flags_to_tnl_flags(tpi->flags, greh->flags); hdr_len = gre_calc_hlen(tpi->flags); if (!pskb_may_pull(skb, nhs + hdr_len)) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8853c065a985..3c46a7fef8db 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -265,6 +265,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, struct net *net = dev_net(skb->dev); struct metadata_dst *tun_dst = NULL; struct erspan_base_hdr *ershdr; + IP_TUNNEL_DECLARE_FLAGS(flags); struct ip_tunnel_net *itn; struct ip_tunnel *tunnel; const struct iphdr *iph; @@ -272,18 +273,20 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, int ver; int len; + ip_tunnel_flags_copy(flags, tpi->flags); + itn = net_generic(net, erspan_net_id); iph = ip_hdr(skb); if (is_erspan_type1(gre_hdr_len)) { ver = 0; - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, - tpi->flags | TUNNEL_NO_KEY, + __set_bit(IP_TUNNEL_NO_KEY_BIT, flags); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr, iph->daddr, 0); } else { ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); ver = ershdr->ver; - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, - tpi->flags | TUNNEL_KEY, + __set_bit(IP_TUNNEL_KEY_BIT, flags); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr, iph->daddr, tpi->key); } @@ -307,10 +310,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, struct ip_tunnel_info *info; unsigned char *gh; __be64 tun_id; - __be16 flags; - tpi->flags |= TUNNEL_KEY; - flags = tpi->flags; + __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags); + ip_tunnel_flags_copy(flags, tpi->flags); tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ip_tun_rx_dst(skb, flags, @@ -333,7 +335,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, ERSPAN_V2_MDSIZE); info = &tun_dst->u.tun_info; - info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + info->key.tun_flags); info->options_len = sizeof(*md); } @@ -376,10 +379,13 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, tnl_params = &tunnel->parms.iph; if (tunnel->collect_md || tnl_params->daddr == 0) { - __be16 flags; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; __be64 tun_id; - flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); + __set_bit(IP_TUNNEL_CSUM_BIT, flags); + __set_bit(IP_TUNNEL_KEY_BIT, flags); + ip_tunnel_flags_and(flags, tpi->flags, flags); + tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); if (!tun_dst) @@ -459,12 +465,15 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); - __be16 flags = tunnel->parms.o_flags; + IP_TUNNEL_DECLARE_FLAGS(flags); + + ip_tunnel_flags_copy(flags, tunnel->parms.o_flags); /* Push GRE header. */ gre_build_header(skb, tunnel->tun_hlen, flags, proto, tunnel->parms.o_key, - (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); + test_bit(IP_TUNNEL_SEQ_BIT, flags) ? + htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } @@ -478,10 +487,10 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; int tunnel_hlen; - __be16 flags; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || @@ -495,14 +504,19 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, goto err_free_skb; /* Push Tunnel header. */ - if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) + if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + tunnel->parms.o_flags))) goto err_free_skb; - flags = tun_info->key.tun_flags & - (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); + __set_bit(IP_TUNNEL_CSUM_BIT, flags); + __set_bit(IP_TUNNEL_KEY_BIT, flags); + __set_bit(IP_TUNNEL_SEQ_BIT, flags); + ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags); + gre_build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); + test_bit(IP_TUNNEL_SEQ_BIT, flags) ? + htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); @@ -516,6 +530,7 @@ err_free_skb: static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; struct erspan_metadata *md; @@ -531,7 +546,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) goto err_free_skb; key = &tun_info->key; - if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) + if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags)) goto err_free_skb; if (tun_info->options_len < sizeof(*md)) goto err_free_skb; @@ -584,8 +599,9 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) goto err_free_skb; } - gre_build_header(skb, 8, TUNNEL_SEQ, - proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno))); + __set_bit(IP_TUNNEL_SEQ_BIT, flags); + gre_build_header(skb, 8, flags, proto, 0, + htonl(atomic_fetch_inc(&tunnel->o_seqno))); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); @@ -659,7 +675,8 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, tnl_params = &tunnel->parms.iph; } - if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) + if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + tunnel->parms.o_flags))) goto free_skb; __gre_xmit(skb, dev, tnl_params, skb->protocol); @@ -701,7 +718,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, /* Push ERSPAN header */ if (tunnel->erspan_ver == 0) { proto = htons(ETH_P_ERSPAN); - tunnel->parms.o_flags &= ~TUNNEL_SEQ; + __clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags); } else if (tunnel->erspan_ver == 1) { erspan_build_header(skb, ntohl(tunnel->parms.o_key), tunnel->index, @@ -716,7 +733,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, goto free_skb; } - tunnel->parms.o_flags &= ~TUNNEL_KEY; + __clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags); __gre_xmit(skb, dev, &tunnel->parms.iph, proto); return NETDEV_TX_OK; @@ -739,7 +756,8 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) + if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + tunnel->parms.o_flags))) goto free_skb; if (skb_cow_head(skb, dev->needed_headroom)) @@ -757,7 +775,6 @@ free_skb: static void ipgre_link_update(struct net_device *dev, bool set_mtu) { struct ip_tunnel *tunnel = netdev_priv(dev); - __be16 flags; int len; len = tunnel->tun_hlen; @@ -773,10 +790,9 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) if (set_mtu) dev->mtu = max_t(int, dev->mtu - len, 68); - flags = tunnel->parms.o_flags; - - if (flags & TUNNEL_SEQ || - (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) { + if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) || + (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) && + tunnel->encap.type != TUNNEL_ENCAP_NONE)) { dev->features &= ~NETIF_F_GSO_SOFTWARE; dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; } else { @@ -789,17 +805,25 @@ static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) { + __be16 i_flags, o_flags; int err; + if (!ip_tunnel_flags_is_be16_compat(p->i_flags) || + !ip_tunnel_flags_is_be16_compat(p->o_flags)) + return -EOVERFLOW; + + i_flags = ip_tunnel_flags_to_be16(p->i_flags); + o_flags = ip_tunnel_flags_to_be16(p->o_flags); + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE || p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) || - ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING))) + ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; } - p->i_flags = gre_flags_to_tnl_flags(p->i_flags); - p->o_flags = gre_flags_to_tnl_flags(p->o_flags); + gre_flags_to_tnl_flags(p->i_flags, i_flags); + gre_flags_to_tnl_flags(p->o_flags, o_flags); err = ip_tunnel_ctl(dev, p, cmd); if (err) @@ -808,15 +832,18 @@ static int ipgre_tunnel_ctl(struct net_device *dev, if (cmd == SIOCCHGTUNNEL) { struct ip_tunnel *t = netdev_priv(dev); - t->parms.i_flags = p->i_flags; - t->parms.o_flags = p->o_flags; + ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags); + ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags); if (strcmp(dev->rtnl_link_ops->kind, "erspan")) ipgre_link_update(dev, true); } - p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); - p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); + i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); + ip_tunnel_flags_from_be16(p->i_flags, i_flags); + o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); + ip_tunnel_flags_from_be16(p->o_flags, o_flags); + return 0; } @@ -956,7 +983,6 @@ static void ipgre_tunnel_setup(struct net_device *dev) static void __gre_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel; - __be16 flags; tunnel = netdev_priv(dev); tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); @@ -968,14 +994,13 @@ static void __gre_tunnel_init(struct net_device *dev) dev->features |= GRE_FEATURES | NETIF_F_LLTX; dev->hw_features |= GRE_FEATURES; - flags = tunnel->parms.o_flags; - /* TCP offload with GRE SEQ is not supported, nor can we support 2 * levels of outer headers requiring an update. */ - if (flags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags)) return; - if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE) + if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) && + tunnel->encap.type != TUNNEL_ENCAP_NONE) return; dev->features |= NETIF_F_GSO_SOFTWARE; @@ -1148,10 +1173,12 @@ static int ipgre_netlink_parms(struct net_device *dev, parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) - parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS])); + gre_flags_to_tnl_flags(parms->i_flags, + nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) - parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS])); + gre_flags_to_tnl_flags(parms->o_flags, + nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -1411,8 +1438,8 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], if (err < 0) return err; - t->parms.i_flags = p.i_flags; - t->parms.o_flags = p.o_flags; + ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags); + ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags); ipgre_link_update(dev, !tb[IFLA_MTU]); @@ -1440,8 +1467,8 @@ static int erspan_changelink(struct net_device *dev, struct nlattr *tb[], if (err < 0) return err; - t->parms.i_flags = p.i_flags; - t->parms.o_flags = p.o_flags; + ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags); + ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags); return 0; } @@ -1498,7 +1525,9 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm_kern *p = &t->parms; - __be16 o_flags = p->o_flags; + IP_TUNNEL_DECLARE_FLAGS(o_flags); + + ip_tunnel_flags_copy(o_flags, p->o_flags); if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, @@ -1546,7 +1575,7 @@ static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (t->erspan_ver <= 2) { if (t->erspan_ver != 0 && !t->collect_md) - t->parms.o_flags |= TUNNEL_KEY; + __set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags); if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) goto nla_put_failure; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index d10f41f81463..f65170a28106 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -57,16 +57,12 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) } static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p, - __be16 flags, __be32 key) + const unsigned long *flags, __be32 key) { - if (p->i_flags & TUNNEL_KEY) { - if (flags & TUNNEL_KEY) - return key == p->i_key; - else - /* key expected, none present */ - return false; - } else - return !(flags & TUNNEL_KEY); + if (!test_bit(IP_TUNNEL_KEY_BIT, flags)) + return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags); + + return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key; } /* Fallback tunnel: no source, no destination, no key, no options @@ -81,7 +77,7 @@ static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p, Given src, dst and key, find appropriate for input tunnel. */ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, - int link, __be16 flags, + int link, const unsigned long *flags, __be32 remote, __be32 local, __be32 key) { @@ -143,7 +139,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, } hlist_for_each_entry_rcu(t, head, hash_node) { - if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) || + if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) && + t->parms.i_key != key) || t->parms.iph.saddr != 0 || t->parms.iph.daddr != 0 || !(t->dev->flags & IFF_UP)) @@ -182,7 +179,8 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, else remote = 0; - if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI)) + if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) && + test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags)) i_key = 0; h = ip_tunnel_hash(i_key, remote); @@ -211,12 +209,14 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; + IP_TUNNEL_DECLARE_FLAGS(flags); __be32 key = parms->i_key; - __be16 flags = parms->i_flags; int link = parms->link; struct ip_tunnel *t = NULL; struct hlist_head *head = ip_bucket(itn, parms); + ip_tunnel_flags_copy(flags, parms->i_flags); + hlist_for_each_entry_rcu(t, head, hash_node) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && @@ -386,15 +386,15 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, } #endif - if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || - ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { + if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) != + test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) { DEV_STATS_INC(tunnel->dev, rx_crc_errors); DEV_STATS_INC(tunnel->dev, rx_errors); goto drop; } - if (tunnel->parms.i_flags&TUNNEL_SEQ) { - if (!(tpi->flags&TUNNEL_SEQ) || + if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) { + if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) || (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { DEV_STATS_INC(tunnel->dev, rx_fifo_errors); DEV_STATS_INC(tunnel->dev, rx_errors); @@ -638,7 +638,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (key->tun_flags & TUNNEL_DONT_FRAGMENT) + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags)) df = htons(IP_DF); if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen, key->u.ipv4.dst, true)) { @@ -928,10 +928,10 @@ int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, goto done; if (p->iph.ttl) p->iph.frag_off |= htons(IP_DF); - if (!(p->i_flags & VTI_ISVTI)) { - if (!(p->i_flags & TUNNEL_KEY)) + if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) { + if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags)) p->i_key = 0; - if (!(p->o_flags & TUNNEL_KEY)) + if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags)) p->o_key = 0; } @@ -1016,8 +1016,8 @@ bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp, strscpy(kp->name, p.name); kp->link = p.link; - kp->i_flags = p.i_flags; - kp->o_flags = p.o_flags; + ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags); + ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags); kp->i_key = p.i_key; kp->o_key = p.o_key; memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph))); @@ -1030,10 +1030,14 @@ bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp) { struct ip_tunnel_parm p; + if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) || + !ip_tunnel_flags_is_be16_compat(kp->o_flags)) + return false; + strscpy(p.name, kp->name); p.link = kp->link; - p.i_flags = kp->i_flags; - p.o_flags = kp->o_flags; + p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags); + p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags); p.i_key = kp->i_key; p.o_key = kp->o_key; memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph))); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 98f136b07191..a3676155be78 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -125,6 +125,7 @@ EXPORT_SYMBOL_GPL(__iptunnel_pull_header); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags) { + IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { }; struct metadata_dst *res; struct ip_tunnel_info *dst, *src; @@ -144,10 +145,10 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, sizeof(struct in6_addr)); else dst->key.u.ipv4.dst = src->key.u.ipv4.src; - dst->key.tun_flags = src->key.tun_flags; + ip_tunnel_flags_copy(dst->key.tun_flags, src->key.tun_flags); dst->mode = src->mode | IP_TUNNEL_INFO_TX; ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src), - src->options_len, 0); + src->options_len, tun_flags); return res; } @@ -497,7 +498,7 @@ static int ip_tun_parse_opts_geneve(struct nlattr *attr, opt->opt_class = nla_get_be16(attr); attr = tb[LWTUNNEL_IP_OPT_GENEVE_TYPE]; opt->type = nla_get_u8(attr); - info->key.tun_flags |= TUNNEL_GENEVE_OPT; + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags); } return sizeof(struct geneve_opt) + data_len; @@ -525,7 +526,7 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr, attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP]; md->gbp = nla_get_u32(attr); md->gbp &= VXLAN_GBP_MASK; - info->key.tun_flags |= TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags); } return sizeof(struct vxlan_metadata); @@ -574,7 +575,7 @@ static int ip_tun_parse_opts_erspan(struct nlattr *attr, set_hwid(&md->u.md2, nla_get_u8(attr)); } - info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags); } return sizeof(struct erspan_metadata); @@ -585,7 +586,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, { int err, rem, opt_len, opts_len = 0; struct nlattr *nla; - __be16 type = 0; + u32 type = 0; if (!attr) return 0; @@ -598,7 +599,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) { switch (nla_type(nla)) { case LWTUNNEL_IP_OPTS_GENEVE: - if (type && type != TUNNEL_GENEVE_OPT) + if (type && type != IP_TUNNEL_GENEVE_OPT_BIT) return -EINVAL; opt_len = ip_tun_parse_opts_geneve(nla, info, opts_len, extack); @@ -607,7 +608,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, opts_len += opt_len; if (opts_len > IP_TUNNEL_OPTS_MAX) return -EINVAL; - type = TUNNEL_GENEVE_OPT; + type = IP_TUNNEL_GENEVE_OPT_BIT; break; case LWTUNNEL_IP_OPTS_VXLAN: if (type) @@ -617,7 +618,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, if (opt_len < 0) return opt_len; opts_len += opt_len; - type = TUNNEL_VXLAN_OPT; + type = IP_TUNNEL_VXLAN_OPT_BIT; break; case LWTUNNEL_IP_OPTS_ERSPAN: if (type) @@ -627,7 +628,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, if (opt_len < 0) return opt_len; opts_len += opt_len; - type = TUNNEL_ERSPAN_OPT; + type = IP_TUNNEL_ERSPAN_OPT_BIT; break; default: return -EINVAL; @@ -705,10 +706,16 @@ static int ip_tun_build_state(struct net *net, struct nlattr *attr, if (tb[LWTUNNEL_IP_TOS]) tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); - if (tb[LWTUNNEL_IP_FLAGS]) - tun_info->key.tun_flags |= - (nla_get_be16(tb[LWTUNNEL_IP_FLAGS]) & - ~TUNNEL_OPTIONS_PRESENT); + if (tb[LWTUNNEL_IP_FLAGS]) { + IP_TUNNEL_DECLARE_FLAGS(flags); + + ip_tunnel_flags_from_be16(flags, + nla_get_be16(tb[LWTUNNEL_IP_FLAGS])); + ip_tunnel_clear_options_present(flags); + + ip_tunnel_flags_or(tun_info->key.tun_flags, + tun_info->key.tun_flags, flags); + } tun_info->mode = IP_TUNNEL_INFO_TX; tun_info->options_len = opt_len; @@ -812,18 +819,18 @@ static int ip_tun_fill_encap_opts(struct sk_buff *skb, int type, struct nlattr *nest; int err = 0; - if (!(tun_info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)) + if (!ip_tunnel_is_options_present(tun_info->key.tun_flags)) return 0; nest = nla_nest_start_noflag(skb, type); if (!nest) return -ENOMEM; - if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_info->key.tun_flags)) err = ip_tun_fill_encap_opts_geneve(skb, tun_info); - else if (tun_info->key.tun_flags & TUNNEL_VXLAN_OPT) + else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_info->key.tun_flags)) err = ip_tun_fill_encap_opts_vxlan(skb, tun_info); - else if (tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT) + else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags)) err = ip_tun_fill_encap_opts_erspan(skb, tun_info); if (err) { @@ -846,7 +853,8 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || - nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags) || + nla_put_be16(skb, LWTUNNEL_IP_FLAGS, + ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) || ip_tun_fill_encap_opts(skb, LWTUNNEL_IP_OPTS, tun_info)) return -ENOMEM; @@ -857,11 +865,11 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info) { int opt_len; - if (!(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)) + if (!ip_tunnel_is_options_present(info->key.tun_flags)) return 0; opt_len = nla_total_size(0); /* LWTUNNEL_IP_OPTS */ - if (info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) { struct geneve_opt *opt; int offset = 0; @@ -874,10 +882,10 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info) /* OPT_GENEVE_DATA */ offset += sizeof(*opt) + opt->length * 4; } - } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { + } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) { opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */ + nla_total_size(4); /* OPT_VXLAN_GBP */ - } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) { + } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) { struct erspan_metadata *md = ip_tunnel_info_opts(info); opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_ERSPAN */ @@ -984,10 +992,17 @@ static int ip6_tun_build_state(struct net *net, struct nlattr *attr, if (tb[LWTUNNEL_IP6_TC]) tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]); - if (tb[LWTUNNEL_IP6_FLAGS]) - tun_info->key.tun_flags |= - (nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]) & - ~TUNNEL_OPTIONS_PRESENT); + if (tb[LWTUNNEL_IP6_FLAGS]) { + IP_TUNNEL_DECLARE_FLAGS(flags); + __be16 data; + + data = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]); + ip_tunnel_flags_from_be16(flags, data); + ip_tunnel_clear_options_present(flags); + + ip_tunnel_flags_or(tun_info->key.tun_flags, + tun_info->key.tun_flags, flags); + } tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6; tun_info->options_len = opt_len; @@ -1008,7 +1023,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) || - nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags) || + nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, + ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) || ip_tun_fill_encap_opts(skb, LWTUNNEL_IP6_OPTS, tun_info)) return -ENOMEM; @@ -1139,8 +1155,12 @@ void ip_tunnel_netlink_parms(struct nlattr *data[], if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) parms->iph.frag_off = htons(IP_DF); - if (data[IFLA_IPTUN_FLAGS]) - parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + if (data[IFLA_IPTUN_FLAGS]) { + __be16 flags; + + flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + ip_tunnel_flags_from_be16(parms->i_flags, flags); + } if (data[IFLA_IPTUN_PROTO]) parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 7e595f619615..14536da9f5dc 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -51,8 +51,11 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, const struct iphdr *iph = ip_hdr(skb); struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + __set_bit(IP_TUNNEL_NO_KEY_BIT, flags); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr, iph->daddr, 0); if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) @@ -322,8 +325,11 @@ static int vti4_err(struct sk_buff *skb, u32 info) const struct iphdr *iph = (const struct iphdr *)skb->data; int protocol = iph->protocol; struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; + + __set_bit(IP_TUNNEL_NO_KEY_BIT, flags); - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->daddr, iph->saddr, 0); if (!tunnel) return -1; @@ -375,6 +381,7 @@ static int vti4_err(struct sk_buff *skb, u32 info) static int vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) { + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; int err = 0; if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { @@ -383,20 +390,26 @@ vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) return -EINVAL; } - if (!(p->i_flags & GRE_KEY)) + if (!ip_tunnel_flags_is_be16_compat(p->i_flags) || + !ip_tunnel_flags_is_be16_compat(p->o_flags)) + return -EOVERFLOW; + + if (!(ip_tunnel_flags_to_be16(p->i_flags) & GRE_KEY)) p->i_key = 0; - if (!(p->o_flags & GRE_KEY)) + if (!(ip_tunnel_flags_to_be16(p->o_flags) & GRE_KEY)) p->o_key = 0; - p->i_flags = VTI_ISVTI; + __set_bit(IP_TUNNEL_VTI_BIT, flags); + ip_tunnel_flags_copy(p->i_flags, flags); err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd != SIOCDELTUNNEL) { - p->i_flags |= GRE_KEY; - p->o_flags |= GRE_KEY; + ip_tunnel_flags_from_be16(flags, GRE_KEY); + ip_tunnel_flags_or(p->i_flags, p->i_flags, flags); + ip_tunnel_flags_or(p->o_flags, p->o_flags, flags); } return 0; } @@ -541,7 +554,7 @@ static void vti_netlink_parms(struct nlattr *data[], if (!data) return; - parms->i_flags = VTI_ISVTI; + __set_bit(IP_TUNNEL_VTI_BIT, parms->i_flags); if (data[IFLA_VTI_LINK]) parms->link = nla_get_u32(data[IFLA_VTI_LINK]); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index db960cd08a3b..923a2ef68c2f 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -130,13 +130,16 @@ static int ipip_err(struct sk_buff *skb, u32 info) struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); const struct iphdr *iph = (const struct iphdr *)skb->data; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; int err = 0; - t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->daddr, iph->saddr, 0); + __set_bit(IP_TUNNEL_NO_KEY_BIT, flags); + + t = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->daddr, + iph->saddr, 0); if (!t) { err = -ENOENT; goto out; @@ -213,13 +216,16 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) { struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *tun_dst = NULL; struct ip_tunnel *tunnel; const struct iphdr *iph; + __set_bit(IP_TUNNEL_NO_KEY_BIT, flags); + iph = ip_hdr(skb); - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->saddr, iph->daddr, 0); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr, + iph->daddr, 0); if (tunnel) { const struct tnl_ptk_info *tpi; @@ -238,7 +244,9 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; if (tunnel->collect_md) { - tun_dst = ip_tun_rx_dst(skb, 0, 0, 0); + ip_tunnel_flags_zero(flags); + + tun_dst = ip_tun_rx_dst(skb, flags, 0, 0); if (!tun_dst) return 0; ip_tunnel_md_udp_encap(skb, &tun_dst->u.tun_info); @@ -340,7 +348,8 @@ ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) } p->i_key = p->o_key = 0; - p->i_flags = p->o_flags = 0; + ip_tunnel_flags_zero(p->i_flags); + ip_tunnel_flags_zero(p->o_flags); return ip_tunnel_ctl(dev, p, cmd); } diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 860aff5f8599..e4e0fa869fa4 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -183,7 +183,8 @@ void udp_tunnel_sock_release(struct socket *sock) EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, - __be16 flags, __be64 tunnel_id, int md_size) + const unsigned long *flags, + __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; struct ip_tunnel_info *info; @@ -199,7 +200,7 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, info->key.tp_src = udp_hdr(skb)->source; info->key.tp_dst = udp_hdr(skb)->dest; if (udp_hdr(skb)->check) - info->key.tun_flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); return tun_dst; } EXPORT_SYMBOL_GPL(udp_tun_rx_dst); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ca7e77e84283..2146cf69d8e2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -496,11 +496,11 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) tpi->proto); if (tunnel) { if (tunnel->parms.collect_md) { + IP_TUNNEL_DECLARE_FLAGS(flags); struct metadata_dst *tun_dst; __be64 tun_id; - __be16 flags; - flags = tpi->flags; + ip_tunnel_flags_copy(flags, tpi->flags); tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0); @@ -548,14 +548,14 @@ static int ip6erspan_rcv(struct sk_buff *skb, if (tunnel->parms.collect_md) { struct erspan_metadata *pkt_md, *md; + IP_TUNNEL_DECLARE_FLAGS(flags); struct metadata_dst *tun_dst; struct ip_tunnel_info *info; unsigned char *gh; __be64 tun_id; - __be16 flags; - tpi->flags |= TUNNEL_KEY; - flags = tpi->flags; + __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags); + ip_tunnel_flags_copy(flags, tpi->flags); tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, @@ -577,7 +577,8 @@ static int ip6erspan_rcv(struct sk_buff *skb, md2 = &md->u.md2; memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE); - info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + info->key.tun_flags); info->options_len = sizeof(*md); ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); @@ -745,8 +746,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, __u32 *pmtu, __be16 proto) { struct ip6_tnl *tunnel = netdev_priv(dev); + IP_TUNNEL_DECLARE_FLAGS(flags); __be16 protocol; - __be16 flags; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -778,8 +779,11 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; - flags = key->tun_flags & - (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); + ip_tunnel_flags_zero(flags); + __set_bit(IP_TUNNEL_CSUM_BIT, flags); + __set_bit(IP_TUNNEL_KEY_BIT, flags); + __set_bit(IP_TUNNEL_SEQ_BIT, flags); + ip_tunnel_flags_and(flags, flags, key->tun_flags); tun_hlen = gre_calc_hlen(flags); if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen)) @@ -788,19 +792,21 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, gre_build_header(skb, tun_hlen, flags, protocol, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) - : 0); + test_bit(IP_TUNNEL_SEQ_BIT, flags) ? + htonl(atomic_fetch_inc(&tunnel->o_seqno)) : + 0); } else { if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) return -ENOMEM; - flags = tunnel->parms.o_flags; + ip_tunnel_flags_copy(flags, tunnel->parms.o_flags); gre_build_header(skb, tunnel->tun_hlen, flags, protocol, tunnel->parms.o_key, - (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) - : 0); + test_bit(IP_TUNNEL_SEQ_BIT, flags) ? + htonl(atomic_fetch_inc(&tunnel->o_seqno)) : + 0); } return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, @@ -822,7 +828,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, &dsfield, &encap_limit); - err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); + err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + t->parms.o_flags)); if (err) return -1; @@ -856,7 +863,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit)) return -1; - if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) + if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + t->parms.o_flags))) return -1; err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, @@ -883,7 +891,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) prepare_ip6gre_xmit_other(skb, dev, &fl6, &dsfield, &encap_limit)) return -1; - err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); + err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + t->parms.o_flags)); if (err) return err; err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, skb->protocol); @@ -936,6 +945,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, struct ip_tunnel_info *tun_info = NULL; struct ip6_tnl *t = netdev_priv(dev); struct dst_entry *dst = skb_dst(skb); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; bool truncate = false; int encap_limit = -1; __u8 dsfield = false; @@ -979,7 +989,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen)) goto tx_err; - t->parms.o_flags &= ~TUNNEL_KEY; + __clear_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags); IPCB(skb)->flags = 0; /* For collect_md mode, derive fl6 from the tunnel key, @@ -1004,7 +1014,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; - if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) + if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + tun_info->key.tun_flags)) goto tx_err; if (tun_info->options_len < sizeof(*md)) goto tx_err; @@ -1065,7 +1076,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, } /* Push GRE header. */ - gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno))); + __set_bit(IP_TUNNEL_SEQ_BIT, flags); + gre_build_header(skb, 8, flags, proto, 0, + htonl(atomic_fetch_inc(&t->o_seqno))); /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) @@ -1208,8 +1221,8 @@ static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t, t->parms.proto = p->proto; t->parms.i_key = p->i_key; t->parms.o_key = p->o_key; - t->parms.i_flags = p->i_flags; - t->parms.o_flags = p->o_flags; + ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags); + ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags); t->parms.fwmark = p->fwmark; t->parms.erspan_ver = p->erspan_ver; t->parms.index = p->index; @@ -1238,8 +1251,8 @@ static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p, p->link = u->link; p->i_key = u->i_key; p->o_key = u->o_key; - p->i_flags = gre_flags_to_tnl_flags(u->i_flags); - p->o_flags = gre_flags_to_tnl_flags(u->o_flags); + gre_flags_to_tnl_flags(p->i_flags, u->i_flags); + gre_flags_to_tnl_flags(p->o_flags, u->o_flags); memcpy(p->name, u->name, sizeof(u->name)); } @@ -1391,7 +1404,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, ipv6h->daddr = t->parms.raddr; p = (__be16 *)(ipv6h + 1); - p[0] = t->parms.o_flags; + p[0] = ip_tunnel_flags_to_be16(t->parms.o_flags); p[1] = htons(type); /* @@ -1455,19 +1468,17 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static void ip6gre_tnl_init_features(struct net_device *dev) { struct ip6_tnl *nt = netdev_priv(dev); - __be16 flags; dev->features |= GRE6_FEATURES | NETIF_F_LLTX; dev->hw_features |= GRE6_FEATURES; - flags = nt->parms.o_flags; - /* TCP offload with GRE SEQ is not supported, nor can we support 2 * levels of outer headers requiring an update. */ - if (flags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, nt->parms.o_flags)) return; - if (flags & TUNNEL_CSUM && nt->encap.type != TUNNEL_ENCAP_NONE) + if (test_bit(IP_TUNNEL_CSUM_BIT, nt->parms.o_flags) && + nt->encap.type != TUNNEL_ENCAP_NONE) return; dev->features |= NETIF_F_GSO_SOFTWARE; @@ -1792,12 +1803,12 @@ static void ip6gre_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) - parms->i_flags = gre_flags_to_tnl_flags( - nla_get_be16(data[IFLA_GRE_IFLAGS])); + gre_flags_to_tnl_flags(parms->i_flags, + nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) - parms->o_flags = gre_flags_to_tnl_flags( - nla_get_be16(data[IFLA_GRE_OFLAGS])); + gre_flags_to_tnl_flags(parms->o_flags, + nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -2144,11 +2155,13 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm *p = &t->parms; - __be16 o_flags = p->o_flags; + IP_TUNNEL_DECLARE_FLAGS(o_flags); + + ip_tunnel_flags_copy(o_flags, p->o_flags); if (p->erspan_ver == 1 || p->erspan_ver == 2) { if (!p->collect_md) - o_flags |= TUNNEL_KEY; + __set_bit(IP_TUNNEL_KEY_BIT, o_flags); if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver)) goto nla_put_failure; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e9cc315832cb..57bb3b3ea0c5 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -798,17 +798,15 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, const struct ipv6hdr *ipv6h; int nh, err; - if ((!(tpi->flags & TUNNEL_CSUM) && - (tunnel->parms.i_flags & TUNNEL_CSUM)) || - ((tpi->flags & TUNNEL_CSUM) && - !(tunnel->parms.i_flags & TUNNEL_CSUM))) { + if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) != + test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) { DEV_STATS_INC(tunnel->dev, rx_crc_errors); DEV_STATS_INC(tunnel->dev, rx_errors); goto drop; } - if (tunnel->parms.i_flags & TUNNEL_SEQ) { - if (!(tpi->flags & TUNNEL_SEQ) || + if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) { + if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) || (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { DEV_STATS_INC(tunnel->dev, rx_fifo_errors); @@ -946,7 +944,9 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; if (t->parms.collect_md) { - tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; + + tun_dst = ipv6_tun_rx_dst(skb, flags, 0, 0); if (!tun_dst) goto drop; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 894e6a4c4be3..83b195f09561 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -207,7 +207,7 @@ static int ipip6_tunnel_create(struct net_device *dev) __dev_addr_set(dev, &t->parms.iph.saddr, 4); memcpy(dev->broadcast, &t->parms.iph.daddr, 4); - if ((__force u16)t->parms.i_flags & SIT_ISATAP) + if (test_bit(IP_TUNNEL_SIT_ISATAP_BIT, t->parms.i_flags)) dev->priv_flags |= IFF_ISATAP; dev->rtnl_link_ops = &sit_link_ops; @@ -1700,7 +1700,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || - nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags) || + nla_put_be16(skb, IFLA_IPTUN_FLAGS, + ip_tunnel_flags_to_be16(parm->i_flags)) || nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark)) goto nla_put_failure; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index a2c16b501087..c7a8a08b7308 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1550,6 +1550,7 @@ static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb, if (!dest) goto unk; if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + IP_TUNNEL_DECLARE_FLAGS(flags); __be16 type; /* Only support version 0 and C (csum) */ @@ -1560,7 +1561,10 @@ static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb, if (type != htons(ETH_P_IP)) goto unk; *proto = IPPROTO_IPIP; - return gre_calc_hlen(gre_flags_to_tnl_flags(greh->flags)); + + gre_flags_to_tnl_flags(flags, greh->flags); + + return gre_calc_hlen(flags); } unk: diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 65e0259178da..39b5fd6bbf65 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -390,10 +390,10 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, skb->ip_summed == CHECKSUM_PARTIAL) mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV; } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { - __be16 tflags = 0; + IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) - tflags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tflags); mtu -= gre_calc_hlen(tflags); } if (mtu < 68) { @@ -553,10 +553,10 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, skb->ip_summed == CHECKSUM_PARTIAL) mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV; } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { - __be16 tflags = 0; + IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) - tflags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tflags); mtu -= gre_calc_hlen(tflags); } if (mtu < IPV6_MIN_MTU) { @@ -1082,11 +1082,11 @@ ipvs_gre_encap(struct net *net, struct sk_buff *skb, { __be16 proto = *next_protocol == IPPROTO_IPIP ? htons(ETH_P_IP) : htons(ETH_P_IPV6); - __be16 tflags = 0; + IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; size_t hdrlen; if (cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) - tflags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tflags); hdrlen = gre_calc_hlen(tflags); gre_build_header(skb, hdrlen, tflags, proto, 0, 0); @@ -1165,11 +1165,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, max_headroom += sizeof(struct udphdr) + gue_hdrlen; } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; size_t gre_hdrlen; - __be16 tflags = 0; if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) - tflags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tflags); gre_hdrlen = gre_calc_hlen(tflags); max_headroom += gre_hdrlen; @@ -1310,11 +1310,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, max_headroom += sizeof(struct udphdr) + gue_hdrlen; } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; size_t gre_hdrlen; - __be16 tflags = 0; if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) - tflags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tflags); gre_hdrlen = gre_calc_hlen(tflags); max_headroom += gre_hdrlen; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index f735d79d8be5..60a76e6e348e 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -174,8 +174,8 @@ struct nft_tunnel_opts { struct erspan_metadata erspan; u8 data[IP_TUNNEL_OPTS_MAX]; } u; + IP_TUNNEL_DECLARE_FLAGS(flags); u32 len; - __be16 flags; }; struct nft_tunnel_obj { @@ -271,7 +271,8 @@ static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr, opts->u.vxlan.gbp = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_VXLAN_GBP])); opts->len = sizeof(struct vxlan_metadata); - opts->flags = TUNNEL_VXLAN_OPT; + ip_tunnel_flags_zero(opts->flags); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, opts->flags); return 0; } @@ -325,7 +326,8 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr, opts->u.erspan.version = version; opts->len = sizeof(struct erspan_metadata); - opts->flags = TUNNEL_ERSPAN_OPT; + ip_tunnel_flags_zero(opts->flags); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, opts->flags); return 0; } @@ -366,7 +368,8 @@ static int nft_tunnel_obj_geneve_init(const struct nlattr *attr, opt->length = data_len / 4; opt->opt_class = nla_get_be16(tb[NFTA_TUNNEL_KEY_GENEVE_CLASS]); opt->type = nla_get_u8(tb[NFTA_TUNNEL_KEY_GENEVE_TYPE]); - opts->flags = TUNNEL_GENEVE_OPT; + ip_tunnel_flags_zero(opts->flags); + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, opts->flags); return 0; } @@ -385,8 +388,8 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, struct nft_tunnel_opts *opts) { struct nlattr *nla; - __be16 type = 0; int err, rem; + u32 type = 0; err = nla_validate_nested_deprecated(attr, NFTA_TUNNEL_KEY_OPTS_MAX, nft_tunnel_opts_policy, NULL); @@ -401,7 +404,7 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, err = nft_tunnel_obj_vxlan_init(nla, opts); if (err) return err; - type = TUNNEL_VXLAN_OPT; + type = IP_TUNNEL_VXLAN_OPT_BIT; break; case NFTA_TUNNEL_KEY_OPTS_ERSPAN: if (type) @@ -409,15 +412,15 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, err = nft_tunnel_obj_erspan_init(nla, opts); if (err) return err; - type = TUNNEL_ERSPAN_OPT; + type = IP_TUNNEL_ERSPAN_OPT_BIT; break; case NFTA_TUNNEL_KEY_OPTS_GENEVE: - if (type && type != TUNNEL_GENEVE_OPT) + if (type && type != IP_TUNNEL_GENEVE_OPT_BIT) return -EINVAL; err = nft_tunnel_obj_geneve_init(nla, opts); if (err) return err; - type = TUNNEL_GENEVE_OPT; + type = IP_TUNNEL_GENEVE_OPT_BIT; break; default: return -EOPNOTSUPP; @@ -454,7 +457,9 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx, memset(&info, 0, sizeof(info)); info.mode = IP_TUNNEL_INFO_TX; info.key.tun_id = key32_to_tunnel_id(nla_get_be32(tb[NFTA_TUNNEL_KEY_ID])); - info.key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; + __set_bit(IP_TUNNEL_KEY_BIT, info.key.tun_flags); + __set_bit(IP_TUNNEL_CSUM_BIT, info.key.tun_flags); + __set_bit(IP_TUNNEL_NOCACHE_BIT, info.key.tun_flags); if (tb[NFTA_TUNNEL_KEY_IP]) { err = nft_tunnel_obj_ip_init(ctx, tb[NFTA_TUNNEL_KEY_IP], &info); @@ -483,11 +488,12 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; if (tun_flags & NFT_TUNNEL_F_ZERO_CSUM_TX) - info.key.tun_flags &= ~TUNNEL_CSUM; + __clear_bit(IP_TUNNEL_CSUM_BIT, info.key.tun_flags); if (tun_flags & NFT_TUNNEL_F_DONT_FRAGMENT) - info.key.tun_flags |= TUNNEL_DONT_FRAGMENT; + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, + info.key.tun_flags); if (tun_flags & NFT_TUNNEL_F_SEQ_NUMBER) - info.key.tun_flags |= TUNNEL_SEQ; + __set_bit(IP_TUNNEL_SEQ_BIT, info.key.tun_flags); } if (tb[NFTA_TUNNEL_KEY_TOS]) info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]); @@ -583,7 +589,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, if (!nest) return -1; - if (opts->flags & TUNNEL_VXLAN_OPT) { + if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, opts->flags)) { inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_VXLAN); if (!inner) goto failure; @@ -591,7 +597,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, htonl(opts->u.vxlan.gbp))) goto inner_failure; nla_nest_end(skb, inner); - } else if (opts->flags & TUNNEL_ERSPAN_OPT) { + } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, opts->flags)) { inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_ERSPAN); if (!inner) goto failure; @@ -613,7 +619,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, break; } nla_nest_end(skb, inner); - } else if (opts->flags & TUNNEL_GENEVE_OPT) { + } else if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, opts->flags)) { struct geneve_opt *opt; int offset = 0; @@ -658,11 +664,11 @@ static int nft_tunnel_flags_dump(struct sk_buff *skb, { u32 flags = 0; - if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, info->key.tun_flags)) flags |= NFT_TUNNEL_F_DONT_FRAGMENT; - if (!(info->key.tun_flags & TUNNEL_CSUM)) + if (!test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags)) flags |= NFT_TUNNEL_F_ZERO_CSUM_TX; - if (info->key.tun_flags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, info->key.tun_flags)) flags |= NFT_TUNNEL_F_SEQ_NUMBER; if (nla_put_be32(skb, NFTA_TUNNEL_KEY_FLAGS, htonl(flags)) < 0) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ebc5728aab4e..f224d9bcea5e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -152,6 +152,13 @@ static void update_range(struct sw_flow_match *match, sizeof((match)->key->field)); \ } while (0) +#define SW_FLOW_KEY_BITMAP_COPY(match, field, value_p, nbits, is_mask) ({ \ + update_range(match, offsetof(struct sw_flow_key, field), \ + bitmap_size(nbits), is_mask); \ + bitmap_copy(is_mask ? (match)->mask->key.field : (match)->key->field, \ + value_p, nbits); \ +}) + static bool match_validate(const struct sw_flow_match *match, u64 key_attrs, u64 mask_attrs, bool log) { @@ -670,8 +677,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, bool log) { bool ttl = false, ipv4 = false, ipv6 = false; + IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { }; bool info_bridge_mode = false; - __be16 tun_flags = 0; int opts_type = 0; struct nlattr *a; int rem; @@ -697,7 +704,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_ID: SW_FLOW_KEY_PUT(match, tun_key.tun_id, nla_get_be64(a), is_mask); - tun_flags |= TUNNEL_KEY; + __set_bit(IP_TUNNEL_KEY_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src, @@ -729,10 +736,10 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, ttl = true; break; case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: - tun_flags |= TUNNEL_DONT_FRAGMENT; + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_CSUM: - tun_flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_TP_SRC: SW_FLOW_KEY_PUT(match, tun_key.tp_src, @@ -743,7 +750,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, nla_get_be16(a), is_mask); break; case OVS_TUNNEL_KEY_ATTR_OAM: - tun_flags |= TUNNEL_OAM; + __set_bit(IP_TUNNEL_OAM_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: if (opts_type) { @@ -755,7 +762,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, if (err) return err; - tun_flags |= TUNNEL_GENEVE_OPT; + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_flags); opts_type = type; break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: @@ -768,7 +775,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, if (err) return err; - tun_flags |= TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_flags); opts_type = type; break; case OVS_TUNNEL_KEY_ATTR_PAD: @@ -784,7 +791,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, if (err) return err; - tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_flags); opts_type = type; break; case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE: @@ -798,7 +805,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, } } - SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); + SW_FLOW_KEY_BITMAP_COPY(match, tun_key.tun_flags, tun_flags, + __IP_TUNNEL_FLAG_NUM, is_mask); if (is_mask) SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true); else @@ -823,13 +831,15 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, } if (ipv4) { if (info_bridge_mode) { + __clear_bit(IP_TUNNEL_KEY_BIT, tun_flags); + if (match->key->tun_key.u.ipv4.src || match->key->tun_key.u.ipv4.dst || match->key->tun_key.tp_src || match->key->tun_key.tp_dst || match->key->tun_key.ttl || match->key->tun_key.tos || - tun_flags & ~TUNNEL_KEY) { + !ip_tunnel_flags_empty(tun_flags)) { OVS_NLERR(log, "IPv4 tun info is not correct"); return -EINVAL; } @@ -874,7 +884,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, const void *tun_opts, int swkey_tun_opts_len, unsigned short tun_proto, u8 mode) { - if (output->tun_flags & TUNNEL_KEY && + if (test_bit(IP_TUNNEL_KEY_BIT, output->tun_flags) && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id, OVS_TUNNEL_KEY_ATTR_PAD)) return -EMSGSIZE; @@ -910,10 +920,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, return -EMSGSIZE; if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl)) return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, output->tun_flags) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_CSUM) && + if (test_bit(IP_TUNNEL_CSUM_BIT, output->tun_flags) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; if (output->tp_src && @@ -922,18 +932,20 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, if (output->tp_dst && nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_OAM) && + if (test_bit(IP_TUNNEL_OAM_BIT, output->tun_flags) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; if (swkey_tun_opts_len) { - if (output->tun_flags & TUNNEL_GENEVE_OPT && + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, output->tun_flags) && nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, swkey_tun_opts_len, tun_opts)) return -EMSGSIZE; - else if (output->tun_flags & TUNNEL_VXLAN_OPT && + else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, + output->tun_flags) && vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) return -EMSGSIZE; - else if (output->tun_flags & TUNNEL_ERSPAN_OPT && + else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + output->tun_flags) && nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, swkey_tun_opts_len, tun_opts)) return -EMSGSIZE; @@ -2029,7 +2041,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if ((swkey->tun_proto || is_mask)) { const void *opts = NULL; - if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) + if (ip_tunnel_is_options_present(output->tun_key.tun_flags)) opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ip_tun_to_nlattr(skb, &output->tun_key, opts, @@ -2752,7 +2764,8 @@ static int validate_geneve_opts(struct sw_flow_key *key) opts_len -= len; } - key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + if (crit_opt) + __set_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_key.tun_flags); return 0; } @@ -2760,6 +2773,7 @@ static int validate_geneve_opts(struct sw_flow_key *key) static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_actions **sfa, bool log) { + IP_TUNNEL_DECLARE_FLAGS(dst_opt_type) = { }; struct sw_flow_match match; struct sw_flow_key key; struct metadata_dst *tun_dst; @@ -2767,9 +2781,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct ovs_tunnel_info *ovs_tun; struct nlattr *a; int err = 0, start, opts_type; - __be16 dst_opt_type; - dst_opt_type = 0; ovs_match_init(&match, &key, true, NULL); opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); if (opts_type < 0) @@ -2781,13 +2793,14 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, err = validate_geneve_opts(&key); if (err < 0) return err; - dst_opt_type = TUNNEL_GENEVE_OPT; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, dst_opt_type); break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: - dst_opt_type = TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, dst_opt_type); break; case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: - dst_opt_type = TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, dst_opt_type); break; } } diff --git a/net/psample/psample.c b/net/psample/psample.c index ddd211a151d0..a5d9b8446f77 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -221,7 +221,7 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb, const struct ip_tunnel_key *tun_key = &tun_info->key; int tun_opts_len = tun_info->options_len; - if (tun_key->tun_flags & TUNNEL_KEY && + if (test_bit(IP_TUNNEL_KEY_BIT, tun_key->tun_flags) && nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id, PSAMPLE_TUNNEL_KEY_ATTR_PAD)) return -EMSGSIZE; @@ -257,10 +257,10 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb, return -EMSGSIZE; if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl)) return -EMSGSIZE; - if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_key->tun_flags) && nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) return -EMSGSIZE; - if ((tun_key->tun_flags & TUNNEL_CSUM) && + if (test_bit(IP_TUNNEL_CSUM_BIT, tun_key->tun_flags) && nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; if (tun_key->tp_src && @@ -269,15 +269,16 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb, if (tun_key->tp_dst && nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst)) return -EMSGSIZE; - if ((tun_key->tun_flags & TUNNEL_OAM) && + if (test_bit(IP_TUNNEL_OAM_BIT, tun_key->tun_flags) && nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; if (tun_opts_len) { - if (tun_key->tun_flags & TUNNEL_GENEVE_OPT && + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_key->tun_flags) && nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS, tun_opts_len, tun_opts)) return -EMSGSIZE; - else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT && + else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + tun_key->tun_flags) && nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS, tun_opts_len, tun_opts)) return -EMSGSIZE; @@ -314,7 +315,7 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) int tun_opts_len = tun_info->options_len; int sum = nla_total_size(0); /* PSAMPLE_ATTR_TUNNEL */ - if (tun_key->tun_flags & TUNNEL_KEY) + if (test_bit(IP_TUNNEL_KEY_BIT, tun_key->tun_flags)) sum += nla_total_size_64bit(sizeof(u64)); if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE) @@ -337,20 +338,21 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) if (tun_key->tos) sum += nla_total_size(sizeof(u8)); sum += nla_total_size(sizeof(u8)); /* TTL */ - if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_key->tun_flags)) sum += nla_total_size(0); - if (tun_key->tun_flags & TUNNEL_CSUM) + if (test_bit(IP_TUNNEL_CSUM_BIT, tun_key->tun_flags)) sum += nla_total_size(0); if (tun_key->tp_src) sum += nla_total_size(sizeof(u16)); if (tun_key->tp_dst) sum += nla_total_size(sizeof(u16)); - if (tun_key->tun_flags & TUNNEL_OAM) + if (test_bit(IP_TUNNEL_OAM_BIT, tun_key->tun_flags)) sum += nla_total_size(0); if (tun_opts_len) { - if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_key->tun_flags)) sum += nla_total_size(tun_opts_len); - else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT) + else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + tun_key->tun_flags)) sum += nla_total_size(tun_opts_len); } diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 1536f8b16f1b..af7c99845948 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -230,7 +230,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, nla_for_each_attr(attr, head, len, rem) { switch (nla_type(attr)) { case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE: - if (type && type != TUNNEL_GENEVE_OPT) { + if (type && type != IP_TUNNEL_GENEVE_OPT_BIT) { NL_SET_ERR_MSG(extack, "Duplicate type for geneve options"); return -EINVAL; } @@ -247,7 +247,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, dst_len -= opt_len; dst += opt_len; } - type = TUNNEL_GENEVE_OPT; + type = IP_TUNNEL_GENEVE_OPT_BIT; break; case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN: if (type) { @@ -259,7 +259,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, if (opt_len < 0) return opt_len; opts_len += opt_len; - type = TUNNEL_VXLAN_OPT; + type = IP_TUNNEL_VXLAN_OPT_BIT; break; case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN: if (type) { @@ -271,7 +271,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, if (opt_len < 0) return opt_len; opts_len += opt_len; - type = TUNNEL_ERSPAN_OPT; + type = IP_TUNNEL_ERSPAN_OPT_BIT; break; } } @@ -302,7 +302,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, switch (nla_type(nla_data(nla))) { case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE: #if IS_ENABLED(CONFIG_INET) - info->key.tun_flags |= TUNNEL_GENEVE_OPT; + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else @@ -310,7 +310,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, #endif case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN: #if IS_ENABLED(CONFIG_INET) - info->key.tun_flags |= TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else @@ -318,7 +318,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, #endif case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN: #if IS_ENABLED(CONFIG_INET) - info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else @@ -363,6 +363,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *metadata = NULL; struct tcf_chain *goto_ch = NULL; struct tc_tunnel_key *parm; @@ -371,7 +372,6 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, __be16 dst_port = 0; __be64 key_id = 0; int opts_len = 0; - __be16 flags = 0; u8 tos, ttl; int ret = 0; u32 index; @@ -412,16 +412,16 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]); key_id = key32_to_tunnel_id(key32); - flags = TUNNEL_KEY; + __set_bit(IP_TUNNEL_KEY_BIT, flags); } - flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, flags); if (tb[TCA_TUNNEL_KEY_NO_CSUM] && nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM])) - flags &= ~TUNNEL_CSUM; + __clear_bit(IP_TUNNEL_CSUM_BIT, flags); if (nla_get_flag(tb[TCA_TUNNEL_KEY_NO_FRAG])) - flags |= TUNNEL_DONT_FRAGMENT; + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, flags); if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT]) dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); @@ -663,15 +663,15 @@ static int tunnel_key_opts_dump(struct sk_buff *skb, if (!start) return -EMSGSIZE; - if (info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_geneve_opts_dump(skb, info); if (err) goto err_out; - } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { + } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_vxlan_opts_dump(skb, info); if (err) goto err_out; - } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) { + } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_erspan_opts_dump(skb, info); if (err) goto err_out; @@ -741,7 +741,7 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, struct ip_tunnel_key *key = &info->key; __be32 key_id = tunnel_id_to_key32(key->tun_id); - if (((key->tun_flags & TUNNEL_KEY) && + if ((test_bit(IP_TUNNEL_KEY_BIT, key->tun_flags) && nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) || tunnel_key_dump_addresses(skb, ¶ms->tcft_enc_metadata->u.tun_info) || @@ -749,8 +749,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst)) || nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM, - !(key->tun_flags & TUNNEL_CSUM)) || - ((key->tun_flags & TUNNEL_DONT_FRAGMENT) && + !test_bit(IP_TUNNEL_CSUM_BIT, key->tun_flags)) || + (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) && nla_put_flag(skb, TCA_TUNNEL_KEY_NO_FRAG)) || tunnel_key_opts_dump(skb, info)) goto nla_put_failure; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e1314674b4a9..ae14d649140f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1454,12 +1454,13 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, switch (nla_type(nla_opt_key)) { case TCA_FLOWER_KEY_ENC_OPTS_GENEVE: if (key->enc_opts.dst_opt_type && - key->enc_opts.dst_opt_type != TUNNEL_GENEVE_OPT) { + key->enc_opts.dst_opt_type != + IP_TUNNEL_GENEVE_OPT_BIT) { NL_SET_ERR_MSG(extack, "Duplicate type for geneve options"); return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_GENEVE_OPT_BIT; option_len = fl_set_geneve_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1470,7 +1471,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_GENEVE_OPT_BIT; option_len = fl_set_geneve_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -1489,7 +1490,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_VXLAN_OPT_BIT; option_len = fl_set_vxlan_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1500,7 +1501,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_VXLAN_OPT_BIT; option_len = fl_set_vxlan_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -1519,7 +1520,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_ERSPAN_OPT_BIT; option_len = fl_set_erspan_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1530,7 +1531,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_ERSPAN_OPT_BIT; option_len = fl_set_erspan_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -1550,7 +1551,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_GTP_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_GTP_OPT_BIT; option_len = fl_set_gtp_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1561,7 +1562,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_GTP_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_GTP_OPT_BIT; option_len = fl_set_gtp_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -3202,22 +3203,22 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, goto nla_put_failure; switch (enc_opts->dst_opt_type) { - case TUNNEL_GENEVE_OPT: + case IP_TUNNEL_GENEVE_OPT_BIT: err = fl_dump_key_geneve_opt(skb, enc_opts); if (err) goto nla_put_failure; break; - case TUNNEL_VXLAN_OPT: + case IP_TUNNEL_VXLAN_OPT_BIT: err = fl_dump_key_vxlan_opt(skb, enc_opts); if (err) goto nla_put_failure; break; - case TUNNEL_ERSPAN_OPT: + case IP_TUNNEL_ERSPAN_OPT_BIT: err = fl_dump_key_erspan_opt(skb, enc_opts); if (err) goto nla_put_failure; break; - case TUNNEL_GTP_OPT: + case IP_TUNNEL_GTP_OPT_BIT: err = fl_dump_key_gtp_opt(skb, enc_opts); if (err) goto nla_put_failure; -- cgit v1.2.3 From 6dd514f48110ebb4bf36875b9e7e02d07b589caa Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Wed, 27 Mar 2024 16:23:56 +0100 Subject: pfcp: always set pfcp metadata In PFCP receive path set metadata needed by flower code to do correct classification based on this metadata. Signed-off-by: Michal Swiatkowski Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- drivers/net/pfcp.c | 81 ++++++++++++++++++++++++++++++- include/net/ip_tunnels.h | 3 ++ include/net/pfcp.h | 73 ++++++++++++++++++++++++++++ include/uapi/linux/if_tunnel.h | 3 ++ include/uapi/linux/pkt_cls.h | 14 ++++++ net/core/net_test.c | 7 +-- net/sched/cls_flower.c | 107 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 282 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/pfcp.c b/drivers/net/pfcp.c index 3f1ee0ae7111..cc5b28c5f99f 100644 --- a/drivers/net/pfcp.c +++ b/drivers/net/pfcp.c @@ -21,6 +21,8 @@ struct pfcp_dev { struct socket *sock; struct net_device *dev; struct net *net; + + struct gro_cells gro_cells; }; static unsigned int pfcp_net_id __read_mostly; @@ -29,6 +31,78 @@ struct pfcp_net { struct list_head pfcp_dev_list; }; +static void +pfcp_session_recv(struct pfcp_dev *pfcp, struct sk_buff *skb, + struct pfcp_metadata *md) +{ + struct pfcphdr_session *unparsed = pfcp_hdr_session(skb); + + md->seid = unparsed->seid; + md->type = PFCP_TYPE_SESSION; +} + +static void +pfcp_node_recv(struct pfcp_dev *pfcp, struct sk_buff *skb, + struct pfcp_metadata *md) +{ + md->type = PFCP_TYPE_NODE; +} + +static int pfcp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; + struct metadata_dst *tun_dst; + struct pfcp_metadata *md; + struct pfcphdr *unparsed; + struct pfcp_dev *pfcp; + + if (unlikely(!pskb_may_pull(skb, PFCP_HLEN))) + goto drop; + + pfcp = rcu_dereference_sk_user_data(sk); + if (unlikely(!pfcp)) + goto drop; + + unparsed = pfcp_hdr(skb); + + ip_tunnel_flags_zero(flags); + tun_dst = udp_tun_rx_dst(skb, sk->sk_family, flags, 0, + sizeof(*md)); + if (unlikely(!tun_dst)) + goto drop; + + md = ip_tunnel_info_opts(&tun_dst->u.tun_info); + if (unlikely(!md)) + goto drop; + + if (unparsed->flags & PFCP_SEID_FLAG) + pfcp_session_recv(pfcp, skb, md); + else + pfcp_node_recv(pfcp, skb, md); + + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, flags); + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, md, sizeof(*md), + flags); + + if (unlikely(iptunnel_pull_header(skb, PFCP_HLEN, skb->protocol, + !net_eq(sock_net(sk), + dev_net(pfcp->dev))))) + goto drop; + + skb_dst_set(skb, (struct dst_entry *)tun_dst); + + skb_reset_network_header(skb); + skb_reset_mac_header(skb); + skb->dev = pfcp->dev; + + gro_cells_receive(&pfcp->gro_cells, skb); + + return 0; +drop: + kfree_skb(skb); + return 0; +} + static void pfcp_del_sock(struct pfcp_dev *pfcp) { udp_tunnel_sock_release(pfcp->sock); @@ -39,6 +113,7 @@ static void pfcp_dev_uninit(struct net_device *dev) { struct pfcp_dev *pfcp = netdev_priv(dev); + gro_cells_destroy(&pfcp->gro_cells); pfcp_del_sock(pfcp); } @@ -48,7 +123,7 @@ static int pfcp_dev_init(struct net_device *dev) pfcp->dev = dev; - return 0; + return gro_cells_init(&pfcp->gro_cells, dev); } static const struct net_device_ops pfcp_netdev_ops = { @@ -94,6 +169,10 @@ static struct socket *pfcp_create_sock(struct pfcp_dev *pfcp) if (err) return ERR_PTR(err); + tuncfg.sk_user_data = pfcp; + tuncfg.encap_rcv = pfcp_encap_recv; + tuncfg.encap_type = 1; + setup_udp_tunnel_sock(net, sock, &tuncfg); return sock; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index ed8e48cc9054..d8f574fbb11e 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -216,6 +216,7 @@ static inline void ip_tunnel_set_options_present(unsigned long *flags) __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); ip_tunnel_flags_or(flags, flags, present); } @@ -228,6 +229,7 @@ static inline void ip_tunnel_clear_options_present(unsigned long *flags) __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); __ipt_flag_op(bitmap_andnot, flags, flags, present); } @@ -240,6 +242,7 @@ static inline bool ip_tunnel_is_options_present(const unsigned long *flags) __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); return ip_tunnel_flags_intersect(flags, present); } diff --git a/include/net/pfcp.h b/include/net/pfcp.h index 3f9ebf27a8ff..af14f970b80e 100644 --- a/include/net/pfcp.h +++ b/include/net/pfcp.h @@ -2,12 +2,85 @@ #ifndef _PFCP_H_ #define _PFCP_H_ +#include +#include #include +#include +#include +#include +#include #include #include +#include #define PFCP_PORT 8805 +/* PFCP protocol header */ +struct pfcphdr { + u8 flags; + u8 message_type; + __be16 message_length; +}; + +/* PFCP header flags */ +#define PFCP_SEID_FLAG BIT(0) +#define PFCP_MP_FLAG BIT(1) + +#define PFCP_VERSION_MASK GENMASK(4, 0) + +#define PFCP_HLEN (sizeof(struct udphdr) + sizeof(struct pfcphdr)) + +/* PFCP node related messages */ +struct pfcphdr_node { + u8 seq_number[3]; + u8 reserved; +}; + +/* PFCP session related messages */ +struct pfcphdr_session { + __be64 seid; + u8 seq_number[3]; +#ifdef __LITTLE_ENDIAN_BITFIELD + u8 message_priority:4, + reserved:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 reserved:4, + message_priprity:4; +#else +#error "Please fix " +#endif +}; + +struct pfcp_metadata { + u8 type; + __be64 seid; +} __packed; + +enum { + PFCP_TYPE_NODE = 0, + PFCP_TYPE_SESSION = 1, +}; + +#define PFCP_HEADROOM (sizeof(struct iphdr) + sizeof(struct udphdr) + \ + sizeof(struct pfcphdr) + sizeof(struct ethhdr)) +#define PFCP6_HEADROOM (sizeof(struct ipv6hdr) + sizeof(struct udphdr) + \ + sizeof(struct pfcphdr) + sizeof(struct ethhdr)) + +static inline struct pfcphdr *pfcp_hdr(struct sk_buff *skb) +{ + return (struct pfcphdr *)(udp_hdr(skb) + 1); +} + +static inline struct pfcphdr_node *pfcp_hdr_node(struct sk_buff *skb) +{ + return (struct pfcphdr_node *)(pfcp_hdr(skb) + 1); +} + +static inline struct pfcphdr_session *pfcp_hdr_session(struct sk_buff *skb) +{ + return (struct pfcphdr_session *)(pfcp_hdr(skb) + 1); +} + static inline bool netif_is_pfcp(const struct net_device *dev) { return dev->rtnl_link_ops && diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 838927dd73a4..e1a246dd8c62 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -212,6 +212,9 @@ enum { IP_TUNNEL_VTI_BIT, IP_TUNNEL_SIT_ISATAP_BIT = IP_TUNNEL_VTI_BIT, + /* Flags starting from here are not available via the old UAPI */ + IP_TUNNEL_PFCP_OPT_BIT, /* OPTIONS_PRESENT */ + __IP_TUNNEL_FLAG_NUM, }; diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index ea277039f89d..229fc925ec3a 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -587,6 +587,10 @@ enum { * TCA_FLOWER_KEY_ENC_OPT_GTP_ * attributes */ + TCA_FLOWER_KEY_ENC_OPTS_PFCP, /* Nested + * TCA_FLOWER_KEY_ENC_IPT_PFCP + * attributes + */ __TCA_FLOWER_KEY_ENC_OPTS_MAX, }; @@ -636,6 +640,16 @@ enum { #define TCA_FLOWER_KEY_ENC_OPT_GTP_MAX \ (__TCA_FLOWER_KEY_ENC_OPT_GTP_MAX - 1) +enum { + TCA_FLOWER_KEY_ENC_OPT_PFCP_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID, /* be64 */ + __TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX - 1) + enum { TCA_FLOWER_KEY_MPLS_OPTS_UNSPEC, TCA_FLOWER_KEY_MPLS_OPTS_LSE, diff --git a/net/core/net_test.c b/net/core/net_test.c index 30062b9621e4..9c3a590865d2 100644 --- a/net/core/net_test.c +++ b/net/core/net_test.c @@ -335,11 +335,8 @@ static const struct ip_tunnel_flags_test ip_tunnel_flags_test[] = { ip_tunnel_flags_1), IP_TUNNEL_FLAGS_TEST("conflict", ip_tunnel_flags_2_src, true, VTI_ISVTI, ip_tunnel_flags_2_exp), - IP_TUNNEL_FLAGS_TEST("new", ip_tunnel_flags_3_src, - /* This must be set to ``false`` once - * ``__IP_TUNNEL_FLAG_NUM`` goes above 17. - */ - true, cpu_to_be16(BIT(IP_TUNNEL_VXLAN_OPT_BIT)), + IP_TUNNEL_FLAGS_TEST("new", ip_tunnel_flags_3_src, false, + cpu_to_be16(BIT(IP_TUNNEL_VXLAN_OPT_BIT)), ip_tunnel_flags_3_exp), }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index ae14d649140f..fd9a6f20b60b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -741,6 +742,7 @@ enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = { [TCA_FLOWER_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED }, [TCA_FLOWER_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED }, [TCA_FLOWER_KEY_ENC_OPTS_GTP] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_OPTS_PFCP] = { .type = NLA_NESTED }, }; static const struct nla_policy @@ -770,6 +772,12 @@ gtp_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GTP_MAX + 1] = { [TCA_FLOWER_KEY_ENC_OPT_GTP_QFI] = { .type = NLA_U8 }, }; +static const struct nla_policy +pfcp_opt_policy[TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID] = { .type = NLA_U64 }, +}; + static const struct nla_policy mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = { [TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH] = { .type = NLA_U8 }, @@ -1419,6 +1427,44 @@ static int fl_set_gtp_opt(const struct nlattr *nla, struct fl_flow_key *key, return sizeof(*sinfo); } +static int fl_set_pfcp_opt(const struct nlattr *nla, struct fl_flow_key *key, + int depth, int option_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX + 1]; + struct pfcp_metadata *md; + int err; + + md = (struct pfcp_metadata *)&key->enc_opts.data[key->enc_opts.len]; + memset(md, 0xff, sizeof(*md)); + + if (!depth) + return sizeof(*md); + + if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_PFCP) { + NL_SET_ERR_MSG_MOD(extack, "Non-pfcp option type for mask"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX, nla, + pfcp_opt_policy, extack); + if (err < 0) + return err; + + if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing tunnel key pfcp option type"); + return -EINVAL; + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE]) + md->type = nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE]); + + if (tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID]) + md->seid = nla_get_be64(tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID]); + + return sizeof(*md); +} + static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -1576,6 +1622,36 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } break; + case TCA_FLOWER_KEY_ENC_OPTS_PFCP: + if (key->enc_opts.dst_opt_type) { + NL_SET_ERR_MSG_MOD(extack, "Duplicate type for pfcp options"); + return -EINVAL; + } + option_len = 0; + key->enc_opts.dst_opt_type = IP_TUNNEL_PFCP_OPT_BIT; + option_len = fl_set_pfcp_opt(nla_opt_key, key, + key_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + key->enc_opts.len += option_len; + /* At the same time we need to parse through the mask + * in order to verify exact and mask attribute lengths. + */ + mask->enc_opts.dst_opt_type = IP_TUNNEL_PFCP_OPT_BIT; + option_len = fl_set_pfcp_opt(nla_opt_msk, mask, + msk_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + mask->enc_opts.len += option_len; + if (key->enc_opts.len != mask->enc_opts.len) { + NL_SET_ERR_MSG_MOD(extack, "Key and mask miss aligned"); + return -EINVAL; + } + break; default: NL_SET_ERR_MSG(extack, "Unknown tunnel option type"); return -EINVAL; @@ -3118,6 +3194,32 @@ nla_put_failure: return -EMSGSIZE; } +static int fl_dump_key_pfcp_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct pfcp_metadata *md; + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_PFCP); + if (!nest) + goto nla_put_failure; + + md = (struct pfcp_metadata *)&enc_opts->data[0]; + if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE, md->type)) + goto nla_put_failure; + + if (nla_put_be64(skb, TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID, + md->seid, 0)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + static int fl_dump_key_ct(struct sk_buff *skb, struct flow_dissector_key_ct *key, struct flow_dissector_key_ct *mask) @@ -3223,6 +3325,11 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, if (err) goto nla_put_failure; break; + case IP_TUNNEL_PFCP_OPT_BIT: + err = fl_dump_key_pfcp_opt(skb, enc_opts); + if (err) + goto nla_put_failure; + break; default: goto nla_put_failure; } -- cgit v1.2.3 From ca4ddc26f8acaa9cb451fcb20f7ab0f02e4970cb Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 29 Mar 2024 10:28:46 -0500 Subject: bpf: Fix typo in uapi doc comments In a few places in the bpf uapi headers, EOPNOTSUPP is missing a "P" in the doc comments. This adds the missing "P". Signed-off-by: David Lechner Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240329152900.398260-2-dlechner@baylibre.com --- include/uapi/linux/bpf.h | 4 ++-- tools/include/uapi/linux/bpf.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 96d57e483133..79c548276b6b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5026,7 +5026,7 @@ union bpf_attr { * bytes will be copied to *dst* * Return * The **hash_algo** is returned on success, - * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if + * **-EOPNOTSUPP** if IMA is disabled or **-EINVAL** if * invalid arguments are passed. * * struct socket *bpf_sock_from_file(struct file *file) @@ -5512,7 +5512,7 @@ union bpf_attr { * bytes will be copied to *dst* * Return * The **hash_algo** is returned on success, - * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if + * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. * * void *bpf_kptr_xchg(void *map_value, void *ptr) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 96d57e483133..79c548276b6b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5026,7 +5026,7 @@ union bpf_attr { * bytes will be copied to *dst* * Return * The **hash_algo** is returned on success, - * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if + * **-EOPNOTSUPP** if IMA is disabled or **-EINVAL** if * invalid arguments are passed. * * struct socket *bpf_sock_from_file(struct file *file) @@ -5512,7 +5512,7 @@ union bpf_attr { * bytes will be copied to *dst* * Return * The **hash_algo** is returned on success, - * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if + * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. * * void *bpf_kptr_xchg(void *map_value, void *ptr) -- cgit v1.2.3 From e57ba7e3d7bccd12b6ac9298a9fe0fd8d92ea31d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 1 Apr 2024 11:10:04 +0800 Subject: uapi: team: use header file generated from YAML spec generated with: $ ./tools/net/ynl/ynl-gen-c.py --mode uapi \ > --spec Documentation/netlink/specs/team.yaml \ > --header -o include/uapi/linux/if_team.h Signed-off-by: Hangbin Liu Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240401031004.1159713-5-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_team.h | 116 ++++++++++++++++--------------------------- 1 file changed, 43 insertions(+), 73 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_team.h b/include/uapi/linux/if_team.h index 13c61fecb78b..a5c06243a435 100644 --- a/include/uapi/linux/if_team.h +++ b/include/uapi/linux/if_team.h @@ -1,108 +1,78 @@ -/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -/* - * include/linux/if_team.h - Network team device driver header - * Copyright (c) 2011 Jiri Pirko - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/team.yaml */ +/* YNL-GEN uapi header */ -#ifndef _UAPI_LINUX_IF_TEAM_H_ -#define _UAPI_LINUX_IF_TEAM_H_ +#ifndef _UAPI_LINUX_IF_TEAM_H +#define _UAPI_LINUX_IF_TEAM_H +#define TEAM_GENL_NAME "team" +#define TEAM_GENL_VERSION 1 -#define TEAM_STRING_MAX_LEN 32 - -/********************************** - * NETLINK_GENERIC netlink family. - **********************************/ - -enum { - TEAM_CMD_NOOP, - TEAM_CMD_OPTIONS_SET, - TEAM_CMD_OPTIONS_GET, - TEAM_CMD_PORT_LIST_GET, - - __TEAM_CMD_MAX, - TEAM_CMD_MAX = (__TEAM_CMD_MAX - 1), -}; +#define TEAM_STRING_MAX_LEN 32 +#define TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME "change_event" enum { TEAM_ATTR_UNSPEC, - TEAM_ATTR_TEAM_IFINDEX, /* u32 */ - TEAM_ATTR_LIST_OPTION, /* nest */ - TEAM_ATTR_LIST_PORT, /* nest */ + TEAM_ATTR_TEAM_IFINDEX, + TEAM_ATTR_LIST_OPTION, + TEAM_ATTR_LIST_PORT, __TEAM_ATTR_MAX, - TEAM_ATTR_MAX = __TEAM_ATTR_MAX - 1, + TEAM_ATTR_MAX = (__TEAM_ATTR_MAX - 1) }; -/* Nested layout of get/set msg: - * - * [TEAM_ATTR_LIST_OPTION] - * [TEAM_ATTR_ITEM_OPTION] - * [TEAM_ATTR_OPTION_*], ... - * [TEAM_ATTR_ITEM_OPTION] - * [TEAM_ATTR_OPTION_*], ... - * ... - * [TEAM_ATTR_LIST_PORT] - * [TEAM_ATTR_ITEM_PORT] - * [TEAM_ATTR_PORT_*], ... - * [TEAM_ATTR_ITEM_PORT] - * [TEAM_ATTR_PORT_*], ... - * ... - */ - enum { TEAM_ATTR_ITEM_OPTION_UNSPEC, - TEAM_ATTR_ITEM_OPTION, /* nest */ + TEAM_ATTR_ITEM_OPTION, __TEAM_ATTR_ITEM_OPTION_MAX, - TEAM_ATTR_ITEM_OPTION_MAX = __TEAM_ATTR_ITEM_OPTION_MAX - 1, + TEAM_ATTR_ITEM_OPTION_MAX = (__TEAM_ATTR_ITEM_OPTION_MAX - 1) }; enum { TEAM_ATTR_OPTION_UNSPEC, - TEAM_ATTR_OPTION_NAME, /* string */ - TEAM_ATTR_OPTION_CHANGED, /* flag */ - TEAM_ATTR_OPTION_TYPE, /* u8 */ - TEAM_ATTR_OPTION_DATA, /* dynamic */ - TEAM_ATTR_OPTION_REMOVED, /* flag */ - TEAM_ATTR_OPTION_PORT_IFINDEX, /* u32 */ /* for per-port options */ - TEAM_ATTR_OPTION_ARRAY_INDEX, /* u32 */ /* for array options */ + TEAM_ATTR_OPTION_NAME, + TEAM_ATTR_OPTION_CHANGED, + TEAM_ATTR_OPTION_TYPE, + TEAM_ATTR_OPTION_DATA, + TEAM_ATTR_OPTION_REMOVED, + TEAM_ATTR_OPTION_PORT_IFINDEX, + TEAM_ATTR_OPTION_ARRAY_INDEX, __TEAM_ATTR_OPTION_MAX, - TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1, + TEAM_ATTR_OPTION_MAX = (__TEAM_ATTR_OPTION_MAX - 1) }; enum { TEAM_ATTR_ITEM_PORT_UNSPEC, - TEAM_ATTR_ITEM_PORT, /* nest */ + TEAM_ATTR_ITEM_PORT, __TEAM_ATTR_ITEM_PORT_MAX, - TEAM_ATTR_ITEM_PORT_MAX = __TEAM_ATTR_ITEM_PORT_MAX - 1, + TEAM_ATTR_ITEM_PORT_MAX = (__TEAM_ATTR_ITEM_PORT_MAX - 1) }; enum { TEAM_ATTR_PORT_UNSPEC, - TEAM_ATTR_PORT_IFINDEX, /* u32 */ - TEAM_ATTR_PORT_CHANGED, /* flag */ - TEAM_ATTR_PORT_LINKUP, /* flag */ - TEAM_ATTR_PORT_SPEED, /* u32 */ - TEAM_ATTR_PORT_DUPLEX, /* u8 */ - TEAM_ATTR_PORT_REMOVED, /* flag */ + TEAM_ATTR_PORT_IFINDEX, + TEAM_ATTR_PORT_CHANGED, + TEAM_ATTR_PORT_LINKUP, + TEAM_ATTR_PORT_SPEED, + TEAM_ATTR_PORT_DUPLEX, + TEAM_ATTR_PORT_REMOVED, __TEAM_ATTR_PORT_MAX, - TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1, + TEAM_ATTR_PORT_MAX = (__TEAM_ATTR_PORT_MAX - 1) }; -/* - * NETLINK_GENERIC related info - */ -#define TEAM_GENL_NAME "team" -#define TEAM_GENL_VERSION 0x1 -#define TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME "change_event" +enum { + TEAM_CMD_NOOP, + TEAM_CMD_OPTIONS_SET, + TEAM_CMD_OPTIONS_GET, + TEAM_CMD_PORT_LIST_GET, + + __TEAM_CMD_MAX, + TEAM_CMD_MAX = (__TEAM_CMD_MAX - 1) +}; -#endif /* _UAPI_LINUX_IF_TEAM_H_ */ +#endif /* _UAPI_LINUX_IF_TEAM_H */ -- cgit v1.2.3 From 9cc8a6e62624e4a0e0bcf8dfd10cc89db6e0f14b Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Tue, 2 Apr 2024 22:11:19 +0200 Subject: net: ethtool: Add impedance mismatch result code to cable test Some PHYs can recognize during a cable test if the impedance in the cable is okay. They can detect reflections caused by impedance discontinuity between a regular 100 Ohm cable and an abnormal part with a higher or lower impedance. This commit introduces a new result code: ETHTOOL_A_CABLE_RESULT_CODE_IMPEDANCE_MISMATCH, which represents the results of a cable test indicating issues with impedance integrity. Signed-off-by: Pawel Dembicki Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240402201123.2961909-2-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 3f89074aa06c..accbb1a231df 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -515,6 +515,10 @@ enum { ETHTOOL_A_CABLE_RESULT_CODE_OPEN, ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT, ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT, + /* detected reflection caused by the impedance discontinuity between + * a regular 100 Ohm cable and a part with the abnormal impedance value + */ + ETHTOOL_A_CABLE_RESULT_CODE_IMPEDANCE_MISMATCH, }; enum { -- cgit v1.2.3 From f91717007217d975aa975ddabd91ae1a107b9bff Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Wed, 3 Apr 2024 14:33:03 +0200 Subject: bpf: Pack struct bpf_fib_lookup The struct bpf_fib_lookup is supposed to be of size 64. A recent commit 59b418c7063d ("bpf: Add a check for struct bpf_fib_lookup size") added a static assertion to check this property so that future changes to the structure will not accidentally break this assumption. As it immediately turned out, on some 32-bit arm systems, when AEABI=n, the total size of the structure was equal to 68, see [1]. This happened because the bpf_fib_lookup structure contains a union of two 16-bit fields: union { __u16 tot_len; __u16 mtu_result; }; which was supposed to compile to a 16-bit-aligned 16-bit field. On the aforementioned setups it was instead both aligned and padded to 32-bits. Declare this inner union as __attribute__((packed, aligned(2))) such that it always is of size 2 and is aligned to 16 bits. [1] https://lore.kernel.org/all/CA+G9fYtsoP51f-oP_Sp5MOq-Ffv8La2RztNpwvE6+R1VtFiLrw@mail.gmail.com/#t Reported-by: Naresh Kamboju Fixes: e1850ea9bd9e ("bpf: bpf_fib_lookup return MTU value as output when looked up") Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Reviewed-by: Alexander Lobakin Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240403123303.1452184-1-aspsk@isovalent.com --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 79c548276b6b..6fe9f11c8abe 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7157,7 +7157,7 @@ struct bpf_fib_lookup { /* output: MTU value */ __u16 mtu_result; - }; + } __attribute__((packed, aligned(2))); /* input: L3 device index for lookup * output: device index from FIB lookup */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 79c548276b6b..6fe9f11c8abe 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7157,7 +7157,7 @@ struct bpf_fib_lookup { /* output: MTU value */ __u16 mtu_result; - }; + } __attribute__((packed, aligned(2))); /* input: L3 device index for lookup * output: device index from FIB lookup */ -- cgit v1.2.3 From 0e9c127729be19adcdf2f5cc1f3450a4322fdf3a Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Wed, 3 Apr 2024 14:28:39 -0700 Subject: ethtool: add interface to read Tx hardware timestamping statistics Multiple network devices that support hardware timestamping appear to have common behavior with regards to timestamp handling. Implement common Tx hardware timestamping statistics in a tx_stats struct_group. Common Rx hardware timestamping statistics can subsequently be implemented in a rx_stats struct_group for ethtool_ts_stats. Signed-off-by: Rahul Rameshbabu Reviewed-by: Dragos Tatulea Link: https://lore.kernel.org/r/20240403212931.128541-2-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 17 +++++++++ Documentation/networking/ethtool-netlink.rst | 9 +++++ include/linux/ethtool.h | 27 ++++++++++++++- include/uapi/linux/ethtool_netlink.h | 14 ++++++++ net/ethtool/tsinfo.c | 52 +++++++++++++++++++++++++++- 5 files changed, 117 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index d0e4a47e0f21..b76916394ec9 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -565,6 +565,18 @@ attribute-sets: - name: tx-lpi-timer type: u32 + - + name: ts-stat + attributes: + - + name: tx-pkts + type: uint + - + name: tx-lost + type: uint + - + name: tx-err + type: uint - name: tsinfo attributes: @@ -587,6 +599,10 @@ attribute-sets: - name: phc-index type: u32 + - + name: stats + type: nest + nested-attributes: ts-stat - name: cable-result attributes: @@ -1394,6 +1410,7 @@ operations: - tx-types - rx-filters - phc-index + - stats dump: *tsinfo-get-op - name: cable-test-act diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index d583d9abf2f8..08d330b0f50f 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1237,12 +1237,21 @@ Kernel response contents: ``ETHTOOL_A_TSINFO_TX_TYPES`` bitset supported Tx types ``ETHTOOL_A_TSINFO_RX_FILTERS`` bitset supported Rx filters ``ETHTOOL_A_TSINFO_PHC_INDEX`` u32 PTP hw clock index + ``ETHTOOL_A_TSINFO_STATS`` nested HW timestamping statistics ===================================== ====== ========================== ``ETHTOOL_A_TSINFO_PHC_INDEX`` is absent if there is no associated PHC (there is no special value for this case). The bitset attributes are omitted if they would be empty (no bit set). +Additional hardware timestamping statistics response contents: + + ===================================== ====== =================================== + ``ETHTOOL_A_TS_STAT_TX_PKTS`` u64 Packets with Tx HW timestamps + ``ETHTOOL_A_TS_STAT_TX_LOST`` u64 Tx HW timestamp not arrived count + ``ETHTOOL_A_TS_STAT_TX_ERR`` u64 HW error request Tx timestamp count + ===================================== ====== =================================== + CABLE_TEST ========== diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9901e563f706..6fd9107d3cc0 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -480,6 +480,26 @@ struct ethtool_rmon_stats { ); }; +/** + * struct ethtool_ts_stats - HW timestamping statistics + * @pkts: Number of packets successfully timestamped by the hardware. + * @lost: Number of hardware timestamping requests where the timestamping + * information from the hardware never arrived for submission with + * the skb. + * @err: Number of arbitrary timestamp generation error events that the + * hardware encountered, exclusive of @lost statistics. Cases such + * as resource exhaustion, unavailability, firmware errors, and + * detected illogical timestamp values not submitted with the skb + * are inclusive to this counter. + */ +struct ethtool_ts_stats { + struct_group(tx_stats, + u64 pkts; + u64 lost; + u64 err; + ); +}; + #define ETH_MODULE_EEPROM_PAGE_LEN 128 #define ETH_MODULE_MAX_I2C_ADDRESS 0x7f @@ -755,7 +775,10 @@ struct ethtool_rxfh_param { * @get_ts_info: Get the time stamping and PTP hardware clock capabilities. * It may be called with RCU, or rtnl or reference on the device. * Drivers supporting transmit time stamps in software should set this to - * ethtool_op_get_ts_info(). + * ethtool_op_get_ts_info(). Drivers must not zero statistics which they + * don't report. The stats structure is initialized to ETHTOOL_STAT_NOT_SET + * indicating driver does not report statistics. + * @get_ts_stats: Query the device hardware timestamping statistics. * @get_module_info: Get the size and type of the eeprom contained within * a plug-in module. * @get_module_eeprom: Get the eeprom information from the plug-in module @@ -898,6 +921,8 @@ struct ethtool_ops { struct ethtool_dump *, void *); int (*set_dump)(struct net_device *, struct ethtool_dump *); int (*get_ts_info)(struct net_device *, struct ethtool_ts_info *); + void (*get_ts_stats)(struct net_device *dev, + struct ethtool_ts_stats *ts_stats); int (*get_module_info)(struct net_device *, struct ethtool_modinfo *); int (*get_module_eeprom)(struct net_device *, diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index accbb1a231df..708272026d80 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -478,12 +478,26 @@ enum { ETHTOOL_A_TSINFO_TX_TYPES, /* bitset */ ETHTOOL_A_TSINFO_RX_FILTERS, /* bitset */ ETHTOOL_A_TSINFO_PHC_INDEX, /* u32 */ + ETHTOOL_A_TSINFO_STATS, /* nest - _A_TSINFO_STAT */ /* add new constants above here */ __ETHTOOL_A_TSINFO_CNT, ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) }; +enum { + ETHTOOL_A_TS_STAT_UNSPEC, + + ETHTOOL_A_TS_STAT_TX_PKTS, /* u64 */ + ETHTOOL_A_TS_STAT_TX_LOST, /* u64 */ + ETHTOOL_A_TS_STAT_TX_ERR, /* u64 */ + + /* add new constants above here */ + __ETHTOOL_A_TS_STAT_CNT, + ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) + +}; + /* PHC VCLOCKS */ enum { diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 9daed0aab162..be2755c8d8fd 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -13,14 +13,18 @@ struct tsinfo_req_info { struct tsinfo_reply_data { struct ethnl_reply_data base; struct ethtool_ts_info ts_info; + struct ethtool_ts_stats stats; }; #define TSINFO_REPDATA(__reply_base) \ container_of(__reply_base, struct tsinfo_reply_data, base) +#define ETHTOOL_TS_STAT_CNT \ + (__ETHTOOL_A_TS_STAT_CNT - (ETHTOOL_A_TS_STAT_UNSPEC + 1)) + const struct nla_policy ethnl_tsinfo_get_policy[] = { [ETHTOOL_A_TSINFO_HEADER] = - NLA_POLICY_NESTED(ethnl_header_policy), + NLA_POLICY_NESTED(ethnl_header_policy_stats), }; static int tsinfo_prepare_data(const struct ethnl_req_info *req_base, @@ -34,6 +38,12 @@ static int tsinfo_prepare_data(const struct ethnl_req_info *req_base, ret = ethnl_ops_begin(dev); if (ret < 0) return ret; + if (req_base->flags & ETHTOOL_FLAG_STATS && + dev->ethtool_ops->get_ts_stats) { + ethtool_stats_init((u64 *)&data->stats, + sizeof(data->stats) / sizeof(u64)); + dev->ethtool_ops->get_ts_stats(dev, &data->stats); + } ret = __ethtool_get_ts_info(dev, &data->ts_info); ethnl_ops_complete(dev); @@ -79,10 +89,47 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base, } if (ts_info->phc_index >= 0) len += nla_total_size(sizeof(u32)); /* _TSINFO_PHC_INDEX */ + if (req_base->flags & ETHTOOL_FLAG_STATS) + len += nla_total_size(0) + /* _TSINFO_STATS */ + nla_total_size_64bit(sizeof(u64)) * ETHTOOL_TS_STAT_CNT; return len; } +static int tsinfo_put_stat(struct sk_buff *skb, u64 val, u16 attrtype) +{ + if (val == ETHTOOL_STAT_NOT_SET) + return 0; + if (nla_put_uint(skb, attrtype, val)) + return -EMSGSIZE; + return 0; +} + +static int tsinfo_put_stats(struct sk_buff *skb, + const struct ethtool_ts_stats *stats) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_STATS); + if (!nest) + return -EMSGSIZE; + + if (tsinfo_put_stat(skb, stats->tx_stats.pkts, + ETHTOOL_A_TS_STAT_TX_PKTS) || + tsinfo_put_stat(skb, stats->tx_stats.lost, + ETHTOOL_A_TS_STAT_TX_LOST) || + tsinfo_put_stat(skb, stats->tx_stats.err, + ETHTOOL_A_TS_STAT_TX_ERR)) + goto err_cancel; + + nla_nest_end(skb, nest); + return 0; + +err_cancel: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + static int tsinfo_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) @@ -119,6 +166,9 @@ static int tsinfo_fill_reply(struct sk_buff *skb, if (ts_info->phc_index >= 0 && nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, ts_info->phc_index)) return -EMSGSIZE; + if (req_base->flags & ETHTOOL_FLAG_STATS && + tsinfo_put_stats(skb, &data->stats)) + return -EMSGSIZE; return 0; } -- cgit v1.2.3 From ff8877b04ef282b2bdb16c9dccc2e42216a34f62 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 5 Apr 2024 22:22:38 -0700 Subject: netlink: specs: ethtool: define header-flags as an enum Recent changes added header flags to the spec. Use an enum instead of defines for more seamless codegen. [Jakub: drop the already applied parts and rewrite message] Signed-off-by: Rahul Rameshbabu Reviewed-by: Dragos Tatulea Link: https://lore.kernel.org/r/20240403212931.128541-6-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 1 - include/uapi/linux/ethtool_netlink.h | 11 +++++------ 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index b76916394ec9..87ae7b397984 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -18,7 +18,6 @@ definitions: entries: [] - name: header-flags - enum-name: type: flags entries: [ compact-bitsets, omit-reply, stats ] diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 708272026d80..e06a34cded78 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -117,12 +117,11 @@ enum { /* request header */ -/* use compact bitsets in reply */ -#define ETHTOOL_FLAG_COMPACT_BITSETS (1 << 0) -/* provide optional reply for SET or ACT requests */ -#define ETHTOOL_FLAG_OMIT_REPLY (1 << 1) -/* request statistics, if supported by the driver */ -#define ETHTOOL_FLAG_STATS (1 << 2) +enum ethtool_header_flags { + ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ + ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ + ETHTOOL_FLAG_STATS = 1 << 2, /* request statistics, if supported by the driver */ +}; #define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \ ETHTOOL_FLAG_OMIT_REPLY | \ -- cgit v1.2.3 From 6916e461e7933d3d003441291c543938f2ccb371 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 4 Apr 2024 11:29:51 +0200 Subject: net: phy: Introduce ethernet link topology representation Link topologies containing multiple network PHYs attached to the same net_device can be found when using a PHY as a media converter for use with an SFP connector, on which an SFP transceiver containing a PHY can be used. With the current model, the transceiver's PHY can't be used for operations such as cable testing, timestamping, macsec offload, etc. The reason being that most of the logic for these configuration, coming from either ethtool netlink or ioctls tend to use netdev->phydev, which in multi-phy systems will reference the PHY closest to the MAC. Introduce a numbering scheme allowing to enumerate PHY devices that belong to any netdev, which can in turn allow userspace to take more precise decisions with regard to each PHY's configuration. The numbering is maintained per-netdev, in a phy_device_list. The numbering works similarly to a netdevice's ifindex, with identifiers that are only recycled once INT_MAX has been reached. This prevents races that could occur between PHY listing and SFP transceiver removal/insertion. The identifiers are assigned at phy_attach time, as the numbering depends on the netdevice the phy is attached to. The PHY index can be re-used for PHYs that are persistent. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- MAINTAINERS | 2 + drivers/net/phy/Makefile | 2 +- drivers/net/phy/phy_device.c | 7 +++ drivers/net/phy/phy_link_topology.c | 105 +++++++++++++++++++++++++++++++++ include/linux/netdevice.h | 4 +- include/linux/phy.h | 4 ++ include/linux/phy_link_topology.h | 72 ++++++++++++++++++++++ include/linux/phy_link_topology_core.h | 25 ++++++++ include/uapi/linux/ethtool.h | 16 +++++ net/core/dev.c | 9 +++ 10 files changed, 244 insertions(+), 2 deletions(-) create mode 100644 drivers/net/phy/phy_link_topology.c create mode 100644 include/linux/phy_link_topology.h create mode 100644 include/linux/phy_link_topology_core.h (limited to 'include/uapi') diff --git a/MAINTAINERS b/MAINTAINERS index 046598b471b7..4745ea94d463 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8015,6 +8015,8 @@ F: include/linux/mii.h F: include/linux/of_net.h F: include/linux/phy.h F: include/linux/phy_fixed.h +F: include/linux/phy_link_topology.h +F: include/linux/phy_link_topology_core.h F: include/linux/phylib_stubs.h F: include/linux/platform_data/mdio-bcm-unimac.h F: include/linux/platform_data/mdio-gpio.h diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 202ed7f450da..1d8be374915f 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -2,7 +2,7 @@ # Makefile for Linux PHY drivers libphy-y := phy.o phy-c45.o phy-core.o phy_device.o \ - linkmode.o + linkmode.o phy_link_topology.o mdio-bus-y += mdio_bus.o mdio_device.o ifdef CONFIG_MDIO_DEVICE diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 6c6ec9475709..452fc8b3406d 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -1511,6 +1512,11 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, if (phydev->sfp_bus_attached) dev->sfp_bus = phydev->sfp_bus; + + err = phy_link_topo_add_phy(dev->link_topo, phydev, + PHY_UPSTREAM_MAC, dev); + if (err) + goto error; } /* Some Ethernet drivers try to connect to a PHY device before @@ -1938,6 +1944,7 @@ void phy_detach(struct phy_device *phydev) if (dev) { phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; + phy_link_topo_del_phy(dev->link_topo, phydev); } phydev->phylink = NULL; diff --git a/drivers/net/phy/phy_link_topology.c b/drivers/net/phy/phy_link_topology.c new file mode 100644 index 000000000000..985941c5c558 --- /dev/null +++ b/drivers/net/phy/phy_link_topology.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Infrastructure to handle all PHY devices connected to a given netdev, + * either directly or indirectly attached. + * + * Copyright (c) 2023 Maxime Chevallier + */ + +#include +#include +#include +#include +#include + +struct phy_link_topology *phy_link_topo_create(struct net_device *dev) +{ + struct phy_link_topology *topo; + + topo = kzalloc(sizeof(*topo), GFP_KERNEL); + if (!topo) + return ERR_PTR(-ENOMEM); + + xa_init_flags(&topo->phys, XA_FLAGS_ALLOC1); + topo->next_phy_index = 1; + + return topo; +} + +void phy_link_topo_destroy(struct phy_link_topology *topo) +{ + if (!topo) + return; + + xa_destroy(&topo->phys); + kfree(topo); +} + +int phy_link_topo_add_phy(struct phy_link_topology *topo, + struct phy_device *phy, + enum phy_upstream upt, void *upstream) +{ + struct phy_device_node *pdn; + int ret; + + pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); + if (!pdn) + return -ENOMEM; + + pdn->phy = phy; + switch (upt) { + case PHY_UPSTREAM_MAC: + pdn->upstream.netdev = (struct net_device *)upstream; + if (phy_on_sfp(phy)) + pdn->parent_sfp_bus = pdn->upstream.netdev->sfp_bus; + break; + case PHY_UPSTREAM_PHY: + pdn->upstream.phydev = (struct phy_device *)upstream; + if (phy_on_sfp(phy)) + pdn->parent_sfp_bus = pdn->upstream.phydev->sfp_bus; + break; + default: + ret = -EINVAL; + goto err; + } + pdn->upstream_type = upt; + + /* Attempt to re-use a previously allocated phy_index */ + if (phy->phyindex) { + ret = xa_insert(&topo->phys, phy->phyindex, pdn, GFP_KERNEL); + + /* Errors could be either -ENOMEM or -EBUSY. If the phy has an + * index, and there's another entry at the same index, this is + * unexpected and we still error-out + */ + if (ret) + goto err; + return 0; + } + + ret = xa_alloc_cyclic(&topo->phys, &phy->phyindex, pdn, xa_limit_32b, + &topo->next_phy_index, GFP_KERNEL); + if (ret) + goto err; + + return 0; + +err: + kfree(pdn); + return ret; +} +EXPORT_SYMBOL_GPL(phy_link_topo_add_phy); + +void phy_link_topo_del_phy(struct phy_link_topology *topo, + struct phy_device *phy) +{ + struct phy_device_node *pdn = xa_erase(&topo->phys, phy->phyindex); + + /* We delete the PHY from the topology, however we don't re-set the + * phy->phyindex field. If the PHY isn't gone, we can re-assign it the + * same index next time it's added back to the topology + */ + + kfree(pdn); +} +EXPORT_SYMBOL_GPL(phy_link_topo_del_phy); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0c198620ac93..d45f330d083d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -40,7 +40,6 @@ #include #endif #include - #include #include #include @@ -52,6 +51,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -1974,6 +1974,7 @@ enum netdev_reg_state { * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * * @priomap: XXX: need comments on this one + * @link_topo: Physical link topology tracking attached PHYs * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. @@ -2364,6 +2365,7 @@ struct net_device { #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif + struct phy_link_topology *link_topo; struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; diff --git a/include/linux/phy.h b/include/linux/phy.h index e6e83304558e..8c848c79b1fd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -550,6 +550,9 @@ struct macsec_ops; * @drv: Pointer to the driver for this PHY instance * @devlink: Create a link between phy dev and mac dev, if the external phy * used by current mac interface is managed by another mac interface. + * @phyindex: Unique id across the phy's parent tree of phys to address the PHY + * from userspace, similar to ifindex. A zero index means the PHY + * wasn't assigned an id yet. * @phy_id: UID for this device found during discovery * @c45_ids: 802.3-c45 Device Identifiers if is_c45. * @is_c45: Set to true if this PHY uses clause 45 addressing. @@ -650,6 +653,7 @@ struct phy_device { struct device_link *devlink; + u32 phyindex; u32 phy_id; struct phy_c45_device_ids c45_ids; diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h new file mode 100644 index 000000000000..6b79feb607e7 --- /dev/null +++ b/include/linux/phy_link_topology.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PHY device list allow maintaining a list of PHY devices that are + * part of a netdevice's link topology. PHYs can for example be chained, + * as is the case when using a PHY that exposes an SFP module, on which an + * SFP transceiver that embeds a PHY is connected. + * + * This list can then be used by userspace to leverage individual PHY + * capabilities. + */ +#ifndef __PHY_LINK_TOPOLOGY_H +#define __PHY_LINK_TOPOLOGY_H + +#include +#include + +struct xarray; +struct phy_device; +struct net_device; +struct sfp_bus; + +struct phy_device_node { + enum phy_upstream upstream_type; + + union { + struct net_device *netdev; + struct phy_device *phydev; + } upstream; + + struct sfp_bus *parent_sfp_bus; + + struct phy_device *phy; +}; + +struct phy_link_topology { + struct xarray phys; + u32 next_phy_index; +}; + +static inline struct phy_device * +phy_link_topo_get_phy(struct phy_link_topology *topo, u32 phyindex) +{ + struct phy_device_node *pdn = xa_load(&topo->phys, phyindex); + + if (pdn) + return pdn->phy; + + return NULL; +} + +#if IS_REACHABLE(CONFIG_PHYLIB) +int phy_link_topo_add_phy(struct phy_link_topology *topo, + struct phy_device *phy, + enum phy_upstream upt, void *upstream); + +void phy_link_topo_del_phy(struct phy_link_topology *lt, struct phy_device *phy); + +#else +static inline int phy_link_topo_add_phy(struct phy_link_topology *topo, + struct phy_device *phy, + enum phy_upstream upt, void *upstream) +{ + return 0; +} + +static inline void phy_link_topo_del_phy(struct phy_link_topology *topo, + struct phy_device *phy) +{ +} +#endif + +#endif /* __PHY_LINK_TOPOLOGY_H */ diff --git a/include/linux/phy_link_topology_core.h b/include/linux/phy_link_topology_core.h new file mode 100644 index 000000000000..0a6479055745 --- /dev/null +++ b/include/linux/phy_link_topology_core.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PHY_LINK_TOPOLOGY_CORE_H +#define __PHY_LINK_TOPOLOGY_CORE_H + +struct phy_link_topology; + +#if IS_REACHABLE(CONFIG_PHYLIB) + +struct phy_link_topology *phy_link_topo_create(struct net_device *dev); +void phy_link_topo_destroy(struct phy_link_topology *topo); + +#else + +static inline struct phy_link_topology *phy_link_topo_create(struct net_device *dev) +{ + return NULL; +} + +static inline void phy_link_topo_destroy(struct phy_link_topology *topo) +{ +} + +#endif + +#endif /* __PHY_LINK_TOPOLOGY_CORE_H */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 11fc18988bc2..95c2f09f0d0a 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2268,4 +2268,20 @@ struct ethtool_link_settings { * __u32 map_lp_advertising[link_mode_masks_nwords]; */ }; + +/** + * enum phy_upstream - Represents the upstream component a given PHY device + * is connected to, as in what is on the other end of the MII bus. Most PHYs + * will be attached to an Ethernet MAC controller, but in some cases, there's + * an intermediate PHY used as a media-converter, which will driver another + * MII interface as its output. + * @PHY_UPSTREAM_MAC: Upstream component is a MAC (a switch port, + * or ethernet controller) + * @PHY_UPSTREAM_PHY: Upstream component is a PHY (likely a media converter) + */ +enum phy_upstream { + PHY_UPSTREAM_MAC, + PHY_UPSTREAM_PHY, +}; + #endif /* _UAPI_LINUX_ETHTOOL_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 92f5bddbc2de..854a3a28a8d8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -158,6 +158,7 @@ #include #include #include +#include #include "dev.h" #include "net-sysfs.h" @@ -10962,6 +10963,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #ifdef CONFIG_NET_SCHED hash_init(dev->qdisc_hash); #endif + dev->link_topo = phy_link_topo_create(dev); + if (IS_ERR(dev->link_topo)) { + dev->link_topo = NULL; + goto free_all; + } + dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -11050,6 +11057,8 @@ void free_netdev(struct net_device *dev) free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; + phy_link_topo_destroy(dev->link_topo); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); -- cgit v1.2.3 From 841942bc62122c59f654f799c2d00ce3fda93efa Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 4 Apr 2024 11:29:55 +0200 Subject: net: ethtool: Allow passing a phy index for some commands Some netlink commands are target towards ethernet PHYs, to control some of their features. As there's several such commands, add the ability to pass a PHY index in the ethnl request, which will populate the generic ethnl_req_info with the relevant phydev when the command targets a PHY. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 7 ++++ include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/netlink.c | 48 ++++++++++++++++++++++++++-- net/ethtool/netlink.h | 5 +++ 4 files changed, 59 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 08d330b0f50f..5dc42f7ce429 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -57,6 +57,7 @@ Structure of this header is ``ETHTOOL_A_HEADER_DEV_INDEX`` u32 device ifindex ``ETHTOOL_A_HEADER_DEV_NAME`` string device name ``ETHTOOL_A_HEADER_FLAGS`` u32 flags common for all requests + ``ETHTOOL_A_HEADER_PHY_INDEX`` u32 phy device index ============================== ====== ============================= ``ETHTOOL_A_HEADER_DEV_INDEX`` and ``ETHTOOL_A_HEADER_DEV_NAME`` identify the @@ -81,6 +82,12 @@ the behaviour is backward compatible, i.e. requests from old clients not aware of the flag should be interpreted the way the client expects. A client must not set flags it does not understand. +``ETHTOOL_A_HEADER_PHY_INDEX`` identifies the Ethernet PHY the message relates to. +As there are numerous commands that are related to PHY configuration, and because +there may be more than one PHY on the link, the PHY index can be passed in the +request for the commands that needs it. It is, however, not mandatory, and if it +is not passed for commands that target a PHY, the net_device.phydev pointer +is used. Bit sets ======== diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index e06a34cded78..23e225f00fb0 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -132,6 +132,7 @@ enum { ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ ETHTOOL_A_HEADER_DEV_NAME, /* string */ ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ + ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_HEADER_CNT, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index bd04f28d5cf4..563e94e0cbd8 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -4,6 +4,7 @@ #include #include #include "netlink.h" +#include static struct genl_family ethtool_genl_family; @@ -30,6 +31,24 @@ const struct nla_policy ethnl_header_policy_stats[] = { ETHTOOL_FLAGS_STATS), }; +const struct nla_policy ethnl_header_policy_phy[] = { + [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 }, + [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING, + .len = ALTIFNAMSIZ - 1 }, + [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, + ETHTOOL_FLAGS_BASIC), + [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + +const struct nla_policy ethnl_header_policy_phy_stats[] = { + [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 }, + [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING, + .len = ALTIFNAMSIZ - 1 }, + [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, + ETHTOOL_FLAGS_STATS), + [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + int ethnl_ops_begin(struct net_device *dev) { int ret; @@ -89,8 +108,9 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, const struct nlattr *header, struct net *net, struct netlink_ext_ack *extack, bool require_dev) { - struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)]; + struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy_phy)]; const struct nlattr *devname_attr; + struct phy_device *phydev = NULL; struct net_device *dev = NULL; u32 flags = 0; int ret; @@ -104,7 +124,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, /* No validation here, command policy should have a nested policy set * for the header, therefore validation should have already been done. */ - ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header, + ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy_phy) - 1, header, NULL, extack); if (ret < 0) return ret; @@ -145,6 +165,30 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, return -EINVAL; } + if (dev) { + if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) { + struct nlattr *phy_id; + + phy_id = tb[ETHTOOL_A_HEADER_PHY_INDEX]; + phydev = phy_link_topo_get_phy(dev->link_topo, + nla_get_u32(phy_id)); + if (!phydev) { + NL_SET_BAD_ATTR(extack, phy_id); + return -ENODEV; + } + } else { + /* If we need a PHY but no phy index is specified, fallback + * to dev->phydev + */ + phydev = dev->phydev; + } + } else if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) { + NL_SET_ERR_MSG_ATTR(extack, header, + "can't target a PHY without a netdev"); + return -EINVAL; + } + + req_info->phydev = phydev; req_info->dev = dev; req_info->flags = flags; return 0; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 9a333a8d04c1..d57a890b5d9e 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -250,6 +250,7 @@ static inline unsigned int ethnl_reply_header_size(void) * @dev: network device the request is for (may be null) * @dev_tracker: refcount tracker for @dev reference * @flags: request flags common for all request types + * @phydev: phy_device connected to @dev this request is for (may be null) * * This is a common base for request specific structures holding data from * parsed userspace request. These always embed struct ethnl_req_info at @@ -259,6 +260,7 @@ struct ethnl_req_info { struct net_device *dev; netdevice_tracker dev_tracker; u32 flags; + struct phy_device *phydev; }; static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) @@ -395,9 +397,12 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops; extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; +extern const struct ethnl_request_ops ethnl_phy_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; +extern const struct nla_policy ethnl_header_policy_phy[ETHTOOL_A_HEADER_PHY_INDEX + 1]; +extern const struct nla_policy ethnl_header_policy_phy_stats[ETHTOOL_A_HEADER_PHY_INDEX + 1]; extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_ONLY + 1]; extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1]; extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1]; -- cgit v1.2.3 From 5af3e3876d567fb79a355bec1cb48e432d69b4fb Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sat, 6 Apr 2024 04:05:37 +0300 Subject: devlink: Support setting max_io_eqs Many devices send event notifications for the IO queues, such as tx and rx queues, through event queues. Enable a privileged owner, such as a hypervisor PF, to set the number of IO event queues for the VF and SF during the provisioning stage. example: Get maximum IO event queues of the VF device:: $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs 10 Set maximum IO event queues of the VF device:: $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32 $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs 32 Reviewed-by: Jiri Pirko Reviewed-by: Shay Drory Signed-off-by: Parav Pandit Signed-off-by: David S. Miller --- Documentation/networking/devlink/devlink-port.rst | 33 ++++++++++++++ include/net/devlink.h | 14 ++++++ include/uapi/linux/devlink.h | 1 + net/devlink/port.c | 53 +++++++++++++++++++++++ 4 files changed, 101 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst index 562f46b41274..9d22d41a7cd1 100644 --- a/Documentation/networking/devlink/devlink-port.rst +++ b/Documentation/networking/devlink/devlink-port.rst @@ -134,6 +134,9 @@ Users may also set the IPsec crypto capability of the function using Users may also set the IPsec packet capability of the function using `devlink port function set ipsec_packet` command. +Users may also set the maximum IO event queues of the function +using `devlink port function set max_io_eqs` command. + Function attributes =================== @@ -295,6 +298,36 @@ policy is processed in software by the kernel. function: hw_addr 00:00:00:00:00:00 ipsec_packet enabled +Maximum IO events queues setup +------------------------------ +When user sets maximum number of IO event queues for a SF or +a VF, such function driver is limited to consume only enforced +number of IO event queues. + +IO event queues deliver events related to IO queues, including network +device transmit and receive queues (txq and rxq) and RDMA Queue Pairs (QPs). +For example, the number of netdevice channels and RDMA device completion +vectors are derived from the function's IO event queues. Usually, the number +of interrupt vectors consumed by the driver is limited by the number of IO +event queues per device, as each of the IO event queues is connected to an +interrupt vector. + +- Get maximum IO event queues of the VF device:: + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs 10 + +- Set maximum IO event queues of the VF device:: + + $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32 + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs 32 + Subfunction ============ diff --git a/include/net/devlink.h b/include/net/devlink.h index 9ac394bdfbe4..bb1af599d101 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1602,6 +1602,14 @@ void devlink_free(struct devlink *devlink); * capability. Should be used by device drivers to * enable/disable ipsec_packet capability of a * function managed by the devlink port. + * @port_fn_max_io_eqs_get: Callback used to get port function's maximum number + * of event queues. Should be used by device drivers to + * report the maximum event queues of a function + * managed by the devlink port. + * @port_fn_max_io_eqs_set: Callback used to set port function's maximum number + * of event queues. Should be used by device drivers to + * configure maximum number of event queues + * of a function managed by the devlink port. * * Note: Driver should return -EOPNOTSUPP if it doesn't support * port function (@port_fn_*) handling for a particular port. @@ -1651,6 +1659,12 @@ struct devlink_port_ops { int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port, bool enable, struct netlink_ext_ack *extack); + int (*port_fn_max_io_eqs_get)(struct devlink_port *devlink_port, + u32 *max_eqs, + struct netlink_ext_ack *extack); + int (*port_fn_max_io_eqs_set)(struct devlink_port *devlink_port, + u32 max_eqs, + struct netlink_ext_ack *extack); }; void devlink_port_init(struct devlink *devlink, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 2da0c7eb6710..9401aa343673 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -686,6 +686,7 @@ enum devlink_port_function_attr { DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */ DEVLINK_PORT_FN_ATTR_DEVLINK, /* nested */ + DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, /* u32 */ __DEVLINK_PORT_FUNCTION_ATTR_MAX, DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 diff --git a/net/devlink/port.c b/net/devlink/port.c index 118d130d2afd..be9158b4453c 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -16,6 +16,7 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ DEVLINK_PORT_FN_STATE_ACTIVE), [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK), + [DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] = { .type = NLA_U32 }, }; #define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \ @@ -182,6 +183,30 @@ static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port, return 0; } +static int devlink_port_fn_max_io_eqs_fill(struct devlink_port *port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) +{ + u32 max_io_eqs; + int err; + + if (!port->ops->port_fn_max_io_eqs_get) + return 0; + + err = port->ops->port_fn_max_io_eqs_get(port, &max_io_eqs, extack); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + err = nla_put_u32(msg, DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, max_io_eqs); + if (err) + return err; + *msg_updated = true; + return 0; +} + int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port) { if (devlink_nl_put_handle(msg, devlink_port->devlink)) @@ -409,6 +434,18 @@ static int devlink_port_fn_caps_set(struct devlink_port *devlink_port, return 0; } +static int +devlink_port_fn_max_io_eqs_set(struct devlink_port *devlink_port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + u32 max_io_eqs; + + max_io_eqs = nla_get_u32(attr); + return devlink_port->ops->port_fn_max_io_eqs_set(devlink_port, + max_io_eqs, extack); +} + static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) @@ -428,6 +465,9 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por if (err) goto out; err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated); + if (err) + goto out; + err = devlink_port_fn_max_io_eqs_fill(port, msg, extack, &msg_updated); if (err) goto out; err = devlink_rel_devlink_handle_put(msg, port->devlink, @@ -726,6 +766,12 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, } } } + if (tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] && + !ops->port_fn_max_io_eqs_set) { + NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS], + "Function does not support max_io_eqs setting"); + return -EOPNOTSUPP; + } return 0; } @@ -761,6 +807,13 @@ static int devlink_port_function_set(struct devlink_port *port, return err; } + attr = tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS]; + if (attr) { + err = devlink_port_fn_max_io_eqs_set(port, attr, extack); + if (err) + return err; + } + /* Keep this as the last function attribute set, so that when * multiple port function attributes are set along with state, * Those can be applied first before activating the state. -- cgit v1.2.3 From 65f35aa76c0e21b0243fd734e513fd2263f22a18 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 9 Apr 2024 16:25:16 -0700 Subject: ethtool: update tsinfo statistics attribute docs with correct type nla_put_uint can either write a u32 or u64 netlink attribute value. The size depends on whether the value can be represented with a u32 or requires a u64. Use a uint annotation in various documentation to represent this. Signed-off-by: Rahul Rameshbabu Link: https://lore.kernel.org/r/20240409232520.237613-2-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ethtool-netlink.rst | 6 +++--- include/uapi/linux/ethtool_netlink.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 5dc42f7ce429..4e63d3708ed9 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1254,9 +1254,9 @@ would be empty (no bit set). Additional hardware timestamping statistics response contents: ===================================== ====== =================================== - ``ETHTOOL_A_TS_STAT_TX_PKTS`` u64 Packets with Tx HW timestamps - ``ETHTOOL_A_TS_STAT_TX_LOST`` u64 Tx HW timestamp not arrived count - ``ETHTOOL_A_TS_STAT_TX_ERR`` u64 HW error request Tx timestamp count + ``ETHTOOL_A_TS_STAT_TX_PKTS`` uint Packets with Tx HW timestamps + ``ETHTOOL_A_TS_STAT_TX_LOST`` uint Tx HW timestamp not arrived count + ``ETHTOOL_A_TS_STAT_TX_ERR`` uint HW error request Tx timestamp count ===================================== ====== =================================== CABLE_TEST diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 23e225f00fb0..b4f0d233d048 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -488,9 +488,9 @@ enum { enum { ETHTOOL_A_TS_STAT_UNSPEC, - ETHTOOL_A_TS_STAT_TX_PKTS, /* u64 */ - ETHTOOL_A_TS_STAT_TX_LOST, /* u64 */ - ETHTOOL_A_TS_STAT_TX_ERR, /* u64 */ + ETHTOOL_A_TS_STAT_TX_PKTS, /* uint */ + ETHTOOL_A_TS_STAT_TX_LOST, /* uint */ + ETHTOOL_A_TS_STAT_TX_ERR, /* uint */ /* add new constants above here */ __ETHTOOL_A_TS_STAT_CNT, -- cgit v1.2.3 From 699c23f02c65cbfc3e638f14ce0d70c23a2e1f02 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 9 Apr 2024 21:35:27 -0700 Subject: bpf: Add bpf_link support for sk_msg and sk_skb progs Add bpf_link support for sk_msg and sk_skb programs. We have an internal request to support bpf_link for sk_msg programs so user space can have a uniform handling with bpf_link based libbpf APIs. Using bpf_link based libbpf API also has a benefit which makes system robust by decoupling prog life cycle and attachment life cycle. Reviewed-by: John Fastabend Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20240410043527.3737160-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 + include/linux/skmsg.h | 4 + include/uapi/linux/bpf.h | 5 + kernel/bpf/syscall.c | 4 + net/core/sock_map.c | 263 ++++++++++++++++++++++++++++++++++++++--- tools/include/uapi/linux/bpf.h | 5 + 6 files changed, 271 insertions(+), 16 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 62762390c93d..5034c1b4ded7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2996,6 +2996,7 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); int sock_map_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); +int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog); void sock_map_unhash(struct sock *sk); void sock_map_destroy(struct sock *sk); @@ -3094,6 +3095,11 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, { return -EINVAL; } + +static inline int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e65ec3fd2799..9c8dd4c01412 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -58,6 +58,10 @@ struct sk_psock_progs { struct bpf_prog *stream_parser; struct bpf_prog *stream_verdict; struct bpf_prog *skb_verdict; + struct bpf_link *msg_parser_link; + struct bpf_link *stream_parser_link; + struct bpf_link *stream_verdict_link; + struct bpf_link *skb_verdict_link; }; enum sk_psock_state_bits { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6fe9f11c8abe..cee0a7915c08 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1135,6 +1135,7 @@ enum bpf_link_type { BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, + BPF_LINK_TYPE_SOCKMAP = 14, __MAX_BPF_LINK_TYPE, }; @@ -6724,6 +6725,10 @@ struct bpf_link_info { __u32 ifindex; __u32 attach_type; } netkit; + struct { + __u32 map_id; + __u32 attach_type; + } sockmap; }; } __attribute__((aligned(8))); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e44c276e8617..7d392ec83655 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5213,6 +5213,10 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_SK_LOOKUP: ret = netns_bpf_link_create(attr, prog); break; + case BPF_PROG_TYPE_SK_MSG: + case BPF_PROG_TYPE_SK_SKB: + ret = sock_map_link_create(attr, prog); + break; #ifdef CONFIG_NET case BPF_PROG_TYPE_XDP: ret = bpf_xdp_link_attach(attr, prog); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 27d733c0f65e..63c016b4c169 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -24,8 +24,16 @@ struct bpf_stab { #define SOCK_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) +/* This mutex is used to + * - protect race between prog/link attach/detach and link prog update, and + * - protect race between releasing and accessing map in bpf_link. + * A single global mutex lock is used since it is expected contention is low. + */ +static DEFINE_MUTEX(sockmap_mutex); + static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which); + struct bpf_prog *old, struct bpf_link *link, + u32 which); static struct sk_psock_progs *sock_map_progs(struct bpf_map *map); static struct bpf_map *sock_map_alloc(union bpf_attr *attr) @@ -71,7 +79,9 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - ret = sock_map_prog_update(map, prog, NULL, attr->attach_type); + mutex_lock(&sockmap_mutex); + ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type); + mutex_unlock(&sockmap_mutex); fdput(f); return ret; } @@ -103,7 +113,9 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) goto put_prog; } - ret = sock_map_prog_update(map, NULL, prog, attr->attach_type); + mutex_lock(&sockmap_mutex); + ret = sock_map_prog_update(map, NULL, prog, NULL, attr->attach_type); + mutex_unlock(&sockmap_mutex); put_prog: bpf_prog_put(prog); put_map: @@ -1454,55 +1466,84 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) return NULL; } -static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog, - u32 which) +static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***pprog, + struct bpf_link ***plink, u32 which) { struct sk_psock_progs *progs = sock_map_progs(map); + struct bpf_prog **cur_pprog; + struct bpf_link **cur_plink; if (!progs) return -EOPNOTSUPP; switch (which) { case BPF_SK_MSG_VERDICT: - *pprog = &progs->msg_parser; + cur_pprog = &progs->msg_parser; + cur_plink = &progs->msg_parser_link; break; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) case BPF_SK_SKB_STREAM_PARSER: - *pprog = &progs->stream_parser; + cur_pprog = &progs->stream_parser; + cur_plink = &progs->stream_parser_link; break; #endif case BPF_SK_SKB_STREAM_VERDICT: if (progs->skb_verdict) return -EBUSY; - *pprog = &progs->stream_verdict; + cur_pprog = &progs->stream_verdict; + cur_plink = &progs->stream_verdict_link; break; case BPF_SK_SKB_VERDICT: if (progs->stream_verdict) return -EBUSY; - *pprog = &progs->skb_verdict; + cur_pprog = &progs->skb_verdict; + cur_plink = &progs->skb_verdict_link; break; default: return -EOPNOTSUPP; } + *pprog = cur_pprog; + if (plink) + *plink = cur_plink; return 0; } +/* Handle the following four cases: + * prog_attach: prog != NULL, old == NULL, link == NULL + * prog_detach: prog == NULL, old != NULL, link == NULL + * link_attach: prog != NULL, old == NULL, link != NULL + * link_detach: prog == NULL, old != NULL, link != NULL + */ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which) + struct bpf_prog *old, struct bpf_link *link, + u32 which) { struct bpf_prog **pprog; + struct bpf_link **plink; int ret; - ret = sock_map_prog_lookup(map, &pprog, which); + ret = sock_map_prog_link_lookup(map, &pprog, &plink, which); if (ret) return ret; - if (old) - return psock_replace_prog(pprog, prog, old); + /* for prog_attach/prog_detach/link_attach, return error if a bpf_link + * exists for that prog. + */ + if ((!link || prog) && *plink) + return -EBUSY; - psock_set_prog(pprog, prog); - return 0; + if (old) { + ret = psock_replace_prog(pprog, prog, old); + if (!ret) + *plink = NULL; + } else { + psock_set_prog(pprog, prog); + if (link) + *plink = link; + } + + return ret; } int sock_map_bpf_prog_query(const union bpf_attr *attr, @@ -1527,7 +1568,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr, rcu_read_lock(); - ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type); + ret = sock_map_prog_link_lookup(map, &pprog, NULL, attr->query.attach_type); if (ret) goto end; @@ -1657,6 +1698,196 @@ void sock_map_close(struct sock *sk, long timeout) } EXPORT_SYMBOL_GPL(sock_map_close); +struct sockmap_link { + struct bpf_link link; + struct bpf_map *map; + enum bpf_attach_type attach_type; +}; + +static void sock_map_link_release(struct bpf_link *link) +{ + struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + + mutex_lock(&sockmap_mutex); + if (!sockmap_link->map) + goto out; + + WARN_ON_ONCE(sock_map_prog_update(sockmap_link->map, NULL, link->prog, link, + sockmap_link->attach_type)); + + bpf_map_put_with_uref(sockmap_link->map); + sockmap_link->map = NULL; +out: + mutex_unlock(&sockmap_mutex); +} + +static int sock_map_link_detach(struct bpf_link *link) +{ + sock_map_link_release(link); + return 0; +} + +static void sock_map_link_dealloc(struct bpf_link *link) +{ + kfree(link); +} + +/* Handle the following two cases: + * case 1: link != NULL, prog != NULL, old != NULL + * case 2: link != NULL, prog != NULL, old == NULL + */ +static int sock_map_link_update_prog(struct bpf_link *link, + struct bpf_prog *prog, + struct bpf_prog *old) +{ + const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + struct bpf_prog **pprog, *old_link_prog; + struct bpf_link **plink; + int ret = 0; + + mutex_lock(&sockmap_mutex); + + /* If old prog is not NULL, ensure old prog is the same as link->prog. */ + if (old && link->prog != old) { + ret = -EPERM; + goto out; + } + /* Ensure link->prog has the same type/attach_type as the new prog. */ + if (link->prog->type != prog->type || + link->prog->expected_attach_type != prog->expected_attach_type) { + ret = -EINVAL; + goto out; + } + + ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink, + sockmap_link->attach_type); + if (ret) + goto out; + + /* return error if the stored bpf_link does not match the incoming bpf_link. */ + if (link != *plink) { + ret = -EBUSY; + goto out; + } + + if (old) { + ret = psock_replace_prog(pprog, prog, old); + if (ret) + goto out; + } else { + psock_set_prog(pprog, prog); + } + + bpf_prog_inc(prog); + old_link_prog = xchg(&link->prog, prog); + bpf_prog_put(old_link_prog); + +out: + mutex_unlock(&sockmap_mutex); + return ret; +} + +static u32 sock_map_link_get_map_id(const struct sockmap_link *sockmap_link) +{ + u32 map_id = 0; + + mutex_lock(&sockmap_mutex); + if (sockmap_link->map) + map_id = sockmap_link->map->id; + mutex_unlock(&sockmap_mutex); + return map_id; +} + +static int sock_map_link_fill_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + u32 map_id = sock_map_link_get_map_id(sockmap_link); + + info->sockmap.map_id = map_id; + info->sockmap.attach_type = sockmap_link->attach_type; + return 0; +} + +static void sock_map_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + u32 map_id = sock_map_link_get_map_id(sockmap_link); + + seq_printf(seq, "map_id:\t%u\n", map_id); + seq_printf(seq, "attach_type:\t%u\n", sockmap_link->attach_type); +} + +static const struct bpf_link_ops sock_map_link_ops = { + .release = sock_map_link_release, + .dealloc = sock_map_link_dealloc, + .detach = sock_map_link_detach, + .update_prog = sock_map_link_update_prog, + .fill_link_info = sock_map_link_fill_info, + .show_fdinfo = sock_map_link_show_fdinfo, +}; + +int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct bpf_link_primer link_primer; + struct sockmap_link *sockmap_link; + enum bpf_attach_type attach_type; + struct bpf_map *map; + int ret; + + if (attr->link_create.flags) + return -EINVAL; + + map = bpf_map_get_with_uref(attr->link_create.target_fd); + if (IS_ERR(map)) + return PTR_ERR(map); + if (map->map_type != BPF_MAP_TYPE_SOCKMAP && map->map_type != BPF_MAP_TYPE_SOCKHASH) { + ret = -EINVAL; + goto out; + } + + sockmap_link = kzalloc(sizeof(*sockmap_link), GFP_USER); + if (!sockmap_link) { + ret = -ENOMEM; + goto out; + } + + attach_type = attr->link_create.attach_type; + bpf_link_init(&sockmap_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog); + sockmap_link->map = map; + sockmap_link->attach_type = attach_type; + + ret = bpf_link_prime(&sockmap_link->link, &link_primer); + if (ret) { + kfree(sockmap_link); + goto out; + } + + mutex_lock(&sockmap_mutex); + ret = sock_map_prog_update(map, prog, NULL, &sockmap_link->link, attach_type); + mutex_unlock(&sockmap_mutex); + if (ret) { + bpf_link_cleanup(&link_primer); + goto out; + } + + /* Increase refcnt for the prog since when old prog is replaced with + * psock_replace_prog() and psock_set_prog() its refcnt will be decreased. + * + * Actually, we do not need to increase refcnt for the prog since bpf_link + * will hold a reference. But in order to have less complexity w.r.t. + * replacing/setting prog, let us increase the refcnt to make things simpler. + */ + bpf_prog_inc(prog); + + return bpf_link_settle(&link_primer); + +out: + bpf_map_put_with_uref(map); + return ret; +} + static int sock_map_iter_attach_target(struct bpf_prog *prog, union bpf_iter_link_info *linfo, struct bpf_iter_aux_info *aux) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6fe9f11c8abe..cee0a7915c08 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1135,6 +1135,7 @@ enum bpf_link_type { BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, + BPF_LINK_TYPE_SOCKMAP = 14, __MAX_BPF_LINK_TYPE, }; @@ -6724,6 +6725,10 @@ struct bpf_link_info { __u32 ifindex; __u32 attach_type; } netkit; + struct { + __u32 map_id; + __u32 attach_type; + } sockmap; }; } __attribute__((aligned(8))); -- cgit v1.2.3 From 18d82cde743237def9f80171e32be1153f5cadac Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 10 Apr 2024 11:48:24 +0200 Subject: mptcp: add last time fields in mptcp_info This patch adds "last time" fields last_data_sent, last_data_recv and last_ack_recv in struct mptcp_sock to record the last time data_sent, data_recv and ack_recv happened. They all are initialized as tcp_jiffies32 in __mptcp_init_sock(), and updated as tcp_jiffies32 too when data is sent in __subflow_push_pending(), data is received in __mptcp_move_skbs_from_subflow(), and ack is received in ack_update_msk(). Similar to tcpi_last_data_sent, tcpi_last_data_recv and tcpi_last_ack_recv exposed with TCP, this patch exposes the last time "an action happened" for MPTCP in mptcp_info, named mptcpi_last_data_sent, mptcpi_last_data_recv and mptcpi_last_ack_recv, calculated in mptcp_diag_fill_info() as the time deltas between now and the newly added last time fields in mptcp_sock. Since msk->last_ack_recv needs to be protected by mptcp_data_lock/unlock, and lock_sock_fast can sleep and be quite slow, move the entire mptcp_data_lock/unlock block after the lock/unlock_sock_fast block. Then mptcpi_last_data_sent and mptcpi_last_data_recv are set in lock/unlock_sock_fast block, while mptcpi_last_ack_recv is set in mptcp_data_lock/unlock block, which is protected by a spinlock and should not block for too long. Also add three reserved bytes in struct mptcp_info not to have holes in this structure exposed to userspace. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/446 Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240410-upstream-net-next-20240405-mptcp-last-time-info-v2-1-f95bd6b33e51@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 4 ++++ net/mptcp/options.c | 1 + net/mptcp/protocol.c | 7 +++++++ net/mptcp/protocol.h | 3 +++ net/mptcp/sockopt.c | 16 +++++++++++----- 5 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 74cfe496891e..67d015df8893 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -58,6 +58,10 @@ struct mptcp_info { __u64 mptcpi_bytes_received; __u64 mptcpi_bytes_acked; __u8 mptcpi_subflows_total; + __u8 reserved[3]; + __u32 mptcpi_last_data_sent; + __u32 mptcpi_last_data_recv; + __u32 mptcpi_last_ack_recv; }; /* MPTCP Reset reason codes, rfc8684 */ diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 27ca42c77b02..8e8dcfbc2993 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1068,6 +1068,7 @@ static void ack_update_msk(struct mptcp_sock *msk, __mptcp_snd_una_update(msk, new_snd_una); __mptcp_data_acked(sk); } + msk->last_ack_recv = tcp_jiffies32; mptcp_data_unlock(sk); trace_ack_update_msk(mp_opt->data_ack, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 995b53cd021c..f8bc34f0d973 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -706,6 +706,8 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, } } while (more_data_avail); + if (moved > 0) + msk->last_data_recv = tcp_jiffies32; *bytes += moved; return done; } @@ -1556,6 +1558,8 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, err = copied; out: + if (err > 0) + msk->last_data_sent = tcp_jiffies32; return err; } @@ -2793,6 +2797,9 @@ static void __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->allow_infinite_fallback, true); msk->recovery = false; msk->subflow_id = 1; + msk->last_data_sent = tcp_jiffies32; + msk->last_data_recv = tcp_jiffies32; + msk->last_ack_recv = tcp_jiffies32; mptcp_pm_data_init(msk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 46f4655b7123..fdfa843e2d88 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -282,6 +282,9 @@ struct mptcp_sock { u64 bytes_acked; u64 snd_una; u64 wnd_end; + u32 last_data_sent; + u32 last_data_recv; + u32 last_ack_recv; unsigned long timer_ival; u32 token; int rmem_released; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 9d5d42a77bcc..1fea43f5b6f3 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -898,6 +898,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) struct sock *sk = (struct sock *)msk; u32 flags = 0; bool slow; + u32 now; memset(info, 0, sizeof(*info)); @@ -926,11 +927,6 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) if (READ_ONCE(msk->can_ack)) flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; info->mptcpi_flags = flags; - mptcp_data_lock(sk); - info->mptcpi_snd_una = msk->snd_una; - info->mptcpi_rcv_nxt = msk->ack_seq; - info->mptcpi_bytes_acked = msk->bytes_acked; - mptcp_data_unlock(sk); slow = lock_sock_fast(sk); info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); @@ -942,7 +938,17 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) info->mptcpi_bytes_retrans = msk->bytes_retrans; info->mptcpi_subflows_total = info->mptcpi_subflows + __mptcp_has_initial_subflow(msk); + now = tcp_jiffies32; + info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent); + info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv); unlock_sock_fast(sk, slow); + + mptcp_data_lock(sk); + info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv); + info->mptcpi_snd_una = msk->snd_una; + info->mptcpi_rcv_nxt = msk->ack_seq; + info->mptcpi_bytes_acked = msk->bytes_acked; + mptcp_data_unlock(sk); } EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); -- cgit v1.2.3 From aeb48a428d7dbe636203ae892e981bcc3e2ac042 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Fri, 12 Apr 2024 13:50:44 +0200 Subject: udpencap: Remove Obsolete UDP_ENCAP_ESPINUDP_NON_IKE Support The UDP_ENCAP_ESPINUDP_NON_IKE mode, introduced into the Linux kernel in 2004 [2], has remained inactive and obsolete for an extended period. This mode was originally defined in an early version of an IETF draft [1] from 2001. By the time it was integrated into the kernel in 2004 [2], it had already been replaced by UDP_ENCAP_ESPINUDP [3] in later versions of draft-ietf-ipsec-udp-encaps, particularly in version 06. Over time, UDP_ENCAP_ESPINUDP_NON_IKE has lost its relevance, with no known use cases. With this commit, we remove support for UDP_ENCAP_ESPINUDP_NON_IKE, simplifying the codebase and eliminating unnecessary complexity. Kernel will return an error -ENOPROTOOPT if the userspace tries to set this option. References: [1] https://datatracker.ietf.org/doc/html/draft-ietf-ipsec-udp-encaps-00.txt [2] Commit that added UDP_ENCAP_ESPINUDP_NON_IKE to the Linux historic repository. Author: Andreas Gruenbacher Date: Fri Apr 9 01:47:47 2004 -0700 [IPSEC]: Support draft-ietf-ipsec-udp-encaps-00/01, some ipec impls need it. [3] Commit that added UDP_ENCAP_ESPINUDP to the Linux historic repository. Author: Derek Atkins Date: Wed Apr 2 13:21:02 2003 -0800 [IPSEC]: Implement UDP Encapsulation framework. Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/udp.h | 2 +- net/ipv4/esp4.c | 12 ------------ net/ipv4/udp.c | 2 -- net/ipv4/xfrm4_input.c | 13 ------------- net/ipv6/esp6.c | 12 ------------ net/ipv6/xfrm6_input.c | 13 ------------- 6 files changed, 1 insertion(+), 53 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 4828794efcf8..1a0fe8b151fb 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -36,7 +36,7 @@ struct udphdr { #define UDP_GRO 104 /* This socket can receive UDP GRO packets */ /* UDP encapsulation types */ -#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ +#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* unused draft-ietf-ipsec-nat-t-ike-00/01 */ #define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-06 */ #define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */ #define UDP_ENCAP_GTP0 4 /* GSM TS 09.60 */ diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 3d647c9a7a21..7d38ddd64115 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -347,7 +347,6 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, __be16 dport) { struct udphdr *uh; - __be32 *udpdata32; unsigned int len; len = skb->len + esp->tailen - skb_transport_offset(skb); @@ -362,12 +361,6 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, *skb_mac_header(skb) = IPPROTO_UDP; - if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) { - udpdata32 = (__be32 *)(uh + 1); - udpdata32[0] = udpdata32[1] = 0; - return (struct ip_esp_hdr *)(udpdata32 + 2); - } - return (struct ip_esp_hdr *)(uh + 1); } @@ -423,7 +416,6 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb, switch (encap_type) { default: case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport); break; case TCP_ENCAP_ESPINTCP: @@ -775,7 +767,6 @@ int esp_input_done2(struct sk_buff *skb, int err) source = th->source; break; case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: source = uh->source; break; default: @@ -1179,9 +1170,6 @@ static int esp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) case UDP_ENCAP_ESPINUDP: x->props.header_len += sizeof(struct udphdr); break; - case UDP_ENCAP_ESPINUDP_NON_IKE: - x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); - break; #ifdef CONFIG_INET_ESPINTCP case TCP_ENCAP_ESPINTCP: /* only the length field, TCP encap is done by diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7613daa339b0..4ca781065a07 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2699,8 +2699,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_XFRM case UDP_ENCAP_ESPINUDP: set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk); - fallthrough; - case UDP_ENCAP_ESPINUDP_NON_IKE: #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) WRITE_ONCE(up->encap_rcv, diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index dae35101d189..0918b0682174 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -113,19 +113,6 @@ static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull /* Must be an IKE packet.. pass it through */ return 1; break; - case UDP_ENCAP_ESPINUDP_NON_IKE: - /* Check if this is a keepalive packet. If so, eat it. */ - if (len == 1 && udpdata[0] == 0xff) { - return -EINVAL; - } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && - udpdata32[0] == 0 && udpdata32[1] == 0) { - - /* ESP Packet with Non-IKE marker */ - len = sizeof(struct udphdr) + 2 * sizeof(u32); - } else - /* Must be an IKE packet.. pass it through */ - return 1; - break; } /* At this point we are sure that this is an ESPinUDP packet, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index fe8d53f5a5ee..27df148530a6 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -383,7 +383,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, __be16 dport) { struct udphdr *uh; - __be32 *udpdata32; unsigned int len; len = skb->len + esp->tailen - skb_transport_offset(skb); @@ -398,12 +397,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, *skb_mac_header(skb) = IPPROTO_UDP; - if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) { - udpdata32 = (__be32 *)(uh + 1); - udpdata32[0] = udpdata32[1] = 0; - return (struct ip_esp_hdr *)(udpdata32 + 2); - } - return (struct ip_esp_hdr *)(uh + 1); } @@ -459,7 +452,6 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, switch (encap_type) { default: case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport); break; case TCP_ENCAP_ESPINTCP: @@ -822,7 +814,6 @@ int esp6_input_done2(struct sk_buff *skb, int err) source = th->source; break; case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: source = uh->source; break; default: @@ -1232,9 +1223,6 @@ static int esp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) case UDP_ENCAP_ESPINUDP: x->props.header_len += sizeof(struct udphdr); break; - case UDP_ENCAP_ESPINUDP_NON_IKE: - x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); - break; #ifdef CONFIG_INET6_ESPINTCP case TCP_ENCAP_ESPINTCP: /* only the length field, TCP encap is done by diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a17d783dc7c0..2c6aeb090b7a 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -109,19 +109,6 @@ static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull /* Must be an IKE packet.. pass it through */ return 1; break; - case UDP_ENCAP_ESPINUDP_NON_IKE: - /* Check if this is a keepalive packet. If so, eat it. */ - if (len == 1 && udpdata[0] == 0xff) { - return -EINVAL; - } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && - udpdata32[0] == 0 && udpdata32[1] == 0) { - - /* ESP Packet with Non-IKE marker */ - len = sizeof(struct udphdr) + 2 * sizeof(u32); - } else - /* Must be an IKE packet.. pass it through */ - return 1; - break; } /* At this point we are sure that this is an ESPinUDP packet, -- cgit v1.2.3 From b58be8db6327b21e0c7fbee559b8eb47f5110efe Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Wed, 17 Apr 2024 16:39:49 +0200 Subject: ethtool: Expand Ethernet Power Equipment with c33 (PoE) alongside PoDL In the current PSE interface for Ethernet Power Equipment, support is limited to PoDL. This patch extends the interface to accommodate the objects specified in IEEE 802.3-2022 145.2 for Power sourcing Equipment (PSE). The following objects are now supported and considered mandatory: - IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus - IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState - IEEE 802.3-2022 30.9.1.2.1 aPSEAdminControl To avoid confusion between "PoDL PSE" and "PoE PSE", which have similar names but distinct values, we have followed the suggestion of Oleksij Rempel and Andrew Lunn to maintain separate naming schemes for each, using c33 (clause 33) prefix for "PoE PSE". You can find more details in the discussion threads here: https://lore.kernel.org/netdev/20230912110637.GI780075@pengutronix.de/ https://lore.kernel.org/netdev/2539b109-72ad-470a-9dae-9f53de4f64ec@lunn.ch/ Reviewed-by: Andrew Lunn Reviewed-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240417-feature_poe-v9-1-242293fd1900@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/networking/index.rst | 1 + Documentation/networking/pse-pd/index.rst | 9 +++ Documentation/networking/pse-pd/introduction.rst | 73 ++++++++++++++++++++++++ include/linux/pse-pd/pse.h | 9 +++ include/uapi/linux/ethtool.h | 43 ++++++++++++++ include/uapi/linux/ethtool_netlink.h | 3 + 6 files changed, 138 insertions(+) create mode 100644 Documentation/networking/pse-pd/index.rst create mode 100644 Documentation/networking/pse-pd/introduction.rst (limited to 'include/uapi') diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 473d72c36d61..7664c0bfe461 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -93,6 +93,7 @@ Contents: plip ppp_generic proc_net_tcp + pse-pd/index radiotap-headers rds regulatory diff --git a/Documentation/networking/pse-pd/index.rst b/Documentation/networking/pse-pd/index.rst new file mode 100644 index 000000000000..18197bc7303d --- /dev/null +++ b/Documentation/networking/pse-pd/index.rst @@ -0,0 +1,9 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Power Sourcing Equipment (PSE) Documentation +============================================ + +.. toctree:: + :maxdepth: 2 + + introduction diff --git a/Documentation/networking/pse-pd/introduction.rst b/Documentation/networking/pse-pd/introduction.rst new file mode 100644 index 000000000000..e3d3faaef717 --- /dev/null +++ b/Documentation/networking/pse-pd/introduction.rst @@ -0,0 +1,73 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Power Sourcing Equipment (PSE) in IEEE 802.3 Standard +===================================================== + +Overview +-------- + +Power Sourcing Equipment (PSE) is essential in networks for delivering power +along with data over Ethernet cables. It usually refers to devices like +switches and hubs that supply power to Powered Devices (PDs) such as IP +cameras, VoIP phones, and wireless access points. + +PSE vs. PoDL PSE +---------------- + +PSE in the IEEE 802.3 standard generally refers to equipment that provides +power alongside data over Ethernet cables, typically associated with Power over +Ethernet (PoE). + +PoDL PSE, or Power over Data Lines PSE, specifically denotes PSEs operating +with single balanced twisted-pair PHYs, as per Clause 104 of IEEE 802.3. PoDL +is significant in contexts like automotive and industrial controls where power +and data delivery over a single pair is advantageous. + +IEEE 802.3-2018 Addendums and Related Clauses +--------------------------------------------- + +Key addenda to the IEEE 802.3-2018 standard relevant to power delivery over +Ethernet are as follows: + +- **802.3af (Approved in 2003-06-12)**: Known as PoE in the market, detailed in + Clause 33, delivering up to 15.4W of power. +- **802.3at (Approved in 2009-09-11)**: Marketed as PoE+, enhancing PoE as + covered in Clause 33, increasing power delivery to up to 30W. +- **802.3bt (Approved in 2018-09-27)**: Known as 4PPoE in the market, outlined + in Clause 33. Type 3 delivers up to 60W, and Type 4 up to 100W. +- **802.3bu (Approved in 2016-12-07)**: Formerly referred to as PoDL, detailed + in Clause 104. Introduces Classes 0 - 9. Class 9 PoDL PSE delivers up to ~65W + +Kernel Naming Convention Recommendations +---------------------------------------- + +For clarity and consistency within the Linux kernel's networking subsystem, the +following naming conventions are recommended: + +- For general PSE (PoE) code, use "c33_pse" key words. For example: + ``enum ethtool_c33_pse_admin_state c33_admin_control;``. + This aligns with Clause 33, encompassing various PoE forms. + +- For PoDL PSE - specific code, use "podl_pse". For example: + ``enum ethtool_podl_pse_admin_state podl_admin_control;`` to differentiate + PoDL PSE settings according to Clause 104. + +Summary of Clause 33: Data Terminal Equipment (DTE) Power via Media Dependent Interface (MDI) +--------------------------------------------------------------------------------------------- + +Clause 33 of the IEEE 802.3 standard defines the functional and electrical +characteristics of Powered Device (PD) and Power Sourcing Equipment (PSE). +These entities enable power delivery using the same generic cabling as for data +transmission, integrating power with data communication for devices such as +10BASE-T, 100BASE-TX, or 1000BASE-T. + +Summary of Clause 104: Power over Data Lines (PoDL) of Single Balanced Twisted-Pair Ethernet +-------------------------------------------------------------------------------------------- + +Clause 104 of the IEEE 802.3 standard delineates the functional and electrical +characteristics of PoDL Powered Devices (PDs) and PoDL Power Sourcing Equipment +(PSEs). These are designed for use with single balanced twisted-pair Ethernet +Physical Layers. In this clause, 'PSE' refers specifically to PoDL PSE, and +'PD' to PoDL PD. The key intent is to provide devices with a unified interface +for both data and the power required to process this data over a single +balanced twisted-pair Ethernet connection. diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 199cf4ae3cf2..be4e5754eb24 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -17,9 +17,12 @@ struct pse_controller_dev; * * @podl_admin_control: set PoDL PSE admin control as described in * IEEE 802.3-2018 30.15.1.2.1 acPoDLPSEAdminControl + * @c33_admin_control: set PSE admin control as described in + * IEEE 802.3-2022 30.9.1.2.1 acPSEAdminControl */ struct pse_control_config { enum ethtool_podl_pse_admin_state podl_admin_control; + enum ethtool_c33_pse_admin_state c33_admin_control; }; /** @@ -29,10 +32,16 @@ struct pse_control_config { * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState * @podl_pw_status: power detection status of the PoDL PSE. * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: + * @c33_admin_state: operational state of the PSE + * functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState + * @c33_pw_status: power detection status of the PSE. + * IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus: */ struct pse_control_status { enum ethtool_podl_pse_admin_state podl_admin_state; enum ethtool_podl_pse_pw_d_status podl_pw_status; + enum ethtool_c33_pse_admin_state c33_admin_state; + enum ethtool_c33_pse_pw_d_status c33_pw_status; }; /** diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 95c2f09f0d0a..7b9a3d890949 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -752,6 +752,49 @@ enum ethtool_module_power_mode { ETHTOOL_MODULE_POWER_MODE_HIGH, }; +/** + * enum ethtool_c33_pse_admin_state - operational state of the PoDL PSE + * functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState + * @ETHTOOL_C33_PSE_ADMIN_STATE_UNKNOWN: state of PSE functions is unknown + * @ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED: PSE functions are disabled + * @ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED: PSE functions are enabled + */ +enum ethtool_c33_pse_admin_state { + ETHTOOL_C33_PSE_ADMIN_STATE_UNKNOWN = 1, + ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED, + ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED, +}; + +/** + * enum ethtool_c33_pse_pw_d_status - power detection status of the PSE. + * IEEE 802.3-2022 30.9.1.1.3 aPoDLPSEPowerDetectionStatus: + * @ETHTOOL_C33_PSE_PW_D_STATUS_UNKNOWN: PSE status is unknown + * @ETHTOOL_C33_PSE_PW_D_STATUS_DISABLED: The enumeration "disabled" + * indicates that the PSE State diagram is in the state DISABLED. + * @ETHTOOL_C33_PSE_PW_D_STATUS_SEARCHING: The enumeration "searching" + * indicates the PSE State diagram is in a state other than those + * listed. + * @ETHTOOL_C33_PSE_PW_D_STATUS_DELIVERING: The enumeration + * "deliveringPower" indicates that the PSE State diagram is in the + * state POWER_ON. + * @ETHTOOL_C33_PSE_PW_D_STATUS_TEST: The enumeration "test" indicates that + * the PSE State diagram is in the state TEST_MODE. + * @ETHTOOL_C33_PSE_PW_D_STATUS_FAULT: The enumeration "fault" indicates that + * the PSE State diagram is in the state TEST_ERROR. + * @ETHTOOL_C33_PSE_PW_D_STATUS_OTHERFAULT: The enumeration "otherFault" + * indicates that the PSE State diagram is in the state IDLE due to + * the variable error_condition = true. + */ +enum ethtool_c33_pse_pw_d_status { + ETHTOOL_C33_PSE_PW_D_STATUS_UNKNOWN = 1, + ETHTOOL_C33_PSE_PW_D_STATUS_DISABLED, + ETHTOOL_C33_PSE_PW_D_STATUS_SEARCHING, + ETHTOOL_C33_PSE_PW_D_STATUS_DELIVERING, + ETHTOOL_C33_PSE_PW_D_STATUS_TEST, + ETHTOOL_C33_PSE_PW_D_STATUS_FAULT, + ETHTOOL_C33_PSE_PW_D_STATUS_OTHERFAULT, +}; + /** * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index b4f0d233d048..f17dbe54bf5e 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -913,6 +913,9 @@ enum { ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ + ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ + ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ + ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ /* add new constants above here */ __ETHTOOL_A_PSE_CNT, -- cgit v1.2.3 From 47e0dd53c5eb9dab66689592523f31ea39fc32fa Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Wed, 17 Apr 2024 16:39:50 +0200 Subject: net: pse-pd: Introduce PSE types enumeration Introduce an enumeration to define PSE types (C33 or PoDL), utilizing a bitfield for potential future support of both types. Include 'pse_get_types' helper for external access to PSE type info. Reviewed-by: Andrew Lunn Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240417-feature_poe-v9-2-242293fd1900@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pse_core.c | 12 ++++++++++++ drivers/net/pse-pd/pse_regulator.c | 1 + include/linux/pse-pd/pse.h | 15 +++++++++++++++ include/uapi/linux/ethtool.h | 12 ++++++++++++ 4 files changed, 40 insertions(+) (limited to 'include/uapi') diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index 146b81f08a89..fed006cbc185 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -312,3 +312,15 @@ int pse_ethtool_set_config(struct pse_control *psec, return err; } EXPORT_SYMBOL_GPL(pse_ethtool_set_config); + +bool pse_has_podl(struct pse_control *psec) +{ + return psec->pcdev->types & ETHTOOL_PSE_PODL; +} +EXPORT_SYMBOL_GPL(pse_has_podl); + +bool pse_has_c33(struct pse_control *psec) +{ + return psec->pcdev->types & ETHTOOL_PSE_C33; +} +EXPORT_SYMBOL_GPL(pse_has_c33); diff --git a/drivers/net/pse-pd/pse_regulator.c b/drivers/net/pse-pd/pse_regulator.c index 1dedf4de296e..547af384764b 100644 --- a/drivers/net/pse-pd/pse_regulator.c +++ b/drivers/net/pse-pd/pse_regulator.c @@ -116,6 +116,7 @@ pse_reg_probe(struct platform_device *pdev) priv->pcdev.owner = THIS_MODULE; priv->pcdev.ops = &pse_reg_ops; priv->pcdev.dev = dev; + priv->pcdev.types = ETHTOOL_PSE_PODL; ret = devm_pse_controller_register(dev, &priv->pcdev); if (ret) { dev_err(dev, "failed to register PSE controller (%pe)\n", diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index be4e5754eb24..19589571157f 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -77,6 +77,7 @@ struct pse_control; * device tree to id as given to the PSE control ops * @nr_lines: number of PSE controls in this controller device * @lock: Mutex for serialization access to the PSE controller + * @types: types of the PSE controller */ struct pse_controller_dev { const struct pse_controller_ops *ops; @@ -89,6 +90,7 @@ struct pse_controller_dev { const struct of_phandle_args *pse_spec); unsigned int nr_lines; struct mutex lock; + enum ethtool_pse_types types; }; #if IS_ENABLED(CONFIG_PSE_CONTROLLER) @@ -108,6 +110,9 @@ int pse_ethtool_set_config(struct pse_control *psec, struct netlink_ext_ack *extack, const struct pse_control_config *config); +bool pse_has_podl(struct pse_control *psec); +bool pse_has_c33(struct pse_control *psec); + #else static inline struct pse_control *of_pse_control_get(struct device_node *node) @@ -133,6 +138,16 @@ static inline int pse_ethtool_set_config(struct pse_control *psec, return -ENOTSUPP; } +static inline bool pse_has_podl(struct pse_control *psec) +{ + return false; +} + +static inline bool pse_has_c33(struct pse_control *psec) +{ + return false; +} + #endif #endif diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 7b9a3d890949..041e09c3515d 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -752,6 +752,18 @@ enum ethtool_module_power_mode { ETHTOOL_MODULE_POWER_MODE_HIGH, }; +/** + * enum ethtool_pse_types - Types of PSE controller. + * @ETHTOOL_PSE_UNKNOWN: Type of PSE controller is unknown + * @ETHTOOL_PSE_PODL: PSE controller which support PoDL + * @ETHTOOL_PSE_C33: PSE controller which support Clause 33 (PoE) + */ +enum ethtool_pse_types { + ETHTOOL_PSE_UNKNOWN = 1 << 0, + ETHTOOL_PSE_PODL = 1 << 1, + ETHTOOL_PSE_C33 = 1 << 2, +}; + /** * enum ethtool_c33_pse_admin_state - operational state of the PoDL PSE * functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState -- cgit v1.2.3 From d56b63cf0c0f71e1b2e04dd8220b408f049e67ff Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 20 Apr 2024 11:09:05 +0200 Subject: bpf: add support for bpf_wq user type Mostly a copy/paste from the bpf_timer API, without the initialization and free, as they will be done in a separate patch. Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20240420-bpf_wq-v2-5-6c986a5a741f@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 ++++++++++- include/uapi/linux/bpf.h | 4 ++++ kernel/bpf/btf.c | 17 +++++++++++++++++ kernel/bpf/syscall.c | 6 +++++- kernel/bpf/verifier.c | 9 +++++++++ 5 files changed, 45 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5034c1b4ded7..c7dcfd395555 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -185,7 +185,7 @@ struct bpf_map_ops { enum { /* Support at most 10 fields in a BTF type */ - BTF_FIELDS_MAX = 10, + BTF_FIELDS_MAX = 11, }; enum btf_field_type { @@ -202,6 +202,7 @@ enum btf_field_type { BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), + BPF_WORKQUEUE = (1 << 10), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -238,6 +239,7 @@ struct btf_record { u32 field_mask; int spin_lock_off; int timer_off; + int wq_off; int refcount_off; struct btf_field fields[]; }; @@ -312,6 +314,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "bpf_spin_lock"; case BPF_TIMER: return "bpf_timer"; + case BPF_WORKQUEUE: + return "bpf_wq"; case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; @@ -340,6 +344,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_spin_lock); case BPF_TIMER: return sizeof(struct bpf_timer); + case BPF_WORKQUEUE: + return sizeof(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: @@ -367,6 +373,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_spin_lock); case BPF_TIMER: return __alignof__(struct bpf_timer); + case BPF_WORKQUEUE: + return __alignof__(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: @@ -406,6 +414,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */ case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_WORKQUEUE: case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cee0a7915c08..e4ae83550fb3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7306,6 +7306,10 @@ struct bpf_timer { __u64 __opaque[2]; } __attribute__((aligned(8))); +struct bpf_wq { + __u64 __opaque[2]; +} __attribute__((aligned(8))); + struct bpf_dynptr { __u64 __opaque[2]; } __attribute__((aligned(8))); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 6d46cee47ae3..8291fbfd27b1 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3464,6 +3464,15 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask, goto end; } } + if (field_mask & BPF_WORKQUEUE) { + if (!strcmp(name, "bpf_wq")) { + if (*seen_mask & BPF_WORKQUEUE) + return -E2BIG; + *seen_mask |= BPF_WORKQUEUE; + type = BPF_WORKQUEUE; + goto end; + } + } field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head"); field_mask_test_name(BPF_LIST_NODE, "bpf_list_node"); field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root"); @@ -3515,6 +3524,7 @@ static int btf_find_struct_field(const struct btf *btf, switch (field_type) { case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_WORKQUEUE: case BPF_LIST_NODE: case BPF_RB_NODE: case BPF_REFCOUNT: @@ -3582,6 +3592,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, switch (field_type) { case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_WORKQUEUE: case BPF_LIST_NODE: case BPF_RB_NODE: case BPF_REFCOUNT: @@ -3816,6 +3827,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type rec->spin_lock_off = -EINVAL; rec->timer_off = -EINVAL; + rec->wq_off = -EINVAL; rec->refcount_off = -EINVAL; for (i = 0; i < cnt; i++) { field_type_size = btf_field_type_size(info_arr[i].type); @@ -3846,6 +3858,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type /* Cache offset for faster lookup at runtime */ rec->timer_off = rec->fields[i].offset; break; + case BPF_WORKQUEUE: + WARN_ON_ONCE(rec->wq_off >= 0); + /* Cache offset for faster lookup at runtime */ + rec->wq_off = rec->fields[i].offset; + break; case BPF_REFCOUNT: WARN_ON_ONCE(rec->refcount_off >= 0); /* Cache offset for faster lookup at runtime */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7d392ec83655..0848e4141b00 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -559,6 +559,7 @@ void btf_record_free(struct btf_record *rec) case BPF_SPIN_LOCK: case BPF_TIMER: case BPF_REFCOUNT: + case BPF_WORKQUEUE: /* Nothing to release */ break; default: @@ -608,6 +609,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) case BPF_SPIN_LOCK: case BPF_TIMER: case BPF_REFCOUNT: + case BPF_WORKQUEUE: /* Nothing to acquire */ break; default: @@ -679,6 +681,8 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) case BPF_TIMER: bpf_timer_cancel_and_free(field_ptr); break; + case BPF_WORKQUEUE: + break; case BPF_KPTR_UNREF: WRITE_ONCE(*(u64 *)field_ptr, 0); break; @@ -1085,7 +1089,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, map->record = btf_parse_fields(btf, value_type, BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | - BPF_RB_ROOT | BPF_REFCOUNT, + BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE, map->value_size); if (!IS_ERR_OR_NULL(map->record)) { int i; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5a7e34e83a5b..89490a95b120 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1838,6 +1838,8 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) */ if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER)) reg->map_uid = reg->id; + if (btf_record_has_field(map->inner_map_meta->record, BPF_WORKQUEUE)) + reg->map_uid = reg->id; } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { reg->type = PTR_TO_XDP_SOCK; } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || @@ -18141,6 +18143,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } } + if (btf_record_has_field(map->record, BPF_WORKQUEUE)) { + if (is_tracing_prog_type(prog_type)) { + verbose(env, "tracing progs cannot use bpf_wq yet\n"); + return -EINVAL; + } + } + if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) && !bpf_offload_prog_map_match(prog, map)) { verbose(env, "offload device mismatch between prog and map\n"); -- cgit v1.2.3 From 48e2cd3e3dcfe04f212df4fb189fa04c2a87b980 Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Fri, 26 Apr 2024 00:17:23 +0800 Subject: bpf: add mrtt and srtt as BPF_SOCK_OPS_RTT_CB args Two important arguments in RTT estimation, mrtt and srtt, are passed to tcp_bpf_rtt(), so that bpf programs get more information about RTT computation in BPF_SOCK_OPS_RTT_CB. The difference between bpf_sock_ops->srtt_us and the srtt here is: the former is an old rtt before update, while srtt passed by tcp_bpf_rtt() is that after update. Signed-off-by: Philo Lu Link: https://lore.kernel.org/r/20240425161724.73707-2-lulie@linux.alibaba.com Signed-off-by: Martin KaFai Lau --- include/net/tcp.h | 4 ++-- include/uapi/linux/bpf.h | 2 ++ net/ipv4/tcp_input.c | 4 ++-- tools/include/uapi/linux/bpf.h | 2 ++ 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/net/tcp.h b/include/net/tcp.h index 6ae35199d3b3..0f75d03287c2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2706,10 +2706,10 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); } -static inline void tcp_bpf_rtt(struct sock *sk) +static inline void tcp_bpf_rtt(struct sock *sk, long mrtt, u32 srtt) { if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG)) - tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL); + tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_RTT_CB, mrtt, srtt); } #if IS_ENABLED(CONFIG_SMC) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e4ae83550fb3..d94a72593ead 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6947,6 +6947,8 @@ enum { * socket transition to LISTEN state. */ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. + * Arg1: measured RTT input (mrtt) + * Arg2: updated srtt */ BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option. * It will be called to handle diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5d874817a78d..d1115d7c3936 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -911,7 +911,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->rtt_seq = tp->snd_nxt; tp->mdev_max_us = tcp_rto_min_us(sk); - tcp_bpf_rtt(sk); + tcp_bpf_rtt(sk, mrtt_us, srtt); } } else { /* no previous measure. */ @@ -921,7 +921,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->mdev_max_us = tp->rttvar_us; tp->rtt_seq = tp->snd_nxt; - tcp_bpf_rtt(sk); + tcp_bpf_rtt(sk, mrtt_us, srtt); } tp->srtt_us = max(1U, srtt); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e4ae83550fb3..d94a72593ead 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6947,6 +6947,8 @@ enum { * socket transition to LISTEN state. */ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. + * Arg1: measured RTT input (mrtt) + * Arg2: updated srtt */ BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option. * It will be called to handle -- cgit v1.2.3 From 5055cccfc2d1cc1a7306f6bcdcd0ee9521d707f5 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Tue, 23 Apr 2024 14:49:04 +0200 Subject: net: hsr: Provide RedBox support (HSR-SAN) Introduce RedBox support (HSR-SAN to be more precise) for HSR networks. Following traffic reduction optimizations have been implemented: - Do not send HSR supervisory frames to Port C (interlink) - Do not forward to HSR ring frames addressed to Port C - Do not forward to Port C frames from HSR ring - Do not send duplicate HSR frame to HSR ring when destination is Port C The corresponding patch to modify iptable2 sources has already been sent: https://lore.kernel.org/netdev/20240308145729.490863-1-lukma@denx.de/T/ Testing procedure (veth and netns): ----------------------------------- One shall run: linux-vanila/tools/testing/selftests/net/hsr/hsr_redbox.sh (Detailed description of the setup one can find in the test script file). Testing procedure (real hardware): ---------------------------------- The EVB-KSZ9477 has been used for testing on net-next branch (SHA1: 5fc68320c1fb3c7d456ddcae0b4757326a043e6f). Ports 4/5 were used for SW managed HSR (hsr1) as first hsr0 for ports 1/2 (with HW offloading for ksz9477) was created. Port 3 has been used as interlink port (single USB-ETH dongle). Configuration - RedBox (EVB-KSZ9477): if link set lan1 down;ip link set lan2 down ip link add name hsr0 type hsr slave1 lan1 slave2 lan2 supervision 45 version 1 ip link add name hsr1 type hsr slave1 lan4 slave2 lan5 interlink lan3 supervision 45 version 1 ip link set lan4 up;ip link set lan5 up ip link set lan3 up ip addr add 192.168.0.11/24 dev hsr1 ip link set hsr1 up Configuration - DAN-H (EVB-KSZ9477): ip link set lan1 down;ip link set lan2 down ip link add name hsr0 type hsr slave1 lan1 slave2 lan2 supervision 45 version 1 ip link add name hsr1 type hsr slave1 lan4 slave2 lan5 supervision 45 version 1 ip link set lan4 up;ip link set lan5 up ip addr add 192.168.0.12/24 dev hsr1 ip link set hsr1 up This approach uses only SW based HSR devices (hsr1). -------------- ----------------- ------------ DAN-H Port5 | <------> | Port5 | | Port4 | <------> | Port4 Port3 | <---> | PC | | (RedBox) | | (USB-ETH) EVB-KSZ9477 | | EVB-KSZ9477 | | -------------- ----------------- ------------ Signed-off-by: Lukasz Majewski Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 1 + net/hsr/hsr_device.c | 36 +++++++++++++++++-- net/hsr/hsr_device.h | 4 +-- net/hsr/hsr_forward.c | 85 +++++++++++++++++++++++++++++++++++++++----- net/hsr/hsr_framereg.c | 52 +++++++++++++++++++++++++++ net/hsr/hsr_framereg.h | 4 +++ net/hsr/hsr_main.h | 7 ++++ net/hsr/hsr_netlink.c | 30 ++++++++++++++-- net/hsr/hsr_slave.c | 1 + 9 files changed, 204 insertions(+), 16 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ffa637b38c93..e9f10860ec8e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1771,6 +1771,7 @@ enum { IFLA_HSR_PROTOCOL, /* Indicate different protocol than * HSR. For example PRP. */ + IFLA_HSR_INTERLINK, /* HSR interlink network device */ __IFLA_HSR_MAX, }; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e9d45133d641..cd1e7c6d2fc0 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -146,6 +146,9 @@ static int hsr_dev_open(struct net_device *dev) case HSR_PT_SLAVE_B: designation = "Slave B"; break; + case HSR_PT_INTERLINK: + designation = "Interlink"; + break; default: designation = "Unknown"; } @@ -285,6 +288,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, struct hsr_priv *hsr = master->hsr; __u8 type = HSR_TLV_LIFE_CHECK; struct hsr_sup_payload *hsr_sp; + struct hsr_sup_tlv *hsr_stlv; struct hsr_sup_tag *hsr_stag; struct sk_buff *skb; @@ -324,6 +328,16 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); + if (hsr->redbox) { + hsr_stlv = skb_put(skb, sizeof(struct hsr_sup_tlv)); + hsr_stlv->HSR_TLV_type = PRP_TLV_REDBOX_MAC; + hsr_stlv->HSR_TLV_length = sizeof(struct hsr_sup_payload); + + /* Payload: MacAddressRedBox */ + hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); + ether_addr_copy(hsr_sp->macaddress_A, hsr->macaddress_redbox); + } + if (skb_put_padto(skb, ETH_ZLEN)) { spin_unlock_bh(&hsr->seqnr_lock); return; @@ -405,6 +419,10 @@ void hsr_del_ports(struct hsr_priv *hsr) if (port) hsr_del_port(port); + port = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK); + if (port) + hsr_del_port(port); + port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (port) hsr_del_port(port); @@ -534,8 +552,8 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { }; int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec, u8 protocol_version, - struct netlink_ext_ack *extack) + struct net_device *interlink, unsigned char multicast_spec, + u8 protocol_version, struct netlink_ext_ack *extack) { bool unregister = false; struct hsr_priv *hsr; @@ -544,6 +562,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], hsr = netdev_priv(hsr_dev); INIT_LIST_HEAD(&hsr->ports); INIT_LIST_HEAD(&hsr->node_db); + INIT_LIST_HEAD(&hsr->proxy_node_db); spin_lock_init(&hsr->list_lock); eth_hw_addr_set(hsr_dev, slave[0]->dev_addr); @@ -569,6 +588,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], /* Overflow soon to find bugs easier: */ hsr->sequence_nr = HSR_SEQNR_START; hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; + hsr->interlink_sequence_nr = HSR_SEQNR_START; timer_setup(&hsr->announce_timer, hsr_announce, 0); timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0); @@ -604,6 +624,18 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (res) goto err_unregister; + if (interlink) { + res = hsr_add_port(hsr, interlink, HSR_PT_INTERLINK, extack); + if (res) + goto err_unregister; + + hsr->redbox = true; + ether_addr_copy(hsr->macaddress_redbox, interlink->dev_addr); + timer_setup(&hsr->prune_proxy_timer, hsr_prune_proxy_nodes, 0); + mod_timer(&hsr->prune_proxy_timer, + jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD)); + } + hsr_debugfs_init(hsr, hsr_dev); mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index 9060c92168f9..655284095b78 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -16,8 +16,8 @@ void hsr_del_ports(struct hsr_priv *hsr); void hsr_dev_setup(struct net_device *dev); int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec, u8 protocol_version, - struct netlink_ext_ack *extack); + struct net_device *interlink, unsigned char multicast_spec, + u8 protocol_version, struct netlink_ext_ack *extack); void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); int hsr_get_max_mtu(struct hsr_priv *hsr); #endif /* __HSR_DEVICE_H */ diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 5d68cb181695..05a61b8286ec 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -377,6 +377,15 @@ static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port, */ ether_addr_copy(eth_hdr(skb)->h_source, port->dev->dev_addr); } + + /* When HSR node is used as RedBox - the frame received from HSR ring + * requires source MAC address (SA) replacement to one which can be + * recognized by SAN devices (otherwise, frames are dropped by switch) + */ + if (port->type == HSR_PT_INTERLINK) + ether_addr_copy(eth_hdr(skb)->h_source, + port->hsr->macaddress_redbox); + return dev_queue_xmit(skb); } @@ -390,9 +399,57 @@ bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port) bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port) { + struct sk_buff *skb; + if (port->dev->features & NETIF_F_HW_HSR_FWD) return prp_drop_frame(frame, port); + /* RedBox specific frames dropping policies + * + * Do not send HSR supervisory frames to SAN devices + */ + if (frame->is_supervision && port->type == HSR_PT_INTERLINK) + return true; + + /* Do not forward to other HSR port (A or B) unicast frames which + * are addressed to interlink port (and are in the ProxyNodeTable). + */ + skb = frame->skb_hsr; + if (skb && prp_drop_frame(frame, port) && + is_unicast_ether_addr(eth_hdr(skb)->h_dest) && + hsr_is_node_in_db(&port->hsr->proxy_node_db, + eth_hdr(skb)->h_dest)) { + return true; + } + + /* Do not forward to port C (Interlink) frames from nodes A and B + * if DA is in NodeTable. + */ + if ((frame->port_rcv->type == HSR_PT_SLAVE_A || + frame->port_rcv->type == HSR_PT_SLAVE_B) && + port->type == HSR_PT_INTERLINK) { + skb = frame->skb_hsr; + if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) && + hsr_is_node_in_db(&port->hsr->node_db, + eth_hdr(skb)->h_dest)) { + return true; + } + } + + /* Do not forward to port A and B unicast frames received on the + * interlink port if it is addressed to one of nodes registered in + * the ProxyNodeTable. + */ + if ((port->type == HSR_PT_SLAVE_A || port->type == HSR_PT_SLAVE_B) && + frame->port_rcv->type == HSR_PT_INTERLINK) { + skb = frame->skb_std; + if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) && + hsr_is_node_in_db(&port->hsr->proxy_node_db, + eth_hdr(skb)->h_dest)) { + return true; + } + } + return false; } @@ -448,13 +505,14 @@ static void hsr_forward_do(struct hsr_frame_info *frame) } /* Check if frame is to be dropped. Eg. for PRP no forward - * between ports. + * between ports, or sending HSR supervision to RedBox. */ if (hsr->proto_ops->drop_frame && hsr->proto_ops->drop_frame(frame, port)) continue; - if (port->type != HSR_PT_MASTER) + if (port->type == HSR_PT_SLAVE_A || + port->type == HSR_PT_SLAVE_B) skb = hsr->proto_ops->create_tagged_frame(frame, port); else skb = hsr->proto_ops->get_untagged_frame(frame, port); @@ -469,7 +527,9 @@ static void hsr_forward_do(struct hsr_frame_info *frame) hsr_deliver_master(skb, port->dev, frame->node_src); } else { if (!hsr_xmit(skb, port, frame)) - sent = true; + if (port->type == HSR_PT_SLAVE_A || + port->type == HSR_PT_SLAVE_B) + sent = true; } } } @@ -503,10 +563,12 @@ static void handle_std_frame(struct sk_buff *skb, frame->skb_prp = NULL; frame->skb_std = skb; - if (port->type != HSR_PT_MASTER) { + if (port->type != HSR_PT_MASTER) frame->is_from_san = true; - } else { - /* Sequence nr for the master node */ + + if (port->type == HSR_PT_MASTER || + port->type == HSR_PT_INTERLINK) { + /* Sequence nr for the master/interlink node */ lockdep_assert_held(&hsr->seqnr_lock); frame->sequence_nr = hsr->sequence_nr; hsr->sequence_nr++; @@ -564,6 +626,7 @@ static int fill_frame_info(struct hsr_frame_info *frame, { struct hsr_priv *hsr = port->hsr; struct hsr_vlan_ethhdr *vlan_hdr; + struct list_head *n_db; struct ethhdr *ethhdr; __be16 proto; int ret; @@ -574,9 +637,13 @@ static int fill_frame_info(struct hsr_frame_info *frame, memset(frame, 0, sizeof(*frame)); frame->is_supervision = is_supervision_frame(port->hsr, skb); - frame->node_src = hsr_get_node(port, &hsr->node_db, skb, - frame->is_supervision, - port->type); + + n_db = &hsr->node_db; + if (port->type == HSR_PT_INTERLINK) + n_db = &hsr->proxy_node_db; + + frame->node_src = hsr_get_node(port, n_db, skb, + frame->is_supervision, port->type); if (!frame->node_src) return -1; /* Unknown node and !is_supervision, or no mem */ diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 26329db09210..614df9649794 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -71,6 +71,14 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db, return NULL; } +/* Check if node for a given MAC address is already present in data base + */ +bool hsr_is_node_in_db(struct list_head *node_db, + const unsigned char addr[ETH_ALEN]) +{ + return !!find_node_by_addr_A(node_db, addr); +} + /* Helper for device init; the self_node is used in hsr_rcv() to recognize * frames from self that's been looped over the HSR ring. */ @@ -223,6 +231,15 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, } } + /* Check if required node is not in proxy nodes table */ + list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) { + if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) { + if (hsr->proto_ops->update_san_info) + hsr->proto_ops->update_san_info(node, is_sup); + return node; + } + } + /* Everyone may create a node entry, connected node to a HSR/PRP * device. */ @@ -418,6 +435,10 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, node_dst = find_node_by_addr_A(&port->hsr->node_db, eth_hdr(skb)->h_dest); + if (!node_dst && port->hsr->redbox) + node_dst = find_node_by_addr_A(&port->hsr->proxy_node_db, + eth_hdr(skb)->h_dest); + if (!node_dst) { if (port->hsr->prot_version != PRP_V1 && net_ratelimit()) netdev_err(skb->dev, "%s: Unknown node\n", __func__); @@ -561,6 +582,37 @@ void hsr_prune_nodes(struct timer_list *t) jiffies + msecs_to_jiffies(PRUNE_PERIOD)); } +void hsr_prune_proxy_nodes(struct timer_list *t) +{ + struct hsr_priv *hsr = from_timer(hsr, t, prune_proxy_timer); + unsigned long timestamp; + struct hsr_node *node; + struct hsr_node *tmp; + + spin_lock_bh(&hsr->list_lock); + list_for_each_entry_safe(node, tmp, &hsr->proxy_node_db, mac_list) { + timestamp = node->time_in[HSR_PT_INTERLINK]; + + /* Prune old entries */ + if (time_is_before_jiffies(timestamp + + msecs_to_jiffies(HSR_PROXY_NODE_FORGET_TIME))) { + hsr_nl_nodedown(hsr, node->macaddress_A); + if (!node->removed) { + list_del_rcu(&node->mac_list); + node->removed = true; + /* Note that we need to free this entry later: */ + kfree_rcu(node, rcu_head); + } + } + } + + spin_unlock_bh(&hsr->list_lock); + + /* Restart timer */ + mod_timer(&hsr->prune_proxy_timer, + jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD)); +} + void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]) { diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index b23556251d62..7619e31c1d2d 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -46,6 +46,7 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, u16 sequence_nr); void hsr_prune_nodes(struct timer_list *t); +void hsr_prune_proxy_nodes(struct timer_list *t); int hsr_create_self_node(struct hsr_priv *hsr, const unsigned char addr_a[ETH_ALEN], @@ -67,6 +68,9 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port, struct hsr_node *node); void prp_update_san_info(struct hsr_node *node, bool is_sup); +bool hsr_is_node_in_db(struct list_head *node_db, + const unsigned char addr[ETH_ALEN]); + struct hsr_node { struct list_head mac_list; /* Protect R/W access to seq_out */ diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 18e01791ad79..23850b16d1ea 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -21,6 +21,7 @@ */ #define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */ #define HSR_NODE_FORGET_TIME 60000 /* ms */ +#define HSR_PROXY_NODE_FORGET_TIME 60000 /* ms */ #define HSR_ANNOUNCE_INTERVAL 100 /* ms */ #define HSR_ENTRY_FORGET_TIME 400 /* ms */ @@ -35,6 +36,7 @@ * HSR_NODE_FORGET_TIME? */ #define PRUNE_PERIOD 3000 /* ms */ +#define PRUNE_PROXY_PERIOD 3000 /* ms */ #define HSR_TLV_EOT 0 /* End of TLVs */ #define HSR_TLV_ANNOUNCE 22 #define HSR_TLV_LIFE_CHECK 23 @@ -192,11 +194,14 @@ struct hsr_priv { struct rcu_head rcu_head; struct list_head ports; struct list_head node_db; /* Known HSR nodes */ + struct list_head proxy_node_db; /* RedBox HSR proxy nodes */ struct hsr_self_node __rcu *self_node; /* MACs of slaves */ struct timer_list announce_timer; /* Supervision frame dispatch */ struct timer_list prune_timer; + struct timer_list prune_proxy_timer; int announce_count; u16 sequence_nr; + u16 interlink_sequence_nr; /* Interlink port seq_nr */ u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */ spinlock_t seqnr_lock; /* locking for sequence_nr */ @@ -209,6 +214,8 @@ struct hsr_priv { * of lan_id */ bool fwd_offloaded; /* Forwarding offloaded to HW */ + bool redbox; /* Device supports HSR RedBox */ + unsigned char macaddress_redbox[ETH_ALEN]; unsigned char sup_multicast_addr[ETH_ALEN] __aligned(sizeof(u16)); /* Align to u16 boundary to avoid unaligned access * in ether_addr_equal diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 78fe40eb9f01..898f18c6da53 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -23,6 +23,7 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_SUPERVISION_ADDR] = { .len = ETH_ALEN }, [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, [IFLA_HSR_PROTOCOL] = { .type = NLA_U8 }, + [IFLA_HSR_INTERLINK] = { .type = NLA_U32 }, }; /* Here, it seems a netdevice has already been allocated for us, and the @@ -35,8 +36,8 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, enum hsr_version proto_version; unsigned char multicast_spec; u8 proto = HSR_PROTOCOL_HSR; - struct net_device *link[2]; + struct net_device *link[2], *interlink = NULL; if (!data) { NL_SET_ERR_MSG_MOD(extack, "No slave devices specified"); return -EINVAL; @@ -67,6 +68,20 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; } + if (data[IFLA_HSR_INTERLINK]) + interlink = __dev_get_by_index(src_net, + nla_get_u32(data[IFLA_HSR_INTERLINK])); + + if (interlink && interlink == link[0]) { + NL_SET_ERR_MSG_MOD(extack, "Interlink and Slave1 are the same"); + return -EINVAL; + } + + if (interlink && interlink == link[1]) { + NL_SET_ERR_MSG_MOD(extack, "Interlink and Slave2 are the same"); + return -EINVAL; + } + if (!data[IFLA_HSR_MULTICAST_SPEC]) multicast_spec = 0; else @@ -96,10 +111,17 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, } } - if (proto == HSR_PROTOCOL_PRP) + if (proto == HSR_PROTOCOL_PRP) { proto_version = PRP_V1; + if (interlink) { + NL_SET_ERR_MSG_MOD(extack, + "Interlink only works with HSR"); + return -EINVAL; + } + } - return hsr_dev_finalize(dev, link, multicast_spec, proto_version, extack); + return hsr_dev_finalize(dev, link, interlink, multicast_spec, + proto_version, extack); } static void hsr_dellink(struct net_device *dev, struct list_head *head) @@ -107,6 +129,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head) struct hsr_priv *hsr = netdev_priv(dev); del_timer_sync(&hsr->prune_timer); + del_timer_sync(&hsr->prune_proxy_timer); del_timer_sync(&hsr->announce_timer); hsr_debugfs_term(hsr); @@ -114,6 +137,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head) hsr_del_self_node(hsr); hsr_del_nodes(&hsr->node_db); + hsr_del_nodes(&hsr->proxy_node_db); unregister_netdevice_queue(dev, head); } diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 1b6457f357bd..af6cf64a00e0 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -55,6 +55,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) protocol = eth_hdr(skb)->h_proto; if (!(port->dev->features & NETIF_F_HW_HSR_TAG_RM) && + port->type != HSR_PT_INTERLINK && hsr->proto_ops->invalid_dan_ingress_frame && hsr->proto_ops->invalid_dan_ingress_frame(protocol)) goto finish_pass; -- cgit v1.2.3 From 34cfe87221363d98160f74788dd060b1c43bae0d Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Fri, 26 Apr 2024 11:39:22 +0800 Subject: virtio_net: introduce device stats feature and structures The virtio-net device stats spec: https://github.com/oasis-tcs/virtio-spec/commit/42f389989823039724f95bbbd243291ab0064f82 We introduce the relative feature and structures. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: Paolo Abeni --- include/uapi/linux/virtio_net.h | 143 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index cc65ef0f3c3e..ac9174717ef1 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -56,6 +56,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_DEVICE_STATS 50 /* Device can provide device-level statistics. */ #define VIRTIO_NET_F_VQ_NOTF_COAL 52 /* Device supports virtqueue notification coalescing */ #define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ #define VIRTIO_NET_F_GUEST_USO4 54 /* Guest can handle USOv4 in. */ @@ -406,4 +407,146 @@ struct virtio_net_ctrl_coal_vq { struct virtio_net_ctrl_coal coal; }; +/* + * Device Statistics + */ +#define VIRTIO_NET_CTRL_STATS 8 +#define VIRTIO_NET_CTRL_STATS_QUERY 0 +#define VIRTIO_NET_CTRL_STATS_GET 1 + +struct virtio_net_stats_capabilities { + +#define VIRTIO_NET_STATS_TYPE_CVQ (1ULL << 32) + +#define VIRTIO_NET_STATS_TYPE_RX_BASIC (1ULL << 0) +#define VIRTIO_NET_STATS_TYPE_RX_CSUM (1ULL << 1) +#define VIRTIO_NET_STATS_TYPE_RX_GSO (1ULL << 2) +#define VIRTIO_NET_STATS_TYPE_RX_SPEED (1ULL << 3) + +#define VIRTIO_NET_STATS_TYPE_TX_BASIC (1ULL << 16) +#define VIRTIO_NET_STATS_TYPE_TX_CSUM (1ULL << 17) +#define VIRTIO_NET_STATS_TYPE_TX_GSO (1ULL << 18) +#define VIRTIO_NET_STATS_TYPE_TX_SPEED (1ULL << 19) + + __le64 supported_stats_types[1]; +}; + +struct virtio_net_ctrl_queue_stats { + struct { + __le16 vq_index; + __le16 reserved[3]; + __le64 types_bitmap[1]; + } stats[1]; +}; + +struct virtio_net_stats_reply_hdr { +#define VIRTIO_NET_STATS_TYPE_REPLY_CVQ 32 + +#define VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC 0 +#define VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM 1 +#define VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO 2 +#define VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED 3 + +#define VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC 16 +#define VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM 17 +#define VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO 18 +#define VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED 19 + __u8 type; + __u8 reserved; + __le16 vq_index; + __le16 reserved1; + __le16 size; +}; + +struct virtio_net_stats_cvq { + struct virtio_net_stats_reply_hdr hdr; + + __le64 command_num; + __le64 ok_num; +}; + +struct virtio_net_stats_rx_basic { + struct virtio_net_stats_reply_hdr hdr; + + __le64 rx_notifications; + + __le64 rx_packets; + __le64 rx_bytes; + + __le64 rx_interrupts; + + __le64 rx_drops; + __le64 rx_drop_overruns; +}; + +struct virtio_net_stats_tx_basic { + struct virtio_net_stats_reply_hdr hdr; + + __le64 tx_notifications; + + __le64 tx_packets; + __le64 tx_bytes; + + __le64 tx_interrupts; + + __le64 tx_drops; + __le64 tx_drop_malformed; +}; + +struct virtio_net_stats_rx_csum { + struct virtio_net_stats_reply_hdr hdr; + + __le64 rx_csum_valid; + __le64 rx_needs_csum; + __le64 rx_csum_none; + __le64 rx_csum_bad; +}; + +struct virtio_net_stats_tx_csum { + struct virtio_net_stats_reply_hdr hdr; + + __le64 tx_csum_none; + __le64 tx_needs_csum; +}; + +struct virtio_net_stats_rx_gso { + struct virtio_net_stats_reply_hdr hdr; + + __le64 rx_gso_packets; + __le64 rx_gso_bytes; + __le64 rx_gso_packets_coalesced; + __le64 rx_gso_bytes_coalesced; +}; + +struct virtio_net_stats_tx_gso { + struct virtio_net_stats_reply_hdr hdr; + + __le64 tx_gso_packets; + __le64 tx_gso_bytes; + __le64 tx_gso_segments; + __le64 tx_gso_segments_bytes; + __le64 tx_gso_packets_noseg; + __le64 tx_gso_bytes_noseg; +}; + +struct virtio_net_stats_rx_speed { + struct virtio_net_stats_reply_hdr hdr; + + /* rx_{packets,bytes}_allowance_exceeded are too long. So rename to + * short name. + */ + __le64 rx_ratelimit_packets; + __le64 rx_ratelimit_bytes; +}; + +struct virtio_net_stats_tx_speed { + struct virtio_net_stats_reply_hdr hdr; + + /* tx_{packets,bytes}_allowance_exceeded are too long. So rename to + * short name. + */ + __le64 tx_ratelimit_packets; + __le64 tx_ratelimit_bytes; +}; + #endif /* _UAPI_LINUX_VIRTIO_NET_H */ -- cgit v1.2.3 From 0cfe71f45f420e412fda2395807a56c453a6e0b6 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Fri, 26 Apr 2024 11:39:27 +0800 Subject: netdev: add queue stats These stats are commonly. Support reporting those via netdev-genl queue stats. name: rx-hw-drops name: rx-hw-drop-overruns name: rx-csum-unnecessary name: rx-csum-none name: rx-csum-bad name: rx-hw-gro-packets name: rx-hw-gro-bytes name: rx-hw-gro-wire-packets name: rx-hw-gro-wire-bytes name: rx-hw-drop-ratelimits name: tx-hw-drops name: tx-hw-drop-errors name: tx-csum-none name: tx-needs-csum name: tx-hw-gso-packets name: tx-hw-gso-bytes name: tx-hw-gso-wire-packets name: tx-hw-gso-wire-bytes name: tx-hw-drop-ratelimits Signed-off-by: Xuan Zhuo Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/netdev.yaml | 104 ++++++++++++++++++++++++++++++++ include/net/netdev_queues.h | 27 +++++++++ include/uapi/linux/netdev.h | 19 ++++++ net/core/netdev-genl.c | 23 ++++++- tools/include/uapi/linux/netdev.h | 19 ++++++ 5 files changed, 190 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 679c4130707c..2be4b3714d17 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -335,6 +335,110 @@ attribute-sets: Allocation failure may, or may not result in a packet drop, depending on driver implementation and whether system recovers quickly. type: uint + - + name: rx-hw-drops + doc: | + Number of all packets which entered the device, but never left it, + including but not limited to: packets dropped due to lack of buffer + space, processing errors, explicit or implicit policies and packet + filters. + type: uint + - + name: rx-hw-drop-overruns + doc: | + Number of packets dropped due to transient lack of resources, such as + buffer space, host descriptors etc. + type: uint + - + name: rx-csum-unnecessary + doc: Number of packets that were marked as CHECKSUM_UNNECESSARY. + type: uint + - + name: rx-csum-none + doc: Number of packets that were not checksummed by device. + type: uint + - + name: rx-csum-bad + doc: | + Number of packets with bad checksum. The packets are not discarded, + but still delivered to the stack. + type: uint + - + name: rx-hw-gro-packets + doc: | + Number of packets that were coalesced from smaller packets by the device. + Counts only packets coalesced with the HW-GRO netdevice feature, + LRO-coalesced packets are not counted. + type: uint + - + name: rx-hw-gro-bytes + doc: See `rx-hw-gro-packets`. + type: uint + - + name: rx-hw-gro-wire-packets + doc: | + Number of packets that were coalesced to bigger packetss with the HW-GRO + netdevice feature. LRO-coalesced packets are not counted. + type: uint + - + name: rx-hw-gro-wire-bytes + doc: See `rx-hw-gro-wire-packets`. + type: uint + - + name: rx-hw-drop-ratelimits + doc: | + Number of the packets dropped by the device due to the received + packets bitrate exceeding the device rate limit. + type: uint + - + name: tx-hw-drops + doc: | + Number of packets that arrived at the device but never left it, + encompassing packets dropped for reasons such as processing errors, as + well as those affected by explicitly defined policies and packet + filtering criteria. + type: uint + - + name: tx-hw-drop-errors + doc: Number of packets dropped because they were invalid or malformed. + type: uint + - + name: tx-csum-none + doc: | + Number of packets that did not require the device to calculate the + checksum. + type: uint + - + name: tx-needs-csum + doc: | + Number of packets that required the device to calculate the checksum. + type: uint + - + name: tx-hw-gso-packets + doc: | + Number of packets that necessitated segmentation into smaller packets + by the device. + type: uint + - + name: tx-hw-gso-bytes + doc: See `tx-hw-gso-packets`. + type: uint + - + name: tx-hw-gso-wire-packets + doc: | + Number of wire-sized packets generated by processing + `tx-hw-gso-packets` + type: uint + - + name: tx-hw-gso-wire-bytes + doc: See `tx-hw-gso-wire-packets`. + type: uint + - + name: tx-hw-drop-ratelimits + doc: | + Number of the packets dropped by the device due to the transmit + packets bitrate exceeding the device rate limit. + type: uint operations: list: diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 1ec408585373..c7ac4539eafc 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -9,11 +9,38 @@ struct netdev_queue_stats_rx { u64 bytes; u64 packets; u64 alloc_fail; + + u64 hw_drops; + u64 hw_drop_overruns; + + u64 csum_unnecessary; + u64 csum_none; + u64 csum_bad; + + u64 hw_gro_packets; + u64 hw_gro_bytes; + u64 hw_gro_wire_packets; + u64 hw_gro_wire_bytes; + + u64 hw_drop_ratelimits; }; struct netdev_queue_stats_tx { u64 bytes; u64 packets; + + u64 hw_drops; + u64 hw_drop_errors; + + u64 csum_none; + u64 needs_csum; + + u64 hw_gso_packets; + u64 hw_gso_bytes; + u64 hw_gso_wire_packets; + u64 hw_gso_wire_bytes; + + u64 hw_drop_ratelimits; }; /** diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index bb65ee840cda..cf24f1d9adf8 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -146,6 +146,25 @@ enum { NETDEV_A_QSTATS_TX_PACKETS, NETDEV_A_QSTATS_TX_BYTES, NETDEV_A_QSTATS_RX_ALLOC_FAIL, + NETDEV_A_QSTATS_RX_HW_DROPS, + NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, + NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, + NETDEV_A_QSTATS_RX_CSUM_NONE, + NETDEV_A_QSTATS_RX_CSUM_BAD, + NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, + NETDEV_A_QSTATS_RX_HW_GRO_BYTES, + NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, + NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, + NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, + NETDEV_A_QSTATS_TX_HW_DROPS, + NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, + NETDEV_A_QSTATS_TX_CSUM_NONE, + NETDEV_A_QSTATS_TX_NEEDS_CSUM, + NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, + NETDEV_A_QSTATS_TX_HW_GSO_BYTES, + NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, + NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, + NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, __NETDEV_A_QSTATS_MAX, NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index dd6510f2c652..4b5054087309 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -489,7 +489,17 @@ netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) || - netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail)) + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits)) return -EMSGSIZE; return 0; } @@ -498,7 +508,16 @@ static int netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) || - netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes)) + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits)) return -EMSGSIZE; return 0; } diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index bb65ee840cda..cf24f1d9adf8 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -146,6 +146,25 @@ enum { NETDEV_A_QSTATS_TX_PACKETS, NETDEV_A_QSTATS_TX_BYTES, NETDEV_A_QSTATS_RX_ALLOC_FAIL, + NETDEV_A_QSTATS_RX_HW_DROPS, + NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, + NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, + NETDEV_A_QSTATS_RX_CSUM_NONE, + NETDEV_A_QSTATS_RX_CSUM_BAD, + NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, + NETDEV_A_QSTATS_RX_HW_GRO_BYTES, + NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, + NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, + NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, + NETDEV_A_QSTATS_TX_HW_DROPS, + NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, + NETDEV_A_QSTATS_TX_CSUM_NONE, + NETDEV_A_QSTATS_TX_NEEDS_CSUM, + NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, + NETDEV_A_QSTATS_TX_HW_GSO_BYTES, + NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, + NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, + NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, __NETDEV_A_QSTATS_MAX, NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) -- cgit v1.2.3 From 535a3692ba7245792e6f23654507865d4293c850 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Apr 2024 13:28:24 +0200 Subject: bpf: Add support for kprobe session attach Adding support to attach bpf program for entry and return probe of the same function. This is common use case which at the moment requires to create two kprobe multi links. Adding new BPF_TRACE_KPROBE_SESSION attach type that instructs kernel to attach single link program to both entry and exit probe. It's possible to control execution of the bpf program on return probe simply by returning zero or non zero from the entry bpf program execution to execute or not the bpf program on return probe respectively. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240430112830.1184228-2-jolsa@kernel.org --- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 7 ++++++- kernel/trace/bpf_trace.c | 28 ++++++++++++++++++++-------- tools/include/uapi/linux/bpf.h | 1 + 4 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d94a72593ead..90706a47f6ff 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1115,6 +1115,7 @@ enum bpf_attach_type { BPF_CGROUP_UNIX_GETSOCKNAME, BPF_NETKIT_PRIMARY, BPF_NETKIT_PEER, + BPF_TRACE_KPROBE_SESSION, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f655adf42e39..13ad74ecf2cd 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4016,11 +4016,15 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && attach_type != BPF_TRACE_KPROBE_MULTI) return -EINVAL; + if (prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION && + attach_type != BPF_TRACE_KPROBE_SESSION) + return -EINVAL; if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI && attach_type != BPF_TRACE_UPROBE_MULTI) return -EINVAL; if (attach_type != BPF_PERF_EVENT && attach_type != BPF_TRACE_KPROBE_MULTI && + attach_type != BPF_TRACE_KPROBE_SESSION && attach_type != BPF_TRACE_UPROBE_MULTI) return -EINVAL; return 0; @@ -5281,7 +5285,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_KPROBE: if (attr->link_create.attach_type == BPF_PERF_EVENT) ret = bpf_perf_link_attach(attr, prog); - else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI) + else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI || + attr->link_create.attach_type == BPF_TRACE_KPROBE_SESSION) ret = bpf_kprobe_multi_link_attach(attr, prog); else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI) ret = bpf_uprobe_multi_link_attach(attr, prog); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0ba722b57af3..06a9671834b6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1631,6 +1631,17 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +static bool is_kprobe_multi(const struct bpf_prog *prog) +{ + return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI || + prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; +} + +static inline bool is_kprobe_session(const struct bpf_prog *prog) +{ + return prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; +} + static const struct bpf_func_proto * kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1646,13 +1657,13 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_override_return_proto; #endif case BPF_FUNC_get_func_ip: - if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) + if (is_kprobe_multi(prog)) return &bpf_get_func_ip_proto_kprobe_multi; if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) return &bpf_get_func_ip_proto_uprobe_multi; return &bpf_get_func_ip_proto_kprobe; case BPF_FUNC_get_attach_cookie: - if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) + if (is_kprobe_multi(prog)) return &bpf_get_attach_cookie_proto_kmulti; if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) return &bpf_get_attach_cookie_proto_umulti; @@ -2834,10 +2845,11 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip, void *data) { struct bpf_kprobe_multi_link *link; + int err; link = container_of(fp, struct bpf_kprobe_multi_link, fp); - kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs); - return 0; + err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs); + return is_kprobe_session(link->link.prog) ? err : 0; } static void @@ -2981,7 +2993,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; - if (prog->expected_attach_type != BPF_TRACE_KPROBE_MULTI) + if (!is_kprobe_multi(prog)) return -EINVAL; flags = attr->link_create.kprobe_multi.flags; @@ -3062,10 +3074,10 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (err) goto error; - if (flags & BPF_F_KPROBE_MULTI_RETURN) - link->fp.exit_handler = kprobe_multi_link_exit_handler; - else + if (!(flags & BPF_F_KPROBE_MULTI_RETURN)) link->fp.entry_handler = kprobe_multi_link_handler; + if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog)) + link->fp.exit_handler = kprobe_multi_link_exit_handler; link->addrs = addrs; link->cookies = cookies; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d94a72593ead..90706a47f6ff 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1115,6 +1115,7 @@ enum bpf_attach_type { BPF_CGROUP_UNIX_GETSOCKNAME, BPF_NETKIT_PRIMARY, BPF_NETKIT_PEER, + BPF_TRACE_KPROBE_SESSION, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From a4a87fa4e96c7746e009de06a567688fd9af6013 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Tue, 30 Apr 2024 09:08:52 +0200 Subject: xfrm: Add Direction to the SA in or out This patch introduces the 'dir' attribute, 'in' or 'out', to the xfrm_state, SA, enhancing usability by delineating the scope of values based on direction. An input SA will restrict values pertinent to input, effectively segregating them from output-related values. And an output SA will restrict attributes for output. This change aims to streamline the configuration process and improve the overall consistency of SA attributes during configuration. This feature sets the groundwork for future patches, including the upcoming IP-TFS patch. Signed-off-by: Antony Antony Reviewed-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + include/uapi/linux/xfrm.h | 6 ++ net/xfrm/xfrm_compat.c | 7 ++- net/xfrm/xfrm_device.c | 6 ++ net/xfrm/xfrm_replay.c | 3 +- net/xfrm/xfrm_state.c | 8 +++ net/xfrm/xfrm_user.c | 138 ++++++++++++++++++++++++++++++++++++++++++++-- 7 files changed, 160 insertions(+), 9 deletions(-) (limited to 'include/uapi') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 57c743b7e4fe..7c9be06f8302 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -291,6 +291,7 @@ struct xfrm_state { /* Private data of this transformer, format is opaque, * interpreted by xfrm_type methods. */ void *data; + u8 dir; }; static inline struct net *xs_net(struct xfrm_state *x) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 6a77328be114..18ceaba8486e 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -141,6 +141,11 @@ enum { XFRM_POLICY_MAX = 3 }; +enum xfrm_sa_dir { + XFRM_SA_DIR_IN = 1, + XFRM_SA_DIR_OUT = 2 +}; + enum { XFRM_SHARE_ANY, /* No limitations */ XFRM_SHARE_SESSION, /* For this session only */ @@ -315,6 +320,7 @@ enum xfrm_attr_type_t { XFRMA_SET_MARK_MASK, /* __u32 */ XFRMA_IF_ID, /* __u32 */ XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ + XFRMA_SA_DIR, /* __u8 */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 655fe4ff8621..703d4172c7d7 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -98,6 +98,7 @@ static const int compat_msg_min[XFRM_NR_MSGTYPES] = { }; static const struct nla_policy compat_policy[XFRMA_MAX+1] = { + [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR }, [XFRMA_SA] = { .len = XMSGSIZE(compat_xfrm_usersa_info)}, [XFRMA_POLICY] = { .len = XMSGSIZE(compat_xfrm_userpolicy_info)}, [XFRMA_LASTUSED] = { .type = NLA_U64}, @@ -129,6 +130,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = { [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, + [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), }; static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, @@ -277,9 +279,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_SET_MARK_MASK: case XFRMA_IF_ID: case XFRMA_MTIMER_THRESH: + case XFRMA_SA_DIR: return xfrm_nla_cpy(dst, src, nla_len(src)); default: - BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } @@ -434,7 +437,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { - BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 6346690d5c69..2455a76a1cff 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -253,6 +253,12 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, return -EINVAL; } + if ((xuo->flags & XFRM_OFFLOAD_INBOUND && x->dir == XFRM_SA_DIR_OUT) || + (!(xuo->flags & XFRM_OFFLOAD_INBOUND) && x->dir == XFRM_SA_DIR_IN)) { + NL_SET_ERR_MSG(extack, "Mismatched SA and offload direction"); + return -EINVAL; + } + is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET; /* We don't yet support UDP encapsulation and TFC padding. */ diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index ce56d659c55a..bc56c6305725 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -778,7 +778,8 @@ int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack) } if (x->props.flags & XFRM_STATE_ESN) { - if (replay_esn->replay_window == 0) { + if (replay_esn->replay_window == 0 && + (!x->dir || x->dir == XFRM_SA_DIR_IN)) { NL_SET_ERR_MSG(extack, "ESN replay window must be > 0"); return -EINVAL; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0c306473a79d..649bb739df0d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1292,6 +1292,7 @@ found: if (km_query(x, tmpl, pol) == 0) { spin_lock_bh(&net->xfrm.xfrm_state_lock); x->km.state = XFRM_STATE_ACQ; + x->dir = XFRM_SA_DIR_OUT; list_add(&x->km.all, &net->xfrm.state_all); XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, @@ -1744,6 +1745,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->lastused = orig->lastused; x->new_mapping = 0; x->new_mapping_sport = 0; + x->dir = orig->dir; return x; @@ -1864,8 +1866,14 @@ int xfrm_state_update(struct xfrm_state *x) } if (x1->km.state == XFRM_STATE_ACQ) { + if (x->dir && x1->dir != x->dir) + goto out; + __xfrm_state_insert(x); x = NULL; + } else { + if (x1->dir != x->dir) + goto out; } err = 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 810b520493f3..f5eb3af4fb81 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -130,7 +130,7 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs, struct netlink_ext_a } static inline int verify_replay(struct xfrm_usersa_info *p, - struct nlattr **attrs, + struct nlattr **attrs, u8 sa_dir, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; @@ -168,6 +168,30 @@ static inline int verify_replay(struct xfrm_usersa_info *p, return -EINVAL; } + if (sa_dir == XFRM_SA_DIR_OUT) { + if (rs->replay_window) { + NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA"); + return -EINVAL; + } + if (rs->seq || rs->seq_hi) { + NL_SET_ERR_MSG(extack, + "Replay seq and seq_hi should be 0 for output SA"); + return -EINVAL; + } + if (rs->bmp_len) { + NL_SET_ERR_MSG(extack, "Replay bmp_len should 0 for output SA"); + return -EINVAL; + } + } + + if (sa_dir == XFRM_SA_DIR_IN) { + if (rs->oseq || rs->oseq_hi) { + NL_SET_ERR_MSG(extack, + "Replay oseq and oseq_hi should be 0 for input SA"); + return -EINVAL; + } + } + return 0; } @@ -176,6 +200,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, struct netlink_ext_ack *extack) { int err; + u8 sa_dir = attrs[XFRMA_SA_DIR] ? nla_get_u8(attrs[XFRMA_SA_DIR]) : 0; err = -EINVAL; switch (p->family) { @@ -334,7 +359,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(attrs, extack))) goto out; - if ((err = verify_replay(p, attrs, extack))) + if ((err = verify_replay(p, attrs, sa_dir, extack))) goto out; err = -EINVAL; @@ -358,6 +383,77 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; goto out; } + + if (sa_dir == XFRM_SA_DIR_OUT) { + NL_SET_ERR_MSG(extack, + "MTIMER_THRESH attribute should not be set on output SA"); + err = -EINVAL; + goto out; + } + } + + if (sa_dir == XFRM_SA_DIR_OUT) { + if (p->flags & XFRM_STATE_DECAP_DSCP) { + NL_SET_ERR_MSG(extack, "Flag DECAP_DSCP should not be set for output SA"); + err = -EINVAL; + goto out; + } + + if (p->flags & XFRM_STATE_ICMP) { + NL_SET_ERR_MSG(extack, "Flag ICMP should not be set for output SA"); + err = -EINVAL; + goto out; + } + + if (p->flags & XFRM_STATE_WILDRECV) { + NL_SET_ERR_MSG(extack, "Flag WILDRECV should not be set for output SA"); + err = -EINVAL; + goto out; + } + + if (p->replay_window) { + NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_REPLAY_VAL]) { + struct xfrm_replay_state *replay; + + replay = nla_data(attrs[XFRMA_REPLAY_VAL]); + + if (replay->seq || replay->bitmap) { + NL_SET_ERR_MSG(extack, + "Replay seq and bitmap should be 0 for output SA"); + err = -EINVAL; + goto out; + } + } + } + + if (sa_dir == XFRM_SA_DIR_IN) { + if (p->flags & XFRM_STATE_NOPMTUDISC) { + NL_SET_ERR_MSG(extack, "Flag NOPMTUDISC should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_SA_EXTRA_FLAGS]) { + u32 xflags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); + + if (xflags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) { + NL_SET_ERR_MSG(extack, "Flag DONT_ENCAP_DSCP should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (xflags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP) { + NL_SET_ERR_MSG(extack, "Flag OSEQ_MAY_WRAP should not be set for input SA"); + err = -EINVAL; + goto out; + } + + } } out: @@ -734,6 +830,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (attrs[XFRMA_IF_ID]) x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (attrs[XFRMA_SA_DIR]) + x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]); + err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack); if (err) goto error; @@ -1182,8 +1281,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (ret) goto out; } - if (x->mapping_maxage) + if (x->mapping_maxage) { ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage); + if (ret) + goto out; + } + if (x->dir) + ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); out: return ret; } @@ -1618,6 +1722,9 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) goto out; + if (attrs[XFRMA_SA_DIR]) + x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]); + resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); @@ -2402,7 +2509,8 @@ static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x) + nla_total_size_64bit(sizeof(struct xfrm_lifetime_cur)) + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(4) /* XFRM_AE_RTHR */ - + nla_total_size(4); /* XFRM_AE_ETHR */ + + nla_total_size(4) /* XFRM_AE_ETHR */ + + nla_total_size(sizeof(x->dir)); /* XFRMA_SA_DIR */ } static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) @@ -2459,6 +2567,12 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) goto out_cancel; + if (x->dir) { + err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); + if (err) + goto out_cancel; + } + nlmsg_end(skb, nlh); return 0; @@ -3018,6 +3132,7 @@ EXPORT_SYMBOL_GPL(xfrm_msg_min); #undef XMSGSIZE const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { + [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR }, [XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)}, [XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)}, [XFRMA_LASTUSED] = { .type = NLA_U64}, @@ -3049,6 +3164,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, + [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), }; EXPORT_SYMBOL_GPL(xfrma_policy); @@ -3189,8 +3305,9 @@ static void xfrm_netlink_rcv(struct sk_buff *skb) static inline unsigned int xfrm_expire_msgsize(void) { - return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) - + nla_total_size(sizeof(struct xfrm_mark)); + return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) + + nla_total_size(sizeof(struct xfrm_mark)) + + nla_total_size(sizeof_field(struct xfrm_state, dir)); } static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) @@ -3217,6 +3334,12 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) return err; + if (x->dir) { + err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); + if (err) + return err; + } + nlmsg_end(skb, nlh); return 0; } @@ -3324,6 +3447,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->mapping_maxage) l += nla_total_size(sizeof(x->mapping_maxage)); + if (x->dir) + l += nla_total_size(sizeof(x->dir)); + return l; } -- cgit v1.2.3 From 601a0867f86cbb5e137ce485a7eb60cbf9fc5180 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Tue, 30 Apr 2024 09:09:09 +0200 Subject: xfrm: Add dir validation to "out" data path lookup Introduces validation for the x->dir attribute within the XFRM output data lookup path. If the configured direction does not match the expected direction, output, increment the XfrmOutStateDirError counter and drop the packet to ensure data integrity and correct flow handling. grep -vw 0 /proc/net/xfrm_stat XfrmOutPolError 1 XfrmOutStateDirError 1 Signed-off-by: Antony Antony Reviewed-by: Sabrina Dubroca Reviewed-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- Documentation/networking/xfrm_proc.rst | 3 +++ include/uapi/linux/snmp.h | 1 + net/xfrm/xfrm_policy.c | 6 ++++++ net/xfrm/xfrm_proc.c | 1 + 4 files changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/networking/xfrm_proc.rst b/Documentation/networking/xfrm_proc.rst index 0a771c5a7399..5ac3acf4cf51 100644 --- a/Documentation/networking/xfrm_proc.rst +++ b/Documentation/networking/xfrm_proc.rst @@ -111,3 +111,6 @@ XfrmOutPolError: XfrmOutStateInvalid: State is invalid, perhaps expired + +XfrmOutStateDirError: + State direction mismatch (lookup found an input state on the output path, expected output or no direction) diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index a0819c6a5988..23792b8412bd 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -337,6 +337,7 @@ enum LINUX_MIB_XFRMFWDHDRERROR, /* XfrmFwdHdrError*/ LINUX_MIB_XFRMOUTSTATEINVALID, /* XfrmOutStateInvalid */ LINUX_MIB_XFRMACQUIREERROR, /* XfrmAcquireError */ + LINUX_MIB_XFRMOUTSTATEDIRERROR, /* XfrmOutStateDirError */ __LINUX_MIB_XFRMMAX }; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6affe5cd85d8..298b3a9eb48d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2489,6 +2489,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family, policy->if_id); + if (x && x->dir && x->dir != XFRM_SA_DIR_OUT) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEDIRERROR); + xfrm_state_put(x); + error = -EINVAL; + goto fail; + } if (x && x->km.state == XFRM_STATE_VALID) { xfrm[nx++] = x; diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 5f9bf8e5c933..98606f1078f7 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -41,6 +41,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), + SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR), SNMP_MIB_SENTINEL }; -- cgit v1.2.3 From 304b44f0d5a4c2f91f82f7c31538d00485fb484c Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Tue, 30 Apr 2024 09:09:29 +0200 Subject: xfrm: Add dir validation to "in" data path lookup Introduces validation for the x->dir attribute within the XFRM input data lookup path. If the configured direction does not match the expected direction, input, increment the XfrmInStateDirError counter and drop the packet to ensure data integrity and correct flow handling. grep -vw 0 /proc/net/xfrm_stat XfrmInStateDirError 1 Signed-off-by: Antony Antony Reviewed-by: Sabrina Dubroca Reviewed-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- Documentation/networking/xfrm_proc.rst | 3 +++ include/uapi/linux/snmp.h | 1 + net/ipv6/xfrm6_input.c | 7 +++++++ net/xfrm/xfrm_input.c | 11 +++++++++++ net/xfrm/xfrm_proc.c | 1 + 5 files changed, 23 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/networking/xfrm_proc.rst b/Documentation/networking/xfrm_proc.rst index 5ac3acf4cf51..973d1571acac 100644 --- a/Documentation/networking/xfrm_proc.rst +++ b/Documentation/networking/xfrm_proc.rst @@ -73,6 +73,9 @@ XfrmAcquireError: XfrmFwdHdrError: Forward routing of a packet is not allowed +XfrmInStateDirError: + State direction mismatch (lookup found an output state on the input path, expected input or no direction) + Outbound errors ~~~~~~~~~~~~~~~ XfrmOutError: diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 23792b8412bd..adf5fd78dd50 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -338,6 +338,7 @@ enum LINUX_MIB_XFRMOUTSTATEINVALID, /* XfrmOutStateInvalid */ LINUX_MIB_XFRMACQUIREERROR, /* XfrmAcquireError */ LINUX_MIB_XFRMOUTSTATEDIRERROR, /* XfrmOutStateDirError */ + LINUX_MIB_XFRMINSTATEDIRERROR, /* XfrmInStateDirError */ __LINUX_MIB_XFRMMAX }; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 2c6aeb090b7a..d5bac0d76b6e 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -266,6 +266,13 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, if (!x) continue; + if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR); + xfrm_state_put(x); + x = NULL; + continue; + } + spin_lock(&x->lock); if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) && diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 161f535c8b94..71b42de6e3c9 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -466,6 +466,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) { x = xfrm_input_state(skb); + if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR); + goto drop; + } + if (unlikely(x->km.state != XFRM_STATE_VALID)) { if (x->km.state == XFRM_STATE_ACQ) XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); @@ -571,6 +576,12 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } + if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR); + xfrm_state_put(x); + goto drop; + } + skb->mark = xfrm_smark_get(skb->mark, x); sp->xvec[sp->len++] = x; diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 98606f1078f7..eeb984be03a7 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -42,6 +42,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR), + SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR), SNMP_MIB_SENTINEL }; -- cgit v1.2.3 From 4a3540a8bf3c13dc3955f0c0895332b9c653be3f Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Wed, 6 Mar 2024 15:18:04 +0100 Subject: netfilter: conntrack: fix ct-state for ICMPv6 Multicast Router Discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far Multicast Router Advertisements and Multicast Router Solicitations from the Multicast Router Discovery protocol (RFC4286) would be marked as INVALID for IPv6, even if they are in fact intact and adhering to RFC4286. This broke MRA reception and by that multicast reception on IPv6 multicast routers in a Proxmox managed setup, where Proxmox would install a rule like "-m conntrack --ctstate INVALID -j DROP" at the top of the FORWARD chain with br-nf-call-ip6tables enabled by default. Similar to as it's done for MLDv1, MLDv2 and IPv6 Neighbor Discovery already, fix this issue by excluding MRD from connection tracking handling as MRD always uses predefined multicast destinations for its messages, too. This changes the ct-state for ICMPv6 MRD messages from INVALID to UNTRACKED. This issue was found and fixed with the help of the mrdisc tool (https://github.com/troglobit/mrdisc). Signed-off-by: Linus Lüssing Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/icmpv6.h | 1 + net/netfilter/nf_conntrack_proto_icmpv6.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index ecaece3af38d..4eaab89e2856 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -112,6 +112,7 @@ struct icmp6hdr { #define ICMPV6_MOBILE_PREFIX_ADV 147 #define ICMPV6_MRDISC_ADV 151 +#define ICMPV6_MRDISC_SOL 152 #define ICMPV6_MSG_MAX 255 diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 1020d67600a9..327b8059025d 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -62,7 +62,9 @@ static const u_int8_t noct_valid_new[] = { [NDISC_ROUTER_ADVERTISEMENT - 130] = 1, [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1, [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1, - [ICMPV6_MLD2_REPORT - 130] = 1 + [ICMPV6_MLD2_REPORT - 130] = 1, + [ICMPV6_MRDISC_ADV - 130] = 1, + [ICMPV6_MRDISC_SOL - 130] = 1 }; bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, -- cgit v1.2.3 From 999cb275c807b92662f8a74fdaee9619700f64a5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 7 May 2024 01:12:08 +0200 Subject: gtp: add IPv6 support Add new iflink attributes to configure in-kernel UDP listener socket address: IFLA_GTP_LOCAL and IFLA_GTP_LOCAL6. If none of these attributes are specified, default is still to IPv4 INADDR_ANY for backward compatibility. Add new attributes to set up family and IPv6 address of GTP tunnels: GTPA_FAMILY, GTPA_PEER_ADDR6 and GTPA_MS_ADDR6. If no GTPA_FAMILY is specified, AF_INET is assumed for backward compatibility. setsockopt IPV6_ADDRFORM allows to downgrade socket from IPv6 to IPv4 after socket is bound. Assumption is that socket listener that is attached to the gtp device needs to be either IPv4 or IPv6. Therefore, GTP socket listener does not allow for IPv4-mapped-IPv6 listener. Signed-off-by: Pablo Neira Ayuso --- drivers/net/gtp.c | 396 ++++++++++++++++++++++++++++++++++++++++--- include/uapi/linux/gtp.h | 3 + include/uapi/linux/if_link.h | 2 + 3 files changed, 374 insertions(+), 27 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 811e4a660636..84ff502a38bc 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -52,9 +53,11 @@ struct pdp_ctx { union { struct in_addr addr; + struct in6_addr addr6; } ms; union { struct in_addr addr; + struct in6_addr addr6; } peer; struct sock *sk; @@ -131,6 +134,11 @@ static inline u32 ipv4_hashfn(__be32 ip) return jhash_1word((__force u32)ip, gtp_h_initval); } +static u32 ipv6_hashfn(const struct in6_addr *ip6) +{ + return jhash(ip6, sizeof(*ip6), gtp_h_initval); +} + /* Resolve a PDP context structure based on the 64bit TID. */ static struct pdp_ctx *gtp0_pdp_find(struct gtp_dev *gtp, u64 tid) { @@ -180,6 +188,23 @@ static struct pdp_ctx *ipv4_pdp_find(struct gtp_dev *gtp, __be32 ms_addr) return NULL; } +static struct pdp_ctx *ipv6_pdp_find(struct gtp_dev *gtp, + const struct in6_addr *ms_addr) +{ + struct hlist_head *head; + struct pdp_ctx *pdp; + + head = >p->addr_hash[ipv6_hashfn(ms_addr) % gtp->hash_size]; + + hlist_for_each_entry_rcu(pdp, head, hlist_addr) { + if (pdp->af == AF_INET6 && + memcmp(&pdp->ms.addr6, ms_addr, sizeof(struct in6_addr)) == 0) + return pdp; + } + + return NULL; +} + static bool gtp_check_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx, unsigned int hdrlen, unsigned int role) { @@ -196,6 +221,28 @@ static bool gtp_check_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx, return iph->saddr == pctx->ms.addr.s_addr; } +static bool gtp_check_ms_ipv6(struct sk_buff *skb, struct pdp_ctx *pctx, + unsigned int hdrlen, unsigned int role) +{ + struct ipv6hdr *ip6h; + int ret; + + if (!pskb_may_pull(skb, hdrlen + sizeof(struct ipv6hdr))) + return false; + + ip6h = (struct ipv6hdr *)(skb->data + hdrlen); + + if (role == GTP_ROLE_SGSN) { + ret = memcmp(&ip6h->daddr, &pctx->ms.addr6, + sizeof(struct in6_addr)); + } else { + ret = memcmp(&ip6h->saddr, &pctx->ms.addr6, + sizeof(struct in6_addr)); + } + + return ret == 0; +} + /* Check if the inner IP address in this packet is assigned to any * existing mobile subscriber. */ @@ -205,6 +252,8 @@ static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx, switch (ntohs(skb->protocol)) { case ETH_P_IP: return gtp_check_ms_ipv4(skb, pctx, hdrlen, role); + case ETH_P_IPV6: + return gtp_check_ms_ipv6(skb, pctx, hdrlen, role); } return false; } @@ -260,6 +309,27 @@ static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4, return ip_route_output_key(sock_net(sk), fl4); } +static struct rt6_info *ip6_route_output_gtp(struct net *net, + struct flowi6 *fl6, + const struct sock *sk, + const struct in6_addr *daddr, + struct in6_addr *saddr) +{ + struct dst_entry *dst; + + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_oif = sk->sk_bound_dev_if; + fl6->daddr = *daddr; + fl6->saddr = *saddr; + fl6->flowi6_proto = sk->sk_protocol; + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, fl6, NULL); + if (IS_ERR(dst)) + return ERR_PTR(-ENETUNREACH); + + return (struct rt6_info *)dst; +} + /* GSM TS 09.60. 7.3 * In all Path Management messages: * - TID: is not used and shall be set to 0. @@ -838,12 +908,15 @@ struct gtp_pktinfo { struct sock *sk; union { struct iphdr *iph; + struct ipv6hdr *ip6h; }; union { struct flowi4 fl4; + struct flowi6 fl6; }; union { struct rtable *rt; + struct rt6_info *rt6; }; struct pdp_ctx *pctx; struct net_device *dev; @@ -878,6 +951,20 @@ static inline void gtp_set_pktinfo_ipv4(struct gtp_pktinfo *pktinfo, pktinfo->dev = dev; } +static void gtp_set_pktinfo_ipv6(struct gtp_pktinfo *pktinfo, + struct sock *sk, struct ipv6hdr *ip6h, + struct pdp_ctx *pctx, struct rt6_info *rt6, + struct flowi6 *fl6, + struct net_device *dev) +{ + pktinfo->sk = sk; + pktinfo->ip6h = ip6h; + pktinfo->pctx = pctx; + pktinfo->rt6 = rt6; + pktinfo->fl6 = *fl6; + pktinfo->dev = dev; +} + static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, struct gtp_pktinfo *pktinfo) { @@ -959,6 +1046,81 @@ err: return -EBADMSG; } +static int gtp_build_skb_ip6(struct sk_buff *skb, struct net_device *dev, + struct gtp_pktinfo *pktinfo) +{ + struct gtp_dev *gtp = netdev_priv(dev); + struct net *net = gtp->net; + struct dst_entry *dst; + struct pdp_ctx *pctx; + struct ipv6hdr *ip6h; + struct rt6_info *rt; + struct flowi6 fl6; + int mtu; + + /* Read the IP destination address and resolve the PDP context. + * Prepend PDP header with TEI/TID from PDP ctx. + */ + ip6h = ipv6_hdr(skb); + if (gtp->role == GTP_ROLE_SGSN) + pctx = ipv6_pdp_find(gtp, &ip6h->saddr); + else + pctx = ipv6_pdp_find(gtp, &ip6h->daddr); + + if (!pctx) { + netdev_dbg(dev, "no PDP ctx found for %pI6, skip\n", + &ip6h->daddr); + return -ENOENT; + } + netdev_dbg(dev, "found PDP context %p\n", pctx); + + rt = ip6_route_output_gtp(net, &fl6, pctx->sk, &pctx->peer.addr6, + &inet6_sk(pctx->sk)->saddr); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to SSGN %pI6\n", + &pctx->peer.addr6); + dev->stats.tx_carrier_errors++; + goto err; + } + dst = &rt->dst; + + if (rt->dst.dev == dev) { + netdev_dbg(dev, "circular route to SSGN %pI6\n", + &pctx->peer.addr6); + dev->stats.collisions++; + goto err_rt; + } + + mtu = dst_mtu(&rt->dst) - dev->hard_header_len - + sizeof(struct ipv6hdr) - sizeof(struct udphdr); + switch (pctx->gtp_version) { + case GTP_V0: + mtu -= sizeof(struct gtp0_header); + break; + case GTP_V1: + mtu -= sizeof(struct gtp1_header); + break; + } + + skb_dst_update_pmtu_no_confirm(skb, mtu); + + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))) { + netdev_dbg(dev, "packet too big, fragmentation needed\n"); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + goto err_rt; + } + + gtp_set_pktinfo_ipv6(pktinfo, pctx->sk, ip6h, pctx, rt, &fl6, dev); + gtp_push_header(skb, pktinfo); + + return 0; +err_rt: + dst_release(dst); +err: + return -EBADMSG; +} + static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned int proto = ntohs(skb->protocol); @@ -977,6 +1139,9 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) case ETH_P_IP: err = gtp_build_skb_ip4(skb, dev, &pktinfo); break; + case ETH_P_IPV6: + err = gtp_build_skb_ip6(skb, dev, &pktinfo); + break; default: err = -EOPNOTSUPP; break; @@ -1000,6 +1165,21 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) dev_net(dev)), false); break; + case ETH_P_IPV6: +#if IS_ENABLED(CONFIG_IPV6) + netdev_dbg(pktinfo.dev, "gtp -> IP src: %pI6 dst: %pI6\n", + &pktinfo.ip6h->saddr, &pktinfo.ip6h->daddr); + udp_tunnel6_xmit_skb(&pktinfo.rt6->dst, pktinfo.sk, skb, dev, + &pktinfo.fl6.saddr, &pktinfo.fl6.daddr, + ipv6_get_dsfield(pktinfo.ip6h), + ip6_dst_hoplimit(&pktinfo.rt->dst), + 0, + pktinfo.gtph_port, pktinfo.gtph_port, + false); +#else + goto tx_err; +#endif + break; } return NETDEV_TX_OK; @@ -1057,17 +1237,45 @@ static void gtp_destructor(struct net_device *dev) kfree(gtp->tid_hash); } -static struct sock *gtp_create_sock(int type, struct gtp_dev *gtp) +static int gtp_sock_udp_config(struct udp_port_cfg *udp_conf, + const struct nlattr *nla, int family) +{ + udp_conf->family = family; + + switch (udp_conf->family) { + case AF_INET: + udp_conf->local_ip.s_addr = nla_get_be32(nla); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + udp_conf->local_ip6 = nla_get_in6_addr(nla); + break; +#endif + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static struct sock *gtp_create_sock(int type, struct gtp_dev *gtp, + const struct nlattr *nla, int family) { struct udp_tunnel_sock_cfg tuncfg = {}; - struct udp_port_cfg udp_conf = { - .local_ip.s_addr = htonl(INADDR_ANY), - .family = AF_INET, - }; + struct udp_port_cfg udp_conf = {}; struct net *net = gtp->net; struct socket *sock; int err; + if (nla) { + err = gtp_sock_udp_config(&udp_conf, nla, family); + if (err < 0) + return ERR_PTR(err); + } else { + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + udp_conf.family = AF_INET; + } + if (type == UDP_ENCAP_GTP0) udp_conf.local_udp_port = htons(GTP0_PORT); else if (type == UDP_ENCAP_GTP1U) @@ -1089,16 +1297,17 @@ static struct sock *gtp_create_sock(int type, struct gtp_dev *gtp) return sock->sk; } -static int gtp_create_sockets(struct gtp_dev *gtp, struct nlattr *data[]) +static int gtp_create_sockets(struct gtp_dev *gtp, const struct nlattr *nla, + int family) { struct sock *sk1u; struct sock *sk0; - sk0 = gtp_create_sock(UDP_ENCAP_GTP0, gtp); + sk0 = gtp_create_sock(UDP_ENCAP_GTP0, gtp, nla, family); if (IS_ERR(sk0)) return PTR_ERR(sk0); - sk1u = gtp_create_sock(UDP_ENCAP_GTP1U, gtp); + sk1u = gtp_create_sock(UDP_ENCAP_GTP1U, gtp, nla, family); if (IS_ERR(sk1u)) { udp_tunnel_sock_release(sk0->sk_socket); return PTR_ERR(sk1u); @@ -1111,6 +1320,9 @@ static int gtp_create_sockets(struct gtp_dev *gtp, struct nlattr *data[]) return 0; } +#define GTP_TH_MAXLEN (sizeof(struct udphdr) + sizeof(struct gtp0_header)) +#define GTP_IPV6_MAXLEN (sizeof(struct ipv6hdr) + GTP_TH_MAXLEN) + static int gtp_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -1120,6 +1332,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, struct gtp_net *gn; int hashsize, err; +#if !IS_ENABLED(CONFIG_IPV6) + if (data[IFLA_GTP_LOCAL6]) + return -EAFNOSUPPORT; +#endif + gtp = netdev_priv(dev); if (!data[IFLA_GTP_PDP_HASHSIZE]) { @@ -1148,13 +1365,24 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, if (err < 0) return err; - if (data[IFLA_GTP_CREATE_SOCKETS]) - err = gtp_create_sockets(gtp, data); - else + if (data[IFLA_GTP_CREATE_SOCKETS]) { + if (data[IFLA_GTP_LOCAL6]) + err = gtp_create_sockets(gtp, data[IFLA_GTP_LOCAL6], AF_INET6); + else + err = gtp_create_sockets(gtp, data[IFLA_GTP_LOCAL], AF_INET); + } else { err = gtp_encap_enable(gtp, data); + } + if (err < 0) goto out_hashtable; + if ((gtp->sk0 && gtp->sk0->sk_family == AF_INET6) || + (gtp->sk1u && gtp->sk1u->sk_family == AF_INET6)) { + dev->mtu = ETH_DATA_LEN - GTP_IPV6_MAXLEN; + dev->needed_headroom = LL_MAX_HEADER + GTP_IPV6_MAXLEN; + } + err = register_netdevice(dev); if (err < 0) { netdev_dbg(dev, "failed to register new netdev %d\n", err); @@ -1199,6 +1427,8 @@ static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = { [IFLA_GTP_ROLE] = { .type = NLA_U32 }, [IFLA_GTP_CREATE_SOCKETS] = { .type = NLA_U8 }, [IFLA_GTP_RESTART_COUNT] = { .type = NLA_U8 }, + [IFLA_GTP_LOCAL] = { .type = NLA_U32 }, + [IFLA_GTP_LOCAL6] = { .len = sizeof(struct in6_addr) }, }; static int gtp_validate(struct nlattr *tb[], struct nlattr *data[], @@ -1298,6 +1528,12 @@ static struct sock *gtp_encap_enable_socket(int fd, int type, goto out_sock; } + if (sk->sk_family == AF_INET6 && + !sk->sk_ipv6only) { + sk = ERR_PTR(-EADDRNOTAVAIL); + goto out_sock; + } + lock_sock(sk); if (sk->sk_user_data) { sk = ERR_PTR(-EBUSY); @@ -1349,6 +1585,13 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]) gtp->sk0 = sk0; gtp->sk1u = sk1u; + if (sk0 && sk1u && + sk0->sk_family != sk1u->sk_family) { + gtp_encap_disable_sock(sk0); + gtp_encap_disable_sock(sk1u); + return -EINVAL; + } + return 0; } @@ -1378,14 +1621,9 @@ static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[]) return gtp; } -static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) +static void gtp_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) { pctx->gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]); - pctx->af = AF_INET; - pctx->peer.addr.s_addr = - nla_get_be32(info->attrs[GTPA_PEER_ADDRESS]); - pctx->ms.addr.s_addr = - nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); switch (pctx->gtp_version) { case GTP_V0: @@ -1405,21 +1643,78 @@ static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) } } +static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) +{ + pctx->peer.addr.s_addr = + nla_get_be32(info->attrs[GTPA_PEER_ADDRESS]); + pctx->ms.addr.s_addr = + nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); + gtp_pdp_fill(pctx, info); +} + +static void ipv6_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) +{ + pctx->peer.addr6 = nla_get_in6_addr(info->attrs[GTPA_PEER_ADDR6]); + pctx->ms.addr6 = nla_get_in6_addr(info->attrs[GTPA_MS_ADDR6]); + gtp_pdp_fill(pctx, info); +} + static struct pdp_ctx *gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk, struct genl_info *info) { struct pdp_ctx *pctx, *pctx_tid = NULL; struct net_device *dev = gtp->dev; u32 hash_ms, hash_tid = 0; + struct in6_addr ms_addr6; unsigned int version; bool found = false; __be32 ms_addr; + int family; - ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); - hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size; version = nla_get_u32(info->attrs[GTPA_VERSION]); - pctx = ipv4_pdp_find(gtp, ms_addr); + if (info->attrs[GTPA_FAMILY]) + family = nla_get_u8(info->attrs[GTPA_FAMILY]); + else + family = AF_INET; + +#if !IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6) + return ERR_PTR(-EAFNOSUPPORT); +#endif + + if ((info->attrs[GTPA_PEER_ADDRESS] && + sk->sk_family == AF_INET6) || + (info->attrs[GTPA_PEER_ADDR6] && + sk->sk_family == AF_INET)) + return ERR_PTR(-EAFNOSUPPORT); + + switch (family) { + case AF_INET: + if (!info->attrs[GTPA_MS_ADDRESS] || + !info->attrs[GTPA_PEER_ADDRESS] || + info->attrs[GTPA_MS_ADDR6] || + info->attrs[GTPA_PEER_ADDR6]) + return ERR_PTR(-EINVAL); + + ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); + hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size; + pctx = ipv4_pdp_find(gtp, ms_addr); + break; + case AF_INET6: + if (!info->attrs[GTPA_MS_ADDR6] || + !info->attrs[GTPA_PEER_ADDR6] || + info->attrs[GTPA_MS_ADDRESS] || + info->attrs[GTPA_PEER_ADDRESS]) + return ERR_PTR(-EINVAL); + + ms_addr6 = nla_get_in6_addr(info->attrs[GTPA_MS_ADDR6]); + hash_ms = ipv6_hashfn(&ms_addr6) % gtp->hash_size; + pctx = ipv6_pdp_find(gtp, &ms_addr6); + break; + default: + return ERR_PTR(-EAFNOSUPPORT); + } if (pctx) found = true; if (version == GTP_V0) @@ -1442,7 +1737,14 @@ static struct pdp_ctx *gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk, if (!pctx) pctx = pctx_tid; - ipv4_pdp_fill(pctx, info); + switch (pctx->af) { + case AF_INET: + ipv4_pdp_fill(pctx, info); + break; + case AF_INET6: + ipv6_pdp_fill(pctx, info); + break; + } if (pctx->gtp_version == GTP_V0) netdev_dbg(dev, "GTPv0-U: update tunnel id = %llx (pdp %p)\n", @@ -1462,7 +1764,30 @@ static struct pdp_ctx *gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk, sock_hold(sk); pctx->sk = sk; pctx->dev = gtp->dev; - ipv4_pdp_fill(pctx, info); + pctx->af = family; + + switch (pctx->af) { + case AF_INET: + if (!info->attrs[GTPA_MS_ADDRESS] || + !info->attrs[GTPA_PEER_ADDRESS]) { + sock_put(sk); + kfree(pctx); + return ERR_PTR(-EINVAL); + } + + ipv4_pdp_fill(pctx, info); + break; + case AF_INET6: + if (!info->attrs[GTPA_MS_ADDR6] || + !info->attrs[GTPA_PEER_ADDR6]) { + sock_put(sk); + kfree(pctx); + return ERR_PTR(-EINVAL); + } + + ipv6_pdp_fill(pctx, info); + break; + } atomic_set(&pctx->tx_seq, 0); switch (pctx->gtp_version) { @@ -1524,9 +1849,7 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info) int err; if (!info->attrs[GTPA_VERSION] || - !info->attrs[GTPA_LINK] || - !info->attrs[GTPA_PEER_ADDRESS] || - !info->attrs[GTPA_MS_ADDRESS]) + !info->attrs[GTPA_LINK]) return -EINVAL; version = nla_get_u32(info->attrs[GTPA_VERSION]); @@ -1593,6 +1916,10 @@ static struct pdp_ctx *gtp_find_pdp_by_link(struct net *net, __be32 ip = nla_get_be32(nla[GTPA_MS_ADDRESS]); return ipv4_pdp_find(gtp, ip); + } else if (nla[GTPA_MS_ADDR6]) { + struct in6_addr addr = nla_get_in6_addr(nla[GTPA_MS_ADDR6]); + + return ipv6_pdp_find(gtp, &addr); } else if (nla[GTPA_VERSION]) { u32 gtp_version = nla_get_u32(nla[GTPA_VERSION]); @@ -1663,10 +1990,22 @@ static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq, if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version) || nla_put_u32(skb, GTPA_LINK, pctx->dev->ifindex) || - nla_put_be32(skb, GTPA_PEER_ADDRESS, pctx->peer.addr.s_addr) || - nla_put_be32(skb, GTPA_MS_ADDRESS, pctx->ms.addr.s_addr)) + nla_put_u8(skb, GTPA_FAMILY, pctx->af)) goto nla_put_failure; + switch (pctx->af) { + case AF_INET: + if (nla_put_be32(skb, GTPA_PEER_ADDRESS, pctx->peer.addr.s_addr) || + nla_put_be32(skb, GTPA_MS_ADDRESS, pctx->ms.addr.s_addr)) + goto nla_put_failure; + break; + case AF_INET6: + if (nla_put_in6_addr(skb, GTPA_PEER_ADDR6, &pctx->peer.addr6) || + nla_put_in6_addr(skb, GTPA_MS_ADDR6, &pctx->ms.addr6)) + goto nla_put_failure; + break; + } + switch (pctx->gtp_version) { case GTP_V0: if (nla_put_u64_64bit(skb, GTPA_TID, pctx->u.v0.tid, GTPA_PAD) || @@ -1893,6 +2232,9 @@ static const struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = { [GTPA_NET_NS_FD] = { .type = NLA_U32, }, [GTPA_I_TEI] = { .type = NLA_U32, }, [GTPA_O_TEI] = { .type = NLA_U32, }, + [GTPA_PEER_ADDR6] = { .len = sizeof(struct in6_addr), }, + [GTPA_MS_ADDR6] = { .len = sizeof(struct in6_addr), }, + [GTPA_FAMILY] = { .type = NLA_U8, }, }; static const struct genl_small_ops gtp_genl_ops[] = { diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index 3dcdb9e33cba..40f5388d6de0 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -31,6 +31,9 @@ enum gtp_attrs { GTPA_I_TEI, /* for GTPv1 only */ GTPA_O_TEI, /* for GTPv1 only */ GTPA_PAD, + GTPA_PEER_ADDR6, + GTPA_MS_ADDR6, + GTPA_FAMILY, __GTPA_MAX, }; #define GTPA_MAX (__GTPA_MAX - 1) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e9f10860ec8e..6dc258993b17 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1466,6 +1466,8 @@ enum { IFLA_GTP_ROLE, IFLA_GTP_CREATE_SOCKETS, IFLA_GTP_RESTART_COUNT, + IFLA_GTP_LOCAL, + IFLA_GTP_LOCAL6, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) -- cgit v1.2.3 From c084ebd77a00b1a16d9daa57b6ecdfdf1f43c78a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 9 May 2024 20:10:10 +0200 Subject: tcp: socket option to check for MPTCP fallback to TCP A way for an application to know if an MPTCP connection fell back to TCP is to use getsockopt(MPTCP_INFO) and look for errors. The issue with this technique is that the same errors -- EOPNOTSUPP (IPv4) and ENOPROTOOPT (IPv6) -- are returned if there was a fallback, *or* if the kernel doesn't support this socket option. The userspace then has to look at the kernel version to understand what the errors mean. It is not clean, and it doesn't take into account older kernels where the socket option has been backported. A cleaner way would be to expose this info to the TCP socket level. In case of MPTCP socket where no fallback happened, the socket options for the TCP level will be handled in MPTCP code, in mptcp_getsockopt_sol_tcp(). If not, that will be in TCP code, in do_tcp_getsockopt(). So MPTCP simply has to set the value 1, while TCP has to set 0. If the socket option is not supported, one of these two errors will be reported: - EOPNOTSUPP (95 - Operation not supported) for MPTCP sockets - ENOPROTOOPT (92 - Protocol not available) for TCP sockets, e.g. on the socket received after an 'accept()', when the client didn't request to use MPTCP: this socket will be a TCP one, even if the listen socket was an MPTCP one. With this new option, the kernel can return a clear answer to both "Is this kernel new enough to tell me the fallback status?" and "If it is new enough, is it currently a TCP or MPTCP socket?" questions, while not breaking the previous method. Acked-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240509-upstream-net-next-20240509-mptcp-tcp_is_mptcp-v1-1-f846df999202@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 3 +++ net/mptcp/sockopt.c | 2 ++ 3 files changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index c07e9f90c084..dbf896f3146c 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -135,6 +135,8 @@ enum { #define TCP_AO_GET_KEYS 41 /* List MKT(s) */ #define TCP_AO_REPAIR 42 /* Get/Set SNEs and ISNs */ +#define TCP_IS_MPTCP 43 /* Is MPTCP being used? */ + #define TCP_REPAIR_ON 1 #define TCP_REPAIR_OFF 0 #define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 06aab937d60a..681b54e1f3a6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4363,6 +4363,9 @@ zerocopy_rcv_out: return err; } + case TCP_IS_MPTCP: + val = 0; + break; default: return -ENOPROTOOPT; } diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 1fea43f5b6f3..eaa3b79651a4 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1348,6 +1348,8 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); case TCP_NOTSENT_LOWAT: return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); + case TCP_IS_MPTCP: + return mptcp_put_int_option(msk, optval, optlen, 1); } return -EOPNOTSUPP; } -- cgit v1.2.3 From b56035101e1cdd9c4420ea5da17f09f87fb69285 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 10 May 2024 23:19:26 +0300 Subject: netdev: Add queue stats for TX stop and wake TX queue stop and wake are counted by some drivers. Support reporting these via netdev-genl queue stats. Signed-off-by: Daniel Jurgens Reviewed-by: Jiri Pirko Reviewed-by: Jason Xing Link: https://lore.kernel.org/r/20240510201927.1821109-2-danielj@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 14 ++++++++++++++ include/net/netdev_queues.h | 3 +++ include/uapi/linux/netdev.h | 2 ++ net/core/netdev-genl.c | 4 +++- tools/include/uapi/linux/netdev.h | 2 ++ 5 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 2be4b3714d17..11a32373365a 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -439,6 +439,20 @@ attribute-sets: Number of the packets dropped by the device due to the transmit packets bitrate exceeding the device rate limit. type: uint + - + name: tx-stop + doc: | + Number of times driver paused accepting new tx packets + from the stack to this queue, because the queue was full. + Note that if BQL is supported and enabled on the device + the networking stack will avoid queuing a lot of data at once. + type: uint + - + name: tx-wake + doc: | + Number of times driver re-started accepting send + requests to this queue from the stack. + type: uint operations: list: diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index e7b84f018cee..a8a7e48dfa6c 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -41,6 +41,9 @@ struct netdev_queue_stats_tx { u64 hw_gso_wire_bytes; u64 hw_drop_ratelimits; + + u64 stop; + u64 wake; }; /** diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index cf24f1d9adf8..a8188202413e 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -165,6 +165,8 @@ enum { NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, + NETDEV_A_QSTATS_TX_STOP, + NETDEV_A_QSTATS_TX_WAKE, __NETDEV_A_QSTATS_MAX, NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 4b5054087309..1f6ae6379e0f 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -517,7 +517,9 @@ netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx) netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) || - netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits)) + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake)) return -EMSGSIZE; return 0; } diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index cf24f1d9adf8..a8188202413e 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -165,6 +165,8 @@ enum { NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, + NETDEV_A_QSTATS_TX_STOP, + NETDEV_A_QSTATS_TX_WAKE, __NETDEV_A_QSTATS_MAX, NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) -- cgit v1.2.3 From 5c1672705a1a2389f5ad78e0fea6f08ed32d6f18 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 13 May 2024 08:41:55 -0700 Subject: net: revert partially applied PHY topology series The series is causing issues with PHY drivers built as modules. Since it was only partially applied and the merge window has opened let's revert and try again for v6.11. Revert 6916e461e793 ("net: phy: Introduce ethernet link topology representation") Revert 0ec5ed6c130e ("net: sfp: pass the phy_device when disconnecting an sfp module's PHY") Revert e75e4e074c44 ("net: phy: add helpers to handle sfp phy connect/disconnect") Revert fdd353965b52 ("net: sfp: Add helper to return the SFP bus name") Revert 841942bc6212 ("net: ethtool: Allow passing a phy index for some commands") Link: https://lore.kernel.org/all/171242462917.4000.9759453824684907063.git-patchwork-notify@kernel.org/ Link: https://lore.kernel.org/all/20240507102822.2023826-1-maxime.chevallier@bootlin.com/ Link: https://lore.kernel.org/r/20240513154156.104281-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/networking/ethtool-netlink.rst | 7 -- MAINTAINERS | 2 - drivers/net/phy/Makefile | 2 +- drivers/net/phy/marvell-88x2222.c | 2 - drivers/net/phy/marvell.c | 2 - drivers/net/phy/marvell10g.c | 2 - drivers/net/phy/phy_device.c | 55 -------------- drivers/net/phy/phy_link_topology.c | 105 --------------------------- drivers/net/phy/phylink.c | 3 +- drivers/net/phy/qcom/at803x.c | 2 - drivers/net/phy/qcom/qca807x.c | 2 - drivers/net/phy/sfp-bus.c | 15 +--- include/linux/netdevice.h | 4 +- include/linux/phy.h | 6 -- include/linux/phy_link_topology.h | 72 ------------------ include/linux/phy_link_topology_core.h | 25 ------- include/linux/sfp.h | 8 +- include/uapi/linux/ethtool.h | 16 ---- include/uapi/linux/ethtool_netlink.h | 1 - net/core/dev.c | 9 --- net/ethtool/netlink.c | 48 +----------- net/ethtool/netlink.h | 5 -- 22 files changed, 8 insertions(+), 385 deletions(-) delete mode 100644 drivers/net/phy/phy_link_topology.c delete mode 100644 include/linux/phy_link_topology.h delete mode 100644 include/linux/phy_link_topology_core.h (limited to 'include/uapi') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 8bc71f249448..160bfb0ae8ba 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -57,7 +57,6 @@ Structure of this header is ``ETHTOOL_A_HEADER_DEV_INDEX`` u32 device ifindex ``ETHTOOL_A_HEADER_DEV_NAME`` string device name ``ETHTOOL_A_HEADER_FLAGS`` u32 flags common for all requests - ``ETHTOOL_A_HEADER_PHY_INDEX`` u32 phy device index ============================== ====== ============================= ``ETHTOOL_A_HEADER_DEV_INDEX`` and ``ETHTOOL_A_HEADER_DEV_NAME`` identify the @@ -82,12 +81,6 @@ the behaviour is backward compatible, i.e. requests from old clients not aware of the flag should be interpreted the way the client expects. A client must not set flags it does not understand. -``ETHTOOL_A_HEADER_PHY_INDEX`` identifies the Ethernet PHY the message relates to. -As there are numerous commands that are related to PHY configuration, and because -there may be more than one PHY on the link, the PHY index can be passed in the -request for the commands that needs it. It is, however, not mandatory, and if it -is not passed for commands that target a PHY, the net_device.phydev pointer -is used. Bit sets ======== diff --git a/MAINTAINERS b/MAINTAINERS index 40eb26e6c60c..b4ed6480e919 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8027,8 +8027,6 @@ F: include/linux/mii.h F: include/linux/of_net.h F: include/linux/phy.h F: include/linux/phy_fixed.h -F: include/linux/phy_link_topology.h -F: include/linux/phy_link_topology_core.h F: include/linux/phylib_stubs.h F: include/linux/platform_data/mdio-bcm-unimac.h F: include/linux/platform_data/mdio-gpio.h diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 1d8be374915f..202ed7f450da 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -2,7 +2,7 @@ # Makefile for Linux PHY drivers libphy-y := phy.o phy-c45.o phy-core.o phy_device.o \ - linkmode.o phy_link_topology.o + linkmode.o mdio-bus-y += mdio_bus.o mdio_device.o ifdef CONFIG_MDIO_DEVICE diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c index 0b777cdd7078..b88398e6872b 100644 --- a/drivers/net/phy/marvell-88x2222.c +++ b/drivers/net/phy/marvell-88x2222.c @@ -553,8 +553,6 @@ static const struct sfp_upstream_ops sfp_phy_ops = { .link_down = mv2222_sfp_link_down, .attach = phy_sfp_attach, .detach = phy_sfp_detach, - .connect_phy = phy_sfp_connect_phy, - .disconnect_phy = phy_sfp_disconnect_phy, }; static int mv2222_probe(struct phy_device *phydev) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 9964bf3dea2f..b89fbffa6a93 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -3613,8 +3613,6 @@ static const struct sfp_upstream_ops m88e1510_sfp_ops = { .module_remove = m88e1510_sfp_remove, .attach = phy_sfp_attach, .detach = phy_sfp_detach, - .connect_phy = phy_sfp_connect_phy, - .disconnect_phy = phy_sfp_disconnect_phy, }; static int m88e1510_probe(struct phy_device *phydev) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 6642eb642d4b..ad43e280930c 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -503,8 +503,6 @@ static int mv3310_sfp_insert(void *upstream, const struct sfp_eeprom_id *id) static const struct sfp_upstream_ops mv3310_sfp_ops = { .attach = phy_sfp_attach, .detach = phy_sfp_detach, - .connect_phy = phy_sfp_connect_phy, - .disconnect_phy = phy_sfp_disconnect_phy, .module_insert = mv3310_sfp_insert, }; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 616bd7ba46cb..6c6ec9475709 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -277,14 +276,6 @@ static void phy_mdio_device_remove(struct mdio_device *mdiodev) static struct phy_driver genphy_driver; -static struct phy_link_topology *phy_get_link_topology(struct phy_device *phydev) -{ - if (phydev->attached_dev) - return phydev->attached_dev->link_topo; - - return NULL; -} - static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); @@ -1378,46 +1369,6 @@ phy_standalone_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(phy_standalone); -/** - * phy_sfp_connect_phy - Connect the SFP module's PHY to the upstream PHY - * @upstream: pointer to the upstream phy device - * @phy: pointer to the SFP module's phy device - * - * This helper allows keeping track of PHY devices on the link. It adds the - * SFP module's phy to the phy namespace of the upstream phy - */ -int phy_sfp_connect_phy(void *upstream, struct phy_device *phy) -{ - struct phy_device *phydev = upstream; - struct phy_link_topology *topo = phy_get_link_topology(phydev); - - if (topo) - return phy_link_topo_add_phy(topo, phy, PHY_UPSTREAM_PHY, phydev); - - return 0; -} -EXPORT_SYMBOL(phy_sfp_connect_phy); - -/** - * phy_sfp_disconnect_phy - Disconnect the SFP module's PHY from the upstream PHY - * @upstream: pointer to the upstream phy device - * @phy: pointer to the SFP module's phy device - * - * This helper allows keeping track of PHY devices on the link. It removes the - * SFP module's phy to the phy namespace of the upstream phy. As the module phy - * will be destroyed, re-inserting the same module will add a new phy with a - * new index. - */ -void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy) -{ - struct phy_device *phydev = upstream; - struct phy_link_topology *topo = phy_get_link_topology(phydev); - - if (topo) - phy_link_topo_del_phy(topo, phy); -} -EXPORT_SYMBOL(phy_sfp_disconnect_phy); - /** * phy_sfp_attach - attach the SFP bus to the PHY upstream network device * @upstream: pointer to the phy device @@ -1560,11 +1511,6 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, if (phydev->sfp_bus_attached) dev->sfp_bus = phydev->sfp_bus; - - err = phy_link_topo_add_phy(dev->link_topo, phydev, - PHY_UPSTREAM_MAC, dev); - if (err) - goto error; } /* Some Ethernet drivers try to connect to a PHY device before @@ -1992,7 +1938,6 @@ void phy_detach(struct phy_device *phydev) if (dev) { phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; - phy_link_topo_del_phy(dev->link_topo, phydev); } phydev->phylink = NULL; diff --git a/drivers/net/phy/phy_link_topology.c b/drivers/net/phy/phy_link_topology.c deleted file mode 100644 index 985941c5c558..000000000000 --- a/drivers/net/phy/phy_link_topology.c +++ /dev/null @@ -1,105 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Infrastructure to handle all PHY devices connected to a given netdev, - * either directly or indirectly attached. - * - * Copyright (c) 2023 Maxime Chevallier - */ - -#include -#include -#include -#include -#include - -struct phy_link_topology *phy_link_topo_create(struct net_device *dev) -{ - struct phy_link_topology *topo; - - topo = kzalloc(sizeof(*topo), GFP_KERNEL); - if (!topo) - return ERR_PTR(-ENOMEM); - - xa_init_flags(&topo->phys, XA_FLAGS_ALLOC1); - topo->next_phy_index = 1; - - return topo; -} - -void phy_link_topo_destroy(struct phy_link_topology *topo) -{ - if (!topo) - return; - - xa_destroy(&topo->phys); - kfree(topo); -} - -int phy_link_topo_add_phy(struct phy_link_topology *topo, - struct phy_device *phy, - enum phy_upstream upt, void *upstream) -{ - struct phy_device_node *pdn; - int ret; - - pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); - if (!pdn) - return -ENOMEM; - - pdn->phy = phy; - switch (upt) { - case PHY_UPSTREAM_MAC: - pdn->upstream.netdev = (struct net_device *)upstream; - if (phy_on_sfp(phy)) - pdn->parent_sfp_bus = pdn->upstream.netdev->sfp_bus; - break; - case PHY_UPSTREAM_PHY: - pdn->upstream.phydev = (struct phy_device *)upstream; - if (phy_on_sfp(phy)) - pdn->parent_sfp_bus = pdn->upstream.phydev->sfp_bus; - break; - default: - ret = -EINVAL; - goto err; - } - pdn->upstream_type = upt; - - /* Attempt to re-use a previously allocated phy_index */ - if (phy->phyindex) { - ret = xa_insert(&topo->phys, phy->phyindex, pdn, GFP_KERNEL); - - /* Errors could be either -ENOMEM or -EBUSY. If the phy has an - * index, and there's another entry at the same index, this is - * unexpected and we still error-out - */ - if (ret) - goto err; - return 0; - } - - ret = xa_alloc_cyclic(&topo->phys, &phy->phyindex, pdn, xa_limit_32b, - &topo->next_phy_index, GFP_KERNEL); - if (ret) - goto err; - - return 0; - -err: - kfree(pdn); - return ret; -} -EXPORT_SYMBOL_GPL(phy_link_topo_add_phy); - -void phy_link_topo_del_phy(struct phy_link_topology *topo, - struct phy_device *phy) -{ - struct phy_device_node *pdn = xa_erase(&topo->phys, phy->phyindex); - - /* We delete the PHY from the topology, however we don't re-set the - * phy->phyindex field. If the PHY isn't gone, we can re-assign it the - * same index next time it's added back to the topology - */ - - kfree(pdn); -} -EXPORT_SYMBOL_GPL(phy_link_topo_del_phy); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index b7e5c669dc8e..994471fad833 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -3411,8 +3411,7 @@ static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy) return ret; } -static void phylink_sfp_disconnect_phy(void *upstream, - struct phy_device *phydev) +static void phylink_sfp_disconnect_phy(void *upstream) { phylink_disconnect_phy(upstream); } diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c index 105602581a03..c8f83e5f78ab 100644 --- a/drivers/net/phy/qcom/at803x.c +++ b/drivers/net/phy/qcom/at803x.c @@ -770,8 +770,6 @@ static const struct sfp_upstream_ops at8031_sfp_ops = { .attach = phy_sfp_attach, .detach = phy_sfp_detach, .module_insert = at8031_sfp_insert, - .connect_phy = phy_sfp_connect_phy, - .disconnect_phy = phy_sfp_disconnect_phy, }; static int at8031_parse_dt(struct phy_device *phydev) diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c index 5eb0ab1cb70e..672c6929119a 100644 --- a/drivers/net/phy/qcom/qca807x.c +++ b/drivers/net/phy/qcom/qca807x.c @@ -699,8 +699,6 @@ static const struct sfp_upstream_ops qca807x_sfp_ops = { .detach = phy_sfp_detach, .module_insert = qca807x_sfp_insert, .module_remove = qca807x_sfp_remove, - .connect_phy = phy_sfp_connect_phy, - .disconnect_phy = phy_sfp_disconnect_phy, }; static int qca807x_probe(struct phy_device *phydev) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 37c85f1e6534..2f44fc51848f 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -487,7 +487,7 @@ static void sfp_unregister_bus(struct sfp_bus *bus) bus->socket_ops->stop(bus->sfp); bus->socket_ops->detach(bus->sfp); if (bus->phydev && ops && ops->disconnect_phy) - ops->disconnect_phy(bus->upstream, bus->phydev); + ops->disconnect_phy(bus->upstream); } bus->registered = false; } @@ -743,7 +743,7 @@ void sfp_remove_phy(struct sfp_bus *bus) const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus); if (ops && ops->disconnect_phy) - ops->disconnect_phy(bus->upstream, bus->phydev); + ops->disconnect_phy(bus->upstream); bus->phydev = NULL; } EXPORT_SYMBOL_GPL(sfp_remove_phy); @@ -860,14 +860,3 @@ void sfp_unregister_socket(struct sfp_bus *bus) sfp_bus_put(bus); } EXPORT_SYMBOL_GPL(sfp_unregister_socket); - -const char *sfp_get_name(struct sfp_bus *bus) -{ - ASSERT_RTNL(); - - if (bus->sfp_dev) - return dev_name(bus->sfp_dev); - - return NULL; -} -EXPORT_SYMBOL_GPL(sfp_get_name); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cf261fb89d73..d20c6c99eb88 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -40,6 +40,7 @@ #include #endif #include + #include #include #include @@ -51,7 +52,6 @@ #include #include #include -#include struct netpoll_info; struct device; @@ -1975,7 +1975,6 @@ enum netdev_reg_state { * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * * @priomap: XXX: need comments on this one - * @link_topo: Physical link topology tracking attached PHYs * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. @@ -2368,7 +2367,6 @@ struct net_device { #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif - struct phy_link_topology *link_topo; struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; diff --git a/include/linux/phy.h b/include/linux/phy.h index 3ddfe7fe781a..e6e83304558e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -550,9 +550,6 @@ struct macsec_ops; * @drv: Pointer to the driver for this PHY instance * @devlink: Create a link between phy dev and mac dev, if the external phy * used by current mac interface is managed by another mac interface. - * @phyindex: Unique id across the phy's parent tree of phys to address the PHY - * from userspace, similar to ifindex. A zero index means the PHY - * wasn't assigned an id yet. * @phy_id: UID for this device found during discovery * @c45_ids: 802.3-c45 Device Identifiers if is_c45. * @is_c45: Set to true if this PHY uses clause 45 addressing. @@ -653,7 +650,6 @@ struct phy_device { struct device_link *devlink; - u32 phyindex; u32 phy_id; struct phy_c45_device_ids c45_ids; @@ -1758,8 +1754,6 @@ int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); int __phy_resume(struct phy_device *phydev); int phy_loopback(struct phy_device *phydev, bool enable); -int phy_sfp_connect_phy(void *upstream, struct phy_device *phy); -void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy); void phy_sfp_attach(void *upstream, struct sfp_bus *bus); void phy_sfp_detach(void *upstream, struct sfp_bus *bus); int phy_sfp_probe(struct phy_device *phydev, diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h deleted file mode 100644 index 6b79feb607e7..000000000000 --- a/include/linux/phy_link_topology.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * PHY device list allow maintaining a list of PHY devices that are - * part of a netdevice's link topology. PHYs can for example be chained, - * as is the case when using a PHY that exposes an SFP module, on which an - * SFP transceiver that embeds a PHY is connected. - * - * This list can then be used by userspace to leverage individual PHY - * capabilities. - */ -#ifndef __PHY_LINK_TOPOLOGY_H -#define __PHY_LINK_TOPOLOGY_H - -#include -#include - -struct xarray; -struct phy_device; -struct net_device; -struct sfp_bus; - -struct phy_device_node { - enum phy_upstream upstream_type; - - union { - struct net_device *netdev; - struct phy_device *phydev; - } upstream; - - struct sfp_bus *parent_sfp_bus; - - struct phy_device *phy; -}; - -struct phy_link_topology { - struct xarray phys; - u32 next_phy_index; -}; - -static inline struct phy_device * -phy_link_topo_get_phy(struct phy_link_topology *topo, u32 phyindex) -{ - struct phy_device_node *pdn = xa_load(&topo->phys, phyindex); - - if (pdn) - return pdn->phy; - - return NULL; -} - -#if IS_REACHABLE(CONFIG_PHYLIB) -int phy_link_topo_add_phy(struct phy_link_topology *topo, - struct phy_device *phy, - enum phy_upstream upt, void *upstream); - -void phy_link_topo_del_phy(struct phy_link_topology *lt, struct phy_device *phy); - -#else -static inline int phy_link_topo_add_phy(struct phy_link_topology *topo, - struct phy_device *phy, - enum phy_upstream upt, void *upstream) -{ - return 0; -} - -static inline void phy_link_topo_del_phy(struct phy_link_topology *topo, - struct phy_device *phy) -{ -} -#endif - -#endif /* __PHY_LINK_TOPOLOGY_H */ diff --git a/include/linux/phy_link_topology_core.h b/include/linux/phy_link_topology_core.h deleted file mode 100644 index 0a6479055745..000000000000 --- a/include/linux/phy_link_topology_core.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PHY_LINK_TOPOLOGY_CORE_H -#define __PHY_LINK_TOPOLOGY_CORE_H - -struct phy_link_topology; - -#if IS_REACHABLE(CONFIG_PHYLIB) - -struct phy_link_topology *phy_link_topo_create(struct net_device *dev); -void phy_link_topo_destroy(struct phy_link_topology *topo); - -#else - -static inline struct phy_link_topology *phy_link_topo_create(struct net_device *dev) -{ - return NULL; -} - -static inline void phy_link_topo_destroy(struct phy_link_topology *topo) -{ -} - -#endif - -#endif /* __PHY_LINK_TOPOLOGY_CORE_H */ diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 5ebc57f78c95..a45da7eef9a2 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -544,7 +544,7 @@ struct sfp_upstream_ops { void (*link_down)(void *priv); void (*link_up)(void *priv); int (*connect_phy)(void *priv, struct phy_device *); - void (*disconnect_phy)(void *priv, struct phy_device *); + void (*disconnect_phy)(void *priv); }; #if IS_ENABLED(CONFIG_SFP) @@ -570,7 +570,6 @@ struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode); int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, const struct sfp_upstream_ops *ops); void sfp_bus_del_upstream(struct sfp_bus *bus); -const char *sfp_get_name(struct sfp_bus *bus); #else static inline int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, @@ -649,11 +648,6 @@ static inline int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, static inline void sfp_bus_del_upstream(struct sfp_bus *bus) { } - -static inline const char *sfp_get_name(struct sfp_bus *bus) -{ - return NULL; -} #endif #endif diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 041e09c3515d..8733a3117902 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2323,20 +2323,4 @@ struct ethtool_link_settings { * __u32 map_lp_advertising[link_mode_masks_nwords]; */ }; - -/** - * enum phy_upstream - Represents the upstream component a given PHY device - * is connected to, as in what is on the other end of the MII bus. Most PHYs - * will be attached to an Ethernet MAC controller, but in some cases, there's - * an intermediate PHY used as a media-converter, which will driver another - * MII interface as its output. - * @PHY_UPSTREAM_MAC: Upstream component is a MAC (a switch port, - * or ethernet controller) - * @PHY_UPSTREAM_PHY: Upstream component is a PHY (likely a media converter) - */ -enum phy_upstream { - PHY_UPSTREAM_MAC, - PHY_UPSTREAM_PHY, -}; - #endif /* _UAPI_LINUX_ETHTOOL_H */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index f17dbe54bf5e..b49b804b9495 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -132,7 +132,6 @@ enum { ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ ETHTOOL_A_HEADER_DEV_NAME, /* string */ ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ - ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_HEADER_CNT, diff --git a/net/core/dev.c b/net/core/dev.c index d2ce91a334c1..e1bb6d7856d9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -158,7 +158,6 @@ #include #include #include -#include #include "dev.h" #include "net-sysfs.h" @@ -10998,12 +10997,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #ifdef CONFIG_NET_SCHED hash_init(dev->qdisc_hash); #endif - dev->link_topo = phy_link_topo_create(dev); - if (IS_ERR(dev->link_topo)) { - dev->link_topo = NULL; - goto free_all; - } - dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -11092,8 +11085,6 @@ void free_netdev(struct net_device *dev) free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; - phy_link_topo_destroy(dev->link_topo); - /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED || dev->reg_state == NETREG_DUMMY) { diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 563e94e0cbd8..bd04f28d5cf4 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -4,7 +4,6 @@ #include #include #include "netlink.h" -#include static struct genl_family ethtool_genl_family; @@ -31,24 +30,6 @@ const struct nla_policy ethnl_header_policy_stats[] = { ETHTOOL_FLAGS_STATS), }; -const struct nla_policy ethnl_header_policy_phy[] = { - [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 }, - [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING, - .len = ALTIFNAMSIZ - 1 }, - [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, - ETHTOOL_FLAGS_BASIC), - [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), -}; - -const struct nla_policy ethnl_header_policy_phy_stats[] = { - [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 }, - [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING, - .len = ALTIFNAMSIZ - 1 }, - [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, - ETHTOOL_FLAGS_STATS), - [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), -}; - int ethnl_ops_begin(struct net_device *dev) { int ret; @@ -108,9 +89,8 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, const struct nlattr *header, struct net *net, struct netlink_ext_ack *extack, bool require_dev) { - struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy_phy)]; + struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)]; const struct nlattr *devname_attr; - struct phy_device *phydev = NULL; struct net_device *dev = NULL; u32 flags = 0; int ret; @@ -124,7 +104,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, /* No validation here, command policy should have a nested policy set * for the header, therefore validation should have already been done. */ - ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy_phy) - 1, header, + ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header, NULL, extack); if (ret < 0) return ret; @@ -165,30 +145,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, return -EINVAL; } - if (dev) { - if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) { - struct nlattr *phy_id; - - phy_id = tb[ETHTOOL_A_HEADER_PHY_INDEX]; - phydev = phy_link_topo_get_phy(dev->link_topo, - nla_get_u32(phy_id)); - if (!phydev) { - NL_SET_BAD_ATTR(extack, phy_id); - return -ENODEV; - } - } else { - /* If we need a PHY but no phy index is specified, fallback - * to dev->phydev - */ - phydev = dev->phydev; - } - } else if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) { - NL_SET_ERR_MSG_ATTR(extack, header, - "can't target a PHY without a netdev"); - return -EINVAL; - } - - req_info->phydev = phydev; req_info->dev = dev; req_info->flags = flags; return 0; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index d57a890b5d9e..9a333a8d04c1 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -250,7 +250,6 @@ static inline unsigned int ethnl_reply_header_size(void) * @dev: network device the request is for (may be null) * @dev_tracker: refcount tracker for @dev reference * @flags: request flags common for all request types - * @phydev: phy_device connected to @dev this request is for (may be null) * * This is a common base for request specific structures holding data from * parsed userspace request. These always embed struct ethnl_req_info at @@ -260,7 +259,6 @@ struct ethnl_req_info { struct net_device *dev; netdevice_tracker dev_tracker; u32 flags; - struct phy_device *phydev; }; static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) @@ -397,12 +395,9 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops; extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; -extern const struct ethnl_request_ops ethnl_phy_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; -extern const struct nla_policy ethnl_header_policy_phy[ETHTOOL_A_HEADER_PHY_INDEX + 1]; -extern const struct nla_policy ethnl_header_policy_phy_stats[ETHTOOL_A_HEADER_PHY_INDEX + 1]; extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_ONLY + 1]; extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1]; extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1]; -- cgit v1.2.3 From 84a4bb6548a29326564f0e659fb8064503ecc1c7 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 6 May 2024 18:33:52 -0400 Subject: Bluetooth: HCI: Remove HCI_AMP support Since BT_HS has been remove HCI_AMP controllers no longer has any use so remove it along with the capability of creating AMP controllers. Since we no longer need to differentiate between AMP and Primary controllers, as only HCI_PRIMARY is left, this also remove hdev->dev_type altogether. Fixes: e7b02296fb40 ("Bluetooth: Remove BT_HS") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmrvl_main.c | 9 --- drivers/bluetooth/btrsi.c | 1 - drivers/bluetooth/btsdio.c | 8 --- drivers/bluetooth/btusb.c | 5 -- drivers/bluetooth/hci_bcm4377.c | 1 - drivers/bluetooth/hci_ldisc.c | 6 -- drivers/bluetooth/hci_serdev.c | 5 -- drivers/bluetooth/hci_uart.h | 1 - drivers/bluetooth/hci_vhci.c | 10 +-- drivers/bluetooth/virtio_bt.c | 2 - include/net/bluetooth/hci.h | 114 ------------------------------ include/net/bluetooth/hci_core.h | 46 +----------- include/uapi/linux/virtio_bt.h | 1 - net/bluetooth/hci_conn.c | 3 +- net/bluetooth/hci_core.c | 132 +++-------------------------------- net/bluetooth/hci_event.c | 147 --------------------------------------- net/bluetooth/hci_sock.c | 5 +- net/bluetooth/hci_sync.c | 112 ++--------------------------- net/bluetooth/l2cap_core.c | 21 ++---- net/bluetooth/mgmt.c | 84 +++++++--------------- 20 files changed, 49 insertions(+), 664 deletions(-) (limited to 'include/uapi') diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c index 9658b33c824a..18f34998a120 100644 --- a/drivers/bluetooth/btmrvl_main.c +++ b/drivers/bluetooth/btmrvl_main.c @@ -121,13 +121,6 @@ int btmrvl_process_event(struct btmrvl_private *priv, struct sk_buff *skb) ((event->data[2] == MODULE_BROUGHT_UP) || (event->data[2] == MODULE_ALREADY_UP)) ? "Bring-up succeed" : "Bring-up failed"); - - if (event->length > 3 && event->data[3]) - priv->btmrvl_dev.dev_type = HCI_AMP; - else - priv->btmrvl_dev.dev_type = HCI_PRIMARY; - - BT_DBG("dev_type: %d", priv->btmrvl_dev.dev_type); } else if (priv->btmrvl_dev.sendcmdflag && event->data[1] == MODULE_SHUTDOWN_REQ) { BT_DBG("EVENT:%s", (event->data[2]) ? @@ -686,8 +679,6 @@ int btmrvl_register_hdev(struct btmrvl_private *priv) hdev->wakeup = btmrvl_wakeup; SET_HCIDEV_DEV(hdev, &card->func->dev); - hdev->dev_type = priv->btmrvl_dev.dev_type; - ret = hci_register_dev(hdev); if (ret < 0) { BT_ERR("Can not register HCI device"); diff --git a/drivers/bluetooth/btrsi.c b/drivers/bluetooth/btrsi.c index 634cf8f5ed2d..0c91d7635ac3 100644 --- a/drivers/bluetooth/btrsi.c +++ b/drivers/bluetooth/btrsi.c @@ -134,7 +134,6 @@ static int rsi_hci_attach(void *priv, struct rsi_proto_ops *ops) hdev->bus = HCI_USB; hci_set_drvdata(hdev, h_adapter); - hdev->dev_type = HCI_PRIMARY; hdev->open = rsi_hci_open; hdev->close = rsi_hci_close; hdev->flush = rsi_hci_flush; diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index f19d31ee37ea..fdcfe9c50313 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -32,9 +32,6 @@ static const struct sdio_device_id btsdio_table[] = { /* Generic Bluetooth Type-B SDIO device */ { SDIO_DEVICE_CLASS(SDIO_CLASS_BT_B) }, - /* Generic Bluetooth AMP controller */ - { SDIO_DEVICE_CLASS(SDIO_CLASS_BT_AMP) }, - { } /* Terminating entry */ }; @@ -319,11 +316,6 @@ static int btsdio_probe(struct sdio_func *func, hdev->bus = HCI_SDIO; hci_set_drvdata(hdev, data); - if (id->class == SDIO_CLASS_BT_AMP) - hdev->dev_type = HCI_AMP; - else - hdev->dev_type = HCI_PRIMARY; - data->hdev = hdev; SET_HCIDEV_DEV(hdev, &func->dev); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 90d1f1697357..e384ef6ff050 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -4359,11 +4359,6 @@ static int btusb_probe(struct usb_interface *intf, hdev->bus = HCI_USB; hci_set_drvdata(hdev, data); - if (id->driver_info & BTUSB_AMP) - hdev->dev_type = HCI_AMP; - else - hdev->dev_type = HCI_PRIMARY; - data->hdev = hdev; SET_HCIDEV_DEV(hdev, &intf->dev); diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c index 9a7243d5db71..0c2f15235b4c 100644 --- a/drivers/bluetooth/hci_bcm4377.c +++ b/drivers/bluetooth/hci_bcm4377.c @@ -2361,7 +2361,6 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id) bcm4377->hdev = hdev; hdev->bus = HCI_PCI; - hdev->dev_type = HCI_PRIMARY; hdev->open = bcm4377_hci_open; hdev->close = bcm4377_hci_close; hdev->send = bcm4377_hci_send_frame; diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index a26367e9fb19..17a2f158a0df 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -667,11 +667,6 @@ static int hci_uart_register_dev(struct hci_uart *hu) if (!test_bit(HCI_UART_RESET_ON_INIT, &hu->hdev_flags)) set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); - if (test_bit(HCI_UART_CREATE_AMP, &hu->hdev_flags)) - hdev->dev_type = HCI_AMP; - else - hdev->dev_type = HCI_PRIMARY; - /* Only call open() for the protocol after hdev is fully initialized as * open() (or a timer/workqueue it starts) may attempt to reference it. */ @@ -722,7 +717,6 @@ static int hci_uart_set_flags(struct hci_uart *hu, unsigned long flags) { unsigned long valid_flags = BIT(HCI_UART_RAW_DEVICE) | BIT(HCI_UART_RESET_ON_INIT) | - BIT(HCI_UART_CREATE_AMP) | BIT(HCI_UART_INIT_PENDING) | BIT(HCI_UART_EXT_CONFIG) | BIT(HCI_UART_VND_DETECT); diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 85c0d9b68f5f..89a22e9b3253 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -366,11 +366,6 @@ int hci_uart_register_device_priv(struct hci_uart *hu, if (test_bit(HCI_UART_EXT_CONFIG, &hu->hdev_flags)) set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks); - if (test_bit(HCI_UART_CREATE_AMP, &hu->hdev_flags)) - hdev->dev_type = HCI_AMP; - else - hdev->dev_type = HCI_PRIMARY; - if (test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) return 0; diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index 68c8c7e95d64..00bf7ae82c5b 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -37,7 +37,6 @@ #define HCI_UART_RAW_DEVICE 0 #define HCI_UART_RESET_ON_INIT 1 -#define HCI_UART_CREATE_AMP 2 #define HCI_UART_INIT_PENDING 3 #define HCI_UART_EXT_CONFIG 4 #define HCI_UART_VND_DETECT 5 diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 572d68d52965..28750a40f0ed 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -384,17 +384,10 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) { struct hci_dev *hdev; struct sk_buff *skb; - __u8 dev_type; if (data->hdev) return -EBADFD; - /* bits 0-1 are dev_type (Primary or AMP) */ - dev_type = opcode & 0x03; - - if (dev_type != HCI_PRIMARY && dev_type != HCI_AMP) - return -EINVAL; - /* bits 2-5 are reserved (must be zero) */ if (opcode & 0x3c) return -EINVAL; @@ -412,7 +405,6 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) data->hdev = hdev; hdev->bus = HCI_VIRTUAL; - hdev->dev_type = dev_type; hci_set_drvdata(hdev, data); hdev->open = vhci_open_dev; @@ -634,7 +626,7 @@ static void vhci_open_timeout(struct work_struct *work) struct vhci_data *data = container_of(work, struct vhci_data, open_timeout.work); - vhci_create_device(data, amp ? HCI_AMP : HCI_PRIMARY); + vhci_create_device(data, 0x00); } static int vhci_open(struct inode *inode, struct file *file) diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 2ac70b560c46..18208e152a36 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -274,7 +274,6 @@ static int virtbt_probe(struct virtio_device *vdev) switch (type) { case VIRTIO_BT_CONFIG_TYPE_PRIMARY: - case VIRTIO_BT_CONFIG_TYPE_AMP: break; default: return -EINVAL; @@ -303,7 +302,6 @@ static int virtbt_probe(struct virtio_device *vdev) vbt->hdev = hdev; hdev->bus = HCI_VIRTIO; - hdev->dev_type = type; hci_set_drvdata(hdev, vbt); hdev->open = virtbt_open; diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index de530262d824..fe932ca3bc8c 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -33,9 +33,6 @@ #define HCI_MAX_FRAME_SIZE (HCI_MAX_ACL_SIZE + 4) #define HCI_LINK_KEY_SIZE 16 -#define HCI_AMP_LINK_KEY_SIZE (2 * HCI_LINK_KEY_SIZE) - -#define HCI_MAX_AMP_ASSOC_SIZE 672 #define HCI_MAX_CPB_DATA_SIZE 252 @@ -71,26 +68,6 @@ #define HCI_SMD 9 #define HCI_VIRTIO 10 -/* HCI controller types */ -#define HCI_PRIMARY 0x00 -#define HCI_AMP 0x01 - -/* First BR/EDR Controller shall have ID = 0 */ -#define AMP_ID_BREDR 0x00 - -/* AMP controller types */ -#define AMP_TYPE_BREDR 0x00 -#define AMP_TYPE_80211 0x01 - -/* AMP controller status */ -#define AMP_STATUS_POWERED_DOWN 0x00 -#define AMP_STATUS_BLUETOOTH_ONLY 0x01 -#define AMP_STATUS_NO_CAPACITY 0x02 -#define AMP_STATUS_LOW_CAPACITY 0x03 -#define AMP_STATUS_MEDIUM_CAPACITY 0x04 -#define AMP_STATUS_HIGH_CAPACITY 0x05 -#define AMP_STATUS_FULL_CAPACITY 0x06 - /* HCI device quirks */ enum { /* When this quirk is set, the HCI Reset command is send when @@ -527,7 +504,6 @@ enum { #define ESCO_LINK 0x02 /* Low Energy links do not have defined link type. Use invented one */ #define LE_LINK 0x80 -#define AMP_LINK 0x81 #define ISO_LINK 0x82 #define INVALID_LINK 0xff @@ -943,56 +919,6 @@ struct hci_cp_io_capability_neg_reply { __u8 reason; } __packed; -#define HCI_OP_CREATE_PHY_LINK 0x0435 -struct hci_cp_create_phy_link { - __u8 phy_handle; - __u8 key_len; - __u8 key_type; - __u8 key[HCI_AMP_LINK_KEY_SIZE]; -} __packed; - -#define HCI_OP_ACCEPT_PHY_LINK 0x0436 -struct hci_cp_accept_phy_link { - __u8 phy_handle; - __u8 key_len; - __u8 key_type; - __u8 key[HCI_AMP_LINK_KEY_SIZE]; -} __packed; - -#define HCI_OP_DISCONN_PHY_LINK 0x0437 -struct hci_cp_disconn_phy_link { - __u8 phy_handle; - __u8 reason; -} __packed; - -struct ext_flow_spec { - __u8 id; - __u8 stype; - __le16 msdu; - __le32 sdu_itime; - __le32 acc_lat; - __le32 flush_to; -} __packed; - -#define HCI_OP_CREATE_LOGICAL_LINK 0x0438 -#define HCI_OP_ACCEPT_LOGICAL_LINK 0x0439 -struct hci_cp_create_accept_logical_link { - __u8 phy_handle; - struct ext_flow_spec tx_flow_spec; - struct ext_flow_spec rx_flow_spec; -} __packed; - -#define HCI_OP_DISCONN_LOGICAL_LINK 0x043a -struct hci_cp_disconn_logical_link { - __le16 log_handle; -} __packed; - -#define HCI_OP_LOGICAL_LINK_CANCEL 0x043b -struct hci_cp_logical_link_cancel { - __u8 phy_handle; - __u8 flow_spec_id; -} __packed; - #define HCI_OP_ENHANCED_SETUP_SYNC_CONN 0x043d struct hci_coding_format { __u8 id; @@ -1614,46 +1540,6 @@ struct hci_rp_read_enc_key_size { __u8 key_size; } __packed; -#define HCI_OP_READ_LOCAL_AMP_INFO 0x1409 -struct hci_rp_read_local_amp_info { - __u8 status; - __u8 amp_status; - __le32 total_bw; - __le32 max_bw; - __le32 min_latency; - __le32 max_pdu; - __u8 amp_type; - __le16 pal_cap; - __le16 max_assoc_size; - __le32 max_flush_to; - __le32 be_flush_to; -} __packed; - -#define HCI_OP_READ_LOCAL_AMP_ASSOC 0x140a -struct hci_cp_read_local_amp_assoc { - __u8 phy_handle; - __le16 len_so_far; - __le16 max_len; -} __packed; -struct hci_rp_read_local_amp_assoc { - __u8 status; - __u8 phy_handle; - __le16 rem_len; - __u8 frag[]; -} __packed; - -#define HCI_OP_WRITE_REMOTE_AMP_ASSOC 0x140b -struct hci_cp_write_remote_amp_assoc { - __u8 phy_handle; - __le16 len_so_far; - __le16 rem_len; - __u8 frag[]; -} __packed; -struct hci_rp_write_remote_amp_assoc { - __u8 status; - __u8 phy_handle; -} __packed; - #define HCI_OP_GET_MWS_TRANSPORT_CONFIG 0x140c #define HCI_OP_ENABLE_DUT_MODE 0x1803 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index be012f393bcb..431d0c8a0e27 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -126,7 +126,6 @@ enum suspended_state { struct hci_conn_hash { struct list_head list; unsigned int acl_num; - unsigned int amp_num; unsigned int sco_num; unsigned int iso_num; unsigned int le_num; @@ -341,14 +340,6 @@ struct adv_monitor { /* Default authenticated payload timeout 30s */ #define DEFAULT_AUTH_PAYLOAD_TIMEOUT 0x0bb8 -struct amp_assoc { - __u16 len; - __u16 offset; - __u16 rem_len; - __u16 len_so_far; - __u8 data[HCI_MAX_AMP_ASSOC_SIZE]; -}; - #define HCI_MAX_PAGES 3 struct hci_dev { @@ -361,7 +352,6 @@ struct hci_dev { unsigned long flags; __u16 id; __u8 bus; - __u8 dev_type; bdaddr_t bdaddr; bdaddr_t setup_addr; bdaddr_t public_addr; @@ -467,21 +457,6 @@ struct hci_dev { __u16 sniff_min_interval; __u16 sniff_max_interval; - __u8 amp_status; - __u32 amp_total_bw; - __u32 amp_max_bw; - __u32 amp_min_latency; - __u32 amp_max_pdu; - __u8 amp_type; - __u16 amp_pal_cap; - __u16 amp_assoc_size; - __u32 amp_max_flush_to; - __u32 amp_be_flush_to; - - struct amp_assoc loc_assoc; - - __u8 flow_ctl_mode; - unsigned int auto_accept_delay; unsigned long quirks; @@ -501,11 +476,6 @@ struct hci_dev { unsigned int le_pkts; unsigned int iso_pkts; - __u16 block_len; - __u16 block_mtu; - __u16 num_blocks; - __u16 block_cnt; - unsigned long acl_last_tx; unsigned long sco_last_tx; unsigned long le_last_tx; @@ -778,7 +748,6 @@ struct hci_conn { void *l2cap_data; void *sco_data; void *iso_data; - struct amp_mgr *amp_mgr; struct list_head link_list; struct hci_conn *parent; @@ -805,7 +774,6 @@ struct hci_chan { struct sk_buff_head data_q; unsigned int sent; __u8 state; - bool amp; }; struct hci_conn_params { @@ -1014,9 +982,6 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) case ACL_LINK: h->acl_num++; break; - case AMP_LINK: - h->amp_num++; - break; case LE_LINK: h->le_num++; if (c->role == HCI_ROLE_SLAVE) @@ -1043,9 +1008,6 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) case ACL_LINK: h->acl_num--; break; - case AMP_LINK: - h->amp_num--; - break; case LE_LINK: h->le_num--; if (c->role == HCI_ROLE_SLAVE) @@ -1067,8 +1029,6 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) switch (type) { case ACL_LINK: return h->acl_num; - case AMP_LINK: - return h->amp_num; case LE_LINK: return h->le_num; case SCO_LINK: @@ -1085,7 +1045,7 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev) { struct hci_conn_hash *c = &hdev->conn_hash; - return c->acl_num + c->amp_num + c->sco_num + c->le_num + c->iso_num; + return c->acl_num + c->sco_num + c->le_num + c->iso_num; } static inline bool hci_conn_valid(struct hci_dev *hdev, struct hci_conn *conn) @@ -1610,10 +1570,6 @@ static inline void hci_conn_drop(struct hci_conn *conn) } break; - case AMP_LINK: - timeo = conn->disc_timeout; - break; - default: timeo = 0; break; diff --git a/include/uapi/linux/virtio_bt.h b/include/uapi/linux/virtio_bt.h index af798f4c9680..3cc7d633456b 100644 --- a/include/uapi/linux/virtio_bt.h +++ b/include/uapi/linux/virtio_bt.h @@ -13,7 +13,6 @@ enum virtio_bt_config_type { VIRTIO_BT_CONFIG_TYPE_PRIMARY = 0, - VIRTIO_BT_CONFIG_TYPE_AMP = 1, }; enum virtio_bt_config_vendor { diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f21e7c73021d..0c76dcde5361 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1168,8 +1168,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) list_for_each_entry(d, &hci_dev_list, list) { if (!test_bit(HCI_UP, &d->flags) || - hci_dev_test_flag(d, HCI_USER_CHANNEL) || - d->dev_type != HCI_PRIMARY) + hci_dev_test_flag(d, HCI_USER_CHANNEL)) continue; /* Simple routing: diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b52125512985..7bc431af7b04 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -402,11 +402,6 @@ int hci_inquiry(void __user *arg) goto done; } - if (hdev->dev_type != HCI_PRIMARY) { - err = -EOPNOTSUPP; - goto done; - } - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; @@ -759,11 +754,6 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) goto done; } - if (hdev->dev_type != HCI_PRIMARY) { - err = -EOPNOTSUPP; - goto done; - } - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; @@ -917,7 +907,7 @@ int hci_get_dev_info(void __user *arg) strscpy(di.name, hdev->name, sizeof(di.name)); di.bdaddr = hdev->bdaddr; - di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); + di.type = (hdev->bus & 0x0f); di.flags = flags; di.pkt_type = hdev->pkt_type; if (lmp_bredr_capable(hdev)) { @@ -1033,8 +1023,7 @@ static void hci_power_on(struct work_struct *work) */ if (hci_dev_test_flag(hdev, HCI_RFKILLED) || hci_dev_test_flag(hdev, HCI_UNCONFIGURED) || - (hdev->dev_type == HCI_PRIMARY && - !bacmp(&hdev->bdaddr, BDADDR_ANY) && + (!bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { hci_dev_clear_flag(hdev, HCI_AUTO_OFF); hci_dev_do_close(hdev); @@ -2642,21 +2631,7 @@ int hci_register_dev(struct hci_dev *hdev) if (!hdev->open || !hdev->close || !hdev->send) return -EINVAL; - /* Do not allow HCI_AMP devices to register at index 0, - * so the index can be used as the AMP controller ID. - */ - switch (hdev->dev_type) { - case HCI_PRIMARY: - id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL); - break; - case HCI_AMP: - id = ida_alloc_range(&hci_index_ida, 1, HCI_MAX_ID - 1, - GFP_KERNEL); - break; - default: - return -EINVAL; - } - + id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL); if (id < 0) return id; @@ -2708,12 +2683,10 @@ int hci_register_dev(struct hci_dev *hdev) hci_dev_set_flag(hdev, HCI_SETUP); hci_dev_set_flag(hdev, HCI_AUTO_OFF); - if (hdev->dev_type == HCI_PRIMARY) { - /* Assume BR/EDR support until proven otherwise (such as - * through reading supported features during init. - */ - hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); - } + /* Assume BR/EDR support until proven otherwise (such as + * through reading supported features during init. + */ + hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); write_lock(&hci_dev_list_lock); list_add(&hdev->list, &hci_dev_list); @@ -3249,17 +3222,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue, hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; - switch (hdev->dev_type) { - case HCI_PRIMARY: - hci_add_acl_hdr(skb, conn->handle, flags); - break; - case HCI_AMP: - hci_add_acl_hdr(skb, chan->handle, flags); - break; - default: - bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type); - return; - } + hci_add_acl_hdr(skb, conn->handle, flags); list = skb_shinfo(skb)->frag_list; if (!list) { @@ -3419,9 +3382,6 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote) case ACL_LINK: cnt = hdev->acl_cnt; break; - case AMP_LINK: - cnt = hdev->block_cnt; - break; case SCO_LINK: case ESCO_LINK: cnt = hdev->sco_cnt; @@ -3619,12 +3579,6 @@ static void hci_prio_recalculate(struct hci_dev *hdev, __u8 type) } -static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb) -{ - /* Calculate count of blocks used by this packet */ - return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len); -} - static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type) { unsigned long last_tx; @@ -3738,81 +3692,15 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) hci_prio_recalculate(hdev, ACL_LINK); } -static void hci_sched_acl_blk(struct hci_dev *hdev) -{ - unsigned int cnt = hdev->block_cnt; - struct hci_chan *chan; - struct sk_buff *skb; - int quote; - u8 type; - - BT_DBG("%s", hdev->name); - - if (hdev->dev_type == HCI_AMP) - type = AMP_LINK; - else - type = ACL_LINK; - - __check_timeout(hdev, cnt, type); - - while (hdev->block_cnt > 0 && - (chan = hci_chan_sent(hdev, type, "e))) { - u32 priority = (skb_peek(&chan->data_q))->priority; - while (quote > 0 && (skb = skb_peek(&chan->data_q))) { - int blocks; - - BT_DBG("chan %p skb %p len %d priority %u", chan, skb, - skb->len, skb->priority); - - /* Stop if priority has changed */ - if (skb->priority < priority) - break; - - skb = skb_dequeue(&chan->data_q); - - blocks = __get_blocks(hdev, skb); - if (blocks > hdev->block_cnt) - return; - - hci_conn_enter_active_mode(chan->conn, - bt_cb(skb)->force_active); - - hci_send_frame(hdev, skb); - hdev->acl_last_tx = jiffies; - - hdev->block_cnt -= blocks; - quote -= blocks; - - chan->sent += blocks; - chan->conn->sent += blocks; - } - } - - if (cnt != hdev->block_cnt) - hci_prio_recalculate(hdev, type); -} - static void hci_sched_acl(struct hci_dev *hdev) { BT_DBG("%s", hdev->name); /* No ACL link over BR/EDR controller */ - if (!hci_conn_num(hdev, ACL_LINK) && hdev->dev_type == HCI_PRIMARY) - return; - - /* No AMP link over AMP controller */ - if (!hci_conn_num(hdev, AMP_LINK) && hdev->dev_type == HCI_AMP) + if (!hci_conn_num(hdev, ACL_LINK)) return; - switch (hdev->flow_ctl_mode) { - case HCI_FLOW_CTL_MODE_PACKET_BASED: - hci_sched_acl_pkt(hdev); - break; - - case HCI_FLOW_CTL_MODE_BLOCK_BASED: - hci_sched_acl_blk(hdev); - break; - } + hci_sched_acl_pkt(hdev); } static void hci_sched_le(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 97c6f34ba640..a487f9df8145 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -913,21 +913,6 @@ static u8 hci_cc_read_local_ext_features(struct hci_dev *hdev, void *data, return rp->status; } -static u8 hci_cc_read_flow_control_mode(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_read_flow_control_mode *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - hdev->flow_ctl_mode = rp->mode; - - return rp->status; -} - static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -1071,28 +1056,6 @@ static u8 hci_cc_write_page_scan_type(struct hci_dev *hdev, void *data, return rp->status; } -static u8 hci_cc_read_data_block_size(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_read_data_block_size *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - hdev->block_mtu = __le16_to_cpu(rp->max_acl_len); - hdev->block_len = __le16_to_cpu(rp->block_len); - hdev->num_blocks = __le16_to_cpu(rp->num_blocks); - - hdev->block_cnt = hdev->num_blocks; - - BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu, - hdev->block_cnt, hdev->block_len); - - return rp->status; -} - static u8 hci_cc_read_clock(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -1127,30 +1090,6 @@ unlock: return rp->status; } -static u8 hci_cc_read_local_amp_info(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_read_local_amp_info *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - hdev->amp_status = rp->amp_status; - hdev->amp_total_bw = __le32_to_cpu(rp->total_bw); - hdev->amp_max_bw = __le32_to_cpu(rp->max_bw); - hdev->amp_min_latency = __le32_to_cpu(rp->min_latency); - hdev->amp_max_pdu = __le32_to_cpu(rp->max_pdu); - hdev->amp_type = rp->amp_type; - hdev->amp_pal_cap = __le16_to_cpu(rp->pal_cap); - hdev->amp_assoc_size = __le16_to_cpu(rp->max_assoc_size); - hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to); - hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to); - - return rp->status; -} - static u8 hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -4120,12 +4059,6 @@ static const struct hci_cc { HCI_CC(HCI_OP_READ_PAGE_SCAN_TYPE, hci_cc_read_page_scan_type, sizeof(struct hci_rp_read_page_scan_type)), HCI_CC_STATUS(HCI_OP_WRITE_PAGE_SCAN_TYPE, hci_cc_write_page_scan_type), - HCI_CC(HCI_OP_READ_DATA_BLOCK_SIZE, hci_cc_read_data_block_size, - sizeof(struct hci_rp_read_data_block_size)), - HCI_CC(HCI_OP_READ_FLOW_CONTROL_MODE, hci_cc_read_flow_control_mode, - sizeof(struct hci_rp_read_flow_control_mode)), - HCI_CC(HCI_OP_READ_LOCAL_AMP_INFO, hci_cc_read_local_amp_info, - sizeof(struct hci_rp_read_local_amp_info)), HCI_CC(HCI_OP_READ_CLOCK, hci_cc_read_clock, sizeof(struct hci_rp_read_clock)), HCI_CC(HCI_OP_READ_ENC_KEY_SIZE, hci_cc_read_enc_key_size, @@ -4461,11 +4394,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, flex_array_size(ev, handles, ev->num))) return; - if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) { - bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode); - return; - } - bt_dev_dbg(hdev, "num %d", ev->num); for (i = 0; i < ev->num; i++) { @@ -4533,78 +4461,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, queue_work(hdev->workqueue, &hdev->tx_work); } -static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev, - __u16 handle) -{ - struct hci_chan *chan; - - switch (hdev->dev_type) { - case HCI_PRIMARY: - return hci_conn_hash_lookup_handle(hdev, handle); - case HCI_AMP: - chan = hci_chan_lookup_handle(hdev, handle); - if (chan) - return chan->conn; - break; - default: - bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type); - break; - } - - return NULL; -} - -static void hci_num_comp_blocks_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_ev_num_comp_blocks *ev = data; - int i; - - if (!hci_ev_skb_pull(hdev, skb, HCI_EV_NUM_COMP_BLOCKS, - flex_array_size(ev, handles, ev->num_hndl))) - return; - - if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) { - bt_dev_err(hdev, "wrong event for mode %d", - hdev->flow_ctl_mode); - return; - } - - bt_dev_dbg(hdev, "num_blocks %d num_hndl %d", ev->num_blocks, - ev->num_hndl); - - for (i = 0; i < ev->num_hndl; i++) { - struct hci_comp_blocks_info *info = &ev->handles[i]; - struct hci_conn *conn = NULL; - __u16 handle, block_count; - - handle = __le16_to_cpu(info->handle); - block_count = __le16_to_cpu(info->blocks); - - conn = __hci_conn_lookup_handle(hdev, handle); - if (!conn) - continue; - - conn->sent -= block_count; - - switch (conn->type) { - case ACL_LINK: - case AMP_LINK: - hdev->block_cnt += block_count; - if (hdev->block_cnt > hdev->num_blocks) - hdev->block_cnt = hdev->num_blocks; - break; - - default: - bt_dev_err(hdev, "unknown type %d conn %p", - conn->type, conn); - break; - } - } - - queue_work(hdev->workqueue, &hdev->tx_work); -} - static void hci_mode_change_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -7520,9 +7376,6 @@ static const struct hci_ev { /* [0x3e = HCI_EV_LE_META] */ HCI_EV_REQ_VL(HCI_EV_LE_META, hci_le_meta_evt, sizeof(struct hci_ev_le_meta), HCI_MAX_EVENT_SIZE), - /* [0x48 = HCI_EV_NUM_COMP_BLOCKS] */ - HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt, - sizeof(struct hci_ev_num_comp_blocks)), /* [0xff = HCI_EV_VENDOR] */ HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE), }; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 703b84bd48d5..69c2ba1e843e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -485,7 +485,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return NULL; ni = skb_put(skb, HCI_MON_NEW_INDEX_SIZE); - ni->type = hdev->dev_type; + ni->type = 0x00; /* Old hdev->dev_type */ ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name, @@ -1007,9 +1007,6 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return -EOPNOTSUPP; - if (hdev->dev_type != HCI_PRIMARY) - return -EOPNOTSUPP; - switch (cmd) { case HCISETRAW: if (!capable(CAP_NET_ADMIN)) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 813c11d61bca..891cae8a30da 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3511,10 +3511,6 @@ static int hci_unconf_init_sync(struct hci_dev *hdev) /* Read Local Supported Features. */ static int hci_read_local_features_sync(struct hci_dev *hdev) { - /* Not all AMP controllers support this command */ - if (hdev->dev_type == HCI_AMP && !(hdev->commands[14] & 0x20)) - return 0; - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL, HCI_CMD_TIMEOUT); } @@ -3549,51 +3545,6 @@ static int hci_read_local_cmds_sync(struct hci_dev *hdev) return 0; } -/* Read Local AMP Info */ -static int hci_read_local_amp_info_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_AMP_INFO, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* Read Data Blk size */ -static int hci_read_data_block_size_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* Read Flow Control Mode */ -static int hci_read_flow_control_mode_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* Read Location Data */ -static int hci_read_location_data_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCATION_DATA, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* AMP Controller init stage 1 command sequence */ -static const struct hci_init_stage amp_init1[] = { - /* HCI_OP_READ_LOCAL_VERSION */ - HCI_INIT(hci_read_local_version_sync), - /* HCI_OP_READ_LOCAL_COMMANDS */ - HCI_INIT(hci_read_local_cmds_sync), - /* HCI_OP_READ_LOCAL_AMP_INFO */ - HCI_INIT(hci_read_local_amp_info_sync), - /* HCI_OP_READ_DATA_BLOCK_SIZE */ - HCI_INIT(hci_read_data_block_size_sync), - /* HCI_OP_READ_FLOW_CONTROL_MODE */ - HCI_INIT(hci_read_flow_control_mode_sync), - /* HCI_OP_READ_LOCATION_DATA */ - HCI_INIT(hci_read_location_data_sync), - {} -}; - static int hci_init1_sync(struct hci_dev *hdev) { int err; @@ -3607,28 +3558,9 @@ static int hci_init1_sync(struct hci_dev *hdev) return err; } - switch (hdev->dev_type) { - case HCI_PRIMARY: - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; - return hci_init_stage_sync(hdev, br_init1); - case HCI_AMP: - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; - return hci_init_stage_sync(hdev, amp_init1); - default: - bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type); - break; - } - - return 0; + return hci_init_stage_sync(hdev, br_init1); } -/* AMP Controller init stage 2 command sequence */ -static const struct hci_init_stage amp_init2[] = { - /* HCI_OP_READ_LOCAL_FEATURES */ - HCI_INIT(hci_read_local_features_sync), - {} -}; - /* Read Buffer Size (ACL mtu, max pkt, etc.) */ static int hci_read_buffer_size_sync(struct hci_dev *hdev) { @@ -3886,9 +3818,6 @@ static int hci_init2_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, ""); - if (hdev->dev_type == HCI_AMP) - return hci_init_stage_sync(hdev, amp_init2); - err = hci_init_stage_sync(hdev, hci_init2); if (err) return err; @@ -4716,13 +4645,6 @@ static int hci_init_sync(struct hci_dev *hdev) if (err < 0) return err; - /* HCI_PRIMARY covers both single-mode LE, BR/EDR and dual-mode - * BR/EDR/LE type controllers. AMP controllers only need the - * first two stages of init. - */ - if (hdev->dev_type != HCI_PRIMARY) - return 0; - err = hci_init3_sync(hdev); if (err < 0) return err; @@ -4951,12 +4873,8 @@ int hci_dev_open_sync(struct hci_dev *hdev) * In case of user channel usage, it is not important * if a public address or static random address is * available. - * - * This check is only valid for BR/EDR controllers - * since AMP controllers do not have an address. */ if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && - hdev->dev_type == HCI_PRIMARY && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY)) { ret = -EADDRNOTAVAIL; @@ -4991,8 +4909,7 @@ int hci_dev_open_sync(struct hci_dev *hdev) !hci_dev_test_flag(hdev, HCI_CONFIG) && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && - hci_dev_test_flag(hdev, HCI_MGMT) && - hdev->dev_type == HCI_PRIMARY) { + hci_dev_test_flag(hdev, HCI_MGMT)) { ret = hci_powered_update_sync(hdev); mgmt_power_on(hdev, ret); } @@ -5137,8 +5054,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); - if (!auto_off && hdev->dev_type == HCI_PRIMARY && - !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + if (!auto_off && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_MGMT)) __mgmt_power_off(hdev); @@ -5200,9 +5116,6 @@ int hci_dev_close_sync(struct hci_dev *hdev) hdev->flags &= BIT(HCI_RAW); hci_dev_clear_volatile_flags(hdev); - /* Controller radio is available but is currently powered down */ - hdev->amp_status = AMP_STATUS_POWERED_DOWN; - memset(hdev->eir, 0, sizeof(hdev->eir)); memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); bacpy(&hdev->random_addr, BDADDR_ANY); @@ -5239,8 +5152,7 @@ static int hci_power_on_sync(struct hci_dev *hdev) */ if (hci_dev_test_flag(hdev, HCI_RFKILLED) || hci_dev_test_flag(hdev, HCI_UNCONFIGURED) || - (hdev->dev_type == HCI_PRIMARY && - !bacmp(&hdev->bdaddr, BDADDR_ANY) && + (!bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { hci_dev_clear_flag(hdev, HCI_AUTO_OFF); hci_dev_close_sync(hdev); @@ -5342,27 +5254,11 @@ int hci_stop_discovery_sync(struct hci_dev *hdev) return 0; } -static int hci_disconnect_phy_link_sync(struct hci_dev *hdev, u16 handle, - u8 reason) -{ - struct hci_cp_disconn_phy_link cp; - - memset(&cp, 0, sizeof(cp)); - cp.phy_handle = HCI_PHY_HANDLE(handle); - cp.reason = reason; - - return __hci_cmd_sync_status(hdev, HCI_OP_DISCONN_PHY_LINK, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); -} - static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { struct hci_cp_disconnect cp; - if (conn->type == AMP_LINK) - return hci_disconnect_phy_link_sync(hdev, conn->handle, reason); - if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) { /* This is a BIS connection, hci_conn_del will * do the necessary cleanup. diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index adc8f990f13e..5b509b767557 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3932,7 +3932,7 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, } static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, - u8 *data, u8 rsp_code, u8 amp_id) + u8 *data, u8 rsp_code) { struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; @@ -4011,17 +4011,8 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, status = L2CAP_CS_AUTHOR_PEND; chan->ops->defer(chan); } else { - /* Force pending result for AMP controllers. - * The connection will succeed after the - * physical link is up. - */ - if (amp_id == AMP_ID_BREDR) { - l2cap_state_change(chan, BT_CONFIG); - result = L2CAP_CR_SUCCESS; - } else { - l2cap_state_change(chan, BT_CONNECT2); - result = L2CAP_CR_PEND; - } + l2cap_state_change(chan, BT_CONNECT2); + result = L2CAP_CR_PEND; status = L2CAP_CS_NO_INFO; } } else { @@ -4086,7 +4077,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn, mgmt_device_connected(hdev, hcon, NULL, 0); hci_dev_unlock(hdev); - l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0); + l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP); return 0; } @@ -7491,10 +7482,6 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) struct l2cap_conn *conn = hcon->l2cap_data; int len; - /* For AMP controller do not create l2cap conn */ - if (!conn && hcon->hdev->dev_type != HCI_PRIMARY) - goto drop; - if (!conn) conn = l2cap_conn_add(hcon); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 965f621ef865..80f220b7e19d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -443,8 +443,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (d->dev_type == HCI_PRIMARY && - !hci_dev_test_flag(d, HCI_UNCONFIGURED)) + if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } @@ -468,8 +467,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; - if (d->dev_type == HCI_PRIMARY && - !hci_dev_test_flag(d, HCI_UNCONFIGURED)) { + if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); bt_dev_dbg(hdev, "Added hci%u", d->id); } @@ -503,8 +501,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (d->dev_type == HCI_PRIMARY && - hci_dev_test_flag(d, HCI_UNCONFIGURED)) + if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } @@ -528,8 +525,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; - if (d->dev_type == HCI_PRIMARY && - hci_dev_test_flag(d, HCI_UNCONFIGURED)) { + if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); bt_dev_dbg(hdev, "Added hci%u", d->id); } @@ -561,10 +557,8 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, read_lock(&hci_dev_list_lock); count = 0; - list_for_each_entry(d, &hci_dev_list, list) { - if (d->dev_type == HCI_PRIMARY || d->dev_type == HCI_AMP) - count++; - } + list_for_each_entry(d, &hci_dev_list, list) + count++; rp = kmalloc(struct_size(rp, entry, count), GFP_ATOMIC); if (!rp) { @@ -585,16 +579,10 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; - if (d->dev_type == HCI_PRIMARY) { - if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) - rp->entry[count].type = 0x01; - else - rp->entry[count].type = 0x00; - } else if (d->dev_type == HCI_AMP) { - rp->entry[count].type = 0x02; - } else { - continue; - } + if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) + rp->entry[count].type = 0x01; + else + rp->entry[count].type = 0x00; rp->entry[count].bus = d->bus; rp->entry[count++].index = cpu_to_le16(d->id); @@ -9331,23 +9319,14 @@ void mgmt_index_added(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - switch (hdev->dev_type) { - case HCI_PRIMARY: - if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { - mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, - NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); - ev.type = 0x01; - } else { - mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, - HCI_MGMT_INDEX_EVENTS); - ev.type = 0x00; - } - break; - case HCI_AMP: - ev.type = 0x02; - break; - default: - return; + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { + mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0, + HCI_MGMT_UNCONF_INDEX_EVENTS); + ev.type = 0x01; + } else { + mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, + HCI_MGMT_INDEX_EVENTS); + ev.type = 0x00; } ev.bus = hdev->bus; @@ -9364,25 +9343,16 @@ void mgmt_index_removed(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - switch (hdev->dev_type) { - case HCI_PRIMARY: - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); + mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); - if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { - mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, - NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); - ev.type = 0x01; - } else { - mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, - HCI_MGMT_INDEX_EVENTS); - ev.type = 0x00; - } - break; - case HCI_AMP: - ev.type = 0x02; - break; - default: - return; + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { + mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, + HCI_MGMT_UNCONF_INDEX_EVENTS); + ev.type = 0x01; + } else { + mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, + HCI_MGMT_INDEX_EVENTS); + ev.type = 0x00; } ev.bus = hdev->bus; -- cgit v1.2.3