diff options
Diffstat (limited to 'include/net')
128 files changed, 3916 insertions, 1571 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index 05c7df41d737..c61a1bf4e3de 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -39,7 +39,7 @@ struct tc_action { struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie __rcu *act_cookie; - struct tcf_chain *goto_chain; + struct tcf_chain __rcu *goto_chain; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt @@ -80,7 +80,7 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) struct tc_action_ops { struct list_head head; char kind[IFNAMSIZ]; - __u32 type; /* TBD to match kind */ + enum tca_id id; /* identifier should match kind */ size_t size; struct module *owner; int (*act)(struct sk_buff *, const struct tc_action *, @@ -90,7 +90,7 @@ struct tc_action_ops { int (*lookup)(struct net *net, struct tc_action **a, u32 index); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack); int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, @@ -181,6 +181,11 @@ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); +int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, + struct tcf_chain **handle, + struct netlink_ext_ack *newchain); +struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, + struct tcf_chain *newchain); #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, @@ -194,35 +199,5 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, #endif } -#ifdef CONFIG_NET_CLS_ACT -int tc_setup_cb_egdev_register(const struct net_device *dev, - 
tc_setup_cb_t *cb, void *cb_priv); -void tc_setup_cb_egdev_unregister(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv); -int tc_setup_cb_egdev_call(const struct net_device *dev, - enum tc_setup_type type, void *type_data, - bool err_stop); -#else -static inline -int tc_setup_cb_egdev_register(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ - return 0; -} - -static inline -void tc_setup_cb_egdev_unregister(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ -} - -static inline -int tc_setup_cb_egdev_call(const struct net_device *dev, - enum tc_setup_type type, void *type_data, - bool err_stop) -{ - return 0; -} -#endif #endif diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 1656c5978498..2f67ae854ff0 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -49,6 +49,7 @@ struct prefix_info { struct in6_addr prefix; }; +#include <linux/ipv6.h> #include <linux/netdevice.h> #include <net/if_inet6.h> #include <net/ipv6.h> @@ -201,6 +202,15 @@ u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, /* * multicast prototypes (mcast.c) */ +static inline int ipv6_mc_may_pull(struct sk_buff *skb, + unsigned int len) +{ + if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len) + return -EINVAL; + + return pskb_may_pull(skb, len); +} + int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, @@ -219,7 +229,8 @@ void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); -int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed); +int ipv6_mc_check_icmpv6(struct sk_buff *skb); +int ipv6_mc_check_mld(struct sk_buff *skb); void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr 
*group, @@ -227,52 +238,6 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, void ipv6_mc_dad_complete(struct inet6_dev *idev); -/* A stub used by vxlan module. This is ugly, ideally these - * symbols should be built into the core kernel. - */ -struct ipv6_stub { - int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex, - const struct in6_addr *addr); - int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, - const struct in6_addr *addr); - int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, - struct dst_entry **dst, struct flowi6 *fl6); - - struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); - struct fib6_info *(*fib6_lookup)(struct net *net, int oif, - struct flowi6 *fl6, int flags); - struct fib6_info *(*fib6_table_lookup)(struct net *net, - struct fib6_table *table, - int oif, struct flowi6 *fl6, - int flags); - struct fib6_info *(*fib6_multipath_select)(const struct net *net, - struct fib6_info *f6i, - struct flowi6 *fl6, int oif, - const struct sk_buff *skb, - int strict); - u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr, - struct in6_addr *saddr); - - void (*udpv6_encap_enable)(void); - void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, - const struct in6_addr *solicited_addr, - bool router, bool solicited, bool override, bool inc_opt); - struct neigh_table *nd_tbl; -}; -extern const struct ipv6_stub *ipv6_stub __read_mostly; - -/* A stub used by bpf helpers. 
Similarly ugly as ipv6_stub */ -struct ipv6_bpf_stub { - int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, - bool force_bind_address_no_port, bool with_lock); - struct sock *(*udp6_lib_lookup)(struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif, int sdif, struct udp_table *tbl, - struct sk_buff *skb); -}; -extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; - /* * identify MLD packets for MLD filter exceptions */ @@ -413,6 +378,14 @@ static inline void in6_dev_hold(struct inet6_dev *idev) refcount_inc(&idev->refcnt); } +/* called with rcu_read_lock held */ +static inline bool ip6_ignore_linkdown(const struct net_device *dev) +{ + const struct inet6_dev *idev = __in6_dev_get(dev); + + return !!idev->cnf.ignore_routes_with_linkdown; +} + void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); static inline void in6_ifa_put(struct inet6_ifaddr *ifp) @@ -489,6 +462,20 @@ static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr) #endif } +static inline bool ipv6_addr_is_all_snoopers(const struct in6_addr *addr) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + __be64 *p = (__be64 *)addr; + + return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | + (p[1] ^ cpu_to_be64(0x6a))) == 0UL; +#else + return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | + addr->s6_addr32[1] | addr->s6_addr32[2] | + (addr->s6_addr32[3] ^ htonl(0x0000006a))) == 0; +#endif +} + #ifdef CONFIG_PROC_FS int if6_proc_init(void); void if6_proc_exit(void); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 1adefe42c0a6..78c856cba4f5 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -21,18 +21,6 @@ struct socket; struct rxrpc_call; /* - * Call completion condition (state == RXRPC_CALL_COMPLETE). 
- */ -enum rxrpc_call_completion { - RXRPC_CALL_SUCCEEDED, /* - Normal termination */ - RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ - RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ - RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ - RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ - NR__RXRPC_CALL_COMPLETIONS -}; - -/* * Debug ID counter for tracing. */ extern atomic_t rxrpc_debug_id; @@ -73,14 +61,12 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t, unsigned int); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); -int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, - struct sockaddr_rxrpc *, struct key *); -int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, - enum rxrpc_call_completion *, u32 *); -u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); +bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *, + u32 *); void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, ktime_t *); +bool rxrpc_kernel_call_is_complete(struct rxrpc_call *); #endif /* _NET_RXRPC_H */ diff --git a/include/net/af_unix.h b/include/net/af_unix.h index ddbba838d048..3426d6dacc45 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -10,6 +10,7 @@ void unix_inflight(struct user_struct *user, struct file *fp); void unix_notinflight(struct user_struct *user, struct file *fp); +void unix_destruct_scm(struct sk_buff *skb); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); diff --git a/include/net/arp.h b/include/net/arp.h index 977aabfcdc03..c8f580a0e6b1 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -18,6 +18,7 @@ static inline u32 
arp_hashfn(const void *pkey, const struct net_device *dev, u32 return val * hash_rnd[0]; } +#ifdef CONFIG_INET static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) @@ -25,6 +26,13 @@ static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); } +#else +static inline +struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) +{ + return NULL; +} +#endif static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) { diff --git a/include/net/ax25.h b/include/net/ax25.h index 3f9aea8087e3..8b7eb46ad72d 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -201,6 +201,18 @@ static inline void ax25_hold_route(ax25_route *ax25_rt) void __ax25_put_route(ax25_route *ax25_rt); +extern rwlock_t ax25_route_lock; + +static inline void ax25_route_lock_use(void) +{ + read_lock(&ax25_route_lock); +} + +static inline void ax25_route_lock_unuse(void) +{ + read_unlock(&ax25_route_lock); +} + static inline void ax25_put_route(ax25_route *ax25_rt) { if (refcount_dec_and_test(&ax25_rt->refcount)) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index ec9d6bc65855..fabee6db0abb 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -276,7 +276,7 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); int bt_sock_wait_ready(struct sock *sk, unsigned long flags); -void bt_accept_enqueue(struct sock *parent, struct sock *sk); +void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh); void bt_accept_unlink(struct sock *sk); struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 
c36dc1e20556..9a5330eed794 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -158,6 +158,18 @@ enum { */ HCI_QUIRK_INVALID_BDADDR, + /* When this quirk is set, the public Bluetooth address + * initially reported by HCI Read BD Address command + * is considered invalid. The public BD Address can be + * specified in the fwnode property 'local-bd-address'. + * If this property does not exist or is invalid controller + * configuration is required before this device can be used. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_USE_BDADDR_PROPERTY, + /* When this quirk is set, the duplicate filtering during * scanning is based on Bluetooth devices addresses. To allow * RSSI based updates, restart scanning if needed. @@ -270,6 +282,7 @@ enum { HCI_FORCE_BREDR_SMP, HCI_FORCE_STATIC_ADDR, HCI_LL_RPA_RESOLUTION, + HCI_CMD_PENDING, __HCI_NUM_FLAGS, }; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e5ea633ea368..05b1b96f4d9e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -190,6 +190,9 @@ struct adv_info { #define HCI_MAX_SHORT_NAME_LENGTH 10 +/* Min encryption key size to match with SMP */ +#define HCI_MIN_ENC_KEY_SIZE 7 + /* Default LE RPA expiry time, 15 minutes */ #define HCI_DEFAULT_RPA_TIMEOUT (15 * 60) @@ -437,6 +440,7 @@ struct hci_dev { int (*post_init)(struct hci_dev *hdev); int (*set_diag)(struct hci_dev *hdev, bool enable); int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr); + void (*cmd_timeout)(struct hci_dev *hdev); }; #define HCI_PHY_HANDLE(handle) (handle & 0xff) diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index fc3111515f5c..c781e1afd683 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -180,6 +180,19 @@ struct port; #pragma pack(8) #endif +struct bond_3ad_stats { + atomic64_t lacpdu_rx; + atomic64_t lacpdu_tx; + atomic64_t 
lacpdu_unknown_rx; + atomic64_t lacpdu_illegal_rx; + + atomic64_t marker_rx; + atomic64_t marker_tx; + atomic64_t marker_resp_rx; + atomic64_t marker_resp_tx; + atomic64_t marker_unknown_rx; +}; + /* aggregator structure(43.4.5 in the 802.3ad standard) */ typedef struct aggregator { struct mac_addr aggregator_mac_address; @@ -265,6 +278,7 @@ struct ad_system { struct ad_bond_info { struct ad_system system; /* 802.3ad system structure */ + struct bond_3ad_stats stats; u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ u16 aggregator_identifier; }; @@ -272,6 +286,7 @@ struct ad_bond_info { struct ad_slave_info { struct aggregator aggregator; /* 802.3ad aggregator structure */ struct port port; /* 802.3ad port structure */ + struct bond_3ad_stats stats; u16 id; }; @@ -307,5 +322,7 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, int bond_3ad_set_carrier(struct bonding *bond); void bond_3ad_update_lacp_rate(struct bonding *bond); void bond_3ad_update_ad_actor_settings(struct bonding *bond); +int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats); +size_t bond_3ad_stats_size(void); #endif /* _NET_BOND_3AD_H */ diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h new file mode 100644 index 000000000000..b9dcb02e756b --- /dev/null +++ b/include/net/bpf_sk_storage.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Facebook */ +#ifndef _BPF_SK_STORAGE_H +#define _BPF_SK_STORAGE_H + +struct sock; + +void bpf_sk_storage_free(struct sock *sk); + +extern const struct bpf_func_proto bpf_sk_storage_get_proto; +extern const struct bpf_func_proto bpf_sk_storage_delete_proto; + +#endif /* _BPF_SK_STORAGE_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1fa41b7a1be3..87dae868707e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6,7 +6,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * 
Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -485,6 +485,7 @@ struct vif_params { * with the get_key() callback, must be in little endian, * length given by @seq_len. * @seq_len: length of @seq. + * @mode: key install mode (RX_TX, NO_TX or SET_TX) */ struct key_params { const u8 *key; @@ -492,6 +493,7 @@ struct key_params { int key_len; int seq_len; u32 cipher; + enum nl80211_key_mode mode; }; /** @@ -777,8 +779,10 @@ struct cfg80211_crypto_settings { * @probe_resp: probe response template (AP mode only) * @ftm_responder: enable FTM responder functionality; -1 for no change * (which also implies no change in LCI/civic location data) - * @lci: LCI subelement content - * @civicloc: Civic location subelement content + * @lci: Measurement Report element content, starting with Measurement Token + * (measurement type 8) + * @civicloc: Measurement Report element content, starting with Measurement + * Token (measurement type 11) * @lci_len: LCI data length * @civicloc_len: Civic location data length */ @@ -834,6 +838,17 @@ struct cfg80211_bitrate_mask { }; /** + * enum cfg80211_ap_settings_flags - AP settings flags + * + * Used by cfg80211_ap_settings + * + * @AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external authentication + */ +enum cfg80211_ap_settings_flags { + AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = BIT(0), +}; + +/** * struct cfg80211_ap_settings - AP configuration * * Used to configure an AP interface. 
@@ -863,6 +878,7 @@ struct cfg80211_bitrate_mask { * @he_cap: HE capabilities (or %NULL if HE isn't enabled) * @ht_required: stations must support HT * @vht_required: stations must support VHT + * @flags: flags, as defined in enum cfg80211_ap_settings_flags */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -888,6 +904,7 @@ struct cfg80211_ap_settings { const struct ieee80211_vht_cap *vht_cap; const struct ieee80211_he_cap_elem *he_cap; bool ht_required, vht_required; + u32 flags; }; /** @@ -958,6 +975,27 @@ enum station_parameters_apply_mask { STATION_PARAM_APPLY_UAPSD = BIT(0), STATION_PARAM_APPLY_CAPABILITY = BIT(1), STATION_PARAM_APPLY_PLINK_STATE = BIT(2), + STATION_PARAM_APPLY_STA_TXPOWER = BIT(3), +}; + +/** + * struct sta_txpwr - station txpower configuration + * + * Used to configure txpower for station. + * + * @power: tx power (in dBm) to be used for sending data traffic. If tx power + * is not provided, the default per-interface tx power setting will be + * overriding. Driver should be picking up the lowest tx power, either tx + * power per-interface or per-station. + * @type: In particular if TPC %type is NL80211_TX_POWER_LIMITED then tx power + * will be less than or equal to specified from userspace, whereas if TPC + * %type is NL80211_TX_POWER_AUTOMATIC then it indicates default tx power. + * NL80211_TX_POWER_FIXED is not a valid configuration option for + * per peer TPC. 
+ */ +struct sta_txpwr { + s16 power; + enum nl80211_tx_power_setting type; }; /** @@ -1001,6 +1039,7 @@ enum station_parameters_apply_mask { * @support_p2p_ps: information if station supports P2P PS mechanism * @he_capa: HE capabilities of station * @he_capa_len: the length of the HE capabilities + * @airtime_weight: airtime scheduler weight for this station */ struct station_parameters { const u8 *supported_rates; @@ -1030,6 +1069,8 @@ struct station_parameters { int support_p2p_ps; const struct ieee80211_he_cap_elem *he_capa; u8 he_capa_len; + u16 airtime_weight; + struct sta_txpwr txpwr; }; /** @@ -1296,7 +1337,10 @@ struct cfg80211_tid_stats { * @rx_beacon: number of beacons received from this peer * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received * from this peer + * @connected_to_gate: true if mesh STA has a path to mesh gate * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer + * @tx_duration: aggregate PPDU duration(usecs) for all the frames to a peer + * @airtime_weight: current airtime scheduling weight * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last * (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs. * Note that this doesn't use the @filled bit, but is used if non-NULL. @@ -1307,6 +1351,7 @@ struct cfg80211_tid_stats { * @fcs_err_count: number of packets (MPDUs) received from this station with * an FCS error. This counter should be incremented only when TA of the * received packet with an FCS error matches the peer MAC address. + * @airtime_link_metric: mesh airtime link metric. 
*/ struct station_info { u64 filled; @@ -1347,15 +1392,22 @@ struct station_info { u32 expected_throughput; - u64 rx_beacon; + u64 tx_duration; u64 rx_duration; + u64 rx_beacon; u8 rx_beacon_signal_avg; + u8 connected_to_gate; + struct cfg80211_tid_stats *pertid; s8 ack_signal; s8 avg_ack_signal; + u16 airtime_weight; + u32 rx_mpdu_count; u32 fcs_err_count; + + u32 airtime_link_metric; }; #if IS_ENABLED(CONFIG_CFG80211) @@ -1417,6 +1469,8 @@ enum monitor_flags { * @MPATH_INFO_DISCOVERY_TIMEOUT: @discovery_timeout filled * @MPATH_INFO_DISCOVERY_RETRIES: @discovery_retries filled * @MPATH_INFO_FLAGS: @flags filled + * @MPATH_INFO_HOP_COUNT: @hop_count filled + * @MPATH_INFO_PATH_CHANGE: @path_change_count filled */ enum mpath_info_flags { MPATH_INFO_FRAME_QLEN = BIT(0), @@ -1426,6 +1480,8 @@ enum mpath_info_flags { MPATH_INFO_DISCOVERY_TIMEOUT = BIT(4), MPATH_INFO_DISCOVERY_RETRIES = BIT(5), MPATH_INFO_FLAGS = BIT(6), + MPATH_INFO_HOP_COUNT = BIT(7), + MPATH_INFO_PATH_CHANGE = BIT(8), }; /** @@ -1445,6 +1501,8 @@ enum mpath_info_flags { * This number should increase every time the list of mesh paths * changes, i.e. when a station is added or removed, so that * userspace can tell whether it got a consistent snapshot. + * @hop_count: hops to destination + * @path_change_count: total number of path changes to destination */ struct mpath_info { u32 filled; @@ -1455,6 +1513,8 @@ struct mpath_info { u32 discovery_timeout; u8 discovery_retries; u8 flags; + u8 hop_count; + u32 path_change_count; int generation; }; @@ -1559,6 +1619,10 @@ struct bss_parameters { * @plink_timeout: If no tx activity is seen from a STA we've established * peering with for longer than this time (in seconds), then remove it * from the STA's list of peers. Default is 30 minutes. + * @dot11MeshConnectedToMeshGate: if set to true, advertise that this STA is + * connected to a mesh gate in mesh formation info. 
If false, the + * value in mesh formation is determined by the presence of root paths + * in the mesh path table */ struct mesh_config { u16 dot11MeshRetryTimeout; @@ -1578,6 +1642,7 @@ struct mesh_config { u16 dot11MeshHWMPperrMinInterval; u16 dot11MeshHWMPnetDiameterTraversalTime; u8 dot11MeshHWMPRootMode; + bool dot11MeshConnectedToMeshGate; u16 dot11MeshHWMPRannInterval; bool dot11MeshGateAnnouncementProtocol; bool dot11MeshForwarding; @@ -1794,11 +1859,19 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) * @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match * or no match (RSSI only) * @rssi_thold: don't report scan results below this threshold (in s32 dBm) + * @per_band_rssi_thold: Minimum rssi threshold for each band to be applied + * for filtering out scan results received. Drivers advertize this support + * of band specific rssi based filtering through the feature capability + * %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band + * specific rssi thresholds take precedence over rssi_thold, if specified. + * If not specified for any band, it will be assigned with rssi_thold of + * corresponding matchset. */ struct cfg80211_match_set { struct cfg80211_ssid ssid; u8 bssid[ETH_ALEN]; s32 rssi_thold; + s32 per_band_rssi_thold[NUM_NL80211_BANDS]; }; /** @@ -1997,9 +2070,15 @@ struct cfg80211_bss_ies { * a BSS that hides the SSID in its beacon, this points to the BSS struct * that holds the beacon data. @beacon_ies is still valid, of course, and * points to the same data as hidden_beacon_bss->beacon_ies in that case. + * @transmitted_bss: pointer to the transmitted BSS, if this is a + * non-transmitted one (multi-BSSID support) + * @nontrans_list: list of non-transmitted BSS, if this is a transmitted one + * (multi-BSSID support) * @signal: signal strength value (type depends on the wiphy's signal_type) * @chains: bitmask for filled values in @chain_signal. 
* @chain_signal: per-chain signal strength of last received BSS in dBm. + * @bssid_index: index in the multiple BSS set + * @max_bssid_indicator: max number of members in the BSS set * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes */ struct cfg80211_bss { @@ -2011,6 +2090,8 @@ struct cfg80211_bss { const struct cfg80211_bss_ies __rcu *proberesp_ies; struct cfg80211_bss *hidden_beacon_bss; + struct cfg80211_bss *transmitted_bss; + struct list_head nontrans_list; s32 signal; @@ -2021,19 +2102,36 @@ struct cfg80211_bss { u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; + u8 bssid_index; + u8 max_bssid_indicator; + u8 priv[0] __aligned(sizeof(void *)); }; /** + * ieee80211_bss_get_elem - find element with given ID + * @bss: the bss to search + * @id: the element ID + * + * Note that the return value is an RCU-protected pointer, so + * rcu_read_lock() must be held when calling this function. + * Return: %NULL if not found. + */ +const struct element *ieee80211_bss_get_elem(struct cfg80211_bss *bss, u8 id); + +/** * ieee80211_bss_get_ie - find IE with given ID * @bss: the bss to search - * @ie: the IE ID + * @id: the element ID * * Note that the return value is an RCU-protected pointer, so * rcu_read_lock() must be held when calling this function. * Return: %NULL if not found. */ -const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); +static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id) +{ + return (void *)ieee80211_bss_get_elem(bss, id); +} /** @@ -2381,6 +2479,8 @@ enum wiphy_params_flags { WIPHY_PARAM_TXQ_QUANTUM = 1 << 8, }; +#define IEEE80211_DEFAULT_AIRTIME_WEIGHT 256 + /** * struct cfg80211_pmksa - PMK Security Association * @@ -2805,6 +2905,7 @@ struct cfg80211_pmk_conf { * use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space cannot give you * the real status code for failures. Used only for the authentication * response command interface (user space to driver). 
+ * @pmkid: The identifier to refer a PMKSA. */ struct cfg80211_external_auth_params { enum nl80211_external_auth_action action; @@ -2812,10 +2913,11 @@ struct cfg80211_external_auth_params { struct cfg80211_ssid ssid; unsigned int key_mgmt_suite; u16 status; + const u8 *pmkid; }; /** - * cfg80211_ftm_responder_stats - FTM responder statistics + * struct cfg80211_ftm_responder_stats - FTM responder statistics * * @filled: bitflag of flags using the bits of &enum nl80211_ftm_stats to * indicate the relevant values in this struct for them @@ -2849,6 +2951,216 @@ struct cfg80211_ftm_responder_stats { }; /** + * struct cfg80211_pmsr_ftm_result - FTM result + * @failure_reason: if this measurement failed (PMSR status is + * %NL80211_PMSR_STATUS_FAILURE), this gives a more precise + * reason than just "failure" + * @burst_index: if reporting partial results, this is the index + * in [0 .. num_bursts-1] of the burst that's being reported + * @num_ftmr_attempts: number of FTM request frames transmitted + * @num_ftmr_successes: number of FTM request frames acked + * @busy_retry_time: if failure_reason is %NL80211_PMSR_FTM_FAILURE_PEER_BUSY, + * fill this to indicate in how many seconds a retry is deemed possible + * by the responder + * @num_bursts_exp: actual number of bursts exponent negotiated + * @burst_duration: actual burst duration negotiated + * @ftms_per_burst: actual FTMs per burst negotiated + * @lci_len: length of LCI information (if present) + * @civicloc_len: length of civic location information (if present) + * @lci: LCI data (may be %NULL) + * @civicloc: civic location data (may be %NULL) + * @rssi_avg: average RSSI over FTM action frames reported + * @rssi_spread: spread of the RSSI over FTM action frames reported + * @tx_rate: bitrate for transmitted FTM action frame response + * @rx_rate: bitrate of received FTM action frame + * @rtt_avg: average of RTTs measured (must have either this or @dist_avg) + * @rtt_variance: variance of RTTs measured (note that 
standard deviation is + * the square root of the variance) + * @rtt_spread: spread of the RTTs measured + * @dist_avg: average of distances (mm) measured + * (must have either this or @rtt_avg) + * @dist_variance: variance of distances measured (see also @rtt_variance) + * @dist_spread: spread of distances measured (see also @rtt_spread) + * @num_ftmr_attempts_valid: @num_ftmr_attempts is valid + * @num_ftmr_successes_valid: @num_ftmr_successes is valid + * @rssi_avg_valid: @rssi_avg is valid + * @rssi_spread_valid: @rssi_spread is valid + * @tx_rate_valid: @tx_rate is valid + * @rx_rate_valid: @rx_rate is valid + * @rtt_avg_valid: @rtt_avg is valid + * @rtt_variance_valid: @rtt_variance is valid + * @rtt_spread_valid: @rtt_spread is valid + * @dist_avg_valid: @dist_avg is valid + * @dist_variance_valid: @dist_variance is valid + * @dist_spread_valid: @dist_spread is valid + */ +struct cfg80211_pmsr_ftm_result { + const u8 *lci; + const u8 *civicloc; + unsigned int lci_len; + unsigned int civicloc_len; + enum nl80211_peer_measurement_ftm_failure_reasons failure_reason; + u32 num_ftmr_attempts, num_ftmr_successes; + s16 burst_index; + u8 busy_retry_time; + u8 num_bursts_exp; + u8 burst_duration; + u8 ftms_per_burst; + s32 rssi_avg; + s32 rssi_spread; + struct rate_info tx_rate, rx_rate; + s64 rtt_avg; + s64 rtt_variance; + s64 rtt_spread; + s64 dist_avg; + s64 dist_variance; + s64 dist_spread; + + u16 num_ftmr_attempts_valid:1, + num_ftmr_successes_valid:1, + rssi_avg_valid:1, + rssi_spread_valid:1, + tx_rate_valid:1, + rx_rate_valid:1, + rtt_avg_valid:1, + rtt_variance_valid:1, + rtt_spread_valid:1, + dist_avg_valid:1, + dist_variance_valid:1, + dist_spread_valid:1; +}; + +/** + * struct cfg80211_pmsr_result - peer measurement result + * @addr: address of the peer + * @host_time: host time (use ktime_get_boottime() adjust to the time when the + * measurement was made) + * @ap_tsf: AP's TSF at measurement time + * @status: status of the measurement + * @final: if 
reporting partial results, mark this as the last one; if not + * reporting partial results always set this flag + * @ap_tsf_valid: indicates the @ap_tsf value is valid + * @type: type of the measurement reported, note that we only support reporting + * one type at a time, but you can report multiple results separately and + * they're all aggregated for userspace. + */ +struct cfg80211_pmsr_result { + u64 host_time, ap_tsf; + enum nl80211_peer_measurement_status status; + + u8 addr[ETH_ALEN]; + + u8 final:1, + ap_tsf_valid:1; + + enum nl80211_peer_measurement_type type; + + union { + struct cfg80211_pmsr_ftm_result ftm; + }; +}; + +/** + * struct cfg80211_pmsr_ftm_request_peer - FTM request data + * @requested: indicates FTM is requested + * @preamble: frame preamble to use + * @burst_period: burst period to use + * @asap: indicates to use ASAP mode + * @num_bursts_exp: number of bursts exponent + * @burst_duration: burst duration + * @ftms_per_burst: number of FTMs per burst + * @ftmr_retries: number of retries for FTM request + * @request_lci: request LCI information + * @request_civicloc: request civic location information + * + * See also nl80211 for the respective attribute documentation. 
+ */ +struct cfg80211_pmsr_ftm_request_peer { + enum nl80211_preamble preamble; + u16 burst_period; + u8 requested:1, + asap:1, + request_lci:1, + request_civicloc:1; + u8 num_bursts_exp; + u8 burst_duration; + u8 ftms_per_burst; + u8 ftmr_retries; +}; + +/** + * struct cfg80211_pmsr_request_peer - peer data for a peer measurement request + * @addr: MAC address + * @chandef: channel to use + * @report_ap_tsf: report the associated AP's TSF + * @ftm: FTM data, see &struct cfg80211_pmsr_ftm_request_peer + */ +struct cfg80211_pmsr_request_peer { + u8 addr[ETH_ALEN]; + struct cfg80211_chan_def chandef; + u8 report_ap_tsf:1; + struct cfg80211_pmsr_ftm_request_peer ftm; +}; + +/** + * struct cfg80211_pmsr_request - peer measurement request + * @cookie: cookie, set by cfg80211 + * @nl_portid: netlink portid - used by cfg80211 + * @drv_data: driver data for this request, if required for aborting, + * not otherwise freed or anything by cfg80211 + * @mac_addr: MAC address used for (randomised) request + * @mac_addr_mask: MAC address mask used for randomisation, bits that + * are 0 in the mask should be randomised, bits that are 1 should + * be taken from the @mac_addr + * @list: used by cfg80211 to hold on to the request + * @timeout: timeout (in milliseconds) for the whole operation, if + * zero it means there's no timeout + * @n_peers: number of peers to do measurements with + * @peers: per-peer measurement request data + */ +struct cfg80211_pmsr_request { + u64 cookie; + void *drv_data; + u32 n_peers; + u32 nl_portid; + + u32 timeout; + + u8 mac_addr[ETH_ALEN] __aligned(2); + u8 mac_addr_mask[ETH_ALEN] __aligned(2); + + struct list_head list; + + struct cfg80211_pmsr_request_peer peers[]; +}; + +/** + * struct cfg80211_update_owe_info - OWE Information + * + * This structure provides information needed for the drivers to offload OWE + * (Opportunistic Wireless Encryption) processing to the user space. 
+ * + * Commonly used across update_owe_info request and event interfaces. + * + * @peer: MAC address of the peer device for which the OWE processing + * has to be done. + * @status: status code, %WLAN_STATUS_SUCCESS for successful OWE info + * processing, use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space + * cannot give you the real status code for failures. Used only for + * OWE update request command interface (user space to driver). + * @ie: IEs obtained from the peer or constructed by the user space. These are + * the IEs of the remote peer in the event from the host driver and + * the constructed IEs by the user space in the request interface. + * @ie_len: Length of IEs in octets. + */ +struct cfg80211_update_owe_info { + u8 peer[ETH_ALEN] __aligned(2); + u16 status; + const u8 *ie; + size_t ie_len; +}; + +/** * struct cfg80211_ops - backend description for wireless configuration * * This struct is registered by fullmac card drivers and/or wireless stacks @@ -3183,6 +3495,15 @@ struct cfg80211_ftm_responder_stats { * * @get_ftm_responder_stats: Retrieve FTM responder statistics, if available. * Statistics should be cumulative, currently no way to reset is provided. + * @start_pmsr: start peer measurement (e.g. FTM) + * @abort_pmsr: abort peer measurement + * + * @update_owe_info: Provide updated OWE info to driver. Driver implementing SME + * but offloading OWE processing to the user space will get the updated + * DH IE through this interface. + * + * @probe_mesh_link: Probe direct Mesh peer's link quality by sending data frame + * and overrule HWMP path selection algorithm. 
*/ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3492,6 +3813,15 @@ struct cfg80211_ops { int (*get_ftm_responder_stats)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ftm_responder_stats *ftm_stats); + + int (*start_pmsr)(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_pmsr_request *request); + void (*abort_pmsr)(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_pmsr_request *request); + int (*update_owe_info)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_update_owe_info *owe_info); + int (*probe_mesh_link)(struct wiphy *wiphy, struct net_device *dev, + const u8 *buf, size_t len); }; /* @@ -3864,6 +4194,42 @@ struct wiphy_iftype_ext_capab { }; /** + * struct cfg80211_pmsr_capabilities - cfg80211 peer measurement capabilities + * @max_peers: maximum number of peers in a single measurement + * @report_ap_tsf: can report assoc AP's TSF for radio resource measurement + * @randomize_mac_addr: can randomize MAC address for measurement + * @ftm.supported: FTM measurement is supported + * @ftm.asap: ASAP-mode is supported + * @ftm.non_asap: non-ASAP-mode is supported + * @ftm.request_lci: can request LCI data + * @ftm.request_civicloc: can request civic location data + * @ftm.preambles: bitmap of preambles supported (&enum nl80211_preamble) + * @ftm.bandwidths: bitmap of bandwidths supported (&enum nl80211_chan_width) + * @ftm.max_bursts_exponent: maximum burst exponent supported + * (set to -1 if not limited; note that setting this will necessarily + * forbid using the value 15 to let the responder pick) + * @ftm.max_ftms_per_burst: maximum FTMs per burst supported (set to 0 if + * not limited) + */ +struct cfg80211_pmsr_capabilities { + unsigned int max_peers; + u8 report_ap_tsf:1, + randomize_mac_addr:1; + + struct { + u32 preambles; + u32 bandwidths; + s8 max_bursts_exponent; + u8 max_ftms_per_burst; + u8 supported:1, + asap:1, + non_asap:1, + request_lci:1, 
+ request_civicloc:1; + } ftm; +}; + +/** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback, * note that if your driver uses wiphy_apply_custom_regulatory() @@ -3875,6 +4241,8 @@ struct wiphy_iftype_ext_capab { * @signal_type: signal type reported in &struct cfg80211_bss. * @cipher_suites: supported cipher suites * @n_cipher_suites: number of supported cipher suites + * @akm_suites: supported AKM suites + * @n_akm_suites: number of supported AKM suites * @retry_short: Retry limit for short frames (dot11ShortRetryLimit) * @retry_long: Retry limit for long frames (dot11LongRetryLimit) * @frag_threshold: Fragmentation threshold (dot11FragmentationThreshold); @@ -4027,6 +4395,13 @@ struct wiphy_iftype_ext_capab { * @txq_limit: configuration of internal TX queue frame limit * @txq_memory_limit: configuration internal TX queue memory limit * @txq_quantum: configuration of internal TX queue scheduler quantum + * + * @support_mbssid: can HW support association with nontransmitted AP + * @support_only_he_mbssid: don't parse MBSSID elements if it is not + * HE AP, in order to avoid compatibility issues. + * @support_mbssid must be set for this to have any effect. 
+ * + * @pmsr_capa: peer measurement capabilities */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -4071,6 +4446,9 @@ struct wiphy { int n_cipher_suites; const u32 *cipher_suites; + int n_akm_suites; + const u32 *akm_suites; + u8 retry_short; u8 retry_long; u32 frag_threshold; @@ -4163,6 +4541,11 @@ struct wiphy { u32 txq_memory_limit; u32 txq_quantum; + u8 support_mbssid:1, + support_only_he_mbssid:1; + + const struct cfg80211_pmsr_capabilities *pmsr_capa; + char priv[0] __aligned(NETDEV_ALIGN); }; @@ -4332,6 +4715,17 @@ struct cfg80211_cqm_config; * @mesh_id_len: (private) Used by the internal configuration code * @mesh_id_up_len: (private) Used by the internal configuration code * @wext: (private) Used by the internal wireless extensions compat code + * @wext.ibss: (private) IBSS data part of wext handling + * @wext.connect: (private) connection handling data + * @wext.keys: (private) (WEP) key data + * @wext.ie: (private) extra elements for association + * @wext.ie_len: (private) length of extra elements + * @wext.bssid: (private) selected network BSSID + * @wext.ssid: (private) selected network SSID + * @wext.default_key: (private) selected default key index + * @wext.default_mgmt_key: (private) selected default management key index + * @wext.prev_bssid: (private) previous BSSID for reassociation + * @wext.prev_bssid_valid: (private) previous BSSID validity * @use_4addr: indicates 4addr mode is used on this interface, must be * set by driver (if supported) on add_interface BEFORE registering the * netdev and may otherwise be used by driver read-only, will be update @@ -4365,6 +4759,9 @@ struct cfg80211_cqm_config; * @owner_nlportid: (private) owner socket port ID * @nl_owner_dead: (private) owner socket went away * @cqm_config: (private) nl80211 RSSI monitor state + * @pmsr_list: (private) peer measurement requests + * @pmsr_lock: (private) peer measurements requests/results lock + * @pmsr_free_wk: (private) peer measurements 
cleanup work */ struct wireless_dev { struct wiphy *wiphy; @@ -4428,7 +4825,8 @@ struct wireless_dev { struct cfg80211_cached_keys *keys; const u8 *ie; size_t ie_len; - u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; + u8 bssid[ETH_ALEN]; + u8 prev_bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; s8 default_key, default_mgmt_key; bool prev_bssid_valid; @@ -4436,6 +4834,10 @@ struct wireless_dev { #endif struct cfg80211_cqm_config *cqm_config; + + struct list_head pmsr_list; + spinlock_t pmsr_lock; + struct work_struct pmsr_free_wk; }; static inline u8 *wdev_address(struct wireless_dev *wdev) @@ -4703,6 +5105,33 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, struct cfg80211_qos_map *qos_map); /** + * cfg80211_find_elem_match - match information element and byte array in data + * + * @eid: element ID + * @ies: data consisting of IEs + * @len: length of data + * @match: byte array to match + * @match_len: number of bytes in the match array + * @match_offset: offset in the IE data where the byte array should match. + * Note the difference to cfg80211_find_ie_match() which considers + * the offset to start from the element ID byte, but here we take + * the data portion instead. + * + * Return: %NULL if the element ID could not be found or if + * the element is invalid (claims to be longer than the given + * data) or if the byte array doesn't match; otherwise return the + * requested element struct. + * + * Note: There are no checks on the element length other than + * having to fit into the given data and being large enough for the + * byte array to match. 
+ */ +const struct element * +cfg80211_find_elem_match(u8 eid, const u8 *ies, unsigned int len, + const u8 *match, unsigned int match_len, + unsigned int match_offset); + +/** * cfg80211_find_ie_match - match information element and byte array in data * * @eid: element ID @@ -4726,9 +5155,44 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, * having to fit into the given data and being large enough for the * byte array to match. */ -const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len, - const u8 *match, int match_len, - int match_offset); +static inline const u8 * +cfg80211_find_ie_match(u8 eid, const u8 *ies, unsigned int len, + const u8 *match, unsigned int match_len, + unsigned int match_offset) +{ + /* match_offset can't be smaller than 2, unless match_len is + * zero, in which case match_offset must be zero as well. + */ + if (WARN_ON((match_len && match_offset < 2) || + (!match_len && match_offset))) + return NULL; + + return (void *)cfg80211_find_elem_match(eid, ies, len, + match, match_len, + match_offset ? + match_offset - 2 : 0); +} + +/** + * cfg80211_find_elem - find information element in data + * + * @eid: element ID + * @ies: data consisting of IEs + * @len: length of data + * + * Return: %NULL if the element ID could not be found or if + * the element is invalid (claims to be longer than the given + * data) or if the byte array doesn't match; otherwise return the + * requested element struct. + * + * Note: There are no checks on the element length other than + * having to fit into the given data. 
+ */ +static inline const struct element * +cfg80211_find_elem(u8 eid, const u8 *ies, int len) +{ + return cfg80211_find_elem_match(eid, ies, len, NULL, 0, 0); +} /** * cfg80211_find_ie - find information element in data @@ -4751,6 +5215,28 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) } /** + * cfg80211_find_ext_elem - find information element with EID Extension in data + * + * @ext_eid: element ID Extension + * @ies: data consisting of IEs + * @len: length of data + * + * Return: %NULL if the etended element could not be found or if + * the element is invalid (claims to be longer than the given + * data) or if the byte array doesn't match; otherwise return the + * requested element struct. + * + * Note: There are no checks on the element length other than + * having to fit into the given data. + */ +static inline const struct element * +cfg80211_find_ext_elem(u8 ext_eid, const u8 *ies, int len) +{ + return cfg80211_find_elem_match(WLAN_EID_EXTENSION, ies, len, + &ext_eid, 1, 0); +} + +/** * cfg80211_find_ext_ie - find information element with EID Extension in data * * @ext_eid: element ID Extension @@ -4772,6 +5258,25 @@ static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len) } /** + * cfg80211_find_vendor_elem - find vendor specific information element in data + * + * @oui: vendor OUI + * @oui_type: vendor-specific OUI type (must be < 0xff), negative means any + * @ies: data consisting of IEs + * @len: length of data + * + * Return: %NULL if the vendor specific element ID could not be found or if the + * element is invalid (claims to be longer than the given data); otherwise + * return the element structure for the requested element. + * + * Note: There are no checks on the element length other than having to fit into + * the given data. 
+ */ +const struct element *cfg80211_find_vendor_elem(unsigned int oui, int oui_type, + const u8 *ies, + unsigned int len); + +/** * cfg80211_find_vendor_ie - find vendor specific information element in data * * @oui: vendor OUI @@ -4787,8 +5292,12 @@ static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len) * Note: There are no checks on the element length other than having to fit into * the given data. */ -const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type, - const u8 *ies, int len); +static inline const u8 * +cfg80211_find_vendor_ie(unsigned int oui, int oui_type, + const u8 *ies, unsigned int len) +{ + return (void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len); +} /** * cfg80211_send_layer2_update - send layer 2 update frame @@ -5034,6 +5543,49 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, } /** + * cfg80211_gen_new_bssid - generate a nontransmitted BSSID for multi-BSSID + * @bssid: transmitter BSSID + * @max_bssid: max BSSID indicator, taken from Multiple BSSID element + * @mbssid_index: BSSID index, taken from Multiple BSSID index element + * @new_bssid: calculated nontransmitted BSSID + */ +static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid, + u8 mbssid_index, u8 *new_bssid) +{ + u64 bssid_u64 = ether_addr_to_u64(bssid); + u64 mask = GENMASK_ULL(max_bssid - 1, 0); + u64 new_bssid_u64; + + new_bssid_u64 = bssid_u64 & ~mask; + + new_bssid_u64 |= ((bssid_u64 & mask) + mbssid_index) & mask; + + u64_to_ether_addr(new_bssid_u64, new_bssid); +} + +/** + * cfg80211_is_element_inherited - returns if element ID should be inherited + * @element: element to check + * @non_inherit_element: non inheritance element + */ +bool cfg80211_is_element_inherited(const struct element *element, + const struct element *non_inherit_element); + +/** + * cfg80211_merge_profile - merges a MBSSID profile if it is split between IEs + * @ie: ies + * @ielen: length of IEs + * @mbssid_elem: current MBSSID element + * 
@sub_elem: current MBSSID subelement (profile) + * @merged_ie: location of the merged profile + * @max_copy_len: max merged profile length + */ +size_t cfg80211_merge_profile(const u8 *ie, size_t ielen, + const struct element *mbssid_elem, + const struct element *sub_elem, + u8 *merged_ie, size_t max_copy_len); + +/** * enum cfg80211_bss_frame_type - frame type that the BSS data came from * @CFG80211_BSS_FTYPE_UNKNOWN: driver doesn't know whether the data is * from a beacon or probe response @@ -5218,10 +5770,12 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); * @dev: network device * @bss: the BSS that association was requested with, ownership of the pointer * moves to cfg80211 in this call - * @buf: authentication frame (header + body) + * @buf: (Re)Association Response frame (header + body) * @len: length of the frame data * @uapsd_queues: bitmap of queues configured for uapsd. Same format * as the AC bitmap in the QoS info field + * @req_ies: information elements from the (Re)Association Request frame + * @req_ies_len: length of req_ies data * * After being asked to associate via cfg80211_ops::assoc() the driver must * call either this function or cfg80211_auth_timeout(). 
@@ -5231,7 +5785,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, const u8 *buf, size_t len, - int uapsd_queues); + int uapsd_queues, + const u8 *req_ies, size_t req_ies_len); /** * cfg80211_assoc_timeout - notification of timed out association @@ -5320,7 +5875,7 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, * @dev: network device * @macaddr: the MAC address of the new candidate * @ie: information elements advertised by the peer candidate - * @ie_len: lenght of the information elements buffer + * @ie_len: length of the information elements buffer * @gfp: allocation flags * * This function notifies cfg80211 that the mesh peer candidate has been @@ -5328,7 +5883,8 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, * cfg80211 then sends a notification to userspace. */ void cfg80211_notify_new_peer_candidate(struct net_device *dev, - const u8 *macaddr, const u8 *ie, u8 ie_len, gfp_t gfp); + const u8 *macaddr, const u8 *ie, u8 ie_len, + int sig_dbm, gfp_t gfp); /** * DOC: RFkill integration @@ -5392,6 +5948,7 @@ struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_commands cmd, enum nl80211_attrs attr, + unsigned int portid, int vendor_event_idx, int approxlen, gfp_t gfp); @@ -5442,6 +5999,15 @@ cfg80211_vendor_cmd_alloc_reply_skb(struct wiphy *wiphy, int approxlen) int cfg80211_vendor_cmd_reply(struct sk_buff *skb); /** + * cfg80211_vendor_cmd_get_sender + * @wiphy: the wiphy + * + * Return the current netlink port ID in a vendor command handler. + * Valid to call only there. 
+ */ +unsigned int cfg80211_vendor_cmd_get_sender(struct wiphy *wiphy); + +/** * cfg80211_vendor_event_alloc - allocate vendor-specific event skb * @wiphy: the wiphy * @wdev: the wireless device @@ -5468,7 +6034,42 @@ cfg80211_vendor_event_alloc(struct wiphy *wiphy, struct wireless_dev *wdev, { return __cfg80211_alloc_event_skb(wiphy, wdev, NL80211_CMD_VENDOR, NL80211_ATTR_VENDOR_DATA, - event_idx, approxlen, gfp); + 0, event_idx, approxlen, gfp); +} + +/** + * cfg80211_vendor_event_alloc_ucast - alloc unicast vendor-specific event skb + * @wiphy: the wiphy + * @wdev: the wireless device + * @event_idx: index of the vendor event in the wiphy's vendor_events + * @portid: port ID of the receiver + * @approxlen: an upper bound of the length of the data that will + * be put into the skb + * @gfp: allocation flags + * + * This function allocates and pre-fills an skb for an event to send to + * a specific (userland) socket. This socket would previously have been + * obtained by cfg80211_vendor_cmd_get_sender(), and the caller MUST take + * care to register a netlink notifier to see when the socket closes. + * + * If wdev != NULL, both the ifindex and identifier of the specified + * wireless device are added to the event message before the vendor data + * attribute. + * + * When done filling the skb, call cfg80211_vendor_event() with the + * skb to send the event. + * + * Return: An allocated and pre-filled skb. %NULL if any errors happen. 
+ */ +static inline struct sk_buff * +cfg80211_vendor_event_alloc_ucast(struct wiphy *wiphy, + struct wireless_dev *wdev, + unsigned int portid, int approxlen, + int event_idx, gfp_t gfp) +{ + return __cfg80211_alloc_event_skb(wiphy, wdev, NL80211_CMD_VENDOR, + NL80211_ATTR_VENDOR_DATA, + portid, event_idx, approxlen, gfp); } /** @@ -5568,7 +6169,7 @@ static inline struct sk_buff * cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, int approxlen, gfp_t gfp) { return __cfg80211_alloc_event_skb(wiphy, NULL, NL80211_CMD_TESTMODE, - NL80211_ATTR_TESTDATA, -1, + NL80211_ATTR_TESTDATA, 0, -1, approxlen, gfp); } @@ -6630,6 +7231,31 @@ int cfg80211_external_auth_request(struct net_device *netdev, struct cfg80211_external_auth_params *params, gfp_t gfp); +/** + * cfg80211_pmsr_report - report peer measurement result data + * @wdev: the wireless device reporting the measurement + * @req: the original measurement request + * @result: the result data + * @gfp: allocation flags + */ +void cfg80211_pmsr_report(struct wireless_dev *wdev, + struct cfg80211_pmsr_request *req, + struct cfg80211_pmsr_result *result, + gfp_t gfp); + +/** + * cfg80211_pmsr_complete - report peer measurement completed + * @wdev: the wireless device reporting the measurement + * @req: the original measurement request + * @gfp: allocation flags + * + * Report that the entire measurement completed, after this + * the request pointer will no longer be valid. + */ +void cfg80211_pmsr_complete(struct wireless_dev *wdev, + struct cfg80211_pmsr_request *req, + gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ @@ -6651,6 +7277,11 @@ int cfg80211_external_auth_request(struct net_device *netdev, #define wiphy_info(wiphy, format, args...) \ dev_info(&(wiphy)->dev, format, ##args) +#define wiphy_err_ratelimited(wiphy, format, args...) 
\ + dev_err_ratelimited(&(wiphy)->dev, format, ##args) +#define wiphy_warn_ratelimited(wiphy, format, args...) \ + dev_warn_ratelimited(&(wiphy)->dev, format, ##args) + #define wiphy_debug(wiphy, format, args...) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args) @@ -6676,4 +7307,14 @@ int cfg80211_external_auth_request(struct net_device *netdev, #define wiphy_WARN(wiphy, format, args...) \ WARN(1, "wiphy: %s\n" format, wiphy_name(wiphy), ##args); +/** + * cfg80211_update_owe_info_event - Notify the peer's OWE info to user space + * @netdev: network device + * @owe_info: peer's owe info + * @gfp: allocation flags + */ +void cfg80211_update_owe_info_event(struct net_device *netdev, + struct cfg80211_update_owe_info *owe_info, + gfp_t gfp); + #endif /* __NET_CFG80211_H */ diff --git a/include/net/checksum.h b/include/net/checksum.h index aef2b2bb6603..0f319e13be2c 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -30,7 +30,7 @@ static inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, int len, __wsum sum, int *err_ptr) { - if (access_ok(VERIFY_READ, src, len)) + if (access_ok(src, len)) return csum_partial_copy_from_user(src, dst, len, sum, err_ptr); if (len) @@ -46,7 +46,7 @@ static __inline__ __wsum csum_and_copy_to_user { sum = csum_partial(src, len, sum); - if (access_ok(VERIFY_WRITE, dst, len)) { + if (access_ok(dst, len)) { if (copy_to_user(dst, src, len) == 0) return sum; } diff --git a/include/net/compat.h b/include/net/compat.h index 4c6d75612b6c..f277653c7e17 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -30,9 +30,6 @@ struct compat_cmsghdr { compat_int_t cmsg_type; }; -int compat_sock_get_timestamp(struct sock *, struct timeval __user *); -int compat_sock_get_timestampns(struct sock *, struct timespec __user *); - #else /* defined(CONFIG_COMPAT) */ /* * To avoid compiler warnings: diff --git a/include/net/devlink.h b/include/net/devlink.h index 45db0c79462d..1c4adfb4195a 100644 --- 
a/include/net/devlink.h +++ b/include/net/devlink.h @@ -16,6 +16,7 @@ #include <linux/gfp.h> #include <linux/list.h> #include <linux/netdevice.h> +#include <linux/spinlock.h> #include <net/net_namespace.h> #include <uapi/linux/devlink.h> @@ -30,6 +31,8 @@ struct devlink { struct list_head param_list; struct list_head region_list; u32 snapshot_id; + struct list_head reporter_list; + struct mutex reporters_lock; /* protects reporter_list */ struct devlink_dpipe_headers *dpipe_headers; const struct devlink_ops *ops; struct device *dev; @@ -39,18 +42,24 @@ struct devlink { }; struct devlink_port_attrs { - bool set; + u8 set:1, + split:1, + switch_port:1; enum devlink_port_flavour flavour; u32 port_number; /* same value as "split group" */ - bool split; u32 split_subport_number; + struct netdev_phys_item_id switch_id; }; struct devlink_port { struct list_head list; + struct list_head param_list; struct devlink *devlink; unsigned index; bool registered; + spinlock_t type_lock; /* Protects type and type_dev + * pointer consistency. 
+ */ enum devlink_port_type type; enum devlink_port_type desired_type; void *type_dev; @@ -61,6 +70,7 @@ struct devlink_sb_pool_info { enum devlink_sb_pool_type pool_type; u32 size; enum devlink_sb_threshold_type threshold_type; + u32 cell_size; }; /** @@ -355,6 +365,7 @@ struct devlink_param_item { const struct devlink_param *param; union devlink_param_value driverinit_value; bool driverinit_value_valid; + bool published; }; enum devlink_param_generic_id { @@ -365,6 +376,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, + DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -392,6 +404,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME "msix_vec_per_pf_min" #define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE DEVLINK_PARAM_TYPE_U32 +#define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME "fw_load_policy" +#define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE DEVLINK_PARAM_TYPE_U8 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@ -415,10 +430,55 @@ enum devlink_param_generic_id { .validate = _validate, \ } +/* Part number, identifier of board design */ +#define DEVLINK_INFO_VERSION_GENERIC_BOARD_ID "board.id" +/* Revision of board design */ +#define DEVLINK_INFO_VERSION_GENERIC_BOARD_REV "board.rev" +/* Maker of the board */ +#define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE "board.manufacture" + +/* Control processor FW version */ +#define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT "fw.mgmt" +/* Data path microcode controlling high-speed packet processing */ +#define DEVLINK_INFO_VERSION_GENERIC_FW_APP "fw.app" +/* UNDI software version */ +#define DEVLINK_INFO_VERSION_GENERIC_FW_UNDI "fw.undi" +/* NCSI support/handler version */ +#define DEVLINK_INFO_VERSION_GENERIC_FW_NCSI "fw.ncsi" + 
struct devlink_region; +struct devlink_info_req; typedef void devlink_snapshot_data_dest_t(const void *data); +struct devlink_fmsg; +struct devlink_health_reporter; + +enum devlink_health_reporter_state { + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY, + DEVLINK_HEALTH_REPORTER_STATE_ERROR, +}; + +/** + * struct devlink_health_reporter_ops - Reporter operations + * @name: reporter name + * @recover: callback to recover from reported error + * if priv_ctx is NULL, run a full recover + * @dump: callback to dump an object + * if priv_ctx is NULL, run a full dump + * @diagnose: callback to diagnose the current status + */ + +struct devlink_health_reporter_ops { + char *name; + int (*recover)(struct devlink_health_reporter *reporter, + void *priv_ctx); + int (*dump)(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx); + int (*diagnose)(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg); +}; + struct devlink_ops { int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, @@ -432,13 +492,14 @@ struct devlink_ops { struct devlink_sb_pool_info *pool_info); int (*sb_pool_set)(struct devlink *devlink, unsigned int sb_index, u16 pool_index, u32 size, - enum devlink_sb_threshold_type threshold_type); + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack); int (*sb_port_pool_get)(struct devlink_port *devlink_port, unsigned int sb_index, u16 pool_index, u32 *p_threshold); int (*sb_port_pool_set)(struct devlink_port *devlink_port, unsigned int sb_index, u16 pool_index, - u32 threshold); + u32 threshold, struct netlink_ext_ack *extack); int (*sb_tc_pool_bind_get)(struct devlink_port *devlink_port, unsigned int sb_index, u16 tc_index, @@ -448,7 +509,8 @@ struct devlink_ops { unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, - u16 pool_index, u32 threshold); + u16 pool_index, u32 threshold, + struct netlink_ext_ack 
*extack); int (*sb_occ_snapshot)(struct devlink *devlink, unsigned int sb_index); int (*sb_occ_max_clear)(struct devlink *devlink, @@ -471,6 +533,11 @@ struct devlink_ops { int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode); int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode, struct netlink_ext_ack *extack); + int (*info_get)(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack); + int (*flash_update)(struct devlink *devlink, const char *file_name, + const char *component, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) @@ -485,9 +552,24 @@ static inline struct devlink *priv_to_devlink(void *priv) return container_of(priv, struct devlink, priv); } -struct ib_device; +static inline struct devlink_port * +netdev_to_devlink_port(struct net_device *dev) +{ + if (dev->netdev_ops->ndo_get_devlink_port) + return dev->netdev_ops->ndo_get_devlink_port(dev); + return NULL; +} -#if IS_ENABLED(CONFIG_NET_DEVLINK) +static inline struct devlink *netdev_to_devlink(struct net_device *dev) +{ + struct devlink_port *devlink_port = netdev_to_devlink_port(dev); + + if (devlink_port) + return devlink_port->devlink; + return NULL; +} + +struct ib_device; struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); int devlink_register(struct devlink *devlink, struct device *dev); @@ -505,9 +587,9 @@ void devlink_port_type_clear(struct devlink_port *devlink_port); void devlink_port_attrs_set(struct devlink_port *devlink_port, enum devlink_port_flavour flavour, u32 port_number, bool split, - u32 split_subport_number); -int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, - char *name, size_t len); + u32 split_subport_number, + const unsigned char *switch_id, + unsigned char switch_id_len); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 
ingress_tc_count, @@ -563,11 +645,28 @@ int devlink_params_register(struct devlink *devlink, void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count); +void devlink_params_publish(struct devlink *devlink); +void devlink_params_unpublish(struct devlink *devlink); +int devlink_port_params_register(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count); +void devlink_port_params_unregister(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count); int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *init_val); int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val); +int +devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value *init_val); +int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value init_val); void devlink_param_value_changed(struct devlink *devlink, u32 param_id); +void devlink_port_param_value_changed(struct devlink_port *devlink_port, + u32 param_id); void devlink_param_value_str_fill(union devlink_param_value *dst_val, const char *src); struct devlink_region *devlink_region_create(struct devlink *devlink, @@ -579,267 +678,104 @@ u32 devlink_region_shapshot_id_get(struct devlink *devlink); int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, u8 *data, u32 snapshot_id, devlink_snapshot_data_dest_t *data_destructor); +int devlink_info_serial_number_put(struct devlink_info_req *req, + const char *sn); +int devlink_info_driver_name_put(struct devlink_info_req *req, + const char *name); +int devlink_info_version_fixed_put(struct devlink_info_req *req, + const char *version_name, + const char *version_value); +int devlink_info_version_stored_put(struct 
devlink_info_req *req, + const char *version_name, + const char *version_value); +int devlink_info_version_running_put(struct devlink_info_req *req, + const char *version_name, + const char *version_value); + +int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg); +int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg); + +int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name); +int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg); + +int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name); +int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg); + +int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value); +int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value); +int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value); +int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value); +int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value); +int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len); + +int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, + bool value); +int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, + u8 value); +int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, + u32 value); +int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, + u64 value); +int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, + const char *value); +int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, + const void *value, u16 value_len); + +struct devlink_health_reporter * +devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, bool auto_recover, + void *priv); +void +devlink_health_reporter_destroy(struct devlink_health_reporter *reporter); + +void * +devlink_health_reporter_priv(struct devlink_health_reporter *reporter); 
+int devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx); +void +devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, + enum devlink_health_reporter_state state); -#else - -static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, - size_t priv_size) -{ - return kzalloc(sizeof(struct devlink) + priv_size, GFP_KERNEL); -} - -static inline int devlink_register(struct devlink *devlink, struct device *dev) -{ - return 0; -} - -static inline void devlink_unregister(struct devlink *devlink) -{ -} - -static inline void devlink_free(struct devlink *devlink) -{ - kfree(devlink); -} - -static inline int devlink_port_register(struct devlink *devlink, - struct devlink_port *devlink_port, - unsigned int port_index) -{ - return 0; -} - -static inline void devlink_port_unregister(struct devlink_port *devlink_port) -{ -} - -static inline void devlink_port_type_eth_set(struct devlink_port *devlink_port, - struct net_device *netdev) -{ -} - -static inline void devlink_port_type_ib_set(struct devlink_port *devlink_port, - struct ib_device *ibdev) -{ -} - -static inline void devlink_port_type_clear(struct devlink_port *devlink_port) -{ -} - -static inline void devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour, - u32 port_number, bool split, - u32 split_subport_number) -{ -} - -static inline int -devlink_port_get_phys_port_name(struct devlink_port *devlink_port, - char *name, size_t len) -{ - return -EOPNOTSUPP; -} - -static inline int devlink_sb_register(struct devlink *devlink, - unsigned int sb_index, u32 size, - u16 ingress_pools_count, - u16 egress_pools_count, - u16 ingress_tc_count, - u16 egress_tc_count) -{ - return 0; -} - -static inline void devlink_sb_unregister(struct devlink *devlink, - unsigned int sb_index) -{ -} - -static inline int -devlink_dpipe_table_register(struct devlink *devlink, - const char *table_name, - struct 
devlink_dpipe_table_ops *table_ops, - void *priv, bool counter_control_extern) -{ - return 0; -} - -static inline void devlink_dpipe_table_unregister(struct devlink *devlink, - const char *table_name) -{ -} - -static inline int devlink_dpipe_headers_register(struct devlink *devlink, - struct devlink_dpipe_headers * - dpipe_headers) -{ - return 0; -} - -static inline void devlink_dpipe_headers_unregister(struct devlink *devlink) -{ -} - -static inline bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, - const char *table_name) -{ - return false; -} - -static inline int -devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) -{ - return 0; -} - -static inline int -devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, - struct devlink_dpipe_entry *entry) -{ - return 0; -} - -static inline int -devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) -{ - return 0; -} - -static inline void -devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry) -{ -} - -static inline int -devlink_dpipe_action_put(struct sk_buff *skb, - struct devlink_dpipe_action *action) -{ - return 0; -} +#if IS_ENABLED(CONFIG_NET_DEVLINK) -static inline int -devlink_dpipe_match_put(struct sk_buff *skb, - struct devlink_dpipe_match *match) -{ - return 0; -} +void devlink_compat_running_version(struct net_device *dev, + char *buf, size_t len); +int devlink_compat_flash_update(struct net_device *dev, const char *file_name); +int devlink_compat_phys_port_name_get(struct net_device *dev, + char *name, size_t len); +int devlink_compat_switch_id_get(struct net_device *dev, + struct netdev_phys_item_id *ppid); -static inline int -devlink_resource_register(struct devlink *devlink, - const char *resource_name, - u64 resource_size, - u64 resource_id, - u64 parent_resource_id, - const struct devlink_resource_size_params *size_params) -{ - return 0; -} +#else static inline void -devlink_resources_unregister(struct devlink *devlink, - struct 
devlink_resource *resource) +devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) { } static inline int -devlink_resource_size_get(struct devlink *devlink, u64 resource_id, - u64 *p_resource_size) +devlink_compat_flash_update(struct net_device *dev, const char *file_name) { return -EOPNOTSUPP; } static inline int -devlink_dpipe_table_resource_set(struct devlink *devlink, - const char *table_name, u64 resource_id, - u64 resource_units) -{ - return -EOPNOTSUPP; -} - -static inline void -devlink_resource_occ_get_register(struct devlink *devlink, - u64 resource_id, - devlink_resource_occ_get_t *occ_get, - void *occ_get_priv) -{ -} - -static inline void -devlink_resource_occ_get_unregister(struct devlink *devlink, - u64 resource_id) -{ -} - -static inline int -devlink_params_register(struct devlink *devlink, - const struct devlink_param *params, - size_t params_count) -{ - return 0; -} - -static inline void -devlink_params_unregister(struct devlink *devlink, - const struct devlink_param *params, - size_t params_count) -{ - -} - -static inline int -devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, - union devlink_param_value *init_val) +devlink_compat_phys_port_name_get(struct net_device *dev, + char *name, size_t len) { return -EOPNOTSUPP; } static inline int -devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, - union devlink_param_value init_val) +devlink_compat_switch_id_get(struct net_device *dev, + struct netdev_phys_item_id *ppid) { return -EOPNOTSUPP; } -static inline void -devlink_param_value_changed(struct devlink *devlink, u32 param_id) -{ -} - -static inline void -devlink_param_value_str_fill(union devlink_param_value *dst_val, - const char *src) -{ -} - -static inline struct devlink_region * -devlink_region_create(struct devlink *devlink, - const char *region_name, - u32 region_max_snapshots, - u64 region_size) -{ - return NULL; -} - -static inline void -devlink_region_destroy(struct 
devlink_region *region) -{ -} - -static inline u32 -devlink_region_shapshot_id_get(struct devlink *devlink) -{ - return 0; -} - -static inline int -devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, - u8 *data, u32 snapshot_id, - devlink_snapshot_data_dest_t *data_destructor) -{ - return 0; -} - #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 23690c44e167..685294817712 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -21,6 +21,7 @@ #include <linux/ethtool.h> #include <linux/net_tstamp.h> #include <linux/phy.h> +#include <linux/platform_data/dsa.h> #include <net/devlink.h> #include <net/switchdev.h> @@ -29,79 +30,36 @@ struct phy_device; struct fixed_phy_status; struct phylink_link_state; -enum dsa_tag_protocol { - DSA_TAG_PROTO_NONE = 0, - DSA_TAG_PROTO_BRCM, - DSA_TAG_PROTO_BRCM_PREPEND, - DSA_TAG_PROTO_DSA, - DSA_TAG_PROTO_EDSA, - DSA_TAG_PROTO_GSWIP, - DSA_TAG_PROTO_KSZ, - DSA_TAG_PROTO_LAN9303, - DSA_TAG_PROTO_MTK, - DSA_TAG_PROTO_QCA, - DSA_TAG_PROTO_TRAILER, - DSA_TAG_LAST, /* MUST BE LAST */ -}; - -#define DSA_MAX_SWITCHES 4 -#define DSA_MAX_PORTS 12 - -#define DSA_RTABLE_NONE -1 - -struct dsa_chip_data { - /* - * How to access the switch configuration registers. - */ - struct device *host_dev; - int sw_addr; - - /* - * Reference to network devices - */ - struct device *netdev[DSA_MAX_PORTS]; - - /* set to size of eeprom if supported by the switch */ - int eeprom_len; - - /* Device tree node pointer for this specific switch chip - * used during switch setup in case additional properties - * and resources needs to be used - */ - struct device_node *of_node; - - /* - * The names of the switch's ports. Use "cpu" to - * designate the switch port that the cpu is connected to, - * "dsa" to indicate that this port is a DSA link to - * another switch, NULL to indicate the port is unused, - * or any other string to indicate this is a physical port. 
- */ - char *port_names[DSA_MAX_PORTS]; - struct device_node *port_dn[DSA_MAX_PORTS]; - - /* - * An array of which element [a] indicates which port on this - * switch should be used to send packets to that are destined - * for switch a. Can be NULL if there is only one switch chip. - */ - s8 rtable[DSA_MAX_SWITCHES]; -}; - -struct dsa_platform_data { - /* - * Reference to a Linux network interface that connects - * to the root switch chip of the tree. - */ - struct device *netdev; - struct net_device *of_netdev; +#define DSA_TAG_PROTO_NONE_VALUE 0 +#define DSA_TAG_PROTO_BRCM_VALUE 1 +#define DSA_TAG_PROTO_BRCM_PREPEND_VALUE 2 +#define DSA_TAG_PROTO_DSA_VALUE 3 +#define DSA_TAG_PROTO_EDSA_VALUE 4 +#define DSA_TAG_PROTO_GSWIP_VALUE 5 +#define DSA_TAG_PROTO_KSZ9477_VALUE 6 +#define DSA_TAG_PROTO_KSZ9893_VALUE 7 +#define DSA_TAG_PROTO_LAN9303_VALUE 8 +#define DSA_TAG_PROTO_MTK_VALUE 9 +#define DSA_TAG_PROTO_QCA_VALUE 10 +#define DSA_TAG_PROTO_TRAILER_VALUE 11 +#define DSA_TAG_PROTO_8021Q_VALUE 12 +#define DSA_TAG_PROTO_SJA1105_VALUE 13 - /* - * Info structs describing each of the switch chips - * connected via this network interface. 
- */ - int nr_chips; - struct dsa_chip_data *chip; +enum dsa_tag_protocol { + DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, + DSA_TAG_PROTO_BRCM = DSA_TAG_PROTO_BRCM_VALUE, + DSA_TAG_PROTO_BRCM_PREPEND = DSA_TAG_PROTO_BRCM_PREPEND_VALUE, + DSA_TAG_PROTO_DSA = DSA_TAG_PROTO_DSA_VALUE, + DSA_TAG_PROTO_EDSA = DSA_TAG_PROTO_EDSA_VALUE, + DSA_TAG_PROTO_GSWIP = DSA_TAG_PROTO_GSWIP_VALUE, + DSA_TAG_PROTO_KSZ9477 = DSA_TAG_PROTO_KSZ9477_VALUE, + DSA_TAG_PROTO_KSZ9893 = DSA_TAG_PROTO_KSZ9893_VALUE, + DSA_TAG_PROTO_LAN9303 = DSA_TAG_PROTO_LAN9303_VALUE, + DSA_TAG_PROTO_MTK = DSA_TAG_PROTO_MTK_VALUE, + DSA_TAG_PROTO_QCA = DSA_TAG_PROTO_QCA_VALUE, + DSA_TAG_PROTO_TRAILER = DSA_TAG_PROTO_TRAILER_VALUE, + DSA_TAG_PROTO_8021Q = DSA_TAG_PROTO_8021Q_VALUE, + DSA_TAG_PROTO_SJA1105 = DSA_TAG_PROTO_SJA1105_VALUE, }; struct packet_type; @@ -113,8 +71,37 @@ struct dsa_device_ops { struct packet_type *pt); int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); + /* Used to determine which traffic should match the DSA filter in + * eth_type_trans, and which, if any, should bypass it and be processed + * as regular on the master net device. 
+ */ + bool (*filter)(const struct sk_buff *skb, struct net_device *dev); + unsigned int overhead; + const char *name; + enum dsa_tag_protocol proto; +}; + +#define DSA_TAG_DRIVER_ALIAS "dsa_tag-" +#define MODULE_ALIAS_DSA_TAG_DRIVER(__proto) \ + MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE)) + +struct dsa_skb_cb { + struct sk_buff *clone; + bool deferred_xmit; }; +struct __dsa_skb_cb { + struct dsa_skb_cb cb; + u8 priv[48 - sizeof(struct dsa_skb_cb)]; +}; + +#define __DSA_SKB_CB(skb) ((struct __dsa_skb_cb *)((skb)->cb)) + +#define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb)) + +#define DSA_SKB_CB_PRIV(skb) \ + ((void *)(skb)->cb + offsetof(struct __dsa_skb_cb, priv)) + struct dsa_switch_tree { struct list_head list; @@ -185,6 +172,7 @@ struct dsa_port { struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); + bool (*filter)(const struct sk_buff *skb, struct net_device *dev); enum { DSA_PORT_TYPE_UNUSED = 0, @@ -197,16 +185,33 @@ struct dsa_port { unsigned int index; const char *name; const struct dsa_port *cpu_dp; + const char *mac; struct device_node *dn; unsigned int ageing_time; + bool vlan_filtering; u8 stp_state; struct net_device *bridge_dev; struct devlink_port devlink_port; struct phylink *pl; + + struct work_struct xmit_work; + struct sk_buff_head xmit_queue; + + /* + * Give the switch driver somewhere to hang its per-port private data + * structures (accessible from the tagger). 
+ */ + void *priv; + /* * Original copy of the master netdev ethtool_ops */ const struct ethtool_ops *orig_ethtool_ops; + + /* + * Original copy of the master netdev net_device_ops + */ + const struct net_device_ops *orig_ndo_ops; }; struct dsa_switch { @@ -260,6 +265,16 @@ struct dsa_switch { /* Number of switch port queues */ unsigned int num_tx_queues; + /* Disallow bridge core from requesting different VLAN awareness + * settings on ports if not hardware-supported + */ + bool vlan_filtering_is_global; + + /* In case vlan_filtering_is_global is set, the VLAN awareness state + * should be retrieved from here and not from the per-port settings. + */ + bool vlan_filtering; + unsigned long *bitmap; unsigned long _bitmap; @@ -327,18 +342,19 @@ static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port) return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index); } +static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) +{ + const struct dsa_switch *ds = dp->ds; + + if (ds->vlan_filtering_is_global) + return ds->vlan_filtering; + else + return dp->vlan_filtering; +} + typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { -#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) - /* - * Legacy probing. 
- */ - const char *(*probe)(struct device *dsa_dev, - struct device *host_dev, int sw_addr, - void **priv); -#endif - enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, int port); @@ -417,8 +433,7 @@ struct dsa_switch_ops { */ int (*port_enable)(struct dsa_switch *ds, int port, struct phy_device *phy); - void (*port_disable)(struct dsa_switch *ds, int port, - struct phy_device *phy); + void (*port_disable)(struct dsa_switch *ds, int port); /* * Port's MAC EEE settings @@ -453,6 +468,8 @@ struct dsa_switch_ops { void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); + int (*port_egress_floods)(struct dsa_switch *ds, int port, + bool unicast, bool multicast); /* * VLAN support @@ -520,6 +537,12 @@ struct dsa_switch_ops { struct sk_buff *clone, unsigned int type); bool (*port_rxtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb, unsigned int type); + + /* + * Deferred frame Tx + */ + netdev_tx_t (*port_deferred_xmit)(struct dsa_switch *ds, int port, + struct sk_buff *skb); }; struct dsa_switch_driver { @@ -527,20 +550,6 @@ struct dsa_switch_driver { const struct dsa_switch_ops *ops; }; -#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) -/* Legacy driver registration */ -void register_switch_driver(struct dsa_switch_driver *type); -void unregister_switch_driver(struct dsa_switch_driver *type); -struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); - -#else -static inline void register_switch_driver(struct dsa_switch_driver *type) { } -static inline void unregister_switch_driver(struct dsa_switch_driver *type) { } -static inline struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) -{ - return NULL; -} -#endif struct net_device *dsa_dev_to_net_device(struct device *dev); /* Keep inline for faster access in hot path */ @@ -552,6 +561,15 @@ static inline bool netdev_uses_dsa(struct net_device *dev) return false; } +static inline bool dsa_can_decode(const struct sk_buff *skb, + 
struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_NET_DSA) + return !dev->dsa_ptr->filter || dev->dsa_ptr->filter(skb, dev); +#endif + return false; +} + struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n); void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds); @@ -620,9 +638,76 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev, #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff) +netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data); int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data); int dsa_port_get_phy_sset_count(struct dsa_port *dp); void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up); +struct dsa_tag_driver { + const struct dsa_device_ops *ops; + struct list_head list; + struct module *owner; +}; + +void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[], + unsigned int count, + struct module *owner); +void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[], + unsigned int count); + +#define dsa_tag_driver_module_drivers(__dsa_tag_drivers_array, __count) \ +static int __init dsa_tag_driver_module_init(void) \ +{ \ + dsa_tag_drivers_register(__dsa_tag_drivers_array, __count, \ + THIS_MODULE); \ + return 0; \ +} \ +module_init(dsa_tag_driver_module_init); \ + \ +static void __exit dsa_tag_driver_module_exit(void) \ +{ \ + dsa_tag_drivers_unregister(__dsa_tag_drivers_array, __count); \ +} \ +module_exit(dsa_tag_driver_module_exit) + +/** + * module_dsa_tag_drivers() - Helper macro for registering DSA tag + * drivers + * @__ops_array: Array of tag driver structures + * + * Helper macro for DSA tag drivers which do not do anything special + * in module init/exit. Each module may only use this macro once, and + * calling it replaces module_init() and module_exit(). 
+ */ +#define module_dsa_tag_drivers(__ops_array) \ +dsa_tag_driver_module_drivers(__ops_array, ARRAY_SIZE(__ops_array)) + +#define DSA_TAG_DRIVER_NAME(__ops) dsa_tag_driver ## _ ## __ops + +/* Create a static structure we can build a linked list of dsa_tag + * drivers + */ +#define DSA_TAG_DRIVER(__ops) \ +static struct dsa_tag_driver DSA_TAG_DRIVER_NAME(__ops) = { \ + .ops = &__ops, \ +} + +/** + * module_dsa_tag_driver() - Helper macro for registering a single DSA tag + * driver + * @__ops: Single tag driver structure + * + * Helper macro for DSA tag drivers which do not do anything special + * in module init/exit. Each module may only use this macro once, and + * calling it replaces module_init() and module_exit(). + */ +#define module_dsa_tag_driver(__ops) \ +DSA_TAG_DRIVER(__ops); \ + \ +static struct dsa_tag_driver *dsa_tag_driver_array[] = { \ + &DSA_TAG_DRIVER_NAME(__ops) \ +}; \ +module_dsa_tag_drivers(dsa_tag_driver_array) #endif + diff --git a/include/net/dst.h b/include/net/dst.h index 6cf0870414c7..12b31c602cb0 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -19,17 +19,6 @@ #include <net/neighbour.h> #include <asm/processor.h> -#define DST_GC_MIN (HZ/10) -#define DST_GC_INC (HZ/2) -#define DST_GC_MAX (120*HZ) - -/* Each dst_entry has reference count and sits in some parent list(s). - * When it is removed from parent list, it is "freed" (dst_free). - * After this it enters dead state (dst->obsolete > 0) and if its refcnt - * is zero, it can be destroyed immediately, otherwise it is added - * to gc list and garbage collector periodically checks the refcnt. 
- */ - struct sk_buff; struct dst_entry { diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index c91ec732afd6..c49d7bfb5c30 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -2,10 +2,11 @@ #define __NET_FIB_NOTIFIER_H #include <linux/types.h> -#include <linux/module.h> #include <linux/notifier.h> #include <net/net_namespace.h> +struct module; + struct fib_notifier_info { struct net *net; int family; diff --git a/include/net/flow.h b/include/net/flow.h index 8ce21793094e..a50fb77a0b27 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -38,8 +38,9 @@ struct flowi_common { #define FLOWI_FLAG_KNOWN_NH 0x02 #define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; - struct flowi_tunnel flowic_tun_key; kuid_t flowic_uid; + struct flowi_tunnel flowic_tun_key; + __u32 flowic_multipath_hash; }; union flowi_uli { @@ -78,6 +79,7 @@ struct flowi4 { #define flowi4_secid __fl_common.flowic_secid #define flowi4_tun_key __fl_common.flowic_tun_key #define flowi4_uid __fl_common.flowic_uid +#define flowi4_multipath_hash __fl_common.flowic_multipath_hash /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 6a4586dcdede..7c5a8d9a8d2a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -209,8 +209,8 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */ FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ - FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ - FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ + FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_vlan */ + FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct 
flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */ @@ -221,7 +221,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */ FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */ FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ - FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */ + FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ @@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec return ((char *)target_container) + flow_dissector->offset[key_id]; } +struct bpf_flow_dissector { + struct bpf_flow_keys *flow_keys; + const struct sk_buff *skb; + void *data; + void *data_end; +}; + #endif diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h new file mode 100644 index 000000000000..6200900434e1 --- /dev/null +++ b/include/net/flow_offload.h @@ -0,0 +1,226 @@ +#ifndef _NET_FLOW_OFFLOAD_H +#define _NET_FLOW_OFFLOAD_H + +#include <net/flow_dissector.h> + +struct flow_match { + struct flow_dissector *dissector; + void *mask; + void *key; +}; + +struct flow_match_basic { + struct flow_dissector_key_basic *key, *mask; +}; + +struct flow_match_control { + struct flow_dissector_key_control *key, *mask; +}; + +struct flow_match_eth_addrs { + struct flow_dissector_key_eth_addrs *key, *mask; +}; + +struct flow_match_vlan { + struct flow_dissector_key_vlan *key, *mask; +}; + +struct flow_match_ipv4_addrs { + struct flow_dissector_key_ipv4_addrs *key, *mask; +}; + +struct flow_match_ipv6_addrs { + struct flow_dissector_key_ipv6_addrs *key, *mask; +}; + +struct flow_match_ip { + struct flow_dissector_key_ip *key, *mask; +}; + +struct flow_match_ports { + struct flow_dissector_key_ports *key, *mask; +}; + +struct flow_match_icmp { + struct flow_dissector_key_icmp 
*key, *mask; +}; + +struct flow_match_tcp { + struct flow_dissector_key_tcp *key, *mask; +}; + +struct flow_match_mpls { + struct flow_dissector_key_mpls *key, *mask; +}; + +struct flow_match_enc_keyid { + struct flow_dissector_key_keyid *key, *mask; +}; + +struct flow_match_enc_opts { + struct flow_dissector_key_enc_opts *key, *mask; +}; + +struct flow_rule; + +void flow_rule_match_basic(const struct flow_rule *rule, + struct flow_match_basic *out); +void flow_rule_match_control(const struct flow_rule *rule, + struct flow_match_control *out); +void flow_rule_match_eth_addrs(const struct flow_rule *rule, + struct flow_match_eth_addrs *out); +void flow_rule_match_vlan(const struct flow_rule *rule, + struct flow_match_vlan *out); +void flow_rule_match_ipv4_addrs(const struct flow_rule *rule, + struct flow_match_ipv4_addrs *out); +void flow_rule_match_ipv6_addrs(const struct flow_rule *rule, + struct flow_match_ipv6_addrs *out); +void flow_rule_match_ip(const struct flow_rule *rule, + struct flow_match_ip *out); +void flow_rule_match_ports(const struct flow_rule *rule, + struct flow_match_ports *out); +void flow_rule_match_tcp(const struct flow_rule *rule, + struct flow_match_tcp *out); +void flow_rule_match_icmp(const struct flow_rule *rule, + struct flow_match_icmp *out); +void flow_rule_match_mpls(const struct flow_rule *rule, + struct flow_match_mpls *out); +void flow_rule_match_enc_control(const struct flow_rule *rule, + struct flow_match_control *out); +void flow_rule_match_enc_ipv4_addrs(const struct flow_rule *rule, + struct flow_match_ipv4_addrs *out); +void flow_rule_match_enc_ipv6_addrs(const struct flow_rule *rule, + struct flow_match_ipv6_addrs *out); +void flow_rule_match_enc_ip(const struct flow_rule *rule, + struct flow_match_ip *out); +void flow_rule_match_enc_ports(const struct flow_rule *rule, + struct flow_match_ports *out); +void flow_rule_match_enc_keyid(const struct flow_rule *rule, + struct flow_match_enc_keyid *out); +void 
flow_rule_match_enc_opts(const struct flow_rule *rule, + struct flow_match_enc_opts *out); + +enum flow_action_id { + FLOW_ACTION_ACCEPT = 0, + FLOW_ACTION_DROP, + FLOW_ACTION_TRAP, + FLOW_ACTION_GOTO, + FLOW_ACTION_REDIRECT, + FLOW_ACTION_MIRRED, + FLOW_ACTION_VLAN_PUSH, + FLOW_ACTION_VLAN_POP, + FLOW_ACTION_VLAN_MANGLE, + FLOW_ACTION_TUNNEL_ENCAP, + FLOW_ACTION_TUNNEL_DECAP, + FLOW_ACTION_MANGLE, + FLOW_ACTION_ADD, + FLOW_ACTION_CSUM, + FLOW_ACTION_MARK, + FLOW_ACTION_WAKE, + FLOW_ACTION_QUEUE, + FLOW_ACTION_SAMPLE, + FLOW_ACTION_POLICE, +}; + +/* This mirrors the enum pedit_header_type definition for easy mapping from the + * tc pedit action. Legacy TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK is mapped to + * FLOW_ACT_MANGLE_UNSPEC, which is supported by no driver. + */ +enum flow_action_mangle_base { + FLOW_ACT_MANGLE_UNSPEC = 0, + FLOW_ACT_MANGLE_HDR_TYPE_ETH, + FLOW_ACT_MANGLE_HDR_TYPE_IP4, + FLOW_ACT_MANGLE_HDR_TYPE_IP6, + FLOW_ACT_MANGLE_HDR_TYPE_TCP, + FLOW_ACT_MANGLE_HDR_TYPE_UDP, +}; + +struct flow_action_entry { + enum flow_action_id id; + union { + u32 chain_index; /* FLOW_ACTION_GOTO */ + struct net_device *dev; /* FLOW_ACTION_REDIRECT */ + struct { /* FLOW_ACTION_VLAN */ + u16 vid; + __be16 proto; + u8 prio; + } vlan; + struct { /* FLOW_ACTION_MANGLE */ + enum flow_action_mangle_base htype; + u32 offset; + u32 mask; + u32 val; + } mangle; + const struct ip_tunnel_info *tunnel; /* FLOW_ACTION_TUNNEL_ENCAP */ + u32 csum_flags; /* FLOW_ACTION_CSUM */ + u32 mark; /* FLOW_ACTION_MARK */ + struct { /* FLOW_ACTION_QUEUE */ + u32 ctx; + u32 index; + u8 vf; + } queue; + struct { /* FLOW_ACTION_SAMPLE */ + struct psample_group *psample_group; + u32 rate; + u32 trunc_size; + bool truncate; + } sample; + struct { /* FLOW_ACTION_POLICE */ + s64 burst; + u64 rate_bytes_ps; + } police; + }; +}; + +struct flow_action { + unsigned int num_entries; + struct flow_action_entry entries[0]; +}; + +static inline bool flow_action_has_entries(const struct flow_action *action) +{ 
+ return action->num_entries; +} + +/** + * flow_offload_has_one_action() - check if exactly one action is present + * @action: tc filter flow offload action + * + * Returns true if exactly one action is present. + */ +static inline bool flow_offload_has_one_action(const struct flow_action *action) +{ + return action->num_entries == 1; +} + +#define flow_action_for_each(__i, __act, __actions) \ + for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[++__i]) + +struct flow_rule { + struct flow_match match; + struct flow_action action; +}; + +struct flow_rule *flow_rule_alloc(unsigned int num_actions); + +static inline bool flow_rule_match_key(const struct flow_rule *rule, + enum flow_dissector_key_id key) +{ + return dissector_uses_key(rule->match.dissector, key); +} + +struct flow_stats { + u64 pkts; + u64 bytes; + u64 lastused; +}; + +static inline void flow_stats_update(struct flow_stats *flow_stats, + u64 bytes, u64 pkts, u64 lastused) +{ + flow_stats->pkts += pkts; + flow_stats->bytes += bytes; + flow_stats->lastused = max_t(u64, flow_stats->lastused, lastused); +} + +#endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index be7c0fab3478..2caa86660ab0 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -107,21 +107,23 @@ begin: return skb; } +static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) +{ + u32 hash = skb_get_hash_perturb(skb, fq->perturbation); + + return reciprocal_scale(hash, fq->flows_cnt); +} + static struct fq_flow *fq_flow_classify(struct fq *fq, - struct fq_tin *tin, + struct fq_tin *tin, u32 idx, struct sk_buff *skb, fq_flow_get_default_t get_default_func) { struct fq_flow *flow; - u32 hash; - u32 idx; lockdep_assert_held(&fq->lock); - hash = skb_get_hash_perturb(skb, fq->perturbation); - idx = reciprocal_scale(hash, fq->flows_cnt); flow = &fq->flows[idx]; - if (flow->tin && flow->tin != tin) { flow = get_default_func(fq, tin, 
idx, skb); tin->collisions++; @@ -153,7 +155,7 @@ static void fq_recalc_backlog(struct fq *fq, } static void fq_tin_enqueue(struct fq *fq, - struct fq_tin *tin, + struct fq_tin *tin, u32 idx, struct sk_buff *skb, fq_skb_free_t free_func, fq_flow_get_default_t get_default_func) @@ -163,7 +165,7 @@ static void fq_tin_enqueue(struct fq *fq, lockdep_assert_held(&fq->lock); - flow = fq_flow_classify(fq, tin, skb, get_default_func); + flow = fq_flow_classify(fq, tin, idx, skb, get_default_func); flow->tin = tin; flow->backlog += skb->len; diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 946bd53a9f81..ca23860adbb9 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -10,7 +10,7 @@ struct gnet_stats_basic_cpu { struct gnet_stats_basic_packed bstats; struct u64_stats_sync syncp; -}; +} __aligned(2 * sizeof(u64)); struct net_rate_estimator; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index aa2e5888f18d..9292f1c588b7 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -26,6 +26,7 @@ struct genl_info; * @name: name of family * @version: protocol version * @maxattr: maximum number of attributes supported + * @policy: netlink policy * @netnsok: set to true if the family can handle network * namespaces and should be presented in all of them * @parallel_ops: operations can be called in parallel and aren't @@ -56,6 +57,7 @@ struct genl_family { unsigned int maxattr; bool netnsok; bool parallel_ops; + const struct nla_policy *policy; int (*pre_doit)(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); @@ -119,19 +121,23 @@ static inline int genl_err_attr(struct genl_info *info, int err, return err; } +enum genl_validate_flags { + GENL_DONT_VALIDATE_STRICT = BIT(0), + GENL_DONT_VALIDATE_DUMP = BIT(1), + GENL_DONT_VALIDATE_DUMP_STRICT = BIT(2), +}; + /** * struct genl_ops - generic netlink operations * @cmd: command identifier * @internal_flags: flags used by the family * @flags: flags - * 
@policy: attribute validation policy * @doit: standard command callback * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps */ struct genl_ops { - const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); int (*start)(struct netlink_callback *cb); @@ -141,6 +147,7 @@ struct genl_ops { u8 cmd; u8 internal_flags; u8 flags; + u8 validate; }; int genl_register_family(struct genl_family *family); @@ -165,6 +172,25 @@ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr) } /** + * genlmsg_parse_deprecated - parse attributes of a genetlink message + * @nlh: netlink message header + * @family: genetlink message family + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * @extack: extended ACK report struct + */ +static inline int genlmsg_parse_deprecated(const struct nlmsghdr *nlh, + const struct genl_family *family, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, + policy, NL_VALIDATE_LIBERAL, extack); +} + +/** * genlmsg_parse - parse attributes of a genetlink message * @nlh: netlink message header * @family: genetlink message family @@ -179,8 +205,8 @@ static inline int genlmsg_parse(const struct nlmsghdr *nlh, const struct nla_policy *policy, struct netlink_ext_ack *extack) { - return nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, - policy, extack); + return __nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, + policy, NL_VALIDATE_STRICT, extack); } /** diff --git a/include/net/geneve.h b/include/net/geneve.h index a7600ed55ea3..bced0b1d9fe4 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -4,6 +4,8 @@ #include <net/udp_tunnel.h> +#define GENEVE_UDP_PORT 6081 + /* Geneve Header: * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |Ver| Opt Len |O|C| Rsvd. | Protocol Type | @@ -60,6 +62,12 @@ struct genevehdr { struct geneve_opt options[]; }; +static inline bool netif_is_geneve(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "geneve"); +} + #ifdef CONFIG_INET struct net_device *geneve_dev_create_fb(struct net *net, const char *name, u8 name_assign_type, u16 dst_port); diff --git a/include/net/gre.h b/include/net/gre.h index 797142eee9cd..b60f212c16c6 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -37,8 +37,17 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs); -bool is_gretap_dev(const struct net_device *dev); -bool is_ip6gretap_dev(const struct net_device *dev); +static inline bool netif_is_gretap(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "gretap"); +} + +static inline bool netif_is_ip6gretap(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "ip6gretap"); +} static inline int gre_calc_hlen(__be16 o_flags) { diff --git a/include/net/icmp.h b/include/net/icmp.h index 3ef2743a8eec..e0f709d26dde 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -22,6 +22,7 @@ #include <net/inet_sock.h> #include <net/snmp.h> +#include <net/ip.h> struct icmp_err { int errno; @@ -39,9 +40,15 @@ struct net_proto_family; struct sk_buff; struct net; -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + const struct ip_options *opt); +static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); +} + int icmp_rcv(struct sk_buff *skb); -void icmp_err(struct 
sk_buff *skb, u32 info); +int icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); void icmp_out_count(struct net *net, unsigned char type); diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 8014153bdd49..459d355f6506 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2017 Intel Deutschland GmbH - * Copyright (c) 2018 Intel Corporation + * Copyright (c) 2018-2019 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -291,6 +291,12 @@ enum ieee80211_radiotap_he_bits { IEEE80211_RADIOTAP_HE_DATA6_NSTS = 0x000f, IEEE80211_RADIOTAP_HE_DATA6_DOPPLER = 0x0010, + IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_KNOWN = 0x0020, + IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW = 0x00c0, + IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_20MHZ = 0, + IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_40MHZ = 1, + IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_80MHZ = 2, + IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_160MHZ = 3, IEEE80211_RADIOTAP_HE_DATA6_TXOP = 0x7f00, IEEE80211_RADIOTAP_HE_DATA6_MIDAMBLE_PDCTY = 0x8000, }; @@ -343,6 +349,7 @@ struct ieee80211_radiotap_lsig { enum ieee80211_radiotap_zero_len_psdu_type { IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING = 0, + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED = 1, IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR = 0xff, }; diff --git a/include/net/ife.h b/include/net/ife.h index e117617e3c34..7e2538d8585b 100644 --- a/include/net/ife.h +++ b/include/net/ife.h @@ -4,7 +4,6 @@ #include <linux/etherdevice.h> #include <linux/rtnetlink.h> -#include <linux/module.h> #include <uapi/linux/ife.h> #if IS_ENABLED(CONFIG_NET_IFE) diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 6e91e38a31da..9db98af46985 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -115,9 +115,8 @@ int inet6_hash(struct sock 
*sk); ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ + (((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* _INET6_HASHTABLES_H */ diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 3ca969cbd161..975901a95c0f 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -2,6 +2,8 @@ #ifndef _INET_COMMON_H #define _INET_COMMON_H +#include <linux/indirect_call_wrapper.h> + extern const struct proto_ops inet_stream_ops; extern const struct proto_ops inet_dgram_ops; @@ -54,4 +56,11 @@ static inline void inet_ctl_sock_destroy(struct sock *sk) sock_release(sk->sk_socket); } +#define indirect_call_gro_receive(f2, f1, cb, head, skb) \ +({ \ + unlikely(gro_recursion_inc_test(skb)) ? 
\ + NAPI_GRO_CB(skb)->flush |= 1, NULL : \ + INDIRECT_CALL_2(cb, f2, f1, head, skb); \ +}) + #endif diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 371b3b45fd5c..ff40e1d08157 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -139,8 +139,8 @@ struct inet_connection_sock { } icsk_mtup; u32 icsk_user_timeout; - u64 icsk_ca_priv[88 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE (11 * sizeof(u64)) + u64 icsk_ca_priv[104 / sizeof(u64)]; +#define ICSK_CA_PRIV_SIZE (13 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ @@ -314,4 +314,29 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); + +#define TCP_PINGPONG_THRESH 3 + +static inline void inet_csk_enter_pingpong_mode(struct sock *sk) +{ + inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH; +} + +static inline void inet_csk_exit_pingpong_mode(struct sock *sk) +{ + inet_csk(sk)->icsk_ack.pingpong = 0; +} + +static inline bool inet_csk_in_pingpong_mode(struct sock *sk) +{ + return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; +} + +static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ack.pingpong < U8_MAX) + icsk->icsk_ack.pingpong++; +} #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 1662cbc0b46b..378904ee9129 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -56,7 +56,6 @@ struct frag_v6_compare_key { * @timer: queue expiration timer * @lock: spinlock protecting this frag * @refcnt: reference count of the queue - * @fragments: received fragments head * @rb_fragments: received fragments rb-tree root * @fragments_tail: received fragments tail * @last_run_head: the head of the last "run". 
see ip_fragment.c @@ -77,8 +76,7 @@ struct inet_frag_queue { struct timer_list timer; spinlock_t lock; refcount_t refcnt; - struct sk_buff *fragments; /* Used in IPv6. */ - struct rb_root rb_fragments; /* Used in IPv4. */ + struct rb_root rb_fragments; struct sk_buff *fragments_tail; struct sk_buff *last_run_head; ktime_t stamp; @@ -153,4 +151,16 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, long val) extern const u8 ip_frag_ecn_table[16]; +/* Return values of inet_frag_queue_insert() */ +#define IPFRAG_OK 0 +#define IPFRAG_DUP 1 +#define IPFRAG_OVERLAP 2 +int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, + int offset, int end); +void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + struct sk_buff *parent); +void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, + void *reasm_data); +struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q); + #endif diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 9141e95529e7..babb14136705 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -79,6 +79,7 @@ struct inet_ehash_bucket { struct inet_bind_bucket { possible_net_t ib_net; + int l3mdev; unsigned short port; signed char fastreuse; signed char fastreuseport; @@ -188,10 +189,21 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) hashinfo->ehash_locks = NULL; } +static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept, + bound_dev_if, dif, sdif); +#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, - const unsigned short snum); + const unsigned short snum, int l3mdev); void 
inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); @@ -225,6 +237,7 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, unsigned long high_limit); +int inet_hashinfo2_init_mod(struct inet_hashinfo *h); bool inet_ehash_insert(struct sock *sk, struct sock *osk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); @@ -282,9 +295,8 @@ static inline struct sock *inet_lookup_listener(struct net *net, #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_addrpair == (__cookie)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ + (((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ @@ -294,9 +306,8 @@ static inline struct sock *inet_lookup_listener(struct net *net, (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_daddr == (__saddr)) && \ ((__sk)->sk_rcv_saddr == (__daddr)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ + (((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* 64-bit arch */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a80fd0ac4563..e8eef85006aa 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -130,6 +130,27 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, return sk->sk_bound_dev_if; } +static inline int inet_sk_bound_l3mdev(const struct sock *sk) +{ +#ifdef CONFIG_NET_L3_MASTER_DEV + struct net *net = sock_net(sk); + + if (!net->ipv4.sysctl_tcp_l3mdev_accept) + return l3mdev_master_ifindex_by_index(net, + 
sk->sk_bound_dev_if); +#endif + + return 0; +} + +static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, + int dif, int sdif) +{ + if (!bound_dev_if) + return !sdif || l3mdev_accept; + return bound_dev_if == dif || bound_dev_if == sdif; +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 00b5e7825508..74ff688568a0 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -39,6 +39,7 @@ struct inet_peer { u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ + u32 n_redirects; unsigned long rate_last; /* * Once inet_peer is queued for deletion (refcnt == 0), following field diff --git a/include/net/ip.h b/include/net/ip.h index 72593e171d14..2d3cce7c3e8a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -38,6 +38,10 @@ #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ +extern unsigned int sysctl_fib_sync_mem; +extern unsigned int sysctl_fib_sync_mem_min; +extern unsigned int sysctl_fib_sync_mem_max; + struct sock; struct inet_skb_parm { @@ -155,6 +159,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, void ip_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *orig_dev); int ip_local_deliver(struct sk_buff *skb); +void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int proto); int ip_mr_input(struct sk_buff *skb); int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -421,7 +426,8 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, } struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, - int fc_mx_len); + int fc_mx_len, + struct netlink_ext_ack *extack); static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics) { if (fib_metrics != &dst_default_metrics && @@ -665,6 +671,8 @@ static inline 
int ip_options_echo(struct net *net, struct ip_options *dopt, } void ip_options_fragment(struct sk_buff *skb); +int __ip_options_compile(struct net *net, struct ip_options *opt, + struct sk_buff *skb, __be32 *info); int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb); int ip_options_get(struct net *net, struct ip_options_rcu **optp, @@ -673,7 +681,7 @@ int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, unsigned char __user *data, int optlen); void ip_options_undo(struct ip_options *opt); void ip_forward_options(struct sk_buff *skb); -int ip_options_rcv_srr(struct sk_buff *skb); +int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); /* * Functions provided by ip_sockglue.c @@ -714,7 +722,7 @@ extern int sysctl_icmp_msgs_burst; int ip_misc_proc_init(void); #endif -int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, +int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family, struct netlink_ext_ack *extack); #endif /* _IP_H */ diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 84097010237c..40105738e2f6 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -19,6 +19,7 @@ #include <linux/notifier.h> #include <net/dst.h> #include <net/flow.h> +#include <net/ip_fib.h> #include <net/netlink.h> #include <net/inetpeer.h> #include <net/fib_notifier.h> @@ -50,7 +51,8 @@ struct fib6_config { u32 fc_protocol; u16 fc_type; /* only 8 bits are used */ u16 fc_delete_all_nh : 1, - __unused : 15; + fc_ignore_dev_down:1, + __unused : 14; struct in6_addr fc_dst; struct in6_addr fc_src; @@ -124,13 +126,11 @@ struct rt6_exception { #define FIB6_MAX_DEPTH 5 struct fib6_nh { - struct in6_addr nh_gw; - struct net_device *nh_dev; - struct lwtunnel_state *nh_lwtstate; + struct fib_nh_common nh_common; - unsigned int nh_flags; - atomic_t nh_upper_bound; - int nh_weight; +#ifdef CONFIG_IPV6_ROUTER_PREF + unsigned long last_probe; +#endif }; struct fib6_info { @@ 
-146,7 +146,7 @@ struct fib6_info { struct list_head fib6_siblings; unsigned int fib6_nsiblings; - atomic_t fib6_ref; + refcount_t fib6_ref; unsigned long expires; struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] @@ -159,10 +159,6 @@ struct fib6_info { struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; -#ifdef CONFIG_IPV6_ROUTER_PREF - unsigned long last_probe; -#endif - u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; @@ -194,6 +190,14 @@ struct rt6_info { unsigned short rt6i_nfheader_len; }; +struct fib6_result { + struct fib6_nh *nh; + struct fib6_info *f6i; + u32 fib6_flags; + u8 fib6_type; + struct rt6_info *rt6; +}; + #define for_each_fib6_node_rt_rcu(fn) \ for (rt = rcu_dereference((fn)->leaf); rt; \ rt = rcu_dereference(rt->fib6_next)) @@ -281,17 +285,17 @@ void fib6_info_destroy_rcu(struct rcu_head *head); static inline void fib6_info_hold(struct fib6_info *f6i) { - atomic_inc(&f6i->fib6_ref); + refcount_inc(&f6i->fib6_ref); } static inline bool fib6_info_hold_safe(struct fib6_info *f6i) { - return atomic_inc_not_zero(&f6i->fib6_ref); + return refcount_inc_not_zero(&f6i->fib6_ref); } static inline void fib6_info_release(struct fib6_info *f6i) { - if (f6i && atomic_dec_and_test(&f6i->fib6_ref)) + if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) call_rcu(&f6i->rcu, fib6_info_destroy_rcu); } @@ -388,18 +392,17 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, /* called with rcu lock held; can return error pointer * caller needs to select path */ -struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, - int flags); +int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, + struct fib6_result *res, int flags); /* called with rcu lock held; caller needs to select path */ -struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, int strict); - -struct fib6_info 
*fib6_multipath_select(const struct net *net, - struct fib6_info *match, - struct flowi6 *fl6, int oif, - const struct sk_buff *skb, int strict); +int fib6_table_lookup(struct net *net, struct fib6_table *table, + int oif, struct flowi6 *fl6, struct fib6_result *res, + int strict); +void fib6_select_path(const struct net *net, struct fib6_result *res, + struct flowi6 *fl6, int oif, bool have_oif_match, + const struct sk_buff *skb, int strict); struct fib6_node *fib6_node_lookup(struct fib6_node *root, const struct in6_addr *daddr, const struct in6_addr *saddr); @@ -440,14 +443,13 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) { - return f6i->fib6_nh.nh_dev; + return f6i->fib6_nh.fib_nh_dev; } -static inline -struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i) -{ - return f6i->fib6_nh.nh_lwtstate; -} +int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +void fib6_nh_release(struct fib6_nh *fib6_nh); void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 7ab119936e69..4790beaa86e0 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -68,8 +68,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { - return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == - RTF_GATEWAY; + /* the RTF_ADDRCONF flag filters out RA's */ + return !(f6i->fib6_flags & RTF_ADDRCONF) && + f6i->fib6_nh.fib_nh_gw_family; } void ip6_route_input(struct sk_buff *skb); @@ -181,7 +182,7 @@ int rt6_dump_route(struct fib6_info *f6i, void *p_arg); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void 
rt6_clean_tohost(struct net *net, struct in6_addr *gateway); -void rt6_sync_up(struct net_device *dev, unsigned int nh_flags); +void rt6_sync_up(struct net_device *dev, unsigned char nh_flags); void rt6_disable_ip(struct net_device *dev, unsigned long event); void rt6_sync_down_dev(struct net_device *dev, unsigned long event); void rt6_multipath_rebalance(struct fib6_info *f6i); @@ -274,9 +275,11 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev && - ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) && - !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate); + struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh; + + return nha->fib_nh_dev == nhb->fib_nh_dev && + ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) && + !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); } static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) @@ -300,8 +303,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) return mtu; } -u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, - struct in6_addr *saddr); +u32 ip6_mtu_from_fib6(const struct fib6_result *res, + const struct in6_addr *daddr, + const struct in6_addr *saddr); struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, struct net_device *dev, struct sk_buff *skb, diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 236e40ba06bf..69b4bcf880c9 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -69,6 +69,8 @@ struct ip6_tnl_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi6 *fl6); + int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info); }; #ifdef CONFIG_INET diff --git a/include/net/ip_fib.h 
b/include/net/ip_fib.h index c5969762a8f4..d0e28f4ab099 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -32,10 +32,14 @@ struct fib_config { u8 fc_protocol; u8 fc_scope; u8 fc_type; - /* 3 bytes unused */ + u8 fc_gw_family; + /* 2 bytes unused */ u32 fc_table; __be32 fc_dst; - __be32 fc_gw; + union { + __be32 fc_gw4; + struct in6_addr fc_gw6; + }; int fc_oif; u32 fc_flags; u32 fc_priority; @@ -76,27 +80,49 @@ struct fnhe_hash_bucket { #define FNHE_HASH_SIZE (1 << FNHE_HASH_SHIFT) #define FNHE_RECLAIM_DEPTH 5 +struct fib_nh_common { + struct net_device *nhc_dev; + int nhc_oif; + unsigned char nhc_scope; + u8 nhc_family; + u8 nhc_gw_family; + unsigned char nhc_flags; + struct lwtunnel_state *nhc_lwtstate; + + union { + __be32 ipv4; + struct in6_addr ipv6; + } nhc_gw; + + int nhc_weight; + atomic_t nhc_upper_bound; + + /* v4 specific, but allows fib6_nh with v4 routes */ + struct rtable __rcu * __percpu *nhc_pcpu_rth_output; + struct rtable __rcu *nhc_rth_input; + struct fnhe_hash_bucket __rcu *nhc_exceptions; +}; + struct fib_nh { - struct net_device *nh_dev; + struct fib_nh_common nh_common; struct hlist_node nh_hash; struct fib_info *nh_parent; - unsigned int nh_flags; - unsigned char nh_scope; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - int nh_weight; - atomic_t nh_upper_bound; -#endif #ifdef CONFIG_IP_ROUTE_CLASSID __u32 nh_tclassid; #endif - int nh_oif; - __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; - struct rtable __rcu * __percpu *nh_pcpu_rth_output; - struct rtable __rcu *nh_rth_input; - struct fnhe_hash_bucket __rcu *nh_exceptions; - struct lwtunnel_state *nh_lwtstate; +#define fib_nh_family nh_common.nhc_family +#define fib_nh_dev nh_common.nhc_dev +#define fib_nh_oif nh_common.nhc_oif +#define fib_nh_flags nh_common.nhc_flags +#define fib_nh_lws nh_common.nhc_lwtstate +#define fib_nh_scope nh_common.nhc_scope +#define fib_nh_gw_family nh_common.nhc_gw_family +#define fib_nh_gw4 nh_common.nhc_gw.ipv4 +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 
+#define fib_nh_weight nh_common.nhc_weight +#define fib_nh_upper_bound nh_common.nhc_upper_bound }; /* @@ -123,9 +149,10 @@ struct fib_info { #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; + bool fib_nh_is_v6; struct rcu_head rcu; struct fib_nh fib_nh[0]; -#define fib_dev fib_nh[0].nh_dev +#define fib_dev fib_nh[0].fib_nh_dev }; @@ -135,15 +162,16 @@ struct fib_rule; struct fib_table; struct fib_result { - __be32 prefix; - unsigned char prefixlen; - unsigned char nh_sel; - unsigned char type; - unsigned char scope; - u32 tclassid; - struct fib_info *fi; - struct fib_table *table; - struct hlist_head *fa_head; + __be32 prefix; + unsigned char prefixlen; + unsigned char nh_sel; + unsigned char type; + unsigned char scope; + u32 tclassid; + struct fib_nh_common *nhc; + struct fib_info *fi; + struct fib_table *table; + struct hlist_head *fa_head; }; struct fib_result_nl { @@ -161,11 +189,10 @@ struct fib_result_nl { int err; }; -#ifdef CONFIG_IP_ROUTE_MULTIPATH -#define FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) -#else /* CONFIG_IP_ROUTE_MULTIPATH */ -#define FIB_RES_NH(res) ((res).fi->fib_nh[0]) -#endif /* CONFIG_IP_ROUTE_MULTIPATH */ +static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) +{ + return &fi->fib_nh[nhsel].nh_common; +} #ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 @@ -174,18 +201,11 @@ struct fib_result_nl { #endif __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); +__be32 fib_result_prefsrc(struct net *net, struct fib_result *res); -#define FIB_RES_SADDR(net, res) \ - ((FIB_RES_NH(res).nh_saddr_genid == \ - atomic_read(&(net)->ipv4.dev_addr_genid)) ? 
\ - FIB_RES_NH(res).nh_saddr : \ - fib_info_update_nh_saddr((net), &FIB_RES_NH(res))) -#define FIB_RES_GW(res) (FIB_RES_NH(res).nh_gw) -#define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) -#define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) - -#define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \ - FIB_RES_SADDR(net, res)) +#define FIB_RES_NHC(res) ((res).nhc) +#define FIB_RES_DEV(res) (FIB_RES_NHC(res)->nhc_dev) +#define FIB_RES_OIF(res) (FIB_RES_NHC(res)->nhc_oif) struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ @@ -241,7 +261,7 @@ int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb, struct fib_dump_filter *filter); -int fib_table_flush(struct net *net, struct fib_table *table); +int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); @@ -383,6 +403,8 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net, /* Exported by fib_frontend.c */ extern const struct nla_policy rtm_ipv4_policy[]; void ip_fib_init(void); +int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, + struct netlink_ext_ack *extack); __be32 fib_compute_spec_dst(struct sk_buff *skb); bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, @@ -405,7 +427,7 @@ int fib_unmerge(struct net *net); int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); -int fib_sync_up(struct net_device *dev, unsigned int nh_flags); +int fib_sync_up(struct net_device *dev, unsigned char nh_flags); 
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -416,6 +438,15 @@ void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); +int fib_nh_init(struct net *net, struct fib_nh *fib_nh, + struct fib_config *cfg, int nh_weight, + struct netlink_ext_ack *extack); +void fib_nh_release(struct net *net, struct fib_nh *fib_nh); +int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *fc_encap, + u16 fc_encap_type, void *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +void fib_nh_common_release(struct fib_nh_common *nhc); + /* Exported by fib_trie.c */ void fib_trie_init(void); struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); @@ -423,10 +454,12 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) { #ifdef CONFIG_IP_ROUTE_CLASSID + struct fib_nh_common *nhc = res->nhc; + struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); #ifdef CONFIG_IP_MULTIPLE_TABLES u32 rtag; #endif - *itag = FIB_RES_NH(*res).nh_tclassid<<16; + *itag = nh->nh_tclassid << 16; #ifdef CONFIG_IP_MULTIPLE_TABLES rtag = res->tclassid; if (*itag == 0) @@ -467,4 +500,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, struct netlink_callback *cb); + +int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, + unsigned char *flags, bool skip_oif); +int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, + int nh_weight); #endif /* _NET_FIB_H */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 5ce926701bd0..af645604f328 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -241,7 +241,7 @@ static inline void 
ip_tunnel_init_flow(struct flowi4 *fl4, int proto, __be32 daddr, __be32 saddr, __be32 key, __u8 tos, int oif, - __u32 mark) + __u32 mark, __u32 tun_inner_hash) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = oif; @@ -251,6 +251,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, fl4->flowi4_proto = proto; fl4->fl4_gre_key = key; fl4->flowi4_mark = mark; + fl4->flowi4_multipath_hash = tun_inner_hash; } int ip_tunnel_init(struct net_device *dev); @@ -267,7 +268,7 @@ void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, - const u8 proto); + const u8 proto, int tunnel_hlen); int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); @@ -292,6 +293,7 @@ struct ip_tunnel_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4); + int (*err_handler)(struct sk_buff *skb, u32 info); }; #define MAX_IPTUN_ENCAP_OPS 8 @@ -307,6 +309,26 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); +static inline bool pskb_inet_may_pull(struct sk_buff *skb) +{ + int nhlen; + + switch (skb->protocol) { +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + nhlen = sizeof(struct ipv6hdr); + break; +#endif + case htons(ETH_P_IP): + nhlen = sizeof(struct iphdr); + break; + default: + nhlen = 0; + } + + return pskb_network_may_pull(skb, nhlen); +} + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 
a0d2e0bb9a94..2ac40135b576 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -453,9 +453,6 @@ struct ip_vs_protocol { int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph); - int (*csum_check)(int af, struct sk_buff *skb, - struct ip_vs_protocol *pp); - const char *(*state_name)(int state); void (*state_transition)(struct ip_vs_conn *cp, int direction, @@ -603,6 +600,9 @@ struct ip_vs_dest_user_kern { /* Address family of addr */ u16 af; + + u16 tun_type; /* tunnel type */ + __be16 tun_port; /* tunnel port */ }; @@ -663,6 +663,8 @@ struct ip_vs_dest { atomic_t conn_flags; /* flags to copy to conn */ atomic_t weight; /* server weight */ atomic_t last_weight; /* server latest weight */ + __u16 tun_type; /* tunnel type */ + __be16 tun_port; /* tunnel port */ refcount_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 829650540780..daf80863d3a5 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -975,6 +975,8 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_forward(struct sk_buff *skb); int ip6_input(struct sk_buff *skb); int ip6_mc_input(struct sk_buff *skb); +void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, + bool have_final); int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 6ced1e6899b6..1f77fb4dc79d 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -82,12 +82,18 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); /* Don't send error if the first segment did not arrive. 
*/ - head = fq->q.fragments; - if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) + if (!(fq->q.flags & INET_FRAG_FIRST_IN)) + goto out; + + /* sk_buff::dev and sk_buff::rbnode are unionized. So we + * pull the head out of the tree in order to be able to + * deal with head->dev. + */ + head = inet_frag_pull_head(&fq->q); + if (!head) goto out; head->dev = dev; - skb_get(head); spin_unlock(&fq->q.lock); icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h new file mode 100644 index 000000000000..6c0c4fde16f8 --- /dev/null +++ b/include/net/ipv6_stubs.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _IPV6_STUBS_H +#define _IPV6_STUBS_H + +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/dst.h> +#include <net/flow.h> +#include <net/neighbour.h> +#include <net/sock.h> + +/* structs from net/ip6_fib.h */ +struct fib6_info; +struct fib6_nh; +struct fib6_config; +struct fib6_result; + +/* This is ugly, ideally these symbols should be built + * into the core kernel. 
+ */ +struct ipv6_stub { + int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex, + const struct in6_addr *addr); + int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, + const struct in6_addr *addr); + int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, + struct dst_entry **dst, struct flowi6 *fl6); + int (*ipv6_route_input)(struct sk_buff *skb); + + struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); + int (*fib6_lookup)(struct net *net, int oif, struct flowi6 *fl6, + struct fib6_result *res, int flags); + int (*fib6_table_lookup)(struct net *net, struct fib6_table *table, + int oif, struct flowi6 *fl6, + struct fib6_result *res, int flags); + void (*fib6_select_path)(const struct net *net, struct fib6_result *res, + struct flowi6 *fl6, int oif, bool oif_match, + const struct sk_buff *skb, int strict); + u32 (*ip6_mtu_from_fib6)(const struct fib6_result *res, + const struct in6_addr *daddr, + const struct in6_addr *saddr); + + int (*fib6_nh_init)(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); + void (*fib6_nh_release)(struct fib6_nh *fib6_nh); + void (*udpv6_encap_enable)(void); + void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, + const struct in6_addr *solicited_addr, + bool router, bool solicited, bool override, bool inc_opt); + struct neigh_table *nd_tbl; +}; +extern const struct ipv6_stub *ipv6_stub __read_mostly; + +/* A stub used by bpf helpers. 
Similarly ugly as ipv6_stub */ +struct ipv6_bpf_stub { + int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock); + struct sock *(*udp6_lib_lookup)(struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif, int sdif, struct udp_table *tbl, + struct sk_buff *skb); +}; +extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; + +#endif diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 3832099289c5..5175fd63cd82 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -101,6 +101,17 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) return master; } +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex); +static inline +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex) +{ + rcu_read_lock(); + ifindex = l3mdev_master_upper_ifindex_by_index_rcu(net, ifindex); + rcu_read_unlock(); + + return ifindex; +} + u32 l3mdev_fib_table_rcu(const struct net_device *dev); u32 l3mdev_fib_table_by_index(struct net *net, int ifindex); static inline u32 l3mdev_fib_table(const struct net_device *dev) @@ -142,7 +153,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(skb->dev)) master = netdev_master_upper_dev_get_rcu(skb->dev); - else if (netif_is_l3_master(skb->dev)) + else if (netif_is_l3_master(skb->dev) || + netif_has_l3_rx_handler(skb->dev)) master = skb->dev; if (master && master->l3mdev_ops->l3mdev_l3_rcv) @@ -208,6 +220,17 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex) +{ + return 0; +} +static inline +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex) +{ + return 0; +} + +static inline struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) { return NULL; diff --git 
a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 33fd9ba7e0e5..5d6c5b1fc695 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -118,14 +118,16 @@ int lwtunnel_build_state(u16 encap_type, unsigned int family, const void *cfg, struct lwtunnel_state **lws, struct netlink_ext_ack *extack); -int lwtunnel_fill_encap(struct sk_buff *skb, - struct lwtunnel_state *lwtstate); +int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, + int encap_attr, int encap_type_attr); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); int lwtunnel_input(struct sk_buff *skb); int lwtunnel_xmit(struct sk_buff *skb); +int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, + bool ingress); static inline void lwtunnel_set_redirect(struct dst_entry *dst) { @@ -217,7 +219,8 @@ static inline int lwtunnel_build_state(u16 encap_type, } static inline int lwtunnel_fill_encap(struct sk_buff *skb, - struct lwtunnel_state *lwtstate) + struct lwtunnel_state *lwtstate, + int encap_attr, int encap_type_attr) { return 0; } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 71985e95d2d9..72080d9d617e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6,7 +6,7 @@ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -108,9 +108,15 @@ * The driver is expected to initialize its private per-queue data for stations * and interfaces in the .add_interface and 
.sta_add ops. * - * The driver can't access the queue directly. To dequeue a frame, it calls - * ieee80211_tx_dequeue(). Whenever mac80211 adds a new frame to a queue, it - * calls the .wake_tx_queue driver op. + * The driver can't access the queue directly. To dequeue a frame from a + * txq, it calls ieee80211_tx_dequeue(). Whenever mac80211 adds a new frame to a + * queue, it calls the .wake_tx_queue driver op. + * + * Drivers can optionally delegate responsibility for scheduling queues to + * mac80211, to take advantage of airtime fairness accounting. In this case, to + * obtain the next queue to pull frames from, the driver calls + * ieee80211_next_txq(). The driver is then expected to return the txq using + * ieee80211_return_txq(). * * For AP powersave TIM handling, the driver only needs to indicate if it has * buffered packets in the driver specific data structures by calling @@ -467,7 +473,7 @@ struct ieee80211_mu_group_data { }; /** - * ieee80211_ftm_responder_params - FTM responder parameters + * struct ieee80211_ftm_responder_params - FTM responder parameters * * @lci: LCI subelement content * @civicloc: CIVIC location subelement content @@ -496,6 +502,8 @@ struct ieee80211_ftm_responder_params { * @uora_ocw_range: UORA element's OCW Range field * @frame_time_rts_th: HE duration RTS threshold, in units of 32us * @he_support: does this BSS support HE + * @twt_requester: does this BSS support TWT requester (relevant for managed + * mode only, set if the AP advertises TWT responder role) * @assoc: association status * @ibss_joined: indicates whether this station is part of an IBSS * or not @@ -583,6 +591,14 @@ struct ieee80211_ftm_responder_params { * @ftm_responder: whether to enable or disable fine timing measurement FTM * responder functionality. * @ftmr_params: configurable lci/civic parameter when enabling FTM responder. 
+ * @nontransmitted: this BSS is a nontransmitted BSS profile + * @transmitter_bssid: the address of transmitter AP + * @bssid_index: index inside the multiple BSSID set + * @bssid_indicator: 2^bssid_indicator is the maximum number of APs in set + * @ema_ap: AP supports enhancements of discovery and advertisement of + * nontransmitted BSSIDs + * @profile_periodicity: the least number of beacon frames need to be received + * in order to discover all the nontransmitted BSSIDs in the set. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -594,6 +610,7 @@ struct ieee80211_bss_conf { u8 uora_ocw_range; u16 frame_time_rts_th; bool he_support; + bool twt_requester; /* association related data */ bool assoc, ibss_joined; bool ibss_creator; @@ -635,6 +652,13 @@ struct ieee80211_bss_conf { bool protected_keep_alive; bool ftm_responder; struct ieee80211_ftm_responder_params *ftmr_params; + /* Multiple BSSID data */ + bool nontransmitted; + u8 transmitter_bssid[ETH_ALEN]; + u8 bssid_index; + u8 bssid_indicator; + bool ema_ap; + u8 profile_periodicity; }; /** @@ -783,6 +807,7 @@ enum mac80211_tx_info_flags { * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame * @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path + * @IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP: This frame skips mesh path lookup * * These flags are used in tx_info->control.flags. 
*/ @@ -792,6 +817,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_RATE_INJECT = BIT(2), IEEE80211_TX_CTRL_AMSDU = BIT(3), IEEE80211_TX_CTRL_FAST_XMIT = BIT(4), + IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP = BIT(5), }; /* @@ -933,8 +959,32 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) * @band: the band to transmit on (use for checking for races) * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC * @ack_frame_id: internal frame ID for TX status, used internally - * @control: union for control data - * @status: union for status data + * @control: union part for control data + * @control.rates: TX rates array to try + * @control.rts_cts_rate_idx: rate for RTS or CTS + * @control.use_rts: use RTS + * @control.use_cts_prot: use RTS/CTS + * @control.short_preamble: use short preamble (CCK only) + * @control.skip_table: skip externally configured rate table + * @control.jiffies: timestamp for expiry on powersave clients + * @control.vif: virtual interface (may be NULL) + * @control.hw_key: key to encrypt with (may be NULL) + * @control.flags: control flags, see &enum mac80211_tx_control_flags + * @control.enqueue_time: enqueue time (for iTXQs) + * @driver_rates: alias to @control.rates to reserve space + * @pad: padding + * @rate_driver_data: driver use area if driver needs @control.rates + * @status: union part for status data + * @status.rates: attempted rates + * @status.ack_signal: ACK signal + * @status.ampdu_ack_len: AMPDU ack length + * @status.ampdu_len: AMPDU length + * @status.antenna: (legacy, kept only for iwlegacy) + * @status.tx_time: airtime consumed for transmission + * @status.is_valid_ack_signal: ACK signal is valid + * @status.status_driver_data: driver use area + * @ack: union part for pure ACK data + * @ack.cookie: cookie for the ACK * @driver_data: array of driver_data pointers * @ampdu_ack_len: number of acked aggregated frames. * relevant only if IEEE80211_TX_STAT_AMPDU was set. 
@@ -1154,6 +1204,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known * @RX_FLAG_RADIOTAP_HE: HE radiotap data is present * (&struct ieee80211_radiotap_he, mac80211 will fill in + * * - DATA3_DATA_MCS * - DATA3_DATA_DCM * - DATA3_CODING @@ -1161,6 +1212,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * - DATA5_DATA_BW_RU_ALLOC * - DATA6_NSTS * - DATA3_STBC + * * from the RX info data, so leave those zeroed when building this data) * @RX_FLAG_RADIOTAP_HE_MU: HE MU radiotap data is present * (&struct ieee80211_radiotap_he_mu) @@ -1211,7 +1263,7 @@ enum mac80211_rx_flags { * @RX_ENC_FLAG_HT_GF: This frame was received in a HT-greenfield transmission, * if the driver fills this value it should add * %IEEE80211_RADIOTAP_MCS_HAVE_FMT - * to hw.radiotap_mcs_details to advertise that fact + * to @hw.radiotap_mcs_details to advertise that fact. * @RX_ENC_FLAG_LDPC: LDPC was used * @RX_ENC_FLAG_STBC_MASK: STBC 2 bit bitmask. 1 - Nss=1, 2 - Nss=2, 3 - Nss=3 * @RX_ENC_FLAG_BF: packet was beamformed @@ -1469,6 +1521,9 @@ struct ieee80211_conf { * scheduled channel switch, as indicated by the AP. * @chandef: the new channel to switch to * @count: the number of TBTT's until the channel switch event + * @delay: maximum delay between the time the AP transmitted the last beacon in + * current channel and the expected time of the first beacon in the new + * channel, expressed in TU. */ struct ieee80211_channel_switch { u64 timestamp; @@ -1476,6 +1531,7 @@ struct ieee80211_channel_switch { bool block_tx; struct cfg80211_chan_def chandef; u8 count; + u32 delay; }; /** @@ -1643,6 +1699,7 @@ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); * @IEEE80211_KEY_FLAG_PUT_MIC_SPACE: This flag should be set by the driver for * a TKIP key if it only requires MIC space. Do not set together with * @IEEE80211_KEY_FLAG_GENERATE_MMIC on the same key. 
+ * @IEEE80211_KEY_FLAG_NO_AUTO_TX: Key needs explicit Tx activation. */ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0), @@ -1654,6 +1711,7 @@ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_RX_MGMT = BIT(6), IEEE80211_KEY_FLAG_RESERVE_TAILROOM = BIT(7), IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8), + IEEE80211_KEY_FLAG_NO_AUTO_TX = BIT(9), }; /** @@ -1834,6 +1892,24 @@ struct ieee80211_sta_rates { }; /** + * struct ieee80211_sta_txpwr - station txpower configuration + * + * Used to configure txpower for station. + * + * @power: indicates the tx power, in dBm, to be used when sending data frames + * to the STA. + * @type: In particular if TPC %type is NL80211_TX_POWER_LIMITED then tx power + * will be less than or equal to specified from userspace, whereas if TPC + * %type is NL80211_TX_POWER_AUTOMATIC then it indicates default tx power. + * NL80211_TX_POWER_FIXED is not a valid configuration option for + * per peer TPC. + */ +struct ieee80211_sta_txpwr { + s16 power; + enum nl80211_tx_power_setting type; +}; + +/** * struct ieee80211_sta - station table entry * * A station table entry represents a station we are possibly @@ -1919,6 +1995,7 @@ struct ieee80211_sta { bool support_p2p_ps; u16 max_rc_amsdu_len; u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS]; + struct ieee80211_sta_txpwr txpwr; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1]; @@ -2181,6 +2258,17 @@ struct ieee80211_txq { * MMPDUs on station interfaces. This of course requires the driver to use * TXQs to start with. * + * @IEEE80211_HW_TX_STATUS_NO_AMPDU_LEN: Driver does not report accurate A-MPDU + * length in tx status information + * + * @IEEE80211_HW_SUPPORTS_MULTI_BSSID: Hardware supports multi BSSID + * + * @IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID: Hardware supports multi BSSID + * only for HE APs. Applies if @IEEE80211_HW_SUPPORTS_MULTI_BSSID is set. 
+ * + * @IEEE80211_HW_EXT_KEY_ID_NATIVE: Driver and hardware are supporting Extended + * Key ID and can handle two unicast keys per station for Rx and Tx. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2229,6 +2317,10 @@ enum ieee80211_hw_flags { IEEE80211_HW_BUFF_MMPDU_TXQ, IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, IEEE80211_HW_STA_MMPDU_TXQ, + IEEE80211_HW_TX_STATUS_NO_AMPDU_LEN, + IEEE80211_HW_SUPPORTS_MULTI_BSSID, + IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID, + IEEE80211_HW_EXT_KEY_ID_NATIVE, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2320,12 +2412,14 @@ enum ieee80211_hw_flags { * @radiotap_he: HE radiotap validity flags * * @radiotap_timestamp: Information for the radiotap timestamp field; if the - * 'units_pos' member is set to a non-negative value it must be set to - * a combination of a IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a - * IEEE80211_RADIOTAP_TIMESTAMP_SPOS_* value, and then the timestamp + * @units_pos member is set to a non-negative value then the timestamp * field will be added and populated from the &struct ieee80211_rx_status - * device_timestamp. If the 'accuracy' member is non-negative, it's put - * into the accuracy radiotap field and the accuracy known flag is set. + * device_timestamp. + * @radiotap_timestamp.units_pos: Must be set to a combination of a + * IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a + * IEEE80211_RADIOTAP_TIMESTAMP_SPOS_* value. + * @radiotap_timestamp.accuracy: If non-negative, fills the accuracy in the + * radiotap field and the accuracy known flag will be set. * * @netdev_features: netdev features to be set in each netdev created * from this HW. Note that not all features are usable with mac80211, @@ -2351,6 +2445,9 @@ enum ieee80211_hw_flags { * @tx_sk_pacing_shift: Pacing shift to set on TCP sockets when frames from * them are encountered. The default should typically not be changed, * unless the driver has good reasons for needing more buffers. 
+ * + * @weight_multiplier: Driver specific airtime weight multiplier used while + * refilling deficit of each TXQ. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2387,6 +2484,7 @@ struct ieee80211_hw { const struct ieee80211_cipher_scheme *cipher_schemes; u8 max_nan_de_entries; u8 tx_sk_pacing_shift; + u8 weight_multiplier; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, @@ -3239,6 +3337,11 @@ enum ieee80211_reconfig_type { * When the scan finishes, ieee80211_scan_completed() must be called; * note that it also must be called when the scan cannot finish due to * any error unless this callback returned a negative error code. + * This callback is also allowed to return the special return value 1, + * this indicates that hardware scan isn't desirable right now and a + * software scan should be done instead. A driver wishing to use this + * capability must ensure its (hardware) scan capabilities aren't + * advertised as more capable than mac80211's software scan is. * The callback can sleep. * * @cancel_hw_scan: Ask the low-level to cancel the active hw scan. @@ -3567,7 +3670,12 @@ enum ieee80211_reconfig_type { * @post_channel_switch: This is an optional callback that is called * after a channel switch procedure is completed, allowing the * driver to go back to a normal configuration. - * + * @abort_channel_switch: This is an optional callback that is called + * when channel switch procedure was aborted, allowing the + * driver to go back to a normal configuration. + * @channel_switch_rx_beacon: This is an optional callback that is called + * when channel switch procedure is in progress and additional beacon with + * CSA IE was received, allowing driver to track changes in count. * @join_ibss: Join an IBSS (on an IBSS interface); this is called after all * information in bss_conf is set up and the beacon can be retrieved. A * channel context is bound before this is called. 
@@ -3623,6 +3731,9 @@ enum ieee80211_reconfig_type { * skb is always a real frame, head may or may not be an A-MSDU. * @get_ftm_responder_stats: Retrieve FTM responder statistics, if available. * Statistics should be cumulative, currently no way to reset is provided. + * + * @start_pmsr: start peer measurement (e.g. FTM) (this call can sleep) + * @abort_pmsr: abort peer measurement (this call can sleep) */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3710,6 +3821,9 @@ struct ieee80211_ops { #endif void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); + int (*sta_set_txpwr)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta); int (*sta_state)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, enum ieee80211_sta_state old_state, @@ -3867,6 +3981,11 @@ struct ieee80211_ops { int (*post_channel_switch)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + void (*abort_channel_switch)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); + void (*channel_switch_rx_beacon)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_channel_switch *ch_switch); int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); @@ -3911,6 +4030,10 @@ struct ieee80211_ops { int (*get_ftm_responder_stats)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct cfg80211_ftm_responder_stats *ftm_stats); + int (*start_pmsr)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct cfg80211_pmsr_request *request); + void (*abort_pmsr)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct cfg80211_pmsr_request *request); }; /** @@ -5387,6 +5510,34 @@ void ieee80211_sta_eosp(struct ieee80211_sta *pubsta); void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid); /** + * ieee80211_sta_register_airtime - register airtime usage 
for a sta/tid + * + * Register airtime usage for a given sta on a given tid. The driver can call + * this function to notify mac80211 that a station used a certain amount of + * airtime. This information will be used by the TXQ scheduler to schedule + * stations in a way that ensures airtime fairness. + * + * The reported airtime should as a minimum include all time that is spent + * transmitting to the remote station, including overhead and padding, but not + * including time spent waiting for a TXOP. If the time is not reported by the + * hardware it can in some cases be calculated from the rate and known frame + * composition. When possible, the time should include any failed transmission + * attempts. + * + * The driver can either call this function synchronously for every packet or + * aggregate, or asynchronously as airtime usage information becomes available. + * TX and RX airtime can be reported together, or separately by setting one of + * them to 0. + * + * @pubsta: the station + * @tid: the TID to register airtime for + * @tx_airtime: airtime used during TX (in usec) + * @rx_airtime: airtime used during RX (in usec) + */ +void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, + u32 tx_airtime, u32 rx_airtime); + +/** * ieee80211_iter_keys - iterate keys programmed into the device * @hw: pointer obtained from ieee80211_alloc_hw() * @vif: virtual interface to iterate, may be %NULL for all @@ -6088,14 +6239,116 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *sta, u8 tid); * ieee80211_tx_dequeue - dequeue a packet from a software tx queue * * @hw: pointer as obtained from ieee80211_alloc_hw() - * @txq: pointer obtained from station or virtual interface + * @txq: pointer obtained from station or virtual interface, or from + * ieee80211_next_txq() * * Returns the skb if successful, %NULL if no frame was available. 
+ * + * Note that this must be called in an rcu_read_lock() critical section, + * which can only be released after the SKB was handled. Some pointers in + * skb->cb, e.g. the key pointer, are protected by RCU and thus the + * critical section must persist not just for the duration of this call + * but for the duration of the frame handling. + * However, also note that while in the wake_tx_queue() method, + * rcu_read_lock() is already held. */ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct ieee80211_txq *txq); /** + * ieee80211_next_txq - get next tx queue to pull packets from + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @ac: AC number to return packets from. + * + * Returns the next txq if successful, %NULL if no queue is eligible. If a txq + * is returned, it should be returned with ieee80211_return_txq() after the + * driver has finished scheduling it. + */ +struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac); + +/** + * ieee80211_txq_schedule_start - start new scheduling round for TXQs + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @ac: AC number to acquire locks for + * + * Should be called before ieee80211_next_txq() or ieee80211_return_txq(). + * The driver must not call multiple TXQ scheduling rounds concurrently. + */ +void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac); + +/* (deprecated) */ +static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) +{ +} + +void __ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, bool force); + +/** + * ieee80211_schedule_txq - schedule a TXQ for transmission + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @txq: pointer obtained from station or virtual interface + * + * Schedules a TXQ for transmission if it is not already scheduled, + * even if mac80211 does not have any packets buffered. 
+ * + * The driver may call this function if it has buffered packets for + * this TXQ internally. + */ +static inline void +ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +{ + __ieee80211_schedule_txq(hw, txq, true); +} + +/** + * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq() + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @txq: pointer obtained from station or virtual interface + * @force: schedule txq even if mac80211 does not have any buffered packets. + * + * The driver may set force=true if it has buffered packets for this TXQ + * internally. + */ +static inline void +ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq, + bool force) +{ + __ieee80211_schedule_txq(hw, txq, force); +} + +/** + * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit + * + * This function is used to check whether given txq is allowed to transmit by + * the airtime scheduler, and can be used by drivers to access the airtime + * fairness accounting without using the scheduling order enforced by + * next_txq(). + * + * Returns %true if the airtime scheduler thinks the TXQ should be allowed to + * transmit, and %false if it should be throttled. This function can also have + * the side effect of rotating the TXQ in the scheduler rotation, which will + * eventually bring the deficit to positive and allow the station to transmit + * again. + * + * The API ieee80211_txq_may_transmit() also ensures that TXQ list will be + * aligned against the driver's own round-robin scheduler list, i.e. it rotates + * the TXQ list until the requested node becomes the first entry + * in TXQ list. Thus both the TXQ list and driver's list are in sync. If this + * function returns %true, the driver is expected to schedule packets + * for transmission, and then return the TXQ through ieee80211_return_txq(). 
+ * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @txq: pointer obtained from station or virtual interface + */ +bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, + struct ieee80211_txq *txq); + +/** * ieee80211_txq_get_depth - get pending frame/byte count of given txq * * The values are not guaranteed to be coherent with regard to each other, i.e. diff --git a/include/net/ndisc.h b/include/net/ndisc.h index ddfbb591e2c5..366150053043 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -2,6 +2,8 @@ #ifndef _NDISC_H #define _NDISC_H +#include <net/ipv6_stubs.h> + /* * ICMP codes for neighbour discovery messages */ @@ -379,6 +381,14 @@ static inline struct neighbour *__ipv6_neigh_lookup_noref(struct net_device *dev return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev); } +static inline +struct neighbour *__ipv6_neigh_lookup_noref_stub(struct net_device *dev, + const void *pkey) +{ + return ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, + ndisc_hashfn, pkey, dev); +} + static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey) { struct neighbour *n; @@ -409,6 +419,36 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, rcu_read_unlock_bh(); } +static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, + const void *pkey) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref_stub(dev, pkey); + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + } + rcu_read_unlock_bh(); +} + +/* uses ipv6_stub and is meant for use outside of IPv6 core */ +static inline struct neighbour *ip_neigh_gw6(struct net_device *dev, + const void *addr) +{ + struct neighbour *neigh; + + neigh = __ipv6_neigh_lookup_noref_stub(dev, addr); + if (unlikely(!neigh)) + neigh = __neigh_create(ipv6_stub->nd_tbl, addr, dev, false); + + return neigh; +} + int ndisc_init(void); int 
ndisc_late_init(void); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 665990c7dec8..50a67bd6a434 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -140,8 +140,8 @@ struct neighbour { unsigned long updated; rwlock_t lock; refcount_t refcnt; - struct sk_buff_head arp_queue; unsigned int arp_queue_len_bytes; + struct sk_buff_head arp_queue; struct timer_list timer; unsigned long used; atomic_t probes; @@ -149,11 +149,13 @@ struct neighbour { __u8 nud_state; __u8 type; __u8 dead; + u8 protocol; seqlock_t ha_lock; - unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; + unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8); struct hh_cache hh; int (*output)(struct neighbour *, struct sk_buff *); const struct neigh_ops *ops; + struct list_head gc_list; struct rcu_head rcu; struct net_device *dev; u8 primary_key[0]; @@ -172,6 +174,7 @@ struct pneigh_entry { possible_net_t net; struct net_device *dev; u8 flags; + u8 protocol; u8 key[0]; }; @@ -202,6 +205,8 @@ struct neigh_table { int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); void (*proxy_redo)(struct sk_buff *skb); + bool (*allow_add)(const struct net_device *dev, + struct netlink_ext_ack *extack); char *id; struct neigh_parms parms; struct list_head parms_list; @@ -214,6 +219,8 @@ struct neigh_table { struct timer_list proxy_timer; struct sk_buff_head proxy_queue; atomic_t entries; + atomic_t gc_entries; + struct list_head gc_list; rwlock_t lock; unsigned long last_rand; struct neigh_statistics __percpu *stats; @@ -250,6 +257,7 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 +extern const struct nla_policy nda_policy[]; static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey) { @@ -492,11 +500,12 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb return 
dev_queue_xmit(skb); } -static inline int neigh_output(struct neighbour *n, struct sk_buff *skb) +static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, + bool skip_cache) { const struct hh_cache *hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) + if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache) return neigh_hh_output(hh, skb); else return n->output(n, skb); @@ -546,24 +555,6 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, } while (read_seqretry(&n->ha_lock, seq)); } -static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags, - int *notify) -{ - u8 ndm_flags = 0; - - if (!(flags & NEIGH_UPDATE_F_ADMIN)) - return; - - ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; - if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { - if (ndm_flags & NTF_EXT_LEARNED) - neigh->flags |= NTF_EXT_LEARNED; - else - neigh->flags &= ~NTF_EXT_LEARNED; - *notify = 1; - } -} - static inline void neigh_update_is_router(struct neighbour *neigh, u32 flags, int *notify) { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 99d4148e0f90..12689ddfc24c 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -31,6 +31,7 @@ #include <net/netns/xfrm.h> #include <net/netns/mpls.h> #include <net/netns/can.h> +#include <net/netns/xdp.h> #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> @@ -58,6 +59,7 @@ struct net { */ spinlock_t rules_mod_lock; + u32 hash_mix; atomic64_t cookie_gen; struct list_head list; /* list of network namespaces */ @@ -161,6 +163,9 @@ struct net { #if IS_ENABLED(CONFIG_CAN) struct netns_can can; #endif +#ifdef CONFIG_XDP_SOCKETS + struct netns_xdp xdp; +#endif struct sock *diag_nlsk; atomic_t fnhe_genid; } __randomize_layout; diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 74af19c3a8f7..89808ce293c4 100644 --- 
a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -6,12 +6,12 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) { - skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); + struct nf_bridge_info *b = skb_ext_add(skb, SKB_EXT_BRIDGE_NF); - if (likely(skb->nf_bridge)) - refcount_set(&(skb->nf_bridge->use), 1); + if (b) + memset(b, 0, sizeof(*b)); - return skb->nf_bridge; + return b; } void nf_bridge_update_protocol(struct sk_buff *skb); @@ -22,12 +22,6 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk, int (*okfn)(struct net *, struct sock *, struct sk_buff *)); -static inline struct nf_bridge_info * -nf_bridge_info_get(const struct sk_buff *skb) -{ - return skb->nf_bridge; -} - unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb); static inline void nf_bridge_push_encap_header(struct sk_buff *skb) @@ -49,7 +43,6 @@ static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) } struct net_device *setup_pre_routing(struct sk_buff *skb); -void br_netfilter_enable(void); #if IS_ENABLED(CONFIG_IPV6) int br_validate_ipv6(struct net *net, struct sk_buff *skb); diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 135ee702c7b0..2c8c2b023848 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -22,5 +22,8 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp; #ifdef CONFIG_NF_CT_PROTO_UDPLITE extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre; +#endif #endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h deleted file mode 100644 index 13d55206bb9f..000000000000 --- 
a/include/net/netfilter/ipv4/nf_nat_masquerade.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_MASQUERADE_IPV4_H_ -#define _NF_NAT_MASQUERADE_IPV4_H_ - -#include <net/netfilter/nf_nat.h> - -unsigned int -nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, - const struct nf_nat_range2 *range, - const struct net_device *out); - -int nf_nat_masquerade_ipv4_register_notifier(void); -void nf_nat_masquerade_ipv4_unregister_notifier(void); - -#endif /*_NF_NAT_MASQUERADE_IPV4_H_ */ diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index 2eb43fcefc50..40e0e0623f46 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -5,6 +5,7 @@ #include <linux/skbuff.h> #include <net/ip.h> #include <net/icmp.h> +#include <net/netfilter/nf_reject.h> void nf_send_unreach(struct sk_buff *skb_in, int code, int hook); void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook); diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h deleted file mode 100644 index 2917bf95c437..000000000000 --- a/include/net/netfilter/ipv6/nf_nat_masquerade.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_MASQUERADE_IPV6_H_ -#define _NF_NAT_MASQUERADE_IPV6_H_ - -unsigned int -nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, - const struct net_device *out); -int nf_nat_masquerade_ipv6_register_notifier(void); -void nf_nat_masquerade_ipv6_unregister_notifier(void); - -#endif /* _NF_NAT_MASQUERADE_IPV6_H_ */ diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h index 3a5a9a36a0b2..4a3ef9ebdf6f 100644 --- a/include/net/netfilter/ipv6/nf_reject.h +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -3,6 +3,7 @@ #define _IPV6_NF_REJECT_H #include <linux/icmpv6.h> +#include <net/netfilter/nf_reject.h> void 
nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code, unsigned int hooknum); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 7e012312cd61..d2bc733a2ef1 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -27,12 +27,17 @@ #include <net/netfilter/nf_conntrack_tuple.h> +struct nf_ct_udp { + unsigned long stream_ts; +}; + /* per conntrack: protocol private data */ union nf_conntrack_proto { /* insert conntrack proto private data here */ struct nf_ct_dccp dccp; struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; + struct nf_ct_udp udp; struct nf_ct_gre gre; unsigned int tmpl_padto; }; @@ -182,28 +187,26 @@ bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report); bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple); -bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig); void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies, int do_acct); + u32 extra_jiffies, bool do_acct); /* Refresh conntrack for this many jiffies and do accounting */ static inline void nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies) + u32 extra_jiffies) { - __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1); + __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, true); } /* Refresh conntrack for this many jiffies */ static inline void nf_ct_refresh(struct nf_conn *ct, const struct sk_buff *skb, - unsigned long extra_jiffies) + u32 extra_jiffies) { - __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); + __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, false); } /* kill conntrack and do accounting */ @@ -313,6 +316,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, gfp_t flags); void 
nf_ct_tmpl_free(struct nf_conn *tmpl); +u32 nf_ct_get_id(const struct nf_conn *ct); + static inline void nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) { diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 79d8d16732b4..bc6745d3010e 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -46,9 +46,6 @@ struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) return acct; }; -unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct, - int dir); - /* Check if connection tracking accounting is enabled */ static inline bool nf_ct_acct_enabled(struct net *net) { @@ -61,8 +58,7 @@ static inline void nf_ct_set_acct(struct net *net, bool enable) net->ct.sysctl_acct = enable; } -int nf_conntrack_acct_pernet_init(struct net *net); -void nf_conntrack_acct_pernet_fini(struct net *net); +void nf_conntrack_acct_pernet_init(struct net *net); int nf_conntrack_acct_init(void); void nf_conntrack_acct_fini(void); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index afc9b3620473..ae41e92251dd 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -26,7 +26,7 @@ int nf_conntrack_init_net(struct net *net); void nf_conntrack_cleanup_net(struct net *net); void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list); -int nf_conntrack_proto_pernet_init(struct net *net); +void nf_conntrack_proto_pernet_init(struct net *net); void nf_conntrack_proto_pernet_fini(struct net *net); int nf_conntrack_proto_init(void); @@ -39,8 +39,7 @@ void nf_conntrack_init_end(void); void nf_conntrack_cleanup_end(void); bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_l4proto *l4proto); + const struct nf_conntrack_tuple *orig); /* Find a connection corresponding 
to a tuple. */ struct nf_conntrack_tuple_hash * diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index 4b2b2baf8ab4..f32fc8289473 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -5,17 +5,10 @@ struct nf_conncount_data; -enum nf_conncount_list_add { - NF_CONNCOUNT_ADDED, /* list add was ok */ - NF_CONNCOUNT_ERR, /* -ENOMEM, must drop skb */ - NF_CONNCOUNT_SKIP, /* list is already reclaimed by gc */ -}; - struct nf_conncount_list { spinlock_t list_lock; struct list_head head; /* connections with the same filtering key */ unsigned int count; /* length of list */ - bool dead; }; struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, @@ -29,18 +22,12 @@ unsigned int nf_conncount_count(struct net *net, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone); -void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone, - bool *addit); +int nf_conncount_add(struct net *net, struct nf_conncount_list *list, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); void nf_conncount_list_init(struct nf_conncount_list *list); -enum nf_conncount_list_add -nf_conncount_add(struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone); - bool nf_conncount_gc_list(struct net *net, struct nf_conncount_list *list); diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 3f1ce9a8776e..52b44192b43f 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -142,7 +142,7 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp, u32 portid, int report); -int nf_conntrack_ecache_pernet_init(struct net *net); 
+void nf_conntrack_ecache_pernet_init(struct net *net); void nf_conntrack_ecache_pernet_fini(struct net *net); int nf_conntrack_ecache_init(void); @@ -182,10 +182,7 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, u32 portid, int report) {} -static inline int nf_conntrack_ecache_pernet_init(struct net *net) -{ - return 0; -} +static inline void nf_conntrack_ecache_pernet_init(struct net *net) {} static inline void nf_conntrack_ecache_pernet_fini(struct net *net) { diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 006e430d1cdf..93ce6b0daaba 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -48,7 +48,7 @@ struct nf_conntrack_expect { /* Expectation class */ unsigned int class; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) union nf_inet_addr saved_addr; /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. 
*/ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 2492120b8097..44b5a00a9c64 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -15,6 +15,11 @@ #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_expect.h> +#define NF_NAT_HELPER_PREFIX "ip_nat_" +#define NF_NAT_HELPER_NAME(name) NF_NAT_HELPER_PREFIX name +#define MODULE_ALIAS_NF_NAT_HELPER(name) \ + MODULE_ALIAS(NF_NAT_HELPER_NAME(name)) + struct module; enum nf_ct_helper_flags { @@ -54,6 +59,8 @@ struct nf_conntrack_helper { unsigned int queue_num; /* length of userspace private data stored in nf_conn_help->data */ u16 data_len; + /* name of NAT helper module */ + char nat_mod_name[NF_CT_HELPER_NAME_LEN]; }; /* Must be kept in sync with the classes defined by helpers */ @@ -124,8 +131,7 @@ static inline void *nfct_help_data(const struct nf_conn *ct) return (void *)help->data; } -int nf_conntrack_helper_pernet_init(struct net *net); -void nf_conntrack_helper_pernet_fini(struct net *net); +void nf_conntrack_helper_pernet_init(struct net *net); int nf_conntrack_helper_init(void); void nf_conntrack_helper_fini(void); @@ -154,4 +160,21 @@ nf_ct_helper_expectfn_find_by_symbol(const void *symbol); extern struct hlist_head *nf_ct_helper_hash; extern unsigned int nf_ct_helper_hsize; +struct nf_conntrack_nat_helper { + struct list_head list; + char mod_name[NF_CT_HELPER_NAME_LEN]; /* module name */ + struct module *module; /* pointer to self */ +}; + +#define NF_CT_NAT_HELPER_INIT(name) \ + { \ + .mod_name = NF_NAT_HELPER_NAME(name), \ + .module = THIS_MODULE \ + } + +void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat); +void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat); +int nf_nat_helper_try_module_get(const char *name, u16 l3num, + u8 protonum); +void nf_nat_helper_put(struct nf_conntrack_helper *helper); #endif /*_NF_CONNTRACK_HELPER_H*/ 
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index ae7b86f587f2..a49edfdf47e8 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -27,27 +27,6 @@ struct nf_conntrack_l4proto { /* protoinfo nlattr size, closes a hole */ u16 nlattr_size; - /* Try to fill in the third arg: dataoff is offset past network protocol - hdr. Return true if possible. */ - bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple); - - /* Invert the per-proto part of the tuple: ie. turn xmit into reply. - * Only used by icmp, most protocols use a generic version. - */ - bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig); - - /* Returns verdict for packet, or -1 for invalid. */ - int (*packet)(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state); - - /* Called when a conntrack entry is destroyed */ - void (*destroy)(struct nf_conn *ct); - /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); @@ -79,16 +58,28 @@ struct nf_conntrack_l4proto { /* Print out the private part of the conntrack. */ void (*print_conntrack)(struct seq_file *s, struct nf_conn *); #endif - unsigned int *net_id; - /* Init l4proto pernet data */ - int (*init_net)(struct net *net); +}; - /* Return the per-net protocol part. */ - struct nf_proto_net *(*get_net_proto)(struct net *net); +bool icmp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct net *net, + struct nf_conntrack_tuple *tuple); - /* Module (if any) which this is connected to. 
*/ - struct module *me; -}; +bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct net *net, + struct nf_conntrack_tuple *tuple); + +bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig); +bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig); + +int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state, + u8 l4proto, + union nf_inet_addr *outer_daddr); int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, struct sk_buff *skb, @@ -99,31 +90,63 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state); + +int nf_conntrack_icmp_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + +int nf_conntrack_icmpv6_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + +int nf_conntrack_udp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_udplite_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_tcp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_dccp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_sctp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_gre_packet(struct nf_conn *ct, + struct 
sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + +void nf_conntrack_generic_init_net(struct net *net); +void nf_conntrack_tcp_init_net(struct net *net); +void nf_conntrack_udp_init_net(struct net *net); +void nf_conntrack_gre_init_net(struct net *net); +void nf_conntrack_dccp_init_net(struct net *net); +void nf_conntrack_sctp_init_net(struct net *net); +void nf_conntrack_icmp_init_net(struct net *net); +void nf_conntrack_icmpv6_init_net(struct net *net); + /* Existing built-in generic protocol */ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO IPPROTO_UDPLITE -const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto); - -const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto); -void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); - -/* Protocol pernet registration. */ -int nf_ct_l4proto_pernet_register_one(struct net *net, - const struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_pernet_unregister_one(struct net *net, - const struct nf_conntrack_l4proto *proto); -int nf_ct_l4proto_pernet_register(struct net *net, - const struct nf_conntrack_l4proto *const proto[], - unsigned int num_proto); -void nf_ct_l4proto_pernet_unregister(struct net *net, - const struct nf_conntrack_l4proto *const proto[], - unsigned int num_proto); - -/* Protocol global registration. 
*/ -int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); +const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto); /* Generic netlink helpers */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, @@ -192,4 +215,11 @@ static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net) } #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +static inline struct nf_gre_net *nf_gre_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.gre; +} +#endif + #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 3394d75e1c80..00a8fbb2d735 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -88,6 +88,9 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) int nf_conntrack_timeout_init(void); void nf_conntrack_timeout_fini(void); void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout); +int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, + const char *timeout_name); +void nf_ct_destroy_timeout(struct nf_conn *ct); #else static inline int nf_conntrack_timeout_init(void) { @@ -98,6 +101,18 @@ static inline void nf_conntrack_timeout_fini(void) { return; } + +static inline int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, + u8 l3num, u8 l4num, + const char *timeout_name) +{ + return -EOPNOTSUPP; +} + +static inline void nf_ct_destroy_timeout(struct nf_conn *ct) +{ + return; +} #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_NF_CONNTRACK_TIMEOUT diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h index 3b661986be8f..0ed617bf0a3d 100644 --- a/include/net/netfilter/nf_conntrack_timestamp.h +++ b/include/net/netfilter/nf_conntrack_timestamp.h @@ -49,21 +49,12 @@ static inline void nf_ct_set_tstamp(struct 
net *net, bool enable) } #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP -int nf_conntrack_tstamp_pernet_init(struct net *net); -void nf_conntrack_tstamp_pernet_fini(struct net *net); +void nf_conntrack_tstamp_pernet_init(struct net *net); int nf_conntrack_tstamp_init(void); void nf_conntrack_tstamp_fini(void); #else -static inline int nf_conntrack_tstamp_pernet_init(struct net *net) -{ - return 0; -} - -static inline void nf_conntrack_tstamp_pernet_fini(struct net *net) -{ - return; -} +static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {} static inline int nf_conntrack_tstamp_init(void) { diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 77e2761d4f2f..3e370cb36263 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -84,7 +84,6 @@ struct flow_offload { struct nf_flow_route { struct { struct dst_entry *dst; - int ifindex; } tuple[FLOW_OFFLOAD_DIR_MAX]; }; @@ -95,10 +94,6 @@ void flow_offload_free(struct flow_offload *flow); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple); -int nf_flow_table_iterate(struct nf_flowtable *flow_table, - void (*iter)(struct flow_offload *flow, void *data), - void *data); - void nf_flow_table_cleanup(struct net_device *dev); int nf_flow_table_init(struct nf_flowtable *flow_table); diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index a17eb2f8d40e..423cda2c6542 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -31,8 +31,7 @@ struct nf_conn; /* The structure embedded in the conntrack structure. 
*/ struct nf_conn_nat { union nf_conntrack_nat_help help; -#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ - IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) +#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE) int masq_index; #endif }; @@ -47,10 +46,6 @@ extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct); -/* Is this tuple already taken? (not by us)*/ -int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack); - static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) { #if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) @@ -65,8 +60,7 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, struct nf_conn_nat *nat, const struct net_device *out) { -#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ - IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) +#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE) return nat && nat->masq_index && hooknum == NF_INET_POST_ROUTING && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && nat->masq_index != out->ifindex; @@ -75,8 +69,50 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, #endif } -int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, +int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, const struct nf_hook_ops *nat_ops, unsigned int ops_count); -void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, +void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, unsigned int ops_count); + +unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int hooknum, struct sk_buff *skb); + +unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, + enum nf_nat_manip_type mtype, + enum ip_conntrack_dir dir); +void nf_nat_csum_recalc(struct sk_buff *skb, + u8 nfproto, u8 proto, void *data, __sum16 *check, + int datalen, int oldlen); + +int nf_nat_icmp_reply_translation(struct sk_buff *skb, 
struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum); + +int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, unsigned int hdrlen); + +int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + +int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + +int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + +unsigned int +nf_nat_inet_fn(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); + +int nf_xfrm_me_harder(struct net *n, struct sk_buff *s, unsigned int family); + +static inline int nf_nat_initialized(struct nf_conn *ct, + enum nf_nat_manip_type manip) +{ + if (manip == NF_NAT_MANIP_SRC) + return ct->status & IPS_SRC_NAT_DONE; + else + return ct->status & IPS_DST_NAT_DONE; +} #endif diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h deleted file mode 100644 index dc7cd0440229..000000000000 --- a/include/net/netfilter/nf_nat_core.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_CORE_H -#define _NF_NAT_CORE_H -#include <linux/list.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat.h> - -/* This header used to share core functionality between the standalone - NAT module, and the compatibility layer's use of NAT for masquerading. 
*/ - -unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned int hooknum, struct sk_buff *skb); - -unsigned int -nf_nat_inet_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state); - -int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family); - -static inline int nf_nat_initialized(struct nf_conn *ct, - enum nf_nat_manip_type manip) -{ - if (manip == NF_NAT_MANIP_SRC) - return ct->status & IPS_SRC_NAT_DONE; - else - return ct->status & IPS_DST_NAT_DONE; -} - -#endif /* _NF_NAT_CORE_H */ diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h deleted file mode 100644 index d300b8f03972..000000000000 --- a/include/net/netfilter/nf_nat_l3proto.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_L3PROTO_H -#define _NF_NAT_L3PROTO_H - -struct nf_nat_l4proto; -struct nf_nat_l3proto { - u8 l3proto; - - bool (*in_range)(const struct nf_conntrack_tuple *t, - const struct nf_nat_range2 *range); - - u32 (*secure_port)(const struct nf_conntrack_tuple *t, __be16); - - bool (*manip_pkt)(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_nat_l4proto *l4proto, - const struct nf_conntrack_tuple *target, - enum nf_nat_manip_type maniptype); - - void (*csum_update)(struct sk_buff *skb, unsigned int iphdroff, - __sum16 *check, - const struct nf_conntrack_tuple *t, - enum nf_nat_manip_type maniptype); - - void (*csum_recalc)(struct sk_buff *skb, u8 proto, - void *data, __sum16 *check, - int datalen, int oldlen); - - void (*decode_session)(struct sk_buff *skb, - const struct nf_conn *ct, - enum ip_conntrack_dir dir, - unsigned long statusbit, - struct flowi *fl); - - int (*nlattr_to_range)(struct nlattr *tb[], - struct nf_nat_range2 *range); -}; - -int nf_nat_l3proto_register(const struct nf_nat_l3proto *); -void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *); -const struct nf_nat_l3proto *__nf_nat_l3proto_find(u8 
l3proto); - -int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum); - -int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, unsigned int hdrlen); - -int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops); -void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops); - -int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops); -void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops); - -#endif /* _NF_NAT_L3PROTO_H */ diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h deleted file mode 100644 index b4d6b29bca62..000000000000 --- a/include/net/netfilter/nf_nat_l4proto.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Header for use in defining a given protocol. */ -#ifndef _NF_NAT_L4PROTO_H -#define _NF_NAT_L4PROTO_H -#include <net/netfilter/nf_nat.h> -#include <linux/netfilter/nfnetlink_conntrack.h> - -struct nf_nat_range; -struct nf_nat_l3proto; - -struct nf_nat_l4proto { - /* Protocol number. */ - u8 l4proto; - - /* Translate a packet to the target according to manip type. - * Return true if succeeded. - */ - bool (*manip_pkt)(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype); - - /* Is the manipable part of the tuple between min and max incl? */ - bool (*in_range)(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max); - - /* Alter the per-proto part of the tuple (depending on - * maniptype), to give a unique tuple in the given range if - * possible. 
Per-protocol part of tuple is initialized to the - * incoming packet. - */ - void (*unique_tuple)(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct); - - int (*nlattr_to_range)(struct nlattr *tb[], - struct nf_nat_range2 *range); -}; - -/* Protocol registration. */ -int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto); -void nf_nat_l4proto_unregister(u8 l3proto, - const struct nf_nat_l4proto *l4proto); - -const struct nf_nat_l4proto *__nf_nat_l4proto_find(u8 l3proto, u8 l4proto); - -/* Built-in protocols. */ -extern const struct nf_nat_l4proto nf_nat_l4proto_tcp; -extern const struct nf_nat_l4proto nf_nat_l4proto_udp; -extern const struct nf_nat_l4proto nf_nat_l4proto_icmp; -extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6; -extern const struct nf_nat_l4proto nf_nat_l4proto_unknown; -#ifdef CONFIG_NF_NAT_PROTO_DCCP -extern const struct nf_nat_l4proto nf_nat_l4proto_dccp; -#endif -#ifdef CONFIG_NF_NAT_PROTO_SCTP -extern const struct nf_nat_l4proto nf_nat_l4proto_sctp; -#endif -#ifdef CONFIG_NF_NAT_PROTO_UDPLITE -extern const struct nf_nat_l4proto nf_nat_l4proto_udplite; -#endif - -bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max); - -void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct, u16 *rover); - -int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range2 *range); - -#endif /*_NF_NAT_L4PROTO_H*/ diff --git a/include/net/netfilter/nf_nat_masquerade.h b/include/net/netfilter/nf_nat_masquerade.h new file mode 100644 index 000000000000..54a14d643c34 --- /dev/null +++ 
b/include/net/netfilter/nf_nat_masquerade.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_NAT_MASQUERADE_H_ +#define _NF_NAT_MASQUERADE_H_ + +#include <net/netfilter/nf_nat.h> + +unsigned int +nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, + const struct nf_nat_range2 *range, + const struct net_device *out); + +int nf_nat_masquerade_inet_register_notifiers(void); +void nf_nat_masquerade_inet_unregister_notifiers(void); + +unsigned int +nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + const struct net_device *out); + +#endif /*_NF_NAT_MASQUERADE_H_ */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index a50a69f5334c..7239105d9d2e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -119,4 +119,7 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, return queue; } +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + const struct nf_hook_entries *entries, unsigned int index, + unsigned int verdict); #endif /* _NF_QUEUE_H */ diff --git a/include/net/netfilter/nf_reject.h b/include/net/netfilter/nf_reject.h new file mode 100644 index 000000000000..221f877f29d1 --- /dev/null +++ b/include/net/netfilter/nf_reject.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_REJECT_H +#define _NF_REJECT_H + +static inline bool nf_reject_verify_csum(__u8 proto) +{ + /* Skip protocols that don't use 16-bit one's complement checksum + * of the entire payload. + */ + switch (proto) { + /* Protocols with other integrity checks. */ + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_SCTP: + + /* Protocols with partial checksums. */ + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + + /* Protocols with optional checksums. 
*/ + case IPPROTO_GRE: + return false; + } + return true; +} + +#endif /* _NF_REJECT_H */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 841835a387e1..5b8624ae4a27 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -2,7 +2,6 @@ #ifndef _NET_NF_TABLES_H #define _NET_NF_TABLES_H -#include <linux/module.h> #include <linux/list.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> @@ -13,6 +12,8 @@ #include <net/netfilter/nf_flow_table.h> #include <net/netlink.h> +struct module; + #define NFT_JUMP_STACK_SIZE 16 struct nft_pktinfo { @@ -382,6 +383,7 @@ void nft_unregister_set(struct nft_set_type *type); * @dtype: data type (verdict or numeric type defined by userspace) * @objtype: object type (see NFT_OBJECT_* definitions) * @size: maximum set size + * @use: number of rules references to this set * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal * @timeout: default timeout value in jiffies @@ -407,6 +409,7 @@ struct nft_set { u32 dtype; u32 objtype; u32 size; + u32 use; atomic_t nelems; u32 ndeact; u64 timeout; @@ -416,7 +419,8 @@ struct nft_set { unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:14, + u16 flags:13, + bound:1, genmask:2; u8 klen; u8 dlen; @@ -466,12 +470,12 @@ struct nft_set_binding { u32 flags; }; +enum nft_trans_phase; +void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, + enum nft_trans_phase phase); int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); -void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); -void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set 
*set); /** @@ -692,10 +696,12 @@ static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, gcb->elems[gcb->head.cnt++] = elem; } +struct nft_expr_ops; /** * struct nft_expr_type - nf_tables expression type * * @select_ops: function to select nft_expr_ops + * @release_ops: release nft_expr_ops * @ops: default ops, used when no select_ops functions is present * @list: used internally * @name: Identifier @@ -708,6 +714,7 @@ static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, struct nft_expr_type { const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *, const struct nlattr * const tb[]); + void (*release_ops)(const struct nft_expr_ops *ops); const struct nft_expr_ops *ops; struct list_head list; const char *name; @@ -721,6 +728,13 @@ struct nft_expr_type { #define NFT_EXPR_STATEFUL 0x1 #define NFT_EXPR_GC 0x2 +enum nft_trans_phase { + NFT_TRANS_PREPARE, + NFT_TRANS_ABORT, + NFT_TRANS_COMMIT, + NFT_TRANS_RELEASE +}; + /** * struct nft_expr_ops - nf_tables expression operations * @@ -750,7 +764,8 @@ struct nft_expr_ops { void (*activate)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*deactivate)(const struct nft_ctx *ctx, - const struct nft_expr *expr); + const struct nft_expr *expr, + enum nft_trans_phase phase); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*destroy_clone)(const struct nft_ctx *ctx, @@ -792,23 +807,6 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr); -static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) -{ - int err; - - if (src->ops->clone) { - dst->ops = src->ops; - err = src->ops->clone(dst, src); - if (err < 0) - return err; - } else { - memcpy(dst, src, src->ops->size); - } - - __module_get(src->ops->type->owner); - return 0; -} - /** * struct nft_rule - nf_tables rule * @@ -1012,21 +1010,32 @@ int nft_verdict_dump(struct 
sk_buff *skb, int type, const struct nft_verdict *v); /** + * struct nft_object_hash_key - key to lookup nft_object + * + * @name: name of the stateful object to look up + * @table: table the object belongs to + */ +struct nft_object_hash_key { + const char *name; + const struct nft_table *table; +}; + +/** * struct nft_object - nf_tables stateful object * * @list: table stateful object list node - * @table: table this object belongs to - * @name: name of this stateful object + * @key: keys that identify this object + * @rhlhead: nft_objname_ht node * @genmask: generation mask * @use: number of references to this stateful object * @handle: unique object handle * @ops: object operations - * @data: object data, layout depends on type + * @data: object data, layout depends on type */ struct nft_object { struct list_head list; - char *name; - struct nft_table *table; + struct rhlist_head rhlhead; + struct nft_object_hash_key key; u32 genmask:2, use:30; u64 handle; @@ -1043,11 +1052,12 @@ static inline void *nft_obj_data(const struct nft_object *obj) #define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr)) -struct nft_object *nft_obj_lookup(const struct nft_table *table, +struct nft_object *nft_obj_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask); -void nft_obj_notify(struct net *net, struct nft_table *table, +void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp); @@ -1383,4 +1393,6 @@ struct nft_trans_flowtable { int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); +void __init nft_chain_route_init(void); +void nft_chain_route_fini(void); #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 2046d104f323..7281895fa6d9 100644 --- a/include/net/netfilter/nf_tables_core.h +++ 
b/include/net/netfilter/nf_tables_core.h @@ -80,6 +80,22 @@ struct nft_regs; struct nft_pktinfo; void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_cmp_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_lookup_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_payload_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_immediate_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_bitwise_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_range_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_byteorder_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_dynset_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h deleted file mode 100644 index e51ab3815797..000000000000 --- a/include/net/netfilter/nft_masq.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NFT_MASQ_H_ -#define _NFT_MASQ_H_ - -struct nft_masq { - u32 flags; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; -}; - -extern const struct nla_policy nft_masq_policy[]; - -int nft_masq_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]); - -int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr); - -int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data); - -#endif /* 
_NFT_MASQ_H_ */ diff --git a/include/net/netfilter/nft_redir.h b/include/net/netfilter/nft_redir.h deleted file mode 100644 index 4a970737c03c..000000000000 --- a/include/net/netfilter/nft_redir.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NFT_REDIR_H_ -#define _NFT_REDIR_H_ - -struct nft_redir { - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; - u16 flags; -}; - -extern const struct nla_policy nft_redir_policy[]; - -int nft_redir_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]); - -int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr); - -int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data); - -#endif /* _NFT_REDIR_H_ */ diff --git a/include/net/netlink.h b/include/net/netlink.h index 4c1e99303b5a..395b4406f4b0 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -183,6 +183,7 @@ enum { NLA_REJECT, NLA_EXACT_LEN, NLA_EXACT_LEN_WARN, + NLA_MIN_LEN, __NLA_TYPE_MAX, }; @@ -212,6 +213,7 @@ enum nla_policy_validation { * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload + * NLA_MIN_LEN Minimum length of attribute payload * NLA_NESTED, * NLA_NESTED_ARRAY Length verification is done by checking len of * nested header (or empty); len field is used if @@ -230,6 +232,7 @@ enum nla_policy_validation { * it is rejected. * NLA_EXACT_LEN_WARN Attribute should have exactly this length, a warning * is logged if it is longer, shorter is rejected. 
+ * NLA_MIN_LEN Minimum length of attribute payload * All other Minimum length of attribute payload * * Meaning of `validation_data' field: @@ -281,7 +284,7 @@ enum nla_policy_validation { * static const struct nla_policy my_policy[ATTR_MAX+1] = { * [ATTR_FOO] = { .type = NLA_U16 }, * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, - * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, + * [ATTR_BAZ] = { .type = NLA_EXACT_LEN, .len = sizeof(struct mystruct) }, * [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags }, * }; */ @@ -296,20 +299,43 @@ struct nla_policy { }; int (*validate)(const struct nlattr *attr, struct netlink_ext_ack *extack); + /* This entry is special, and used for the attribute at index 0 + * only, and specifies special data about the policy, namely it + * specifies the "boundary type" where strict length validation + * starts for any attribute types >= this value, also, strict + * nesting validation starts here. + * + * Additionally, it means that NLA_UNSPEC is actually NLA_REJECT + * for any types >= this, so need to use NLA_MIN_LEN to get the + * previous pure { .len = xyz } behaviour. The advantage of this + * is that types not specified in the policy will be rejected. + * + * For completely new families it should be set to 1 so that the + * validation is enforced for all attributes. For existing ones + * it should be set at least when new attributes are added to + * the enum used by the policy, and be set to the new value that + * was added to enforce strict validation from thereon. 
+ */ + u16 strict_start_type; }; }; #define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len } #define NLA_POLICY_EXACT_LEN_WARN(_len) { .type = NLA_EXACT_LEN_WARN, \ .len = _len } +#define NLA_POLICY_MIN_LEN(_len) { .type = NLA_MIN_LEN, .len = _len } #define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN) #define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN) -#define NLA_POLICY_NESTED(maxattr, policy) \ +#define _NLA_POLICY_NESTED(maxattr, policy) \ { .type = NLA_NESTED, .validation_data = policy, .len = maxattr } -#define NLA_POLICY_NESTED_ARRAY(maxattr, policy) \ +#define _NLA_POLICY_NESTED_ARRAY(maxattr, policy) \ { .type = NLA_NESTED_ARRAY, .validation_data = policy, .len = maxattr } +#define NLA_POLICY_NESTED(policy) \ + _NLA_POLICY_NESTED(ARRAY_SIZE(policy) - 1, policy) +#define NLA_POLICY_NESTED_ARRAY(policy) \ + _NLA_POLICY_NESTED_ARRAY(ARRAY_SIZE(policy) - 1, policy) #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) #define NLA_ENSURE_INT_TYPE(tp) \ @@ -361,21 +387,52 @@ struct nl_info { bool skip_notify; }; +/** + * enum netlink_validation - netlink message/attribute validation levels + * @NL_VALIDATE_LIBERAL: Old-style "be liberal" validation, not caring about + * extra data at the end of the message, attributes being longer than + * they should be, or unknown attributes being present. + * @NL_VALIDATE_TRAILING: Reject junk data encountered after attribute parsing. + * @NL_VALIDATE_MAXTYPE: Reject attributes > max type; Together with _TRAILING + * this is equivalent to the old nla_parse_strict()/nlmsg_parse_strict(). + * @NL_VALIDATE_UNSPEC: Reject attributes with NLA_UNSPEC in the policy. + * This can safely be set by the kernel when the given policy has no + * NLA_UNSPEC anymore, and can thus be used to ensure policy entries + * are enforced going forward. + * @NL_VALIDATE_STRICT_ATTRS: strict attribute policy parsing (e.g. + * U8, U16, U32 must have exact size, etc.) 
+ * @NL_VALIDATE_NESTED: Check that NLA_F_NESTED is set for NLA_NESTED(_ARRAY) + * and unset for other policies. + */ +enum netlink_validation { + NL_VALIDATE_LIBERAL = 0, + NL_VALIDATE_TRAILING = BIT(0), + NL_VALIDATE_MAXTYPE = BIT(1), + NL_VALIDATE_UNSPEC = BIT(2), + NL_VALIDATE_STRICT_ATTRS = BIT(3), + NL_VALIDATE_NESTED = BIT(4), +}; + +#define NL_VALIDATE_DEPRECATED_STRICT (NL_VALIDATE_TRAILING |\ + NL_VALIDATE_MAXTYPE) +#define NL_VALIDATE_STRICT (NL_VALIDATE_TRAILING |\ + NL_VALIDATE_MAXTYPE |\ + NL_VALIDATE_UNSPEC |\ + NL_VALIDATE_STRICT_ATTRS |\ + NL_VALIDATE_NESTED) + int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *)); int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags); -int nla_validate(const struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack); -int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack); -int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack); +int __nla_validate(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack); +int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, + int len, const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack); int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); @@ -504,42 +561,167 @@ nlmsg_next(const struct nlmsghdr *nlh, int *remaining) } /** - * nlmsg_parse - parse attributes of a netlink message + * nla_parse - Parse a stream of attributes into a 
tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * @policy: validation policy + * @extack: extended ACK pointer + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessible via the attribute type. Attributes with a type + * exceeding maxtype will be rejected, policy must be specified, attributes + * will be validated in the strictest way possible. + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_parse(struct nlattr **tb, int maxtype, + const struct nlattr *head, int len, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_parse(tb, maxtype, head, len, policy, + NL_VALIDATE_STRICT, extack); +} + +/** + * nla_parse_deprecated - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * @policy: validation policy + * @extack: extended ACK pointer + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessible via the attribute type. Attributes with a type + * exceeding maxtype will be ignored and attributes from the policy are not + * always strictly validated (only for new attributes). + * + * Returns 0 on success or a negative error code. 
+ */ +static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype, + const struct nlattr *head, int len, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_parse(tb, maxtype, head, len, policy, + NL_VALIDATE_LIBERAL, extack); +} + +/** + * nla_parse_deprecated_strict - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * @policy: validation policy + * @extack: extended ACK pointer + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessible via the attribute type. Attributes with a type + * exceeding maxtype will be rejected as well as trailing data, but the + * policy is not completely strictly validated (only for new attributes). + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_parse_deprecated_strict(struct nlattr **tb, int maxtype, + const struct nlattr *head, + int len, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_parse(tb, maxtype, head, len, policy, + NL_VALIDATE_DEPRECATED_STRICT, extack); +} + +/** + * __nlmsg_parse - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @validate: validation strictness * @extack: extended ACK report struct * * See nla_parse() */ -static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, - struct nlattr *tb[], int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + unsigned int validate, + struct netlink_ext_ack 
*extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { NL_SET_ERR_MSG(extack, "Invalid header length"); return -EINVAL; } - return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), policy, extack); + return __nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), policy, validate, + extack); } -static inline int nlmsg_parse_strict(const struct nlmsghdr *nlh, int hdrlen, - struct nlattr *tb[], int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +/** + * nlmsg_parse - parse attributes of a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * @extack: extended ACK report struct + * + * See nla_parse() + */ +static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { - NL_SET_ERR_MSG(extack, "Invalid header length"); - return -EINVAL; - } + /* Go through __nlmsg_parse() so nlmsg_len is checked against hdrlen. */ + return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, + NL_VALIDATE_STRICT, extack); +} - return nla_parse_strict(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), policy, extack); +/** + * nlmsg_parse_deprecated - parse attributes of a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @extack: extended ACK report struct + * + * See nla_parse_deprecated() + */ +static inline int nlmsg_parse_deprecated(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nlmsg_parse(nlh, hdrlen, 
tb, maxtype, policy, + NL_VALIDATE_LIBERAL, extack); +} + +/** + * nlmsg_parse_deprecated_strict - parse attributes of a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @extack: extended ACK report struct + * + * See nla_parse_deprecated_strict() + */ +static inline int +nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, + NL_VALIDATE_DEPRECATED_STRICT, extack); } /** @@ -558,26 +740,75 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, } /** - * nlmsg_validate - validate a netlink message including attributes + * nla_validate_deprecated - Validate a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * @validate: validation strictness + * @extack: extended ACK report struct + * + * Validates all attributes in the specified attribute stream against the + * specified policy. Validation is done in liberal mode. + * See documentation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. 
+ */ +static inline int nla_validate_deprecated(const struct nlattr *head, int len, + int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_validate(head, len, maxtype, policy, NL_VALIDATE_LIBERAL, + extack); +} + +/** + * nla_validate - Validate a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * @validate: validation strictness + * @extack: extended ACK report struct + * + * Validates all attributes in the specified attribute stream against the + * specified policy. Validation is done in strict mode. + * See documentation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_validate(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_validate(head, len, maxtype, policy, NL_VALIDATE_STRICT, + extack); +} + +/** + * nlmsg_validate_deprecated - validate a netlink message including attributes * @nlh: netlinket message header * @hdrlen: length of familiy specific header * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct */ -static inline int nlmsg_validate(const struct nlmsghdr *nlh, - int hdrlen, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh, + int hdrlen, int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; - return nla_validate(nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), maxtype, policy, - extack); + return __nla_validate(nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), maxtype, + policy, NL_VALIDATE_LIBERAL, extack); } + + /** * nlmsg_report - need to 
report back to application? * @nlh: netlink message header @@ -905,8 +1136,32 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { - return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, - extack); + if (!(nla->nla_type & NLA_F_NESTED)) { + NL_SET_ERR_MSG_ATTR(extack, nla, "NLA_F_NESTED is missing"); + return -EINVAL; + } + + return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, + NL_VALIDATE_STRICT, extack); +} + +/** + * nla_parse_nested_deprecated - parse nested attributes + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @nla: attribute containing the nested attributes + * @policy: validation policy + * @extack: extended ACK report struct + * + * See nla_parse_deprecated() + */ +static inline int nla_parse_nested_deprecated(struct nlattr *tb[], int maxtype, + const struct nlattr *nla, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, + NL_VALIDATE_LIBERAL, extack); } /** @@ -1411,13 +1666,18 @@ static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp) } /** - * nla_nest_start - Start a new level of nested attributes + * nla_nest_start_noflag - Start a new level of nested attributes * @skb: socket buffer to add attributes to * @attrtype: attribute type of container * - * Returns the container attribute + * This function exists for backward compatibility to use in APIs which never + * marked their nest attributes with NLA_F_NESTED flag. New APIs should use + * nla_nest_start() which sets the flag. 
+ * + * Returns the container attribute or NULL on error */ -static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) +static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb, + int attrtype) { struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); @@ -1428,6 +1688,21 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) } /** + * nla_nest_start - Start a new level of nested attributes, with NLA_F_NESTED + * @skb: socket buffer to add attributes to + * @attrtype: attribute type of container + * + * Unlike nla_nest_start_noflag(), mark the nest attribute with NLA_F_NESTED + * flag. This is the preferred function to use in new code. + * + * Returns the container attribute or NULL on error + */ +static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) +{ + return nla_nest_start_noflag(skb, attrtype | NLA_F_NESTED); +} + +/** * nla_nest_end - Finalize nesting of attributes * @skb: socket buffer the attributes are stored in * @start: container attribute @@ -1461,6 +1736,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * @start: container attribute * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the nested attribute stream against the @@ -1469,12 +1745,22 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * * Returns 0 on success or a negative error code. 
*/ -static inline int nla_validate_nested(const struct nlattr *start, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +static inline int __nla_validate_nested(const struct nlattr *start, int maxtype, + const struct nla_policy *policy, + unsigned int validate, + struct netlink_ext_ack *extack) +{ + return __nla_validate(nla_data(start), nla_len(start), maxtype, policy, + validate, extack); +} + +static inline int +nla_validate_nested_deprecated(const struct nlattr *start, int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - return nla_validate(nla_data(start), nla_len(start), maxtype, policy, - extack); + return __nla_validate_nested(start, maxtype, policy, + NL_VALIDATE_LIBERAL, extack); } /** diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 9795d628a127..806454e767bf 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -18,25 +18,15 @@ struct ctl_table_header; struct nf_conntrack_ecache; -struct nf_proto_net { -#ifdef CONFIG_SYSCTL - struct ctl_table_header *ctl_table_header; - struct ctl_table *ctl_table; -#endif - unsigned int users; -}; - struct nf_generic_net { - struct nf_proto_net pn; unsigned int timeout; }; struct nf_tcp_net { - struct nf_proto_net pn; unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX]; - unsigned int tcp_loose; - unsigned int tcp_be_liberal; - unsigned int tcp_max_retrans; + int tcp_loose; + int tcp_be_liberal; + int tcp_max_retrans; }; enum udp_conntrack { @@ -46,18 +36,15 @@ enum udp_conntrack { }; struct nf_udp_net { - struct nf_proto_net pn; unsigned int timeouts[UDP_CT_MAX]; }; struct nf_icmp_net { - struct nf_proto_net pn; unsigned int timeout; }; #ifdef CONFIG_NF_CT_PROTO_DCCP struct nf_dccp_net { - struct nf_proto_net pn; int dccp_loose; unsigned int dccp_timeout[CT_DCCP_MAX + 1]; }; @@ -65,11 +52,23 @@ struct nf_dccp_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net { - struct nf_proto_net pn; 
unsigned int timeouts[SCTP_CONNTRACK_MAX]; }; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +enum gre_conntrack { + GRE_CT_UNREPLIED, + GRE_CT_REPLIED, + GRE_CT_MAX +}; + +struct nf_gre_net { + struct list_head keymap_list; + unsigned int timeouts[GRE_CT_MAX]; +}; +#endif + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; @@ -82,6 +81,9 @@ struct nf_ip_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net sctp; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE + struct nf_gre_net gre; +#endif }; struct ct_pcpu { @@ -97,18 +99,14 @@ struct netns_ct { struct delayed_work ecache_dwork; bool ecache_dwork_pending; #endif + bool auto_assign_helper_warned; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; - struct ctl_table_header *acct_sysctl_header; - struct ctl_table_header *tstamp_sysctl_header; - struct ctl_table_header *event_sysctl_header; - struct ctl_table_header *helper_sysctl_header; #endif unsigned int sysctl_log_invalid; /* Log invalid packets */ int sysctl_events; int sysctl_acct; int sysctl_auto_assign_helper; - bool auto_assign_helper_warned; int sysctl_tstamp; int sysctl_checksum; diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h index 16a842456189..d9b665151f3d 100644 --- a/include/net/netns/hash.h +++ b/include/net/netns/hash.h @@ -2,16 +2,10 @@ #ifndef __NET_NS_HASH_H__ #define __NET_NS_HASH_H__ -#include <asm/cache.h> - -struct net; +#include <net/net_namespace.h> static inline u32 net_hash_mix(const struct net *net) { -#ifdef CONFIG_NET_NS - return (u32)(((unsigned long)net) >> ilog2(sizeof(*net))); -#else - return 0; -#endif + return net->hash_mix; } #endif diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e47503b4e4d1..7698460a3dd1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -9,6 +9,7 @@ #include <linux/uidgid.h> #include <net/inet_frag.h> #include <linux/rcupdate.h> +#include <linux/siphash.h> struct tcpm_hash_bucket; struct ctl_table_header; @@ -103,6 +104,9 @@ 
struct netns_ipv4 { /* Shall we try to damage output packets if routing dev changes? */ int sysctl_ip_dynaddr; int sysctl_ip_early_demux; +#ifdef CONFIG_NET_L3_MASTER_DEV + int sysctl_raw_l3mdev_accept; +#endif int sysctl_tcp_early_demux; int sysctl_udp_early_demux; @@ -214,5 +218,6 @@ struct netns_ipv4 { unsigned int ipmr_seq; /* protected by rtnl_mutex */ atomic_t rt_genid; + siphash_key_t ip_id_key; }; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index ef1ed529f33c..5e61b5a8635d 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -8,6 +8,7 @@ #ifndef __NETNS_IPV6_H__ #define __NETNS_IPV6_H__ #include <net/dst_ops.h> +#include <uapi/linux/icmpv6.h> struct ctl_table_header; @@ -33,6 +34,10 @@ struct netns_sysctl_ipv6 { int auto_flowlabels; int icmpv6_time; int icmpv6_echo_ignore_all; + int icmpv6_echo_ignore_multicast; + int icmpv6_echo_ignore_anycast; + DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1); + unsigned long *icmpv6_ratemask_ptr; int anycast_src_echo_reply; int ip_nonlocal_bind; int fwmark_reflect; @@ -83,7 +88,7 @@ struct netns_ipv6 { struct fib6_table *fib6_local_tbl; struct fib_rules_ops *fib6_rules_ops; #endif - struct sock **icmp_sk; + struct sock * __percpu *icmp_sk; struct sock *ndisc_sk; struct sock *tcp_sk; struct sock *igmp_sk; diff --git a/include/net/netns/xdp.h b/include/net/netns/xdp.h new file mode 100644 index 000000000000..e5734261ba0a --- /dev/null +++ b/include/net/netns/xdp.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NETNS_XDP_H__ +#define __NETNS_XDP_H__ + +#include <linux/rculist.h> +#include <linux/mutex.h> + +struct netns_xdp { + struct mutex lock; + struct hlist_head list; +}; + +#endif /* __NETNS_XDP_H__ */ diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 9991e5ef52cc..59f45b1e9dac 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -5,6 +5,7 @@ #include <linux/list.h> #include <linux/wait.h> #include 
<linux/workqueue.h> +#include <linux/rhashtable-types.h> #include <linux/xfrm.h> #include <net/dst_ops.h> @@ -53,6 +54,7 @@ struct netns_xfrm { unsigned int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; struct xfrm_policy_hthresh policy_hthresh; + struct list_head inexact_bins; struct sock *nlsk; diff --git a/include/net/netrom.h b/include/net/netrom.h index 5a0714ff500f..80f15b1c1a48 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -266,7 +266,7 @@ void nr_stop_idletimer(struct sock *); int nr_t1timer_running(struct sock *); /* sysctl_net_netrom.c */ -void nr_register_sysctl(void); +int nr_register_sysctl(void); void nr_unregister_sysctl(void); #endif diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 87499b6b35d6..df5c69db68af 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -166,7 +166,7 @@ struct nci_conn_info { * According to specification 102 622 chapter 4.4 Pipes, * the pipe identifier is 7 bits long. 
*/ -#define NCI_HCI_MAX_PIPES 127 +#define NCI_HCI_MAX_PIPES 128 struct nci_hci_gate { u8 gate; diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index b669fe6dbc3b..98f31c7ea23d 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -63,10 +63,11 @@ struct pnpipehdr { u8 state_after_reset; /* reset request */ u8 error_code; /* any response */ u8 pep_type; /* status indication */ - u8 data[1]; + u8 data0; /* anything else */ }; + u8 data[]; }; -#define other_pep_type data[1] +#define other_pep_type data[0] static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) { diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 72ffb3120ced..514e3c80ecc1 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -6,6 +6,7 @@ #include <linux/workqueue.h> #include <net/sch_generic.h> #include <net/act_api.h> +#include <net/flow_offload.h> /* TC action not accessible from user space */ #define TC_ACT_REINSERT (TC_ACT_VALUE_MAX + 1) @@ -16,6 +17,7 @@ struct tcf_walker { int stop; int skip; int count; + bool nonempty; unsigned long cookie; int (*fn)(struct tcf_proto *, void *node, struct tcf_walker *); }; @@ -43,6 +45,10 @@ bool tcf_queue_work(struct rcu_work *rwork, work_func_t func); struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index); void tcf_chain_put_by_act(struct tcf_chain *chain); +struct tcf_chain *tcf_get_next_chain(struct tcf_block *block, + struct tcf_chain *chain); +struct tcf_proto *tcf_get_next_proto(struct tcf_chain *chain, + struct tcf_proto *tp, bool rtnl_held); void tcf_block_netif_keep_dst(struct tcf_block *block); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, @@ -81,11 +87,24 @@ void __tcf_block_cb_unregister(struct tcf_block *block, struct tcf_block_cb *block_cb); void tcf_block_cb_unregister(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_ident); +int __tc_indr_block_cb_register(struct net_device *dev, void 
*cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident); +int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident); +void __tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident); +void tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident); int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); #else +static inline bool tcf_block_shared(struct tcf_block *block) +{ + return false; +} + static inline int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, @@ -183,6 +202,32 @@ void tcf_block_cb_unregister(struct tcf_block *block, { } +static inline +int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + return 0; +} + +static inline +int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + return 0; +} + +static inline +void __tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ +} + +static inline +void tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ +} + static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { @@ -250,12 +295,13 @@ struct tcf_exts { int police; }; -static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) +static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net, + int action, int police) { #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; - exts->net = NULL; + exts->net = net; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); if (!exts->actions) @@ -331,30 +377,6 @@ static inline bool tcf_exts_has_actions(struct tcf_exts 
*exts) } /** - * tcf_exts_has_one_action - check if exactly one action is present - * @exts: tc filter extensions handle - * - * Returns true if exactly one action is present. - */ -static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) -{ -#ifdef CONFIG_NET_CLS_ACT - return exts->nr_actions == 1; -#else - return false; -#endif -} - -static inline struct tc_action *tcf_exts_first_action(struct tcf_exts *exts) -{ -#ifdef CONFIG_NET_CLS_ACT - return exts->actions[0]; -#else - return NULL; -#endif -} - -/** * tcf_exts_exec - execute tc filter extensions * @skb: socket buffer * @exts: tc filter extensions handle @@ -377,7 +399,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, - struct tcf_exts *exts, bool ovr, + struct tcf_exts *exts, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); @@ -585,8 +607,11 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ -int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, - enum tc_setup_type type, void *type_data, bool err_stop); +int tc_setup_flow_action(struct flow_action *flow_action, + const struct tcf_exts *exts); +int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop); +unsigned int tcf_exts_num_actions(struct tcf_exts *exts); enum tc_block_command { TC_BLOCK_BIND, @@ -609,6 +634,7 @@ struct tc_cls_common_offload { struct tc_cls_u32_knode { struct tcf_exts *exts; + struct tcf_result *res; struct tc_u32_sel *sel; u32 handle; u32 val; @@ -725,22 +751,28 @@ struct tc_cls_flower_offload { struct tc_cls_common_offload common; enum tc_fl_command command; unsigned long cookie; - struct flow_dissector *dissector; - struct fl_flow_key *mask; - struct fl_flow_key *key; - struct tcf_exts *exts; 
+ struct flow_rule *rule; + struct flow_stats stats; u32 classid; }; +static inline struct flow_rule * +tc_cls_flower_offload_flow_rule(struct tc_cls_flower_offload *tc_flow_cmd) +{ + return tc_flow_cmd->rule; +} + enum tc_matchall_command { TC_CLSMATCHALL_REPLACE, TC_CLSMATCHALL_DESTROY, + TC_CLSMATCHALL_STATS, }; struct tc_cls_matchall_offload { struct tc_cls_common_offload common; enum tc_matchall_command command; - struct tcf_exts *exts; + struct flow_rule *rule; + struct flow_stats stats; unsigned long cookie; }; @@ -787,12 +819,21 @@ enum tc_mq_command { TC_MQ_CREATE, TC_MQ_DESTROY, TC_MQ_STATS, + TC_MQ_GRAFT, +}; + +struct tc_mq_opt_offload_graft_params { + unsigned long queue; + u32 child_handle; }; struct tc_mq_qopt_offload { enum tc_mq_command command; u32 handle; - struct tc_qopt_offload_stats stats; + union { + struct tc_qopt_offload_stats stats; + struct tc_mq_opt_offload_graft_params graft_params; + }; }; enum tc_red_command { @@ -800,13 +841,16 @@ enum tc_red_command { TC_RED_DESTROY, TC_RED_STATS, TC_RED_XSTATS, + TC_RED_GRAFT, }; struct tc_red_qopt_offload_params { u32 min; u32 max; u32 probability; + u32 limit; bool is_ecn; + bool is_harddrop; struct gnet_stats_queue *qstats; }; @@ -818,6 +862,51 @@ struct tc_red_qopt_offload { struct tc_red_qopt_offload_params set; struct tc_qopt_offload_stats stats; struct red_stats *xstats; + u32 child_handle; + }; +}; + +enum tc_gred_command { + TC_GRED_REPLACE, + TC_GRED_DESTROY, + TC_GRED_STATS, +}; + +struct tc_gred_vq_qopt_offload_params { + bool present; + u32 limit; + u32 prio; + u32 min; + u32 max; + bool is_ecn; + bool is_harddrop; + u32 probability; + /* Only need backlog, see struct tc_prio_qopt_offload_params */ + u32 *backlog; +}; + +struct tc_gred_qopt_offload_params { + bool grio_on; + bool wred_on; + unsigned int dp_cnt; + unsigned int dp_def; + struct gnet_stats_queue *qstats; + struct tc_gred_vq_qopt_offload_params tab[MAX_DPs]; +}; + +struct tc_gred_qopt_offload_stats { + struct 
gnet_stats_basic_packed bstats[MAX_DPs]; + struct gnet_stats_queue qstats[MAX_DPs]; + struct red_stats *xstats[MAX_DPs]; +}; + +struct tc_gred_qopt_offload { + enum tc_gred_command command; + u32 handle; + u32 parent; + union { + struct tc_gred_qopt_offload_params set; + struct tc_gred_qopt_offload_stats stats; }; }; @@ -854,4 +943,14 @@ struct tc_prio_qopt_offload { }; }; +enum tc_root_command { + TC_ROOT_GRAFT, +}; + +struct tc_root_qopt_offload { + enum tc_root_command command; + u32 handle; + bool ingress; +}; + #endif diff --git a/include/net/protocol.h b/include/net/protocol.h index 4fc75f7ae23b..92b3eaad6088 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -42,7 +42,10 @@ struct net_protocol { int (*early_demux)(struct sk_buff *skb); int (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); - void (*err_handler)(struct sk_buff *skb, u32 info); + + /* This returns an error if we weren't able to handle the error. */ + int (*err_handler)(struct sk_buff *skb, u32 info); + unsigned int no_policy:1, netns_ok:1, /* does the protocol do more stringent @@ -58,10 +61,12 @@ struct inet6_protocol { void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); - void (*err_handler)(struct sk_buff *skb, + /* This returns an error if we weren't able to handle the error. 
*/ + int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); + unsigned int flags; /* INET6_PROTO_xxx */ }; diff --git a/include/net/psample.h b/include/net/psample.h index 9b80f814ab04..37a4df2325b2 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -3,7 +3,6 @@ #define __NET_PSAMPLE_H #include <uapi/linux/psample.h> -#include <linux/module.h> #include <linux/list.h> struct psample_group { diff --git a/include/net/raw.h b/include/net/raw.h index 9c9fa98a91a4..821ff4887f77 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -17,7 +17,7 @@ #ifndef _RAW_H #define _RAW_H - +#include <net/inet_sock.h> #include <net/protocol.h> #include <linux/icmp.h> @@ -61,6 +61,7 @@ void raw_seq_stop(struct seq_file *seq, void *v); int raw_hash_sk(struct sock *sk); void raw_unhash_sk(struct sock *sk); +void raw_init(void); struct raw_sock { /* inet_sock has to be the first member */ @@ -74,4 +75,15 @@ static inline struct raw_sock *raw_sk(const struct sock *sk) return (struct raw_sock *)sk; } +static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + bound_dev_if, dif, sdif); +#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + #endif /* _RAW_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 347015515a7d..9dfd7960d90a 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -106,11 +106,8 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, return req; } -static inline void reqsk_free(struct request_sock *req) +static inline void __reqsk_free(struct request_sock *req) { - /* temporary debugging */ - WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); - req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); @@ -118,6 +115,12 @@ static 
inline void reqsk_free(struct request_sock *req) kmem_cache_free(req->rsk_ops->slab, req); } +static inline void reqsk_free(struct request_sock *req) +{ + WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); + __reqsk_free(req); +} + static inline void reqsk_put(struct request_sock *req) { if (refcount_dec_and_test(&req->rsk_refcnt)) diff --git a/include/net/route.h b/include/net/route.h index 9883dc82f723..96f6c9ae33c2 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -29,6 +29,8 @@ #include <net/flow.h> #include <net/inet_sock.h> #include <net/ip_fib.h> +#include <net/arp.h> +#include <net/ndisc.h> #include <linux/in_route.h> #include <linux/rtnetlink.h> #include <linux/rcupdate.h> @@ -55,12 +57,15 @@ struct rtable { unsigned int rt_flags; __u16 rt_type; __u8 rt_is_input; - __u8 rt_uses_gateway; + u8 rt_gw_family; int rt_iif; /* Info on neighbour */ - __be32 rt_gateway; + union { + __be32 rt_gw4; + struct in6_addr rt_gw6; + }; /* Miscellaneous cached information */ u32 rt_mtu_locked:1, @@ -82,8 +87,8 @@ static inline bool rt_is_output_route(const struct rtable *rt) static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr) { - if (rt->rt_gateway) - return rt->rt_gateway; + if (rt->rt_gw_family == AF_INET) + return rt->rt_gw4; return daddr; } @@ -347,4 +352,34 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) return hoplimit; } +static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, + __be32 daddr) +{ + struct neighbour *neigh; + + neigh = __ipv4_neigh_lookup_noref(dev, daddr); + if (unlikely(!neigh)) + neigh = __neigh_create(&arp_tbl, &daddr, dev, false); + + return neigh; +} + +static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt, + struct sk_buff *skb, + bool *is_v6gw) +{ + struct net_device *dev = rt->dst.dev; + struct neighbour *neigh; + + if (likely(rt->rt_gw_family == AF_INET)) { + neigh = ip_neigh_gw4(dev, rt->rt_gw4); + } else if (rt->rt_gw_family == AF_INET6) { + neigh = ip_neigh_gw6(dev, 
&rt->rt_gw6); + *is_v6gw = true; + } else { + neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr); + } + return neigh; +} + #endif /* _ROUTE_H */ diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index cf26e5aacac4..e2091bb2b3a8 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -159,7 +159,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); struct net_device *rtnl_create_link(struct net *net, const char *ifname, unsigned char name_assign_type, const struct rtnl_link_ops *ops, - struct nlattr *tb[]); + struct nlattr *tb[], + struct netlink_ext_ack *extack); int rtnl_delete_link(struct net_device *dev); int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); diff --git a/include/net/nexthop.h b/include/net/rtnh.h index 902ff382a6dc..aa2cfc508f7c 100644 --- a/include/net/nexthop.h +++ b/include/net/rtnh.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NET_NEXTHOP_H -#define __NET_NEXTHOP_H +#ifndef __NET_RTNH_H +#define __NET_RTNH_H #include <linux/rtnetlink.h> #include <net/netlink.h> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4d736427a4cb..21f434f3ac9e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -12,6 +12,7 @@ #include <linux/list.h> #include <linux/refcount.h> #include <linux/workqueue.h> +#include <linux/mutex.h> #include <net/gen_stats.h> #include <net/rtnetlink.h> @@ -24,6 +25,9 @@ struct bpf_flow_keys; typedef int tc_setup_cb_t(enum tc_setup_type type, void *type_data, void *cb_priv); +typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, + enum tc_setup_type type, void *type_data); + struct qdisc_rate_table { struct tc_ratespec rate; u32 data[256]; @@ -106,6 +110,9 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; + + /* for NOLOCK qdisc, true if there are no enqueued skbs */ + bool empty; struct rcu_head rcu; }; @@ -136,11 +143,24 @@ static inline 
bool qdisc_is_running(struct Qdisc *qdisc) return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } +static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) +{ + return q->flags & TCQ_F_CPUSTATS; +} + +static inline bool qdisc_is_empty(const struct Qdisc *qdisc) +{ + if (qdisc_is_percpu_stats(qdisc)) + return qdisc->empty; + return !qdisc->q.qlen; +} + static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { if (!spin_trylock(&qdisc->seqlock)) return false; + qdisc->empty = false; } else if (qdisc_is_running(qdisc)) { return false; } @@ -175,6 +195,7 @@ static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) } struct Qdisc_class_ops { + unsigned int flags; /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); int (*graft)(struct Qdisc *, unsigned long cl, @@ -206,6 +227,13 @@ struct Qdisc_class_ops { struct gnet_dump *); }; +/* Qdisc_class_ops flag values */ + +/* Implements API that doesn't require rtnl lock */ +enum qdisc_class_ops_flags { + QDISC_CLASS_OPS_DOIT_UNLOCKED = 1, +}; + struct Qdisc_ops { struct Qdisc_ops *next; const struct Qdisc_class_ops *cl_ops; @@ -269,19 +297,21 @@ struct tcf_proto_ops { const struct tcf_proto *, struct tcf_result *); int (*init)(struct tcf_proto*); - void (*destroy)(struct tcf_proto *tp, + void (*destroy)(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack); void* (*get)(struct tcf_proto*, u32 handle); + void (*put)(struct tcf_proto *tp, void *f); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - void **, bool, + void **, bool, bool, struct netlink_ext_ack *); int (*delete)(struct tcf_proto *tp, void *arg, - bool *last, + bool *last, bool rtnl_held, struct netlink_ext_ack *); - void (*walk)(struct tcf_proto*, struct tcf_walker *arg); + void (*walk)(struct tcf_proto *tp, + struct tcf_walker *arg, bool rtnl_held); int 
(*reoffload)(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack); @@ -294,12 +324,18 @@ struct tcf_proto_ops { /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, - struct sk_buff *skb, struct tcmsg*); + struct sk_buff *skb, struct tcmsg*, + bool); int (*tmplt_dump)(struct sk_buff *skb, struct net *net, void *tmplt_priv); struct module *owner; + int flags; +}; + +enum tcf_proto_ops_flags { + TCF_PROTO_OPS_DOIT_UNLOCKED = 1, }; struct tcf_proto { @@ -318,17 +354,20 @@ struct tcf_proto { void *data; const struct tcf_proto_ops *ops; struct tcf_chain *chain; + /* Lock protects tcf_proto shared state and can be used by unlocked + * classifiers to protect their private data. + */ + spinlock_t lock; + bool deleting; + refcount_t refcnt; struct rcu_head rcu; }; struct qdisc_skb_cb { - union { - struct { - unsigned int pkt_len; - u16 slave_dev_queue_mapping; - u16 tc_classid; - }; - struct bpf_flow_keys *flow_keys; + struct { + unsigned int pkt_len; + u16 slave_dev_queue_mapping; + u16 tc_classid; }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; @@ -337,6 +376,8 @@ struct qdisc_skb_cb { typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); struct tcf_chain { + /* Protects filter_chain. */ + struct mutex filter_chain_lock; struct tcf_proto __rcu *filter_chain; struct list_head list; struct tcf_block *block; @@ -344,11 +385,17 @@ struct tcf_chain { unsigned int refcnt; unsigned int action_refcnt; bool explicitly_created; + bool flushing; const struct tcf_proto_ops *tmplt_ops; void *tmplt_priv; + struct rcu_head rcu; }; struct tcf_block { + /* Lock protects tcf_block and lifetime-management data of chains + * attached to the block (refcnt, action_refcnt, explicitly_created). 
+ */ + struct mutex lock; struct list_head chain_list; u32 index; /* block index for shared blocks */ refcount_t refcnt; @@ -366,6 +413,34 @@ struct tcf_block { struct rcu_head rcu; }; +#ifdef CONFIG_PROVE_LOCKING +static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain) +{ + return lockdep_is_held(&chain->filter_chain_lock); +} + +static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) +{ + return lockdep_is_held(&tp->lock); +} +#else +static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain) +{ + return true; +} + +static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) +{ + return true; +} +#endif /* #ifdef CONFIG_PROVE_LOCKING */ + +#define tcf_chain_dereference(p, chain) \ + rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain)) + +#define tcf_proto_dereference(p, tp) \ + rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp)) + static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) { if (*flags & TCA_CLS_FLAGS_IN_HW) @@ -420,7 +495,7 @@ static inline int qdisc_qlen_sum(const struct Qdisc *q) __u32 qlen = q->qstats.qlen; int i; - if (q->flags & TCQ_F_NOLOCK) { + if (qdisc_is_percpu_stats(q)) { for_each_possible_cpu(i) qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen; } else { @@ -577,8 +652,31 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, void qdisc_reset(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); void qdisc_put_unlocked(struct Qdisc *qdisc); -void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, - unsigned int len); +void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len); +#ifdef CONFIG_NET_SCHED +int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, + void *type_data); +void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, + struct Qdisc *new, struct Qdisc *old, + enum tc_setup_type type, void *type_data, + struct netlink_ext_ack *extack); +#else +static inline int 
+qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, + void *type_data) +{ + q->flags &= ~TCQ_F_OFFLOADED; + return 0; +} + +static inline void +qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, + struct Qdisc *new, struct Qdisc *old, + enum tc_setup_type type, void *type_data, + struct netlink_ext_ack *extack) +{ +} +#endif struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, struct netlink_ext_ack *extack); @@ -656,7 +754,7 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev) struct netdev_queue *txq = netdev_get_tx_queue(dev, i); const struct Qdisc *q = rcu_dereference(txq->qdisc); - if (q->q.qlen) { + if (!qdisc_is_empty(q)) { rcu_read_unlock(); return false; } @@ -726,11 +824,6 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return sch->enqueue(skb, sch, to_free); } -static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) -{ - return q->flags & TCQ_F_CPUSTATS; -} - static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, __u64 bytes, __u32 packets) { @@ -843,6 +936,41 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch) sch->qstats.overlimits++; } +static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch) +{ + __u32 qlen = qdisc_qlen_sum(sch); + + return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen); +} + +static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, + __u32 *backlog) +{ + struct gnet_stats_queue qstats = { 0 }; + __u32 len = qdisc_qlen_sum(sch); + + __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len); + *qlen = qstats.qlen; + *backlog = qstats.backlog; +} + +static inline void qdisc_tree_flush_backlog(struct Qdisc *sch) +{ + __u32 qlen, backlog; + + qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); + qdisc_tree_reduce_backlog(sch, qlen, backlog); +} + +static inline void qdisc_purge_queue(struct Qdisc *sch) +{ + __u32 qlen, 
backlog; + + qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); + qdisc_reset(sch); + qdisc_tree_reduce_backlog(sch, qlen, backlog); +} + static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) { qh->head = NULL; @@ -980,6 +1108,32 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) return skb; } +static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch, + struct sk_buff *skb) +{ + if (qdisc_is_percpu_stats(sch)) { + qdisc_qstats_cpu_backlog_dec(sch, skb); + qdisc_bstats_cpu_update(sch, skb); + qdisc_qstats_cpu_qlen_dec(sch); + } else { + qdisc_qstats_backlog_dec(sch, skb); + qdisc_bstats_update(sch, skb); + sch->q.qlen--; + } +} + +static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch, + unsigned int pkt_len) +{ + if (qdisc_is_percpu_stats(sch)) { + qdisc_qstats_cpu_qlen_inc(sch); + this_cpu_add(sch->cpu_qstats->backlog, pkt_len); + } else { + sch->qstats.backlog += pkt_len; + sch->q.qlen++; + } +} + /* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) { @@ -987,8 +1141,13 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) if (skb) { skb = __skb_dequeue(&sch->gso_skb); - qdisc_qstats_backlog_dec(sch, skb); - sch->q.qlen--; + if (qdisc_is_percpu_stats(sch)) { + qdisc_qstats_cpu_backlog_dec(sch, skb); + qdisc_qstats_cpu_qlen_dec(sch); + } else { + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; + } } else { skb = sch->dequeue(sch); } @@ -1026,13 +1185,8 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, sch_tree_lock(sch); old = *pold; *pold = new; - if (old != NULL) { - unsigned int qlen = old->q.qlen; - unsigned int backlog = old->qstats.backlog; - - qdisc_reset(old); - qdisc_tree_reduce_backlog(old, qlen, backlog); - } + if (old != NULL) + qdisc_tree_flush_backlog(old); sch_tree_unlock(sch); return old; diff --git a/include/net/sctp/checksum.h 
b/include/net/sctp/checksum.h index 32ee65a30aff..1c6e6c0766ca 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -61,7 +61,7 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { - struct sctphdr *sh = sctp_hdr(skb); + struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); const struct skb_checksum_ops ops = { .update = sctp_csum_update, .combine = sctp_csum_combine, diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 6640f84fe536..6d5beac29bc1 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -105,7 +105,6 @@ enum sctp_verb { SCTP_CMD_T1_RETRAN, /* Mark for retransmission after T1 timeout */ SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */ SCTP_CMD_SEND_MSG, /* Send the whole use message */ - SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ SCTP_CMD_SET_ASOC, /* Restore association context */ SCTP_CMD_LAST diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 8dadc74c22e7..4588bdc2b8f0 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -71,7 +71,7 @@ enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM }; SCTP_NUM_AUTH_CHUNK_TYPES) /* These are the different flavours of event. 
*/ -enum sctp_event { +enum sctp_event_type { SCTP_EVENT_T_CHUNK = 1, SCTP_EVENT_T_TIMEOUT, SCTP_EVENT_T_OTHER, diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 2abbc15824af..eefdfa5abf6e 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -151,8 +151,8 @@ int sctp_primitive_RECONF(struct net *net, struct sctp_association *asoc, * sctp/input.c */ int sctp_rcv(struct sk_buff *skb); -void sctp_v4_err(struct sk_buff *skb, u32 info); -void sctp_hash_endpoint(struct sctp_endpoint *); +int sctp_v4_err(struct sk_buff *skb, u32 info); +int sctp_hash_endpoint(struct sctp_endpoint *ep); void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, struct sctphdr *, struct sctp_association **, @@ -421,7 +421,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) /* * This mimics the behavior of skb_set_owner_r */ - sk->sk_forward_alloc -= event->rmem_len; + sk_mem_charge(sk, event->rmem_len); } /* Tests if the list has one and only one entry. */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 9e3d32746430..24825a81829e 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -173,7 +173,7 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); const struct sctp_sm_table_entry *sctp_sm_lookup_event( struct net *net, - enum sctp_event event_type, + enum sctp_event_type event_type, enum sctp_state state, union sctp_subtype event_subtype); int sctp_chunk_iif(const struct sctp_chunk *); @@ -313,7 +313,7 @@ struct sctp_chunk *sctp_process_strreset_resp( /* Prototypes for statetable processing. 
*/ -int sctp_do_sm(struct net *net, enum sctp_event event_type, +int sctp_do_sm(struct net *net, enum sctp_event_type event_type, union sctp_subtype subtype, enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, gfp_t gfp); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index feada358d872..140fd836a396 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -48,6 +48,7 @@ #define __sctp_structs_h__ #include <linux/ktime.h> +#include <linux/generic-radix-tree.h> #include <linux/rhashtable-types.h> #include <linux/socket.h> /* linux/in.h needs this!! */ #include <linux/in.h> /* We get struct sockaddr_in. */ @@ -57,7 +58,6 @@ #include <linux/atomic.h> /* This gets us atomic counters. */ #include <linux/skbuff.h> /* We need sk_buff_head. */ #include <linux/workqueue.h> /* We need tq_struct. */ -#include <linux/flex_array.h> /* We need flex_array. */ #include <linux/sctp.h> /* We need sctp* header structs. */ #include <net/sctp/auth.h> /* We need auth specific structs */ #include <net/ip.h> /* For inet_skb_parm */ @@ -96,7 +96,9 @@ struct sctp_stream; struct sctp_bind_bucket { unsigned short port; - unsigned short fastreuse; + signed char fastreuse; + signed char fastreuseport; + kuid_t fastuid; struct hlist_node node; struct hlist_head owner; struct net *net; @@ -197,6 +199,8 @@ struct sctp_sock { __u32 flowlabel; __u8 dscp; + int pf_retrans; + /* The initial Path MTU to use for new associations. */ __u32 pathmtu; @@ -207,6 +211,8 @@ struct sctp_sock { /* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */ __u32 param_flags; + __u32 default_ss; + struct sctp_rtoinfo rtoinfo; struct sctp_paddrparams paddrparam; struct sctp_assocparams assocparams; @@ -215,7 +221,7 @@ struct sctp_sock { * These two structures must be grouped together for the usercopy * whitelist region. 
*/ - struct sctp_event_subscribe subscribe; + __u16 subscribe; struct sctp_initmsg initmsg; int user_frag; @@ -1190,6 +1196,8 @@ int sctp_bind_addr_conflict(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *, struct sctp_sock *); int sctp_bind_addr_state(const struct sctp_bind_addr *bp, const union sctp_addr *addr); +int sctp_bind_addrs_check(struct sctp_sock *sp, + struct sctp_sock *sp2, int cnt2); union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, const union sctp_addr *addrs, int addrcnt, @@ -1441,8 +1449,9 @@ struct sctp_stream_in { }; struct sctp_stream { - struct flex_array *out; - struct flex_array *in; + GENRADIX(struct sctp_stream_out) out; + GENRADIX(struct sctp_stream_in) in; + __u16 outcnt; __u16 incnt; /* Current stream being sent, if any */ @@ -1465,17 +1474,17 @@ struct sctp_stream { }; static inline struct sctp_stream_out *sctp_stream_out( - const struct sctp_stream *stream, + struct sctp_stream *stream, __u16 sid) { - return flex_array_get(stream->out, sid); + return genradix_ptr(&stream->out, sid); } static inline struct sctp_stream_in *sctp_stream_in( - const struct sctp_stream *stream, + struct sctp_stream *stream, __u16 sid) { - return flex_array_get(stream->in, sid); + return genradix_ptr(&stream->in, sid); } #define SCTP_SO(s, i) sctp_stream_out((s), (i)) @@ -2073,6 +2082,8 @@ struct sctp_association { int sent_cnt_removable; + __u16 subscribe; + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 51b4e0626c34..bd922a0fe914 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -164,30 +164,39 @@ void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event, __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event); +static inline void sctp_ulpevent_type_set(__u16 *subscribe, + __u16 sn_type, __u8 on) +{ + if (sn_type > SCTP_SN_TYPE_MAX) + return; 
+ + if (on) + *subscribe |= (1 << (sn_type - SCTP_SN_TYPE_BASE)); + else + *subscribe &= ~(1 << (sn_type - SCTP_SN_TYPE_BASE)); +} + /* Is this event type enabled? */ -static inline int sctp_ulpevent_type_enabled(__u16 sn_type, - struct sctp_event_subscribe *mask) +static inline bool sctp_ulpevent_type_enabled(__u16 subscribe, __u16 sn_type) { - int offset = sn_type - SCTP_SN_TYPE_BASE; - char *amask = (char *) mask; + if (sn_type > SCTP_SN_TYPE_MAX) + return false; - if (offset >= sizeof(struct sctp_event_subscribe)) - return 0; - return amask[offset]; + return subscribe & (1 << (sn_type - SCTP_SN_TYPE_BASE)); } /* Given an event subscription, is this event enabled? */ -static inline int sctp_ulpevent_is_enabled(const struct sctp_ulpevent *event, - struct sctp_event_subscribe *mask) +static inline bool sctp_ulpevent_is_enabled(const struct sctp_ulpevent *event, + __u16 subscribe) { __u16 sn_type; - int enabled = 1; - if (sctp_ulpevent_is_notification(event)) { - sn_type = sctp_ulpevent_get_notification_type(event); - enabled = sctp_ulpevent_type_enabled(sn_type, mask); - } - return enabled; + if (!sctp_ulpevent_is_notification(event)) + return true; + + sn_type = sctp_ulpevent_get_notification_type(event); + + return sctp_ulpevent_type_enabled(subscribe, sn_type); } #endif /* __sctp_ulpevent_h__ */ diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index bb0ecba3db2b..f4ac7117ff29 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -59,7 +59,7 @@ void sctp_ulpq_free(struct sctp_ulpq *); int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); /* Add a new event for propagation to the ULP. */ -int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sctp_ulpevent *ev); +int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sk_buff_head *skb_list); /* Renege previously received chunks. 
*/ void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); diff --git a/include/net/seg6.h b/include/net/seg6.h index 2567941a2f32..8b2dc6869fd1 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -16,7 +16,6 @@ #include <linux/net.h> #include <linux/ipv6.h> -#include <net/lwtunnel.h> #include <linux/seg6.h> #include <linux/rhashtable-types.h> diff --git a/include/net/smc.h b/include/net/smc.h index 9ef49f8b1002..bd9c0fb3b577 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -74,6 +74,7 @@ struct smcd_dev { struct list_head vlan; struct workqueue_struct *event_wq; u8 pnetid[SMC_MAX_PNETID_LEN]; + bool pnetid_by_user; }; struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, diff --git a/include/net/sock.h b/include/net/sock.h index 0e3a09380655..4d208c0f9c14 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -236,6 +236,8 @@ struct sock_common { /* public: */ }; +struct bpf_sk_storage; + /** * struct sock - network layer representation of sockets * @__sk_common: shared layout with inet_timewait_sock @@ -298,6 +300,7 @@ struct sock_common { * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received + * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only * @sk_tsflags: SO_TIMESTAMPING socket options * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications @@ -367,6 +370,7 @@ struct sock { atomic_t sk_drops; int sk_rcvlowat; struct sk_buff_head sk_error_queue; + struct sk_buff *sk_rx_skb_cache; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -413,6 +417,7 @@ struct sock { struct sk_buff *sk_send_head; struct rb_root tcp_rtx_queue; }; + struct sk_buff *sk_tx_skb_cache; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; @@ -474,6 +479,9 @@ struct sock { const struct cred *sk_peer_cred; long 
sk_rcvtimeo; ktime_t sk_stamp; +#if BITS_PER_LONG==32 + seqlock_t sk_stamp_seq; +#endif u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; @@ -504,6 +512,9 @@ struct sock { #endif void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; +#ifdef CONFIG_BPF_SYSCALL + struct bpf_sk_storage __rcu *sk_bpf_storage; +#endif struct rcu_head sk_rcu; }; @@ -706,6 +717,12 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) hlist_add_head_rcu(&sk->sk_node, list); } +static inline void sk_add_node_tail_rcu(struct sock *sk, struct hlist_head *list) +{ + sock_hold(sk); + hlist_add_tail_rcu(&sk->sk_node, list); +} + static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); @@ -801,6 +818,7 @@ enum sock_flags { SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */ SOCK_TXTIME, SOCK_XDP, /* XDP is attached */ + SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -955,7 +973,7 @@ static inline void sock_rps_record_flow_hash(__u32 hash) static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS - if (static_key_false(&rfs_needed)) { + if (static_branch_unlikely(&rfs_needed)) { /* Reading sk->sk_rxhash might incur an expensive cache line * miss. 
* @@ -1110,7 +1128,7 @@ struct proto { unsigned int inuse_idx; #endif - bool (*stream_memory_free)(const struct sock *sk); + bool (*stream_memory_free)(const struct sock *sk, int wake); bool (*stream_memory_read)(const struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); @@ -1192,19 +1210,29 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ -static inline bool sk_stream_memory_free(const struct sock *sk) +static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { if (sk->sk_wmem_queued >= sk->sk_sndbuf) return false; return sk->sk_prot->stream_memory_free ? - sk->sk_prot->stream_memory_free(sk) : true; + sk->sk_prot->stream_memory_free(sk, wake) : true; } -static inline bool sk_stream_is_writeable(const struct sock *sk) +static inline bool sk_stream_memory_free(const struct sock *sk) +{ + return __sk_stream_memory_free(sk, 0); +} + +static inline bool __sk_stream_is_writeable(const struct sock *sk, int wake) { return sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && - sk_stream_memory_free(sk); + __sk_stream_memory_free(sk, wake); +} + +static inline bool sk_stream_is_writeable(const struct sock *sk) +{ + return __sk_stream_is_writeable(sk, 0); } static inline int sk_under_cgroup_hierarchy(struct sock *sk, @@ -1263,7 +1291,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk) percpu_counter_inc(sk->sk_prot->sockets_allocated); } -static inline int +static inline u64 sk_sockets_allocated_read_positive(struct sock *sk) { return percpu_counter_read_positive(sk->sk_prot->sockets_allocated); @@ -1445,6 +1473,11 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) sock_set_flag(sk, SOCK_QUEUE_SHRUNK); sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); + if (!sk->sk_tx_skb_cache) { + skb_zcopy_clear(skb, true); + sk->sk_tx_skb_cache = skb; + return; + } 
__kfree_skb(skb); } @@ -1586,6 +1619,8 @@ int sock_setsockopt(struct socket *sock, int level, int op, int sock_getsockopt(struct socket *sock, int level, int op, char __user *optval, int __user *optlen); +int sock_gettstamp(struct socket *sock, void __user *userstamp, + bool timeval, bool time32); struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode); struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, @@ -2063,12 +2098,6 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq) * @p: poll_table * * See the comments in the wq_has_sleeper function. - * - * Do not derive sock from filp->private_data here. An SMC socket establishes - * an internal TCP socket that is used in the fallback case. All socket - * operations on the SMC socket are then forwarded to the TCP socket. In case of - * poll, the filp->private_data pointer references the SMC socket because the - * TCP socket has no file assigned. */ static inline void sock_poll_wait(struct file *filp, struct socket *sock, poll_table *p) @@ -2287,6 +2316,34 @@ static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) atomic_add(segs, &sk->sk_drops); } +static inline ktime_t sock_read_timestamp(struct sock *sk) +{ +#if BITS_PER_LONG==32 + unsigned int seq; + ktime_t kt; + + do { + seq = read_seqbegin(&sk->sk_stamp_seq); + kt = sk->sk_stamp; + } while (read_seqretry(&sk->sk_stamp_seq, seq)); + + return kt; +#else + return sk->sk_stamp; +#endif +} + +static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) +{ +#if BITS_PER_LONG==32 + write_seqlock(&sk->sk_stamp_seq); + sk->sk_stamp = kt; + write_sequnlock(&sk->sk_stamp_seq); +#else + sk->sk_stamp = kt; +#endif +} + void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, @@ -2311,7 +2368,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else - sk->sk_stamp = kt; + sock_write_timestamp(sk, kt); if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid) __sock_recv_wifi_status(msg, sk, skb); @@ -2332,9 +2389,9 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) __sock_recv_ts_and_drops(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) - sk->sk_stamp = skb->tstamp; + sock_write_timestamp(sk, skb->tstamp); else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) - sk->sk_stamp = 0; + sock_write_timestamp(sk, 0); } void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); @@ -2384,6 +2441,15 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); + if ( +#ifdef CONFIG_RPS + !static_branch_unlikely(&rps_needed) && +#endif + !sk->sk_rx_skb_cache) { + sk->sk_rx_skb_cache = skb; + skb_orphan(skb); + return; + } __kfree_skb(skb); } @@ -2444,8 +2510,6 @@ static inline bool sk_listener(const struct sock *sk) } void sock_enable_timestamp(struct sock *sk, int flag); -int sock_get_timestamp(struct sock *, struct timeval __user *); -int sock_get_timestampns(struct sock *, struct timespec __user *); int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 881ecb1555bf..0ebd67ae7012 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -20,14 +20,7 @@ #define SWITCHDEV_F_SKIP_EOPNOTSUPP BIT(1) #define SWITCHDEV_F_DEFER BIT(2) -struct switchdev_trans_item { - struct list_head list; - void *data; - void (*destructor)(const void *data); -}; - struct switchdev_trans { - struct list_head item_list; bool ph_prepare; }; @@ -43,10 +36,9 @@ static inline bool 
switchdev_trans_ph_commit(struct switchdev_trans *trans) enum switchdev_attr_id { SWITCHDEV_ATTR_ID_UNDEFINED, - SWITCHDEV_ATTR_ID_PORT_PARENT_ID, SWITCHDEV_ATTR_ID_PORT_STP_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, - SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, + SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_MROUTER, SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, @@ -61,10 +53,8 @@ struct switchdev_attr { void *complete_priv; void (*complete)(struct net_device *dev, int err, void *priv); union { - struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ - unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ - unsigned long brport_flags_support; /* PORT_BRIDGE_FLAGS_SUPPORT */ + unsigned long brport_flags; /* PORT_{PRE}_BRIDGE_FLAGS */ bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ @@ -95,8 +85,8 @@ struct switchdev_obj_port_vlan { u16 vid_end; }; -#define SWITCHDEV_OBJ_PORT_VLAN(obj) \ - container_of(obj, struct switchdev_obj_port_vlan, obj) +#define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \ + container_of((OBJ), struct switchdev_obj_port_vlan, obj) /* SWITCHDEV_OBJ_ID_PORT_MDB */ struct switchdev_obj_port_mdb { @@ -105,40 +95,11 @@ struct switchdev_obj_port_mdb { u16 vid; }; -#define SWITCHDEV_OBJ_PORT_MDB(obj) \ - container_of(obj, struct switchdev_obj_port_mdb, obj) - -void switchdev_trans_item_enqueue(struct switchdev_trans *trans, - void *data, void (*destructor)(void const *), - struct switchdev_trans_item *tritem); -void *switchdev_trans_item_dequeue(struct switchdev_trans *trans); +#define SWITCHDEV_OBJ_PORT_MDB(OBJ) \ + container_of((OBJ), struct switchdev_obj_port_mdb, obj) typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj); -/** - * struct switchdev_ops - switchdev operations - * - * @switchdev_port_attr_get: Get a port attribute (see switchdev_attr). 
- * - * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). - * - * @switchdev_port_obj_add: Add an object to port (see switchdev_obj_*). - * - * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj_*). - */ -struct switchdev_ops { - int (*switchdev_port_attr_get)(struct net_device *dev, - struct switchdev_attr *attr); - int (*switchdev_port_attr_set)(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans); - int (*switchdev_port_obj_add)(struct net_device *dev, - const struct switchdev_obj *obj, - struct switchdev_trans *trans); - int (*switchdev_port_obj_del)(struct net_device *dev, - const struct switchdev_obj *obj); -}; - enum switchdev_notifier_type { SWITCHDEV_FDB_ADD_TO_BRIDGE = 1, SWITCHDEV_FDB_DEL_TO_BRIDGE, @@ -146,6 +107,12 @@ enum switchdev_notifier_type { SWITCHDEV_FDB_DEL_TO_DEVICE, SWITCHDEV_FDB_OFFLOADED, + SWITCHDEV_PORT_OBJ_ADD, /* Blocking. */ + SWITCHDEV_PORT_OBJ_DEL, /* Blocking. */ + SWITCHDEV_PORT_ATTR_SET, /* May be blocking . 
*/ + + SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE, + SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE, SWITCHDEV_VXLAN_FDB_OFFLOADED, @@ -153,6 +120,7 @@ enum switchdev_notifier_type { struct switchdev_notifier_info { struct net_device *dev; + struct netlink_ext_ack *extack; }; struct switchdev_notifier_fdb_info { @@ -163,47 +131,84 @@ struct switchdev_notifier_fdb_info { offloaded:1; }; +struct switchdev_notifier_port_obj_info { + struct switchdev_notifier_info info; /* must be first */ + const struct switchdev_obj *obj; + struct switchdev_trans *trans; + bool handled; +}; + +struct switchdev_notifier_port_attr_info { + struct switchdev_notifier_info info; /* must be first */ + const struct switchdev_attr *attr; + struct switchdev_trans *trans; + bool handled; +}; + static inline struct net_device * switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) { return info->dev; } +static inline struct netlink_ext_ack * +switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info) +{ + return info->extack; +} + #ifdef CONFIG_NET_SWITCHDEV void switchdev_deferred_process(void); -int switchdev_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr); int switchdev_port_obj_add(struct net_device *dev, - const struct switchdev_obj *obj); + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack); int switchdev_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj); + int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, - struct switchdev_notifier_info *info); + struct switchdev_notifier_info *info, + struct netlink_ext_ack *extack); + +int register_switchdev_blocking_notifier(struct notifier_block *nb); +int 
unregister_switchdev_blocking_notifier(struct notifier_block *nb); +int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, + struct switchdev_notifier_info *info, + struct netlink_ext_ack *extack); + void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); -bool switchdev_port_same_parent_id(struct net_device *a, - struct net_device *b); - -#define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops)) +int switchdev_handle_port_obj_add(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + int (*add_cb)(struct net_device *dev, + const struct switchdev_obj *obj, + struct switchdev_trans *trans, + struct netlink_ext_ack *extack)); +int switchdev_handle_port_obj_del(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + int (*del_cb)(struct net_device *dev, + const struct switchdev_obj *obj)); + +int switchdev_handle_port_attr_set(struct net_device *dev, + struct switchdev_notifier_port_attr_info *port_attr_info, + bool (*check_cb)(const struct net_device *dev), + int (*set_cb)(struct net_device *dev, + const struct switchdev_attr *attr, + struct switchdev_trans *trans)); #else static inline void switchdev_deferred_process(void) { } -static inline int switchdev_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) -{ - return -EOPNOTSUPP; -} - static inline int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr) { @@ -211,7 +216,8 @@ static inline int switchdev_port_attr_set(struct net_device *dev, } static inline int switchdev_port_obj_add(struct net_device *dev, - const struct switchdev_obj *obj) + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } @@ -234,19 +240,65 @@ static inline int unregister_switchdev_notifier(struct notifier_block *nb) 
static inline int call_switchdev_notifiers(unsigned long val, struct net_device *dev, - struct switchdev_notifier_info *info) + struct switchdev_notifier_info *info, + struct netlink_ext_ack *extack) +{ + return NOTIFY_DONE; +} + +static inline int +register_switchdev_blocking_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int +unregister_switchdev_blocking_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int +call_switchdev_blocking_notifiers(unsigned long val, + struct net_device *dev, + struct switchdev_notifier_info *info, + struct netlink_ext_ack *extack) { return NOTIFY_DONE; } -static inline bool switchdev_port_same_parent_id(struct net_device *a, - struct net_device *b) +static inline int +switchdev_handle_port_obj_add(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + int (*add_cb)(struct net_device *dev, + const struct switchdev_obj *obj, + struct switchdev_trans *trans, + struct netlink_ext_ack *extack)) { - return false; + return 0; } -#define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0) +static inline int +switchdev_handle_port_obj_del(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + int (*del_cb)(struct net_device *dev, + const struct switchdev_obj *obj)) +{ + return 0; +} +static inline int +switchdev_handle_port_attr_set(struct net_device *dev, + struct switchdev_notifier_port_attr_info *port_attr_info, + bool (*check_cb)(const struct net_device *dev), + int (*set_cb)(struct net_device *dev, + const struct switchdev_attr *attr, + struct switchdev_trans *trans)) +{ + return 0; +} #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index 32d2454c0479..68269e4581b7 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -21,7 +21,7 @@ struct tcf_csum { 
static inline bool is_tcf_csum(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_CSUM) + if (a->ops && a->ops->id == TCA_ID_CSUM) return true; #endif return false; diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index ef8dd0db70ce..eb8f01c819e6 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -22,7 +22,7 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act, #ifdef CONFIG_NET_CLS_ACT struct tcf_gact *gact; - if (a->ops && a->ops->type != TCA_ACT_GACT) + if (a->ops && a->ops->id != TCA_ID_GACT) return false; gact = to_gact(a); @@ -56,7 +56,7 @@ static inline bool is_tcf_gact_goto_chain(const struct tc_action *a) static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a) { - return a->goto_chain->index; + return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK; } #endif /* __NET_TC_GACT_H */ diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 86d13b01b39d..c7f24a2da1ca 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -5,7 +5,8 @@ #include <net/act_api.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> -#include <linux/module.h> + +struct module; struct tcf_ife_params { u8 eth_dst[ETH_ALEN]; diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index a2e9cbca5c9e..c757585a05b0 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -17,7 +17,7 @@ struct tcf_mirred { static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_MIRRED) + if (a->ops && a->ops->id == TCA_ID_MIRRED) return to_mirred(a)->tcfm_eaction == TCA_EGRESS_REDIR; #endif return false; @@ -26,7 +26,7 @@ static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a) static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a) { #ifdef 
CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_MIRRED) + if (a->ops && a->ops->id == TCA_ID_MIRRED) return to_mirred(a)->tcfm_eaction == TCA_EGRESS_MIRROR; #endif return false; diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index fac3ad4a86de..748cf87a4d7e 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -23,7 +23,7 @@ struct tcf_pedit { static inline bool is_tcf_pedit(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_PEDIT) + if (a->ops && a->ops->id == TCA_ID_PEDIT) return true; #endif return false; diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h new file mode 100644 index 000000000000..8b9ef3664262 --- /dev/null +++ b/include/net/tc_act/tc_police.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_TC_POLICE_H +#define __NET_TC_POLICE_H + +#include <net/act_api.h> + +struct tcf_police_params { + int tcfp_result; + u32 tcfp_ewma_rate; + s64 tcfp_burst; + u32 tcfp_mtu; + s64 tcfp_mtu_ptoks; + struct psched_ratecfg rate; + bool rate_present; + struct psched_ratecfg peak; + bool peak_present; + struct rcu_head rcu; +}; + +struct tcf_police { + struct tc_action common; + struct tcf_police_params __rcu *params; + + spinlock_t tcfp_lock ____cacheline_aligned_in_smp; + s64 tcfp_toks; + s64 tcfp_ptoks; + s64 tcfp_t_c; +}; + +#define to_police(pc) ((struct tcf_police *)pc) + +/* old policer structure from before tc actions */ +struct tc_police_compat { + u32 index; + int action; + u32 limit; + u32 burst; + u32 mtu; + struct tc_ratespec rate; + struct tc_ratespec peakrate; +}; + +static inline bool is_tcf_police(const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + if (act->ops && act->ops->id == TCA_ID_POLICE) + return true; +#endif + return false; +} + +static inline u64 tcf_police_rate_bytes_ps(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params 
*params; + + params = rcu_dereference_bh(police->params); + return params->rate.rate_bytes_ps; +} + +static inline s64 tcf_police_tcfp_burst(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_bh(police->params); + return params->tcfp_burst; +} + +#endif /* __NET_TC_POLICE_H */ diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h index 01dbfea32672..0a559d4b6f0f 100644 --- a/include/net/tc_act/tc_sample.h +++ b/include/net/tc_act/tc_sample.h @@ -20,7 +20,7 @@ struct tcf_sample { static inline bool is_tcf_sample(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - return a->ops && a->ops->type == TCA_ACT_SAMPLE; + return a->ops && a->ops->id == TCA_ID_SAMPLE; #else return false; #endif diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 911bbac838a2..85c5c4756d92 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -44,7 +44,7 @@ static inline bool is_tcf_skbedit_mark(const struct tc_action *a) #ifdef CONFIG_NET_CLS_ACT u32 flags; - if (a->ops && a->ops->type == TCA_ACT_SKBEDIT) { + if (a->ops && a->ops->id == TCA_ID_SKBEDIT) { rcu_read_lock(); flags = rcu_dereference(to_skbedit(a)->params)->flags; rcu_read_unlock(); diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 46b8c7f1c8d5..23d5b8b19f3e 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -34,7 +34,7 @@ static inline bool is_tcf_tunnel_set(const struct tc_action *a) struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); - if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET; #endif return false; @@ -46,7 +46,7 @@ static inline bool is_tcf_tunnel_release(const struct tc_action *a) struct 
tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); - if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE; #endif return false; diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 22ae260d6869..fe39ed502bef 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -30,7 +30,7 @@ struct tcf_vlan { static inline bool is_tcf_vlan(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_VLAN) + if (a->ops && a->ops->id == TCA_ID_VLAN) return true; #endif return false; diff --git a/include/net/tcp.h b/include/net/tcp.h index a18914d20486..985aa5db570c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -313,7 +313,7 @@ extern struct proto tcp_prot; void tcp_tasklet_init(void); -void tcp_v4_err(struct sk_buff *skb, u32); +int tcp_v4_err(struct sk_buff *skb, u32); void tcp_shutdown(struct sock *sk, int how); @@ -406,8 +406,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); int tcp_set_rcvlowat(struct sock *sk, int val); void tcp_data_ready(struct sock *sk); +#ifdef CONFIG_MMU int tcp_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); +#endif void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); @@ -1124,7 +1126,7 @@ void tcp_rate_check_app_limited(struct sock *sk); */ static inline int tcp_is_sack(const struct tcp_sock *tp) { - return tp->rx_opt.sack_ok; + return likely(tp->rx_opt.sack_ok); } static inline bool tcp_is_reno(const struct tcp_sock *tp) @@ -1312,36 +1314,19 @@ static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq) static inline __sum16 tcp_v4_check(int len, __be32 saddr, __be32 daddr, __wsum base) { - return 
csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base); -} - -static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb) -{ - return __skb_checksum_complete(skb); + return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base); } static inline bool tcp_checksum_complete(struct sk_buff *skb) { return !skb_csum_unnecessary(skb) && - __tcp_checksum_complete(skb); + __skb_checksum_complete(skb); } bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); int tcp_filter(struct sock *sk, struct sk_buff *skb); - -#undef STATE_TRACE - -#ifdef STATE_TRACE -static const char *statename[]={ - "Unused","Established","Syn Sent","Syn Recv", - "Fin Wait 1","Fin Wait 2","Time Wait", "Close", - "Close Wait","Last ACK","Listen","Closing" -}; -#endif void tcp_set_state(struct sock *sk, int state); - void tcp_done(struct sock *sk); - int tcp_abort(struct sock *sk, int err); static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) @@ -1385,7 +1370,7 @@ static inline int tcp_win_from_space(const struct sock *sk, int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk, sk->sk_rcvbuf - + return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len - atomic_read(&sk->sk_rmem_alloc)); } @@ -1572,9 +1557,21 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); #ifdef CONFIG_TCP_MD5SIG -struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, - const union tcp_md5_addr *addr, - int family); +#include <linux/jump_label.h> +extern struct static_key_false tcp_md5_needed; +struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, + const union tcp_md5_addr *addr, + int family); +static inline struct tcp_md5sig_key * +tcp_md5_do_lookup(const struct sock *sk, + const union tcp_md5_addr *addr, + int family) +{ + if (!static_branch_unlikely(&tcp_md5_needed)) + return NULL; + return __tcp_md5_do_lookup(sk, addr, 
family); +} + #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, @@ -1611,6 +1608,7 @@ struct tcp_fastopen_request { struct msghdr *data; /* data in MSG_FASTOPEN */ size_t size; int copied; /* queued in tcp_connect() */ + struct ubuf_info *uarg; }; void tcp_free_fastopen_req(struct tcp_sock *tp); void tcp_fastopen_destroy_cipher(struct sock *sk); @@ -1718,20 +1716,9 @@ static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk) return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk); } -static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) -{ - if (tcp_write_queue_empty(sk)) - tcp_chrono_stop(sk, TCP_CHRONO_BUSY); -} - -static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb) -{ - __skb_queue_tail(&sk->sk_write_queue, skb); -} - static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb) { - __tcp_add_write_queue_tail(sk, skb); + __skb_queue_tail(&sk->sk_write_queue, skb); /* Queue it, remembering where we must start sending. */ if (sk->sk_write_queue.next == skb) @@ -1875,12 +1862,16 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; } -static inline bool tcp_stream_memory_free(const struct sock *sk) +/* @wake is one when sk_stream_write_space() calls us. + * This sends EPOLLOUT only if notsent_bytes is half the limit. + * This mimics the strategy used in sock_def_write_space(). 
+ */ +static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) { const struct tcp_sock *tp = tcp_sk(sk); u32 notsent_bytes = tp->write_seq - tp->snd_nxt; - return notsent_bytes < tcp_notsent_lowat(tp); + return (notsent_bytes << wake) < tcp_notsent_lowat(tp); } #ifdef CONFIG_PROC_FS @@ -2207,7 +2198,7 @@ extern struct static_key_false tcp_have_smc; void clean_acked_data_enable(struct inet_connection_sock *icsk, void (*cad)(struct sock *sk, u32 ack_seq)); void clean_acked_data_disable(struct inet_connection_sock *icsk); - +void clean_acked_data_flush(void); #endif #endif /* _TCP_H */ diff --git a/include/net/tls.h b/include/net/tls.h index 3cbcd12303fd..39ea62f0c1f6 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -60,6 +60,17 @@ #define TLS_AAD_SPACE_SIZE 13 #define TLS_DEVICE_NAME_MAX 32 +#define MAX_IV_SIZE 16 + +/* For AES-CCM, the full 16-bytes of IV is made of '4' fields of given sizes. + * + * IV[16] = b0[1] || implicit nonce[4] || explicit nonce[8] || length[3] + * + * The field 'length' is encoded in field 'b0' as '(length width - 1)'. + * Hence b0 contains (3 - 1) = 2. + */ +#define TLS_AES_CCM_IV_B0_BYTE 2 + /* * This structure defines the routines for Inline TLS driver. 
* The following routines are optional and filled with a @@ -119,11 +130,20 @@ struct tls_rec { /* AAD | msg_encrypted.sg.data (data contains overhead for hdr & iv & tag) */ struct scatterlist sg_aead_out[2]; + char content_type; + struct scatterlist sg_content_type; + char aad_space[TLS_AAD_SPACE_SIZE]; + u8 iv_data[MAX_IV_SIZE]; struct aead_request aead_req; u8 aead_req_ctx[]; }; +struct tls_msg { + struct strp_msg rxm; + u8 control; +}; + struct tx_work { struct delayed_work work; struct sock *sk; @@ -137,6 +157,7 @@ struct tls_sw_context_tx { struct list_head tx_list; atomic_t encrypt_pending; int async_notify; + int async_capable; #define BIT_TX_SCHEDULED 0 unsigned long tx_bitmask; @@ -145,12 +166,13 @@ struct tls_sw_context_tx { struct tls_sw_context_rx { struct crypto_aead *aead_recv; struct crypto_wait async_wait; - struct strparser strp; + struct sk_buff_head rx_list; /* list of decrypted 'data' records */ void (*saved_data_ready)(struct sock *sk); struct sk_buff *recv_pkt; u8 control; + int async_capable; bool decrypted; atomic_t decrypt_pending; bool async_notify; @@ -187,26 +209,35 @@ struct tls_offload_context_tx { (ALIGN(sizeof(struct tls_offload_context_tx), sizeof(void *)) + \ TLS_DRIVER_STATE_SIZE) -enum { - TLS_PENDING_CLOSED_RECORD -}; - struct cipher_context { - u16 prepend_size; - u16 tag_size; - u16 overhead_size; - u16 iv_size; char *iv; - u16 rec_seq_size; char *rec_seq; }; union tls_crypto_context { struct tls_crypto_info info; - struct tls12_crypto_info_aes_gcm_128 aes_gcm_128; + union { + struct tls12_crypto_info_aes_gcm_128 aes_gcm_128; + struct tls12_crypto_info_aes_gcm_256 aes_gcm_256; + }; +}; + +struct tls_prot_info { + u16 version; + u16 cipher_type; + u16 prepend_size; + u16 tag_size; + u16 overhead_size; + u16 iv_size; + u16 salt_size; + u16 rec_seq_size; + u16 aad_size; + u16 tail_size; }; struct tls_context { + struct tls_prot_info prot_info; + union tls_crypto_context crypto_send; union tls_crypto_context crypto_recv; @@ -246,6 
+277,23 @@ struct tls_context { void (*unhash)(struct sock *sk); }; +enum tls_offload_ctx_dir { + TLS_OFFLOAD_CTX_DIR_RX, + TLS_OFFLOAD_CTX_DIR_TX, +}; + +struct tlsdev_ops { + int (*tls_dev_add)(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn); + void (*tls_dev_del)(struct net_device *netdev, + struct tls_context *ctx, + enum tls_offload_ctx_dir direction); + void (*tls_dev_resync_rx)(struct net_device *netdev, + struct sock *sk, u32 seq, u64 rcd_sn); +}; + struct tls_offload_context_rx { /* sw must be the first member of tls_offload_context_rx */ struct tls_sw_context_rx sw; @@ -286,7 +334,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -void tls_device_sk_destruct(struct sock *sk); +void tls_device_free_resources_tx(struct sock *sk); void tls_device_init(void); void tls_device_cleanup(void); int tls_tx_records(struct sock *sk, int flags); @@ -304,19 +352,21 @@ static inline u32 tls_record_start_seq(struct tls_record_info *rec) return rec->end_seq - rec->len; } -void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); int tls_push_sg(struct sock *sk, struct tls_context *ctx, struct scatterlist *sg, u16 first_offset, int flags); int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, int flags); +bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx); -int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx, - int flags, long *timeo); +static inline struct tls_msg *tls_msg(struct sk_buff *skb) +{ + return (struct tls_msg *)strp_msg(skb); +} -static inline bool tls_is_pending_closed_record(struct tls_context *ctx) +static inline bool tls_is_partially_sent_record(struct tls_context *ctx) { - return 
test_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); + return !!ctx->partially_sent_record; } static inline int tls_complete_pending_work(struct sock *sk, @@ -328,17 +378,12 @@ static inline int tls_complete_pending_work(struct sock *sk, if (unlikely(sk->sk_write_pending)) rc = wait_on_pending_writer(sk, timeo); - if (!rc && tls_is_pending_closed_record(ctx)) - rc = tls_push_pending_closed_record(sk, ctx, flags, timeo); + if (!rc && tls_is_partially_sent_record(ctx)) + rc = tls_push_partial_record(sk, ctx, flags); return rc; } -static inline bool tls_is_partially_sent_record(struct tls_context *ctx) -{ - return !!ctx->partially_sent_record; -} - static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) { return tls_ctx->pending_open_record_frags; @@ -362,7 +407,7 @@ tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) { #ifdef CONFIG_SOCK_VALIDATE_XMIT - return sk_fullsock(sk) & + return sk_fullsock(sk) && (smp_load_acquire(&sk->sk_validate_xmit_skb) == &tls_validate_xmit_skb); #else @@ -389,59 +434,92 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len) return (i == -1); } +static inline struct tls_context *tls_get_ctx(const struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ulp_data; +} + static inline void tls_advance_record_sn(struct sock *sk, - struct cipher_context *ctx) + struct cipher_context *ctx, + int version) { - if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size)) + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; + + if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) tls_err_abort(sk, EBADMSG); - tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, - ctx->iv_size); + + if (version != TLS_1_3_VERSION) { + tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + prot->iv_size); + } } static inline void 
tls_fill_prepend(struct tls_context *ctx, char *buf, size_t plaintext_len, - unsigned char record_type) + unsigned char record_type, + int version) { - size_t pkt_len, iv_size = ctx->tx.iv_size; + struct tls_prot_info *prot = &ctx->prot_info; + size_t pkt_len, iv_size = prot->iv_size; + + pkt_len = plaintext_len + prot->tag_size; + if (version != TLS_1_3_VERSION) { + pkt_len += iv_size; - pkt_len = plaintext_len + iv_size + ctx->tx.tag_size; + memcpy(buf + TLS_NONCE_OFFSET, + ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size); + } /* we cover nonce explicit here as well, so buf should be of * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE */ - buf[0] = record_type; - buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.info.version); - buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.info.version); + buf[0] = version == TLS_1_3_VERSION ? + TLS_RECORD_TYPE_DATA : record_type; + /* Note that VERSION must be TLS_1_2 for both TLS1.2 and TLS1.3 */ + buf[1] = TLS_1_2_VERSION_MINOR; + buf[2] = TLS_1_2_VERSION_MAJOR; /* we can use IV for nonce explicit according to spec */ buf[3] = pkt_len >> 8; buf[4] = pkt_len & 0xFF; - memcpy(buf + TLS_NONCE_OFFSET, - ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size); } static inline void tls_make_aad(char *buf, size_t size, char *record_sequence, int record_sequence_size, - unsigned char record_type) + unsigned char record_type, + int version) { - memcpy(buf, record_sequence, record_sequence_size); + if (version != TLS_1_3_VERSION) { + memcpy(buf, record_sequence, record_sequence_size); + buf += 8; + } else { + size += TLS_CIPHER_AES_GCM_128_TAG_SIZE; + } - buf[8] = record_type; - buf[9] = TLS_1_2_VERSION_MAJOR; - buf[10] = TLS_1_2_VERSION_MINOR; - buf[11] = size >> 8; - buf[12] = size & 0xFF; + buf[0] = version == TLS_1_3_VERSION ? 
+ TLS_RECORD_TYPE_DATA : record_type; + buf[1] = TLS_1_2_VERSION_MAJOR; + buf[2] = TLS_1_2_VERSION_MINOR; + buf[3] = size >> 8; + buf[4] = size & 0xFF; } -static inline struct tls_context *tls_get_ctx(const struct sock *sk) +static inline void xor_iv_with_seq(int version, char *iv, char *seq) { - struct inet_connection_sock *icsk = inet_csk(sk); + int i; - return icsk->icsk_ulp_data; + if (version == TLS_1_3_VERSION) { + for (i = 0; i < 8; i++) + iv[i + 4] ^= seq[i]; + } } + static inline struct tls_sw_context_rx *tls_sw_ctx_rx( const struct tls_context *tls_ctx) { @@ -460,6 +538,18 @@ tls_offload_ctx_tx(const struct tls_context *tls_ctx) return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx; } +static inline bool tls_sw_has_ctx_tx(const struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (!ctx) + return false; + return !!tls_sw_ctx_tx(ctx); +} + +void tls_sw_write_space(struct sock *sk, struct tls_context *ctx); +void tls_device_write_space(struct sock *sk, struct tls_context *ctx); + static inline struct tls_offload_context_rx * tls_offload_ctx_rx(const struct tls_context *tls_ctx) { @@ -472,7 +562,7 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - atomic64_set(&rx_ctx->resync_req, ((((uint64_t)seq) << 32) | 1)); + atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | 1); } diff --git a/include/net/udp.h b/include/net/udp.h index 9e82cb391dea..d8ce937bc395 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -252,19 +252,30 @@ static inline int udp_rqueue_get(struct sock *sk) return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit); } +static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, + bound_dev_if, dif, sdif); 
+#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + /* net/ipv4/udp.c */ void udp_destruct_sock(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *peeked, int *off, int *err); + int noblock, int *off, int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *err) { - int peeked, off = 0; + int off = 0; - return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); + return __skb_recv_udp(sk, flags, noblock, &off, err); } int udp_v4_early_demux(struct sk_buff *skb); @@ -272,7 +283,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); -void udp_err(struct sk_buff *, u32); +int udp_err(struct sk_buff *, u32); int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udp_push_pending_frames(struct sock *sk); @@ -406,17 +417,24 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, } while(0) #if IS_ENABLED(CONFIG_IPV6) -#define __UDPX_INC_STATS(sk, field) \ -do { \ - if ((sk)->sk_family == AF_INET) \ - __UDP_INC_STATS(sock_net(sk), field, 0); \ - else \ - __UDP6_INC_STATS(sock_net(sk), field, 0); \ -} while (0) +#define __UDPX_MIB(sk, ipv4) \ +({ \ + ipv4 ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ + sock_net(sk)->mib.udp_statistics) : \ + (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_stats_in6 : \ + sock_net(sk)->mib.udp_stats_in6); \ +}) #else -#define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0) +#define __UDPX_MIB(sk, ipv4) \ +({ \ + IS_UDPLITE(sk) ? 
sock_net(sk)->mib.udplite_statistics : \ + sock_net(sk)->mib.udp_statistics; \ +}) #endif +#define __UDPX_INC_STATS(sk, field) \ + __SNMP_INC_STATS(__UDPX_MIB(sk, (sk)->sk_family == AF_INET), field) + #ifdef CONFIG_PROC_FS struct udp_seq_afinfo { sa_family_t family; @@ -450,4 +468,26 @@ DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); #endif +static inline struct sk_buff *udp_rcv_segment(struct sock *sk, + struct sk_buff *skb, bool ipv4) +{ + struct sk_buff *segs; + + /* the GSO CB lays after the UDP one, no need to save and restore any + * CB fragment + */ + segs = __skb_gso_segment(skb, NETIF_F_SG, false); + if (unlikely(IS_ERR_OR_NULL(segs))) { + int segs_nr = skb_shinfo(skb)->gso_segs; + + atomic_add(segs_nr, &sk->sk_drops); + SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr); + kfree_skb(skb); + return NULL; + } + + consume_skb(skb); + return segs; +} + #endif /* _UDP_H */ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index fe680ab6b15a..4b1f95e08307 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -7,7 +7,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> -#include <net/addrconf.h> +#include <net/ipv6_stubs.h> #endif struct udp_port_cfg { @@ -30,6 +30,7 @@ struct udp_port_cfg { __be16 local_udp_port; __be16 peer_udp_port; + int bind_ifindex; unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, @@ -64,6 +65,8 @@ static inline int udp_sock_create(struct net *net, } typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); +typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, + struct sk_buff *skb); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, struct list_head *head, @@ -76,6 +79,7 @@ struct udp_tunnel_sock_cfg { /* Used for setting up udp_sock fields, see udp.h for details */ __u8 encap_type; udp_tunnel_encap_rcv_t encap_rcv; + 
udp_tunnel_encap_err_lookup_t encap_err_lookup; udp_tunnel_encap_destroy_t encap_destroy; udp_tunnel_gro_receive_t gro_receive; udp_tunnel_gro_complete_t gro_complete; @@ -165,6 +169,12 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) static inline void udp_tunnel_encap_enable(struct socket *sock) { + struct udp_sock *up = udp_sk(sock->sk); + + if (up->encap_enabled) + return; + + up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) if (sock->sk->sk_family == PF_INET6) ipv6_stub->udpv6_encap_enable(); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 03431c148e16..83b5999a2587 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -8,6 +8,8 @@ #include <net/rtnetlink.h> #include <net/switchdev.h> +#define IANA_VXLAN_UDP_PORT 4789 + /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|R|R|I|R|R|R| Reserved | @@ -216,6 +218,7 @@ struct vxlan_config { unsigned long age_interval; unsigned int addrmax; bool no_share; + enum ifla_vxlan_df df; }; struct vxlan_dev_node { @@ -420,11 +423,17 @@ struct switchdev_notifier_vxlan_fdb_info { u8 eth_addr[ETH_ALEN]; __be32 vni; bool offloaded; + bool added_by_user; }; #if IS_ENABLED(CONFIG_VXLAN) int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, struct switchdev_notifier_vxlan_fdb_info *fdb_info); +int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, + struct notifier_block *nb, + struct netlink_ext_ack *extack); +void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni); + #else static inline int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, @@ -432,6 +441,49 @@ vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, { return -ENOENT; } + +static inline int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, + struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline void 
+vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni) +{ +} #endif +static inline void vxlan_flag_attr_error(int attrtype, + struct netlink_ext_ack *extack) +{ +#define VXLAN_FLAG(flg) \ + case IFLA_VXLAN_##flg: \ + NL_SET_ERR_MSG_MOD(extack, \ + "cannot change " #flg " flag"); \ + break + switch (attrtype) { + VXLAN_FLAG(TTL_INHERIT); + VXLAN_FLAG(LEARNING); + VXLAN_FLAG(PROXY); + VXLAN_FLAG(RSC); + VXLAN_FLAG(L2MISS); + VXLAN_FLAG(L3MISS); + VXLAN_FLAG(COLLECT_METADATA); + VXLAN_FLAG(UDP_ZERO_CSUM6_TX); + VXLAN_FLAG(UDP_ZERO_CSUM6_RX); + VXLAN_FLAG(REMCSUM_TX); + VXLAN_FLAG(REMCSUM_RX); + VXLAN_FLAG(GBP); + VXLAN_FLAG(GPE); + VXLAN_FLAG(REMCSUM_NOPARTIAL); + default: + NL_SET_ERR_MSG_MOD(extack, \ + "cannot change flag"); + break; + } +#undef VXLAN_FLAG +} + #endif diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 13acb9803a6d..d074b6d60f8a 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -36,12 +36,12 @@ struct xdp_umem { u32 headroom; u32 chunk_size_nohr; struct user_struct *user; - struct pid *pid; unsigned long address; refcount_t users; struct work_struct work; struct page **pgs; u32 npgs; + int id; struct net_device *dev; struct xdp_umem_fq_reuse *fq_reuse; u16 queue_id; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index da588def3c61..a2907873ed56 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -132,6 +132,17 @@ struct xfrm_state_offload { u8 flags; }; +struct xfrm_mode { + u8 encap; + u8 family; + u8 flags; +}; + +/* Flags for xfrm_mode. */ +enum { + XFRM_MODE_FLAG_TUNNEL = 1, +}; + /* Full description of state of transformer. */ struct xfrm_state { possible_net_t xs_net; @@ -219,7 +230,7 @@ struct xfrm_state { struct xfrm_stats stats; struct xfrm_lifetime_cur curlft; - struct tasklet_hrtimer mtimer; + struct hrtimer mtimer; struct xfrm_state_offload xso; @@ -234,9 +245,9 @@ struct xfrm_state { /* Reference to data common to all the instances of this * transformer. 
*/ const struct xfrm_type *type; - struct xfrm_mode *inner_mode; - struct xfrm_mode *inner_mode_iaf; - struct xfrm_mode *outer_mode; + struct xfrm_mode inner_mode; + struct xfrm_mode inner_mode_iaf; + struct xfrm_mode outer_mode; const struct xfrm_type_offload *type_offload; @@ -295,7 +306,8 @@ struct xfrm_replay { }; struct xfrm_if_cb { - struct xfrm_if *(*decode_session)(struct sk_buff *skb); + struct xfrm_if *(*decode_session)(struct sk_buff *skb, + unsigned short family); }; void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); @@ -315,13 +327,6 @@ struct xfrm_policy_afinfo { xfrm_address_t *saddr, xfrm_address_t *daddr, u32 mark); - void (*decode_session)(struct sk_buff *skb, - struct flowi *fl, - int reverse); - int (*get_tos)(const struct flowi *fl); - int (*init_path)(struct xfrm_dst *path, - struct dst_entry *dst, - int nfheader_len); int (*fill_dst)(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl); @@ -347,7 +352,6 @@ struct xfrm_state_afinfo { struct module *owner; const struct xfrm_type *type_map[IPPROTO_MAX]; const struct xfrm_type_offload *type_offload_map[IPPROTO_MAX]; - struct xfrm_mode *mode_map[XFRM_MODE_MAX]; int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_selector *sel, @@ -422,78 +426,6 @@ struct xfrm_type_offload { int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); -struct xfrm_mode { - /* - * Remove encapsulation header. - * - * The IP header will be moved over the top of the encapsulation - * header. - * - * On entry, the transport header shall point to where the IP header - * should be and the network header shall be set to where the IP - * header currently is. skb->data shall point to the start of the - * payload. - */ - int (*input2)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * This is the actual input entry point. 
- * - * For transport mode and equivalent this would be identical to - * input2 (which does not need to be set). While tunnel mode - * and equivalent would set this to the tunnel encapsulation function - * xfrm4_prepare_input that would in turn call input2. - */ - int (*input)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * Add encapsulation header. - * - * On exit, the transport header will be set to the start of the - * encapsulation header to be filled in by x->type->output and - * the mac header will be set to the nextheader (protocol for - * IPv4) field of the extension header directly preceding the - * encapsulation header, or in its absence, that of the top IP - * header. The value of the network header will always point - * to the top IP header while skb->data will point to the payload. - */ - int (*output2)(struct xfrm_state *x,struct sk_buff *skb); - - /* - * This is the actual output entry point. - * - * For transport mode and equivalent this would be identical to - * output2 (which does not need to be set). While tunnel mode - * and equivalent would set this to a tunnel encapsulation function - * (xfrm4_prepare_output or xfrm6_prepare_output) that would in turn - * call output2. - */ - int (*output)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * Adjust pointers into the packet and do GSO segmentation. - */ - struct sk_buff *(*gso_segment)(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features); - - /* - * Adjust pointers into the packet when IPsec is done at layer2. - */ - void (*xmit)(struct xfrm_state *x, struct sk_buff *skb); - - struct xfrm_state_afinfo *afinfo; - struct module *owner; - unsigned int encap; - int flags; -}; - -/* Flags for xfrm_mode. 
*/ -enum { - XFRM_MODE_FLAG_TUNNEL = 1, -}; - -int xfrm_register_mode(struct xfrm_mode *mode, int family); -int xfrm_unregister_mode(struct xfrm_mode *mode, int family); - static inline int xfrm_af2proto(unsigned int family) { switch(family) { @@ -506,13 +438,13 @@ static inline int xfrm_af2proto(unsigned int family) } } -static inline struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) +static inline const struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) { if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) || (ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6)) - return x->inner_mode; + return &x->inner_mode; else - return x->inner_mode_iaf; + return &x->inner_mode_iaf; } struct xfrm_tmpl { @@ -577,6 +509,7 @@ struct xfrm_policy { /* This lock only affects elements except for entry. */ rwlock_t lock; refcount_t refcnt; + u32 pos; struct timer_list timer; atomic_t genid; @@ -589,6 +522,7 @@ struct xfrm_policy { struct xfrm_lifetime_cur curlft; struct xfrm_policy_walk_entry walk; struct xfrm_policy_queue polq; + bool bydst_reinsert; u8 type; u8 action; u8 flags; @@ -596,6 +530,7 @@ struct xfrm_policy { u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; + struct hlist_node bydst_inexact_list; struct rcu_head rcu; }; @@ -850,7 +785,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) xfrm_pol_put(pols[i]); } -void __xfrm_state_destroy(struct xfrm_state *); +void __xfrm_state_destroy(struct xfrm_state *, bool); static inline void __xfrm_state_put(struct xfrm_state *x) { @@ -860,7 +795,13 @@ static inline void __xfrm_state_put(struct xfrm_state *x) static inline void xfrm_state_put(struct xfrm_state *x) { if (refcount_dec_and_test(&x->refcnt)) - __xfrm_state_destroy(x); + __xfrm_state_destroy(x, false); +} + +static inline void xfrm_state_put_sync(struct xfrm_state *x) +{ + if (refcount_dec_and_test(&x->refcnt)) + __xfrm_state_destroy(x, true); } static 
inline void xfrm_state_hold(struct xfrm_state *x) @@ -1093,7 +1034,6 @@ struct xfrm_offload { }; struct sec_path { - refcount_t refcnt; int len; int olen; @@ -1101,41 +1041,13 @@ struct sec_path { struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH]; }; -static inline int secpath_exists(struct sk_buff *skb) -{ -#ifdef CONFIG_XFRM - return skb->sp != NULL; -#else - return 0; -#endif -} - -static inline struct sec_path * -secpath_get(struct sec_path *sp) -{ - if (sp) - refcount_inc(&sp->refcnt); - return sp; -} - -void __secpath_destroy(struct sec_path *sp); - -static inline void -secpath_put(struct sec_path *sp) -{ - if (sp && refcount_dec_and_test(&sp->refcnt)) - __secpath_destroy(sp); -} - -struct sec_path *secpath_dup(struct sec_path *src); -int secpath_set(struct sk_buff *skb); +struct sec_path *secpath_set(struct sk_buff *skb); static inline void secpath_reset(struct sk_buff *skb) { #ifdef CONFIG_XFRM - secpath_put(skb->sp); - skb->sp = NULL; + skb_ext_del(skb, SKB_EXT_SEC_PATH); #endif } @@ -1191,7 +1103,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - return (!net->xfrm.policy_count[dir] && !skb->sp) || + return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || (skb_dst(skb)->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, ndir, skb, family); } @@ -1424,6 +1336,23 @@ static inline int xfrm_state_kern(const struct xfrm_state *x) return atomic_read(&x->tunnel_users); } +static inline bool xfrm_id_proto_valid(u8 proto) +{ + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: +#endif + return true; + default: + return false; + } +} + +/* IPSEC_PROTO_ANY only matches 3 IPsec protocols, 0 could match all. 
*/ static inline int xfrm_id_proto_match(u8 proto, u8 userproto) { return (!userproto || proto == userproto || @@ -1616,7 +1545,7 @@ struct xfrmk_spdinfo { struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_delete(struct xfrm_state *x); -int xfrm_state_flush(struct net *net, u8 proto, bool task_valid); +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); @@ -1625,7 +1554,6 @@ int xfrm_init_replay(struct xfrm_state *x); int xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); -int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); int xfrm_trans_queue(struct sk_buff *skb, @@ -1633,7 +1561,11 @@ int xfrm_trans_queue(struct sk_buff *skb, struct sk_buff *)); int xfrm_output_resume(struct sk_buff *skb, int err); int xfrm_output(struct sock *sk, struct sk_buff *skb); -int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); + +#if IS_ENABLED(CONFIG_NET_PKTGEN) +int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb); +#endif + void xfrm_local_error(struct sk_buff *skb, int mtu); int xfrm4_extract_header(struct sk_buff *skb); int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); @@ -1652,10 +1584,8 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) } int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb); 
int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb); -int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol); int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol); int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); @@ -1671,7 +1601,6 @@ int xfrm6_rcv(struct sk_buff *skb); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); void xfrm6_local_error(struct sk_buff *skb, u32 mtu); -int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm6_protocol_register(struct xfrm6_protocol *handler, unsigned char protocol); int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, unsigned char protocol); int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); @@ -1679,7 +1608,6 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr); int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, @@ -1903,14 +1831,16 @@ static inline void xfrm_states_delete(struct xfrm_state **states, int n) #ifdef CONFIG_XFRM static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { - return skb->sp->xvec[skb->sp->len - 1]; + struct sec_path *sp = skb_sec_path(skb); + + return sp->xvec[sp->len - 1]; } #endif static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) { #ifdef CONFIG_XFRM - struct sec_path *sp = skb->sp; + struct sec_path *sp = skb_sec_path(skb); if (!sp || 
!sp->olen || sp->len != sp->olen) return NULL; @@ -1968,7 +1898,7 @@ static inline void xfrm_dev_state_delete(struct xfrm_state *x) static inline void xfrm_dev_state_free(struct xfrm_state *x) { struct xfrm_state_offload *xso = &x->xso; - struct net_device *dev = xso->dev; + struct net_device *dev = xso->dev; if (dev && dev->xfrmdev_ops) { if (dev->xfrmdev_ops->xdo_dev_state_free) @@ -2069,7 +1999,7 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x, tunnel = true; break; } - if (tunnel && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)) + if (tunnel && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL)) return -EINVAL; return 0; |