summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/atm_suni.h12
-rw-r--r--include/linux/avf/virtchnl.h278
-rw-r--r--include/linux/bpf-cgroup.h57
-rw-r--r--include/linux/bpf.h152
-rw-r--r--include/linux/bpf_local_storage.h3
-rw-r--r--include/linux/bpf_lsm.h22
-rw-r--r--include/linux/bpf_types.h8
-rw-r--r--include/linux/bpf_verifier.h3
-rw-r--r--include/linux/btf.h7
-rw-r--r--include/linux/can/bittiming.h79
-rw-r--r--include/linux/can/dev.h14
-rw-r--r--include/linux/can/skb.h3
-rw-r--r--include/linux/dsa/ocelot.h5
-rw-r--r--include/linux/etherdevice.h4
-rw-r--r--include/linux/ethtool.h14
-rw-r--r--include/linux/filter.h44
-rw-r--r--include/linux/if_bridge.h40
-rw-r--r--include/linux/if_rmnet.h65
-rw-r--r--include/linux/marvell_phy.h7
-rw-r--r--include/linux/mlx5/device.h2
-rw-r--r--include/linux/mlx5/driver.h21
-rw-r--r--include/linux/mlx5/eswitch.h14
-rw-r--r--include/linux/mlx5/mlx5_ifc.h32
-rw-r--r--include/linux/netdevice.h138
-rw-r--r--include/linux/netfilter/ipset/ip_set.h2
-rw-r--r--include/linux/netfilter/nfnetlink.h29
-rw-r--r--include/linux/pci.h8
-rw-r--r--include/linux/pcs/pcs-xpcs.h5
-rw-r--r--include/linux/phy.h3
-rw-r--r--include/linux/phylink.h2
-rw-r--r--include/linux/platform_data/hirschmann-hellcreek.h1
-rw-r--r--include/linux/ppp_channel.h3
-rw-r--r--include/linux/ptp_pch.h22
-rw-r--r--include/linux/qed/qed_chain.h8
-rw-r--r--include/linux/qed/qed_ll2_if.h1
-rw-r--r--include/linux/sched.h5
-rw-r--r--include/linux/skbuff.h33
-rw-r--r--include/linux/skmsg.h161
-rw-r--r--include/linux/stmmac.h43
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--include/linux/udp.h22
-rw-r--r--include/linux/usb/usbnet.h11
-rw-r--r--include/net/bluetooth/hci.h1
-rw-r--r--include/net/bluetooth/hci_core.h17
-rw-r--r--include/net/bluetooth/l2cap.h1
-rw-r--r--include/net/bluetooth/mgmt.h1
-rw-r--r--include/net/bpf_sk_storage.h1
-rw-r--r--include/net/dsa.h16
-rw-r--r--include/net/flow_dissector.h6
-rw-r--r--include/net/flow_offload.h6
-rw-r--r--include/net/gro.h13
-rw-r--r--include/net/if_inet6.h37
-rw-r--r--include/net/ipv6.h1
-rw-r--r--include/net/ipv6_stubs.h2
-rw-r--r--include/net/lapb.h2
-rw-r--r--include/net/mld.h3
-rw-r--r--include/net/mptcp.h48
-rw-r--r--include/net/net_namespace.h14
-rw-r--r--include/net/netfilter/ipv6/nf_conntrack_ipv6.h3
-rw-r--r--include/net/netfilter/ipv6/nf_defrag_ipv6.h6
-rw-r--r--include/net/netfilter/nf_conntrack.h7
-rw-r--r--include/net/netfilter/nf_conntrack_ecache.h33
-rw-r--r--include/net/netfilter/nf_flow_table.h64
-rw-r--r--include/net/netfilter/nf_log.h25
-rw-r--r--include/net/netfilter/nf_tables.h25
-rw-r--r--include/net/netns/conntrack.h4
-rw-r--r--include/net/netns/dccp.h12
-rw-r--r--include/net/netns/ipv4.h133
-rw-r--r--include/net/netns/ipv6.h28
-rw-r--r--include/net/netns/mib.h30
-rw-r--r--include/net/netns/netfilter.h6
-rw-r--r--include/net/netns/nftables.h7
-rw-r--r--include/net/netns/x_tables.h1
-rw-r--r--include/net/nexthop.h76
-rw-r--r--include/net/pkt_sched.h9
-rw-r--r--include/net/psample.h21
-rw-r--r--include/net/sch_generic.h14
-rw-r--r--include/net/sock.h3
-rw-r--r--include/net/switchdev.h1
-rw-r--r--include/net/tc_act/tc_police.h52
-rw-r--r--include/net/tcp.h86
-rw-r--r--include/net/udp.h30
-rw-r--r--include/net/xdp_sock.h19
-rw-r--r--include/soc/mscc/ocelot.h25
-rw-r--r--include/soc/mscc/ocelot_ptp.h2
-rw-r--r--include/trace/events/xdp.h62
-rw-r--r--include/uapi/linux/bpf.h767
-rw-r--r--include/uapi/linux/btf.h5
-rw-r--r--include/uapi/linux/ethtool.h45
-rw-r--r--include/uapi/linux/ethtool_netlink.h17
-rw-r--r--include/uapi/linux/icmp.h42
-rw-r--r--include/uapi/linux/icmpv6.h3
-rw-r--r--include/uapi/linux/if_fddi.h2
-rw-r--r--include/uapi/linux/mdio.h2
-rw-r--r--include/uapi/linux/mptcp.h11
-rw-r--r--include/uapi/linux/nexthop.h47
-rw-r--r--include/uapi/linux/pkt_cls.h2
-rw-r--r--include/uapi/linux/psample.h7
-rw-r--r--include/uapi/linux/rtnetlink.h8
-rw-r--r--include/uapi/linux/virtio_bt.h31
-rw-r--r--include/uapi/linux/virtio_ids.h1
-rw-r--r--include/vdso/time64.h1
102 files changed, 2705 insertions, 624 deletions
diff --git a/include/linux/atm_suni.h b/include/linux/atm_suni.h
deleted file mode 100644
index 84f3aab54468..000000000000
--- a/include/linux/atm_suni.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* atm_suni.h - Driver-specific declarations of the SUNI driver (for use by
- driver-specific utilities) */
-
-/* Written 1998,2000 by Werner Almesberger, EPFL ICA */
-
-
-#ifndef LINUX_ATM_SUNI_H
-#define LINUX_ATM_SUNI_H
-
-/* everything obsoleted */
-
-#endif
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 532bcbfc4716..40dd6afbfd81 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -136,6 +136,9 @@ enum virtchnl_ops {
VIRTCHNL_OP_DISABLE_CHANNELS = 31,
VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
+ /* opcode 34 - 46 are reserved */
+ VIRTCHNL_OP_ADD_FDIR_FILTER = 47,
+ VIRTCHNL_OP_DEL_FDIR_FILTER = 48,
};
/* These macros are used to generate compilation errors if a structure/union
@@ -247,6 +250,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000
#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000
#define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000
+#define VIRTCHNL_VF_OFFLOAD_FDIR_PF 0X10000000
/* Define below the capability flags that are not offloads */
#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED 0x00000080
@@ -557,6 +561,11 @@ enum virtchnl_action {
/* action types */
VIRTCHNL_ACTION_DROP = 0,
VIRTCHNL_ACTION_TC_REDIRECT,
+ VIRTCHNL_ACTION_PASSTHRU,
+ VIRTCHNL_ACTION_QUEUE,
+ VIRTCHNL_ACTION_Q_REGION,
+ VIRTCHNL_ACTION_MARK,
+ VIRTCHNL_ACTION_COUNT,
};
enum virtchnl_flow_type {
@@ -666,6 +675,269 @@ enum virtchnl_vfr_states {
VIRTCHNL_VFR_VFACTIVE,
};
+#define VIRTCHNL_MAX_NUM_PROTO_HDRS 32
+#define PROTO_HDR_SHIFT 5
+#define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT)
+#define PROTO_HDR_FIELD_MASK ((1UL << PROTO_HDR_SHIFT) - 1)
+
+/* VFs use these macros to configure each protocol header.
+ * They specify which protocol headers and protocol header fields are used,
+ * based on virtchnl_proto_hdr_type and virtchnl_proto_hdr_field.
+ * @param hdr: a struct of virtchnl_proto_hdr
+ * @param hdr_type: ETH/IPV4/TCP, etc
+ * @param field: SRC/DST/TEID/SPI, etc
+ */
+#define VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, field) \
+ ((hdr)->field_selector |= BIT((field) & PROTO_HDR_FIELD_MASK))
+#define VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, field) \
+ ((hdr)->field_selector &= ~BIT((field) & PROTO_HDR_FIELD_MASK))
+#define VIRTCHNL_TEST_PROTO_HDR_FIELD(hdr, val) \
+ ((hdr)->field_selector & BIT((val) & PROTO_HDR_FIELD_MASK))
+#define VIRTCHNL_GET_PROTO_HDR_FIELD(hdr) ((hdr)->field_selector)
+
+#define VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \
+ (VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, \
+ VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field))
+#define VIRTCHNL_DEL_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \
+ (VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, \
+ VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field))
+
+#define VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, hdr_type) \
+ ((hdr)->type = VIRTCHNL_PROTO_HDR_ ## hdr_type)
+#define VIRTCHNL_GET_PROTO_HDR_TYPE(hdr) \
+ (((hdr)->type) >> PROTO_HDR_SHIFT)
+#define VIRTCHNL_TEST_PROTO_HDR_TYPE(hdr, val) \
+ ((hdr)->type == ((val) >> PROTO_HDR_SHIFT))
+#define VIRTCHNL_TEST_PROTO_HDR(hdr, val) \
+ (VIRTCHNL_TEST_PROTO_HDR_TYPE((hdr), (val)) && \
+ VIRTCHNL_TEST_PROTO_HDR_FIELD((hdr), (val)))
+
+/* Protocol header type within a packet segment. A segment consists of one or
+ * more protocol headers that make up a logical group of protocol headers. Each
+ * logical group of protocol headers encapsulates or is encapsulated using/by
+ * tunneling or encapsulation protocols for network virtualization.
+ */
+enum virtchnl_proto_hdr_type {
+ VIRTCHNL_PROTO_HDR_NONE,
+ VIRTCHNL_PROTO_HDR_ETH,
+ VIRTCHNL_PROTO_HDR_S_VLAN,
+ VIRTCHNL_PROTO_HDR_C_VLAN,
+ VIRTCHNL_PROTO_HDR_IPV4,
+ VIRTCHNL_PROTO_HDR_IPV6,
+ VIRTCHNL_PROTO_HDR_TCP,
+ VIRTCHNL_PROTO_HDR_UDP,
+ VIRTCHNL_PROTO_HDR_SCTP,
+ VIRTCHNL_PROTO_HDR_GTPU_IP,
+ VIRTCHNL_PROTO_HDR_GTPU_EH,
+ VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN,
+ VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP,
+ VIRTCHNL_PROTO_HDR_PPPOE,
+ VIRTCHNL_PROTO_HDR_L2TPV3,
+ VIRTCHNL_PROTO_HDR_ESP,
+ VIRTCHNL_PROTO_HDR_AH,
+ VIRTCHNL_PROTO_HDR_PFCP,
+};
+
+/* Protocol header field within a protocol header. */
+enum virtchnl_proto_hdr_field {
+ /* ETHER */
+ VIRTCHNL_PROTO_HDR_ETH_SRC =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ETH),
+ VIRTCHNL_PROTO_HDR_ETH_DST,
+ VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE,
+ /* S-VLAN */
+ VIRTCHNL_PROTO_HDR_S_VLAN_ID =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_S_VLAN),
+ /* C-VLAN */
+ VIRTCHNL_PROTO_HDR_C_VLAN_ID =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_C_VLAN),
+ /* IPV4 */
+ VIRTCHNL_PROTO_HDR_IPV4_SRC =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV4),
+ VIRTCHNL_PROTO_HDR_IPV4_DST,
+ VIRTCHNL_PROTO_HDR_IPV4_DSCP,
+ VIRTCHNL_PROTO_HDR_IPV4_TTL,
+ VIRTCHNL_PROTO_HDR_IPV4_PROT,
+ /* IPV6 */
+ VIRTCHNL_PROTO_HDR_IPV6_SRC =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV6),
+ VIRTCHNL_PROTO_HDR_IPV6_DST,
+ VIRTCHNL_PROTO_HDR_IPV6_TC,
+ VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT,
+ VIRTCHNL_PROTO_HDR_IPV6_PROT,
+ /* TCP */
+ VIRTCHNL_PROTO_HDR_TCP_SRC_PORT =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_TCP),
+ VIRTCHNL_PROTO_HDR_TCP_DST_PORT,
+ /* UDP */
+ VIRTCHNL_PROTO_HDR_UDP_SRC_PORT =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_UDP),
+ VIRTCHNL_PROTO_HDR_UDP_DST_PORT,
+ /* SCTP */
+ VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_SCTP),
+ VIRTCHNL_PROTO_HDR_SCTP_DST_PORT,
+ /* GTPU_IP */
+ VIRTCHNL_PROTO_HDR_GTPU_IP_TEID =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_IP),
+ /* GTPU_EH */
+ VIRTCHNL_PROTO_HDR_GTPU_EH_PDU =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_EH),
+ VIRTCHNL_PROTO_HDR_GTPU_EH_QFI,
+ /* PPPOE */
+ VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PPPOE),
+ /* L2TPV3 */
+ VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_L2TPV3),
+ /* ESP */
+ VIRTCHNL_PROTO_HDR_ESP_SPI =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ESP),
+ /* AH */
+ VIRTCHNL_PROTO_HDR_AH_SPI =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_AH),
+ /* PFCP */
+ VIRTCHNL_PROTO_HDR_PFCP_S_FIELD =
+ PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PFCP),
+ VIRTCHNL_PROTO_HDR_PFCP_SEID,
+};
+
+struct virtchnl_proto_hdr {
+ enum virtchnl_proto_hdr_type type;
+ u32 field_selector; /* a bit mask to select field for header type */
+ u8 buffer[64];
+ /**
+ * binary buffer in network order for specific header type.
+ * For example, if type = VIRTCHNL_PROTO_HDR_IPV4, an IPv4
+ * header is expected to be copied into the buffer.
+ */
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr);
+
+struct virtchnl_proto_hdrs {
+ u8 tunnel_level;
+ /**
+ * specify where protocol header start from.
+ * 0 - from the outer layer
+ * 1 - from the first inner layer
+ * 2 - from the second inner layer
+ * ....
+ **/
+ int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */
+ struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs);
+
+/* action configuration for FDIR */
+struct virtchnl_filter_action {
+ enum virtchnl_action type;
+ union {
+ /* used for queue and qgroup action */
+ struct {
+ u16 index;
+ u8 region;
+ } queue;
+ /* used for count action */
+ struct {
+ /* share counter ID with other flow rules */
+ u8 shared;
+ u32 id; /* counter ID */
+ } count;
+ /* used for mark action */
+ u32 mark_id;
+ u8 reserve[32];
+ } act_conf;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_filter_action);
+
+#define VIRTCHNL_MAX_NUM_ACTIONS 8
+
+struct virtchnl_filter_action_set {
+ /* action number must be less than VIRTCHNL_MAX_NUM_ACTIONS */
+ int count;
+ struct virtchnl_filter_action actions[VIRTCHNL_MAX_NUM_ACTIONS];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(292, virtchnl_filter_action_set);
+
+/* pattern and action for FDIR rule */
+struct virtchnl_fdir_rule {
+ struct virtchnl_proto_hdrs proto_hdrs;
+ struct virtchnl_filter_action_set action_set;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(2604, virtchnl_fdir_rule);
+
+/* Status returned to VF after VF requests FDIR commands
+ * VIRTCHNL_FDIR_SUCCESS
+ * VF FDIR related request is successfully done by PF
+ * The request can be OP_ADD/DEL.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE
+ * OP_ADD_FDIR_FILTER request failed because no hardware resource is available.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_EXIST
+ * OP_ADD_FDIR_FILTER request failed because the rule already exists.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT
+ * OP_ADD_FDIR_FILTER request failed due to a conflict with an existing rule.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST
+ * OP_DEL_FDIR_FILTER request failed because this rule doesn't exist.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_INVALID
+ * OP_ADD_FDIR_FILTER request failed because parameter validation failed
+ * or the HW doesn't support the rule.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT
+ * OP_ADD/DEL_FDIR_FILTER request failed because programming the rule
+ * timed out.
+ */
+enum virtchnl_fdir_prgm_status {
+ VIRTCHNL_FDIR_SUCCESS = 0,
+ VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE,
+ VIRTCHNL_FDIR_FAILURE_RULE_EXIST,
+ VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT,
+ VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST,
+ VIRTCHNL_FDIR_FAILURE_RULE_INVALID,
+ VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT,
+};
+
+/* VIRTCHNL_OP_ADD_FDIR_FILTER
+ * VF sends this request to PF by filling out vsi_id,
+ * validate_only and rule_cfg. PF will return flow_id if the request
+ * succeeds, and report the result to VF in the status field.
+ */
+struct virtchnl_fdir_add {
+ u16 vsi_id; /* INPUT */
+ /*
+ * 1 for validating a fdir rule, 0 for creating a fdir rule.
+ * Validate and create share one ops: VIRTCHNL_OP_ADD_FDIR_FILTER.
+ */
+ u16 validate_only; /* INPUT */
+ u32 flow_id; /* OUTPUT */
+ struct virtchnl_fdir_rule rule_cfg; /* INPUT */
+ enum virtchnl_fdir_prgm_status status; /* OUTPUT */
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(2616, virtchnl_fdir_add);
+
+/* VIRTCHNL_OP_DEL_FDIR_FILTER
+ * VF sends this request to PF by filling out vsi_id
+ * and flow_id. PF will report the result to VF in the status field.
+ */
+struct virtchnl_fdir_del {
+ u16 vsi_id; /* INPUT */
+ u16 pad;
+ u32 flow_id; /* INPUT */
+ enum virtchnl_fdir_prgm_status status; /* OUTPUT */
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
+
/**
* virtchnl_vc_validate_vf_msg
* @ver: Virtchnl version info
@@ -826,6 +1098,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
case VIRTCHNL_OP_DEL_CLOUD_FILTER:
valid_len = sizeof(struct virtchnl_filter);
break;
+ case VIRTCHNL_OP_ADD_FDIR_FILTER:
+ valid_len = sizeof(struct virtchnl_fdir_add);
+ break;
+ case VIRTCHNL_OP_DEL_FDIR_FILTER:
+ valid_len = sizeof(struct virtchnl_fdir_del);
+ break;
/* These are always errors coming from the VF. */
case VIRTCHNL_OP_EVENT:
case VIRTCHNL_OP_UNKNOWN:
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index c42e02b4d84b..6a29fe11485d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -20,14 +20,25 @@ struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
struct ctl_table;
struct ctl_table_header;
+struct task_struct;
#ifdef CONFIG_CGROUP_BPF
extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
-DECLARE_PER_CPU(struct bpf_cgroup_storage*,
- bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+#define BPF_CGROUP_STORAGE_NEST_MAX 8
+
+struct bpf_cgroup_storage_info {
+ struct task_struct *task;
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+};
+
+/* For each cpu, permit at most BPF_CGROUP_STORAGE_NEST_MAX tasks
+ * to use bpf cgroup storage simultaneously.
+ */
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+ bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
#define for_each_cgroup_storage_type(stype) \
for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
return BPF_CGROUP_STORAGE_SHARED;
}
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
- *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
+static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
+ *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
{
enum bpf_cgroup_storage_type stype;
+ int i, err = 0;
+
+ preempt_disable();
+ for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+ if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
+ continue;
+
+ this_cpu_write(bpf_cgroup_storage_info[i].task, current);
+ for_each_cgroup_storage_type(stype)
+ this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
+ storage[stype]);
+ goto out;
+ }
+ err = -EBUSY;
+ WARN_ON_ONCE(1);
+
+out:
+ preempt_enable();
+ return err;
+}
+
+static inline void bpf_cgroup_storage_unset(void)
+{
+ int i;
+
+ for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+ if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+ continue;
- for_each_cgroup_storage_type(stype)
- this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
+ this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
+ return;
+ }
}
struct bpf_cgroup_storage *
@@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
return -EINVAL;
}
-static inline void bpf_cgroup_storage_set(
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
+static inline int bpf_cgroup_storage_set(
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
+static inline void bpf_cgroup_storage_unset(void) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fdac0534ce79..c9b7a876b0c8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -41,6 +41,7 @@ struct bpf_local_storage_map;
struct kobject;
struct mem_cgroup;
struct module;
+struct bpf_func_state;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -56,7 +57,7 @@ struct bpf_iter_seq_info {
u32 seq_priv_size;
};
-/* map is generic key/value storage optionally accesible by eBPF programs */
+/* map is generic key/value storage optionally accessible by eBPF programs */
struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
int (*map_alloc_check)(union bpf_attr *attr);
@@ -119,6 +120,9 @@ struct bpf_map_ops {
void *owner, u32 size);
struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
+ /* Misc helpers.*/
+ int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags);
+
/* map_meta_equal must be implemented for maps that can be
* used as an inner map. It is a runtime check to ensure
* an inner map can be inserted to an outer map.
@@ -131,6 +135,13 @@ struct bpf_map_ops {
bool (*map_meta_equal)(const struct bpf_map *meta0,
const struct bpf_map *meta1);
+
+ int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee);
+ int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
+ void *callback_ctx, u64 flags);
+
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;
@@ -297,6 +308,8 @@ enum bpf_arg_type {
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
+ ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
+ ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
__BPF_ARG_TYPE_MAX,
};
@@ -413,6 +426,9 @@ enum bpf_reg_type {
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
+ PTR_TO_FUNC, /* reg points to a bpf program function */
+ PTR_TO_MAP_KEY, /* reg points to a map element key */
+ __BPF_REG_TYPE_MAX,
};
/* The information passed from prog-specific *_is_valid_access
@@ -466,6 +482,7 @@ struct bpf_verifier_ops {
const struct btf_type *t, int off, int size,
enum bpf_access_type atype,
u32 *next_btf_id);
+ bool (*check_kfunc_call)(u32 kfunc_btf_id);
};
struct bpf_prog_offload_ops {
@@ -508,6 +525,11 @@ enum bpf_cgroup_storage_type {
*/
#define MAX_BPF_FUNC_ARGS 12
+/* The maximum number of arguments passed through registers
+ * a single function may have.
+ */
+#define MAX_BPF_FUNC_REG_ARGS 5
+
struct btf_func_model {
u8 ret_size;
u8 nr_args;
@@ -778,6 +800,8 @@ struct btf_mod_pair {
struct module *module;
};
+struct bpf_kfunc_desc_tab;
+
struct bpf_prog_aux {
atomic64_t refcnt;
u32 used_map_cnt;
@@ -814,6 +838,7 @@ struct bpf_prog_aux {
struct bpf_prog **func;
void *jit_data; /* JIT specific data. arch dependent */
struct bpf_jit_poke_descriptor *poke_tab;
+ struct bpf_kfunc_desc_tab *kfunc_tab;
u32 size_poke_tab;
struct bpf_ksym ksym;
const struct bpf_prog_ops *ops;
@@ -1088,6 +1113,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
/* BPF program asks to set CN on the packet. */
#define BPF_RET_SET_CN (1 << 0)
+/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
+ * if bpf_cgroup_storage_set() fails, the rest of the programs
+ * will not execute. This should be a really rare scenario
+ * as it requires BPF_CGROUP_STORAGE_NEST_MAX preemptions,
+ * all between bpf_cgroup_storage_set() and
+ * bpf_cgroup_storage_unset() on the same cpu.
+ */
#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
({ \
struct bpf_prog_array_item *_item; \
@@ -1100,10 +1132,12 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
- bpf_cgroup_storage_set(_item->cgroup_storage); \
+ if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
+ break; \
func_ret = func(_prog, ctx); \
_ret &= (func_ret & 1); \
*(ret_flags) |= (func_ret >> 1); \
+ bpf_cgroup_storage_unset(); \
_item++; \
} \
rcu_read_unlock(); \
@@ -1124,9 +1158,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
goto _out; \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
- if (set_cg_storage) \
- bpf_cgroup_storage_set(_item->cgroup_storage); \
- _ret &= func(_prog, ctx); \
+ if (!set_cg_storage) { \
+ _ret &= func(_prog, ctx); \
+ } else { \
+ if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
+ break; \
+ _ret &= func(_prog, ctx); \
+ bpf_cgroup_storage_unset(); \
+ } \
_item++; \
} \
_out: \
@@ -1399,6 +1438,10 @@ void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
struct bpf_link_info *info);
+int map_set_for_each_callback_args(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee);
+
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
@@ -1448,9 +1491,9 @@ struct btf *bpf_get_btf_vmlinux(void);
/* Map specifics */
struct xdp_buff;
struct sk_buff;
+struct bpf_dtab_netdev;
+struct bpf_cpu_map_entry;
-struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
-struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
void __dev_flush(void);
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx);
@@ -1460,7 +1503,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog);
bool dev_map_can_have_prog(struct bpf_map *map);
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
@@ -1489,6 +1531,10 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
+bool bpf_prog_test_check_kfunc_call(u32 kfunc_id);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
@@ -1507,8 +1553,11 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
struct btf_func_model *m);
struct bpf_reg_state;
-int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *regs);
+int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
+ struct bpf_reg_state *regs);
+int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
+ const struct btf *btf, u32 func_id,
+ struct bpf_reg_state *regs);
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *reg);
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
@@ -1518,6 +1567,11 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+void bpf_task_storage_free(struct task_struct *task);
+bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
+const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+ const struct bpf_insn *insn);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -1587,17 +1641,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
return -EOPNOTSUPP;
}
-static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- return NULL;
-}
-
-static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- return NULL;
-}
static inline bool dev_map_can_have_prog(struct bpf_map *map)
{
return false;
@@ -1609,6 +1652,7 @@ static inline void __dev_flush(void)
struct xdp_buff;
struct bpf_dtab_netdev;
+struct bpf_cpu_map_entry;
static inline
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
@@ -1633,12 +1677,6 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
return 0;
}
-static inline
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
-{
- return NULL;
-}
-
static inline void __cpu_map_flush(void)
{
}
@@ -1689,6 +1727,18 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
return -ENOTSUPP;
}
+static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
+
+static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
+{
+ return false;
+}
+
static inline void bpf_map_put(struct bpf_map *map)
{
}
@@ -1703,6 +1753,22 @@ bpf_base_func_proto(enum bpf_func_id func_id)
{
return NULL;
}
+
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
+
+static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
+{
+ return false;
+}
+
+static inline const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+ const struct bpf_insn *insn)
+{
+ return NULL;
+}
#endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@ -1787,22 +1853,24 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
-#if defined(CONFIG_BPF_STREAM_PARSER)
-int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which);
+#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
+
+void bpf_sk_reuseport_detach(struct sock *sk);
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+ void *value);
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags);
#else
-static inline int sock_map_prog_update(struct bpf_map *map,
- struct bpf_prog *prog,
- struct bpf_prog *old, u32 which)
+static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
- return -EOPNOTSUPP;
}
+#ifdef CONFIG_BPF_SYSCALL
static inline int sock_map_get_from_fd(const union bpf_attr *attr,
struct bpf_prog *prog)
{
@@ -1820,20 +1888,7 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
return -EOPNOTSUPP;
}
-#endif /* CONFIG_BPF_STREAM_PARSER */
-#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
-void bpf_sk_reuseport_detach(struct sock *sk);
-int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
- void *value);
-int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags);
-#else
-static inline void bpf_sk_reuseport_detach(struct sock *sk)
-{
-}
-
-#ifdef CONFIG_BPF_SYSCALL
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
void *key, void *value)
{
@@ -1905,6 +1960,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
extern const struct bpf_func_proto bpf_sock_from_file_proto;
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
+extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index b2c9463f36a1..b902c580c48d 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -126,7 +126,8 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
bool cacheit_lockit);
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
+ int __percpu *busy_counter);
int bpf_local_storage_map_check_btf(const struct bpf_map *map,
const struct btf *btf,
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 0d1c33ace398..479c101546ad 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
}
-static inline struct bpf_storage_blob *bpf_task(
- const struct task_struct *task)
-{
- if (unlikely(!task->security))
- return NULL;
-
- return task->security + bpf_lsm_blob_sizes.lbs_task;
-}
-
extern const struct bpf_func_proto bpf_inode_storage_get_proto;
extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
-extern const struct bpf_func_proto bpf_task_storage_get_proto;
-extern const struct bpf_func_proto bpf_task_storage_delete_proto;
void bpf_inode_storage_free(struct inode *inode);
-void bpf_task_storage_free(struct task_struct *task);
#else /* !CONFIG_BPF_LSM */
@@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(
return NULL;
}
-static inline struct bpf_storage_blob *bpf_task(
- const struct task_struct *task)
-{
- return NULL;
-}
-
static inline void bpf_inode_storage_free(struct inode *inode)
{
}
-static inline void bpf_task_storage_free(struct task_struct *task)
-{
-}
-
#endif /* CONFIG_BPF_LSM */
#endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 99f7fd657d87..f883f01a5061 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -103,19 +103,17 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
-#if defined(CONFIG_BPF_STREAM_PARSER)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
-#endif
#ifdef CONFIG_BPF_LSM
BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
#endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
#endif
#ifdef CONFIG_INET
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif
#endif
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 971b33aca13d..51c2ffa3d901 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -68,6 +68,8 @@ struct bpf_reg_state {
unsigned long raw1;
unsigned long raw2;
} raw;
+
+ u32 subprogno; /* for PTR_TO_FUNC */
};
/* For PTR_TO_PACKET, used to find other pointers with the same variable
* offset, so they can share range knowledge.
@@ -204,6 +206,7 @@ struct bpf_func_state {
int acquired_refs;
struct bpf_reference_state *refs;
int allocated_stack;
+ bool in_callback_fn;
struct bpf_stack_state *stack;
};
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 7fabf1428093..3bac66e0183a 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -9,6 +9,7 @@
#include <uapi/linux/bpf.h>
#define BTF_TYPE_EMIT(type) ((void)(type *)0)
+#define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)
struct btf;
struct btf_member;
@@ -109,6 +110,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
const struct btf_type *
btf_resolve_size(const struct btf *btf, const struct btf_type *type,
u32 *type_size);
+const char *btf_type_str(const struct btf_type *t);
#define for_each_member(i, struct_type, member) \
for (i = 0, member = btf_type_member(struct_type); \
@@ -140,6 +142,11 @@ static inline bool btf_type_is_enum(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM;
}
+static inline bool btf_type_is_scalar(const struct btf_type *t)
+{
+ return btf_type_is_int(t) || btf_type_is_enum(t);
+}
+
static inline bool btf_type_is_typedef(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF;
diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h
index 707575c668f4..ae7a3411167c 100644
--- a/include/linux/can/bittiming.h
+++ b/include/linux/can/bittiming.h
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2020 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de>
+ * Copyright (c) 2021 Vincent Mailhol <mailhol.vincent@wanadoo.fr>
*/
#ifndef _CAN_BITTIMING_H
@@ -10,9 +11,83 @@
#define CAN_SYNC_SEG 1
+
+/* Kilobits and Megabits per second */
+#define CAN_KBPS 1000UL
+#define CAN_MBPS 1000000UL
+
+/* Megahertz */
+#define CAN_MHZ 1000000UL
+
+/*
+ * struct can_tdc - CAN FD Transmission Delay Compensation parameters
+ *
+ * At high bit rates, the propagation delay from the TX pin to the RX
+ * pin of the transceiver causes measurement errors: the sample point
+ * on the RX pin might occur on the previous bit.
+ *
+ * To solve this issue, ISO 11898-1 introduces in section 11.3.3
+ * "Transmitter delay compensation" a SSP (Secondary Sample Point)
+ * equal to the distance, in time quanta, from the start of the bit
+ * time on the TX pin to the actual measurement on the RX pin.
+ *
+ * This structure contains the parameters to calculate that SSP.
+ *
+ * @tdcv: Transmitter Delay Compensation Value. Distance, in time
+ * quanta, from when the bit is sent on the TX pin to when it is
+ * received on the RX pin of the transmitter. Possible options:
+ *
+ * 0: automatic mode. The controller dynamically measures @tdcv
+ * for each transmitted CAN FD frame.
+ *
+ * Other values: manual mode. Use the fixed provided value.
+ *
+ * @tdco: Transmitter Delay Compensation Offset. Offset value, in time
+ * quanta, defining the distance between the start of the bit
+ * reception on the RX pin of the transceiver and the SSP
+ * position such that SSP = @tdcv + @tdco.
+ *
+ * If @tdco is zero, then TDC is disabled and both @tdcv and
+ * @tdcf should be ignored.
+ *
+ * @tdcf: Transmitter Delay Compensation Filter window. Defines the
+ * minimum value for the SSP position in time quanta. If SSP is
+ * less than @tdcf, then no delay compensations occur and the
+ * normal sampling point is used instead. The feature is enabled
+ * if and only if @tdcv is set to zero (automatic mode) and @tdcf
+ * is configured to a value greater than @tdco.
+ */
+struct can_tdc {
+ u32 tdcv;
+ u32 tdco;
+ u32 tdcf;
+};
+
+/*
+ * struct can_tdc_const - CAN hardware-dependent constant for
+ * Transmission Delay Compensation
+ *
+ * @tdcv_max: Transmitter Delay Compensation Value maximum value.
+ * Should be set to zero if the controller does not support
+ * manual mode for tdcv.
+ * @tdco_max: Transmitter Delay Compensation Offset maximum value.
+ * Should not be zero. If the controller does not support TDC,
+ * then the pointer to this structure should be NULL.
+ * @tdcf_max: Transmitter Delay Compensation Filter window maximum
+ * value. Should be set to zero if the controller does not
+ * support this feature.
+ */
+struct can_tdc_const {
+ u32 tdcv_max;
+ u32 tdco_max;
+ u32 tdcf_max;
+};
+
#ifdef CONFIG_CAN_CALC_BITTIMING
int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
const struct can_bittiming_const *btc);
+
+void can_calc_tdco(struct net_device *dev);
#else /* !CONFIG_CAN_CALC_BITTIMING */
static inline int
can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
@@ -21,6 +96,10 @@ can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
netdev_err(dev, "bit-timing calculation not available\n");
return -EINVAL;
}
+
+static inline void can_calc_tdco(struct net_device *dev)
+{
+}
#endif /* CONFIG_CAN_CALC_BITTIMING */
int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt,
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index ac4d83a1ab81..27b275e463da 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -39,19 +39,23 @@ struct can_priv {
struct net_device *dev;
struct can_device_stats can_stats;
- struct can_bittiming bittiming, data_bittiming;
const struct can_bittiming_const *bittiming_const,
*data_bittiming_const;
- const u16 *termination_const;
- unsigned int termination_const_cnt;
- u16 termination;
- const u32 *bitrate_const;
+ struct can_bittiming bittiming, data_bittiming;
+ const struct can_tdc_const *tdc_const;
+ struct can_tdc tdc;
+
unsigned int bitrate_const_cnt;
+ const u32 *bitrate_const;
const u32 *data_bitrate_const;
unsigned int data_bitrate_const_cnt;
u32 bitrate_max;
struct can_clock clock;
+ unsigned int termination_const_cnt;
+ const u16 *termination_const;
+ u16 termination;
+
enum can_state state;
/* CAN controller features - see include/uapi/linux/can/netlink.h */
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index d438eb058069..d311bc369a39 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -23,7 +23,8 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx,
u8 *len_ptr, unsigned int *frame_len_ptr);
unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx,
unsigned int *frame_len_ptr);
-void can_free_echo_skb(struct net_device *dev, unsigned int idx);
+void can_free_echo_skb(struct net_device *dev, unsigned int idx,
+ unsigned int *frame_len_ptr);
struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf);
struct sk_buff *alloc_canfd_skb(struct net_device *dev,
struct canfd_frame **cfd);
diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index 4265f328681a..c6bc45ae5e03 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -160,11 +160,6 @@ static inline void ocelot_xfh_get_src_port(void *extraction, u64 *src_port)
packing(extraction, src_port, 46, 43, OCELOT_TAG_LEN, UNPACK, 0);
}
-static inline void ocelot_xfh_get_cpuq(void *extraction, u64 *cpuq)
-{
- packing(extraction, cpuq, 28, 20, OCELOT_TAG_LEN, UNPACK, 0);
-}
-
static inline void ocelot_xfh_get_qos_class(void *extraction, u64 *qos_class)
{
packing(extraction, qos_class, 19, 17, OCELOT_TAG_LEN, UNPACK, 0);
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 2e5debc0373c..330345b1be54 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -11,7 +11,7 @@
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*
- * Relocated to include/linux where it belongs by Alan Cox
+ * Relocated to include/linux where it belongs by Alan Cox
* <gw4pts@gw4pts.ampr.org>
*/
#ifndef _LINUX_ETHERDEVICE_H
@@ -29,7 +29,7 @@ struct device;
int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
unsigned char *arch_get_platform_mac_address(void);
int nvmem_get_mac_address(struct device *dev, void *addrbuf);
-u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len);
+u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len);
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
extern const struct header_ops eth_header_ops;
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index cdca84e6dd6b..4290e2fa3117 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -412,6 +412,8 @@ struct ethtool_pause_stats {
* @get_ethtool_phy_stats: Return extended statistics about the PHY device.
* This is only useful if the device maintains PHY statistics and
* cannot use the standard PHY library helpers.
+ * @get_phy_tunable: Read the value of a PHY tunable.
+ * @set_phy_tunable: Set the value of a PHY tunable.
*
* All operations are optional (i.e. the function pointer may be set
* to %NULL) and callers must take this into account. Callers must
@@ -573,7 +575,7 @@ struct ethtool_phy_ops {
*/
void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops);
-/*
+/**
* ethtool_params_from_link_mode - Derive link parameters from a given link mode
* @link_ksettings: Link parameters to be derived from the link mode
* @link_mode: Link mode
@@ -581,4 +583,14 @@ void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops);
void
ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings,
enum ethtool_link_mode_bit_indices link_mode);
+
+/**
+ * ethtool_sprintf - Write formatted string to ethtool string data
+ * @data: Pointer to start of string to update
+ * @fmt: Format of string to write
+ *
+ * Write formatted string to data. Update data to point at start of
+ * next string.
+ */
+extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...);
#endif /* _LINUX_ETHTOOL_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 3b00fc906ccd..9a09547bc7ba 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -646,7 +646,8 @@ struct bpf_redirect_info {
u32 flags;
u32 tgt_index;
void *tgt_value;
- struct bpf_map *map;
+ u32 map_id;
+ enum bpf_map_type map_type;
u32 kern_flags;
struct bpf_nh_params nh;
};
@@ -876,8 +877,7 @@ void bpf_prog_free_linfo(struct bpf_prog *prog);
void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
const u32 *insn_to_jit_off);
int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog);
-void bpf_prog_free_jited_linfo(struct bpf_prog *prog);
-void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog);
+void bpf_prog_jit_attempt_done(struct bpf_prog *prog);
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags);
@@ -918,6 +918,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
bool bpf_jit_needs_zext(void);
+bool bpf_jit_supports_kfunc_call(void);
bool bpf_helper_changes_pkt_data(void *func);
static inline bool bpf_dump_raw_ok(const struct cred *cred)
@@ -1245,15 +1246,6 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
int k, unsigned int size);
-static inline void *bpf_load_pointer(const struct sk_buff *skb, int k,
- unsigned int size, void *buffer)
-{
- if (k >= 0)
- return skb_header_pointer(skb, k, size, buffer);
-
- return bpf_internal_load_pointer_neg_helper(skb, k, size);
-}
-
static inline int bpf_tell_extensions(void)
{
return SKF_AD_MAX;
@@ -1472,4 +1464,32 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
}
#endif /* IS_ENABLED(CONFIG_IPV6) */
+static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
+ void *lookup_elem(struct bpf_map *map, u32 key))
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ /* Lower bits of the flags are used as return code on lookup failure */
+ if (unlikely(flags > XDP_TX))
+ return XDP_ABORTED;
+
+ ri->tgt_value = lookup_elem(map, ifindex);
+ if (unlikely(!ri->tgt_value)) {
+ /* If the lookup fails we want to clear out the state in the
+ * redirect_info struct completely, so that if an eBPF program
+ * performs multiple lookups, the last one always takes
+ * precedence.
+ */
+ ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->map_type = BPF_MAP_TYPE_UNSPEC;
+ return flags;
+ }
+
+ ri->tgt_index = ifindex;
+ ri->map_id = map->id;
+ ri->map_type = map->map_type;
+
+ return XDP_REDIRECT;
+}
+
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index b979005ea39c..2cc35038a8ca 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -69,6 +69,8 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto);
bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
bool br_multicast_enabled(const struct net_device *dev);
bool br_multicast_router(const struct net_device *dev);
+int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+ struct notifier_block *nb, struct netlink_ext_ack *extack);
#else
static inline int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list)
@@ -93,6 +95,13 @@ static inline bool br_multicast_router(const struct net_device *dev)
{
return false;
}
+static inline int br_mdb_replay(struct net_device *br_dev,
+ struct net_device *dev,
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
#endif
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
@@ -102,6 +111,8 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid);
int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto);
int br_vlan_get_info(const struct net_device *dev, u16 vid,
struct bridge_vlan_info *p_vinfo);
+int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
+ struct notifier_block *nb, struct netlink_ext_ack *extack);
#else
static inline bool br_vlan_enabled(const struct net_device *dev)
{
@@ -128,6 +139,14 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
{
return -EINVAL;
}
+
+static inline int br_vlan_replay(struct net_device *br_dev,
+ struct net_device *dev,
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
#endif
#if IS_ENABLED(CONFIG_BRIDGE)
@@ -136,6 +155,10 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev,
__u16 vid);
void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
+u8 br_port_get_stp_state(const struct net_device *dev);
+clock_t br_get_ageing_time(struct net_device *br_dev);
+int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
+ struct notifier_block *nb);
#else
static inline struct net_device *
br_fdb_find_port(const struct net_device *br_dev,
@@ -154,6 +177,23 @@ br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
{
return false;
}
+
+static inline u8 br_port_get_stp_state(const struct net_device *dev)
+{
+ return BR_STATE_DISABLED;
+}
+
+static inline clock_t br_get_ageing_time(struct net_device *br_dev)
+{
+ return 0;
+}
+
+static inline int br_fdb_replay(struct net_device *br_dev,
+ struct net_device *dev,
+ struct notifier_block *nb)
+{
+ return -EOPNOTSUPP;
+}
#endif
#endif
diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
index 9661416a9bb4..4efb537f57f3 100644
--- a/include/linux/if_rmnet.h
+++ b/include/linux/if_rmnet.h
@@ -6,50 +6,43 @@
#define _LINUX_IF_RMNET_H_
struct rmnet_map_header {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- u8 pad_len:6;
- u8 reserved_bit:1;
- u8 cd_bit:1;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- u8 cd_bit:1;
- u8 reserved_bit:1;
- u8 pad_len:6;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
- u8 mux_id;
- __be16 pkt_len;
+ u8 flags; /* MAP_CMD_FLAG, MAP_PAD_LEN_MASK */
+ u8 mux_id;
+ __be16 pkt_len; /* Length of packet, including pad */
} __aligned(1);
+/* rmnet_map_header flags field:
+ * PAD_LEN: number of pad bytes following packet data
+ * CMD: 1 = packet contains a MAP command; 0 = packet contains data
+ */
+#define MAP_PAD_LEN_MASK GENMASK(5, 0)
+#define MAP_CMD_FLAG BIT(7)
+
struct rmnet_map_dl_csum_trailer {
- u8 reserved1;
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- u8 valid:1;
- u8 reserved2:7;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- u8 reserved2:7;
- u8 valid:1;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
- u16 csum_start_offset;
- u16 csum_length;
+ u8 reserved1;
+ u8 flags; /* MAP_CSUM_DL_VALID_FLAG */
+ __be16 csum_start_offset;
+ __be16 csum_length;
__be16 csum_value;
} __aligned(1);
+/* rmnet_map_dl_csum_trailer flags field:
+ * VALID: 1 = checksum and length valid; 0 = ignore them
+ */
+#define MAP_CSUM_DL_VALID_FLAG BIT(0)
+
struct rmnet_map_ul_csum_header {
__be16 csum_start_offset;
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- u16 csum_insert_offset:14;
- u16 udp_ind:1;
- u16 csum_enabled:1;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- u16 csum_enabled:1;
- u16 udp_ind:1;
- u16 csum_insert_offset:14;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
+ __be16 csum_info; /* MAP_CSUM_UL_* */
} __aligned(1);
+/* csum_info field:
+ * OFFSET: where (offset in bytes) to insert computed checksum
+ * UDP: 1 = UDP checksum (zero checksum means no checksum)
+ * ENABLED: 1 = checksum computation requested
+ */
+#define MAP_CSUM_UL_OFFSET_MASK GENMASK(13, 0)
+#define MAP_CSUM_UL_UDP_FLAG BIT(14)
+#define MAP_CSUM_UL_ENABLED_FLAG BIT(15)
+
#endif /* !(_LINUX_IF_RMNET_H_) */
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 52b1610eae68..6b11a5411082 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -22,8 +22,13 @@
#define MARVELL_PHY_ID_88E1545 0x01410ea0
#define MARVELL_PHY_ID_88E1548P 0x01410ec0
#define MARVELL_PHY_ID_88E3016 0x01410e60
-#define MARVELL_PHY_ID_88X3310 0x002b09a0
#define MARVELL_PHY_ID_88E2110 0x002b09b0
+#define MARVELL_PHY_ID_88X2222 0x01410f10
+
+/* PHY IDs and mask for Alaska 10G PHYs */
+#define MARVELL_PHY_ID_88X33X0_MASK 0xfffffff8
+#define MARVELL_PHY_ID_88X3310 0x002b09a0
+#define MARVELL_PHY_ID_88X3340 0x002b09a8
/* Marvel 88E1111 in Finisar SFP module with modified PHY ID */
#define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index dc3d2508f5c6..92a029a800a0 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1142,6 +1142,8 @@ enum mlx5_flex_parser_protos {
MLX5_FLEX_PROTO_GENEVE = 1 << 3,
MLX5_FLEX_PROTO_CW_MPLS_GRE = 1 << 4,
MLX5_FLEX_PROTO_CW_MPLS_UDP = 1 << 5,
+ MLX5_FLEX_PROTO_ICMP = 1 << 8,
+ MLX5_FLEX_PROTO_ICMPV6 = 1 << 9,
};
/* MLX5 DEV CAPs */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 53b89631a1d9..2da953ad02ed 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -438,7 +438,6 @@ struct mlx5_core_health {
unsigned long flags;
struct work_struct fatal_report_work;
struct work_struct report_work;
- struct delayed_work recover_work;
struct devlink_health_reporter *fw_reporter;
struct devlink_health_reporter *fw_fatal_reporter;
};
@@ -517,8 +516,8 @@ struct mlx5_rate_limit {
struct mlx5_rl_entry {
u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)];
- u16 index;
u64 refcount;
+ u16 index;
u16 uid;
u8 dedicated : 1;
};
@@ -530,6 +529,7 @@ struct mlx5_rl_table {
u32 max_rate;
u32 min_rate;
struct mlx5_rl_entry *rl_entry;
+ u64 refcount;
};
struct mlx5_core_roce {
@@ -644,10 +644,14 @@ struct mlx5_td {
};
struct mlx5e_resources {
- u32 pdn;
- struct mlx5_td td;
- struct mlx5_core_mkey mkey;
- struct mlx5_sq_bfreg bfreg;
+ struct mlx5e_hw_objs {
+ u32 pdn;
+ struct mlx5_td td;
+ struct mlx5_core_mkey mkey;
+ struct mlx5_sq_bfreg bfreg;
+ } hw_objs;
+ struct devlink_port dl_port;
+ struct net_device *uplink_netdev;
};
enum mlx5_sw_icm_type {
@@ -874,6 +878,11 @@ static inline u32 mlx5_base_mkey(const u32 key)
return key & 0xffffff00u;
}
+static inline u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
+{
+ return ((u32)1 << log_sz) << log_stride;
+}
+
static inline void mlx5_init_fbc_offset(struct mlx5_buf_list *frags,
u8 log_stride, u8 log_sz,
u16 strides_offset,
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 994c2c8cb4fd..429a710c5a99 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -48,6 +48,7 @@ struct mlx5_eswitch_rep {
/* Only IB rep is using vport_index */
u16 vport_index;
u32 vlan_refcount;
+ struct mlx5_eswitch *esw;
};
void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
@@ -61,8 +62,8 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
u16 vport_num);
void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
- u16 vport_num, u32 sqn);
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+ struct mlx5_eswitch_rep *rep, u32 sqn);
u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
@@ -74,20 +75,19 @@ bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw);
bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
/* Reg C0 usage:
- * Reg C0 = < ESW_PFNUM_BITS(4) | ESW_VPORT BITS(12) | ESW_CHAIN_TAG(16) >
+ * Reg C0 = < ESW_PFNUM_BITS(4) | ESW_VPORT BITS(12) | ESW_REG_C0_OBJ(16) >
*
* Highest 4 bits of the reg c0 is the PF_NUM (range 0-15), 12 bits of
* unique non-zero vport id (range 1-4095). The rest (lowest 16 bits) is left
- * for tc chain tag restoration.
+ * for user data objects managed by a common mapping context.
* PFNUM + VPORT comprise the SOURCE_PORT matching.
*/
#define ESW_VPORT_BITS 12
#define ESW_PFNUM_BITS 4
#define ESW_SOURCE_PORT_METADATA_BITS (ESW_PFNUM_BITS + ESW_VPORT_BITS)
#define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS)
-#define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS)
-#define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\
- 0)
+#define ESW_REG_C0_USER_DATA_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS)
+#define ESW_REG_C0_USER_DATA_METADATA_MASK GENMASK(ESW_REG_C0_USER_DATA_METADATA_BITS - 1, 0)
static inline u32 mlx5_eswitch_get_vport_metadata_mask(void)
{
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 9c68b2da14c6..1599deee0456 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -806,9 +806,11 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 vport_svlan_insert[0x1];
u8 vport_cvlan_insert_if_not_exist[0x1];
u8 vport_cvlan_insert_overwrite[0x1];
- u8 reserved_at_5[0x3];
+ u8 reserved_at_5[0x2];
+ u8 esw_shared_ingress_acl[0x1];
u8 esw_uplink_ingress_acl[0x1];
- u8 reserved_at_9[0x10];
+ u8 root_ft_on_other_esw[0x1];
+ u8 reserved_at_a[0xf];
u8 esw_functions_changed[0x1];
u8 reserved_at_1a[0x1];
u8 ecpf_vport_exists[0x1];
@@ -1502,7 +1504,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_270[0x6];
u8 lag_dct[0x2];
u8 lag_tx_port_affinity[0x1];
- u8 reserved_at_279[0x2];
+ u8 lag_native_fdb_selection[0x1];
+ u8 reserved_at_27a[0x1];
u8 lag_master[0x1];
u8 num_lag_ports[0x4];
@@ -1680,7 +1683,16 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_6e0[0x10];
u8 sf_base_id[0x10];
- u8 reserved_at_700[0x80];
+ u8 reserved_at_700[0x8];
+ u8 num_total_dynamic_vf_msix[0x18];
+ u8 reserved_at_720[0x14];
+ u8 dynamic_msix_table_size[0xc];
+ u8 reserved_at_740[0xc];
+ u8 min_dynamic_vf_msix_table_size[0x4];
+ u8 reserved_at_750[0x4];
+ u8 max_dynamic_vf_msix_table_size[0xc];
+
+ u8 reserved_at_760[0x20];
u8 vhca_tunnel_commands[0x40];
u8 reserved_at_7c0[0x40];
};
@@ -10038,14 +10050,19 @@ struct mlx5_ifc_set_flow_table_root_in_bits {
u8 reserved_at_60[0x20];
u8 table_type[0x8];
- u8 reserved_at_88[0x18];
+ u8 reserved_at_88[0x7];
+ u8 table_of_other_vport[0x1];
+ u8 table_vport_number[0x10];
u8 reserved_at_a0[0x8];
u8 table_id[0x18];
u8 reserved_at_c0[0x8];
u8 underlay_qpn[0x18];
- u8 reserved_at_e0[0x120];
+ u8 table_eswitch_owner_vhca_id_valid[0x1];
+ u8 reserved_at_e1[0xf];
+ u8 table_eswitch_owner_vhca_id[0x10];
+ u8 reserved_at_100[0x100];
};
enum {
@@ -10275,7 +10292,8 @@ struct mlx5_ifc_dcbx_param_bits {
};
struct mlx5_ifc_lagc_bits {
- u8 reserved_at_0[0x1d];
+ u8 fdb_selection_mode[0x1];
+ u8 reserved_at_1[0x1c];
u8 lag_state[0x3];
u8 reserved_at_20[0x14];
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 87a5d186faff..5cbc950b34df 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -756,6 +756,13 @@ struct rx_queue_attribute {
const char *buf, size_t len);
};
+/* XPS map type and offset of the xps map within net_device->xps_maps[]. */
+enum xps_map_type {
+ XPS_CPUS = 0,
+ XPS_RXQS,
+ XPS_MAPS_MAX,
+};
+
#ifdef CONFIG_XPS
/*
* This structure holds an XPS map which can be of variable length. The
@@ -773,9 +780,19 @@ struct xps_map {
/*
* This structure holds all XPS maps for device. Maps are indexed by CPU.
+ *
+ * We keep track of the number of cpus/rxqs used when the struct is allocated,
+ * in nr_ids. This helps avoid accessing out-of-bounds memory.
+ *
+ * We keep track of the number of traffic classes used when the struct is
+ * allocated, in num_tc. This will be used to navigate the maps, to ensure we're
+ * not crossing its upper bound, as the original dev->num_tc can be updated in
+ * the meantime.
*/
struct xps_dev_maps {
struct rcu_head rcu;
+ unsigned int nr_ids;
+ s16 num_tc;
struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */
};
@@ -833,6 +850,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
struct sk_buff *skb,
struct net_device *sb_dev);
+enum net_device_path_type {
+ DEV_PATH_ETHERNET = 0,
+ DEV_PATH_VLAN,
+ DEV_PATH_BRIDGE,
+ DEV_PATH_PPPOE,
+ DEV_PATH_DSA,
+};
+
+struct net_device_path {
+ enum net_device_path_type type;
+ const struct net_device *dev;
+ union {
+ struct {
+ u16 id;
+ __be16 proto;
+ u8 h_dest[ETH_ALEN];
+ } encap;
+ struct {
+ enum {
+ DEV_PATH_BR_VLAN_KEEP,
+ DEV_PATH_BR_VLAN_TAG,
+ DEV_PATH_BR_VLAN_UNTAG,
+ DEV_PATH_BR_VLAN_UNTAG_HW,
+ } vlan_mode;
+ u16 vlan_id;
+ __be16 vlan_proto;
+ } bridge;
+ struct {
+ int port;
+ u16 proto;
+ } dsa;
+ };
+};
+
+#define NET_DEVICE_PATH_STACK_MAX 5
+#define NET_DEVICE_PATH_VLAN_MAX 2
+
+struct net_device_path_stack {
+ int num_paths;
+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
+};
+
+struct net_device_path_ctx {
+ const struct net_device *dev;
+ const u8 *daddr;
+
+ int num_vlans;
+ struct {
+ u16 id;
+ __be16 proto;
+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
+};
+
enum tc_setup_type {
TC_SETUP_QDISC_MQPRIO,
TC_SETUP_CLSU32,
@@ -1267,6 +1337,8 @@ struct netdev_net_notifier {
* struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
* If a device is paired with a peer device, return the peer instance.
* The caller must be under RCU read context.
+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
+ * Get the forwarding path to reach the real device from the HW destination address
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1473,6 +1545,8 @@ struct net_device_ops {
int (*ndo_tunnel_ctl)(struct net_device *dev,
struct ip_tunnel_parm *p, int cmd);
struct net_device * (*ndo_get_peer_dev)(struct net_device *dev);
+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
+ struct net_device_path *path);
};
/**
@@ -1520,6 +1594,8 @@ struct net_device_ops {
* @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
* @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
* @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
+ * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
+ * skb_headlen(skb) == 0 (data starts from frag0)
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@@ -1553,6 +1629,7 @@ enum netdev_priv_flags {
IFF_FAILOVER_SLAVE = 1<<28,
IFF_L3MDEV_RX_HANDLER = 1<<29,
IFF_LIVE_RENAME_OK = 1<<30,
+ IFF_TX_SKB_NO_LINEAR = 1<<31,
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@ -1579,12 +1656,14 @@ enum netdev_priv_flags {
#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE
#define IFF_TEAM IFF_TEAM
#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
+#define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM
#define IFF_MACSEC IFF_MACSEC
#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
#define IFF_FAILOVER IFF_FAILOVER
#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK
+#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR
/* Specifies the type of the struct net_device::ml_priv pointer */
enum netdev_ml_priv_type {
@@ -1760,8 +1839,7 @@ enum netdev_ml_priv_type {
* @tx_queue_len: Max frames per queue allowed
* @tx_global_lock: XXX: need comments on this one
* @xdp_bulkq: XDP device bulk queue
- * @xps_cpus_map: all CPUs map for XPS device
- * @xps_rxqs_map: all RXQs map for XPS device
+ * @xps_maps: all CPUs/RXQs maps for XPS device
*
* @xps_maps: XXX: need comments on this one
* @miniq_egress: clsact qdisc specific data for
@@ -1773,6 +1851,7 @@ enum netdev_ml_priv_type {
*
* @proto_down_reason: reason a netdev interface is held down
* @pcpu_refcnt: Number of references to this device
+ * @dev_refcnt: Number of references to this device
* @todo_list: Delayed register/unregister
* @link_watch_list: XXX: need comments on this one
*
@@ -2057,8 +2136,7 @@ struct net_device {
struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
#ifdef CONFIG_XPS
- struct xps_dev_maps __rcu *xps_cpus_map;
- struct xps_dev_maps __rcu *xps_rxqs_map;
+ struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
#endif
#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc __rcu *miniq_egress;
@@ -2074,7 +2152,12 @@ struct net_device {
u32 proto_down_reason;
struct list_head todo_list;
+
+#ifdef CONFIG_PCPU_DEV_REFCNT
int __percpu *pcpu_refcnt;
+#else
+ refcount_t dev_refcnt;
+#endif
struct list_head link_watch_list;
@@ -2846,6 +2929,8 @@ void dev_remove_offload(struct packet_offload *po);
int dev_get_iflink(const struct net_device *dev);
int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+ struct net_device_path_stack *stack);
struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
unsigned short mask);
struct net_device *dev_get_by_name(struct net *net, const char *name);
@@ -3424,6 +3509,24 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue)
}
/**
+ * netdev_queue_set_dql_min_limit - set dql minimum limit
+ * @dev_queue: pointer to transmit queue
+ * @min_limit: dql minimum limit
+ *
+ * Forces xmit_more() to return true until the minimum threshold
+ * defined by @min_limit is reached (or until the tx queue is
+ * empty). Warning: to be used with care, misuse will impact the
+ * latency.
+ */
+static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue,
+ unsigned int min_limit)
+{
+#ifdef CONFIG_BQL
+ dev_queue->dql.min_limit = min_limit;
+#endif
+}
+
+/**
* netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write
* @dev_queue: pointer to transmit queue
*
@@ -3688,7 +3791,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
u16 index);
int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
- u16 index, bool is_rxqs_map);
+ u16 index, enum xps_map_type type);
/**
* netif_attr_test_mask - Test a CPU or Rx queue set in a mask
@@ -3783,7 +3886,7 @@ static inline int netif_set_xps_queue(struct net_device *dev,
static inline int __netif_set_xps_queue(struct net_device *dev,
const unsigned long *mask,
- u16 index, bool is_rxqs_map)
+ u16 index, enum xps_map_type type)
{
return 0;
}
@@ -3923,7 +4026,14 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags,
int dev_change_name(struct net_device *, const char *);
int dev_set_alias(struct net_device *, const char *, size_t);
int dev_get_alias(const struct net_device *, char *, size_t);
-int dev_change_net_namespace(struct net_device *, struct net *, const char *);
+int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+ const char *pat, int new_ifindex);
+static inline
+int dev_change_net_namespace(struct net_device *dev, struct net *net,
+ const char *pat)
+{
+ return __dev_change_net_namespace(dev, net, pat, 0);
+}
int __dev_set_mtu(struct net_device *, int);
int dev_validate_mtu(struct net_device *dev, int mtu,
struct netlink_ext_ack *extack);
@@ -4026,7 +4136,11 @@ void netdev_run_todo(void);
*/
static inline void dev_put(struct net_device *dev)
{
+#ifdef CONFIG_PCPU_DEV_REFCNT
this_cpu_dec(*dev->pcpu_refcnt);
+#else
+ refcount_dec(&dev->dev_refcnt);
+#endif
}
/**
@@ -4037,7 +4151,11 @@ static inline void dev_put(struct net_device *dev)
*/
static inline void dev_hold(struct net_device *dev)
{
+#ifdef CONFIG_PCPU_DEV_REFCNT
this_cpu_inc(*dev->pcpu_refcnt);
+#else
+ refcount_inc(&dev->dev_refcnt);
+#endif
}
/* Carrier loss detection, dial on demand. The functions netif_carrier_on
@@ -4172,7 +4290,7 @@ static inline bool netif_oper_up(const struct net_device *dev)
*
* Check if device has not been removed from system.
*/
-static inline bool netif_device_present(struct net_device *dev)
+static inline bool netif_device_present(const struct net_device *dev)
{
return test_bit(__LINK_STATE_PRESENT, &dev->state);
}
@@ -4611,6 +4729,7 @@ void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
extern int netdev_max_backlog;
extern int netdev_tstamp_prequeue;
+extern int netdev_unregister_timeout_secs;
extern int weight_p;
extern int dev_weight_rx_bias;
extern int dev_weight_tx_bias;
@@ -5287,6 +5406,9 @@ do { \
#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
+extern struct list_head ptype_all __read_mostly;
+extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+
extern struct net_device *blackhole_netdev;
#endif /* _LINUX_NETDEVICE_H */
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 46d9a0c26c67..10279c4830ac 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -124,8 +124,6 @@ struct ip_set_ext {
bool target;
};
-struct ip_set;
-
#define ext_timeout(e, s) \
((unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]))
#define ext_counter(e, s) \
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index f6267e2883f2..d4c14257db5d 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -51,12 +51,41 @@ int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
unsigned int group, int echo, gfp_t flags);
int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid);
+void nfnetlink_broadcast(struct net *net, struct sk_buff *skb, __u32 portid,
+ __u32 group, gfp_t allocation);
static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type)
{
return subsys << 8 | msg_type;
}
+static inline void nfnl_fill_hdr(struct nlmsghdr *nlh, u8 family, u8 version,
+ __be16 res_id)
+{
+ struct nfgenmsg *nfmsg;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = version;
+ nfmsg->res_id = res_id;
+}
+
+static inline struct nlmsghdr *nfnl_msg_put(struct sk_buff *skb, u32 portid,
+ u32 seq, int type, int flags,
+ u8 family, u8 version,
+ __be16 res_id)
+{
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags);
+ if (!nlh)
+ return NULL;
+
+ nfnl_fill_hdr(nlh, family, version, res_id);
+
+ return nlh;
+}
+
void nfnl_lock(__u8 subsys_id);
void nfnl_unlock(__u8 subsys_id);
#ifdef CONFIG_PROVE_LOCKING
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 86c799c97b77..9b575a676888 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -856,6 +856,12 @@ struct module;
* e.g. drivers/net/e100.c.
* @sriov_configure: Optional driver callback to allow configuration of
* number of VFs to enable via sysfs "sriov_numvfs" file.
+ * @sriov_set_msix_vec_count: PF Driver callback to change number of MSI-X
+ * vectors on a VF. Triggered via sysfs "sriov_vf_msix_count".
+ * This will change MSI-X Table Size in the VF Message Control
+ * registers.
+ * @sriov_get_vf_total_msix: PF driver callback to get the total number of
+ * MSI-X vectors available for distribution to the VFs.
* @err_handler: See Documentation/PCI/pci-error-recovery.rst
* @groups: Sysfs attribute groups.
* @driver: Driver model structure.
@@ -871,6 +877,8 @@ struct pci_driver {
int (*resume)(struct pci_dev *dev); /* Device woken up */
void (*shutdown)(struct pci_dev *dev);
int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */
+ int (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */
+ u32 (*sriov_get_vf_total_msix)(struct pci_dev *pf);
const struct pci_error_handlers *err_handler;
const struct attribute_group **groups;
struct device_driver driver;
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 351c1c9aedc5..2cb5188a7ef1 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -10,10 +10,15 @@
#include <linux/phy.h>
#include <linux/phylink.h>
+/* AN mode */
+#define DW_AN_C73 1
+#define DW_AN_C37_SGMII 2
+
struct mdio_xpcs_args {
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
struct mii_bus *bus;
int addr;
+ int an_mode;
};
struct mdio_xpcs_ops {
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1a12e4436b5b..98fb441dd72e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -493,6 +493,7 @@ struct macsec_ops;
* @loopback_enabled: Set true if this PHY has been loopbacked successfully.
* @downshifted_rate: Set true if link speed has been downshifted.
* @is_on_sfp_module: Set true if PHY is located on an SFP module.
+ * @mac_managed_pm: Set true if MAC driver takes care of suspending/resuming PHY
* @state: State of the PHY for management purposes
* @dev_flags: Device-specific flags used by the PHY driver.
* @irq: IRQ number of the PHY's interrupt (-1 if none)
@@ -567,6 +568,7 @@ struct phy_device {
unsigned loopback_enabled:1;
unsigned downshifted_rate:1;
unsigned is_on_sfp_module:1;
+ unsigned mac_managed_pm:1;
unsigned autoneg:1;
/* The most recently read link state */
@@ -1532,6 +1534,7 @@ int genphy_c45_read_mdix(struct phy_device *phydev);
int genphy_c45_pma_read_abilities(struct phy_device *phydev);
int genphy_c45_read_status(struct phy_device *phydev);
int genphy_c45_config_aneg(struct phy_device *phydev);
+int genphy_c45_loopback(struct phy_device *phydev, bool enable);
/* Generic C45 PHY driver */
extern struct phy_driver genphy_c45_driver;
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index d81a714cfbbd..fd2acfd9b597 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -64,6 +64,7 @@ enum phylink_op_type {
* @pcs_poll: MAC PCS cannot provide link change interrupt
* @poll_fixed_state: if true, starts link_poll,
* if MAC link is at %MLO_AN_FIXED mode.
+ * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND
* @get_fixed_state: callback to execute to determine the fixed link state,
* if MAC link is at %MLO_AN_FIXED mode.
*/
@@ -72,6 +73,7 @@ struct phylink_config {
enum phylink_op_type type;
bool pcs_poll;
bool poll_fixed_state;
+ bool ovr_an_inband;
void (*get_fixed_state)(struct phylink_config *config,
struct phylink_link_state *state);
};
diff --git a/include/linux/platform_data/hirschmann-hellcreek.h b/include/linux/platform_data/hirschmann-hellcreek.h
index 388846766bb2..6a000df5541f 100644
--- a/include/linux/platform_data/hirschmann-hellcreek.h
+++ b/include/linux/platform_data/hirschmann-hellcreek.h
@@ -12,6 +12,7 @@
#include <linux/types.h>
struct hellcreek_platform_data {
+ const char *name; /* Switch name */
int num_ports; /* Amount of switch ports */
int is_100_mbits; /* Is it configured to 100 or 1000 mbit/s */
int qbv_support; /* Qbv support on front TSN ports */
diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
index 98966064ee68..91f9a928344e 100644
--- a/include/linux/ppp_channel.h
+++ b/include/linux/ppp_channel.h
@@ -28,6 +28,9 @@ struct ppp_channel_ops {
int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
/* Handle an ioctl call that has come in via /dev/ppp. */
int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
+ int (*fill_forward_path)(struct net_device_path_ctx *,
+ struct net_device_path *,
+ const struct ppp_channel *);
};
struct ppp_channel {
diff --git a/include/linux/ptp_pch.h b/include/linux/ptp_pch.h
new file mode 100644
index 000000000000..51818198c292
--- /dev/null
+++ b/include/linux/ptp_pch.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PTP PCH
+ *
+ * Copyright 2019 Linaro Ltd.
+ *
+ * Author Lee Jones <lee.jones@linaro.org>
+ */
+
+#ifndef _PTP_PCH_H_
+#define _PTP_PCH_H_
+
+void pch_ch_control_write(struct pci_dev *pdev, u32 val);
+u32 pch_ch_event_read(struct pci_dev *pdev);
+void pch_ch_event_write(struct pci_dev *pdev, u32 val);
+u32 pch_src_uuid_lo_read(struct pci_dev *pdev);
+u32 pch_src_uuid_hi_read(struct pci_dev *pdev);
+u64 pch_rx_snap_read(struct pci_dev *pdev);
+u64 pch_tx_snap_read(struct pci_dev *pdev);
+int pch_set_station_address(u8 *addr, struct pci_dev *pdev);
+
+#endif /* _PTP_PCH_H_ */
diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index e339b48de32d..f34dbd0db795 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -19,7 +19,7 @@ enum qed_chain_mode {
/* Each Page contains a next pointer at its end */
QED_CHAIN_MODE_NEXT_PTR,
- /* Chain is a single page (next ptr) is unrequired */
+ /* Chain is a single page (next ptr is not required) */
QED_CHAIN_MODE_SINGLE,
/* Page pointers are located in a side list */
@@ -56,13 +56,13 @@ struct qed_chain_pbl_u32 {
};
struct qed_chain_u16 {
- /* Cyclic index of next element to produce/consme */
+ /* Cyclic index of next element to produce/consume */
u16 prod_idx;
u16 cons_idx;
};
struct qed_chain_u32 {
- /* Cyclic index of next element to produce/consme */
+ /* Cyclic index of next element to produce/consume */
u32 prod_idx;
u32 cons_idx;
};
@@ -270,7 +270,7 @@ static inline dma_addr_t qed_chain_get_pbl_phys(const struct qed_chain *chain)
/**
* @brief qed_chain_advance_page -
*
- * Advance the next element accros pages for a linked chain
+ * Advance the next element across pages for a linked chain
*
* @param p_chain
* @param p_next_elem
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index 2f64ed79cee9..ea273ba1c991 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -12,7 +12,6 @@
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/skbuff.h>
-#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/qed/qed_if.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ef00bb22164c..e5b7d9054473 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -42,6 +42,7 @@ struct audit_context;
struct backing_dev_info;
struct bio_list;
struct blk_plug;
+struct bpf_local_storage;
struct capture_control;
struct cfs_rq;
struct fs_struct;
@@ -1351,6 +1352,10 @@ struct task_struct {
/* Used by LSM modules for access restriction: */
void *security;
#endif
+#ifdef CONFIG_BPF_SYSCALL
+ /* Used by BPF task local storage */
+ struct bpf_local_storage __rcu *bpf_storage;
+#endif
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
unsigned long lowest_stack;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f2c9ee71cb2c..dbf820a50a39 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -657,6 +657,7 @@ typedef unsigned char *sk_buff_data_t;
* @protocol: Packet protocol from driver
* @destructor: Destruct function
* @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
+ * @_sk_redir: socket redirection information for skmsg
* @_nfct: Associated connection, if any (with nfctinfo bits)
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @skb_iif: ifindex of device we arrived on
@@ -756,6 +757,9 @@ struct sk_buff {
void (*destructor)(struct sk_buff *skb);
};
struct list_head tcp_tsorted_anchor;
+#ifdef CONFIG_NET_SOCK_MSG
+ unsigned long _sk_redir;
+#endif
};
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -1137,7 +1141,7 @@ static inline bool skb_fclone_busy(const struct sock *sk,
return skb->fclone == SKB_FCLONE_ORIG &&
refcount_read(&fclones->fclone_ref) > 1 &&
- fclones->skb2.sk == sk;
+ READ_ONCE(fclones->skb2.sk) == sk;
}
/**
@@ -1289,10 +1293,10 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
void __skb_get_hash(struct sk_buff *skb);
u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
u32 skb_get_poff(const struct sk_buff *skb);
-u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+u32 __skb_get_poff(const struct sk_buff *skb, const void *data,
const struct flow_keys_basic *keys, int hlen);
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
- void *data, int hlen_proto);
+ const void *data, int hlen_proto);
static inline __be32 skb_flow_get_ports(const struct sk_buff *skb,
int thoff, u8 ip_proto)
@@ -1311,9 +1315,8 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
bool __skb_flow_dissect(const struct net *net,
const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
- void *target_container,
- void *data, __be16 proto, int nhoff, int hlen,
- unsigned int flags);
+ void *target_container, const void *data,
+ __be16 proto, int nhoff, int hlen, unsigned int flags);
static inline bool skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
@@ -1335,9 +1338,9 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
static inline bool
skb_flow_dissect_flow_keys_basic(const struct net *net,
const struct sk_buff *skb,
- struct flow_keys_basic *flow, void *data,
- __be16 proto, int nhoff, int hlen,
- unsigned int flags)
+ struct flow_keys_basic *flow,
+ const void *data, __be16 proto,
+ int nhoff, int hlen, unsigned int flags)
{
memset(flow, 0, sizeof(*flow));
return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow,
@@ -3623,6 +3626,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
unsigned int flags);
int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
int len);
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
@@ -3675,14 +3679,13 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
__wsum csum);
static inline void * __must_check
-__skb_header_pointer(const struct sk_buff *skb, int offset,
- int len, void *data, int hlen, void *buffer)
+__skb_header_pointer(const struct sk_buff *skb, int offset, int len,
+ const void *data, int hlen, void *buffer)
{
- if (hlen - offset >= len)
- return data + offset;
+ if (likely(hlen - offset >= len))
+ return (void *)data + offset;
- if (!skb ||
- skb_copy_bits(skb, offset, buffer, len) < 0)
+ if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
return NULL;
return buffer;
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 822c048934e3..e242bf3d2b4a 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -56,7 +56,8 @@ struct sk_msg {
struct sk_psock_progs {
struct bpf_prog *msg_parser;
- struct bpf_prog *skb_parser;
+ struct bpf_prog *stream_parser;
+ struct bpf_prog *stream_verdict;
struct bpf_prog *skb_verdict;
};
@@ -70,12 +71,6 @@ struct sk_psock_link {
void *link_raw;
};
-struct sk_psock_parser {
- struct strparser strp;
- bool enabled;
- void (*saved_data_ready)(struct sock *sk);
-};
-
struct sk_psock_work_state {
struct sk_buff *skb;
u32 len;
@@ -90,9 +85,12 @@ struct sk_psock {
u32 eval;
struct sk_msg *cork;
struct sk_psock_progs progs;
- struct sk_psock_parser parser;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+ struct strparser strp;
+#endif
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
+ spinlock_t ingress_lock;
unsigned long state;
struct list_head link;
spinlock_t link_lock;
@@ -100,13 +98,13 @@ struct sk_psock {
void (*saved_unhash)(struct sock *sk);
void (*saved_close)(struct sock *sk, long timeout);
void (*saved_write_space)(struct sock *sk);
+ void (*saved_data_ready)(struct sock *sk);
+ int (*psock_update_sk_prot)(struct sock *sk, bool restore);
struct proto *sk_proto;
+ struct mutex work_mutex;
struct sk_psock_work_state work_state;
struct work_struct work;
- union {
- struct rcu_head rcu;
- struct work_struct gc;
- };
+ struct rcu_work rwork;
};
int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
@@ -127,6 +125,10 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
+int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
+ long timeo, int *err);
+int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags);
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
@@ -287,7 +289,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
static inline void sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
+ spin_lock_bh(&psock->ingress_lock);
list_add_tail(&msg->list, &psock->ingress_msg);
+ spin_unlock_bh(&psock->ingress_lock);
+}
+
+static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
+{
+ struct sk_msg *msg;
+
+ spin_lock_bh(&psock->ingress_lock);
+ msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+ if (msg)
+ list_del(&msg->list);
+ spin_unlock_bh(&psock->ingress_lock);
+ return msg;
+}
+
+static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock)
+{
+ struct sk_msg *msg;
+
+ spin_lock_bh(&psock->ingress_lock);
+ msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+ spin_unlock_bh(&psock->ingress_lock);
+ return msg;
+}
+
+static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock,
+ struct sk_msg *msg)
+{
+ struct sk_msg *ret;
+
+ spin_lock_bh(&psock->ingress_lock);
+ if (list_is_last(&msg->list, &psock->ingress_msg))
+ ret = NULL;
+ else
+ ret = list_next_entry(msg, list);
+ spin_unlock_bh(&psock->ingress_lock);
+ return ret;
}
static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
@@ -295,6 +335,13 @@ static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
return psock ? list_empty(&psock->ingress_msg) : true;
}
+static inline void kfree_sk_msg(struct sk_msg *msg)
+{
+ if (msg->skb)
+ consume_skb(msg->skb);
+ kfree(msg);
+}
+
static inline void sk_psock_report_error(struct sk_psock *psock, int err)
{
struct sock *sk = psock->sk;
@@ -304,10 +351,27 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
}
struct sk_psock *sk_psock_init(struct sock *sk, int node);
+void sk_psock_stop(struct sk_psock *psock, bool wait);
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
+#else
+static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
+{
+}
+
+static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
+{
+}
+#endif
+
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
@@ -327,8 +391,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link)
struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
-void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
-
static inline void sk_psock_cork_free(struct sk_psock *psock)
{
if (psock->cork) {
@@ -338,30 +400,11 @@ static inline void sk_psock_cork_free(struct sk_psock *psock)
}
}
-static inline void sk_psock_update_proto(struct sock *sk,
- struct sk_psock *psock,
- struct proto *ops)
-{
- /* Pairs with lockless read in sk_clone_lock() */
- WRITE_ONCE(sk->sk_prot, ops);
-}
-
static inline void sk_psock_restore_proto(struct sock *sk,
struct sk_psock *psock)
{
- if (inet_csk_has_ulp(sk)) {
- /* TLS does not have an unhash proto in SW cases, but we need
- * to ensure we stop using the sock_map unhash routine because
- * the associated psock is being removed. So use the original
- * unhash handler.
- */
- WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash);
- tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
- } else {
- sk->sk_write_space = psock->saved_write_space;
- /* Pairs with lockless read in sk_clone_lock() */
- WRITE_ONCE(sk->sk_prot, psock->sk_proto);
- }
+ if (psock->psock_update_sk_prot)
+ psock->psock_update_sk_prot(sk, true);
}
static inline void sk_psock_set_state(struct sk_psock *psock,
@@ -394,7 +437,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk)
return psock;
}
-void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
@@ -405,8 +447,8 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
{
- if (psock->parser.enabled)
- psock->parser.saved_data_ready(sk);
+ if (psock->saved_data_ready)
+ psock->saved_data_ready(sk);
else
sk->sk_data_ready(sk);
}
@@ -435,7 +477,8 @@ static inline int psock_replace_prog(struct bpf_prog **pprog,
static inline void psock_progs_drop(struct sk_psock_progs *progs)
{
psock_set_prog(&progs->msg_parser, NULL);
- psock_set_prog(&progs->skb_parser, NULL);
+ psock_set_prog(&progs->stream_parser, NULL);
+ psock_set_prog(&progs->stream_verdict, NULL);
psock_set_prog(&progs->skb_verdict, NULL);
}
@@ -445,6 +488,44 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
{
if (!psock)
return false;
- return psock->parser.enabled;
+ return !!psock->saved_data_ready;
+}
+
+#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
+
+/* We only have one bit so far. */
+#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)
+
+static inline bool skb_bpf_ingress(const struct sk_buff *skb)
+{
+ unsigned long sk_redir = skb->_sk_redir;
+
+ return sk_redir & BPF_F_INGRESS;
+}
+
+static inline void skb_bpf_set_ingress(struct sk_buff *skb)
+{
+ skb->_sk_redir |= BPF_F_INGRESS;
+}
+
+static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir,
+ bool ingress)
+{
+ skb->_sk_redir = (unsigned long)sk_redir;
+ if (ingress)
+ skb->_sk_redir |= BPF_F_INGRESS;
+}
+
+static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb)
+{
+ unsigned long sk_redir = skb->_sk_redir;
+
+ return (struct sock *)(sk_redir & BPF_F_PTR_MASK);
+}
+
+static inline void skb_bpf_redirect_clear(struct sk_buff *skb)
+{
+ skb->_sk_redir = 0;
}
+#endif /* CONFIG_NET_SOCK_MSG */
#endif /* _LINUX_SKMSG_H */
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index a302982de2d7..e338ef7abc00 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -81,6 +81,7 @@
struct stmmac_mdio_bus_data {
unsigned int phy_mask;
unsigned int has_xpcs;
+ unsigned int xpcs_an_inband;
int *irqs;
int probed_phy_irq;
bool needs_reset;
@@ -95,6 +96,7 @@ struct stmmac_dma_cfg {
int mixed_burst;
bool aal;
bool eame;
+ bool multi_msi_en;
};
#define AXI_BLEN 7
@@ -143,6 +145,32 @@ struct stmmac_txq_cfg {
int tbs_en;
};
+/* FPE link state */
+enum stmmac_fpe_state {
+ FPE_STATE_OFF = 0,
+ FPE_STATE_CAPABLE = 1,
+ FPE_STATE_ENTERING_ON = 2,
+ FPE_STATE_ON = 3,
+};
+
+/* FPE link-partner hand-shaking mPacket type */
+enum stmmac_mpacket_type {
+ MPACKET_VERIFY = 0,
+ MPACKET_RESPONSE = 1,
+};
+
+enum stmmac_fpe_task_state_t {
+ __FPE_REMOVING,
+ __FPE_TASK_SCHED,
+};
+
+struct stmmac_fpe_cfg {
+ bool enable; /* FPE enable */
+ bool hs_enable; /* FPE handshake enable */
+ enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */
+ enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */
+};
+
struct plat_stmmacenet_data {
int bus_id;
int phy_addr;
@@ -154,6 +182,7 @@ struct plat_stmmacenet_data {
struct device_node *mdio_node;
struct stmmac_dma_cfg *dma_cfg;
struct stmmac_est *est;
+ struct stmmac_fpe_cfg *fpe_cfg;
int clk_csr;
int has_gmac;
int enh_desc;
@@ -180,9 +209,13 @@ struct plat_stmmacenet_data {
void (*fix_mac_speed)(void *priv, unsigned int speed);
int (*serdes_powerup)(struct net_device *ndev, void *priv);
void (*serdes_powerdown)(struct net_device *ndev, void *priv);
+ void (*ptp_clk_freq_config)(void *priv);
int (*init)(struct platform_device *pdev, void *priv);
void (*exit)(struct platform_device *pdev, void *priv);
struct mac_device_info *(*setup)(void *priv);
+ int (*clks_config)(void *priv, bool enabled);
+ int (*crosststamp)(ktime_t *device, struct system_counterval_t *system,
+ void *ctx);
void *bsp_priv;
struct clk *stmmac_clk;
struct clk *pclk;
@@ -203,5 +236,15 @@ struct plat_stmmacenet_data {
u8 vlan_fail_q;
unsigned int eee_usecs_rate;
struct pci_dev *pdev;
+ bool has_crossts;
+ int int_snapshot_num;
+ bool multi_msi_en;
+ int msi_mac_vec;
+ int msi_wol_vec;
+ int msi_lpi_vec;
+ int msi_sfty_ce_vec;
+ int msi_sfty_ue_vec;
+ int msi_rx_base_vec;
+ int msi_tx_base_vec;
};
#endif
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 51298a4f4623..d99ca99837de 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -53,6 +53,8 @@ int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *);
int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *);
int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos);
+int proc_dou8vec_minmax(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos);
int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *);
int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *,
loff_t *);
diff --git a/include/linux/udp.h b/include/linux/udp.h
index aa84597bdc33..ae66dadd8543 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -51,7 +51,9 @@ struct udp_sock {
* different encapsulation layer set
* this
*/
- gro_enabled:1; /* Can accept GRO packets */
+ gro_enabled:1, /* Request GRO aggregation */
+ accept_udp_l4:1,
+ accept_udp_fraglist:1;
/*
* Following member retains the information to create a UDP header
* when the socket is uncorked.
@@ -131,8 +133,22 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
{
- return !udp_sk(sk)->gro_enabled && skb_is_gso(skb) &&
- skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4;
+ if (!skb_is_gso(skb))
+ return false;
+
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4)
+ return true;
+
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist)
+ return true;
+
+ return false;
+}
+
+static inline void udp_allow_gso(struct sock *sk)
+{
+ udp_sk(sk)->accept_udp_l4 = 1;
+ udp_sk(sk)->accept_udp_fraglist = 1;
}
#define udp_portaddr_for_each_entry(__sk, list) \
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index cfbfd6fe01df..8336e86ce606 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -53,6 +53,9 @@ struct usbnet {
u32 hard_mtu; /* count any extra framing */
size_t rx_urb_size; /* size for rx urbs */
struct mii_if_info mii;
+ long rx_speed; /* If MII not used */
+ long tx_speed; /* If MII not used */
+# define SPEED_UNSET -1
/* various kinds of pending driver work */
struct sk_buff_head rxq;
@@ -81,8 +84,6 @@ struct usbnet {
# define EVENT_LINK_CHANGE 11
# define EVENT_SET_RX_MODE 12
# define EVENT_NO_IP_ALIGN 13
- u32 rx_speed; /* in bps - NOT Mbps */
- u32 tx_speed; /* in bps - NOT Mbps */
};
static inline struct usb_driver *driver_of(struct usb_interface *intf)
@@ -267,10 +268,12 @@ extern void usbnet_pause_rx(struct usbnet *);
extern void usbnet_resume_rx(struct usbnet *);
extern void usbnet_purge_paused_rxq(struct usbnet *);
-extern int usbnet_get_link_ksettings(struct net_device *net,
+extern int usbnet_get_link_ksettings_mii(struct net_device *net,
struct ethtool_link_ksettings *cmd);
-extern int usbnet_set_link_ksettings(struct net_device *net,
+extern int usbnet_set_link_ksettings_mii(struct net_device *net,
const struct ethtool_link_ksettings *cmd);
+extern int usbnet_get_link_ksettings_internal(struct net_device *net,
+ struct ethtool_link_ksettings *cmd);
extern u32 usbnet_get_link(struct net_device *net);
extern u32 usbnet_get_msglevel(struct net_device *);
extern void usbnet_set_msglevel(struct net_device *, u32);
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ba2f439bc04d..ea4ae551c426 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -320,6 +320,7 @@ enum {
HCI_BREDR_ENABLED,
HCI_LE_SCAN_INTERRUPTED,
HCI_WIDEBAND_SPEECH_ENABLED,
+ HCI_EVENT_FILTER_CONFIGURED,
HCI_DUT_MODE,
HCI_VENDOR_DIAG,
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ebdd4afe30d2..c73ac52af186 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -584,6 +584,11 @@ struct hci_dev {
#if IS_ENABLED(CONFIG_BT_MSFTEXT)
__u16 msft_opcode;
void *msft_data;
+ bool msft_curve_validity;
+#endif
+
+#if IS_ENABLED(CONFIG_BT_AOSPEXT)
+ bool aosp_capable;
#endif
int (*open)(struct hci_dev *hdev);
@@ -704,6 +709,7 @@ struct hci_chan {
struct sk_buff_head data_q;
unsigned int sent;
__u8 state;
+ bool amp;
};
struct hci_conn_params {
@@ -1238,6 +1244,13 @@ static inline void hci_set_msft_opcode(struct hci_dev *hdev, __u16 opcode)
#endif
}
+static inline void hci_set_aosp_capable(struct hci_dev *hdev)
+{
+#if IS_ENABLED(CONFIG_BT_AOSPEXT)
+ hdev->aosp_capable = true;
+#endif
+}
+
int hci_dev_open(__u16 dev);
int hci_dev_close(__u16 dev);
int hci_dev_do_close(struct hci_dev *hdev);
@@ -1742,8 +1755,8 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c);
#define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04
#define DISCOV_BREDR_INQUIRY_LEN 0x08
#define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */
-#define DISCOV_LE_FAST_ADV_INT_MIN 100 /* msec */
-#define DISCOV_LE_FAST_ADV_INT_MAX 150 /* msec */
+#define DISCOV_LE_FAST_ADV_INT_MIN 0x00A0 /* 100 msec */
+#define DISCOV_LE_FAST_ADV_INT_MAX 0x00F0 /* 150 msec */
void mgmt_fill_version_info(void *ver);
int mgmt_new_settings(struct hci_dev *hdev);
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 61800a7b6192..3c4f550e5a8b 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -494,6 +494,7 @@ struct l2cap_le_credits {
#define L2CAP_ECRED_MIN_MTU 64
#define L2CAP_ECRED_MIN_MPS 64
+#define L2CAP_ECRED_MAX_CID 5
struct l2cap_ecred_conn_req {
__le16 psm;
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 839a2028009e..a7cffb069565 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -578,6 +578,7 @@ struct mgmt_rp_add_advertising {
#define MGMT_ADV_PARAM_TIMEOUT BIT(13)
#define MGMT_ADV_PARAM_INTERVALS BIT(14)
#define MGMT_ADV_PARAM_TX_POWER BIT(15)
+#define MGMT_ADV_PARAM_SCAN_RSP BIT(16)
#define MGMT_ADV_FLAG_SEC_MASK (MGMT_ADV_FLAG_SEC_1M | MGMT_ADV_FLAG_SEC_2M | \
MGMT_ADV_FLAG_SEC_CODED)
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 0e85713f56df..2926f1f00d65 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -27,7 +27,6 @@ struct bpf_local_storage_elem;
struct bpf_sk_storage_diag;
struct sk_buff;
struct nlattr;
-struct sock;
#ifdef CONFIG_BPF_SYSCALL
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 83a933e563fe..57b2c49f72f4 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -49,10 +49,12 @@ struct phylink_link_state;
#define DSA_TAG_PROTO_XRS700X_VALUE 19
#define DSA_TAG_PROTO_OCELOT_8021Q_VALUE 20
#define DSA_TAG_PROTO_SEVILLE_VALUE 21
+#define DSA_TAG_PROTO_BRCM_LEGACY_VALUE 22
enum dsa_tag_protocol {
DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE,
DSA_TAG_PROTO_BRCM = DSA_TAG_PROTO_BRCM_VALUE,
+ DSA_TAG_PROTO_BRCM_LEGACY = DSA_TAG_PROTO_BRCM_LEGACY_VALUE,
DSA_TAG_PROTO_BRCM_PREPEND = DSA_TAG_PROTO_BRCM_PREPEND_VALUE,
DSA_TAG_PROTO_DSA = DSA_TAG_PROTO_DSA_VALUE,
DSA_TAG_PROTO_EDSA = DSA_TAG_PROTO_EDSA_VALUE,
@@ -491,6 +493,20 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp)
return dp->vlan_filtering;
}
+static inline
+struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp)
+{
+ if (!dp->bridge_dev)
+ return NULL;
+
+ if (dp->lag_dev)
+ return dp->lag_dev;
+ else if (dp->hsr_dev)
+ return dp->hsr_dev;
+
+ return dp->slave;
+}
+
typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
bool is_static, void *data);
struct dsa_switch_ops {
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index cc10b10dc3a1..ffd386ea0dbb 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -350,7 +350,7 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
u32 flow_hash_from_keys(struct flow_keys *keys);
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
struct flow_dissector_key_icmp *key_icmp,
- void *data, int thoff, int hlen);
+ const void *data, int thoff, int hlen);
static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
@@ -368,8 +368,8 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec
struct bpf_flow_dissector {
struct bpf_flow_keys *flow_keys;
const struct sk_buff *skb;
- void *data;
- void *data_end;
+ const void *data;
+ const void *data_end;
};
static inline void
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index e6bd8ebf9ac3..dc5c1e69cd9f 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -147,6 +147,7 @@ enum flow_action_id {
FLOW_ACTION_MPLS_POP,
FLOW_ACTION_MPLS_MANGLE,
FLOW_ACTION_GATE,
+ FLOW_ACTION_PPPOE_PUSH,
NUM_FLOW_ACTIONS,
};
@@ -234,6 +235,8 @@ struct flow_action_entry {
u32 index;
u32 burst;
u64 rate_bytes_ps;
+ u64 burst_pkt;
+ u64 rate_pkt_ps;
u32 mtu;
} police;
struct { /* FLOW_ACTION_CT */
@@ -272,6 +275,9 @@ struct flow_action_entry {
u32 num_entries;
struct action_gate_entry *entries;
} gate;
+ struct { /* FLOW_ACTION_PPPOE_PUSH */
+ u16 sid;
+ } pppoe;
};
struct flow_action_cookie *cookie; /* user defined action cookie */
};
diff --git a/include/net/gro.h b/include/net/gro.h
index 8a6eb5303cc4..01edaf3fdda0 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -3,10 +3,23 @@
#ifndef _NET_IPV6_GRO_H
#define _NET_IPV6_GRO_H
+#include <linux/indirect_call_wrapper.h>
+
+struct list_head;
+struct sk_buff;
+
INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
+
+#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \
+({ \
+ unlikely(gro_recursion_inc_test(skb)) ? \
+ NAPI_GRO_CB(skb)->flush |= 1, NULL : \
+ INDIRECT_CALL_INET(cb, f2, f1, head, skb); \
+})
+
#endif /* _NET_IPV6_GRO_H */
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 8bf5906073bc..71bb4cc4d05d 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -78,6 +78,7 @@ struct inet6_ifaddr {
struct ip6_sf_socklist {
unsigned int sl_max;
unsigned int sl_count;
+ struct rcu_head rcu;
struct in6_addr sl_addr[];
};
@@ -91,18 +92,18 @@ struct ipv6_mc_socklist {
int ifindex;
unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */
struct ipv6_mc_socklist __rcu *next;
- rwlock_t sflock;
- struct ip6_sf_socklist *sflist;
+ struct ip6_sf_socklist __rcu *sflist;
struct rcu_head rcu;
};
struct ip6_sf_list {
- struct ip6_sf_list *sf_next;
+ struct ip6_sf_list __rcu *sf_next;
struct in6_addr sf_addr;
unsigned long sf_count[2]; /* include/exclude counts */
unsigned char sf_gsresp; /* include in g & s response? */
unsigned char sf_oldin; /* change state */
unsigned char sf_crcount; /* retrans. left to send */
+ struct rcu_head rcu;
};
#define MAF_TIMER_RUNNING 0x01
@@ -114,19 +115,19 @@ struct ip6_sf_list {
struct ifmcaddr6 {
struct in6_addr mca_addr;
struct inet6_dev *idev;
- struct ifmcaddr6 *next;
- struct ip6_sf_list *mca_sources;
- struct ip6_sf_list *mca_tomb;
+ struct ifmcaddr6 __rcu *next;
+ struct ip6_sf_list __rcu *mca_sources;
+ struct ip6_sf_list __rcu *mca_tomb;
unsigned int mca_sfmode;
unsigned char mca_crcount;
unsigned long mca_sfcount[2];
- struct timer_list mca_timer;
+ struct delayed_work mca_work;
unsigned int mca_flags;
int mca_users;
refcount_t mca_refcnt;
- spinlock_t mca_lock;
unsigned long mca_cstamp;
unsigned long mca_tstamp;
+ struct rcu_head rcu;
};
/* Anycast stuff */
@@ -165,9 +166,8 @@ struct inet6_dev {
struct list_head addr_list;
- struct ifmcaddr6 *mc_list;
- struct ifmcaddr6 *mc_tomb;
- spinlock_t mc_lock;
+ struct ifmcaddr6 __rcu *mc_list;
+ struct ifmcaddr6 __rcu *mc_tomb;
unsigned char mc_qrv; /* Query Robustness Variable */
unsigned char mc_gq_running;
@@ -179,9 +179,18 @@ struct inet6_dev {
unsigned long mc_qri; /* Query Response Interval */
unsigned long mc_maxdelay;
- struct timer_list mc_gq_timer; /* general query timer */
- struct timer_list mc_ifc_timer; /* interface change timer */
- struct timer_list mc_dad_timer; /* dad complete mc timer */
+ struct delayed_work mc_gq_work; /* general query work */
+ struct delayed_work mc_ifc_work; /* interface change work */
+ struct delayed_work mc_dad_work; /* dad complete mc work */
+ struct delayed_work mc_query_work; /* mld query work */
+ struct delayed_work mc_report_work; /* mld report work */
+
+ struct sk_buff_head mc_query_queue; /* mld query queue */
+ struct sk_buff_head mc_report_queue; /* mld report queue */
+
+ spinlock_t mc_query_lock; /* mld query queue lock */
+ spinlock_t mc_report_lock; /* mld query report lock */
+ struct mutex mc_lock; /* mld global lock */
struct ifacaddr6 *ac_list;
rwlock_t lock;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index bd1f396cc9c7..448bf2b34759 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -30,6 +30,7 @@
*/
#define NEXTHDR_HOP 0 /* Hop-by-hop option header. */
+#define NEXTHDR_IPV4 4 /* IPv4 in IPv6 */
#define NEXTHDR_TCP 6 /* TCP segment. */
#define NEXTHDR_UDP 17 /* UDP message. */
#define NEXTHDR_IPV6 41 /* IPv6 in IPv6 */
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 8fce558b5fea..afbce90c4480 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -66,6 +66,8 @@ struct ipv6_stub {
int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
+ struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev);
};
extern const struct ipv6_stub *ipv6_stub __read_mostly;
diff --git a/include/net/lapb.h b/include/net/lapb.h
index eee73442a1ba..124ee122f2c8 100644
--- a/include/net/lapb.h
+++ b/include/net/lapb.h
@@ -92,7 +92,7 @@ struct lapb_cb {
unsigned short n2, n2count;
unsigned short t1, t2;
struct timer_list t1timer, t2timer;
- bool t1timer_stop, t2timer_stop;
+ bool t1timer_running, t2timer_running;
/* Internal control information */
struct sk_buff_head write_queue;
diff --git a/include/net/mld.h b/include/net/mld.h
index 496bddb59942..c07359808493 100644
--- a/include/net/mld.h
+++ b/include/net/mld.h
@@ -92,6 +92,9 @@ struct mld2_query {
#define MLD_EXP_MIN_LIMIT 32768UL
#define MLDV1_MRD_MAX_COMPAT (MLD_EXP_MIN_LIMIT - 1)
+#define MLD_MAX_QUEUE 8
+#define MLD_MAX_SKBS 32
+
static inline unsigned long mldv2_mrc(const struct mld2_query *mlh2)
{
/* RFC3810, 5.1.3. Maximum Response Code */
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 5694370be3d4..83f23774b908 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -30,8 +30,27 @@ struct mptcp_ext {
ack64:1,
mpc_map:1,
frozen:1,
- __unused:1;
- /* one byte hole */
+ reset_transient:1;
+ u8 reset_reason:4;
+};
+
+#define MPTCP_RM_IDS_MAX 8
+
+struct mptcp_rm_list {
+ u8 ids[MPTCP_RM_IDS_MAX];
+ u8 nr;
+};
+
+struct mptcp_addr_info {
+ u8 id;
+ sa_family_t family;
+ __be16 port;
+ union {
+ struct in_addr addr;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ struct in6_addr addr6;
+#endif
+ };
};
struct mptcp_out_options {
@@ -39,18 +58,13 @@ struct mptcp_out_options {
u16 suboptions;
u64 sndr_key;
u64 rcvr_key;
- union {
- struct in_addr addr;
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- struct in6_addr addr6;
-#endif
- };
- u8 addr_id;
- u16 port;
u64 ahmac;
- u8 rm_id;
+ struct mptcp_addr_info addr;
+ struct mptcp_rm_list rm_list;
u8 join_id;
u8 backup;
+ u8 reset_reason:4;
+ u8 reset_transient:1;
u32 nonce;
u64 thmac;
u32 token;
@@ -149,6 +163,16 @@ void mptcp_seq_show(struct seq_file *seq);
int mptcp_subflow_init_cookie_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb);
+
+__be32 mptcp_get_reset_option(const struct sk_buff *skb);
+
+static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
+{
+ if (skb_ext_exist(skb, SKB_EXT_MPTCP))
+ return mptcp_get_reset_option(skb);
+
+ return htonl(0u);
+}
#else
static inline void mptcp_init(void)
@@ -229,6 +253,8 @@ static inline int mptcp_subflow_init_cookie_req(struct request_sock *req,
{
return 0; /* TCP fallback */
}
+
+static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); }
#endif /* CONFIG_MPTCP */
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index dcaee24a4d87..fa5887143f0d 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -22,7 +22,6 @@
#include <net/netns/nexthop.h>
#include <net/netns/ieee802154_6lowpan.h>
#include <net/netns/sctp.h>
-#include <net/netns/dccp.h>
#include <net/netns/netfilter.h>
#include <net/netns/x_tables.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -130,9 +129,6 @@ struct net {
#if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
struct netns_sctp sctp;
#endif
-#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
- struct netns_dccp dccp;
-#endif
#ifdef CONFIG_NETFILTER
struct netns_nf nf;
struct netns_xt xt;
@@ -142,15 +138,6 @@ struct net {
#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
struct netns_nftables nft;
#endif
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- struct netns_nf_frag nf_frag;
- struct ctl_table_header *nf_frag_frags_hdr;
-#endif
- struct sock *nfnl;
- struct sock *nfnl_stash;
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
- struct list_head nfct_timeout_list;
-#endif
#endif
#ifdef CONFIG_WEXT_CORE
struct sk_buff_head wext_nlevents;
@@ -407,7 +394,6 @@ int register_pernet_device(struct pernet_operations *);
void unregister_pernet_device(struct pernet_operations *);
struct ctl_table;
-struct ctl_table_header;
#ifdef CONFIG_SYSCTL
int net_sysctl_init(void);
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 7b3c873f8839..e95483192d1b 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -4,7 +4,4 @@
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
-#include <linux/sysctl.h>
-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
-
#endif /* _NF_CONNTRACK_IPV6_H*/
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 6d31cd041143..ece923e2035b 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -13,4 +13,10 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user);
struct inet_frags_ctl;
+struct nft_ct_frag6_pernet {
+ struct ctl_table_header *nf_frag_frags_hdr;
+ struct fqdir *fqdir;
+ unsigned int users;
+};
+
#endif /* _NF_DEFRAG_IPV6_H */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 439379ca9ffa..86d86c860ede 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -47,6 +47,13 @@ struct nf_conntrack_net {
unsigned int users4;
unsigned int users6;
unsigned int users_bridge;
+#ifdef CONFIG_SYSCTL
+ struct ctl_table_header *sysctl_header;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ struct delayed_work ecache_dwork;
+ struct netns_ct *ct_net;
+#endif
};
#include <linux/types.h>
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index eb81f9195e28..d00ba6048e44 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -171,12 +171,18 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp,
u32 portid, int report);
+void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state);
+
void nf_conntrack_ecache_pernet_init(struct net *net);
void nf_conntrack_ecache_pernet_fini(struct net *net);
int nf_conntrack_ecache_init(void);
void nf_conntrack_ecache_fini(void);
+static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net)
+{
+ return net->ct.ecache_dwork_pending;
+}
#else /* CONFIG_NF_CONNTRACK_EVENTS */
static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
@@ -186,6 +192,11 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
{
}
+static inline void nf_conntrack_ecache_work(struct net *net,
+ enum nf_ct_ecache_state s)
+{
+}
+
static inline void nf_conntrack_ecache_pernet_init(struct net *net)
{
}
@@ -203,26 +214,6 @@ static inline void nf_conntrack_ecache_fini(void)
{
}
+static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return false; }
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
-
-static inline void nf_conntrack_ecache_delayed_work(struct net *net)
-{
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
- if (!delayed_work_pending(&net->ct.ecache_dwork)) {
- schedule_delayed_work(&net->ct.ecache_dwork, HZ);
- net->ct.ecache_dwork_pending = true;
- }
-#endif
-}
-
-static inline void nf_conntrack_ecache_work(struct net *net)
-{
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
- if (net->ct.ecache_dwork_pending) {
- net->ct.ecache_dwork_pending = false;
- mod_delayed_work(system_wq, &net->ct.ecache_dwork, 0);
- }
-#endif
-}
-
#endif /*_NF_CONNTRACK_ECACHE_H*/
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 54c4d5c908a5..583b327d8fc0 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -86,8 +86,16 @@ static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
enum flow_offload_tuple_dir {
FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
};
+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
+
+enum flow_offload_xmit_type {
+ FLOW_OFFLOAD_XMIT_NEIGH = 0,
+ FLOW_OFFLOAD_XMIT_XFRM,
+ FLOW_OFFLOAD_XMIT_DIRECT,
+};
+
+#define NF_FLOW_TABLE_ENCAP_MAX 2
struct flow_offload_tuple {
union {
@@ -107,15 +115,31 @@ struct flow_offload_tuple {
u8 l3proto;
u8 l4proto;
+ struct {
+ u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
/* All members above are keys for lookups, see flow_offload_hash(). */
struct { } __hash;
- u8 dir;
-
+ u8 dir:2,
+ xmit_type:2,
+ encap_num:2,
+ in_vlan_ingress:2;
u16 mtu;
-
- struct dst_entry *dst_cache;
+ union {
+ struct {
+ struct dst_entry *dst_cache;
+ u32 dst_cookie;
+ };
+ struct {
+ u32 ifidx;
+ u32 hw_ifidx;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ } out;
+ };
};
struct flow_offload_tuple_rhash {
@@ -158,7 +182,23 @@ static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
struct nf_flow_route {
struct {
- struct dst_entry *dst;
+ struct dst_entry *dst;
+ struct {
+ u32 ifindex;
+ struct {
+ u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+ u8 num_encaps:2,
+ ingress_vlans:2;
+ } in;
+ struct {
+ u32 ifindex;
+ u32 hw_ifindex;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ } out;
+ enum flow_offload_xmit_type xmit_type;
} tuple[FLOW_OFFLOAD_DIR_MAX];
};
@@ -229,12 +269,12 @@ void nf_flow_table_free(struct nf_flowtable *flow_table);
void flow_offload_teardown(struct flow_offload *flow);
-int nf_flow_snat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir);
-int nf_flow_dnat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir);
+void nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
+void nf_flow_dnat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
struct flow_ports {
__be16 source, dest;
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 716db4a0fed8..e55eedc84ed7 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -68,7 +68,6 @@ void nf_log_unbind_pf(struct net *net, u_int8_t pf);
int nf_logger_find_get(int pf, enum nf_log_type type);
void nf_logger_put(int pf, enum nf_log_type type);
-void nf_logger_request_module(int pf, enum nf_log_type type);
#define MODULE_ALIAS_NF_LOGGER(family, type) \
MODULE_ALIAS("nf-logger-" __stringify(family) "-" __stringify(type))
@@ -99,28 +98,4 @@ struct nf_log_buf;
struct nf_log_buf *nf_log_buf_open(void);
__printf(2, 3) int nf_log_buf_add(struct nf_log_buf *m, const char *f, ...);
void nf_log_buf_close(struct nf_log_buf *m);
-
-/* common logging functions */
-int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
- u8 proto, int fragment, unsigned int offset);
-int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
- u8 proto, int fragment, unsigned int offset,
- unsigned int logflags);
-void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
- struct sock *sk);
-void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb);
-void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
- unsigned int hooknum, const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo,
- const char *prefix);
-void nf_log_l2packet(struct net *net, u_int8_t pf,
- __be16 protocol,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo, const char *prefix);
-
#endif /* _NF_LOG_H */
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 5aaced6bf13e..f0f7a3c5da6a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1498,13 +1498,16 @@ struct nft_trans_chain {
struct nft_trans_table {
bool update;
- bool enable;
+ u8 state;
+ u32 flags;
};
#define nft_trans_table_update(trans) \
(((struct nft_trans_table *)trans->data)->update)
-#define nft_trans_table_enable(trans) \
- (((struct nft_trans_table *)trans->data)->enable)
+#define nft_trans_table_state(trans) \
+ (((struct nft_trans_table *)trans->data)->state)
+#define nft_trans_table_flags(trans) \
+ (((struct nft_trans_table *)trans->data)->flags)
struct nft_trans_elem {
struct nft_set *set;
@@ -1559,4 +1562,20 @@ void nf_tables_trans_destroy_flush_work(void);
int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
__be64 nf_jiffies64_to_msecs(u64 input);
+#ifdef CONFIG_MODULES
+__printf(2, 3) int nft_request_module(struct net *net, const char *fmt, ...);
+#else
+static inline int nft_request_module(struct net *net, const char *fmt, ...) { return -ENOENT; }
+#endif
+
+struct nftables_pernet {
+ struct list_head tables;
+ struct list_head commit_list;
+ struct list_head module_list;
+ struct list_head notify_list;
+ struct mutex commit_mutex;
+ unsigned int base_seq;
+ u8 validate_state;
+};
+
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 806454e767bf..e5f664d69ead 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -96,13 +96,9 @@ struct netns_ct {
atomic_t count;
unsigned int expect_count;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- struct delayed_work ecache_dwork;
bool ecache_dwork_pending;
#endif
bool auto_assign_helper_warned;
-#ifdef CONFIG_SYSCTL
- struct ctl_table_header *sysctl_header;
-#endif
unsigned int sysctl_log_invalid; /* Log invalid packets */
int sysctl_events;
int sysctl_acct;
diff --git a/include/net/netns/dccp.h b/include/net/netns/dccp.h
deleted file mode 100644
index cdbc4f5b8390..000000000000
--- a/include/net/netns/dccp.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NETNS_DCCP_H__
-#define __NETNS_DCCP_H__
-
-struct sock;
-
-struct netns_dccp {
- struct sock *v4_ctl_sk;
- struct sock *v6_ctl_sk;
-};
-
-#endif
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 70a2a085dd1a..87e1612497ea 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -11,7 +11,6 @@
#include <linux/rcupdate.h>
#include <linux/siphash.h>
-struct tcpm_hash_bucket;
struct ctl_table_header;
struct ipv4_devconf;
struct fib_rules_ops;
@@ -33,14 +32,18 @@ struct inet_hashinfo;
struct inet_timewait_death_row {
atomic_t tw_count;
+ char tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)];
- struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp;
+ struct inet_hashinfo *hashinfo;
int sysctl_max_tw_buckets;
};
struct tcp_fastopen_context;
struct netns_ipv4 {
+ /* Please keep tcp_death_row at first field in netns_ipv4 */
+ struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp;
+
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
struct ctl_table_header *frags_hdr;
@@ -54,17 +57,17 @@ struct netns_ipv4 {
struct mutex ra_mutex;
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rules_ops *rules_ops;
- bool fib_has_custom_rules;
- unsigned int fib_rules_require_fldissect;
struct fib_table __rcu *fib_main;
struct fib_table __rcu *fib_default;
+ unsigned int fib_rules_require_fldissect;
+ bool fib_has_custom_rules;
#endif
bool fib_has_custom_local_routes;
+ bool fib_offload_disabled;
#ifdef CONFIG_IP_ROUTE_CLASSID
int fib_num_tclassid_users;
#endif
struct hlist_head *fib_table_hash;
- bool fib_offload_disabled;
struct sock *fibnl;
struct sock * __percpu *icmp_sk;
@@ -84,41 +87,42 @@ struct netns_ipv4 {
struct xt_table *nat_table;
#endif
- int sysctl_icmp_echo_ignore_all;
- int sysctl_icmp_echo_ignore_broadcasts;
- int sysctl_icmp_ignore_bogus_error_responses;
+ u8 sysctl_icmp_echo_ignore_all;
+ u8 sysctl_icmp_echo_enable_probe;
+ u8 sysctl_icmp_echo_ignore_broadcasts;
+ u8 sysctl_icmp_ignore_bogus_error_responses;
+ u8 sysctl_icmp_errors_use_inbound_ifaddr;
int sysctl_icmp_ratelimit;
int sysctl_icmp_ratemask;
- int sysctl_icmp_errors_use_inbound_ifaddr;
struct local_ports ip_local_ports;
- int sysctl_tcp_ecn;
- int sysctl_tcp_ecn_fallback;
+ u8 sysctl_tcp_ecn;
+ u8 sysctl_tcp_ecn_fallback;
- int sysctl_ip_default_ttl;
- int sysctl_ip_no_pmtu_disc;
- int sysctl_ip_fwd_use_pmtu;
- int sysctl_ip_fwd_update_priority;
- int sysctl_ip_nonlocal_bind;
- int sysctl_ip_autobind_reuse;
+ u8 sysctl_ip_default_ttl;
+ u8 sysctl_ip_no_pmtu_disc;
+ u8 sysctl_ip_fwd_use_pmtu;
+ u8 sysctl_ip_fwd_update_priority;
+ u8 sysctl_ip_nonlocal_bind;
+ u8 sysctl_ip_autobind_reuse;
/* Shall we try to damage output packets if routing dev changes? */
- int sysctl_ip_dynaddr;
- int sysctl_ip_early_demux;
+ u8 sysctl_ip_dynaddr;
+ u8 sysctl_ip_early_demux;
#ifdef CONFIG_NET_L3_MASTER_DEV
- int sysctl_raw_l3mdev_accept;
+ u8 sysctl_raw_l3mdev_accept;
#endif
- int sysctl_tcp_early_demux;
- int sysctl_udp_early_demux;
+ u8 sysctl_tcp_early_demux;
+ u8 sysctl_udp_early_demux;
- int sysctl_nexthop_compat_mode;
+ u8 sysctl_nexthop_compat_mode;
- int sysctl_fwmark_reflect;
- int sysctl_tcp_fwmark_accept;
+ u8 sysctl_fwmark_reflect;
+ u8 sysctl_tcp_fwmark_accept;
#ifdef CONFIG_NET_L3_MASTER_DEV
- int sysctl_tcp_l3mdev_accept;
+ u8 sysctl_tcp_l3mdev_accept;
#endif
- int sysctl_tcp_mtu_probing;
+ u8 sysctl_tcp_mtu_probing;
int sysctl_tcp_mtu_probe_floor;
int sysctl_tcp_base_mss;
int sysctl_tcp_min_snd_mss;
@@ -126,55 +130,55 @@ struct netns_ipv4 {
u32 sysctl_tcp_probe_interval;
int sysctl_tcp_keepalive_time;
- int sysctl_tcp_keepalive_probes;
int sysctl_tcp_keepalive_intvl;
+ u8 sysctl_tcp_keepalive_probes;
- int sysctl_tcp_syn_retries;
- int sysctl_tcp_synack_retries;
- int sysctl_tcp_syncookies;
+ u8 sysctl_tcp_syn_retries;
+ u8 sysctl_tcp_synack_retries;
+ u8 sysctl_tcp_syncookies;
int sysctl_tcp_reordering;
- int sysctl_tcp_retries1;
- int sysctl_tcp_retries2;
- int sysctl_tcp_orphan_retries;
+ u8 sysctl_tcp_retries1;
+ u8 sysctl_tcp_retries2;
+ u8 sysctl_tcp_orphan_retries;
+ u8 sysctl_tcp_tw_reuse;
int sysctl_tcp_fin_timeout;
unsigned int sysctl_tcp_notsent_lowat;
- int sysctl_tcp_tw_reuse;
- int sysctl_tcp_sack;
- int sysctl_tcp_window_scaling;
- int sysctl_tcp_timestamps;
- int sysctl_tcp_early_retrans;
- int sysctl_tcp_recovery;
- int sysctl_tcp_thin_linear_timeouts;
- int sysctl_tcp_slow_start_after_idle;
- int sysctl_tcp_retrans_collapse;
- int sysctl_tcp_stdurg;
- int sysctl_tcp_rfc1337;
- int sysctl_tcp_abort_on_overflow;
- int sysctl_tcp_fack;
+ u8 sysctl_tcp_sack;
+ u8 sysctl_tcp_window_scaling;
+ u8 sysctl_tcp_timestamps;
+ u8 sysctl_tcp_early_retrans;
+ u8 sysctl_tcp_recovery;
+ u8 sysctl_tcp_thin_linear_timeouts;
+ u8 sysctl_tcp_slow_start_after_idle;
+ u8 sysctl_tcp_retrans_collapse;
+ u8 sysctl_tcp_stdurg;
+ u8 sysctl_tcp_rfc1337;
+ u8 sysctl_tcp_abort_on_overflow;
+ u8 sysctl_tcp_fack; /* obsolete */
int sysctl_tcp_max_reordering;
- int sysctl_tcp_dsack;
- int sysctl_tcp_app_win;
int sysctl_tcp_adv_win_scale;
- int sysctl_tcp_frto;
- int sysctl_tcp_nometrics_save;
- int sysctl_tcp_no_ssthresh_metrics_save;
- int sysctl_tcp_moderate_rcvbuf;
- int sysctl_tcp_tso_win_divisor;
- int sysctl_tcp_workaround_signed_windows;
+ u8 sysctl_tcp_dsack;
+ u8 sysctl_tcp_app_win;
+ u8 sysctl_tcp_frto;
+ u8 sysctl_tcp_nometrics_save;
+ u8 sysctl_tcp_no_ssthresh_metrics_save;
+ u8 sysctl_tcp_moderate_rcvbuf;
+ u8 sysctl_tcp_tso_win_divisor;
+ u8 sysctl_tcp_workaround_signed_windows;
int sysctl_tcp_limit_output_bytes;
int sysctl_tcp_challenge_ack_limit;
- int sysctl_tcp_min_tso_segs;
int sysctl_tcp_min_rtt_wlen;
- int sysctl_tcp_autocorking;
+ u8 sysctl_tcp_min_tso_segs;
+ u8 sysctl_tcp_autocorking;
+ u8 sysctl_tcp_reflect_tos;
+ u8 sysctl_tcp_comp_sack_nr;
int sysctl_tcp_invalid_ratelimit;
int sysctl_tcp_pacing_ss_ratio;
int sysctl_tcp_pacing_ca_ratio;
int sysctl_tcp_wmem[3];
int sysctl_tcp_rmem[3];
- int sysctl_tcp_comp_sack_nr;
unsigned long sysctl_tcp_comp_sack_delay_ns;
unsigned long sysctl_tcp_comp_sack_slack_ns;
- struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
const struct tcp_congestion_ops __rcu *tcp_congestion_control;
@@ -183,20 +187,19 @@ struct netns_ipv4 {
unsigned int sysctl_tcp_fastopen_blackhole_timeout;
atomic_t tfo_active_disable_times;
unsigned long tfo_active_disable_stamp;
- int sysctl_tcp_reflect_tos;
int sysctl_udp_wmem_min;
int sysctl_udp_rmem_min;
- int sysctl_fib_notify_on_flag_change;
+ u8 sysctl_fib_notify_on_flag_change;
#ifdef CONFIG_NET_L3_MASTER_DEV
- int sysctl_udp_l3mdev_accept;
+ u8 sysctl_udp_l3mdev_accept;
#endif
+ u8 sysctl_igmp_llm_reports;
int sysctl_igmp_max_memberships;
int sysctl_igmp_max_msf;
- int sysctl_igmp_llm_reports;
int sysctl_igmp_qrv;
struct ping_group_range ping_group_range;
@@ -217,8 +220,8 @@ struct netns_ipv4 {
#endif
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- int sysctl_fib_multipath_use_neigh;
- int sysctl_fib_multipath_hash_policy;
+ u8 sysctl_fib_multipath_use_neigh;
+ u8 sysctl_fib_multipath_hash_policy;
#endif
struct fib_notifier_ops *notifier_ops;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 21c0debbd39e..808f0f79ea9c 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -20,7 +20,6 @@ struct netns_sysctl_ipv6 {
struct ctl_table_header *frags_hdr;
struct ctl_table_header *xfrm6_hdr;
#endif
- int bindv6only;
int flush_delay;
int ip6_rt_max_size;
int ip6_rt_gc_min_interval;
@@ -29,21 +28,22 @@ struct netns_sysctl_ipv6 {
int ip6_rt_gc_elasticity;
int ip6_rt_mtu_expires;
int ip6_rt_min_advmss;
- int multipath_hash_policy;
- int flowlabel_consistency;
- int auto_flowlabels;
+ u8 bindv6only;
+ u8 multipath_hash_policy;
+ u8 flowlabel_consistency;
+ u8 auto_flowlabels;
int icmpv6_time;
- int icmpv6_echo_ignore_all;
- int icmpv6_echo_ignore_multicast;
- int icmpv6_echo_ignore_anycast;
+ u8 icmpv6_echo_ignore_all;
+ u8 icmpv6_echo_ignore_multicast;
+ u8 icmpv6_echo_ignore_anycast;
DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1);
unsigned long *icmpv6_ratemask_ptr;
- int anycast_src_echo_reply;
- int ip_nonlocal_bind;
- int fwmark_reflect;
+ u8 anycast_src_echo_reply;
+ u8 ip_nonlocal_bind;
+ u8 fwmark_reflect;
+ u8 flowlabel_state_ranges;
int idgen_retries;
int idgen_delay;
- int flowlabel_state_ranges;
int flowlabel_reflect;
int max_dst_opts_cnt;
int max_hbh_opts_cnt;
@@ -51,10 +51,13 @@ struct netns_sysctl_ipv6 {
int max_hbh_opts_len;
int seg6_flowlabel;
bool skip_notify_on_dev_down;
- int fib_notify_on_flag_change;
+ u8 fib_notify_on_flag_change;
};
struct netns_ipv6 {
+ /* Keep ip6_dst_ops at the beginning of netns_ipv6 */
+ struct dst_ops ip6_dst_ops;
+
struct netns_sysctl_ipv6 sysctl;
struct ipv6_devconf *devconf_all;
struct ipv6_devconf *devconf_dflt;
@@ -76,7 +79,6 @@ struct netns_ipv6 {
struct hlist_head *fib_table_hash;
struct fib6_table *fib6_main_tbl;
struct list_head fib6_walkers;
- struct dst_ops ip6_dst_ops;
rwlock_t fib6_walker_lock;
spinlock_t fib6_gc_lock;
unsigned int ip6_rt_gc_expire;
diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 59b2c3a3db42..7e373664b1e7 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -5,22 +5,19 @@
#include <net/snmp.h>
struct netns_mib {
- DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics);
+#if IS_ENABLED(CONFIG_IPV6)
+ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
+#endif
+
+ DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
DEFINE_SNMP_STAT(struct linux_mib, net_statistics);
- DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
- DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics);
- DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
- DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib, icmpmsg_statistics);
+ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
#if IS_ENABLED(CONFIG_IPV6)
- struct proc_dir_entry *proc_net_devsnmp6;
DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6);
- DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
- DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
- DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
- DEFINE_SNMP_STAT_ATOMIC(struct icmpv6msg_mib, icmpv6msg_statistics);
#endif
+
#ifdef CONFIG_XFRM_STATISTICS
DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics);
#endif
@@ -30,6 +27,19 @@ struct netns_mib {
#ifdef CONFIG_MPTCP
DEFINE_SNMP_STAT(struct mptcp_mib, mptcp_statistics);
#endif
+
+ DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics);
+#if IS_ENABLED(CONFIG_IPV6)
+ DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
+#endif
+
+ DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
+ DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib, icmpmsg_statistics);
+#if IS_ENABLED(CONFIG_IPV6)
+ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
+ DEFINE_SNMP_STAT_ATOMIC(struct icmpv6msg_mib, icmpv6msg_statistics);
+ struct proc_dir_entry *proc_net_devsnmp6;
+#endif
};
#endif
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index ca043342c0eb..15e2b13fb0c0 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -28,11 +28,5 @@ struct netns_nf {
#if IS_ENABLED(CONFIG_DECNET)
struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
#endif
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
- bool defrag_ipv4;
-#endif
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- bool defrag_ipv6;
-#endif
};
#endif
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 6c0806bd8d1e..8c77832d0240 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -5,14 +5,7 @@
#include <linux/list.h>
struct netns_nftables {
- struct list_head tables;
- struct list_head commit_list;
- struct list_head module_list;
- struct list_head notify_list;
- struct mutex commit_mutex;
- unsigned int base_seq;
u8 gencursor;
- u8 validate_state;
};
#endif
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index 9bc5a12fdbb0..83c8ea2e87a6 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -8,7 +8,6 @@
struct ebt_table;
struct netns_xt {
- struct list_head tables[NFPROTO_NUMPROTO];
bool notrack_deprecated_warning;
bool clusterip_deprecated_warning;
#if defined(CONFIG_BRIDGE_NF_EBTABLES) || \
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index a10a319d7eb2..10e1777877e6 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -40,6 +40,12 @@ struct nh_config {
struct nlattr *nh_grp;
u16 nh_grp_type;
+ u16 nh_grp_res_num_buckets;
+ unsigned long nh_grp_res_idle_timer;
+ unsigned long nh_grp_res_unbalanced_timer;
+ bool nh_grp_res_has_num_buckets;
+ bool nh_grp_res_has_idle_timer;
+ bool nh_grp_res_has_unbalanced_timer;
struct nlattr *nh_encap;
u16 nh_encap_type;
@@ -63,6 +69,32 @@ struct nh_info {
};
};
+struct nh_res_bucket {
+ struct nh_grp_entry __rcu *nh_entry;
+ atomic_long_t used_time;
+ unsigned long migrated_time;
+ bool occupied;
+ u8 nh_flags;
+};
+
+struct nh_res_table {
+ struct net *net;
+ u32 nhg_id;
+ struct delayed_work upkeep_dw;
+
+ /* List of NHGEs that have too few buckets ("uw" for underweight).
+ * Reclaimed buckets will be given to entries in this list.
+ */
+ struct list_head uw_nh_entries;
+ unsigned long unbalanced_since;
+
+ u32 idle_timer;
+ u32 unbalanced_timer;
+
+ u16 num_nh_buckets;
+ struct nh_res_bucket nh_buckets[];
+};
+
struct nh_grp_entry {
struct nexthop *nh;
u8 weight;
@@ -70,7 +102,14 @@ struct nh_grp_entry {
union {
struct {
atomic_t upper_bound;
- } mpath;
+ } hthr;
+ struct {
+ /* Member on uw_nh_entries. */
+ struct list_head uw_nh_entry;
+
+ u16 count_buckets;
+ u16 wants_buckets;
+ } res;
};
struct list_head nh_list;
@@ -80,9 +119,13 @@ struct nh_grp_entry {
struct nh_group {
struct nh_group *spare; /* spare group for removals */
u16 num_nh;
- bool mpath;
+ bool is_multipath;
+ bool hash_threshold;
+ bool resilient;
bool fdb_nh;
bool has_v4;
+
+ struct nh_res_table __rcu *res_table;
struct nh_grp_entry nh_entries[];
};
@@ -112,11 +155,15 @@ struct nexthop {
enum nexthop_event_type {
NEXTHOP_EVENT_DEL,
NEXTHOP_EVENT_REPLACE,
+ NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
+ NEXTHOP_EVENT_BUCKET_REPLACE,
};
enum nh_notifier_info_type {
NH_NOTIFIER_INFO_TYPE_SINGLE,
NH_NOTIFIER_INFO_TYPE_GRP,
+ NH_NOTIFIER_INFO_TYPE_RES_TABLE,
+ NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
};
struct nh_notifier_single_info {
@@ -143,6 +190,19 @@ struct nh_notifier_grp_info {
struct nh_notifier_grp_entry_info nh_entries[];
};
+struct nh_notifier_res_bucket_info {
+ u16 bucket_index;
+ unsigned int idle_timer_ms;
+ bool force;
+ struct nh_notifier_single_info old_nh;
+ struct nh_notifier_single_info new_nh;
+};
+
+struct nh_notifier_res_table_info {
+ u16 num_nh_buckets;
+ struct nh_notifier_single_info nhs[];
+};
+
struct nh_notifier_info {
struct net *net;
struct netlink_ext_ack *extack;
@@ -151,6 +211,8 @@ struct nh_notifier_info {
union {
struct nh_notifier_single_info *nh;
struct nh_notifier_grp_info *nh_grp;
+ struct nh_notifier_res_table_info *nh_res_table;
+ struct nh_notifier_res_bucket_info *nh_res_bucket;
};
};
@@ -158,6 +220,10 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
+void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
+ bool offload, bool trap);
+void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
+ unsigned long *activity);
/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
@@ -212,7 +278,7 @@ static inline bool nexthop_is_multipath(const struct nexthop *nh)
struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
- return nh_grp->mpath;
+ return nh_grp->is_multipath;
}
return false;
}
@@ -227,7 +293,7 @@ static inline unsigned int nexthop_num_path(const struct nexthop *nh)
struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
- if (nh_grp->mpath)
+ if (nh_grp->is_multipath)
rc = nh_grp->num_nh;
}
@@ -308,7 +374,7 @@ struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
- if (nh_grp->mpath) {
+ if (nh_grp->is_multipath) {
nh = nexthop_mpath_select(nh_grp, nhsel);
if (!nh)
return NULL;
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 15b1b30f454e..f5c1bee0cd6a 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -188,4 +188,13 @@ struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
*offload);
void taprio_offload_free(struct tc_taprio_qopt_offload *offload);
+/* Ensure skb_mstamp_ns, which might have been populated with the txtime, is
+ * not mistaken for a software timestamp, because this will otherwise prevent
+ * the dispatch of hardware timestamps to the socket.
+ */
+static inline void skb_txtime_consumed(struct sk_buff *skb)
+{
+ skb->tstamp = ktime_set(0, 0);
+}
+
#endif
diff --git a/include/net/psample.h b/include/net/psample.h
index 68ae16bb0a4a..e328c5127757 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -14,6 +14,19 @@ struct psample_group {
struct rcu_head rcu;
};
+struct psample_metadata {
+ u32 trunc_size;
+ int in_ifindex;
+ int out_ifindex;
+ u16 out_tc;
+ u64 out_tc_occ; /* bytes */
+ u64 latency; /* nanoseconds */
+ u8 out_tc_valid:1,
+ out_tc_occ_valid:1,
+ latency_valid:1,
+ unused:5;
+};
+
struct psample_group *psample_group_get(struct net *net, u32 group_num);
void psample_group_take(struct psample_group *group);
void psample_group_put(struct psample_group *group);
@@ -21,15 +34,13 @@ void psample_group_put(struct psample_group *group);
#if IS_ENABLED(CONFIG_PSAMPLE)
void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
- u32 trunc_size, int in_ifindex, int out_ifindex,
- u32 sample_rate);
+ u32 sample_rate, const struct psample_metadata *md);
#else
static inline void psample_sample_packet(struct psample_group *group,
- struct sk_buff *skb, u32 trunc_size,
- int in_ifindex, int out_ifindex,
- u32 sample_rate)
+ struct sk_buff *skb, u32 sample_rate,
+ const struct psample_metadata *md)
{
}
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2d6eb60c58c8..f7a6e14491fb 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1242,6 +1242,20 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
}
+struct psched_pktrate {
+ u64 rate_pkts_ps; /* packets per second */
+ u32 mult;
+ u8 shift;
+};
+
+static inline u64 psched_pkt2t_ns(const struct psched_pktrate *r,
+ unsigned int pkt_num)
+{
+ return ((u64)pkt_num * r->mult) >> r->shift;
+}
+
+void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);
+
/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc.
* The fast path only needs to access filter list and to update stats
*/
diff --git a/include/net/sock.h b/include/net/sock.h
index 8487f58da36d..cadcc12cc316 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1188,6 +1188,9 @@ struct proto {
void (*unhash)(struct sock *sk);
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
+#ifdef CONFIG_BPF_SYSCALL
+ int (*psock_update_sk_prot)(struct sock *sk, bool restore);
+#endif
/* Keeping track of sockets in use */
#ifdef CONFIG_PROC_FS
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index b7fc7d0f54e2..8c3218177136 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -68,6 +68,7 @@ enum switchdev_obj_id {
};
struct switchdev_obj {
+ struct list_head list;
struct net_device *orig_dev;
enum switchdev_obj_id id;
u32 flags;
diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h
index 6d1e26b709b5..72649512dcdd 100644
--- a/include/net/tc_act/tc_police.h
+++ b/include/net/tc_act/tc_police.h
@@ -10,10 +10,13 @@ struct tcf_police_params {
s64 tcfp_burst;
u32 tcfp_mtu;
s64 tcfp_mtu_ptoks;
+ s64 tcfp_pkt_burst;
struct psched_ratecfg rate;
bool rate_present;
struct psched_ratecfg peak;
bool peak_present;
+ struct psched_pktrate ppsrate;
+ bool pps_present;
struct rcu_head rcu;
};
@@ -24,6 +27,7 @@ struct tcf_police {
spinlock_t tcfp_lock ____cacheline_aligned_in_smp;
s64 tcfp_toks;
s64 tcfp_ptoks;
+ s64 tcfp_pkttoks;
s64 tcfp_t_c;
};
@@ -97,6 +101,54 @@ static inline u32 tcf_police_burst(const struct tc_action *act)
return burst;
}
+static inline u64 tcf_police_rate_pkt_ps(const struct tc_action *act)
+{
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *params;
+
+ params = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
+ return params->ppsrate.rate_pkts_ps;
+}
+
+static inline u32 tcf_police_burst_pkt(const struct tc_action *act)
+{
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *params;
+ u32 burst;
+
+ params = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
+
+ /*
+ * "rate" pkts "burst" nanoseconds
+ * ------------ * -------------------
+ * 1 second 2^6 ticks
+ *
+ * ------------------------------------
+ * NSEC_PER_SEC nanoseconds
+ * ------------------------
+ * 2^6 ticks
+ *
+ * "rate" pkts "burst" nanoseconds 2^6 ticks
+ * = ------------ * ------------------- * ------------------------
+ * 1 second 2^6 ticks NSEC_PER_SEC nanoseconds
+ *
+ * "rate" * "burst"
+ * = ---------------- pkts/nanosecond
+ * NSEC_PER_SEC^2
+ *
+ *
+ * "rate" * "burst"
+ * = ---------------- pkts/second
+ * NSEC_PER_SEC
+ */
+ burst = div_u64(params->tcfp_pkt_burst * params->ppsrate.rate_pkts_ps,
+ NSEC_PER_SEC);
+
+ return burst;
+}
+
static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act)
{
struct tcf_police *police = to_police(act);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 963cd86d12dd..eaea43afcc97 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -883,36 +883,11 @@ struct tcp_skb_cb {
struct inet6_skb_parm h6;
#endif
} header; /* For incoming skbs */
- struct {
- __u32 flags;
- struct sock *sk_redir;
- void *data_end;
- } bpf;
};
};
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
-static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
-{
- TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
-}
-
-static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
-{
- return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
-}
-
-static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
-{
- return TCP_SKB_CB(skb)->bpf.sk_redir;
-}
-
-static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
-{
- TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
-}
-
extern const struct inet_connection_sock_af_ops ipv4_specific;
#if IS_ENABLED(CONFIG_IPV6)
@@ -1060,44 +1035,56 @@ struct rate_sample {
};
struct tcp_congestion_ops {
- struct list_head list;
- u32 key;
- u32 flags;
-
- /* initialize private data (optional) */
- void (*init)(struct sock *sk);
- /* cleanup private data (optional) */
- void (*release)(struct sock *sk);
+/* fast path fields are put first to fill one cache line */
/* return slow start threshold (required) */
u32 (*ssthresh)(struct sock *sk);
+
/* do new cwnd calculation (required) */
void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);
+
/* call before changing ca_state (optional) */
void (*set_state)(struct sock *sk, u8 new_state);
+
/* call when cwnd event occurs (optional) */
void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
+
/* call when ack arrives (optional) */
void (*in_ack_event)(struct sock *sk, u32 flags);
- /* new value of cwnd after loss (required) */
- u32 (*undo_cwnd)(struct sock *sk);
+
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
+
/* override sysctl_tcp_min_tso_segs */
u32 (*min_tso_segs)(struct sock *sk);
- /* returns the multiplier used in tcp_sndbuf_expand (optional) */
- u32 (*sndbuf_expand)(struct sock *sk);
+
/* call when packets are delivered to update cwnd and pacing rate,
* after all the ca_state processing. (optional)
*/
void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
+
+
+ /* new value of cwnd after loss (required) */
+ u32 (*undo_cwnd)(struct sock *sk);
+ /* returns the multiplier used in tcp_sndbuf_expand (optional) */
+ u32 (*sndbuf_expand)(struct sock *sk);
+
+/* control/slow paths put last */
/* get info for inet_diag (optional) */
size_t (*get_info)(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info);
- char name[TCP_CA_NAME_MAX];
- struct module *owner;
-};
+ char name[TCP_CA_NAME_MAX];
+ struct module *owner;
+ struct list_head list;
+ u32 key;
+ u32 flags;
+
+ /* initialize private data (optional) */
+ void (*init)(struct sock *sk);
+ /* cleanup private data (optional) */
+ void (*release)(struct sock *sk);
+} ____cacheline_aligned_in_smp;
int tcp_register_congestion_control(struct tcp_congestion_ops *type);
void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
@@ -2222,25 +2209,26 @@ void tcp_update_ulp(struct sock *sk, struct proto *p,
__MODULE_INFO(alias, alias_userspace, name); \
__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
+#ifdef CONFIG_NET_SOCK_MSG
struct sk_msg;
struct sk_psock;
-#ifdef CONFIG_BPF_STREAM_PARSER
+#ifdef CONFIG_BPF_SYSCALL
struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+int tcp_bpf_update_proto(struct sock *sk, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
-#else
-static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
-{
-}
-#endif /* CONFIG_BPF_STREAM_PARSER */
+#endif /* CONFIG_BPF_SYSCALL */
-#ifdef CONFIG_NET_SOCK_MSG
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int flags);
-int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
- struct msghdr *msg, int len, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
+#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
+static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+#endif
+
#ifdef CONFIG_CGROUP_BPF
static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
struct sk_buff *skb,
diff --git a/include/net/udp.h b/include/net/udp.h
index a132a02b2f2c..f55aaeef7e91 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -329,6 +329,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
struct sk_buff *skb);
struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport);
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
/* UDP uses skb->dev_scratch to cache as much information as possible and avoid
* possibly multiple cache miss on dequeue()
@@ -515,9 +517,33 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
return segs;
}
-#ifdef CONFIG_BPF_STREAM_PARSER
+static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
+{
+ /* UDP-lite can't land here - no GRO */
+ WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
+
+ /* UDP packets generated with UDP_SEGMENT and traversing:
+ *
+ * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx)
+ *
+ * can reach a UDP socket with CHECKSUM_NONE, because
+ * __iptunnel_pull_header() converts CHECKSUM_PARTIAL into NONE.
+ * SKB_GSO_UDP_L4 or SKB_GSO_FRAGLIST packets with no UDP tunnel will
+ * have a valid checksum, as the GRO engine validates the UDP csum
+ * before the aggregation and nobody strips such info in between.
+ * Instead of adding another check in the tunnel fastpath, we can force
+ * a valid csum after the segmentation.
+ * Additionally fixup the UDP CB.
+ */
+ UDP_SKB_CB(skb)->cscov = skb->len;
+ if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid)
+ skb->csum_valid = 1;
+}
+
+#ifdef CONFIG_BPF_SYSCALL
struct sk_psock;
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
-#endif /* BPF_STREAM_PARSER */
+int udp_bpf_update_proto(struct sock *sk, bool restore);
+#endif
#endif /* _UDP_H */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index cc17bc957548..9c0722c6d7ac 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -80,19 +80,6 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(void);
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct xdp_sock *xs;
-
- if (key >= map->max_entries)
- return NULL;
-
- xs = READ_ONCE(m->xsk_map[key]);
- return xs;
-}
-
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
@@ -109,12 +96,6 @@ static inline void __xsk_map_flush(void)
{
}
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- return NULL;
-}
-
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 425ff29d9389..68cdc7ceaf4d 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -51,6 +51,7 @@
*/
/* Reserve some destination PGIDs at the end of the range:
+ * PGID_BLACKHOLE: used for not forwarding the frames
* PGID_CPU: used for whitelisting certain MAC addresses, such as the addresses
* of the switch port net devices, towards the CPU port module.
* PGID_UC: the flooding destinations for unknown unicast traffic.
@@ -59,6 +60,7 @@
* PGID_MCIPV6: the flooding destinations for IPv6 multicast traffic.
* PGID_BC: the flooding destinations for broadcast traffic.
*/
+#define PGID_BLACKHOLE 57
#define PGID_CPU 58
#define PGID_UC 59
#define PGID_MC 60
@@ -73,7 +75,7 @@
#define for_each_nonreserved_multicast_dest_pgid(ocelot, pgid) \
for ((pgid) = (ocelot)->num_phys_ports + 1; \
- (pgid) < PGID_CPU; \
+ (pgid) < PGID_BLACKHOLE; \
(pgid)++)
#define for_each_aggr_pgid(ocelot, pgid) \
@@ -611,6 +613,11 @@ struct ocelot_port {
struct net_device *bond;
bool lag_tx_active;
+
+ u16 mrp_ring_id;
+
+ struct net_device *bridge;
+ u8 stp_state;
};
struct ocelot {
@@ -630,10 +637,6 @@ struct ocelot {
int num_frame_refs;
int num_mact_rows;
- struct net_device *hw_bridge_dev;
- u16 bridge_mask;
- u16 bridge_fwd_mask;
-
struct ocelot_port **ports;
u8 base_mac[ETH_ALEN];
@@ -679,12 +682,6 @@ struct ocelot {
/* Protects the PTP clock */
spinlock_t ptp_clock_lock;
struct ptp_pin_desc ptp_pins[OCELOT_PTP_PINS_NUM];
-
-#if IS_ENABLED(CONFIG_BRIDGE_MRP)
- u16 mrp_ring_id;
- struct net_device *mrp_p_port;
- struct net_device *mrp_s_port;
-#endif
};
struct ocelot_policer {
@@ -806,10 +803,10 @@ int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port,
struct switchdev_brport_flags val);
void ocelot_port_bridge_flags(struct ocelot *ocelot, int port,
struct switchdev_brport_flags val);
-int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
- struct net_device *bridge);
-int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
+void ocelot_port_bridge_join(struct ocelot *ocelot, int port,
struct net_device *bridge);
+void ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
+ struct net_device *bridge);
int ocelot_fdb_dump(struct ocelot *ocelot, int port,
dsa_fdb_dump_cb_t *cb, void *data);
int ocelot_fdb_add(struct ocelot *ocelot, int port,
diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h
index 6a7388fa7cc5..ded497d72bdb 100644
--- a/include/soc/mscc/ocelot_ptp.h
+++ b/include/soc/mscc/ocelot_ptp.h
@@ -37,8 +37,6 @@ enum {
#define PTP_CFG_MISC_PTP_EN BIT(2)
-#define PSEC_PER_SEC 1000000000000LL
-
#define PTP_CFG_CLK_ADJ_CFG_ENA BIT(0)
#define PTP_CFG_CLK_ADJ_CFG_DIR BIT(1)
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 76a97176ab81..fcad3645a70b 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -86,19 +86,15 @@ struct _bpf_dtab_netdev {
};
#endif /* __DEVMAP_OBJ_TYPE */
-#define devmap_ifindex(tgt, map) \
- (((map->map_type == BPF_MAP_TYPE_DEVMAP || \
- map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \
- ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0)
-
DECLARE_EVENT_CLASS(xdp_redirect_template,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
- const struct bpf_map *map, u32 index),
+ enum bpf_map_type map_type,
+ u32 map_id, u32 index),
- TP_ARGS(dev, xdp, tgt, err, map, index),
+ TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index),
TP_STRUCT__entry(
__field(int, prog_id)
@@ -111,14 +107,22 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
),
TP_fast_assign(
+ u32 ifindex = 0, map_index = index;
+
+ if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+ ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
+ } else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
+ ifindex = index;
+ map_index = 0;
+ }
+
__entry->prog_id = xdp->aux->id;
__entry->act = XDP_REDIRECT;
__entry->ifindex = dev->ifindex;
__entry->err = err;
- __entry->to_ifindex = map ? devmap_ifindex(tgt, map) :
- index;
- __entry->map_id = map ? map->id : 0;
- __entry->map_index = map ? index : 0;
+ __entry->to_ifindex = ifindex;
+ __entry->map_id = map_id;
+ __entry->map_index = map_index;
),
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
@@ -133,45 +137,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
- const struct bpf_map *map, u32 index),
- TP_ARGS(dev, xdp, tgt, err, map, index)
+ enum bpf_map_type map_type,
+ u32 map_id, u32 index),
+ TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
- const struct bpf_map *map, u32 index),
- TP_ARGS(dev, xdp, tgt, err, map, index)
+ enum bpf_map_type map_type,
+ u32 map_id, u32 index),
+ TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
-#define _trace_xdp_redirect(dev, xdp, to) \
- trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to)
+#define _trace_xdp_redirect(dev, xdp, to) \
+ trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
-#define _trace_xdp_redirect_err(dev, xdp, to, err) \
- trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to)
+#define _trace_xdp_redirect_err(dev, xdp, to, err) \
+ trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
-#define _trace_xdp_redirect_map(dev, xdp, to, map, index) \
- trace_xdp_redirect(dev, xdp, to, 0, map, index)
+#define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \
+ trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index)
-#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \
- trace_xdp_redirect_err(dev, xdp, to, err, map, index)
+#define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \
+ trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index)
/* not used anymore, but kept around so as not to break old programs */
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
- const struct bpf_map *map, u32 index),
- TP_ARGS(dev, xdp, tgt, err, map, index)
+ enum bpf_map_type map_type,
+ u32 map_id, u32 index),
+ TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
- const struct bpf_map *map, u32 index),
- TP_ARGS(dev, xdp, tgt, err, map, index)
+ enum bpf_map_type map_type,
+ u32 map_id, u32 index),
+ TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
TRACE_EVENT(xdp_cpumap_kthread,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4ba4ef0ff63a..49371eba98ba 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -93,7 +93,717 @@ union bpf_iter_link_info {
} map;
};
-/* BPF syscall commands, see bpf(2) man-page for details. */
+/* BPF syscall commands, see bpf(2) man-page for more details. */
+/**
+ * DOC: eBPF Syscall Preamble
+ *
+ * The operation to be performed by the **bpf**\ () system call is determined
+ * by the *cmd* argument. Each operation takes an accompanying argument,
+ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
+ * below). The size argument is the size of the union pointed to by *attr*.
+ */
+/**
+ * DOC: eBPF Syscall Commands
+ *
+ * BPF_MAP_CREATE
+ * Description
+ * Create a map and return a file descriptor that refers to the
+ * map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
+ * is automatically enabled for the new file descriptor.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_MAP_CREATE** will delete the map (but see NOTES).
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_LOOKUP_ELEM
+ * Description
+ * Look up an element with a given *key* in the map referred to
+ * by the file descriptor *map_fd*.
+ *
+ * The *flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_UPDATE_ELEM
+ * Description
+ * Create or update an element (key/value pair) in a specified map.
+ *
+ * The *flags* argument should be specified as one of the
+ * following:
+ *
+ * **BPF_ANY**
+ * Create a new element or update an existing element.
+ * **BPF_NOEXIST**
+ * Create a new element only if it did not exist.
+ * **BPF_EXIST**
+ * Update an existing element.
+ * **BPF_F_LOCK**
+ * Update a spin_lock-ed map element.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
+ * **E2BIG**, **EEXIST**, or **ENOENT**.
+ *
+ * **E2BIG**
+ * The number of elements in the map reached the
+ * *max_entries* limit specified at map creation time.
+ * **EEXIST**
+ * If *flags* specifies **BPF_NOEXIST** and the element
+ * with *key* already exists in the map.
+ * **ENOENT**
+ * If *flags* specifies **BPF_EXIST** and the element with
+ * *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_ELEM
+ * Description
+ * Look up and delete an element by key in a specified map.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_KEY
+ * Description
+ * Look up an element by key in a specified map and return the key
+ * of the next element. Can be used to iterate over all elements
+ * in the map.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * The following cases can be used to iterate over all elements of
+ * the map:
+ *
+ * * If *key* is not found, the operation returns zero and sets
+ * the *next_key* pointer to the key of the first element.
+ * * If *key* is found, the operation returns zero and sets the
+ * *next_key* pointer to the key of the next element.
+ * * If *key* is the last element, returns -1 and *errno* is set
+ * to **ENOENT**.
+ *
+ * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
+ * **EINVAL** on error.
+ *
+ * BPF_PROG_LOAD
+ * Description
+ * Verify and load an eBPF program, returning a new file
+ * descriptor associated with the program.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
+ *
+ * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
+ * automatically enabled for the new file descriptor.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_PIN
+ * Description
+ * Pin an eBPF program or map referred by the specified *bpf_fd*
+ * to the provided *pathname* on the filesystem.
+ *
+ * The *pathname* argument must not contain a dot (".").
+ *
+ * On success, *pathname* retains a reference to the eBPF object,
+ * preventing deallocation of the object when the original
+ * *bpf_fd* is closed. This allows the eBPF object to live beyond
+ * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
+ * process.
+ *
+ * Applying **unlink**\ (2) or similar calls to the *pathname*
+ * unpins the object from the filesystem, removing the reference.
+ * If no other file descriptors or filesystem nodes refer to the
+ * same object, it will be deallocated (see NOTES).
+ *
+ * The filesystem type for the parent directory of *pathname* must
+ * be **BPF_FS_MAGIC**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_OBJ_GET
+ * Description
+ * Open a file descriptor for the eBPF object pinned to the
+ * specified *pathname*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_PROG_ATTACH
+ * Description
+ * Attach an eBPF program to a *target_fd* at the specified
+ * *attach_type* hook.
+ *
+ * The *attach_type* specifies the eBPF attachment point to
+ * attach the program to, and must be one of *bpf_attach_type*
+ * (see below).
+ *
+ * The *attach_bpf_fd* must be a valid file descriptor for a
+ * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
+ * or sock_ops type corresponding to the specified *attach_type*.
+ *
+ * The *target_fd* must be a valid file descriptor for a kernel
+ * object which depends on the attach type of *attach_bpf_fd*:
+ *
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ * **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ * Control Group v2 hierarchy with the eBPF controller
+ * enabled. Requires the kernel to be compiled with
+ * **CONFIG_CGROUP_BPF**.
+ *
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ * Network namespace (eg /proc/self/ns/net).
+ *
+ * **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ * LIRC device path (eg /dev/lircN). Requires the kernel
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ * **BPF_PROG_TYPE_SK_SKB**,
+ * **BPF_PROG_TYPE_SK_MSG**
+ *
+ * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_DETACH
+ * Description
+ * Detach the eBPF program associated with the *target_fd* at the
+ * hook specified by *attach_type*. The program must have been
+ * previously attached using **BPF_PROG_ATTACH**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_TEST_RUN
+ * Description
+ * Run the eBPF program associated with the *prog_fd* a *repeat*
+ * number of times against a provided program context *ctx_in* and
+ * data *data_in*, and return the modified program context
+ * *ctx_out*, *data_out* (for example, packet data), result of the
+ * execution *retval*, and *duration* of the test run.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * **ENOSPC**
+ * Either *data_size_out* or *ctx_size_out* is too small.
+ * **ENOTSUPP**
+ * This command is not supported by the program type of
+ * the program referred to by *prog_fd*.
+ *
+ * BPF_PROG_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF program currently loaded into the kernel.
+ *
+ * Looks for the eBPF program with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF programs
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF map currently loaded into the kernel.
+ *
+ * Looks for the eBPF map with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF maps
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_PROG_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF program corresponding to
+ * *prog_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF map corresponding to
+ * *map_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_GET_INFO_BY_FD
+ * Description
+ * Obtain information about the eBPF object corresponding to
+ * *bpf_fd*.
+ *
+ * Populates up to *info_len* bytes of *info*, which will be in
+ * one of the following formats depending on the eBPF object type
+ * of *bpf_fd*:
+ *
+ * * **struct bpf_prog_info**
+ * * **struct bpf_map_info**
+ * * **struct bpf_btf_info**
+ * * **struct bpf_link_info**
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_QUERY
+ * Description
+ * Obtain information about eBPF programs associated with the
+ * specified *attach_type* hook.
+ *
+ * The *target_fd* must be a valid file descriptor for a kernel
+ * object which depends on the attach type of *attach_bpf_fd*:
+ *
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ * **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ * Control Group v2 hierarchy with the eBPF controller
+ * enabled. Requires the kernel to be compiled with
+ * **CONFIG_CGROUP_BPF**.
+ *
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ * Network namespace (eg /proc/self/ns/net).
+ *
+ * **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ * LIRC device path (eg /dev/lircN). Requires the kernel
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ * **BPF_PROG_QUERY** always fetches the number of programs
+ * attached and the *attach_flags* which were used to attach those
+ * programs. Additionally, if *prog_ids* is nonzero and the number
+ * of attached programs is less than *prog_cnt*, populates
+ * *prog_ids* with the eBPF program ids of the programs attached
+ * at *target_fd*.
+ *
+ * The following flags may alter the result:
+ *
+ * **BPF_F_QUERY_EFFECTIVE**
+ * Only return information regarding programs which are
+ * currently effective at the specified *target_fd*.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_RAW_TRACEPOINT_OPEN
+ * Description
+ * Attach an eBPF program to a tracepoint *name* to access kernel
+ * internal arguments of the tracepoint in their raw form.
+ *
+ * The *prog_fd* must be a valid file descriptor associated with
+ * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
+ *
+ * No ABI guarantees are made about the content of tracepoint
+ * arguments exposed to the corresponding eBPF program.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_RAW_TRACEPOINT_OPEN** will detach the program (but see NOTES).
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_LOAD
+ * Description
+ * Verify and load BPF Type Format (BTF) metadata into the kernel,
+ * returning a new file descriptor associated with the metadata.
+ * BTF is described in more detail at
+ * https://www.kernel.org/doc/html/latest/bpf/btf.html.
+ *
+ * The *btf* parameter must point to valid memory providing
+ * *btf_size* bytes of BTF binary metadata.
+ *
+ * The returned file descriptor can be passed to other **bpf**\ ()
+ * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
+ * associate the BTF with those objects.
+ *
+ * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
+ * parameters to specify a *btf_log_buf*, *btf_log_size* and
+ * *btf_log_level* which allow the kernel to return freeform log
+ * output regarding the BTF verification process.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the BPF Type Format (BTF)
+ * corresponding to *btf_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_TASK_FD_QUERY
+ * Description
+ * Obtain information about eBPF programs associated with the
+ * target process identified by *pid* and *fd*.
+ *
+ * If the *pid* and *fd* are associated with a tracepoint, kprobe
+ * or uprobe perf event, then the *prog_id* and *fd_type* will
+ * be populated with the eBPF program id and file descriptor type
+ * of type **bpf_task_fd_type**. If associated with a kprobe or
+ * uprobe, the *probe_offset* and *probe_addr* will also be
+ * populated. Optionally, if *buf* is provided, then up to
+ * *buf_len* bytes of *buf* will be populated with the name of
+ * the tracepoint, kprobe or uprobe.
+ *
+ * The resulting *prog_id* may be introspected in deeper detail
+ * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_ELEM
+ * Description
+ * Look up an element with the given *key* in the map referred to
+ * by the file descriptor *fd*, and if found, delete the element.
+ *
+ * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
+ * implement this command as a "pop" operation, deleting the top
+ * element rather than one corresponding to *key*.
+ * The *key* and *key_len* parameters should be zeroed when
+ * issuing this operation for these map types.
+ *
+ * This command is only valid for the following map types:
+ * * **BPF_MAP_TYPE_QUEUE**
+ * * **BPF_MAP_TYPE_STACK**
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_FREEZE
+ * Description
+ * Freeze the permissions of the specified map.
+ *
+ * Write permissions may be frozen by passing zero *flags*.
+ * Upon success, no future syscall invocations may alter the
+ * map state of *map_fd*. Write operations from eBPF programs
+ * are still possible for a frozen map.
+ *
+ * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_BTF_GET_NEXT_ID
+ * Description
+ * Fetch the next BPF Type Format (BTF) object currently loaded
+ * into the kernel.
+ *
+ * Looks for the BTF object with an id greater than *start_id*
+ * and updates *next_id* on success. If no other BTF objects
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_BATCH
+ * Description
+ * Iterate and fetch multiple elements in a map.
+ *
+ * Two opaque values are used to manage batch operations,
+ * *in_batch* and *out_batch*. Initially, *in_batch* must be set
+ * to NULL to begin the batched operation. After each subsequent
+ * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
+ * *out_batch* as the *in_batch* for the next operation to
+ * continue iteration from the current point.
+ *
+ * The *keys* and *values* are output parameters which must point
+ * to memory large enough to hold *count* items based on the key
+ * and value size of the map *map_fd*. The *keys* buffer must be
+ * of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * The *elem_flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * On success, *count* elements from the map are copied into the
+ * user buffer, with the keys copied into *keys* and the values
+ * copied into the corresponding indices in *values*.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **ENOSPC** to indicate that *keys* or
+ * *values* is too small to dump an entire bucket during
+ * iteration of a hash-based map type.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_BATCH
+ * Description
+ * Iterate and delete all elements in a map.
+ *
+ * This operation has the same behavior as
+ * **BPF_MAP_LOOKUP_BATCH** with two exceptions:
+ *
+ * * Every element that is successfully returned is also deleted
+ * from the map. This is at least *count* elements. Note that
+ * *count* is both an input and an output parameter.
+ * * Upon returning with *errno* set to **EFAULT**, up to
+ * *count* elements may be deleted without returning the keys
+ * and values of the deleted elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_UPDATE_BATCH
+ * Description
+ * Update multiple elements in a map by *key*.
+ *
+ * The *keys* and *values* are input parameters which must point
+ * to memory large enough to hold *count* items based on the key
+ * and value size of the map *map_fd*. The *keys* buffer must be
+ * of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * Each element specified in *keys* is sequentially updated to the
+ * value in the corresponding index in *values*. The *in_batch*
+ * and *out_batch* parameters are ignored and should be zeroed.
+ *
+ * The *elem_flags* argument should be specified as one of the
+ * following:
+ *
+ * **BPF_ANY**
+ * Create new elements or update existing elements.
+ * **BPF_NOEXIST**
+ * Create new elements only if they do not exist.
+ * **BPF_EXIST**
+ * Update existing elements.
+ * **BPF_F_LOCK**
+ * Update spin_lock-ed map elements. This must be
+ * specified if the map value contains a spinlock.
+ *
+ * On success, *count* elements from the map are updated.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
+ * **E2BIG**. **E2BIG** indicates that the number of elements in
+ * the map reached the *max_entries* limit specified at map
+ * creation time.
+ *
+ * May set *errno* to one of the following error codes under
+ * specific circumstances:
+ *
+ * **EEXIST**
+ * If *flags* specifies **BPF_NOEXIST** and the element
+ * with *key* already exists in the map.
+ * **ENOENT**
+ * If *flags* specifies **BPF_EXIST** and the element with
+ * *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_BATCH
+ * Description
+ * Delete multiple elements in a map by *key*.
+ *
+ * The *keys* parameter is an input parameter which must point
+ * to memory large enough to hold *count* items based on the key
+ * size of the map *map_fd*, that is, *key_size* * *count*.
+ *
+ * Each element specified in *keys* is sequentially deleted. The
+ * *in_batch*, *out_batch*, and *values* parameters are ignored
+ * and should be zeroed.
+ *
+ * The *elem_flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * On success, *count* elements from the map are deleted.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements. If
+ * *errno* is **EFAULT**, up to *count* elements may have been
+ * deleted.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_LINK_CREATE
+ * Description
+ * Attach an eBPF program to a *target_fd* at the specified
+ * *attach_type* hook and return a file descriptor handle for
+ * managing the link.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_UPDATE
+ * Description
+ * Update the eBPF program in the specified *link_fd* to
+ * *new_prog_fd*.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_LINK_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF Link corresponding to
+ * *link_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF link currently loaded into the kernel.
+ *
+ * Looks for the eBPF link with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF links
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_ENABLE_STATS
+ * Description
+ * Enable eBPF runtime statistics gathering.
+ *
+ * Runtime statistics gathering for the eBPF runtime is disabled
+ * by default to minimize the corresponding performance overhead.
+ * This command enables statistics globally.
+ *
+ * Multiple programs may independently enable statistics.
+ * After gathering the desired statistics, eBPF runtime statistics
+ * may be disabled again by calling **close**\ (2) for the file
+ * descriptor returned by this function. Statistics will only be
+ * disabled system-wide when all outstanding file descriptors
+ * returned by prior calls for this subcommand are closed.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_ITER_CREATE
+ * Description
+ * Create an iterator on top of the specified *link_fd* (as
+ * previously created using **BPF_LINK_CREATE**) and return a
+ * file descriptor that can be used to trigger the iteration.
+ *
+ * If the resulting file descriptor is pinned to the filesystem
+ * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
+ * for that path will trigger the iterator to read kernel state
+ * using the eBPF program attached to *link_fd*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_DETACH
+ * Description
+ * Forcefully detach the specified *link_fd* from its
+ * corresponding attachment point.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_BIND_MAP
+ * Description
+ * Bind a map to the lifetime of an eBPF program.
+ *
+ * The map identified by *map_fd* is bound to the program
+ * identified by *prog_fd* and only released when *prog_fd* is
+ * released. This may be used in cases where metadata should be
+ * associated with a program which otherwise does not contain any
+ * references to the map (for example, embedded in the eBPF
+ * program instructions).
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * NOTES
+ * eBPF objects (maps and programs) can be shared between processes.
+ *
+ * * After **fork**\ (2), the child inherits file descriptors
+ * referring to the same eBPF objects.
+ * * File descriptors referring to eBPF objects can be transferred over
+ * **unix**\ (7) domain sockets.
+ * * File descriptors referring to eBPF objects can be duplicated in the
+ * usual way, using **dup**\ (2) and similar calls.
+ * * File descriptors referring to eBPF objects can be pinned to the
+ * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
+ *
+ * An eBPF object is deallocated only after all file descriptors referring
+ * to the object have been closed and no references remain pinned to the
+ * filesystem or attached (for example, bound to a program or device).
+ */
enum bpf_cmd {
BPF_MAP_CREATE,
BPF_MAP_LOOKUP_ELEM,
@@ -247,6 +957,7 @@ enum bpf_attach_type {
BPF_XDP_CPUMAP,
BPF_SK_LOOKUP,
BPF_XDP,
+ BPF_SK_SKB_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -393,11 +1104,24 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
+/* insn[0].src_reg: BPF_PSEUDO_FUNC
+ * insn[0].imm: insn offset to the func
+ * insn[1].imm: 0
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of the function
+ * verifier type: PTR_TO_FUNC.
+ */
+#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
*/
#define BPF_PSEUDO_CALL 1
+/* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL,
+ * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel
+ */
+#define BPF_PSEUDO_KFUNC_CALL 2
/* flags for BPF_MAP_UPDATE_ELEM command */
enum {
@@ -720,7 +1444,7 @@ union bpf_attr {
* parsed and used to produce a manual page. The workflow is the following,
* and requires the rst2man utility:
*
- * $ ./scripts/bpf_helpers_doc.py \
+ * $ ./scripts/bpf_doc.py \
* --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
* $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
* $ man /tmp/bpf-helpers.7
@@ -1765,6 +2489,10 @@ union bpf_attr {
* Use with ENCAP_L3/L4 flags to further specify the tunnel
* type; *len* is the length of the inner MAC header.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ * L2 type as Ethernet.
+ *
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -3915,6 +4643,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
+ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
+ * Description
+ * For each element in **map**, call **callback_fn** function with
+ * **map**, **callback_ctx** and other map-specific parameters.
+ * The **callback_fn** should be a static function and
+ * the **callback_ctx** should be a pointer to the stack.
+ * The **flags** is used to control certain aspects of the helper.
+ * Currently, the **flags** must be 0.
+ *
+ * The following is a list of supported map types and their
+ * respective expected callback signatures:
+ *
+ * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
+ * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
+ *
+ * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
+ *
+ * For per_cpu maps, the map_value is the value on the cpu where the
+ * bpf_prog is running.
+ *
+ * If **callback_fn** returns 0, the helper will continue to the next
+ * element. If return value is 1, the helper will skip the rest of
+ * elements and return. Other return values are not used now.
+ *
+ * Return
+ * The number of traversed map elements for success, **-EINVAL** for
+ * invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4081,6 +4837,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
+ FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4174,6 +4931,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
+ BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
};
enum {
@@ -5211,7 +5969,10 @@ struct bpf_pidns_info {
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
struct bpf_sk_lookup {
- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+ union {
+ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
+ };
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 5a667107ad2c..d27b1708efe9 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -52,7 +52,7 @@ struct btf_type {
};
};
-#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_INFO_KFLAG(info) ((info) >> 31)
@@ -72,7 +72,8 @@ struct btf_type {
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
#define BTF_KIND_VAR 14 /* Variable */
#define BTF_KIND_DATASEC 15 /* Section */
-#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define BTF_KIND_FLOAT 16 /* Floating point */
+#define BTF_KIND_MAX BTF_KIND_FLOAT
#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 5afea692a3f7..f91e079e3108 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1383,15 +1383,33 @@ struct ethtool_per_queue_op {
};
/**
- * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters
+ * struct ethtool_fecparam - Ethernet Forward Error Correction parameters
* @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
- * @active_fec: FEC mode which is active on porte
- * @fec: Bitmask of supported/configured FEC modes
- * @rsvd: Reserved for future extensions. i.e FEC bypass feature.
+ * @active_fec: FEC mode which is active on the port, single bit set, GET only.
+ * @fec: Bitmask of configured FEC modes.
+ * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET.
*
- * Drivers should reject a non-zero setting of @autoneg when
- * autoneogotiation is disabled (or not supported) for the link.
+ * Note that @reserved was never validated on input and ethtool user space
+ * left it uninitialized when calling SET. Hence going forward it can only be
+ * used to return a value to userspace with GET.
+ *
+ * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS.
+ * FEC settings are configured by link autonegotiation whenever it's enabled.
+ * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode.
+ *
+ * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings.
+ * It is recommended that drivers only accept a single bit set in @fec.
+ * When multiple bits are set in @fec drivers may pick mode in an implementation
+ * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other
+ * FEC modes, because it's unclear whether in this case other modes constrain
+ * AUTO or are independent choices.
+ * Drivers must reject SET requests if they support none of the requested modes.
+ *
+ * If device does not support FEC drivers may use %ETHTOOL_FEC_NONE instead
+ * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM.
*
+ * See enum ethtool_fec_config_bits for definition of valid bits for both
+ * @fec and @active_fec.
*/
struct ethtool_fecparam {
__u32 cmd;
@@ -1403,11 +1421,16 @@ struct ethtool_fecparam {
/**
* enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration
- * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported
- * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver
- * @ETHTOOL_FEC_OFF: No FEC Mode
- * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode
- * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode
+ * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not
+ * be used together with other bits. GET only.
+ * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually
+ * based on link mode and SFP parameters read from module's
+ * EEPROM. This bit does _not_ mean autonegotiation.
+ * @ETHTOOL_FEC_OFF_BIT: No FEC Mode
+ * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode
+ * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode
+ * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet
+ * Consortium)
*/
enum ethtool_fec_config_bits {
ETHTOOL_FEC_NONE_BIT,
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index a286635ac9b8..7f1bdb5b31ba 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -42,6 +42,8 @@ enum {
ETHTOOL_MSG_CABLE_TEST_ACT,
ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
ETHTOOL_MSG_TUNNEL_INFO_GET,
+ ETHTOOL_MSG_FEC_GET,
+ ETHTOOL_MSG_FEC_SET,
/* add new constants above here */
__ETHTOOL_MSG_USER_CNT,
@@ -80,6 +82,8 @@ enum {
ETHTOOL_MSG_CABLE_TEST_NTF,
ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
+ ETHTOOL_MSG_FEC_GET_REPLY,
+ ETHTOOL_MSG_FEC_NTF,
/* add new constants above here */
__ETHTOOL_MSG_KERNEL_CNT,
@@ -629,6 +633,19 @@ enum {
ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)
};
+/* FEC */
+
+enum {
+ ETHTOOL_A_FEC_UNSPEC,
+ ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */
+ ETHTOOL_A_FEC_MODES, /* bitset */
+ ETHTOOL_A_FEC_AUTO, /* u8 */
+ ETHTOOL_A_FEC_ACTIVE, /* u32 */
+
+ __ETHTOOL_A_FEC_CNT,
+ ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1)
+};
+
/* generic netlink info */
#define ETHTOOL_GENL_NAME "ethtool"
#define ETHTOOL_GENL_VERSION 1
diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h
index fb169a50895e..222325d1d80e 100644
--- a/include/uapi/linux/icmp.h
+++ b/include/uapi/linux/icmp.h
@@ -20,6 +20,9 @@
#include <linux/types.h>
#include <asm/byteorder.h>
+#include <linux/in.h>
+#include <linux/if.h>
+#include <linux/in6.h>
#define ICMP_ECHOREPLY 0 /* Echo Reply */
#define ICMP_DEST_UNREACH 3 /* Destination Unreachable */
@@ -66,6 +69,23 @@
#define ICMP_EXC_TTL 0 /* TTL count exceeded */
#define ICMP_EXC_FRAGTIME 1 /* Fragment Reass time exceeded */
+/* Codes for EXT_ECHO (PROBE) */
+#define ICMP_EXT_ECHO 42
+#define ICMP_EXT_ECHOREPLY 43
+#define ICMP_EXT_MAL_QUERY 1 /* Malformed Query */
+#define ICMP_EXT_NO_IF 2 /* No such Interface */
+#define ICMP_EXT_NO_TABLE_ENT 3 /* No such Table Entry */
+#define ICMP_EXT_MULT_IFS 4 /* Multiple Interfaces Satisfy Query */
+
+/* Constants for EXT_ECHO (PROBE) */
+#define EXT_ECHOREPLY_ACTIVE (1 << 2)/* active bit in reply message */
+#define EXT_ECHOREPLY_IPV4 (1 << 1)/* ipv4 bit in reply message */
+#define EXT_ECHOREPLY_IPV6 1 /* ipv6 bit in reply message */
+#define EXT_ECHO_CTYPE_NAME 1
+#define EXT_ECHO_CTYPE_INDEX 2
+#define EXT_ECHO_CTYPE_ADDR 3
+#define ICMP_AFI_IP 1 /* Address Family Identifier for ipv4 */
+#define ICMP_AFI_IP6 2 /* Address Family Identifier for ipv6 */
struct icmphdr {
__u8 type;
@@ -118,4 +138,26 @@ struct icmp_extobj_hdr {
__u8 class_type;
};
+/* RFC 8335: 2.1 Header for c-type 3 payload */
+struct icmp_ext_echo_ctype3_hdr {
+ __be16 afi;
+ __u8 addrlen;
+ __u8 reserved;
+};
+
+/* RFC 8335: 2.1 Interface Identification Object */
+struct icmp_ext_echo_iio {
+ struct icmp_extobj_hdr extobj_hdr;
+ union {
+ char name[IFNAMSIZ];
+ __be32 ifindex;
+ struct {
+ struct icmp_ext_echo_ctype3_hdr ctype3_hdr;
+ union {
+ struct in_addr ipv4_addr;
+ struct in6_addr ipv6_addr;
+ } ip_addr;
+ } addr;
+ } ident;
+};
#endif /* _UAPI_LINUX_ICMP_H */
diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h
index 0564fd7ccde4..ecaece3af38d 100644
--- a/include/uapi/linux/icmpv6.h
+++ b/include/uapi/linux/icmpv6.h
@@ -140,6 +140,9 @@ struct icmp6hdr {
#define ICMPV6_UNK_OPTION 2
#define ICMPV6_HDR_INCOMP 3
+/* Codes for EXT_ECHO (PROBE) */
+#define ICMPV6_EXT_ECHO_REQUEST 160
+#define ICMPV6_EXT_ECHO_REPLY 161
/*
* constants for (set|get)sockopt
*/
diff --git a/include/uapi/linux/if_fddi.h b/include/uapi/linux/if_fddi.h
index 7239aa9c0766..8df2d9934bcd 100644
--- a/include/uapi/linux/if_fddi.h
+++ b/include/uapi/linux/if_fddi.h
@@ -9,7 +9,7 @@
* Version: @(#)if_fddi.h 1.0.3 Oct 6 2018
*
* Author: Lawrence V. Stefani, <stefani@yahoo.com>
- * Maintainer: Maciej W. Rozycki, <macro@linux-mips.org>
+ * Maintainer: Maciej W. Rozycki, <macro@orcam.me.uk>
*
* if_fddi.h is based on previous if_ether.h and if_tr.h work by
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index 3f302e2523b2..bdf77dffa5a4 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -120,6 +120,8 @@
#define MDIO_PMA_SPEED_100 0x0020 /* 100M capable */
#define MDIO_PMA_SPEED_10 0x0040 /* 10M capable */
#define MDIO_PCS_SPEED_10P2B 0x0002 /* 10PASS-TS/2BASE-TL capable */
+#define MDIO_PCS_SPEED_2_5G 0x0040 /* 2.5G capable */
+#define MDIO_PCS_SPEED_5G 0x0080 /* 5G capable */
/* Device present registers. */
#define MDIO_DEVS_PRESENT(devad) (1 << (devad))
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index e1172c1ffdfd..8eb3c0844bff 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -174,10 +174,21 @@ enum mptcp_event_attr {
MPTCP_ATTR_FLAGS, /* u16 */
MPTCP_ATTR_TIMEOUT, /* u32 */
MPTCP_ATTR_IF_IDX, /* s32 */
+ MPTCP_ATTR_RESET_REASON,/* u32 */
+ MPTCP_ATTR_RESET_FLAGS, /* u32 */
__MPTCP_ATTR_AFTER_LAST
};
#define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1)
+/* MPTCP Reset reason codes, rfc8684 */
+#define MPTCP_RST_EUNSPEC 0
+#define MPTCP_RST_EMPTCP 1
+#define MPTCP_RST_ERESOURCE 2
+#define MPTCP_RST_EPROHIBIT 3
+#define MPTCP_RST_EWQ2BIG 4
+#define MPTCP_RST_EBADPERF 5
+#define MPTCP_RST_EMIDDLEBOX 6
+
#endif /* _UAPI_MPTCP_H */
diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h
index 2d4a1e784cf0..d8ffa8c9ca78 100644
--- a/include/uapi/linux/nexthop.h
+++ b/include/uapi/linux/nexthop.h
@@ -21,7 +21,10 @@ struct nexthop_grp {
};
enum {
- NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */
+ NEXTHOP_GRP_TYPE_MPATH, /* hash-threshold nexthop group
+ * default type if not specified
+ */
+ NEXTHOP_GRP_TYPE_RES, /* resilient nexthop group */
__NEXTHOP_GRP_TYPE_MAX,
};
@@ -52,8 +55,50 @@ enum {
NHA_FDB, /* flag; nexthop belongs to a bridge fdb */
/* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */
+ /* nested; resilient nexthop group attributes */
+ NHA_RES_GROUP,
+ /* nested; nexthop bucket attributes */
+ NHA_RES_BUCKET,
+
__NHA_MAX,
};
#define NHA_MAX (__NHA_MAX - 1)
+
+enum {
+ NHA_RES_GROUP_UNSPEC,
+ /* Pad attribute for 64-bit alignment. */
+ NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC,
+
+ /* u16; number of nexthop buckets in a resilient nexthop group */
+ NHA_RES_GROUP_BUCKETS,
+ /* clock_t as u32; nexthop bucket idle timer (per-group) */
+ NHA_RES_GROUP_IDLE_TIMER,
+ /* clock_t as u32; nexthop unbalanced timer */
+ NHA_RES_GROUP_UNBALANCED_TIMER,
+ /* clock_t as u64; nexthop unbalanced time */
+ NHA_RES_GROUP_UNBALANCED_TIME,
+
+ __NHA_RES_GROUP_MAX,
+};
+
+#define NHA_RES_GROUP_MAX (__NHA_RES_GROUP_MAX - 1)
+
+enum {
+ NHA_RES_BUCKET_UNSPEC,
+ /* Pad attribute for 64-bit alignment. */
+ NHA_RES_BUCKET_PAD = NHA_RES_BUCKET_UNSPEC,
+
+ /* u16; nexthop bucket index */
+ NHA_RES_BUCKET_INDEX,
+ /* clock_t as u64; nexthop bucket idle time */
+ NHA_RES_BUCKET_IDLE_TIME,
+ /* u32; nexthop id assigned to the nexthop bucket */
+ NHA_RES_BUCKET_NH_ID,
+
+ __NHA_RES_BUCKET_MAX,
+};
+
+#define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1)
+
#endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 7ea59cfe1fa7..025c40fef93d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -190,6 +190,8 @@ enum {
TCA_POLICE_PAD,
TCA_POLICE_RATE64,
TCA_POLICE_PEAKRATE64,
+ TCA_POLICE_PKTRATE64,
+ TCA_POLICE_PKTBURST64,
__TCA_POLICE_MAX
#define TCA_POLICE_RESULT TCA_POLICE_RESULT
};
diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
index bff5032c98df..e585db5bf2d2 100644
--- a/include/uapi/linux/psample.h
+++ b/include/uapi/linux/psample.h
@@ -13,6 +13,13 @@ enum {
PSAMPLE_ATTR_GROUP_REFCOUNT,
PSAMPLE_ATTR_TUNNEL,
+ PSAMPLE_ATTR_PAD,
+ PSAMPLE_ATTR_OUT_TC, /* u16 */
+ PSAMPLE_ATTR_OUT_TC_OCC, /* u64, bytes */
+ PSAMPLE_ATTR_LATENCY, /* u64, nanoseconds */
+ PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */
+ PSAMPLE_ATTR_PROTO, /* u16 */
+
__PSAMPLE_ATTR_MAX
};
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 91e4ca064d61..5888492a5257 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -178,6 +178,13 @@ enum {
RTM_GETVLAN,
#define RTM_GETVLAN RTM_GETVLAN
+ RTM_NEWNEXTHOPBUCKET = 116,
+#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET
+ RTM_DELNEXTHOPBUCKET,
+#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET
+ RTM_GETNEXTHOPBUCKET,
+#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET
+
__RTM_MAX,
#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
};
@@ -283,6 +290,7 @@ enum {
#define RTPROT_MROUTED 17 /* Multicast daemon */
#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */
#define RTPROT_BABEL 42 /* Babel daemon */
+#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */
#define RTPROT_BGP 186 /* BGP Routes */
#define RTPROT_ISIS 187 /* ISIS Routes */
#define RTPROT_OSPF 188 /* OSPF Routes */
diff --git a/include/uapi/linux/virtio_bt.h b/include/uapi/linux/virtio_bt.h
new file mode 100644
index 000000000000..a7bd48daa9a9
--- /dev/null
+++ b/include/uapi/linux/virtio_bt.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+#ifndef _UAPI_LINUX_VIRTIO_BT_H
+#define _UAPI_LINUX_VIRTIO_BT_H
+
+#include <linux/virtio_types.h>
+
+/* Feature bits */
+#define VIRTIO_BT_F_VND_HCI 0 /* Indicates vendor command support */
+#define VIRTIO_BT_F_MSFT_EXT 1 /* Indicates MSFT vendor support */
+#define VIRTIO_BT_F_AOSP_EXT 2 /* Indicates AOSP vendor support */
+
+enum virtio_bt_config_type {
+ VIRTIO_BT_CONFIG_TYPE_PRIMARY = 0,
+ VIRTIO_BT_CONFIG_TYPE_AMP = 1,
+};
+
+enum virtio_bt_config_vendor {
+ VIRTIO_BT_CONFIG_VENDOR_NONE = 0,
+ VIRTIO_BT_CONFIG_VENDOR_ZEPHYR = 1,
+ VIRTIO_BT_CONFIG_VENDOR_INTEL = 2,
+ VIRTIO_BT_CONFIG_VENDOR_REALTEK = 3,
+};
+
+struct virtio_bt_config {
+ __u8 type;
+ __u16 vendor;
+ __u16 msft_opcode;
+} __attribute__((packed));
+
+#endif /* _UAPI_LINUX_VIRTIO_BT_H */
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index bc1c0621f5ed..b4f468e9441d 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -53,6 +53,7 @@
#define VIRTIO_ID_MEM 24 /* virtio mem */
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
+#define VIRTIO_ID_BT 28 /* virtio bluetooth */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/vdso/time64.h b/include/vdso/time64.h
index 9d43c3f5e89d..b40cfa2aa33c 100644
--- a/include/vdso/time64.h
+++ b/include/vdso/time64.h
@@ -9,6 +9,7 @@
#define NSEC_PER_MSEC 1000000L
#define USEC_PER_SEC 1000000L
#define NSEC_PER_SEC 1000000000L
+#define PSEC_PER_SEC 1000000000000LL
#define FSEC_PER_SEC 1000000000000000LL
#endif /* __VDSO_TIME64_H */