summaryrefslogtreecommitdiff
path: root/include/uapi
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-01-29 05:22:46 +0300
committerDavid S. Miller <davem@davemloft.net>2018-01-29 05:22:46 +0300
commit457740a903db30b14d53060f4e10d8cdecf464ac (patch)
tree18dd5d2200031fba1d4784e1376ca2b81ee88523 /include/uapi
parent6b2e2829c11ea677aa97ecfe95d9544aa0208b8c (diff)
parent8223967fe0b8eb2448cca5cfe3c64a0838e6f60d (diff)
downloadlinux-457740a903db30b14d53060f4e10d8cdecf464ac.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2018-01-26 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) A number of extensions to tcp-bpf, from Lawrence. - direct R or R/W access to many tcp_sock fields via bpf_sock_ops - passing up to 3 arguments to bpf_sock_ops functions - tcp_sock field bpf_sock_ops_cb_flags for controlling callbacks - optionally calling bpf_sock_ops program when RTO fires - optionally calling bpf_sock_ops program when packet is retransmitted - optionally calling bpf_sock_ops program when TCP state changes - access to tclass and sk_txhash - new selftest 2) div/mod exception handling, from Daniel. One of the ugly leftovers from the early eBPF days is that div/mod operations based on registers have a hard-coded src_reg == 0 test in the interpreter as well as in JIT code generators that would return from the BPF program with exit code 0. This was basically adopted from cBPF interpreter for historical reasons. There are multiple reasons why this is very suboptimal and prone to bugs. To name one: the return code mapping for such abnormal program exit of 0 does not always match with a suitable program type's exit code mapping. For example, '0' in tc means action 'ok' where the packet gets passed further up the stack, which is just undesirable for such cases (e.g. when implementing policy) and also does not match with other program types. After considering _four_ different ways to address the problem, we adapt the same behavior as on some major archs like ARMv8: X div 0 results in 0, and X mod 0 results in X. aarch64 and aarch32 ISA do not generate any traps or otherwise aborts of program execution for unsigned divides. Given the options, it seems the most suitable from all of them, also since major archs have similar schemes in place. Given this is all in the realm of undefined behavior, we still have the option to adapt if deemed necessary. 3) sockmap sample refactoring, from John. 4) lpm map get_next_key fixes, from Yonghong. 5) test cleanups, from Alexei and Prashant. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/uapi')
-rw-r--r--include/uapi/linux/bpf.h84
1 files changed, 81 insertions, 3 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 406c19d6016b..db6bdc375126 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -642,6 +642,14 @@ union bpf_attr {
* @optlen: length of optval in bytes
* Return: 0 or negative error
*
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ * Set callback flags for sock_ops
+ * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ * @flags: flags value
+ * Return: 0 for no error
+ * -EINVAL if there is no full tcp socket
+ * bits in flags that are not supported by current kernel
+ *
* int bpf_skb_adjust_room(skb, len_diff, mode, flags)
* Grow or shrink room in sk_buff.
* @skb: pointer to skb
@@ -748,7 +756,8 @@ union bpf_attr {
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
FN(getsockopt), \
- FN(override_return),
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -952,8 +961,9 @@ struct bpf_map_info {
struct bpf_sock_ops {
__u32 op;
union {
- __u32 reply;
- __u32 replylong[4];
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
};
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
@@ -968,8 +978,39 @@ struct bpf_sock_ops {
*/
__u32 snd_cwnd;
__u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
};
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -1003,6 +1044,43 @@ enum {
* a congestion threshold. RTTs above
* this indicate congestion
*/
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
};
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */