summaryrefslogtreecommitdiff
path: root/samples/bpf
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-16 01:04:25 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-16 01:04:25 +0300
commit9a76aba02a37718242d7cdc294f0a3901928aa57 (patch)
tree2040d038f85d2120f21af83b0793efd5af1864e3 /samples/bpf
parent0a957467c5fd46142bc9c52758ffc552d4c5e2f7 (diff)
parent26a1ccc6c117be8e33e0410fce8c5298b0015b99 (diff)
downloadlinux-9a76aba02a37718242d7cdc294f0a3901928aa57.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: - Gustavo A. R. Silva keeps working on the implicit switch fallthru changes. - Support 802.11ax High-Efficiency wireless in cfg80211 et al, From Luca Coelho. - Re-enable ASPM in r8169, from Kai-Heng Feng. - Add virtual XFRM interfaces, which avoids all of the limitations of existing IPSEC tunnels. From Steffen Klassert. - Convert GRO over to use a hash table, so that when we have many flows active we don't traverse a long list during accumulation. - Many new self tests for routing, TC, tunnels, etc. Too many contributors to mention them all, but I'm really happy to keep seeing this stuff. - Hardware timestamping support for dpaa_eth/fsl-fman from Yangbo Lu. - Lots of cleanups and fixes in L2TP code from Guillaume Nault. - Add IPSEC offload support to netdevsim, from Shannon Nelson. - Add support for slotting with non-uniform distribution to netem packet scheduler, from Yousuk Seung. - Add UDP GSO support to mlx5e, from Boris Pismenny. - Support offloading of Team LAG in NFP, from John Hurley. - Allow to configure TX queue selection based upon RX queue, from Amritha Nambiar. - Support ethtool ring size configuration in aquantia, from Anton Mikaev. - Support DSCP and flowlabel per-transport in SCTP, from Xin Long. - Support list based batching and stack traversal of SKBs, this is very exciting work. From Edward Cree. - Busyloop optimizations in vhost_net, from Toshiaki Makita. - Introduce the ETF qdisc, which allows time based transmissions. IGB can offload this in hardware. From Vinicius Costa Gomes. - Add parameter support to devlink, from Moshe Shemesh. - Several multiplication and division optimizations for BPF JIT in nfp driver, from Jiong Wang. - Lots of preparatory work to make more of the packet scheduler layer lockless, when possible, from Vlad Buslov. - Add ACK filter and NAT awareness to sch_cake packet scheduler, from Toke Høiland-Jørgensen. 
- Support regions and region snapshots in devlink, from Alex Vesker. - Allow to attach XDP programs to both HW and SW at the same time on a given device, with initial support in nfp. From Jakub Kicinski. - Add TLS RX offload and support in mlx5, from Ilya Lesokhin. - Use PHYLIB in r8169 driver, from Heiner Kallweit. - All sorts of changes to support Spectrum 2 in mlxsw driver, from Ido Schimmel. - PTP support in mv88e6xxx DSA driver, from Andrew Lunn. - Make TCP_USER_TIMEOUT socket option more accurate, from Jon Maxwell. - Support for templates in packet scheduler classifier, from Jiri Pirko. - IPV6 support in RDS, from Ka-Cheong Poon. - Native tproxy support in nf_tables, from Máté Eckl. - Maintain IP fragment queue in an rbtree, but optimize properly for in-order frags. From Peter Oskolkov. - Improve handling of ACKs on hole repairs, from Yuchung Cheng" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1996 commits) bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT" hv/netvsc: Fix NULL dereference at single queue mode fallback net: filter: mark expected switch fall-through xen-netfront: fix warn message as irq device name has '/' cxgb4: Add new T5 PCI device ids 0x50af and 0x50b0 net: dsa: mv88e6xxx: missing unlock on error path rds: fix building with IPV6=m inet/connection_sock: prefer _THIS_IP_ to current_text_addr net: dsa: mv88e6xxx: bitwise vs logical bug net: sock_diag: Fix spectre v1 gadget in __sock_diag_cmd() ieee802154: hwsim: using right kind of iteration net: hns3: Add vlan filter setting by ethtool command -K net: hns3: Set tx ring' tc info when netdev is up net: hns3: Remove tx ring BD len register in hns3_enet net: hns3: Fix desc num set to default when setting channel net: hns3: Fix for phy link issue when using marvell phy driver net: hns3: Fix for information of phydev lost problem when down/up net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero net: hns3: Add support for serdes loopback 
selftest bnxt_en: take coredump_record structure off stack ...
Diffstat (limited to 'samples/bpf')
-rw-r--r--samples/bpf/Makefile25
-rw-r--r--samples/bpf/bpf_load.c3
-rw-r--r--samples/bpf/hash_func01.h55
-rw-r--r--samples/bpf/test_cgrp2_attach2.c21
-rw-r--r--samples/bpf/test_cgrp2_sock2.c2
-rw-r--r--samples/bpf/xdp_fwd_user.c34
-rw-r--r--samples/bpf/xdp_redirect_cpu_kern.c114
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c4
-rw-r--r--samples/bpf/xdp_rxq_info_kern.c43
-rw-r--r--samples/bpf/xdp_rxq_info_user.c45
-rw-r--r--samples/bpf/xdp_sample_pkts_kern.c66
-rw-r--r--samples/bpf/xdp_sample_pkts_user.c169
-rw-r--r--samples/bpf/xdpsock_user.c43
13 files changed, 590 insertions, 34 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index bd9f6c2a808e..36f9f41d094b 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -52,6 +52,7 @@ hostprogs-y += xdp_adjust_tail
hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
+hostprogs-y += xdp_sample_pkts
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -104,9 +105,10 @@ xdp_rxq_info-objs := xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o syscall_tp_user.o
cpustat-objs := bpf_load.o cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
-xdpsock-objs := bpf_load.o xdpsock_user.o
-xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
+xdpsock-objs := xdpsock_user.o
+xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
+xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -163,6 +165,7 @@ always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o
+always += xdp_sample_pkts_kern.o
KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -179,6 +182,7 @@ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/
KBUILD_HOSTLDLIBS += $(LIBBPF) -lelf
HOSTLDLIBS_tracex4 += -lrt
@@ -191,6 +195,8 @@ HOSTLDLIBS_xdpsock += -pthread
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
LLC ?= llc
CLANG ?= clang
+LLVM_OBJCOPY ?= llvm-objcopy
+BTF_PAHOLE ?= pahole
# Detect that we're cross compiling and use the cross compiler
ifdef CROSS_COMPILE
@@ -198,6 +204,16 @@ HOSTCC = $(CROSS_COMPILE)gcc
CLANG_ARCH_ARGS = -target $(ARCH)
endif
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+
+ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
+ EXTRA_CFLAGS += -g
+ LLC_FLAGS += -mattr=dwarfris
+ DWARF2BTF = y
+endif
+
# Trick to allow make to be run from this directory
all:
$(MAKE) -C ../../ $(CURDIR)/ BPF_SAMPLES_PATH=$(CURDIR)
@@ -256,4 +272,7 @@ $(obj)/%.o: $(src)/%.c
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
- -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+ $(BTF_PAHOLE) -J $@
+endif
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 89161c9ed466..904e775d1a44 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -107,6 +107,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return -1;
}
+ if (prog_cnt == MAX_PROGS)
+ return -1;
+
fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
bpf_log_buf, BPF_LOG_BUF_SIZE);
if (fd < 0) {
diff --git a/samples/bpf/hash_func01.h b/samples/bpf/hash_func01.h
new file mode 100644
index 000000000000..38255812e376
--- /dev/null
+++ b/samples/bpf/hash_func01.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1
+ *
+ * Based on Paul Hsieh's (LGPL 2.1) hash function
+ * From: http://www.azillionmonkeys.com/qed/hash.html
+ */
+
+#define get16bits(d) (*((const __u16 *) (d)))
+
+static __always_inline /* Hash len bytes at data, seeded with initval; returns 0 when data is NULL or len <= 0. */
+__u32 SuperFastHash (const char *data, int len, __u32 initval) {
+ __u32 hash = initval; /* running hash value, starts from the caller's seed */
+ __u32 tmp;
+ int rem;
+
+ if (len <= 0 || data == NULL) return 0; /* nothing to hash */
+
+ rem = len & 3; /* bytes left over after the 4-byte chunks */
+ len >>= 2; /* number of whole 4-byte chunks */
+
+ /* Main loop */
+#pragma clang loop unroll(full)
+ for (;len > 0; len--) { /* consumes two 16-bit words (4 bytes) per iteration */
+ hash += get16bits (data);
+ tmp = (get16bits (data+2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2*sizeof (__u16); /* advance past the 4 bytes just mixed in */
+ hash += hash >> 11;
+ }
+
+ /* Handle end cases */
+ switch (rem) {
+ case 3: hash += get16bits (data); /* 3 bytes left: one 16-bit word plus one byte */
+ hash ^= hash << 16;
+ hash ^= ((signed char)data[sizeof (__u16)]) << 18; /* third byte, read as signed char */
+ hash += hash >> 11;
+ break;
+ case 2: hash += get16bits (data); /* 2 bytes left: one 16-bit word */
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1: hash += (signed char)*data; /* 1 byte left, read as signed char */
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ } /* rem == 0 matches no case: no tail bytes to mix */
+
+ /* Force "avalanching" of final 127 bits */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c
index b453e6a161be..180f9d813bca 100644
--- a/samples/bpf/test_cgrp2_attach2.c
+++ b/samples/bpf/test_cgrp2_attach2.c
@@ -8,7 +8,8 @@
* information. The number of invocations of the program, which maps
* to the number of packets received, is stored to key 0. Key 1 is
* incremented on each iteration by the number of bytes stored in
- * the skb.
+ * the skb. The program also stores the number of received bytes
+ * in the cgroup storage.
*
* - Attaches the new program to a cgroup using BPF_PROG_ATTACH
*
@@ -21,12 +22,15 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
+#include <sys/resource.h>
+#include <sys/time.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include "bpf_insn.h"
+#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
#define FOO "/foo"
@@ -205,6 +209,8 @@ static int map_fd = -1;
static int prog_load_cnt(int verdict, int val)
{
+ int cgroup_storage_fd;
+
if (map_fd < 0)
map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
if (map_fd < 0) {
@@ -212,6 +218,13 @@ static int prog_load_cnt(int verdict, int val)
return -1;
}
+ cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ if (cgroup_storage_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ return -1;
+ }
+
struct bpf_insn prog[] = {
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
@@ -222,6 +235,11 @@ static int prog_load_cnt(int verdict, int val)
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+ BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
+ BPF_MOV64_IMM(BPF_REG_1, val),
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
BPF_EXIT_INSN(),
};
@@ -237,6 +255,7 @@ static int prog_load_cnt(int verdict, int val)
printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
return 0;
}
+ close(cgroup_storage_fd);
return ret;
}
diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c
index 3b5be2364975..a9277b118c33 100644
--- a/samples/bpf/test_cgrp2_sock2.c
+++ b/samples/bpf/test_cgrp2_sock2.c
@@ -51,7 +51,7 @@ int main(int argc, char **argv)
if (argc > 3)
filter_id = atoi(argv[3]);
- if (filter_id > prog_cnt) {
+ if (filter_id >= prog_cnt) {
printf("Invalid program id; program not found in file\n");
return EXIT_FAILURE;
}
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index a87a2048ed32..f88e1d7093d6 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -24,8 +24,7 @@
#include <fcntl.h>
#include <libgen.h>
-#include "bpf_load.h"
-#include "bpf_util.h"
+#include "bpf/libbpf.h"
#include <bpf/bpf.h>
@@ -63,9 +62,15 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
+ struct bpf_prog_load_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ };
+ const char *prog_name = "xdp_fwd";
+ struct bpf_program *prog;
char filename[PATH_MAX];
+ struct bpf_object *obj;
int opt, i, idx, err;
- int prog_id = 0;
+ int prog_fd, map_fd;
int attach = 1;
int ret = 0;
@@ -75,7 +80,7 @@ int main(int argc, char **argv)
attach = 0;
break;
case 'D':
- prog_id = 1;
+ prog_name = "xdp_fwd_direct";
break;
default:
usage(basename(argv[0]));
@@ -90,6 +95,7 @@ int main(int argc, char **argv)
if (attach) {
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ prog_load_attr.file = filename;
if (access(filename, O_RDONLY) < 0) {
printf("error accessing file %s: %s\n",
@@ -97,19 +103,25 @@ int main(int argc, char **argv)
return 1;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
+ if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
- }
- if (!prog_fd[prog_id]) {
- printf("load_bpf_file: %s\n", strerror(errno));
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ printf("program not found: %s\n", strerror(prog_fd));
+ return 1;
+ }
+ map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj,
+ "tx_port"));
+ if (map_fd < 0) {
+ printf("map not found: %s\n", strerror(map_fd));
return 1;
}
}
if (attach) {
for (i = 1; i < 64; ++i)
- bpf_map_update_elem(map_fd[0], &i, &i, 0);
+ bpf_map_update_elem(map_fd, &i, &i, 0);
}
for (i = optind; i < argc; ++i) {
@@ -126,7 +138,7 @@ int main(int argc, char **argv)
if (err)
ret = err;
} else {
- err = do_attach(idx, prog_fd[prog_id], argv[i]);
+ err = do_attach(idx, prog_fd, argv[i]);
if (err)
ret = err;
}
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
index 4938dcbaecbf..a306d1c75622 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -13,6 +13,7 @@
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
+#include "hash_func01.h"
#define MAX_CPUS 64 /* WARNING - sync with _user.c */
@@ -134,7 +135,16 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
return false;
eth_type = vlan_hdr->h_vlan_encapsulated_proto;
}
- /* TODO: Handle double VLAN tagged packet */
+ /* Handle double VLAN tagged packet */
+ if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vlan_hdr;
+
+ vlan_hdr = (void *)eth + offset;
+ offset += sizeof(*vlan_hdr);
+ if ((void *)eth + offset > data_end)
+ return false;
+ eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+ }
*eth_proto = ntohs(eth_type);
*l3_offset = offset;
@@ -452,6 +462,108 @@ int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
+/* Hashing initval */
+#define INITVAL 15485863
+
+static __always_inline /* Hash the IPv4 address pair found at offset nh_off; returns 0 if the header does not fit in the packet. */
+u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct iphdr *iph = data + nh_off; /* IPv4 header is expected nh_off bytes into the packet */
+ u32 cpu_hash;
+
+ if (iph + 1 > data_end) /* the whole IPv4 header must lie before data_end */
+ return 0;
+
+ cpu_hash = iph->saddr + iph->daddr; /* a sum, so swapping source and destination gives the same value */
+ cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol); /* hash the 4-byte sum; the seed is offset by the protocol field */
+
+ return cpu_hash;
+}
+
+static __always_inline /* Hash the IPv6 address pair found at offset nh_off; returns 0 if the header does not fit in the packet. */
+u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ipv6hdr *ip6h = data + nh_off; /* IPv6 header is expected nh_off bytes into the packet */
+ u32 cpu_hash;
+
+ if (ip6h + 1 > data_end) /* the whole IPv6 header must lie before data_end */
+ return 0;
+
+ cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0]; /* fold all four 32-bit words of both addresses into one sum */
+ cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
+ cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
+ cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
+ cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr); /* hash the 4-byte sum; the seed is offset by the nexthdr field */
+
+ return cpu_hash;
+}
+
+/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
+ * hashing scheme is symmetric, meaning swapping IP src/dest still hits
+ * the same CPU.
+ */
+SEC("xdp_cpu_map5_lb_hash_ip_pairs")
+int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
+{
+	/* Pick a destination CPU from a hash of the packet's IP address pair
+	 * and redirect the packet to it through cpu_map.
+	 *
+	 * Returns XDP_ABORTED when a map lookup fails or the selected CPU is
+	 * out of range, XDP_PASS when parse_eth() rejects the frame, and
+	 * otherwise the result of bpf_redirect_map().
+	 */
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	struct datarec *rec;
+	u16 eth_proto = 0;
+	u64 l3_offset = 0;
+	u32 cpu_dest = 0;
+	u32 cpu_idx = 0;
+	u32 *cpu_lookup;
+	u32 *cpu_max;
+	u32 cpu_hash;
+	u32 key = 0;
+
+	/* Count RX packet in map */
+	rec = bpf_map_lookup_elem(&rx_cnt, &key);
+	if (!rec)
+		return XDP_ABORTED;
+	rec->processed++;
+
+	/* Number of CPUs to spread over, read from entry 0 of cpus_count */
+	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
+	if (!cpu_max)
+		return XDP_ABORTED;
+
+	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+		return XDP_PASS; /* Just skip */
+
+	/* Hash for IPv4 and IPv6 */
+	switch (eth_proto) {
+	case ETH_P_IP:
+		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
+		break;
+	case ETH_P_IPV6:
+		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
+		break;
+	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
+	default:
+		cpu_hash = 0;
+	}
+
+	/* Choose CPU based on hash.
+	 * NOTE(review): a zero *cpu_max makes this a modulo by zero; confirm
+	 * that user space always populates cpus_count before attaching.
+	 */
+	cpu_idx = cpu_hash % *cpu_max;
+
+	/* Translate the hash slot into the actual CPU number */
+	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+	if (!cpu_lookup)
+		return XDP_ABORTED;
+	cpu_dest = *cpu_lookup;
+
+	if (cpu_dest >= MAX_CPUS) {
+		rec->issue++;
+		return XDP_ABORTED;
+	}
+
+	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 4b4d78fffe30..9a6c7e0a6dd1 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -22,7 +22,7 @@ static const char *__doc__ =
#define MAX_CPUS 64 /* WARNING - sync with _kern.c */
/* How many xdp_progs are defined in _kern.c */
-#define MAX_PROG 5
+#define MAX_PROG 6
/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead
* use bpf/libbpf.h), but cannot as (currently) needed for XDP
@@ -567,7 +567,7 @@ int main(int argc, char **argv)
int added_cpus = 0;
int longindex = 0;
int interval = 2;
- int prog_num = 0;
+ int prog_num = 5;
int add_cpu = -1;
__u32 qsize;
int opt;
diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c
index 3fd209291653..222a83eed1cb 100644
--- a/samples/bpf/xdp_rxq_info_kern.c
+++ b/samples/bpf/xdp_rxq_info_kern.c
@@ -4,6 +4,8 @@
* Example howto extract XDP RX-queue info
*/
#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/in.h>
#include "bpf_helpers.h"
/* Config setup from with userspace
@@ -14,6 +16,12 @@
struct config {
__u32 action;
int ifindex;
+ __u32 options;
+};
+enum cfg_options_flags { /* bit flags stored in struct config's options field */
+ NO_TOUCH = 0x0U, /* no option bit set */
+ READ_MEM = 0x1U, /* when set, xdp_prognum0 reads the Ethernet header */
+ SWAP_MAC = 0x2U, /* when set, xdp_prognum0 also calls swap_src_dst_mac() */
};
struct bpf_map_def SEC("maps") config_map = {
.type = BPF_MAP_TYPE_ARRAY,
@@ -45,6 +53,23 @@ struct bpf_map_def SEC("maps") rx_queue_index_map = {
.max_entries = MAX_RXQs + 1,
};
+static __always_inline /* Exchange the first six bytes at data with the six bytes that follow them. */
+void swap_src_dst_mac(void *data)
+{
+ unsigned short *p = data; /* treat the 12 bytes as six 16-bit words */
+ unsigned short dst[3]; /* temporary copy of words 0..2 */
+
+ dst[0] = p[0];
+ dst[1] = p[1];
+ dst[2] = p[2];
+ p[0] = p[3]; /* words 3..5 move into words 0..2 */
+ p[1] = p[4];
+ p[2] = p[5];
+ p[3] = dst[0]; /* saved words 0..2 go into words 3..5 */
+ p[4] = dst[1];
+ p[5] = dst[2];
+}
+
SEC("xdp_prog0")
int xdp_prognum0(struct xdp_md *ctx)
{
@@ -90,6 +115,24 @@ int xdp_prognum0(struct xdp_md *ctx)
if (key == MAX_RXQs)
rxq_rec->issue++;
+ /* Default: Don't touch packet data, only count packets */
+ if (unlikely(config->options & (READ_MEM|SWAP_MAC))) {
+ struct ethhdr *eth = data;
+
+ if (eth + 1 > data_end)
+ return XDP_ABORTED;
+
+ /* Avoid compiler removing this: Drop non 802.3 Ethertypes */
+ if (ntohs(eth->h_proto) < ETH_P_802_3_MIN)
+ return XDP_ABORTED;
+
+ /* XDP_TX requires changing MAC-addrs, else HW may drop.
+ * Can also be enabled with --swapmac (for test purposes)
+ */
+ if (unlikely(config->options & SWAP_MAC))
+ swap_src_dst_mac(data);
+ }
+
return config->action;
}
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index e4e9ba52bff0..248a7eab9531 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -50,6 +50,8 @@ static const struct option long_options[] = {
{"sec", required_argument, NULL, 's' },
{"no-separators", no_argument, NULL, 'z' },
{"action", required_argument, NULL, 'a' },
+ {"readmem", no_argument, NULL, 'r' },
+ {"swapmac", no_argument, NULL, 'm' },
{0, 0, NULL, 0 }
};
@@ -66,6 +68,12 @@ static void int_exit(int sig)
struct config {
__u32 action;
int ifindex;
+ __u32 options;
+};
+enum cfg_options_flags { /* bit flags stored in struct config's options field */
+ NO_TOUCH = 0x0U, /* no option bit set; the default in main() */
+ READ_MEM = 0x1U, /* set by the --readmem ('r') option */
+ SWAP_MAC = 0x2U, /* set by the --swapmac ('m') option, and forced when the action is XDP_TX */
};
#define XDP_ACTION_MAX (XDP_TX + 1)
#define XDP_ACTION_MAX_STRLEN 11
@@ -109,6 +117,18 @@ static void list_xdp_actions(void)
printf("\n");
}
+/* Return a short name for the given option flags, for the stats header.
+ *
+ * SWAP_MAC takes precedence over READ_MEM when both bits are set. A value
+ * with neither known bit set (and not NO_TOUCH) is reported on stderr and
+ * terminates the program with EXIT_FAIL.
+ *
+ * The returned strings are literals, hence the const-qualified return type.
+ */
+static const char *options2str(enum cfg_options_flags flag)
+{
+	if (flag == NO_TOUCH)
+		return "no_touch";
+	if (flag & SWAP_MAC)
+		return "swapmac";
+	if (flag & READ_MEM)
+		return "read";
+	/* Terminate the line so the message is not glued to later output */
+	fprintf(stderr, "ERR: Unknown config option flags\n");
+	exit(EXIT_FAIL);
+}
+
static void usage(char *argv[])
{
int i;
@@ -305,7 +325,7 @@ static __u64 calc_errs_pps(struct datarec *r,
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
- int action)
+ int action, __u32 cfg_opt)
{
unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
unsigned int nr_cpus = bpf_num_possible_cpus();
@@ -316,8 +336,8 @@ static void stats_print(struct stats_record *stats_rec,
int i;
/* Header */
- printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n",
- ifname, ifindex, action2str(action));
+ printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s options:%s\n",
+ ifname, ifindex, action2str(action), options2str(cfg_opt));
/* stats_global_map */
{
@@ -399,7 +419,7 @@ static inline void swap(struct stats_record **a, struct stats_record **b)
*b = tmp;
}
-static void stats_poll(int interval, int action)
+static void stats_poll(int interval, int action, __u32 cfg_opt)
{
struct stats_record *record, *prev;
@@ -410,7 +430,7 @@ static void stats_poll(int interval, int action)
while (1) {
swap(&prev, &record);
stats_collect(record);
- stats_print(record, prev, action);
+ stats_print(record, prev, action, cfg_opt);
sleep(interval);
}
@@ -421,6 +441,7 @@ static void stats_poll(int interval, int action)
int main(int argc, char **argv)
{
+ __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */
struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
@@ -435,6 +456,7 @@ int main(int argc, char **argv)
int interval = 2;
__u32 key = 0;
+
char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
int action = XDP_PASS; /* Default action */
char *action_str = NULL;
@@ -496,6 +518,12 @@ int main(int argc, char **argv)
action_str = (char *)&action_str_buf;
strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN);
break;
+ case 'r':
+ cfg_options |= READ_MEM;
+ break;
+ case 'm':
+ cfg_options |= SWAP_MAC;
+ break;
case 'h':
error:
default:
@@ -523,6 +551,11 @@ int main(int argc, char **argv)
}
cfg.action = action;
+ /* XDP_TX requires changing MAC-addrs, else HW may drop */
+ if (action == XDP_TX)
+ cfg_options |= SWAP_MAC;
+ cfg.options = cfg_options;
+
/* Trick to pretty printf with thousands separators use %' */
if (use_separators)
setlocale(LC_NUMERIC, "en_US");
@@ -542,6 +575,6 @@ int main(int argc, char **argv)
return EXIT_FAIL_XDP;
}
- stats_poll(interval, action);
+ stats_poll(interval, action, cfg_options);
return EXIT_OK;
}
diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
new file mode 100644
index 000000000000..f7ca8b850978
--- /dev/null
+++ b/samples/bpf/xdp_sample_pkts_kern.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define SAMPLE_SIZE 64ul
+#define MAX_CPUS 128
+
+#define bpf_printk(fmt, ...) /* copy fmt into a local char array, then pass it and its size to bpf_trace_printk() */ \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+struct bpf_map_def SEC("maps") my_map = { /* perf event array that xdp_sample_prog passes to bpf_perf_event_output() */
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = MAX_CPUS, /* entries are indexed by CPU number, so at most MAX_CPUS CPUs are covered */
+};
+
+SEC("xdp_sample")
+int xdp_sample_prog(struct xdp_md *ctx) /* Emit one perf event per non-empty packet (metadata plus up to SAMPLE_SIZE packet bytes); always returns XDP_PASS. */
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+
+ /* Metadata will be in the perf event before the packet data. */
+ struct S {
+ u16 cookie;
+ u16 pkt_len;
+ } __packed metadata;
+
+ if (data < data_end) { /* skip packets with no data */
+ /* The XDP perf_event_output handler will use the upper 32 bits
+ * of the flags argument as a number of bytes to include of the
+ * packet payload in the event data. If the size is too big, the
+ * call to bpf_perf_event_output will fail and return -EFAULT.
+ *
+ * See bpf_xdp_event_output in net/core/filter.c.
+ *
+ * The BPF_F_CURRENT_CPU flag means that the event output fd
+ * will be indexed by the CPU number in the event map.
+ */
+ u64 flags = BPF_F_CURRENT_CPU;
+ u16 sample_size;
+ int ret;
+
+ metadata.cookie = 0xdead; /* fixed marker value placed at the start of every event */
+ metadata.pkt_len = (u16)(data_end - data); /* packet length, truncated to 16 bits */
+ sample_size = min(metadata.pkt_len, SAMPLE_SIZE); /* never request more than SAMPLE_SIZE payload bytes */
+ flags |= (u64)sample_size << 32; /* payload byte count goes into the upper 32 bits of flags */
+
+ ret = bpf_perf_event_output(ctx, &my_map, flags,
+ &metadata, sizeof(metadata));
+ if (ret) /* a failure is only logged; the packet is still passed on */
+ bpf_printk("perf_event_output failed: %d\n", ret);
+ }
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
new file mode 100644
index 000000000000..8dd87c1eb560
--- /dev/null
+++ b/samples/bpf/xdp_sample_pkts_user.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <net/if.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/sysinfo.h>
+#include <sys/ioctl.h>
+#include <signal.h>
+#include <libbpf.h>
+#include <bpf/bpf.h>
+
+#include "perf-sys.h"
+#include "trace_helpers.h"
+
+#define MAX_CPUS 128
+static int pmu_fds[MAX_CPUS], if_idx;
+static struct perf_event_mmap_page *headers[MAX_CPUS];
+static char *if_name;
+
+/* Install the XDP program referred to by fd on interface index idx.
+ *
+ * name is used only in the error message. Returns the value of
+ * bpf_set_link_xdp_fd(), which is negative on failure.
+ */
+static int do_attach(int idx, int fd, const char *name)
+{
+	int rc = bpf_set_link_xdp_fd(idx, fd, 0);
+
+	if (rc >= 0)
+		return rc;
+
+	printf("ERROR: failed to attach program to %s\n", name);
+	return rc;
+}
+
+/* Remove the XDP program from interface index idx by installing fd -1.
+ *
+ * name is used only in the error message. Returns the value of
+ * bpf_set_link_xdp_fd(), which is negative on failure.
+ */
+static int do_detach(int idx, const char *name)
+{
+	int rc = bpf_set_link_xdp_fd(idx, -1, 0);
+
+	if (rc >= 0)
+		return rc;
+
+	printf("ERROR: failed to detach program from %s\n", name);
+	return rc;
+}
+
+#define SAMPLE_SIZE 64
+
+/* Perf event callback: validate one sample record and print the packet
+ * length followed by up to 14 bytes of the captured packet.
+ *
+ * data points at the raw sample and size is its length in bytes. The record
+ * starts with a 16-bit cookie and a 16-bit packet length, followed by the
+ * sampled packet bytes.
+ *
+ * Returns LIBBPF_PERF_EVENT_CONT to keep polling, or LIBBPF_PERF_EVENT_ERROR
+ * when the record is too short or the cookie does not match.
+ */
+static int print_bpf_output(void *data, int size)
+{
+	struct {
+		__u16 cookie;
+		__u16 pkt_len;
+		__u8  pkt_data[SAMPLE_SIZE];
+	} __packed *e = data;
+	const int hdr_len = (int)(sizeof(e->cookie) + sizeof(e->pkt_len));
+	int avail;
+	int i;
+
+	/* Do not read the metadata out of a record too short to hold it */
+	if (size < hdr_len) {
+		printf("BUG short sample sized %d\n", size);
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+
+	if (e->cookie != 0xdead) {
+		printf("BUG cookie %x sized %d\n",
+		       e->cookie, size);
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+
+	/* Packet bytes actually present in this record */
+	avail = size - hdr_len;
+	if (avail > SAMPLE_SIZE)
+		avail = SAMPLE_SIZE;
+
+	printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len);
+	/* 14 bytes = length of an Ethernet header */
+	for (i = 0; i < 14 && i < e->pkt_len && i < avail; i++)
+		printf("%02x ", e->pkt_data[i]);
+	printf("\n");
+
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+/* Open one BPF-output perf event per CPU (0..num-1), store its fd in
+ * pmu_fds[] and in map_fd under the CPU index, and enable it.
+ *
+ * Any failure is fatal: the error is reported, the XDP program is detached
+ * from if_idx and the process exits with status 1. The calls are checked
+ * explicitly rather than inside assert(), so that they are still made when
+ * the file is built with NDEBUG.
+ */
+static void test_bpf_perf_event(int map_fd, int num)
+{
+	struct perf_event_attr attr = {
+		.sample_type = PERF_SAMPLE_RAW,
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+		.wakeup_events = 1, /* get an fd notification for every event */
+	};
+	int i;
+
+	for (i = 0; i < num; i++) {
+		int key = i;
+
+		pmu_fds[i] = sys_perf_event_open(&attr, -1/*pid*/, i/*cpu*/,
+						 -1/*group_fd*/, 0);
+		if (pmu_fds[i] < 0) {
+			perror("perf_event_open");
+			goto fail;
+		}
+
+		if (bpf_map_update_elem(map_fd, &key,
+					&pmu_fds[i], BPF_ANY) != 0) {
+			perror("bpf_map_update_elem");
+			goto fail;
+		}
+
+		if (ioctl(pmu_fds[i], PERF_EVENT_IOC_ENABLE, 0) < 0) {
+			perror("ioctl(PERF_EVENT_IOC_ENABLE)");
+			goto fail;
+		}
+	}
+	return;
+
+fail:
+	/* Do not leave the XDP program installed on the interface */
+	do_detach(if_idx, if_name);
+	exit(1);
+}
+
+static void sig_handler(int signo) /* Signal handler: detach the XDP program from if_idx, then exit with status 0. */
+{
+ do_detach(if_idx, if_name); /* NOTE(review): do_detach() may call printf(), which is not async-signal-safe — confirm this is acceptable for a sample */
+ exit(0);
+}
+
+/* Load "<argv[0]>_kern.o", attach its XDP program to the interface named
+ * (or numbered) by argv[1], then poll the per-CPU perf buffers and print
+ * every sampled packet until polling stops or a signal arrives.
+ *
+ * Returns 1 on usage or setup errors and the do_attach() error if attaching
+ * fails. After a successful attach, every error path detaches the program
+ * before returning.
+ */
+int main(int argc, char **argv)
+{
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type	= BPF_PROG_TYPE_XDP,
+	};
+	struct bpf_object *obj;
+	struct bpf_map *map;
+	int prog_fd, map_fd;
+	char filename[256];
+	int ret, err, i;
+	int numcpus;
+
+	if (argc < 2) {
+		printf("Usage: %s <ifname>\n", argv[0]);
+		return 1;
+	}
+
+	/* pmu_fds[] and headers[] only have room for MAX_CPUS entries */
+	numcpus = get_nprocs();
+	if (numcpus > MAX_CPUS)
+		numcpus = MAX_CPUS;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	prog_load_attr.file = filename;
+
+	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+		return 1;
+
+	/* 0 is a valid descriptor; only a negative value means "no program" */
+	if (prog_fd < 0) {
+		printf("load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	/* Take the first map found in the object file */
+	map = bpf_map__next(NULL, obj);
+	if (!map) {
+		printf("finding a map in obj file failed\n");
+		return 1;
+	}
+	map_fd = bpf_map__fd(map);
+
+	/* Accept either an interface name or a numeric ifindex */
+	if_idx = if_nametoindex(argv[1]);
+	if (!if_idx)
+		if_idx = strtoul(argv[1], NULL, 0);
+
+	if (!if_idx) {
+		fprintf(stderr, "Invalid ifname\n");
+		return 1;
+	}
+	if_name = argv[1];
+	err = do_attach(if_idx, prog_fd, argv[1]);
+	if (err)
+		return err;
+
+	/* signal() returns the previous handler on success, which may be
+	 * non-zero (e.g. SIG_IGN); only SIG_ERR indicates failure.
+	 */
+	if (signal(SIGINT, sig_handler) == SIG_ERR ||
+	    signal(SIGHUP, sig_handler) == SIG_ERR ||
+	    signal(SIGTERM, sig_handler) == SIG_ERR) {
+		perror("signal");
+		do_detach(if_idx, if_name);
+		return 1;
+	}
+
+	test_bpf_perf_event(map_fd, numcpus);
+
+	for (i = 0; i < numcpus; i++) {
+		if (perf_event_mmap_header(pmu_fds[i], &headers[i]) < 0) {
+			do_detach(if_idx, if_name);
+			return 1;
+		}
+	}
+
+	ret = perf_event_poller_multi(pmu_fds, headers, numcpus,
+				      print_bpf_output);
+	/* Deliver SIGINT so that sig_handler() detaches the program.
+	 * NOTE(review): kill(0, ...) signals the whole process group, not
+	 * just this process — confirm that is intended.
+	 */
+	kill(0, SIGINT);
+	return ret;
+}
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 5904b1543831..4914788b6727 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -26,7 +26,7 @@
#include <sys/types.h>
#include <poll.h>
-#include "bpf_load.h"
+#include "bpf/libbpf.h"
#include "bpf_util.h"
#include <bpf/bpf.h>
@@ -145,8 +145,13 @@ static void dump_stats(void);
} while (0)
#define barrier() __asm__ __volatile__("": : :"memory")
+#ifdef __aarch64__
+#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory")
+#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory")
+#else
#define u_smp_rmb() barrier()
#define u_smp_wmb() barrier()
+#endif
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
@@ -886,7 +891,13 @@ static void l2fwd(struct xdpsock *xsk)
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_prog_load_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ };
+ int prog_fd, qidconf_map, xsks_map;
+ struct bpf_object *obj;
char xdp_filename[256];
+ struct bpf_map *map;
int i, ret, key = 0;
pthread_t pt;
@@ -899,24 +910,38 @@ int main(int argc, char **argv)
}
snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
+ prog_load_attr.file = xdp_filename;
- if (load_bpf_file(xdp_filename)) {
- fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf);
+ if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ exit(EXIT_FAILURE);
+ if (prog_fd < 0) {
+ fprintf(stderr, "ERROR: no program found: %s\n",
+ strerror(prog_fd));
exit(EXIT_FAILURE);
}
- if (!prog_fd[0]) {
- fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n",
- strerror(errno));
+ map = bpf_object__find_map_by_name(obj, "qidconf_map");
+ qidconf_map = bpf_map__fd(map);
+ if (qidconf_map < 0) {
+ fprintf(stderr, "ERROR: no qidconf map found: %s\n",
+ strerror(qidconf_map));
+ exit(EXIT_FAILURE);
+ }
+
+ map = bpf_object__find_map_by_name(obj, "xsks_map");
+ xsks_map = bpf_map__fd(map);
+ if (xsks_map < 0) {
+ fprintf(stderr, "ERROR: no xsks map found: %s\n",
+ strerror(xsks_map));
exit(EXIT_FAILURE);
}
- if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
fprintf(stderr, "ERROR: link set xdp fd failed\n");
exit(EXIT_FAILURE);
}
- ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0);
+ ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0);
if (ret) {
fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
exit(EXIT_FAILURE);
@@ -933,7 +958,7 @@ int main(int argc, char **argv)
/* ...and insert them into the map. */
for (i = 0; i < num_socks; i++) {
key = i;
- ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0);
+ ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0);
if (ret) {
fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
exit(EXIT_FAILURE);