summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-08-04 01:34:36 +0300
committerJakub Kicinski <kuba@kernel.org>2023-08-04 01:34:36 +0300
commitd07b7b32da6f678d42d96a8b9824cf0a181ce140 (patch)
tree606829d4b33a57dbe0f0e825ca8505e0b5fcb759 /arch
parent35b1b1fd96388d5e3cf179bf36bd8a4153baf4a3 (diff)
parent648880e9331c68b2008430fd90f3648d1795399d (diff)
downloadlinux-d07b7b32da6f678d42d96a8b9824cf0a181ce140.tar.xz
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says: ==================== pull-request: bpf-next 2023-08-03 We've added 54 non-merge commits during the last 10 day(s) which contain a total of 84 files changed, 4026 insertions(+), 562 deletions(-). The main changes are: 1) Add SO_REUSEPORT support for TC bpf_sk_assign from Lorenz Bauer, Daniel Borkmann 2) Support new insns from cpu v4 from Yonghong Song 3) Non-atomically allocate freelist during prefill from YiFei Zhu 4) Support defragmenting IPv(4|6) packets in BPF from Daniel Xu 5) Add tracepoint to xdp attaching failure from Leon Hwang 6) struct netdev_rx_queue and xdp.h reshuffling to reduce rebuild time from Jakub Kicinski * tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (54 commits) net: invert the netdevice.h vs xdp.h dependency net: move struct netdev_rx_queue out of netdevice.h eth: add missing xdp.h includes in drivers selftests/bpf: Add testcase for xdp attaching failure tracepoint bpf, xdp: Add tracepoint to xdp attaching failure selftests/bpf: fix static assert compilation issue for test_cls_*.c bpf: fix bpf_probe_read_kernel prototype mismatch riscv, bpf: Adapt bpf trampoline to optimized riscv ftrace framework libbpf: fix typos in Makefile tracing: bpf: use struct trace_entry in struct syscall_tp_t bpf, devmap: Remove unused dtab field from bpf_dtab_netdev bpf, cpumap: Remove unused cmap field from bpf_cpu_map_entry netfilter: bpf: Only define get_proto_defrag_hook() if necessary bpf: Fix an array-index-out-of-bounds issue in disasm.c net: remove duplicate INDIRECT_CALLABLE_DECLARE of udp[6]_ehashfn docs/bpf: Fix malformed documentation bpf: selftests: Add defrag selftests bpf: selftests: Support custom type and proto for client sockets bpf: selftests: Support not connecting client socket netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link ... ==================== Link: https://lore.kernel.org/r/20230803174845.825419-1-martin.lau@linux.dev Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c153
-rw-r--r--arch/x86/net/bpf_jit_comp.c141
2 files changed, 199 insertions, 95 deletions
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index c648864c8cd1..0ca4f5c0097c 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -13,6 +13,8 @@
#include <asm/patch.h>
#include "bpf_jit.h"
+#define RV_FENTRY_NINSNS 2
+
#define RV_REG_TCC RV_REG_A6
#define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */
@@ -241,7 +243,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
if (!is_tail_call)
emit_mv(RV_REG_A0, RV_REG_A5, ctx);
emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
- is_tail_call ? 20 : 0, /* skip reserved nops and TCC init */
+ is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */
ctx);
}
@@ -618,32 +620,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
return 0;
}
-static int gen_call_or_nops(void *target, void *ip, u32 *insns)
-{
- s64 rvoff;
- int i, ret;
- struct rv_jit_context ctx;
-
- ctx.ninsns = 0;
- ctx.insns = (u16 *)insns;
-
- if (!target) {
- for (i = 0; i < 4; i++)
- emit(rv_nop(), &ctx);
- return 0;
- }
-
- rvoff = (s64)(target - (ip + 4));
- emit(rv_sd(RV_REG_SP, -8, RV_REG_RA), &ctx);
- ret = emit_jump_and_link(RV_REG_RA, rvoff, false, &ctx);
- if (ret)
- return ret;
- emit(rv_ld(RV_REG_RA, -8, RV_REG_SP), &ctx);
-
- return 0;
-}
-
-static int gen_jump_or_nops(void *target, void *ip, u32 *insns)
+static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
{
s64 rvoff;
struct rv_jit_context ctx;
@@ -658,38 +635,35 @@ static int gen_jump_or_nops(void *target, void *ip, u32 *insns)
}
rvoff = (s64)(target - ip);
- return emit_jump_and_link(RV_REG_ZERO, rvoff, false, &ctx);
+ return emit_jump_and_link(is_call ? RV_REG_T0 : RV_REG_ZERO, rvoff, false, &ctx);
}
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
void *old_addr, void *new_addr)
{
- u32 old_insns[4], new_insns[4];
+ u32 old_insns[RV_FENTRY_NINSNS], new_insns[RV_FENTRY_NINSNS];
bool is_call = poke_type == BPF_MOD_CALL;
- int (*gen_insns)(void *target, void *ip, u32 *insns);
- int ninsns = is_call ? 4 : 2;
int ret;
- if (!is_bpf_text_address((unsigned long)ip))
+ if (!is_kernel_text((unsigned long)ip) &&
+ !is_bpf_text_address((unsigned long)ip))
return -ENOTSUPP;
- gen_insns = is_call ? gen_call_or_nops : gen_jump_or_nops;
-
- ret = gen_insns(old_addr, ip, old_insns);
+ ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call);
if (ret)
return ret;
- if (memcmp(ip, old_insns, ninsns * 4))
+ if (memcmp(ip, old_insns, RV_FENTRY_NINSNS * 4))
return -EFAULT;
- ret = gen_insns(new_addr, ip, new_insns);
+ ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call);
if (ret)
return ret;
cpus_read_lock();
mutex_lock(&text_mutex);
- if (memcmp(ip, new_insns, ninsns * 4))
- ret = patch_text(ip, new_insns, ninsns);
+ if (memcmp(ip, new_insns, RV_FENTRY_NINSNS * 4))
+ ret = patch_text(ip, new_insns, RV_FENTRY_NINSNS);
mutex_unlock(&text_mutex);
cpus_read_unlock();
@@ -787,8 +761,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
int i, ret, offset;
int *branches_off = NULL;
int stack_size = 0, nregs = m->nr_args;
- int retaddr_off, fp_off, retval_off, args_off;
- int nregs_off, ip_off, run_ctx_off, sreg_off;
+ int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -796,13 +769,27 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
bool save_ret;
u32 insn;
- /* Generated trampoline stack layout:
+ /* Two types of generated trampoline stack layout:
+ *
+ * 1. trampoline called from function entry
+ * --------------------------------------
+ * FP + 8 [ RA to parent func ] return address to parent
+ * function
+ * FP + 0 [ FP of parent func ] frame pointer of parent
+ * function
+ * FP - 8 [ T0 to traced func ] return address of traced
+ * function
+ * FP - 16 [ FP of traced func ] frame pointer of traced
+ * function
+ * --------------------------------------
*
- * FP - 8 [ RA of parent func ] return address of parent
+ * 2. trampoline called directly
+ * --------------------------------------
+ * FP - 8 [ RA to caller func ] return address to caller
* function
- * FP - retaddr_off [ RA of traced func ] return address of traced
+ * FP - 16 [ FP of caller func ] frame pointer of caller
* function
- * FP - fp_off [ FP of parent func ]
+ * --------------------------------------
*
* FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
* BPF_TRAMP_F_RET_FENTRY_RET
@@ -833,14 +820,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
if (nregs > 8)
return -ENOTSUPP;
- /* room for parent function return address */
- stack_size += 8;
-
- stack_size += 8;
- retaddr_off = stack_size;
-
- stack_size += 8;
- fp_off = stack_size;
+ /* room of trampoline frame to store return address and frame pointer */
+ stack_size += 16;
save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
if (save_ret) {
@@ -867,12 +848,29 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
stack_size = round_up(stack_size, 16);
- emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
-
- emit_sd(RV_REG_SP, stack_size - retaddr_off, RV_REG_RA, ctx);
- emit_sd(RV_REG_SP, stack_size - fp_off, RV_REG_FP, ctx);
-
- emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
+ if (func_addr) {
+ /* For the trampoline called from function entry,
+ * the frame of traced function and the frame of
+ * trampoline need to be considered.
+ */
+ emit_addi(RV_REG_SP, RV_REG_SP, -16, ctx);
+ emit_sd(RV_REG_SP, 8, RV_REG_RA, ctx);
+ emit_sd(RV_REG_SP, 0, RV_REG_FP, ctx);
+ emit_addi(RV_REG_FP, RV_REG_SP, 16, ctx);
+
+ emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
+ emit_sd(RV_REG_SP, stack_size - 8, RV_REG_T0, ctx);
+ emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx);
+ emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
+ } else {
+ /* For the trampoline called directly, just handle
+ * the frame of trampoline.
+ */
+ emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
+ emit_sd(RV_REG_SP, stack_size - 8, RV_REG_RA, ctx);
+ emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx);
+ emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
+ }
/* callee saved register S1 to pass start time */
emit_sd(RV_REG_FP, -sreg_off, RV_REG_S1, ctx);
@@ -890,7 +888,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
/* skip to actual body of traced function */
if (flags & BPF_TRAMP_F_SKIP_FRAME)
- orig_call += 16;
+ orig_call += RV_FENTRY_NINSNS * 4;
if (flags & BPF_TRAMP_F_CALL_ORIG) {
emit_imm(RV_REG_A0, (const s64)im, ctx);
@@ -967,17 +965,30 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
- if (flags & BPF_TRAMP_F_SKIP_FRAME)
- /* return address of parent function */
- emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx);
- else
- /* return address of traced function */
- emit_ld(RV_REG_RA, stack_size - retaddr_off, RV_REG_SP, ctx);
+ if (func_addr) {
+ /* trampoline called from function entry */
+ emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx);
+ emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
+ emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);
- emit_ld(RV_REG_FP, stack_size - fp_off, RV_REG_SP, ctx);
- emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);
+ emit_ld(RV_REG_RA, 8, RV_REG_SP, ctx);
+ emit_ld(RV_REG_FP, 0, RV_REG_SP, ctx);
+ emit_addi(RV_REG_SP, RV_REG_SP, 16, ctx);
- emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* return to parent function */
+ emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
+ else
+ /* return to traced function */
+ emit_jalr(RV_REG_ZERO, RV_REG_T0, 0, ctx);
+ } else {
+ /* trampoline called directly */
+ emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx);
+ emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
+ emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);
+
+ emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
+ }
ret = ctx->ninsns;
out:
@@ -1691,8 +1702,8 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx)
store_offset = stack_adjust - 8;
- /* reserve 4 nop insns */
- for (i = 0; i < 4; i++)
+ /* nops reserved for auipc+jalr pair */
+ for (i = 0; i < RV_FENTRY_NINSNS; i++)
emit(rv_nop(), ctx);
/* First instruction is always setting the tail-call-counter
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 83c4b45dc65f..a5930042139d 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -701,6 +701,38 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
*pprog = prog;
}
+static void emit_movsx_reg(u8 **pprog, int num_bits, bool is64, u32 dst_reg,
+ u32 src_reg)
+{
+ u8 *prog = *pprog;
+
+ if (is64) {
+ /* movs[b,w,l]q dst, src */
+ if (num_bits == 8)
+ EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbe,
+ add_2reg(0xC0, src_reg, dst_reg));
+ else if (num_bits == 16)
+ EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbf,
+ add_2reg(0xC0, src_reg, dst_reg));
+ else if (num_bits == 32)
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x63,
+ add_2reg(0xC0, src_reg, dst_reg));
+ } else {
+ /* movs[b,w]l dst, src */
+ if (num_bits == 8) {
+ EMIT4(add_2mod(0x40, src_reg, dst_reg), 0x0f, 0xbe,
+ add_2reg(0xC0, src_reg, dst_reg));
+ } else if (num_bits == 16) {
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT1(add_2mod(0x40, src_reg, dst_reg));
+ EMIT3(add_2mod(0x0f, src_reg, dst_reg), 0xbf,
+ add_2reg(0xC0, src_reg, dst_reg));
+ }
+ }
+
+ *pprog = prog;
+}
+
/* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */
static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
{
@@ -779,6 +811,29 @@ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
*pprog = prog;
}
+/* LDSX: dst_reg = *(s8*)(src_reg + off) */
+static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* Emit 'movsx rax, byte ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xBE);
+ break;
+ case BPF_H:
+ /* Emit 'movsx rax, word ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xBF);
+ break;
+ case BPF_W:
+ /* Emit 'movsx rax, dword ptr [rax+0x14]' */
+ EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x63);
+ break;
+ }
+ emit_insn_suffix(&prog, src_reg, dst_reg, off);
+ *pprog = prog;
+}
+
/* STX: *(u8*)(dst_reg + off) = src_reg */
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
@@ -1028,9 +1083,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
case BPF_ALU64 | BPF_MOV | BPF_X:
case BPF_ALU | BPF_MOV | BPF_X:
- emit_mov_reg(&prog,
- BPF_CLASS(insn->code) == BPF_ALU64,
- dst_reg, src_reg);
+ if (insn->off == 0)
+ emit_mov_reg(&prog,
+ BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, src_reg);
+ else
+ emit_movsx_reg(&prog, insn->off,
+ BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, src_reg);
break;
/* neg dst */
@@ -1134,15 +1194,26 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
/* mov rax, dst_reg */
emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg);
- /*
- * xor edx, edx
- * equivalent to 'xor rdx, rdx', but one byte less
- */
- EMIT2(0x31, 0xd2);
+ if (insn->off == 0) {
+ /*
+ * xor edx, edx
+ * equivalent to 'xor rdx, rdx', but one byte less
+ */
+ EMIT2(0x31, 0xd2);
- /* div src_reg */
- maybe_emit_1mod(&prog, src_reg, is64);
- EMIT2(0xF7, add_1reg(0xF0, src_reg));
+ /* div src_reg */
+ maybe_emit_1mod(&prog, src_reg, is64);
+ EMIT2(0xF7, add_1reg(0xF0, src_reg));
+ } else {
+ if (BPF_CLASS(insn->code) == BPF_ALU)
+ EMIT1(0x99); /* cdq */
+ else
+ EMIT2(0x48, 0x99); /* cqo */
+
+ /* idiv src_reg */
+ maybe_emit_1mod(&prog, src_reg, is64);
+ EMIT2(0xF7, add_1reg(0xF8, src_reg));
+ }
if (BPF_OP(insn->code) == BPF_MOD &&
dst_reg != BPF_REG_3)
@@ -1262,6 +1333,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
break;
case BPF_ALU | BPF_END | BPF_FROM_BE:
+ case BPF_ALU64 | BPF_END | BPF_FROM_LE:
switch (imm32) {
case 16:
/* Emit 'ror %ax, 8' to swap lower 2 bytes */
@@ -1370,9 +1442,17 @@ st: if (is_imm8(insn->off))
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_MEM | BPF_DW:
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ /* LDXS: dst_reg = *(s8*)(src_reg + off) */
+ case BPF_LDX | BPF_MEMSX | BPF_B:
+ case BPF_LDX | BPF_MEMSX | BPF_H:
+ case BPF_LDX | BPF_MEMSX | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
insn_off = insn->off;
- if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
/* Conservatively check that src_reg + insn->off is a kernel address:
* src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
* src_reg is used as scratch for src_reg += insn->off and restored
@@ -1415,8 +1495,13 @@ st: if (is_imm8(insn->off))
start_of_ldx = prog;
end_of_jmp[-1] = start_of_ldx - end_of_jmp;
}
- emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off);
- if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEMSX ||
+ BPF_MODE(insn->code) == BPF_MEMSX)
+ emit_ldsx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off);
+ else
+ emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off);
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
struct exception_table_entry *ex;
u8 *_insn = image + proglen + (start_of_ldx - temp);
s64 delta;
@@ -1730,16 +1815,24 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */
break;
case BPF_JMP | BPF_JA:
- if (insn->off == -1)
- /* -1 jmp instructions will always jump
- * backwards two bytes. Explicitly handling
- * this case avoids wasting too many passes
- * when there are long sequences of replaced
- * dead code.
- */
- jmp_offset = -2;
- else
- jmp_offset = addrs[i + insn->off] - addrs[i];
+ case BPF_JMP32 | BPF_JA:
+ if (BPF_CLASS(insn->code) == BPF_JMP) {
+ if (insn->off == -1)
+ /* -1 jmp instructions will always jump
+ * backwards two bytes. Explicitly handling
+ * this case avoids wasting too many passes
+ * when there are long sequences of replaced
+ * dead code.
+ */
+ jmp_offset = -2;
+ else
+ jmp_offset = addrs[i + insn->off] - addrs[i];
+ } else {
+ if (insn->imm == -1)
+ jmp_offset = -2;
+ else
+ jmp_offset = addrs[i + insn->imm] - addrs[i];
+ }
if (!jmp_offset) {
/*