From 356ed64991c6847a0c4f2e8fa3b1133f7a14f1fc Mon Sep 17 00:00:00 2001
From: Hou Tao
Date: Tue, 14 Sep 2021 10:33:51 +0800
Subject: bpf: Handle return value of BPF_PROG_TYPE_STRUCT_OPS prog

Currently, if a function ptr in struct_ops has a return value, its caller
will get a random return value from it, because the return value of the
related BPF_PROG_TYPE_STRUCT_OPS prog is just dropped.

So add a new flag BPF_TRAMP_F_RET_FENTRY_RET to tell the bpf trampoline to
save and return the return value of the struct_ops prog if ret_size of the
function ptr is greater than 0. Also restrict the flag to be used alone.

Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS")
Signed-off-by: Hou Tao
Signed-off-by: Alexei Starovoitov
Acked-by: Martin KaFai Lau
Link: https://lore.kernel.org/bpf/20210914023351.3664499-1-houtao1@huawei.com
---
 arch/x86/net/bpf_jit_comp.c | 53 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 0fe6aacef3db..d24a512fd6f3 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1744,7 +1744,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
 }

 static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
-                           struct bpf_prog *p, int stack_size, bool mod_ret)
+                           struct bpf_prog *p, int stack_size, bool save_ret)
 {
         u8 *prog = *pprog;
         u8 *jmp_insn;
@@ -1777,11 +1777,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
         if (emit_call(&prog, p->bpf_func, prog))
                 return -EINVAL;

-        /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
+        /*
+         * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
          * of the previous call which is then passed on the stack to
          * the next BPF program.
+         *
+         * BPF_TRAMP_FENTRY trampoline may need to return the return
+         * value of BPF_PROG_TYPE_STRUCT_OPS prog.
          */
-        if (mod_ret)
+        if (save_ret)
                 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);

         /* replace 2 nops with JE insn, since jmp target is known */
@@ -1828,13 +1832,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 }

 static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
-                      struct bpf_tramp_progs *tp, int stack_size)
+                      struct bpf_tramp_progs *tp, int stack_size,
+                      bool save_ret)
 {
         int i;
         u8 *prog = *pprog;

         for (i = 0; i < tp->nr_progs; i++) {
-                if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
+                if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size,
+                                    save_ret))
                         return -EINVAL;
         }
         *pprog = prog;
@@ -1877,6 +1883,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
         return 0;
 }

+static bool is_valid_bpf_tramp_flags(unsigned int flags)
+{
+        if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+            (flags & BPF_TRAMP_F_SKIP_FRAME))
+                return false;
+
+        /*
+         * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
+         * and it must be used alone.
+         */
+        if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) &&
+            (flags & ~BPF_TRAMP_F_RET_FENTRY_RET))
+                return false;
+
+        return true;
+}
+
 /* Example:
  * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
  * its 'struct btf_func_model' will be nr_args=2
@@ -1949,17 +1972,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
         struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
         u8 **branches = NULL;
         u8 *prog;
+        bool save_ret;

         /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
         if (nr_args > 6)
                 return -ENOTSUPP;

-        if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
-            (flags & BPF_TRAMP_F_SKIP_FRAME))
+        if (!is_valid_bpf_tramp_flags(flags))
                 return -EINVAL;

-        if (flags & BPF_TRAMP_F_CALL_ORIG)
-                stack_size += 8; /* room for return value of orig_call */
+        /* room for return value of orig_call or fentry prog */
+        save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+        if (save_ret)
+                stack_size += 8;

         if (flags & BPF_TRAMP_F_IP_ARG)
                 stack_size += 8; /* room for IP address argument */
@@ -2005,7 +2030,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
         }

         if (fentry->nr_progs)
-                if (invoke_bpf(m, &prog, fentry, stack_size))
+                if (invoke_bpf(m, &prog, fentry, stack_size,
+                               flags & BPF_TRAMP_F_RET_FENTRY_RET))
                         return -EINVAL;

         if (fmod_ret->nr_progs) {
@@ -2052,7 +2078,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
         }

         if (fexit->nr_progs)
-                if (invoke_bpf(m, &prog, fexit, stack_size)) {
+                if (invoke_bpf(m, &prog, fexit, stack_size, false)) {
                         ret = -EINVAL;
                         goto cleanup;
                 }
@@ -2072,9 +2098,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
                         ret = -EINVAL;
                         goto cleanup;
                 }
-                /* restore original return value back into RAX */
-                emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
         }
+        /* restore return value of orig_call or fentry prog back into RAX */
+        if (save_ret)
+                emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);

         EMIT1(0x5B); /* pop rbx */
         EMIT1(0xC9); /* leave */
-- cgit v1.2.3
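Illustrative sketch (not kernel code and not part of the patch above; the
ssthresh-style member is only an assumed example of a struct_ops callback
with ret_size > 0): the kernel calls a struct_ops member through a plain
function pointer with a non-void return type, so the generated trampoline
has to hand back whatever the BPF program returned.

  #include <stdio.h>

  struct toy_ops {
          unsigned int (*ssthresh)(void *sk);     /* non-void return */
  };

  /* stands in for the BPF_PROG_TYPE_STRUCT_OPS program */
  static unsigned int bpf_prog_ssthresh(void *sk)
  {
          (void)sk;
          return 2;
  }

  /*
   * Stands in for the generated trampoline: with BPF_TRAMP_F_RET_FENTRY_RET
   * it saves the prog's return value and restores it into the return
   * register; before the fix the caller saw a leftover register value.
   */
  static unsigned int trampoline_ssthresh(void *sk)
  {
          unsigned int ret = bpf_prog_ssthresh(sk);

          return ret;
  }

  int main(void)
  {
          struct toy_ops ops = { .ssthresh = trampoline_ssthresh };

          printf("ssthresh = %u\n", ops.ssthresh(NULL));
          return 0;
  }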
From 37cb28ec7d3a36a5bace7063a3dba633ab110f8b Mon Sep 17 00:00:00 2001
From: Piotr Krysiuk
Date: Wed, 15 Sep 2021 17:04:37 +0100
Subject: bpf, mips: Validate conditional branch offsets

The conditional branch instructions on MIPS use 18-bit signed offsets
allowing for a branch range of 128 KBytes (backward and forward). However,
this limit is not observed by the cBPF JIT compiler, and so the JIT
compiler emits out-of-range branches when translating certain cBPF
programs. A specific example of such a cBPF program is included in the
"BPF_MAXINSNS: exec all MSH" test from lib/test_bpf.c that executes
anomalous machine code containing incorrect branch offsets under JIT.

Furthermore, this issue can be abused to craft undesirable machine code,
where the control flow is hijacked to execute arbitrary Kernel code.

The following steps can be used to reproduce the issue:

  # echo 1 > /proc/sys/net/core/bpf_jit_enable
  # modprobe test_bpf test_name="BPF_MAXINSNS: exec all MSH"

This should produce multiple warnings from build_bimm() similar to:

  ------------[ cut here ]------------
  WARNING: CPU: 0 PID: 209 at arch/mips/mm/uasm-mips.c:210 build_insn+0x558/0x590
  Micro-assembler field overflow
  Modules linked in: test_bpf(+)
  CPU: 0 PID: 209 Comm: modprobe Not tainted 5.14.3 #1
  Stack : 00000000 807bb824 82b33c9c 801843c0 00000000 00000004 00000000 63c9b5ee
          82b33af4 80999898 80910000 80900000 82fd6030 00000001 82b33a98 82087180
          00000000 00000000 80873b28 00000000 000000fc 82b3394c 00000000 2e34312e
          6d6d6f43 809a180f 809a1836 6f6d203a 80900000 00000001 82b33bac 80900000
          00027f80 00000000 00000000 807bb824 00000000 804ed790 001cc317 00000001
  [...]
  Call Trace:
  [<80108f44>] show_stack+0x38/0x118
  [<807a7aac>] dump_stack_lvl+0x5c/0x7c
  [<807a4b3c>] __warn+0xcc/0x140
  [<807a4c3c>] warn_slowpath_fmt+0x8c/0xb8
  [<8011e198>] build_insn+0x558/0x590
  [<8011e358>] uasm_i_bne+0x20/0x2c
  [<80127b48>] build_body+0xa58/0x2a94
  [<80129c98>] bpf_jit_compile+0x114/0x1e4
  [<80613fc4>] bpf_prepare_filter+0x2ec/0x4e4
  [<8061423c>] bpf_prog_create+0x80/0xc4
  [] test_bpf_init+0x300/0xba8 [test_bpf]
  [<8010051c>] do_one_initcall+0x50/0x1d4
  [<801c5e54>] do_init_module+0x60/0x220
  [<801c8b20>] sys_finit_module+0xc4/0xfc
  [<801144d0>] syscall_common+0x34/0x58
  [...]
  ---[ end trace a287d9742503c645 ]---

Then the anomalous machine code executes:

   => 0xc0a18000:  addiu   sp,sp,-16
      0xc0a18004:  sw      s3,0(sp)
      0xc0a18008:  sw      s4,4(sp)
      0xc0a1800c:  sw      s5,8(sp)
      0xc0a18010:  sw      ra,12(sp)
      0xc0a18014:  move    s5,a0
      0xc0a18018:  move    s4,zero
      0xc0a1801c:  move    s3,zero

      # __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0)
      0xc0a18020:  lui     t6,0x8012
      0xc0a18024:  ori     t4,t6,0x9e14
      0xc0a18028:  li      a1,0
      0xc0a1802c:  jalr    t4
      0xc0a18030:  move    a0,s5
      0xc0a18034:  bnez    v0,0xc0a1ffb8      # incorrect branch offset
      0xc0a18038:  move    v0,zero
      0xc0a1803c:  andi    s4,s3,0xf
      0xc0a18040:  b       0xc0a18048
      0xc0a18044:  sll     s4,s4,0x2
      [...]

      # __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0)
      0xc0a1ffa0:  lui     t6,0x8012
      0xc0a1ffa4:  ori     t4,t6,0x9e14
      0xc0a1ffa8:  li      a1,0
      0xc0a1ffac:  jalr    t4
      0xc0a1ffb0:  move    a0,s5
      0xc0a1ffb4:  bnez    v0,0xc0a1ffb8      # incorrect branch offset
      0xc0a1ffb8:  move    v0,zero
      0xc0a1ffbc:  andi    s4,s3,0xf
      0xc0a1ffc0:  b       0xc0a1ffc8
      0xc0a1ffc4:  sll     s4,s4,0x2

      # __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0)
      0xc0a1ffc8:  lui     t6,0x8012
      0xc0a1ffcc:  ori     t4,t6,0x9e14
      0xc0a1ffd0:  li      a1,0
      0xc0a1ffd4:  jalr    t4
      0xc0a1ffd8:  move    a0,s5
      0xc0a1ffdc:  bnez    v0,0xc0a3ffb8      # correct branch offset
      0xc0a1ffe0:  move    v0,zero
      0xc0a1ffe4:  andi    s4,s3,0xf
      0xc0a1ffe8:  b       0xc0a1fff0
      0xc0a1ffec:  sll     s4,s4,0x2
      [...]

      # epilogue
      0xc0a3ffb8:  lw      s3,0(sp)
      0xc0a3ffbc:  lw      s4,4(sp)
      0xc0a3ffc0:  lw      s5,8(sp)
      0xc0a3ffc4:  lw      ra,12(sp)
      0xc0a3ffc8:  addiu   sp,sp,16
      0xc0a3ffcc:  jr      ra
      0xc0a3ffd0:  nop

To mitigate this issue, we assert the branch ranges for each emit call
that could generate an out-of-range branch.

Fixes: 36366e367ee9 ("MIPS: BPF: Restore MIPS32 cBPF JIT")
Fixes: c6610de353da ("MIPS: net: Add BPF JIT")
Signed-off-by: Piotr Krysiuk
Signed-off-by: Daniel Borkmann
Tested-by: Johan Almbladh
Acked-by: Johan Almbladh
Cc: Paul Burton
Cc: Thomas Bogendoerfer
Link: https://lore.kernel.org/bpf/20210915160437.4080-1-piotras@gmail.com
---
 arch/mips/net/bpf_jit.c | 57 +++++++++++++++++++++++++++++++++++++------------
 1 file changed, 43 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 0af88622c619..cb6d22439f71 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -662,6 +662,11 @@ static void build_epilogue(struct jit_ctx *ctx)
         ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \
          func##_positive)

+static bool is_bad_offset(int b_off)
+{
+        return b_off > 0x1ffff || b_off < -0x20000;
+}
+
 static int build_body(struct jit_ctx *ctx)
 {
         const struct bpf_prog *prog = ctx->skf;
@@ -728,7 +733,10 @@ load_common:
                         /* Load return register on DS for failures */
                         emit_reg_move(r_ret, r_zero, ctx);
                         /* Return with error */
-                        emit_b(b_imm(prog->len, ctx), ctx);
+                        b_off = b_imm(prog->len, ctx);
+                        if (is_bad_offset(b_off))
+                                return -E2BIG;
+                        emit_b(b_off, ctx);
                         emit_nop(ctx);
                         break;
                 case BPF_LD | BPF_W | BPF_IND:
@@ -775,8 +783,10 @@ load_ind:
                         emit_jalr(MIPS_R_RA, r_s0, ctx);
                         emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
                         /* Check the error value */
-                        emit_bcond(MIPS_COND_NE, r_ret, 0,
-                                   b_imm(prog->len, ctx), ctx);
+                        b_off = b_imm(prog->len, ctx);
+                        if (is_bad_offset(b_off))
+                                return -E2BIG;
+                        emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx);
                         emit_reg_move(r_ret, r_zero, ctx);
                         /* We are good */
                         /* X <- P[1:K] & 0xf */
@@ -855,8 +865,10 @@ load_ind:
                         /* A /= X */
                         ctx->flags |= SEEN_X | SEEN_A;
                         /* Check if r_X is zero */
-                        emit_bcond(MIPS_COND_EQ, r_X, r_zero,
-                                   b_imm(prog->len, ctx), ctx);
+                        b_off = b_imm(prog->len, ctx);
+                        if (is_bad_offset(b_off))
+                                return -E2BIG;
+                        emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx);
                         emit_load_imm(r_ret, 0, ctx); /* delay slot */
                         emit_div(r_A, r_X, ctx);
                         break;
@@ -864,8 +876,10 @@ load_ind:
                         /* A %= X */
                         ctx->flags |= SEEN_X | SEEN_A;
                         /* Check if r_X is zero */
-                        emit_bcond(MIPS_COND_EQ, r_X, r_zero,
-                                   b_imm(prog->len, ctx), ctx);
+                        b_off = b_imm(prog->len, ctx);
+                        if (is_bad_offset(b_off))
+                                return -E2BIG;
+                        emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx);
                         emit_load_imm(r_ret, 0, ctx); /* delay slot */
                         emit_mod(r_A, r_X, ctx);
                         break;
@@ -926,7 +940,10 @@ load_ind:
                         break;
                 case BPF_JMP | BPF_JA:
                         /* pc += K */
-                        emit_b(b_imm(i + k + 1, ctx), ctx);
+                        b_off = b_imm(i + k + 1, ctx);
+                        if (is_bad_offset(b_off))
+                                return -E2BIG;
+                        emit_b(b_off, ctx);
                         emit_nop(ctx);
                         break;
                 case BPF_JMP | BPF_JEQ | BPF_K:
@@ -1056,12 +1073,16 @@ jmp_cmp:
                         break;
                 case BPF_RET | BPF_A:
                         ctx->flags |= SEEN_A;
-                        if (i != prog->len - 1)
+                        if (i != prog->len - 1) {
                                 /*
                                  * If this is not the last instruction
                                  * then jump to the epilogue
                                  */
-                                emit_b(b_imm(prog->len, ctx), ctx);
+                                b_off = b_imm(prog->len, ctx);
+                                if (is_bad_offset(b_off))
+                                        return -E2BIG;
+                                emit_b(b_off, ctx);
+                        }
                         emit_reg_move(r_ret, r_A, ctx); /* delay slot */
                         break;
                 case BPF_RET | BPF_K:
@@ -1075,7 +1096,10 @@ jmp_cmp:
                                  * If this is not the last instruction
                                  * then jump to the epilogue
                                  */
-                                emit_b(b_imm(prog->len, ctx), ctx);
+                                b_off = b_imm(prog->len, ctx);
+                                if (is_bad_offset(b_off))
+                                        return -E2BIG;
+                                emit_b(b_off, ctx);
                                 emit_nop(ctx);
                         }
                         break;
@@ -1133,8 +1157,10 @@ jmp_cmp:
                         /* Load *dev pointer */
                         emit_load_ptr(r_s0, r_skb, off, ctx);
                         /* error (0) in the delay slot */
-                        emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
-                                   b_imm(prog->len, ctx), ctx);
+                        b_off = b_imm(prog->len, ctx);
+                        if (is_bad_offset(b_off))
+                                return -E2BIG;
+                        emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx);
                         emit_reg_move(r_ret, r_zero, ctx);
                         if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
                                 BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
@@ -1244,7 +1270,10 @@ void bpf_jit_compile(struct bpf_prog *fp)

         /* Generate the actual JIT code */
         build_prologue(&ctx);
-        build_body(&ctx);
+        if (build_body(&ctx)) {
+                module_memfree(ctx.target);
+                goto out;
+        }
         build_epilogue(&ctx);

         /* Update the icache */
-- cgit v1.2.3
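For reference, a small stand-alone sketch (not part of the patch) of the
arithmetic behind the new is_bad_offset() bounds: a MIPS conditional branch
encodes a 16-bit signed word offset, i.e. an 18-bit signed byte offset once
shifted left by two, which is the 128 KByte range quoted above. The helper
below mirrors the one added in the patch; the assertions are illustrative.

  #include <assert.h>
  #include <stdbool.h>

  /* same bounds as the kernel helper added by this patch */
  static bool is_bad_offset(int b_off)
  {
          return b_off > 0x1ffff || b_off < -0x20000;
  }

  int main(void)
  {
          /* 16-bit signed word offset, scaled by the 4-byte instruction size */
          assert(!is_bad_offset(4 * 32767));      /*  131068 bytes: encodable */
          assert(!is_bad_offset(4 * -32768));     /* -131072 bytes: encodable */
          assert(is_bad_offset(4 * 32768));       /*  131072 bytes: too far   */
          assert(is_bad_offset(4 * -32769));      /* -131076 bytes: too far   */
          return 0;
  }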
From ced185824c89b60e65b5a2606954c098320cdfb8 Mon Sep 17 00:00:00 2001
From: Johan Almbladh
Date: Mon, 27 Sep 2021 13:11:57 +0000
Subject: bpf, x86: Fix bpf mapping of atomic fetch implementation

Fix the case where the dst register maps to %rax as otherwise this produces
an incorrect mapping with the implementation in 981f94c3e921 ("bpf: Add
bitwise atomic instructions"), as %rax is clobbered given it is an operand
of the cmpxchg. The issue is similar to b29dd96b905f ("bpf, x86: Fix
BPF_FETCH atomic and/or/xor with r0 as src"), just that the case of the dst
register was missed.

Before, dst=r0 (%rax) src=r2 (%rsi):

  [...]
  c5:   mov    %rax,%r10
  c8:   mov    0x0(%rax),%rax        <---+ (broken)
  cc:   mov    %rax,%r11                 |
  cf:   and    %rsi,%r11                 |
  d2:   lock cmpxchg %r11,0x0(%rax)  <---+
  d8:   jne    0x00000000000000c8        |
  da:   mov    %rax,%rsi                 |
  dd:   mov    %r10,%rax                 |
  [...]                                  |
                                         |
After, dst=r0 (%rax) src=r2 (%rsi):      |
                                         |
  [...]                                  |
  da:   mov    %rax,%r10                 |
  dd:   mov    0x0(%r10),%rax        <---+ (fixed)
  e1:   mov    %rax,%r11                 |
  e4:   and    %rsi,%r11                 |
  e7:   lock cmpxchg %r11,0x0(%r10)  <---+
  ed:   jne    0x00000000000000dd
  ef:   mov    %rax,%rsi
  f2:   mov    %r10,%rax
  [...]

The remaining combinations were fine as-is though:

After, dst=r9 (%r15) src=r0 (%rax):

  [...]
  dc:   mov    %rax,%r10
  df:   mov    0x0(%r15),%rax
  e3:   mov    %rax,%r11
  e6:   and    %r10,%r11
  e9:   lock cmpxchg %r11,0x0(%r15)
  ef:   jne    0x00000000000000df     _
  f1:   mov    %rax,%r10             | (unneeded, but
  f4:   mov    %r10,%rax            _|  not a problem)
  [...]

After, dst=r9 (%r15) src=r4 (%rcx):

  [...]
  de:   mov    %rax,%r10
  e1:   mov    0x0(%r15),%rax
  e5:   mov    %rax,%r11
  e8:   and    %rcx,%r11
  eb:   lock cmpxchg %r11,0x0(%r15)
  f1:   jne    0x00000000000000e1
  f3:   mov    %rax,%rcx
  f6:   mov    %r10,%rax
  [...]

The case of dst == src register is rejected by the verifier and therefore
not supported, but the x86 JIT also handles this case just fine.

After, dst=r0 (%rax) src=r0 (%rax):

  [...]
  eb:   mov    %rax,%r10
  ee:   mov    0x0(%r10),%rax
  f2:   mov    %rax,%r11
  f5:   and    %r10,%r11
  f8:   lock cmpxchg %r11,0x0(%r10)
  fe:   jne    0x00000000000000ee
 100:   mov    %rax,%r10
 103:   mov    %r10,%rax
  [...]

Fixes: 981f94c3e921 ("bpf: Add bitwise atomic instructions")
Reported-by: Johan Almbladh
Signed-off-by: Johan Almbladh
Co-developed-by: Daniel Borkmann
Signed-off-by: Daniel Borkmann
Reviewed-by: Brendan Jackman
Acked-by: Alexei Starovoitov
---
 arch/x86/net/bpf_jit_comp.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index d24a512fd6f3..9ea57389c554 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1341,9 +1341,10 @@ st:             if (is_imm8(insn->off))
                 if (insn->imm == (BPF_AND | BPF_FETCH) ||
                     insn->imm == (BPF_OR | BPF_FETCH) ||
                     insn->imm == (BPF_XOR | BPF_FETCH)) {
-                        u8 *branch_target;
                         bool is64 = BPF_SIZE(insn->code) == BPF_DW;
                         u32 real_src_reg = src_reg;
+                        u32 real_dst_reg = dst_reg;
+                        u8 *branch_target;

                         /*
                          * Can't be implemented with a single x86 insn.
@@ -1354,11 +1355,13 @@ st:             if (is_imm8(insn->off))
                         emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0);
                         if (src_reg == BPF_REG_0)
                                 real_src_reg = BPF_REG_AX;
+                        if (dst_reg == BPF_REG_0)
+                                real_dst_reg = BPF_REG_AX;

                         branch_target = prog;
                         /* Load old value */
                         emit_ldx(&prog, BPF_SIZE(insn->code),
-                                 BPF_REG_0, dst_reg, insn->off);
+                                 BPF_REG_0, real_dst_reg, insn->off);
                         /*
                          * Perform the (commutative) operation locally,
                          * put the result in the AUX_REG.
@@ -1369,7 +1372,8 @@ st:             if (is_imm8(insn->off))
                                   add_2reg(0xC0, AUX_REG, real_src_reg));
                         /* Attempt to swap in new value */
                         err = emit_atomic(&prog, BPF_CMPXCHG,
-                                          dst_reg, AUX_REG, insn->off,
+                                          real_dst_reg, AUX_REG,
+                                          insn->off,
                                           BPF_SIZE(insn->code));
                         if (WARN_ON(err))
                                 return err;
@@ -1383,11 +1387,10 @@ st:             if (is_imm8(insn->off))
                         /* Restore R0 after clobbering RAX */
                         emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX);
                         break;
-
                 }

                 err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
-                                   insn->off, BPF_SIZE(insn->code));
+                                  insn->off, BPF_SIZE(insn->code));
                 if (err)
                         return err;
                 break;
-- cgit v1.2.3
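As a closing illustration (a user-space analogue, not kernel code): x86 has
no single fetch-and-AND/OR/XOR instruction, so the JIT builds BPF_FETCH
atomics from a load plus a cmpxchg loop, and cmpxchg implicitly uses %rax
for the expected value. That is why neither the src nor the dst BPF register
may be left in %rax, the register backing BPF r0. The sketch below shows the
same load + compare-and-swap shape in portable C11.

  #include <stdatomic.h>
  #include <stdio.h>

  static unsigned long fetch_and(atomic_ulong *p, unsigned long mask)
  {
          unsigned long old = atomic_load(p);

          /* load + cmpxchg loop, the same shape the x86 JIT emits */
          while (!atomic_compare_exchange_weak(p, &old, old & mask))
                  ;
          return old;     /* BPF_FETCH: the previous value is handed back */
  }

  int main(void)
  {
          atomic_ulong v = 0xff;
          unsigned long old = fetch_and(&v, 0x0f);

          printf("old=%#lx new=%#lx\n", old, (unsigned long)atomic_load(&v));
          return 0;
  }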