summary refs log tree commit diff
path: root/arch
diff options
context:
space:
mode:
author Jakub Kicinski <kuba@kernel.org> 2022-07-23 02:55:43 +0300
committer Jakub Kicinski <kuba@kernel.org> 2022-07-23 02:55:44 +0300
commit b3fce974d4239bd46ae81bba07b59f255eb979d7 (patch)
tree 9856a94f13b97f74e831664899b1ab9d1e0c7b69 /arch
parent 3c47fb2f4c4df33881fa540e35e21415a6ecfbb5 (diff)
parent ea2babac63d40e59926dc5de4550dac94cc3c6d2 (diff)
download linux-b3fce974d4239bd46ae81bba07b59f255eb979d7.tar.xz
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
bpf-next 2022-07-22

We've added 73 non-merge commits during the last 12 day(s) which contain
a total of 88 files changed, 3458 insertions(+), 860 deletions(-).

The main changes are:

1) Implement BPF trampoline for arm64 JIT, from Xu Kuohai.

2) Add ksyscall/kretsyscall section support to libbpf to simplify tracing
   kernel syscalls through kprobe mechanism, from Andrii Nakryiko.

3) Allow for livepatch (KLP) and BPF trampolines to attach to the same
   kernel function, from Song Liu & Jiri Olsa.

4) Add new kfunc infrastructure for netfilter's CT e.g. to insert and
   change entries, from Kumar Kartikeya Dwivedi & Lorenzo Bianconi.

5) Add a ksym BPF iterator to allow for more flexible and efficient
   interactions with kernel symbols, from Alan Maguire.

6) Bug fixes in libbpf e.g. for uprobe binary path resolution, from
   Dan Carpenter.

7) Fix BPF subprog function names in stack traces, from Alexei Starovoitov.

8) libbpf support for writing custom perf event readers, from Jon Doron.

9) Switch to use SPDX tag for BPF helper man page, from Alejandro Colomar.

10) Fix xsk send-only sockets when in busy poll mode, from Maciej Fijalkowski.

11) Reparent BPF maps and their charging on memcg offlining, from
    Roman Gushchin.

12) Multiple follow-up fixes around BPF lsm cgroup infra, from
    Stanislav Fomichev.

13) Use bootstrap version of bpftool where possible to speed up builds,
    from Pu Lehui.

14) Cleanup BPF verifier's check_func_arg() handling, from Joanne Koong.

15) Make non-prealloced BPF map allocations low priority to play better
    with memcg limits, from Yafang Shao.

16) Fix BPF test runner to reject zero-length data for skbs, from
    Zhengchao Shao.

17) Various smaller cleanups and improvements all over the place.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (73 commits)
  bpf: Simplify bpf_prog_pack_[size|mask]
  bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch)
  bpf, x64: Allow to use caller address from stack
  ftrace: Allow IPMODIFY and DIRECT ops on the same function
  ftrace: Add modify_ftrace_direct_multi_nolock
  bpf/selftests: Fix couldn't retrieve pinned program in xdp veth test
  bpf: Fix build error in case of !CONFIG_DEBUG_INFO_BTF
  selftests/bpf: Fix test_verifier failed test in unprivileged mode
  selftests/bpf: Add negative tests for new nf_conntrack kfuncs
  selftests/bpf: Add tests for new nf_conntrack kfuncs
  selftests/bpf: Add verifier tests for trusted kfunc args
  net: netfilter: Add kfuncs to set and change CT status
  net: netfilter: Add kfuncs to set and change CT timeout
  net: netfilter: Add kfuncs to allocate and insert CT
  net: netfilter: Deduplicate code in bpf_{xdp,skb}_ct_lookup
  bpf: Add documentation for kfuncs
  bpf: Add support for forcing kfunc args to be trusted
  bpf: Switch to new kfunc flags infrastructure
  tools/resolve_btfids: Add support for 8-byte BTF sets
  bpf: Introduce 8-byte BTF set
  ...
====================

Link: https://lore.kernel.org/r/20220722221218.29943-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r-- arch/arm64/include/asm/insn.h 3
-rw-r--r-- arch/arm64/lib/insn.c 30
-rw-r--r-- arch/arm64/net/bpf_jit.h 7
-rw-r--r-- arch/arm64/net/bpf_jit_comp.c 715
-rw-r--r-- arch/x86/net/bpf_jit_comp.c 58
5 files changed, 768 insertions, 45 deletions
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 6aa2dc836db1..834bff720582 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -510,6 +510,9 @@ u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg,
unsigned int imm,
enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_literal(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_register reg,
+ bool is64bit);
u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
enum aarch64_insn_register reg2,
enum aarch64_insn_register base,
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index 695d7368fadc..49e972beeac7 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -323,7 +323,7 @@ static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type,
return insn;
}
-static inline long branch_imm_common(unsigned long pc, unsigned long addr,
+static inline long label_imm_common(unsigned long pc, unsigned long addr,
long range)
{
long offset;
@@ -354,7 +354,7 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
* ARM64 virtual address arrangement guarantees all kernel and module
* texts are within +/-128M.
*/
- offset = branch_imm_common(pc, addr, SZ_128M);
+ offset = label_imm_common(pc, addr, SZ_128M);
if (offset >= SZ_128M)
return AARCH64_BREAK_FAULT;
@@ -382,7 +382,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
u32 insn;
long offset;
- offset = branch_imm_common(pc, addr, SZ_1M);
+ offset = label_imm_common(pc, addr, SZ_1M);
if (offset >= SZ_1M)
return AARCH64_BREAK_FAULT;
@@ -421,7 +421,7 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
u32 insn;
long offset;
- offset = branch_imm_common(pc, addr, SZ_1M);
+ offset = label_imm_common(pc, addr, SZ_1M);
insn = aarch64_insn_get_bcond_value();
@@ -543,6 +543,28 @@ u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg,
return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
}
+u32 aarch64_insn_gen_load_literal(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_register reg,
+ bool is64bit)
+{
+ u32 insn;
+ long offset;
+
+ offset = label_imm_common(pc, addr, SZ_1M);
+ if (offset >= SZ_1M)
+ return AARCH64_BREAK_FAULT;
+
+ insn = aarch64_insn_get_ldr_lit_value();
+
+ if (is64bit)
+ insn |= BIT(30);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
+ offset >> 2);
+}
+
u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
enum aarch64_insn_register reg2,
enum aarch64_insn_register base,
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index 194c95ccc1cf..a6acb94ea3d6 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -80,6 +80,12 @@
#define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE)
#define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD)
+/* LDR (literal) */
+#define A64_LDR32LIT(Wt, offset) \
+ aarch64_insn_gen_load_literal(0, offset, Wt, false)
+#define A64_LDR64LIT(Xt, offset) \
+ aarch64_insn_gen_load_literal(0, offset, Xt, true)
+
/* Load/store register pair */
#define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \
aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \
@@ -270,6 +276,7 @@
#define A64_BTI_C A64_HINT(AARCH64_INSN_HINT_BTIC)
#define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ)
#define A64_BTI_JC A64_HINT(AARCH64_INSN_HINT_BTIJC)
+#define A64_NOP A64_HINT(AARCH64_INSN_HINT_NOP)
/* DMB */
#define A64_DMB_ISH aarch64_insn_gen_dmb(AARCH64_INSN_MB_ISH)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f08a4447d363..7ca8779ae34f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -10,6 +10,7 @@
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/slab.h>
@@ -18,6 +19,7 @@
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
#include <asm/insn.h>
+#include <asm/patching.h>
#include <asm/set_memory.h>
#include "bpf_jit.h"
@@ -78,6 +80,15 @@ struct jit_ctx {
int fpb_offset;
};
+struct bpf_plt {
+ u32 insn_ldr; /* load target */
+ u32 insn_br; /* branch to target */
+ u64 target; /* target value */
+};
+
+#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target)
+#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
+
static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
if (ctx->image != NULL)
@@ -140,6 +151,12 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val,
}
}
+static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
+{
+ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+ emit(insn, ctx);
+}
+
/*
* Kernel addresses in the vmalloc space use at most 48 bits, and the
* remaining bits are guaranteed to be 0x1. So we can compose the address
@@ -159,6 +176,14 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
}
}
+static inline void emit_call(u64 target, struct jit_ctx *ctx)
+{
+ u8 tmp = bpf2a64[TMP_REG_1];
+
+ emit_addr_mov_i64(tmp, target, ctx);
+ emit(A64_BLR(tmp), ctx);
+}
+
static inline int bpf2a64_offset(int bpf_insn, int off,
const struct jit_ctx *ctx)
{
@@ -235,13 +260,30 @@ static bool is_lsi_offset(int offset, int scale)
return true;
}
+/* generated prologue:
+ * bti c // if CONFIG_ARM64_BTI_KERNEL
+ * mov x9, lr
+ * nop // POKE_OFFSET
+ * paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
+ * stp x29, lr, [sp, #-16]!
+ * mov x29, sp
+ * stp x19, x20, [sp, #-16]!
+ * stp x21, x22, [sp, #-16]!
+ * stp x25, x26, [sp, #-16]!
+ * stp x27, x28, [sp, #-16]!
+ * mov x25, sp
+ * mov tcc, #0
+ * // PROLOGUE_OFFSET
+ */
+
+#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
+#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
+
+/* Offset of nop instruction in bpf prog entry to be poked */
+#define POKE_OFFSET (BTI_INSNS + 1)
+
/* Tail call offset to jump into */
-#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || \
- IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)
-#define PROLOGUE_OFFSET 9
-#else
-#define PROLOGUE_OFFSET 8
-#endif
+#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
@@ -280,12 +322,14 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
*
*/
+ emit_bti(A64_BTI_C, ctx);
+
+ emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
+ emit(A64_NOP, ctx);
+
/* Sign lr */
if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
emit(A64_PACIASP, ctx);
- /* BTI landing pad */
- else if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
- emit(A64_BTI_C, ctx);
/* Save FP and LR registers to stay align with ARM64 AAPCS */
emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
@@ -312,8 +356,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
}
/* BTI landing pad for the tail call, done with a BR */
- if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
- emit(A64_BTI_J, ctx);
+ emit_bti(A64_BTI_J, ctx);
}
emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx);
@@ -557,6 +600,53 @@ static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
return 0;
}
+void dummy_tramp(void);
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .global dummy_tramp\n"
+" .type dummy_tramp, %function\n"
+"dummy_tramp:"
+#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
+" bti j\n" /* dummy_tramp is called via "br x10" */
+#endif
+" mov x10, x30\n"
+" mov x30, x9\n"
+" ret x10\n"
+" .size dummy_tramp, .-dummy_tramp\n"
+" .popsection\n"
+);
+
+/* build a plt initialized like this:
+ *
+ * plt:
+ * ldr tmp, target
+ * br tmp
+ * target:
+ * .quad dummy_tramp
+ *
+ * when a long jump trampoline is attached, target is filled with the
+ * trampoline address, and when the trampoline is removed, target is
+ * restored to dummy_tramp address.
+ */
+static void build_plt(struct jit_ctx *ctx)
+{
+ const u8 tmp = bpf2a64[TMP_REG_1];
+ struct bpf_plt *plt = NULL;
+
+ /* make sure target is 64-bit aligned */
+ if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
+ emit(A64_NOP, ctx);
+
+ plt = (struct bpf_plt *)(ctx->image + ctx->idx);
+ /* plt is called via bl, no BTI needed here */
+ emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
+ emit(A64_BR(tmp), ctx);
+
+ if (ctx->image)
+ plt->target = (u64)&dummy_tramp;
+}
+
static void build_epilogue(struct jit_ctx *ctx)
{
const u8 r0 = bpf2a64[BPF_REG_0];
@@ -991,8 +1081,7 @@ emit_cond_jmp:
&func_addr, &func_addr_fixed);
if (ret < 0)
return ret;
- emit_addr_mov_i64(tmp, func_addr, ctx);
- emit(A64_BLR(tmp), ctx);
+ emit_call(func_addr, ctx);
emit(A64_MOV(1, r0, A64_R(0)), ctx);
break;
}
@@ -1336,6 +1425,13 @@ static int validate_code(struct jit_ctx *ctx)
if (a64_insn == AARCH64_BREAK_FAULT)
return -1;
}
+ return 0;
+}
+
+static int validate_ctx(struct jit_ctx *ctx)
+{
+ if (validate_code(ctx))
+ return -1;
if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
return -1;
@@ -1356,7 +1452,7 @@ struct arm64_jit_data {
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
- int image_size, prog_size, extable_size;
+ int image_size, prog_size, extable_size, extable_align, extable_offset;
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
struct arm64_jit_data *jit_data;
@@ -1426,13 +1522,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.epilogue_offset = ctx.idx;
build_epilogue(&ctx);
+ build_plt(&ctx);
+ extable_align = __alignof__(struct exception_table_entry);
extable_size = prog->aux->num_exentries *
sizeof(struct exception_table_entry);
/* Now we know the actual image size. */
prog_size = sizeof(u32) * ctx.idx;
- image_size = prog_size + extable_size;
+ /* also allocate space for plt target */
+ extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
+ image_size = extable_offset + extable_size;
header = bpf_jit_binary_alloc(image_size, &image_ptr,
sizeof(u32), jit_fill_hole);
if (header == NULL) {
@@ -1444,7 +1544,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.image = (__le32 *)image_ptr;
if (extable_size)
- prog->aux->extable = (void *)image_ptr + prog_size;
+ prog->aux->extable = (void *)image_ptr + extable_offset;
skip_init_ctx:
ctx.idx = 0;
ctx.exentry_idx = 0;
@@ -1458,9 +1558,10 @@ skip_init_ctx:
}
build_epilogue(&ctx);
+ build_plt(&ctx);
/* 3. Extra pass to validate JITed code. */
- if (validate_code(&ctx)) {
+ if (validate_ctx(&ctx)) {
bpf_jit_binary_free(header);
prog = orig_prog;
goto out_off;
@@ -1537,3 +1638,583 @@ bool bpf_jit_supports_subprog_tailcalls(void)
{
return true;
}
+
+static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
+ int args_off, int retval_off, int run_ctx_off,
+ bool save_ret)
+{
+ u32 *branch;
+ u64 enter_prog;
+ u64 exit_prog;
+ struct bpf_prog *p = l->link.prog;
+ int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+
+ if (p->aux->sleepable) {
+ enter_prog = (u64)__bpf_prog_enter_sleepable;
+ exit_prog = (u64)__bpf_prog_exit_sleepable;
+ } else {
+ enter_prog = (u64)__bpf_prog_enter;
+ exit_prog = (u64)__bpf_prog_exit;
+ }
+
+ if (l->cookie == 0) {
+ /* if cookie is zero, one instruction is enough to store it */
+ emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
+ } else {
+ emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
+ emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
+ ctx);
+ }
+
+ /* save p to callee saved register x19 to avoid loading p with mov_i64
+ * each time.
+ */
+ emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);
+
+ /* arg1: prog */
+ emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
+ /* arg2: &run_ctx */
+ emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx);
+
+ emit_call(enter_prog, ctx);
+
+ /* if (__bpf_prog_enter(prog) == 0)
+ * goto skip_exec_of_prog;
+ */
+ branch = ctx->image + ctx->idx;
+ emit(A64_NOP, ctx);
+
+ /* save return value to callee saved register x20 */
+ emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
+
+ emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
+ if (!p->jited)
+ emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
+
+ emit_call((const u64)p->bpf_func, ctx);
+
+ if (save_ret)
+ emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
+
+ if (ctx->image) {
+ int offset = &ctx->image[ctx->idx] - branch;
+ *branch = A64_CBZ(1, A64_R(0), offset);
+ }
+
+ /* arg1: prog */
+ emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
+ /* arg2: start time */
+ emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx);
+ /* arg3: &run_ctx */
+ emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx);
+
+ emit_call(exit_prog, ctx);
+}
+
+static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
+ int args_off, int retval_off, int run_ctx_off,
+ u32 **branches)
+{
+ int i;
+
+ /* The first fmod_ret program will receive a garbage return value.
+ * Set this to 0 to avoid confusing the program.
+ */
+ emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
+ for (i = 0; i < tl->nr_links; i++) {
+ invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
+ run_ctx_off, true);
+ /* if (*(u64 *)(sp + retval_off) != 0)
+ * goto do_fexit;
+ */
+ emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);
+ /* Save the location of branch, and generate a nop.
+ * This nop will be replaced with a cbnz later.
+ */
+ branches[i] = ctx->image + ctx->idx;
+ emit(A64_NOP, ctx);
+ }
+}
+
+static void save_args(struct jit_ctx *ctx, int args_off, int nargs)
+{
+ int i;
+
+ for (i = 0; i < nargs; i++) {
+ emit(A64_STR64I(i, A64_SP, args_off), ctx);
+ args_off += 8;
+ }
+}
+
+static void restore_args(struct jit_ctx *ctx, int args_off, int nargs)
+{
+ int i;
+
+ for (i = 0; i < nargs; i++) {
+ emit(A64_LDR64I(i, A64_SP, args_off), ctx);
+ args_off += 8;
+ }
+}
+
+/* Based on the x86's implementation of arch_prepare_bpf_trampoline().
+ *
+ * bpf prog and function entry before bpf trampoline hooked:
+ * mov x9, lr
+ * nop
+ *
+ * bpf prog and function entry after bpf trampoline hooked:
+ * mov x9, lr
+ * bl <bpf_trampoline or plt>
+ *
+ */
+static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
+ struct bpf_tramp_links *tlinks, void *orig_call,
+ int nargs, u32 flags)
+{
+ int i;
+ int stack_size;
+ int retaddr_off;
+ int regs_off;
+ int retval_off;
+ int args_off;
+ int nargs_off;
+ int ip_off;
+ int run_ctx_off;
+ struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ bool save_ret;
+ u32 **branches = NULL;
+
+ /* trampoline stack layout:
+ * [ parent ip ]
+ * [ FP ]
+ * SP + retaddr_off [ self ip ]
+ * [ FP ]
+ *
+ * [ padding ] align SP to multiples of 16
+ *
+ * [ x20 ] callee saved reg x20
+ * SP + regs_off [ x19 ] callee saved reg x19
+ *
+ * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
+ * BPF_TRAMP_F_RET_FENTRY_RET
+ *
+ * [ argN ]
+ * [ ... ]
+ * SP + args_off [ arg1 ]
+ *
+ * SP + nargs_off [ args count ]
+ *
+ * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
+ *
+ * SP + run_ctx_off [ bpf_tramp_run_ctx ]
+ */
+
+ stack_size = 0;
+ run_ctx_off = stack_size;
+ /* room for bpf_tramp_run_ctx */
+ stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
+
+ ip_off = stack_size;
+ /* room for IP address argument */
+ if (flags & BPF_TRAMP_F_IP_ARG)
+ stack_size += 8;
+
+ nargs_off = stack_size;
+ /* room for args count */
+ stack_size += 8;
+
+ args_off = stack_size;
+ /* room for args */
+ stack_size += nargs * 8;
+
+ /* room for return value */
+ retval_off = stack_size;
+ save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+ if (save_ret)
+ stack_size += 8;
+
+ /* room for callee saved registers, currently x19 and x20 are used */
+ regs_off = stack_size;
+ stack_size += 16;
+
+ /* round up to multiples of 16 to avoid SPAlignmentFault */
+ stack_size = round_up(stack_size, 16);
+
+ /* return address is located above FP */
+ retaddr_off = stack_size + 8;
+
+ /* bpf trampoline may be invoked by 3 instruction types:
+ * 1. bl, attached to bpf prog or kernel function via short jump
+ * 2. br, attached to bpf prog or kernel function via long jump
+ * 3. blr, working as a function pointer, used by struct_ops.
+ * So BTI_JC should be used here to support both br and blr.
+ */
+ emit_bti(A64_BTI_JC, ctx);
+
+ /* frame for parent function */
+ emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
+ emit(A64_MOV(1, A64_FP, A64_SP), ctx);
+
+ /* frame for patched function */
+ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
+ emit(A64_MOV(1, A64_FP, A64_SP), ctx);
+
+ /* allocate stack space */
+ emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
+
+ if (flags & BPF_TRAMP_F_IP_ARG) {
+ /* save ip address of the traced function */
+ emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx);
+ emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
+ }
+
+ /* save args count */
+ emit(A64_MOVZ(1, A64_R(10), nargs, 0), ctx);
+ emit(A64_STR64I(A64_R(10), A64_SP, nargs_off), ctx);
+
+ /* save args */
+ save_args(ctx, args_off, nargs);
+
+ /* save callee saved registers */
+ emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
+ emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
+ emit_call((const u64)__bpf_tramp_enter, ctx);
+ }
+
+ for (i = 0; i < fentry->nr_links; i++)
+ invoke_bpf_prog(ctx, fentry->links[i], args_off,
+ retval_off, run_ctx_off,
+ flags & BPF_TRAMP_F_RET_FENTRY_RET);
+
+ if (fmod_ret->nr_links) {
+ branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *),
+ GFP_KERNEL);
+ if (!branches)
+ return -ENOMEM;
+
+ invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
+ run_ctx_off, branches);
+ }
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ restore_args(ctx, args_off, nargs);
+ /* call original func */
+ emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
+ emit(A64_BLR(A64_R(10)), ctx);
+ /* store return value */
+ emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
+ /* reserve a nop for bpf_tramp_image_put */
+ im->ip_after_call = ctx->image + ctx->idx;
+ emit(A64_NOP, ctx);
+ }
+
+ /* update the branches saved in invoke_bpf_mod_ret with cbnz */
+ for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
+ int offset = &ctx->image[ctx->idx] - branches[i];
+ *branches[i] = A64_CBNZ(1, A64_R(10), offset);
+ }
+
+ for (i = 0; i < fexit->nr_links; i++)
+ invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
+ run_ctx_off, false);
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ im->ip_epilogue = ctx->image + ctx->idx;
+ emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
+ emit_call((const u64)__bpf_tramp_exit, ctx);
+ }
+
+ if (flags & BPF_TRAMP_F_RESTORE_REGS)
+ restore_args(ctx, args_off, nargs);
+
+ /* restore callee saved register x19 and x20 */
+ emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
+ emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
+
+ if (save_ret)
+ emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx);
+
+ /* reset SP */
+ emit(A64_MOV(1, A64_SP, A64_FP), ctx);
+
+ /* pop frames */
+ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
+ emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx);
+
+ if (flags & BPF_TRAMP_F_SKIP_FRAME) {
+ /* skip patched function, return to parent */
+ emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
+ emit(A64_RET(A64_R(9)), ctx);
+ } else {
+ /* return to patched function */
+ emit(A64_MOV(1, A64_R(10), A64_LR), ctx);
+ emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
+ emit(A64_RET(A64_R(10)), ctx);
+ }
+
+ if (ctx->image)
+ bpf_flush_icache(ctx->image, ctx->image + ctx->idx);
+
+ kfree(branches);
+
+ return ctx->idx;
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+ void *image_end, const struct btf_func_model *m,
+ u32 flags, struct bpf_tramp_links *tlinks,
+ void *orig_call)
+{
+ int ret;
+ int nargs = m->nr_args;
+ int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
+ struct jit_ctx ctx = {
+ .image = NULL,
+ .idx = 0,
+ };
+
+ /* the first 8 arguments are passed by registers */
+ if (nargs > 8)
+ return -ENOTSUPP;
+
+ ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags);
+ if (ret < 0)
+ return ret;
+
+ if (ret > max_insns)
+ return -EFBIG;
+
+ ctx.image = image;
+ ctx.idx = 0;
+
+ jit_fill_hole(image, (unsigned int)(image_end - image));
+ ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags);
+
+ if (ret > 0 && validate_code(&ctx) < 0)
+ ret = -EINVAL;
+
+ if (ret > 0)
+ ret *= AARCH64_INSN_SIZE;
+
+ return ret;
+}
+
+static bool is_long_jump(void *ip, void *target)
+{
+ long offset;
+
+ /* NULL target means this is a NOP */
+ if (!target)
+ return false;
+
+ offset = (long)target - (long)ip;
+ return offset < -SZ_128M || offset >= SZ_128M;
+}
+
+static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
+ void *addr, void *plt, u32 *insn)
+{
+ void *target;
+
+ if (!addr) {
+ *insn = aarch64_insn_gen_nop();
+ return 0;
+ }
+
+ if (is_long_jump(ip, addr))
+ target = plt;
+ else
+ target = addr;
+
+ *insn = aarch64_insn_gen_branch_imm((unsigned long)ip,
+ (unsigned long)target,
+ type);
+
+ return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT;
+}
+
+/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf
+ * trampoline with the branch instruction from @ip to @new_addr. If @old_addr
+ * or @new_addr is NULL, the old or new instruction is NOP.
+ *
+ * When @ip is the bpf prog entry, a bpf trampoline is being attached or
+ * detached. Since bpf trampoline and bpf prog are allocated separately with
+ * vmalloc, the address distance may exceed 128MB, the maximum branch range.
+ * So long jump should be handled.
+ *
+ * When a bpf prog is constructed, a plt pointing to empty trampoline
+ * dummy_tramp is placed at the end:
+ *
+ * bpf_prog:
+ * mov x9, lr
+ * nop // patchsite
+ * ...
+ * ret
+ *
+ * plt:
+ * ldr x10, target
+ * br x10
+ * target:
+ * .quad dummy_tramp // plt target
+ *
+ * This is also the state when no trampoline is attached.
+ *
+ * When a short-jump bpf trampoline is attached, the patchsite is patched
+ * to a bl instruction to the trampoline directly:
+ *
+ * bpf_prog:
+ * mov x9, lr
+ * bl <short-jump bpf trampoline address> // patchsite
+ * ...
+ * ret
+ *
+ * plt:
+ * ldr x10, target
+ * br x10
+ * target:
+ * .quad dummy_tramp // plt target
+ *
+ * When a long-jump bpf trampoline is attached, the plt target is filled with
+ * the trampoline address and the patchsite is patched to a bl instruction to
+ * the plt:
+ *
+ * bpf_prog:
+ * mov x9, lr
+ * bl plt // patchsite
+ * ...
+ * ret
+ *
+ * plt:
+ * ldr x10, target
+ * br x10
+ * target:
+ * .quad <long-jump bpf trampoline address> // plt target
+ *
+ * The dummy_tramp is used to prevent another CPU from jumping to unknown
+ * locations during the patching process, making the patching process easier.
+ */
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
+ void *old_addr, void *new_addr)
+{
+ int ret;
+ u32 old_insn;
+ u32 new_insn;
+ u32 replaced;
+ struct bpf_plt *plt = NULL;
+ unsigned long size = 0UL;
+ unsigned long offset = ~0UL;
+ enum aarch64_insn_branch_type branch_type;
+ char namebuf[KSYM_NAME_LEN];
+ void *image = NULL;
+ u64 plt_target = 0ULL;
+ bool poking_bpf_entry;
+
+ if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
+ /* Only poking bpf text is supported. Since kernel function
+ * entry is set up by ftrace, we rely on ftrace to poke kernel
+ * functions.
+ */
+ return -ENOTSUPP;
+
+ image = ip - offset;
+ /* zero offset means we're poking bpf prog entry */
+ poking_bpf_entry = (offset == 0UL);
+
+ /* bpf prog entry, find plt and the real patchsite */
+ if (poking_bpf_entry) {
+ /* plt is located at the end of bpf prog */
+ plt = image + size - PLT_TARGET_OFFSET;
+
+ /* skip to the nop instruction in bpf prog entry:
+ * bti c // if BTI enabled
+ * mov x9, x30
+ * nop
+ */
+ ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
+ }
+
+ /* long jump is only possible at bpf prog entry */
+ if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
+ !poking_bpf_entry))
+ return -EINVAL;
+
+ if (poke_type == BPF_MOD_CALL)
+ branch_type = AARCH64_INSN_BRANCH_LINK;
+ else
+ branch_type = AARCH64_INSN_BRANCH_NOLINK;
+
+ if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
+ return -EFAULT;
+
+ if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
+ return -EFAULT;
+
+ if (is_long_jump(ip, new_addr))
+ plt_target = (u64)new_addr;
+ else if (is_long_jump(ip, old_addr))
+ /* if the old target is a long jump and the new target is not,
+ * restore the plt target to dummy_tramp, so there is always a
+ * legal and harmless address stored in plt target, and we'll
+ * never jump from plt to an unknown place.
+ */
+ plt_target = (u64)&dummy_tramp;
+
+ if (plt_target) {
+ /* non-zero plt_target indicates we're patching a bpf prog,
+ * which is read only.
+ */
+ if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1))
+ return -EFAULT;
+ WRITE_ONCE(plt->target, plt_target);
+ set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1);
+ /* since plt target points to either the new trampoline
+ * or dummy_tramp, even if another CPU reads the old plt
+ * target value before fetching the bl instruction to plt,
+ * it will be brought back by dummy_tramp, so no barrier is
+ * required here.
+ */
+ }
+
+ /* if the old target and the new target are both long jumps, no
+ * patching is required
+ */
+ if (old_insn == new_insn)
+ return 0;
+
+ mutex_lock(&text_mutex);
+ if (aarch64_insn_read(ip, &replaced)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (replaced != old_insn) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /* We call aarch64_insn_patch_text_nosync() to replace instruction
+ * atomically, so no other CPUs will fetch a half-new and half-old
+ * instruction. But there is a chance that another CPU executes the
+ * old instruction after the patching operation finishes (e.g.,
+ * pipeline not flushed, or icache not synchronized yet).
+ *
+ * 1. when a new trampoline is attached, it is not a problem for
+ * different CPUs to jump to different trampolines temporarily.
+ *
+ * 2. when an old trampoline is freed, we should wait for all other
+ * CPUs to exit the trampoline and make sure the trampoline is no
+ * longer reachable. Since bpf_tramp_image_put() already uses
+ * percpu_ref and task-based RCU to do the sync, there is no need to
+ * call the sync version here; see bpf_tramp_image_put() for details.
+ */
+ ret = aarch64_insn_patch_text_nosync(ip, new_insn);
+out:
+ mutex_unlock(&text_mutex);
+
+ return ret;
+}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7e95697a6459..c1f6c1c51d99 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1950,23 +1950,6 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
return 0;
}
-static bool is_valid_bpf_tramp_flags(unsigned int flags)
-{
- if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
- (flags & BPF_TRAMP_F_SKIP_FRAME))
- return false;
-
- /*
- * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
- * and it must be used alone.
- */
- if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) &&
- (flags & ~BPF_TRAMP_F_RET_FENTRY_RET))
- return false;
-
- return true;
-}
-
/* Example:
* __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
* its 'struct btf_func_model' will be nr_args=2
@@ -2045,9 +2028,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (nr_args > 6)
return -ENOTSUPP;
- if (!is_valid_bpf_tramp_flags(flags))
- return -EINVAL;
-
/* Generated trampoline stack layout:
*
* RBP + 8 [ return address ]
@@ -2153,10 +2133,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_CALL_ORIG) {
restore_regs(m, &prog, nr_args, regs_off);
- /* call original function */
- if (emit_call(&prog, orig_call, prog)) {
- ret = -EINVAL;
- goto cleanup;
+ if (flags & BPF_TRAMP_F_ORIG_STACK) {
+ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
+ EMIT2(0xff, 0xd0); /* call *rax */
+ } else {
+ /* call original function */
+ if (emit_call(&prog, orig_call, prog)) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
}
/* remember return value in a stack for bpf prog to access */
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
@@ -2520,3 +2505,28 @@ bool bpf_jit_supports_subprog_tailcalls(void)
{
return true;
}
+
+void bpf_jit_free(struct bpf_prog *prog)
+{
+ if (prog->jited) {
+ struct x64_jit_data *jit_data = prog->aux->jit_data;
+ struct bpf_binary_header *hdr;
+
+ /*
+ * If we fail the final pass of JIT (from jit_subprogs),
+ * the program may not be finalized yet. Call finalize here
+ * before freeing it.
+ */
+ if (jit_data) {
+ bpf_jit_binary_pack_finalize(prog, jit_data->header,
+ jit_data->rw_header);
+ kvfree(jit_data->addrs);
+ kfree(jit_data);
+ }
+ hdr = bpf_jit_binary_pack_hdr(prog);
+ bpf_jit_binary_pack_free(hdr, NULL);
+ WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
+ }
+
+ bpf_prog_unlock_free(prog);
+}