summary | refs | log | tree | commit | diff
path: root/kernel/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- kernel/bpf/arraymap.c 33
-rw-r--r-- kernel/bpf/core.c 2
-rw-r--r-- kernel/bpf/cpumap.c 2
-rw-r--r-- kernel/bpf/lpm_trie.c 14
-rw-r--r-- kernel/bpf/sockmap.c 3
-rw-r--r-- kernel/bpf/syscall.c 2
-rw-r--r-- kernel/bpf/verifier.c 42
7 files changed, 59 insertions, 39 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b1f66480135b..14750e7c5ee4 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array)
{
int i;
- for (i = 0; i < array->map.max_entries; i++)
+ for (i = 0; i < array->map.max_entries; i++) {
free_percpu(array->pptrs[i]);
+ cond_resched();
+ }
}
static int bpf_array_alloc_percpu(struct bpf_array *array)
@@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
return -ENOMEM;
}
array->pptrs[i] = ptr;
+ cond_resched();
}
return 0;
@@ -73,11 +76,11 @@ static int array_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
- int numa_node = bpf_map_attr_numa_node(attr);
+ int ret, numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
+ u64 cost, array_size, mask64;
struct bpf_array *array;
- u64 array_size, mask64;
elem_size = round_up(attr->value_size, 8);
@@ -109,8 +112,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array_size += (u64) max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */
- if (array_size >= U32_MAX - PAGE_SIZE)
+ cost = array_size;
+ if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
+ if (percpu) {
+ cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
+ if (cost >= U32_MAX - PAGE_SIZE)
+ return ERR_PTR(-ENOMEM);
+ }
+ cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+ ret = bpf_map_precharge_memlock(cost);
+ if (ret < 0)
+ return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
@@ -121,20 +135,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
+ array->map.pages = cost;
array->elem_size = elem_size;
- if (!percpu)
- goto out;
-
- array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
-
- if (array_size >= U32_MAX - PAGE_SIZE ||
- bpf_array_alloc_percpu(array)) {
+ if (percpu && bpf_array_alloc_percpu(array)) {
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
-out:
- array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
return &array->map;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 29ca9208dcfa..d315b393abdd 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1590,7 +1590,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
* so always copy 'cnt' prog_ids to the user.
* In a rare race the user will see zero prog_ids
*/
- ids = kcalloc(cnt, sizeof(u32), GFP_USER);
+ ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
if (!ids)
return -ENOMEM;
rcu_read_lock();
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index fbfdada6caee..a4bb0b34375a 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -334,7 +334,7 @@ static int cpu_map_kthread_run(void *data)
static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
int map_id)
{
- gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN;
+ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct bpf_cpu_map_entry *rcpu;
int numa, err;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 7b469d10d0e9..b4b5b81e7251 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -555,7 +555,10 @@ static void trie_free(struct bpf_map *map)
struct lpm_trie_node __rcu **slot;
struct lpm_trie_node *node;
- raw_spin_lock(&trie->lock);
+ /* Wait for outstanding programs to complete
+ * update/lookup/delete/get_next_key and free the trie.
+ */
+ synchronize_rcu();
/* Always start at the root and walk down to a node that has no
* children. Then free that node, nullify its reference in the parent
@@ -566,10 +569,9 @@ static void trie_free(struct bpf_map *map)
slot = &trie->root;
for (;;) {
- node = rcu_dereference_protected(*slot,
- lockdep_is_held(&trie->lock));
+ node = rcu_dereference_protected(*slot, 1);
if (!node)
- goto unlock;
+ goto out;
if (rcu_access_pointer(node->child[0])) {
slot = &node->child[0];
@@ -587,8 +589,8 @@ static void trie_free(struct bpf_map *map)
}
}
-unlock:
- raw_spin_unlock(&trie->lock);
+out:
+ kfree(trie);
}
static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 48c33417d13c..a927e89dad6e 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -521,8 +521,8 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
{
struct bpf_stab *stab;
- int err = -EINVAL;
u64 cost;
+ int err;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -547,6 +547,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) stab->map.max_entries * sizeof(struct sock *);
+ err = -EINVAL;
if (cost >= U32_MAX - PAGE_SIZE)
goto free_stab;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e24aa3241387..43f95d190eea 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1845,7 +1845,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
union bpf_attr attr = {};
int err;
- if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
+ if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
return -EPERM;
err = check_uarg_tail_zero(uattr, sizeof(attr), size);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5fb69a85d967..c6eff108aa99 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1356,6 +1356,13 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
return reg->type == PTR_TO_CTX;
}
+static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
+{
+ const struct bpf_reg_state *reg = cur_regs(env) + regno;
+
+ return type_is_pkt_pointer(reg->type);
+}
+
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int off, int size, bool strict)
@@ -1416,10 +1423,10 @@ static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
}
static int check_ptr_alignment(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg,
- int off, int size)
+ const struct bpf_reg_state *reg, int off,
+ int size, bool strict_alignment_once)
{
- bool strict = env->strict_alignment;
+ bool strict = env->strict_alignment || strict_alignment_once;
const char *pointer_desc = "";
switch (reg->type) {
@@ -1576,9 +1583,9 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
* if t==write && value_regno==-1, some unknown value is stored into memory
* if t==read && value_regno==-1, don't care what we read from memory
*/
-static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off,
- int bpf_size, enum bpf_access_type t,
- int value_regno)
+static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
+ int off, int bpf_size, enum bpf_access_type t,
+ int value_regno, bool strict_alignment_once)
{
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = regs + regno;
@@ -1590,7 +1597,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return size;
/* alignment checks will add in reg->off themselves */
- err = check_ptr_alignment(env, reg, off, size);
+ err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
if (err)
return err;
@@ -1735,21 +1742,23 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
return -EACCES;
}
- if (is_ctx_reg(env, insn->dst_reg)) {
- verbose(env, "BPF_XADD stores into R%d context is not allowed\n",
- insn->dst_reg);
+ if (is_ctx_reg(env, insn->dst_reg) ||
+ is_pkt_reg(env, insn->dst_reg)) {
+ verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
+ insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
+ "context" : "packet");
return -EACCES;
}
/* check whether atomic_add can read the memory */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_READ, -1);
+ BPF_SIZE(insn->code), BPF_READ, -1, true);
if (err)
return err;
/* check whether atomic_add can write into the same memory */
return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_WRITE, -1);
+ BPF_SIZE(insn->code), BPF_WRITE, -1, true);
}
/* when register 'regno' is passed into function that will read 'access_size'
@@ -2388,7 +2397,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
* is inferred from register state.
*/
for (i = 0; i < meta.access_size; i++) {
- err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1);
+ err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
+ BPF_WRITE, -1, false);
if (err)
return err;
}
@@ -4632,7 +4642,7 @@ static int do_check(struct bpf_verifier_env *env)
*/
err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ,
- insn->dst_reg);
+ insn->dst_reg, false);
if (err)
return err;
@@ -4684,7 +4694,7 @@ static int do_check(struct bpf_verifier_env *env)
/* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
- insn->src_reg);
+ insn->src_reg, false);
if (err)
return err;
@@ -4719,7 +4729,7 @@ static int do_check(struct bpf_verifier_env *env)
/* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
- -1);
+ -1, false);
if (err)
return err;