bpf: Add hash of maps support
This patch adds hash of maps support (hashmap->bpf_map). BPF_MAP_TYPE_HASH_OF_MAPS is added. A map-in-map contains a pointer to another map; let's call this pointer 'inner_map_ptr'. Notes on deleting inner_map_ptr from a hash map: 1. For a BPF_F_NO_PREALLOC map-in-map, when deleting an inner_map_ptr, the htab_elem itself will go through an RCU grace period and the inner_map_ptr resides in the htab_elem. 2. For a pre-allocated htab_elem (!BPF_F_NO_PREALLOC), when deleting an inner_map_ptr, the htab_elem may get reused immediately. This situation is similar to the existing preallocated use cases. However, bpf_map_fd_put_ptr() calls bpf_map_put(), which calls inner_map->ops->map_free(inner_map), which will go through an RCU grace period (i.e. all bpf_map's map_free currently go through an RCU grace period). Hence, the inner_map_ptr is still safe for the RCU reader side. This patch also adds BPF_MAP_TYPE_HASH_OF_MAPS to the check_map_prealloc() in the verifier. Preallocation is a must for BPF_PROG_TYPE_PERF_EVENT. Hence, even though we don't expect heavy updates to map-in-map, enforcing BPF_F_NO_PREALLOC for map-in-map is impossible without first disallowing BPF_PROG_TYPE_PERF_EVENT from using map-in-map. Signed-off-by: Martin KaFai Lau <kafai@fb.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
56f668dfe0
commit
bcc6b1b7eb
5 changed files with 134 additions and 2 deletions
|
@ -277,6 +277,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
|
||||||
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
|
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
|
||||||
void *key, void *value, u64 map_flags);
|
void *key, void *value, u64 map_flags);
|
||||||
void bpf_fd_array_map_clear(struct bpf_map *map);
|
void bpf_fd_array_map_clear(struct bpf_map *map);
|
||||||
|
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
|
||||||
|
void *key, void *value, u64 map_flags);
|
||||||
|
|
||||||
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
|
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
|
||||||
* forced to use 'long' read/writes to try to atomically copy long counters.
|
* forced to use 'long' read/writes to try to atomically copy long counters.
|
||||||
|
|
|
@ -97,6 +97,7 @@ enum bpf_map_type {
|
||||||
BPF_MAP_TYPE_LRU_PERCPU_HASH,
|
BPF_MAP_TYPE_LRU_PERCPU_HASH,
|
||||||
BPF_MAP_TYPE_LPM_TRIE,
|
BPF_MAP_TYPE_LPM_TRIE,
|
||||||
BPF_MAP_TYPE_ARRAY_OF_MAPS,
|
BPF_MAP_TYPE_ARRAY_OF_MAPS,
|
||||||
|
BPF_MAP_TYPE_HASH_OF_MAPS,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum bpf_prog_type {
|
enum bpf_prog_type {
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
#include <linux/rculist_nulls.h>
|
#include <linux/rculist_nulls.h>
|
||||||
#include "percpu_freelist.h"
|
#include "percpu_freelist.h"
|
||||||
#include "bpf_lru_list.h"
|
#include "bpf_lru_list.h"
|
||||||
|
#include "map_in_map.h"
|
||||||
|
|
||||||
struct bucket {
|
struct bucket {
|
||||||
struct hlist_nulls_head head;
|
struct hlist_nulls_head head;
|
||||||
|
@ -88,6 +89,11 @@ static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size
|
||||||
return *(void __percpu **)(l->key + key_size);
|
return *(void __percpu **)(l->key + key_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
|
||||||
|
{
|
||||||
|
return *(void **)(l->key + roundup(map->key_size, 8));
|
||||||
|
}
|
||||||
|
|
||||||
static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
|
static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
|
||||||
{
|
{
|
||||||
return (struct htab_elem *) (htab->elems + i * htab->elem_size);
|
return (struct htab_elem *) (htab->elems + i * htab->elem_size);
|
||||||
|
@ -603,6 +609,14 @@ static void htab_elem_free_rcu(struct rcu_head *head)
|
||||||
|
|
||||||
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
|
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
|
||||||
{
|
{
|
||||||
|
struct bpf_map *map = &htab->map;
|
||||||
|
|
||||||
|
if (map->ops->map_fd_put_ptr) {
|
||||||
|
void *ptr = fd_htab_map_get_ptr(map, l);
|
||||||
|
|
||||||
|
map->ops->map_fd_put_ptr(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
if (l->state == HTAB_EXTRA_ELEM_USED) {
|
if (l->state == HTAB_EXTRA_ELEM_USED) {
|
||||||
l->state = HTAB_EXTRA_ELEM_FREE;
|
l->state = HTAB_EXTRA_ELEM_FREE;
|
||||||
return;
|
return;
|
||||||
|
@ -1057,6 +1071,7 @@ static void delete_all_elements(struct bpf_htab *htab)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
|
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
|
||||||
static void htab_map_free(struct bpf_map *map)
|
static void htab_map_free(struct bpf_map *map)
|
||||||
{
|
{
|
||||||
|
@ -1213,12 +1228,118 @@ static struct bpf_map_type_list htab_lru_percpu_type __ro_after_init = {
|
||||||
.type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
|
.type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
|
||||||
|
{
|
||||||
|
struct bpf_map *map;
|
||||||
|
|
||||||
|
if (attr->value_size != sizeof(u32))
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
|
|
||||||
|
/* pointer is stored internally */
|
||||||
|
attr->value_size = sizeof(void *);
|
||||||
|
map = htab_map_alloc(attr);
|
||||||
|
attr->value_size = sizeof(u32);
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fd_htab_map_free(struct bpf_map *map)
|
||||||
|
{
|
||||||
|
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||||
|
struct hlist_nulls_node *n;
|
||||||
|
struct hlist_nulls_head *head;
|
||||||
|
struct htab_elem *l;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < htab->n_buckets; i++) {
|
||||||
|
head = select_bucket(htab, i);
|
||||||
|
|
||||||
|
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
|
||||||
|
void *ptr = fd_htab_map_get_ptr(map, l);
|
||||||
|
|
||||||
|
map->ops->map_fd_put_ptr(ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
htab_map_free(map);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* only called from syscall */
|
||||||
|
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
|
||||||
|
void *key, void *value, u64 map_flags)
|
||||||
|
{
|
||||||
|
void *ptr;
|
||||||
|
int ret;
|
||||||
|
u32 ufd = *(u32 *)value;
|
||||||
|
|
||||||
|
ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
|
||||||
|
if (IS_ERR(ptr))
|
||||||
|
return PTR_ERR(ptr);
|
||||||
|
|
||||||
|
ret = htab_map_update_elem(map, key, &ptr, map_flags);
|
||||||
|
if (ret)
|
||||||
|
map->ops->map_fd_put_ptr(ptr);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
|
||||||
|
{
|
||||||
|
struct bpf_map *map, *inner_map_meta;
|
||||||
|
|
||||||
|
inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
|
||||||
|
if (IS_ERR(inner_map_meta))
|
||||||
|
return inner_map_meta;
|
||||||
|
|
||||||
|
map = fd_htab_map_alloc(attr);
|
||||||
|
if (IS_ERR(map)) {
|
||||||
|
bpf_map_meta_free(inner_map_meta);
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
map->inner_map_meta = inner_map_meta;
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
|
||||||
|
{
|
||||||
|
struct bpf_map **inner_map = htab_map_lookup_elem(map, key);
|
||||||
|
|
||||||
|
if (!inner_map)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return READ_ONCE(*inner_map);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void htab_of_map_free(struct bpf_map *map)
|
||||||
|
{
|
||||||
|
bpf_map_meta_free(map->inner_map_meta);
|
||||||
|
fd_htab_map_free(map);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct bpf_map_ops htab_of_map_ops = {
|
||||||
|
.map_alloc = htab_of_map_alloc,
|
||||||
|
.map_free = htab_of_map_free,
|
||||||
|
.map_get_next_key = htab_map_get_next_key,
|
||||||
|
.map_lookup_elem = htab_of_map_lookup_elem,
|
||||||
|
.map_delete_elem = htab_map_delete_elem,
|
||||||
|
.map_fd_get_ptr = bpf_map_fd_get_ptr,
|
||||||
|
.map_fd_put_ptr = bpf_map_fd_put_ptr,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Ties BPF_MAP_TYPE_HASH_OF_MAPS to its operations for registration. */
static struct bpf_map_type_list htab_of_map_type __ro_after_init = {
	.type = BPF_MAP_TYPE_HASH_OF_MAPS,
	.ops = &htab_of_map_ops,
};
|
||||||
|
|
||||||
static int __init register_htab_map(void)
|
static int __init register_htab_map(void)
|
||||||
{
|
{
|
||||||
bpf_register_map_type(&htab_type);
|
bpf_register_map_type(&htab_type);
|
||||||
bpf_register_map_type(&htab_percpu_type);
|
bpf_register_map_type(&htab_percpu_type);
|
||||||
bpf_register_map_type(&htab_lru_type);
|
bpf_register_map_type(&htab_lru_type);
|
||||||
bpf_register_map_type(&htab_lru_percpu_type);
|
bpf_register_map_type(&htab_lru_percpu_type);
|
||||||
|
bpf_register_map_type(&htab_of_map_type);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
late_initcall(register_htab_map);
|
late_initcall(register_htab_map);
|
||||||
|
|
|
@ -352,7 +352,8 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||||
err = bpf_percpu_array_copy(map, key, value);
|
err = bpf_percpu_array_copy(map, key, value);
|
||||||
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
|
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
|
||||||
err = bpf_stackmap_copy(map, key, value);
|
err = bpf_stackmap_copy(map, key, value);
|
||||||
} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
|
} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
|
||||||
|
map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
|
||||||
err = -ENOTSUPP;
|
err = -ENOTSUPP;
|
||||||
} else {
|
} else {
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
@ -446,6 +447,11 @@ static int map_update_elem(union bpf_attr *attr)
|
||||||
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
|
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
|
||||||
attr->flags);
|
attr->flags);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
|
||||||
|
rcu_read_lock();
|
||||||
|
err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
|
||||||
|
attr->flags);
|
||||||
|
rcu_read_unlock();
|
||||||
} else {
|
} else {
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
err = map->ops->map_update_elem(map, key, value, attr->flags);
|
err = map->ops->map_update_elem(map, key, value, attr->flags);
|
||||||
|
|
|
@ -1200,6 +1200,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
|
||||||
goto error;
|
goto error;
|
||||||
break;
|
break;
|
||||||
case BPF_MAP_TYPE_ARRAY_OF_MAPS:
|
case BPF_MAP_TYPE_ARRAY_OF_MAPS:
|
||||||
|
case BPF_MAP_TYPE_HASH_OF_MAPS:
|
||||||
if (func_id != BPF_FUNC_map_lookup_elem)
|
if (func_id != BPF_FUNC_map_lookup_elem)
|
||||||
goto error;
|
goto error;
|
||||||
default:
|
default:
|
||||||
|
@ -3044,7 +3045,8 @@ static int do_check(struct bpf_verifier_env *env)
|
||||||
static int check_map_prealloc(struct bpf_map *map)
|
static int check_map_prealloc(struct bpf_map *map)
|
||||||
{
|
{
|
||||||
return (map->map_type != BPF_MAP_TYPE_HASH &&
|
return (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||||
map->map_type != BPF_MAP_TYPE_PERCPU_HASH) ||
|
map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
|
||||||
|
map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
|
||||||
!(map->map_flags & BPF_F_NO_PREALLOC);
|
!(map->map_flags & BPF_F_NO_PREALLOC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue