bpf: add lookup/update/delete/iterate methods to BPF maps
'maps' is a generic storage of different types for sharing data between kernel and userspace.

The maps are accessed from user space via BPF syscall, which has commands:

- create a map with given type and attributes
  fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
  returns fd or negative error

- lookup key in a given map referenced by fd
  err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero and stores found elem into value or negative error

- create or update key/value pair in a given map
  err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero or negative error

- find and delete element by key in a given map
  err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key

- iterate map elements (based on input key return next_key)
  err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->next_key

- close(fd) deletes the map

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
749730ce42
commit
db20fd2b01
3 changed files with 281 additions and 0 deletions
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/file.h>
|
||||
|
||||
struct bpf_map;
|
||||
|
||||
|
@ -17,6 +18,12 @@ struct bpf_map_ops {
|
|||
/* funcs callable from userspace (via syscall) */
|
||||
struct bpf_map *(*map_alloc)(union bpf_attr *attr);
|
||||
void (*map_free)(struct bpf_map *);
|
||||
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
|
||||
|
||||
/* funcs callable from userspace and from eBPF programs */
|
||||
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
|
||||
int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
|
||||
int (*map_delete_elem)(struct bpf_map *map, void *key);
|
||||
};
|
||||
|
||||
struct bpf_map {
|
||||
|
@ -37,5 +44,6 @@ struct bpf_map_type_list {
|
|||
|
||||
void bpf_register_map_type(struct bpf_map_type_list *tl);
|
||||
void bpf_map_put(struct bpf_map *map);
|
||||
struct bpf_map *bpf_map_get(struct fd f);
|
||||
|
||||
#endif /* _LINUX_BPF_H */
|
||||
|
|
|
@ -70,6 +70,35 @@ enum bpf_cmd {
|
|||
* map is deleted when fd is closed
|
||||
*/
|
||||
BPF_MAP_CREATE,
|
||||
|
||||
/* lookup key in a given map
|
||||
* err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
|
||||
* Using attr->map_fd, attr->key, attr->value
|
||||
* returns zero and stores found elem into value
|
||||
* or negative error
|
||||
*/
|
||||
BPF_MAP_LOOKUP_ELEM,
|
||||
|
||||
/* create or update key/value pair in a given map
|
||||
* err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
|
||||
* Using attr->map_fd, attr->key, attr->value
|
||||
* returns zero or negative error
|
||||
*/
|
||||
BPF_MAP_UPDATE_ELEM,
|
||||
|
||||
/* find and delete elem by key in a given map
|
||||
* err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
|
||||
* Using attr->map_fd, attr->key
|
||||
* returns zero or negative error
|
||||
*/
|
||||
BPF_MAP_DELETE_ELEM,
|
||||
|
||||
/* lookup key in a given map and return next key
|
||||
* err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
|
||||
* Using attr->map_fd, attr->key, attr->next_key
|
||||
* returns zero and stores next key or negative error
|
||||
*/
|
||||
BPF_MAP_GET_NEXT_KEY,
|
||||
};
|
||||
|
||||
enum bpf_map_type {
|
||||
|
@ -83,6 +112,15 @@ union bpf_attr {
|
|||
__u32 value_size; /* size of value in bytes */
|
||||
__u32 max_entries; /* max number of entries in a map */
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
|
||||
__u32 map_fd;
|
||||
__aligned_u64 key;
|
||||
union {
|
||||
__aligned_u64 value;
|
||||
__aligned_u64 next_key;
|
||||
};
|
||||
};
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
#endif /* _UAPI__LINUX_BPF_H__ */
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/string.h>
|
||||
|
||||
static LIST_HEAD(bpf_map_types);
|
||||
|
||||
|
@ -111,6 +112,228 @@ static int map_create(union bpf_attr *attr)
|
|||
return err;
|
||||
}
|
||||
|
||||
/* if error is returned, fd is released.
|
||||
* On success caller should complete fd access with matching fdput()
|
||||
*/
|
||||
struct bpf_map *bpf_map_get(struct fd f)
|
||||
{
|
||||
struct bpf_map *map;
|
||||
|
||||
if (!f.file)
|
||||
return ERR_PTR(-EBADF);
|
||||
|
||||
if (f.file->f_op != &bpf_map_fops) {
|
||||
fdput(f);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
map = f.file->private_data;
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
/* helper to convert user pointers passed inside __aligned_u64 fields */
|
||||
static void __user *u64_to_ptr(__u64 val)
|
||||
{
|
||||
return (void __user *) (unsigned long) val;
|
||||
}
|
||||
|
||||
/* last field in 'union bpf_attr' used by this command */
|
||||
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
|
||||
|
||||
static int map_lookup_elem(union bpf_attr *attr)
|
||||
{
|
||||
void __user *ukey = u64_to_ptr(attr->key);
|
||||
void __user *uvalue = u64_to_ptr(attr->value);
|
||||
int ufd = attr->map_fd;
|
||||
struct fd f = fdget(ufd);
|
||||
struct bpf_map *map;
|
||||
void *key, *value;
|
||||
int err;
|
||||
|
||||
if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
|
||||
return -EINVAL;
|
||||
|
||||
map = bpf_map_get(f);
|
||||
if (IS_ERR(map))
|
||||
return PTR_ERR(map);
|
||||
|
||||
err = -ENOMEM;
|
||||
key = kmalloc(map->key_size, GFP_USER);
|
||||
if (!key)
|
||||
goto err_put;
|
||||
|
||||
err = -EFAULT;
|
||||
if (copy_from_user(key, ukey, map->key_size) != 0)
|
||||
goto free_key;
|
||||
|
||||
err = -ESRCH;
|
||||
rcu_read_lock();
|
||||
value = map->ops->map_lookup_elem(map, key);
|
||||
if (!value)
|
||||
goto err_unlock;
|
||||
|
||||
err = -EFAULT;
|
||||
if (copy_to_user(uvalue, value, map->value_size) != 0)
|
||||
goto err_unlock;
|
||||
|
||||
err = 0;
|
||||
|
||||
err_unlock:
|
||||
rcu_read_unlock();
|
||||
free_key:
|
||||
kfree(key);
|
||||
err_put:
|
||||
fdput(f);
|
||||
return err;
|
||||
}
|
||||
|
||||
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
|
||||
|
||||
static int map_update_elem(union bpf_attr *attr)
|
||||
{
|
||||
void __user *ukey = u64_to_ptr(attr->key);
|
||||
void __user *uvalue = u64_to_ptr(attr->value);
|
||||
int ufd = attr->map_fd;
|
||||
struct fd f = fdget(ufd);
|
||||
struct bpf_map *map;
|
||||
void *key, *value;
|
||||
int err;
|
||||
|
||||
if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
|
||||
return -EINVAL;
|
||||
|
||||
map = bpf_map_get(f);
|
||||
if (IS_ERR(map))
|
||||
return PTR_ERR(map);
|
||||
|
||||
err = -ENOMEM;
|
||||
key = kmalloc(map->key_size, GFP_USER);
|
||||
if (!key)
|
||||
goto err_put;
|
||||
|
||||
err = -EFAULT;
|
||||
if (copy_from_user(key, ukey, map->key_size) != 0)
|
||||
goto free_key;
|
||||
|
||||
err = -ENOMEM;
|
||||
value = kmalloc(map->value_size, GFP_USER);
|
||||
if (!value)
|
||||
goto free_key;
|
||||
|
||||
err = -EFAULT;
|
||||
if (copy_from_user(value, uvalue, map->value_size) != 0)
|
||||
goto free_value;
|
||||
|
||||
/* eBPF program that use maps are running under rcu_read_lock(),
|
||||
* therefore all map accessors rely on this fact, so do the same here
|
||||
*/
|
||||
rcu_read_lock();
|
||||
err = map->ops->map_update_elem(map, key, value);
|
||||
rcu_read_unlock();
|
||||
|
||||
free_value:
|
||||
kfree(value);
|
||||
free_key:
|
||||
kfree(key);
|
||||
err_put:
|
||||
fdput(f);
|
||||
return err;
|
||||
}
|
||||
|
||||
#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
|
||||
|
||||
static int map_delete_elem(union bpf_attr *attr)
|
||||
{
|
||||
void __user *ukey = u64_to_ptr(attr->key);
|
||||
int ufd = attr->map_fd;
|
||||
struct fd f = fdget(ufd);
|
||||
struct bpf_map *map;
|
||||
void *key;
|
||||
int err;
|
||||
|
||||
if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
|
||||
return -EINVAL;
|
||||
|
||||
map = bpf_map_get(f);
|
||||
if (IS_ERR(map))
|
||||
return PTR_ERR(map);
|
||||
|
||||
err = -ENOMEM;
|
||||
key = kmalloc(map->key_size, GFP_USER);
|
||||
if (!key)
|
||||
goto err_put;
|
||||
|
||||
err = -EFAULT;
|
||||
if (copy_from_user(key, ukey, map->key_size) != 0)
|
||||
goto free_key;
|
||||
|
||||
rcu_read_lock();
|
||||
err = map->ops->map_delete_elem(map, key);
|
||||
rcu_read_unlock();
|
||||
|
||||
free_key:
|
||||
kfree(key);
|
||||
err_put:
|
||||
fdput(f);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* last field in 'union bpf_attr' used by this command */
|
||||
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
|
||||
|
||||
static int map_get_next_key(union bpf_attr *attr)
|
||||
{
|
||||
void __user *ukey = u64_to_ptr(attr->key);
|
||||
void __user *unext_key = u64_to_ptr(attr->next_key);
|
||||
int ufd = attr->map_fd;
|
||||
struct fd f = fdget(ufd);
|
||||
struct bpf_map *map;
|
||||
void *key, *next_key;
|
||||
int err;
|
||||
|
||||
if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
|
||||
return -EINVAL;
|
||||
|
||||
map = bpf_map_get(f);
|
||||
if (IS_ERR(map))
|
||||
return PTR_ERR(map);
|
||||
|
||||
err = -ENOMEM;
|
||||
key = kmalloc(map->key_size, GFP_USER);
|
||||
if (!key)
|
||||
goto err_put;
|
||||
|
||||
err = -EFAULT;
|
||||
if (copy_from_user(key, ukey, map->key_size) != 0)
|
||||
goto free_key;
|
||||
|
||||
err = -ENOMEM;
|
||||
next_key = kmalloc(map->key_size, GFP_USER);
|
||||
if (!next_key)
|
||||
goto free_key;
|
||||
|
||||
rcu_read_lock();
|
||||
err = map->ops->map_get_next_key(map, key, next_key);
|
||||
rcu_read_unlock();
|
||||
if (err)
|
||||
goto free_next_key;
|
||||
|
||||
err = -EFAULT;
|
||||
if (copy_to_user(unext_key, next_key, map->key_size) != 0)
|
||||
goto free_next_key;
|
||||
|
||||
err = 0;
|
||||
|
||||
free_next_key:
|
||||
kfree(next_key);
|
||||
free_key:
|
||||
kfree(key);
|
||||
err_put:
|
||||
fdput(f);
|
||||
return err;
|
||||
}
|
||||
|
||||
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
|
||||
{
|
||||
union bpf_attr attr = {};
|
||||
|
@ -160,6 +383,18 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
|
|||
case BPF_MAP_CREATE:
|
||||
err = map_create(&attr);
|
||||
break;
|
||||
case BPF_MAP_LOOKUP_ELEM:
|
||||
err = map_lookup_elem(&attr);
|
||||
break;
|
||||
case BPF_MAP_UPDATE_ELEM:
|
||||
err = map_update_elem(&attr);
|
||||
break;
|
||||
case BPF_MAP_DELETE_ELEM:
|
||||
err = map_delete_elem(&attr);
|
||||
break;
|
||||
case BPF_MAP_GET_NEXT_KEY:
|
||||
err = map_get_next_key(&attr);
|
||||
break;
|
||||
default:
|
||||
err = -EINVAL;
|
||||
break;
|
||||
|
|
Loading…
Reference in a new issue