xsk: do not call synchronize_net() under RCU read lock

The XSKMAP update and delete functions called synchronize_net(), which
can sleep. Sleeping is not allowed inside an RCU read-side critical
section.

Instead, we need to make sure that the socket's sk_destruct callback
(xsk_destruct) is called asynchronously after an RCU grace period.
Setting the SOCK_RCU_FREE flag for XDP sockets takes care of this.

Fixes: fbfc504a24 ("bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
Björn Töpel 2018-10-08 19:40:16 +02:00 committed by Daniel Borkmann
parent 262f9d811c
commit cee271678d
2 changed files with 4 additions and 8 deletions

View file

@ -192,11 +192,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
sock_hold(sock->sk); sock_hold(sock->sk);
old_xs = xchg(&m->xsk_map[i], xs); old_xs = xchg(&m->xsk_map[i], xs);
if (old_xs) { if (old_xs)
/* Make sure we've flushed everything. */
synchronize_net();
sock_put((struct sock *)old_xs); sock_put((struct sock *)old_xs);
}
sockfd_put(sock); sockfd_put(sock);
return 0; return 0;
@ -212,11 +209,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
return -EINVAL; return -EINVAL;
old_xs = xchg(&m->xsk_map[k], NULL); old_xs = xchg(&m->xsk_map[k], NULL);
if (old_xs) { if (old_xs)
/* Make sure we've flushed everything. */
synchronize_net();
sock_put((struct sock *)old_xs); sock_put((struct sock *)old_xs);
}
return 0; return 0;
} }

View file

@ -744,6 +744,8 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
sk->sk_destruct = xsk_destruct; sk->sk_destruct = xsk_destruct;
sk_refcnt_debug_inc(sk); sk_refcnt_debug_inc(sk);
sock_set_flag(sk, SOCK_RCU_FREE);
xs = xdp_sk(sk); xs = xdp_sk(sk);
mutex_init(&xs->mutex); mutex_init(&xs->mutex);
spin_lock_init(&xs->tx_completion_lock); spin_lock_init(&xs->tx_completion_lock);