tproxy: fix hash locking issue when using port redirection in __inet_inherit_port()

When __inet_inherit_port() is called on a tproxy connection, the wrong locks are
held for the inet_bind_bucket it is added to. __inet_inherit_port() made an
implicit assumption that the listener's port number (and thus its bind bucket)
is the same as the child socket's.
Unfortunately, if you're using the TPROXY target to redirect skbs to a
transparent proxy that assumption is not true anymore and things break.

This patch adds code to __inet_inherit_port() so that it can handle this case
by looking up or creating a new bind bucket for the child socket and updates
callers of __inet_inherit_port() to gracefully handle __inet_inherit_port()
failing.

Reported by and original patch from Stephen Buck <stephen.buck@exinda.com>.
See http://marc.info/?t=128169268200001&r=1&w=2 for the original discussion.

Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
This commit is contained in:
Balazs Scheidler 2010-10-21 13:06:43 +02:00 committed by Patrick McHardy
parent 6006db84a9
commit 093d282321
6 changed files with 56 additions and 16 deletions

View file

@ -245,7 +245,7 @@ static inline int inet_sk_listen_hashfn(const struct sock *sk)
} }
/* Caller must disable local BH processing. */ /* Caller must disable local BH processing. */
extern void __inet_inherit_port(struct sock *sk, struct sock *child); extern int __inet_inherit_port(struct sock *sk, struct sock *child);
extern void inet_put_port(struct sock *sk); extern void inet_put_port(struct sock *sk);

View file

@ -392,7 +392,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk = dccp_create_openreq_child(sk, req, skb); newsk = dccp_create_openreq_child(sk, req, skb);
if (newsk == NULL) if (newsk == NULL)
goto exit; goto exit_nonewsk;
sk_setup_caps(newsk, dst); sk_setup_caps(newsk, dst);
@ -409,16 +409,20 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
dccp_sync_mss(newsk, dst_mtu(dst)); dccp_sync_mss(newsk, dst_mtu(dst));
if (__inet_inherit_port(sk, newsk) < 0) {
sock_put(newsk);
goto exit;
}
__inet_hash_nolisten(newsk, NULL); __inet_hash_nolisten(newsk, NULL);
__inet_inherit_port(sk, newsk);
return newsk; return newsk;
exit_overflow: exit_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
dst_release(dst);
exit: exit:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
dst_release(dst);
return NULL; return NULL;
} }

View file

@ -564,7 +564,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
newsk = dccp_create_openreq_child(sk, req, skb); newsk = dccp_create_openreq_child(sk, req, skb);
if (newsk == NULL) if (newsk == NULL)
goto out; goto out_nonewsk;
/* /*
* No need to charge this sock to the relevant IPv6 refcnt debug socks * No need to charge this sock to the relevant IPv6 refcnt debug socks
@ -632,18 +632,22 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
newinet->inet_rcv_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
if (__inet_inherit_port(sk, newsk) < 0) {
sock_put(newsk);
goto out;
}
__inet6_hash(newsk, NULL); __inet6_hash(newsk, NULL);
__inet_inherit_port(sk, newsk);
return newsk; return newsk;
out_overflow: out_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
dst_release(dst);
out: out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
if (opt != NULL && opt != np->opt) if (opt != NULL && opt != np->opt)
sock_kfree_s(sk, opt, opt->tot_len); sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
return NULL; return NULL;
} }

View file

@ -101,19 +101,43 @@ void inet_put_port(struct sock *sk)
} }
EXPORT_SYMBOL(inet_put_port); EXPORT_SYMBOL(inet_put_port);
void __inet_inherit_port(struct sock *sk, struct sock *child) int __inet_inherit_port(struct sock *sk, struct sock *child)
{ {
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, unsigned short port = inet_sk(child)->inet_num;
const int bhash = inet_bhashfn(sock_net(sk), port,
table->bhash_size); table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash]; struct inet_bind_hashbucket *head = &table->bhash[bhash];
struct inet_bind_bucket *tb; struct inet_bind_bucket *tb;
spin_lock(&head->lock); spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash; tb = inet_csk(sk)->icsk_bind_hash;
if (tb->port != port) {
/* NOTE: using tproxy and redirecting skbs to a proxy
* on a different listener port breaks the assumption
* that the listener socket's icsk_bind_hash is the same
* as that of the child socket. We have to look up or
* create a new bind bucket for the child here. */
struct hlist_node *node;
inet_bind_bucket_for_each(tb, node, &head->chain) {
if (net_eq(ib_net(tb), sock_net(sk)) &&
tb->port == port)
break;
}
if (!node) {
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
sock_net(sk), head, port);
if (!tb) {
spin_unlock(&head->lock);
return -ENOMEM;
}
}
}
sk_add_bind_node(child, &tb->owners); sk_add_bind_node(child, &tb->owners);
inet_csk(child)->icsk_bind_hash = tb; inet_csk(child)->icsk_bind_hash = tb;
spin_unlock(&head->lock); spin_unlock(&head->lock);
return 0;
} }
EXPORT_SYMBOL_GPL(__inet_inherit_port); EXPORT_SYMBOL_GPL(__inet_inherit_port);

View file

@ -1422,7 +1422,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk = tcp_create_openreq_child(sk, req, skb); newsk = tcp_create_openreq_child(sk, req, skb);
if (!newsk) if (!newsk)
goto exit; goto exit_nonewsk;
newsk->sk_gso_type = SKB_GSO_TCPV4; newsk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(newsk, dst); sk_setup_caps(newsk, dst);
@ -1469,16 +1469,20 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
} }
#endif #endif
if (__inet_inherit_port(sk, newsk) < 0) {
sock_put(newsk);
goto exit;
}
__inet_hash_nolisten(newsk, NULL); __inet_hash_nolisten(newsk, NULL);
__inet_inherit_port(sk, newsk);
return newsk; return newsk;
exit_overflow: exit_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
dst_release(dst);
exit: exit:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
dst_release(dst);
return NULL; return NULL;
} }
EXPORT_SYMBOL(tcp_v4_syn_recv_sock); EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

View file

@ -1409,7 +1409,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk = tcp_create_openreq_child(sk, req, skb); newsk = tcp_create_openreq_child(sk, req, skb);
if (newsk == NULL) if (newsk == NULL)
goto out; goto out_nonewsk;
/* /*
* No need to charge this sock to the relevant IPv6 refcnt debug socks * No need to charge this sock to the relevant IPv6 refcnt debug socks
@ -1497,18 +1497,22 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
} }
#endif #endif
if (__inet_inherit_port(sk, newsk) < 0) {
sock_put(newsk);
goto out;
}
__inet6_hash(newsk, NULL); __inet6_hash(newsk, NULL);
__inet_inherit_port(sk, newsk);
return newsk; return newsk;
out_overflow: out_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out: out_nonewsk:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
if (opt && opt != np->opt) if (opt && opt != np->opt)
sock_kfree_s(sk, opt, opt->tot_len); sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst); dst_release(dst);
out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL; return NULL;
} }