net: don't hold rtnl mutex during netlink dump callbacks
Four years ago, Patrick made a change to hold the rtnl mutex during netlink dump callbacks.

I believe it was a wrong move. This slows down concurrent dumps, making good old /proc/net/ files faster than rtnetlink in some situations.

This occurred to me because one "ip link show dev ..." was _very_ slow on a workload adding/removing network devices in the background.

All dump callbacks are able to use RCU locking now, so this patch does roughly a revert of commits:

1c2d670f36: [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks
6313c1e099: [RTNETLINK]: Remove unnecessary locking in dump callbacks

This lets writers fight for the rtnl mutex while readers go full speed.

It also takes care of phonet: phonet_route_get() is now called from an RCU read-side section, so I renamed it to phonet_route_get_rcu().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
dcfd9cdc12
commit
e67f88dd12
8 changed files with 25 additions and 23 deletions
|
@ -51,7 +51,7 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr);
|
||||||
int phonet_route_add(struct net_device *dev, u8 daddr);
|
int phonet_route_add(struct net_device *dev, u8 daddr);
|
||||||
int phonet_route_del(struct net_device *dev, u8 daddr);
|
int phonet_route_del(struct net_device *dev, u8 daddr);
|
||||||
void rtm_phonet_notify(int event, struct net_device *dev, u8 dst);
|
void rtm_phonet_notify(int event, struct net_device *dev, u8 dst);
|
||||||
struct net_device *phonet_route_get(struct net *net, u8 daddr);
|
struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr);
|
||||||
struct net_device *phonet_route_output(struct net *net, u8 daddr);
|
struct net_device *phonet_route_output(struct net *net, u8 daddr);
|
||||||
|
|
||||||
#define PN_NO_ADDR 0xff
|
#define PN_NO_ADDR 0xff
|
||||||
|
|
|
@ -120,8 +120,9 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
int idx;
|
int idx;
|
||||||
|
|
||||||
idx = 0;
|
idx = 0;
|
||||||
for_each_netdev(net, dev) {
|
rcu_read_lock();
|
||||||
struct net_bridge_port *port = br_port_get_rtnl(dev);
|
for_each_netdev_rcu(net, dev) {
|
||||||
|
struct net_bridge_port *port = br_port_get_rcu(dev);
|
||||||
|
|
||||||
/* not a bridge port */
|
/* not a bridge port */
|
||||||
if (!port || idx < cb->args[0])
|
if (!port || idx < cb->args[0])
|
||||||
|
@ -135,7 +136,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
skip:
|
skip:
|
||||||
++idx;
|
++idx;
|
||||||
}
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
cb->args[0] = idx;
|
cb->args[0] = idx;
|
||||||
|
|
||||||
return skb->len;
|
return skb->len;
|
||||||
|
|
|
@ -590,7 +590,8 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
struct fib_rule *rule;
|
struct fib_rule *rule;
|
||||||
|
|
||||||
list_for_each_entry(rule, &ops->rules_list, list) {
|
rcu_read_lock();
|
||||||
|
list_for_each_entry_rcu(rule, &ops->rules_list, list) {
|
||||||
if (idx < cb->args[1])
|
if (idx < cb->args[1])
|
||||||
goto skip;
|
goto skip;
|
||||||
|
|
||||||
|
|
|
@ -1007,10 +1007,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
s_h = cb->args[0];
|
s_h = cb->args[0];
|
||||||
s_idx = cb->args[1];
|
s_idx = cb->args[1];
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
|
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
|
||||||
idx = 0;
|
idx = 0;
|
||||||
head = &net->dev_index_head[h];
|
head = &net->dev_index_head[h];
|
||||||
hlist_for_each_entry(dev, node, head, index_hlist) {
|
hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
|
||||||
if (idx < s_idx)
|
if (idx < s_idx)
|
||||||
goto cont;
|
goto cont;
|
||||||
if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
|
if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
|
||||||
|
@ -1023,6 +1024,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
|
rcu_read_unlock();
|
||||||
cb->args[1] = idx;
|
cb->args[1] = idx;
|
||||||
cb->args[0] = h;
|
cb->args[0] = h;
|
||||||
|
|
||||||
|
@ -1879,7 +1881,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||||
int min_len;
|
int min_len;
|
||||||
int family;
|
int family;
|
||||||
int type;
|
int type;
|
||||||
int err;
|
|
||||||
|
|
||||||
type = nlh->nlmsg_type;
|
type = nlh->nlmsg_type;
|
||||||
if (type > RTM_MAX)
|
if (type > RTM_MAX)
|
||||||
|
@ -1906,11 +1907,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||||
if (dumpit == NULL)
|
if (dumpit == NULL)
|
||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
|
|
||||||
__rtnl_unlock();
|
|
||||||
rtnl = net->rtnl;
|
rtnl = net->rtnl;
|
||||||
err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
|
return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
|
||||||
rtnl_lock();
|
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
|
memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
|
||||||
|
@ -1980,7 +1978,7 @@ static int __net_init rtnetlink_net_init(struct net *net)
|
||||||
{
|
{
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
|
sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
|
||||||
rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
|
rtnetlink_rcv, NULL, THIS_MODULE);
|
||||||
if (!sk)
|
if (!sk)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
net->rtnl = sk;
|
net->rtnl = sk;
|
||||||
|
|
|
@ -752,7 +752,8 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
skip_naddr = cb->args[1];
|
skip_naddr = cb->args[1];
|
||||||
|
|
||||||
idx = 0;
|
idx = 0;
|
||||||
for_each_netdev(&init_net, dev) {
|
rcu_read_lock();
|
||||||
|
for_each_netdev_rcu(&init_net, dev) {
|
||||||
if (idx < skip_ndevs)
|
if (idx < skip_ndevs)
|
||||||
goto cont;
|
goto cont;
|
||||||
else if (idx > skip_ndevs) {
|
else if (idx > skip_ndevs) {
|
||||||
|
@ -761,11 +762,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
skip_naddr = 0;
|
skip_naddr = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL)
|
if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL)
|
||||||
goto cont;
|
goto cont;
|
||||||
|
|
||||||
for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
|
for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
|
||||||
ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) {
|
ifa = rcu_dereference(ifa->ifa_next), dn_idx++) {
|
||||||
if (dn_idx < skip_naddr)
|
if (dn_idx < skip_naddr)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -778,6 +779,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
idx++;
|
idx++;
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
|
rcu_read_unlock();
|
||||||
cb->args[0] = idx;
|
cb->args[0] = idx;
|
||||||
cb->args[1] = dn_idx;
|
cb->args[1] = dn_idx;
|
||||||
|
|
||||||
|
|
|
@ -394,10 +394,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
arg.net = net;
|
arg.net = net;
|
||||||
w->args = &arg;
|
w->args = &arg;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
|
for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
|
||||||
e = 0;
|
e = 0;
|
||||||
head = &net->ipv6.fib_table_hash[h];
|
head = &net->ipv6.fib_table_hash[h];
|
||||||
hlist_for_each_entry(tb, node, head, tb6_hlist) {
|
hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
|
||||||
if (e < s_e)
|
if (e < s_e)
|
||||||
goto next;
|
goto next;
|
||||||
res = fib6_dump_table(tb, skb, cb);
|
res = fib6_dump_table(tb, skb, cb);
|
||||||
|
@ -408,6 +409,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
|
rcu_read_unlock();
|
||||||
cb->args[1] = e;
|
cb->args[1] = e;
|
||||||
cb->args[0] = h;
|
cb->args[0] = h;
|
||||||
|
|
||||||
|
|
|
@ -426,18 +426,14 @@ int phonet_route_del(struct net_device *dev, u8 daddr)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct net_device *phonet_route_get(struct net *net, u8 daddr)
|
struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr)
|
||||||
{
|
{
|
||||||
struct phonet_net *pnn = phonet_pernet(net);
|
struct phonet_net *pnn = phonet_pernet(net);
|
||||||
struct phonet_routes *routes = &pnn->routes;
|
struct phonet_routes *routes = &pnn->routes;
|
||||||
struct net_device *dev;
|
struct net_device *dev;
|
||||||
|
|
||||||
ASSERT_RTNL(); /* no need to hold the device */
|
|
||||||
|
|
||||||
daddr >>= 2;
|
daddr >>= 2;
|
||||||
rcu_read_lock();
|
|
||||||
dev = rcu_dereference(routes->table[daddr]);
|
dev = rcu_dereference(routes->table[daddr]);
|
||||||
rcu_read_unlock();
|
|
||||||
return dev;
|
return dev;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -264,10 +264,11 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
struct net *net = sock_net(skb->sk);
|
struct net *net = sock_net(skb->sk);
|
||||||
u8 addr, addr_idx = 0, addr_start_idx = cb->args[0];
|
u8 addr, addr_idx = 0, addr_start_idx = cb->args[0];
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
for (addr = 0; addr < 64; addr++) {
|
for (addr = 0; addr < 64; addr++) {
|
||||||
struct net_device *dev;
|
struct net_device *dev;
|
||||||
|
|
||||||
dev = phonet_route_get(net, addr << 2);
|
dev = phonet_route_get_rcu(net, addr << 2);
|
||||||
if (!dev)
|
if (!dev)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -279,6 +280,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
rcu_read_unlock();
|
||||||
cb->args[0] = addr_idx;
|
cb->args[0] = addr_idx;
|
||||||
cb->args[1] = 0;
|
cb->args[1] = 0;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue