diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 7c5c0f79168a..15b492a9aa79 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -22,6 +22,12 @@ #include #include +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +#define FIB6_TABLE_HASHSZ 256 +#else +#define FIB6_TABLE_HASHSZ 1 +#endif + struct rt6_info; struct fib6_config diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9e3a3f4c1f60..223e90a44824 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1280,7 +1280,7 @@ struct xfrm6_tunnel { }; extern void xfrm_init(void); -extern void xfrm4_init(void); +extern void xfrm4_init(int rt_hash_size); extern int xfrm_state_init(struct net *net); extern void xfrm_state_fini(struct net *net); extern void xfrm4_state_init(void); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 278f46f5011b..fafbe163e2b5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3442,7 +3442,7 @@ int __init ip_rt_init(void) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM xfrm_init(); - xfrm4_init(); + xfrm4_init(ip_rt_max_size); #endif rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 26496babdf3a..1ba44742ebbf 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -290,10 +290,21 @@ static void __exit xfrm4_policy_fini(void) xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); } -void __init xfrm4_init(void) +void __init xfrm4_init(int rt_max_size) { xfrm4_state_init(); xfrm4_policy_init(); + /* + * Select a default value for the gc_thresh based on the main route + * table hash size. It seems to me the worst case scenario is when + * we have ipsec operating in transport mode, in which we create a + * dst_entry per socket. The xfrm gc algorithm starts trying to remove + * entries at gc_thresh, and prevents new allocations as 2*gc_thresh + * so lets set an initial xfrm gc_thresh value at the rt_max_size/2. + * That will let us store an ipsec connection per route table entry, + * and start cleaning when were 1/2 full + */ + xfrm4_dst_ops.gc_thresh = rt_max_size/2; sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, xfrm4_policy_table); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 52ee1dced2ff..0e93ca56eb69 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -164,12 +164,6 @@ static __inline__ void rt6_release(struct rt6_info *rt) dst_free(&rt->u.dst); } -#ifdef CONFIG_IPV6_MULTIPLE_TABLES -#define FIB_TABLE_HASHSZ 256 -#else -#define FIB_TABLE_HASHSZ 1 -#endif - static void fib6_link_table(struct net *net, struct fib6_table *tb) { unsigned int h; @@ -180,7 +174,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb) */ rwlock_init(&tb->tb6_lock); - h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); + h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); /* * No protection necessary, this is the only list mutatation @@ -231,7 +225,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) if (id == 0) id = RT6_TABLE_MAIN; - h = id & (FIB_TABLE_HASHSZ - 1); + h = id & (FIB6_TABLE_HASHSZ - 1); rcu_read_lock(); head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { @@ -382,7 +376,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) arg.net = net; w->args = &arg; - for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry(tb, node, head, tb6_hlist) { @@ -1368,7 +1362,7 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), unsigned int h; rcu_read_lock(); - for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); @@ -1483,7 +1477,7 @@ static int fib6_net_init(struct net *net) if (!net->ipv6.rt6_stats) goto out_timer; - net->ipv6.fib_table_hash = kcalloc(FIB_TABLE_HASHSZ, + net->ipv6.fib_table_hash = kcalloc(FIB6_TABLE_HASHSZ, sizeof(*net->ipv6.fib_table_hash), GFP_KERNEL); if (!net->ipv6.fib_table_hash) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4acc308eac7f..611cffcf554f 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -323,6 +323,7 @@ static struct ctl_table_header *sysctl_hdr; int __init xfrm6_init(void) { int ret; + unsigned int gc_thresh; ret = xfrm6_policy_init(); if (ret) @@ -331,6 +332,20 @@ int __init xfrm6_init(void) ret = xfrm6_state_init(); if (ret) goto out_policy; + /* + * We need a good default value for the xfrm6 gc threshold. + * In ipv4 we set it to the route hash table size * 8, which + * is half the size of the maximaum route cache for ipv4. It + * would be good to do the same thing for v6, except the table is + * constructed differently here. Here each table for a net namespace + * can have FIB_TABLE_HASHSZ entries, so lets go with the same + * computation that we used for ipv4 here. Also, lets keep the initial + * gc_thresh to a minimum of 1024, since, the ipv6 route cache defaults + * to that as a minimum as well + */ + gc_thresh = FIB6_TABLE_HASHSZ * 8; + xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; + sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path, xfrm6_policy_table); out: