inetpeer: Invalidate the inetpeer tree along with the routing cache

We initialize the routing metrics with the values cached on the
inetpeer in rt_init_metrics(). So if we have the metrics cached on the
inetpeer, we ignore the user configured fib_metrics.

To fix this issue, we replace the old tree with a freshly initialized
inet_peer_base. The old tree is removed later by a delayed work item.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Steffen Klassert 2012-03-06 21:20:26 +00:00 committed by David S. Miller
parent dcf353b170
commit 5faa5df1fa
3 changed files with 83 additions and 1 deletions

View file

@ -41,6 +41,7 @@ struct inet_peer {
u32 pmtu_orig; u32 pmtu_orig;
u32 pmtu_learned; u32 pmtu_learned;
struct inetpeer_addr_base redirect_learned; struct inetpeer_addr_base redirect_learned;
struct list_head gc_list;
/* /*
* Once inet_peer is queued for deletion (refcnt == -1), following fields * Once inet_peer is queued for deletion (refcnt == -1), following fields
* are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
@ -96,6 +97,8 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr,
extern void inet_putpeer(struct inet_peer *p); extern void inet_putpeer(struct inet_peer *p);
extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
extern void inetpeer_invalidate_tree(int family);
/* /*
* temporary check to make sure we dont access rid, ip_id_count, tcp_ts, * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
* tcp_ts_stamp if no refcount is taken on inet_peer * tcp_ts_stamp if no refcount is taken on inet_peer

View file

@ -17,6 +17,7 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/net.h> #include <linux/net.h>
#include <linux/workqueue.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/inetpeer.h> #include <net/inetpeer.h>
#include <net/secure_seq.h> #include <net/secure_seq.h>
@ -66,6 +67,11 @@
static struct kmem_cache *peer_cachep __read_mostly; static struct kmem_cache *peer_cachep __read_mostly;
/* Detached peer nodes waiting to be freed by inetpeer_gc_worker(); accessed under gc_lock. */
static LIST_HEAD(gc_list);
/* Delay, in jiffies (60 seconds), used whenever gc_work is scheduled. */
static const int gc_delay = 60 * HZ;
/* Delayed work item whose handler is inetpeer_gc_worker(). */
static struct delayed_work gc_work;
/* Serializes every access to gc_list. */
static DEFINE_SPINLOCK(gc_lock);
#define node_height(x) x->avl_height #define node_height(x) x->avl_height
#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) #define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
@ -102,6 +108,50 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m
int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */
static void inetpeer_gc_worker(struct work_struct *work)
{
struct inet_peer *p, *n;
LIST_HEAD(list);
spin_lock_bh(&gc_lock);
list_replace_init(&gc_list, &list);
spin_unlock_bh(&gc_lock);
if (list_empty(&list))
return;
list_for_each_entry_safe(p, n, &list, gc_list) {
if(need_resched())
cond_resched();
if (p->avl_left != peer_avl_empty) {
list_add_tail(&p->avl_left->gc_list, &list);
p->avl_left = peer_avl_empty;
}
if (p->avl_right != peer_avl_empty) {
list_add_tail(&p->avl_right->gc_list, &list);
p->avl_right = peer_avl_empty;
}
n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
if (!atomic_read(&p->refcnt)) {
list_del(&p->gc_list);
kmem_cache_free(peer_cachep, p);
}
}
if (list_empty(&list))
return;
spin_lock_bh(&gc_lock);
list_splice(&list, &gc_list);
spin_unlock_bh(&gc_lock);
schedule_delayed_work(&gc_work, gc_delay);
}
/* Called from ip_output.c:ip_init */ /* Called from ip_output.c:ip_init */
void __init inet_initpeers(void) void __init inet_initpeers(void)
@ -126,6 +176,7 @@ void __init inet_initpeers(void)
0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
NULL); NULL);
INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker);
} }
static int addr_compare(const struct inetpeer_addr *a, static int addr_compare(const struct inetpeer_addr *a,
@ -449,7 +500,7 @@ struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create)
p->pmtu_orig = 0; p->pmtu_orig = 0;
p->redirect_genid = 0; p->redirect_genid = 0;
memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
INIT_LIST_HEAD(&p->gc_list);
/* Link the node. */ /* Link the node. */
link_to_pool(p, base); link_to_pool(p, base);
@ -509,3 +560,30 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
return rc; return rc;
} }
EXPORT_SYMBOL(inet_peer_xrlim_allow); EXPORT_SYMBOL(inet_peer_xrlim_allow);
/*
 * Drop every cached peer of one address family.
 *
 * @family: address family; selects the peer base through family_to_base().
 *
 * Under the base's write seqlock the current root is swapped for the empty
 * marker, so the base starts over with an empty tree and a zero entry
 * count.  The old root is queued on gc_list and gc_work is scheduled with
 * gc_delay; the detached nodes are released later by the work handler, not
 * here.  Nothing is done if the tree is already empty.
 */
void inetpeer_invalidate_tree(int family)
{
	struct inet_peer_base *base = family_to_base(family);
	struct inet_peer *root, *seen;

	write_seqlock_bh(&base->lock);

	root = base->root;
	if (root != peer_avl_empty_rcu) {
		/* Only proceed if the root is still the one sampled above. */
		seen = cmpxchg(&base->root, root, peer_avl_empty_rcu);
		if (seen == root) {
			base->total = 0;

			/* Bottom halves are already off via write_seqlock_bh(). */
			spin_lock(&gc_lock);
			list_add_tail(&seen->gc_list, &gc_list);
			spin_unlock(&gc_lock);

			schedule_delayed_work(&gc_work, gc_delay);
		}
	}

	write_sequnlock_bh(&base->lock);
}
EXPORT_SYMBOL(inetpeer_invalidate_tree);

View file

@ -938,6 +938,7 @@ static void rt_cache_invalidate(struct net *net)
get_random_bytes(&shuffle, sizeof(shuffle)); get_random_bytes(&shuffle, sizeof(shuffle));
atomic_add(shuffle + 1U, &net->ipv4.rt_genid); atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
redirect_genid++; redirect_genid++;
inetpeer_invalidate_tree(AF_INET);
} }
/* /*