[TIMEWAIT]: Move inet_timewait_death_row routines to net/ipv4/inet_timewait_sock.c
Also export the ones that will be used in the next changeset, when DCCP uses this infrastructure. Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
295ff7edb8
commit
696ab2d3bf
4 changed files with 290 additions and 273 deletions
|
@ -82,6 +82,10 @@ struct inet_timewait_death_row {
|
|||
int sysctl_max_tw_buckets;
|
||||
};
|
||||
|
||||
extern void inet_twdr_hangman(unsigned long data);
|
||||
extern void inet_twdr_twkill_work(void *data);
|
||||
extern void inet_twdr_twcal_tick(unsigned long data);
|
||||
|
||||
#if (BITS_PER_LONG == 64)
|
||||
#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8
|
||||
#else
|
||||
|
@ -206,4 +210,10 @@ extern void __inet_twsk_kill(struct inet_timewait_sock *tw,
|
|||
extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw,
|
||||
struct sock *sk,
|
||||
struct inet_hashinfo *hashinfo);
|
||||
|
||||
extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
|
||||
struct inet_timewait_death_row *twdr,
|
||||
const int timeo, const int timewait_len);
|
||||
extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
|
||||
struct inet_timewait_death_row *twdr);
|
||||
#endif /* _INET_TIMEWAIT_SOCK_ */
|
||||
|
|
|
@ -44,8 +44,6 @@ extern struct inet_hashinfo tcp_hashinfo;
|
|||
|
||||
extern atomic_t tcp_orphan_count;
|
||||
extern void tcp_time_wait(struct sock *sk, int state, int timeo);
|
||||
extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
|
||||
struct inet_timewait_death_row *twdr);
|
||||
|
||||
#define MAX_TCP_HEADER (128 + MAX_HEADER)
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <net/inet_hashtables.h>
|
||||
#include <net/inet_timewait_sock.h>
|
||||
#include <net/ip.h>
|
||||
|
||||
/* Must be called with locally disabled BHs. */
|
||||
void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo)
|
||||
|
@ -85,6 +86,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
|
|||
write_unlock(&ehead->lock);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
|
||||
|
||||
struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
|
||||
{
|
||||
struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab,
|
||||
|
@ -112,3 +115,270 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
|
|||
|
||||
return tw;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(inet_twsk_alloc);
|
||||
|
||||
/* Returns non-zero if quota exceeded. */
|
||||
static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
|
||||
const int slot)
|
||||
{
|
||||
struct inet_timewait_sock *tw;
|
||||
struct hlist_node *node;
|
||||
unsigned int killed;
|
||||
int ret;
|
||||
|
||||
/* NOTE: compare this to previous version where lock
|
||||
* was released after detaching chain. It was racy,
|
||||
* because tw buckets are scheduled in not serialized context
|
||||
* in 2.3 (with netfilter), and with softnet it is common, because
|
||||
* soft irqs are not sequenced.
|
||||
*/
|
||||
killed = 0;
|
||||
ret = 0;
|
||||
rescan:
|
||||
inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
|
||||
__inet_twsk_del_dead_node(tw);
|
||||
spin_unlock(&twdr->death_lock);
|
||||
__inet_twsk_kill(tw, twdr->hashinfo);
|
||||
inet_twsk_put(tw);
|
||||
killed++;
|
||||
spin_lock(&twdr->death_lock);
|
||||
if (killed > INET_TWDR_TWKILL_QUOTA) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* While we dropped twdr->death_lock, another cpu may have
|
||||
* killed off the next TW bucket in the list, therefore
|
||||
* do a fresh re-read of the hlist head node with the
|
||||
* lock reacquired. We still use the hlist traversal
|
||||
* macro in order to get the prefetches.
|
||||
*/
|
||||
goto rescan;
|
||||
}
|
||||
|
||||
twdr->tw_count -= killed;
|
||||
NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void inet_twdr_hangman(unsigned long data)
|
||||
{
|
||||
struct inet_timewait_death_row *twdr;
|
||||
int unsigned need_timer;
|
||||
|
||||
twdr = (struct inet_timewait_death_row *)data;
|
||||
spin_lock(&twdr->death_lock);
|
||||
|
||||
if (twdr->tw_count == 0)
|
||||
goto out;
|
||||
|
||||
need_timer = 0;
|
||||
if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
|
||||
twdr->thread_slots |= (1 << twdr->slot);
|
||||
mb();
|
||||
schedule_work(&twdr->twkill_work);
|
||||
need_timer = 1;
|
||||
} else {
|
||||
/* We purged the entire slot, anything left? */
|
||||
if (twdr->tw_count)
|
||||
need_timer = 1;
|
||||
}
|
||||
twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
|
||||
if (need_timer)
|
||||
mod_timer(&twdr->tw_timer, jiffies + twdr->period);
|
||||
out:
|
||||
spin_unlock(&twdr->death_lock);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(inet_twdr_hangman);
|
||||
|
||||
extern void twkill_slots_invalid(void);
|
||||
|
||||
void inet_twdr_twkill_work(void *data)
|
||||
{
|
||||
struct inet_timewait_death_row *twdr = data;
|
||||
int i;
|
||||
|
||||
if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
|
||||
twkill_slots_invalid();
|
||||
|
||||
while (twdr->thread_slots) {
|
||||
spin_lock_bh(&twdr->death_lock);
|
||||
for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
|
||||
if (!(twdr->thread_slots & (1 << i)))
|
||||
continue;
|
||||
|
||||
while (inet_twdr_do_twkill_work(twdr, i) != 0) {
|
||||
if (need_resched()) {
|
||||
spin_unlock_bh(&twdr->death_lock);
|
||||
schedule();
|
||||
spin_lock_bh(&twdr->death_lock);
|
||||
}
|
||||
}
|
||||
|
||||
twdr->thread_slots &= ~(1 << i);
|
||||
}
|
||||
spin_unlock_bh(&twdr->death_lock);
|
||||
}
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(inet_twdr_twkill_work);
|
||||
|
||||
/* These are always called from BH context. See callers in
|
||||
* tcp_input.c to verify this.
|
||||
*/
|
||||
|
||||
/* This is for handling early-kills of TIME_WAIT sockets. */
|
||||
void inet_twsk_deschedule(struct inet_timewait_sock *tw,
|
||||
struct inet_timewait_death_row *twdr)
|
||||
{
|
||||
spin_lock(&twdr->death_lock);
|
||||
if (inet_twsk_del_dead_node(tw)) {
|
||||
inet_twsk_put(tw);
|
||||
if (--twdr->tw_count == 0)
|
||||
del_timer(&twdr->tw_timer);
|
||||
}
|
||||
spin_unlock(&twdr->death_lock);
|
||||
__inet_twsk_kill(tw, twdr->hashinfo);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(inet_twsk_deschedule);
|
||||
|
||||
void inet_twsk_schedule(struct inet_timewait_sock *tw,
|
||||
struct inet_timewait_death_row *twdr,
|
||||
const int timeo, const int timewait_len)
|
||||
{
|
||||
struct hlist_head *list;
|
||||
int slot;
|
||||
|
||||
/* timeout := RTO * 3.5
|
||||
*
|
||||
* 3.5 = 1+2+0.5 to wait for two retransmits.
|
||||
*
|
||||
* RATIONALE: if FIN arrived and we entered TIME-WAIT state,
|
||||
* our ACK acking that FIN can be lost. If N subsequent retransmitted
|
||||
* FINs (or previous seqments) are lost (probability of such event
|
||||
* is p^(N+1), where p is probability to lose single packet and
|
||||
* time to detect the loss is about RTO*(2^N - 1) with exponential
|
||||
* backoff). Normal timewait length is calculated so, that we
|
||||
* waited at least for one retransmitted FIN (maximal RTO is 120sec).
|
||||
* [ BTW Linux. following BSD, violates this requirement waiting
|
||||
* only for 60sec, we should wait at least for 240 secs.
|
||||
* Well, 240 consumes too much of resources 8)
|
||||
* ]
|
||||
* This interval is not reduced to catch old duplicate and
|
||||
* responces to our wandering segments living for two MSLs.
|
||||
* However, if we use PAWS to detect
|
||||
* old duplicates, we can reduce the interval to bounds required
|
||||
* by RTO, rather than MSL. So, if peer understands PAWS, we
|
||||
* kill tw bucket after 3.5*RTO (it is important that this number
|
||||
* is greater than TS tick!) and detect old duplicates with help
|
||||
* of PAWS.
|
||||
*/
|
||||
slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
|
||||
|
||||
spin_lock(&twdr->death_lock);
|
||||
|
||||
/* Unlink it, if it was scheduled */
|
||||
if (inet_twsk_del_dead_node(tw))
|
||||
twdr->tw_count--;
|
||||
else
|
||||
atomic_inc(&tw->tw_refcnt);
|
||||
|
||||
if (slot >= INET_TWDR_RECYCLE_SLOTS) {
|
||||
/* Schedule to slow timer */
|
||||
if (timeo >= timewait_len) {
|
||||
slot = INET_TWDR_TWKILL_SLOTS - 1;
|
||||
} else {
|
||||
slot = (timeo + twdr->period - 1) / twdr->period;
|
||||
if (slot >= INET_TWDR_TWKILL_SLOTS)
|
||||
slot = INET_TWDR_TWKILL_SLOTS - 1;
|
||||
}
|
||||
tw->tw_ttd = jiffies + timeo;
|
||||
slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
|
||||
list = &twdr->cells[slot];
|
||||
} else {
|
||||
tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
|
||||
|
||||
if (twdr->twcal_hand < 0) {
|
||||
twdr->twcal_hand = 0;
|
||||
twdr->twcal_jiffie = jiffies;
|
||||
twdr->twcal_timer.expires = twdr->twcal_jiffie +
|
||||
(slot << INET_TWDR_RECYCLE_TICK);
|
||||
add_timer(&twdr->twcal_timer);
|
||||
} else {
|
||||
if (time_after(twdr->twcal_timer.expires,
|
||||
jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
|
||||
mod_timer(&twdr->twcal_timer,
|
||||
jiffies + (slot << INET_TWDR_RECYCLE_TICK));
|
||||
slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
|
||||
}
|
||||
list = &twdr->twcal_row[slot];
|
||||
}
|
||||
|
||||
hlist_add_head(&tw->tw_death_node, list);
|
||||
|
||||
if (twdr->tw_count++ == 0)
|
||||
mod_timer(&twdr->tw_timer, jiffies + twdr->period);
|
||||
spin_unlock(&twdr->death_lock);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(inet_twsk_schedule);
|
||||
|
||||
void inet_twdr_twcal_tick(unsigned long data)
|
||||
{
|
||||
struct inet_timewait_death_row *twdr;
|
||||
int n, slot;
|
||||
unsigned long j;
|
||||
unsigned long now = jiffies;
|
||||
int killed = 0;
|
||||
int adv = 0;
|
||||
|
||||
twdr = (struct inet_timewait_death_row *)data;
|
||||
|
||||
spin_lock(&twdr->death_lock);
|
||||
if (twdr->twcal_hand < 0)
|
||||
goto out;
|
||||
|
||||
slot = twdr->twcal_hand;
|
||||
j = twdr->twcal_jiffie;
|
||||
|
||||
for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
|
||||
if (time_before_eq(j, now)) {
|
||||
struct hlist_node *node, *safe;
|
||||
struct inet_timewait_sock *tw;
|
||||
|
||||
inet_twsk_for_each_inmate_safe(tw, node, safe,
|
||||
&twdr->twcal_row[slot]) {
|
||||
__inet_twsk_del_dead_node(tw);
|
||||
__inet_twsk_kill(tw, twdr->hashinfo);
|
||||
inet_twsk_put(tw);
|
||||
killed++;
|
||||
}
|
||||
} else {
|
||||
if (!adv) {
|
||||
adv = 1;
|
||||
twdr->twcal_jiffie = j;
|
||||
twdr->twcal_hand = slot;
|
||||
}
|
||||
|
||||
if (!hlist_empty(&twdr->twcal_row[slot])) {
|
||||
mod_timer(&twdr->twcal_timer, j);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
j += 1 << INET_TWDR_RECYCLE_TICK;
|
||||
slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
|
||||
}
|
||||
twdr->twcal_hand = -1;
|
||||
|
||||
out:
|
||||
if ((twdr->tw_count -= killed) == 0)
|
||||
del_timer(&twdr->tw_timer);
|
||||
NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
|
||||
spin_unlock(&twdr->death_lock);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
|
||||
|
|
|
@ -35,12 +35,6 @@
|
|||
#define SYNC_INIT 1
|
||||
#endif
|
||||
|
||||
/* New-style handling of TIME_WAIT sockets. */
|
||||
|
||||
static void inet_twdr_hangman(unsigned long data);
|
||||
static void inet_twdr_twkill_work(void *data);
|
||||
static void inet_twdr_twcal_tick(unsigned long data);
|
||||
|
||||
int sysctl_tcp_syncookies = SYNC_INIT;
|
||||
int sysctl_tcp_abort_on_overflow;
|
||||
|
||||
|
@ -63,10 +57,6 @@ struct inet_timewait_death_row tcp_death_row = {
|
|||
|
||||
EXPORT_SYMBOL_GPL(tcp_death_row);
|
||||
|
||||
static void inet_twsk_schedule(struct inet_timewait_sock *tw,
|
||||
struct inet_timewait_death_row *twdr,
|
||||
const int timeo);
|
||||
|
||||
static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
|
||||
{
|
||||
if (seq == s_win)
|
||||
|
@ -173,9 +163,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
|
|||
if (tw->tw_family == AF_INET &&
|
||||
tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
|
||||
tcp_v4_tw_remember_stamp(tw))
|
||||
inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout);
|
||||
inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
|
||||
TCP_TIMEWAIT_LEN);
|
||||
else
|
||||
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
|
||||
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
|
||||
TCP_TIMEWAIT_LEN);
|
||||
return TCP_TW_ACK;
|
||||
}
|
||||
|
||||
|
@ -213,7 +205,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
|
|||
return TCP_TW_SUCCESS;
|
||||
}
|
||||
}
|
||||
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
|
||||
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
|
||||
TCP_TIMEWAIT_LEN);
|
||||
|
||||
if (tmp_opt.saw_tstamp) {
|
||||
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
|
||||
|
@ -263,7 +256,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
|
|||
* Do not reschedule in the last case.
|
||||
*/
|
||||
if (paws_reject || th->ack)
|
||||
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
|
||||
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
|
||||
TCP_TIMEWAIT_LEN);
|
||||
|
||||
/* Send ACK. Note, we do not put the bucket,
|
||||
* it will be released by caller.
|
||||
|
@ -326,7 +320,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
|
|||
timeo = TCP_TIMEWAIT_LEN;
|
||||
}
|
||||
|
||||
inet_twsk_schedule(tw, &tcp_death_row, timeo);
|
||||
inet_twsk_schedule(tw, &tcp_death_row, timeo,
|
||||
TCP_TIMEWAIT_LEN);
|
||||
inet_twsk_put(tw);
|
||||
} else {
|
||||
/* Sorry, if we're out of memory, just CLOSE this
|
||||
|
@ -341,261 +336,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
|
|||
tcp_done(sk);
|
||||
}
|
||||
|
||||
/* Returns non-zero if quota exceeded. */
|
||||
static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
|
||||
const int slot)
|
||||
{
|
||||
struct inet_timewait_sock *tw;
|
||||
struct hlist_node *node;
|
||||
unsigned int killed;
|
||||
int ret;
|
||||
|
||||
/* NOTE: compare this to previous version where lock
|
||||
* was released after detaching chain. It was racy,
|
||||
* because tw buckets are scheduled in not serialized context
|
||||
* in 2.3 (with netfilter), and with softnet it is common, because
|
||||
* soft irqs are not sequenced.
|
||||
*/
|
||||
killed = 0;
|
||||
ret = 0;
|
||||
rescan:
|
||||
inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
|
||||
__inet_twsk_del_dead_node(tw);
|
||||
spin_unlock(&twdr->death_lock);
|
||||
__inet_twsk_kill(tw, twdr->hashinfo);
|
||||
inet_twsk_put(tw);
|
||||
killed++;
|
||||
spin_lock(&twdr->death_lock);
|
||||
if (killed > INET_TWDR_TWKILL_QUOTA) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* While we dropped twdr->death_lock, another cpu may have
|
||||
* killed off the next TW bucket in the list, therefore
|
||||
* do a fresh re-read of the hlist head node with the
|
||||
* lock reacquired. We still use the hlist traversal
|
||||
* macro in order to get the prefetches.
|
||||
*/
|
||||
goto rescan;
|
||||
}
|
||||
|
||||
twdr->tw_count -= killed;
|
||||
NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void inet_twdr_hangman(unsigned long data)
|
||||
{
|
||||
struct inet_timewait_death_row *twdr;
|
||||
int unsigned need_timer;
|
||||
|
||||
twdr = (struct inet_timewait_death_row *)data;
|
||||
spin_lock(&twdr->death_lock);
|
||||
|
||||
if (twdr->tw_count == 0)
|
||||
goto out;
|
||||
|
||||
need_timer = 0;
|
||||
if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
|
||||
twdr->thread_slots |= (1 << twdr->slot);
|
||||
mb();
|
||||
schedule_work(&twdr->twkill_work);
|
||||
need_timer = 1;
|
||||
} else {
|
||||
/* We purged the entire slot, anything left? */
|
||||
if (twdr->tw_count)
|
||||
need_timer = 1;
|
||||
}
|
||||
twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
|
||||
if (need_timer)
|
||||
mod_timer(&twdr->tw_timer, jiffies + twdr->period);
|
||||
out:
|
||||
spin_unlock(&twdr->death_lock);
|
||||
}
|
||||
|
||||
extern void twkill_slots_invalid(void);
|
||||
|
||||
static void inet_twdr_twkill_work(void *data)
|
||||
{
|
||||
struct inet_timewait_death_row *twdr = data;
|
||||
int i;
|
||||
|
||||
if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
|
||||
twkill_slots_invalid();
|
||||
|
||||
while (twdr->thread_slots) {
|
||||
spin_lock_bh(&twdr->death_lock);
|
||||
for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
|
||||
if (!(twdr->thread_slots & (1 << i)))
|
||||
continue;
|
||||
|
||||
while (inet_twdr_do_twkill_work(twdr, i) != 0) {
|
||||
if (need_resched()) {
|
||||
spin_unlock_bh(&twdr->death_lock);
|
||||
schedule();
|
||||
spin_lock_bh(&twdr->death_lock);
|
||||
}
|
||||
}
|
||||
|
||||
twdr->thread_slots &= ~(1 << i);
|
||||
}
|
||||
spin_unlock_bh(&twdr->death_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/* These are always called from BH context. See callers in
|
||||
* tcp_input.c to verify this.
|
||||
*/
|
||||
|
||||
/* This is for handling early-kills of TIME_WAIT sockets. */
|
||||
void inet_twsk_deschedule(struct inet_timewait_sock *tw,
|
||||
struct inet_timewait_death_row *twdr)
|
||||
{
|
||||
spin_lock(&twdr->death_lock);
|
||||
if (inet_twsk_del_dead_node(tw)) {
|
||||
inet_twsk_put(tw);
|
||||
if (--twdr->tw_count == 0)
|
||||
del_timer(&twdr->tw_timer);
|
||||
}
|
||||
spin_unlock(&twdr->death_lock);
|
||||
__inet_twsk_kill(tw, twdr->hashinfo);
|
||||
}
|
||||
|
||||
static void inet_twsk_schedule(struct inet_timewait_sock *tw,
|
||||
struct inet_timewait_death_row *twdr,
|
||||
const int timeo)
|
||||
{
|
||||
struct hlist_head *list;
|
||||
int slot;
|
||||
|
||||
/* timeout := RTO * 3.5
|
||||
*
|
||||
* 3.5 = 1+2+0.5 to wait for two retransmits.
|
||||
*
|
||||
* RATIONALE: if FIN arrived and we entered TIME-WAIT state,
|
||||
* our ACK acking that FIN can be lost. If N subsequent retransmitted
|
||||
* FINs (or previous seqments) are lost (probability of such event
|
||||
* is p^(N+1), where p is probability to lose single packet and
|
||||
* time to detect the loss is about RTO*(2^N - 1) with exponential
|
||||
* backoff). Normal timewait length is calculated so, that we
|
||||
* waited at least for one retransmitted FIN (maximal RTO is 120sec).
|
||||
* [ BTW Linux. following BSD, violates this requirement waiting
|
||||
* only for 60sec, we should wait at least for 240 secs.
|
||||
* Well, 240 consumes too much of resources 8)
|
||||
* ]
|
||||
* This interval is not reduced to catch old duplicate and
|
||||
* responces to our wandering segments living for two MSLs.
|
||||
* However, if we use PAWS to detect
|
||||
* old duplicates, we can reduce the interval to bounds required
|
||||
* by RTO, rather than MSL. So, if peer understands PAWS, we
|
||||
* kill tw bucket after 3.5*RTO (it is important that this number
|
||||
* is greater than TS tick!) and detect old duplicates with help
|
||||
* of PAWS.
|
||||
*/
|
||||
slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
|
||||
|
||||
spin_lock(&twdr->death_lock);
|
||||
|
||||
/* Unlink it, if it was scheduled */
|
||||
if (inet_twsk_del_dead_node(tw))
|
||||
twdr->tw_count--;
|
||||
else
|
||||
atomic_inc(&tw->tw_refcnt);
|
||||
|
||||
if (slot >= INET_TWDR_RECYCLE_SLOTS) {
|
||||
/* Schedule to slow timer */
|
||||
if (timeo >= TCP_TIMEWAIT_LEN) {
|
||||
slot = INET_TWDR_TWKILL_SLOTS - 1;
|
||||
} else {
|
||||
slot = (timeo + twdr->period - 1) / twdr->period;
|
||||
if (slot >= INET_TWDR_TWKILL_SLOTS)
|
||||
slot = INET_TWDR_TWKILL_SLOTS - 1;
|
||||
}
|
||||
tw->tw_ttd = jiffies + timeo;
|
||||
slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
|
||||
list = &twdr->cells[slot];
|
||||
} else {
|
||||
tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
|
||||
|
||||
if (twdr->twcal_hand < 0) {
|
||||
twdr->twcal_hand = 0;
|
||||
twdr->twcal_jiffie = jiffies;
|
||||
twdr->twcal_timer.expires = twdr->twcal_jiffie +
|
||||
(slot << INET_TWDR_RECYCLE_TICK);
|
||||
add_timer(&twdr->twcal_timer);
|
||||
} else {
|
||||
if (time_after(twdr->twcal_timer.expires,
|
||||
jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
|
||||
mod_timer(&twdr->twcal_timer,
|
||||
jiffies + (slot << INET_TWDR_RECYCLE_TICK));
|
||||
slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
|
||||
}
|
||||
list = &twdr->twcal_row[slot];
|
||||
}
|
||||
|
||||
hlist_add_head(&tw->tw_death_node, list);
|
||||
|
||||
if (twdr->tw_count++ == 0)
|
||||
mod_timer(&twdr->tw_timer, jiffies + twdr->period);
|
||||
spin_unlock(&twdr->death_lock);
|
||||
}
|
||||
|
||||
void inet_twdr_twcal_tick(unsigned long data)
|
||||
{
|
||||
struct inet_timewait_death_row *twdr;
|
||||
int n, slot;
|
||||
unsigned long j;
|
||||
unsigned long now = jiffies;
|
||||
int killed = 0;
|
||||
int adv = 0;
|
||||
|
||||
twdr = (struct inet_timewait_death_row *)data;
|
||||
|
||||
spin_lock(&twdr->death_lock);
|
||||
if (twdr->twcal_hand < 0)
|
||||
goto out;
|
||||
|
||||
slot = twdr->twcal_hand;
|
||||
j = twdr->twcal_jiffie;
|
||||
|
||||
for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
|
||||
if (time_before_eq(j, now)) {
|
||||
struct hlist_node *node, *safe;
|
||||
struct inet_timewait_sock *tw;
|
||||
|
||||
inet_twsk_for_each_inmate_safe(tw, node, safe,
|
||||
&twdr->twcal_row[slot]) {
|
||||
__inet_twsk_del_dead_node(tw);
|
||||
__inet_twsk_kill(tw, twdr->hashinfo);
|
||||
inet_twsk_put(tw);
|
||||
killed++;
|
||||
}
|
||||
} else {
|
||||
if (!adv) {
|
||||
adv = 1;
|
||||
twdr->twcal_jiffie = j;
|
||||
twdr->twcal_hand = slot;
|
||||
}
|
||||
|
||||
if (!hlist_empty(&twdr->twcal_row[slot])) {
|
||||
mod_timer(&twdr->twcal_timer, j);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
j += 1 << INET_TWDR_RECYCLE_TICK;
|
||||
slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
|
||||
}
|
||||
twdr->twcal_hand = -1;
|
||||
|
||||
out:
|
||||
if ((twdr->tw_count -= killed) == 0)
|
||||
del_timer(&twdr->tw_timer);
|
||||
NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
|
||||
spin_unlock(&twdr->death_lock);
|
||||
}
|
||||
|
||||
/* This is not only more efficient than what we used to do, it eliminates
|
||||
* a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
|
||||
*
|
||||
|
@ -933,4 +673,3 @@ EXPORT_SYMBOL(tcp_check_req);
|
|||
EXPORT_SYMBOL(tcp_child_process);
|
||||
EXPORT_SYMBOL(tcp_create_openreq_child);
|
||||
EXPORT_SYMBOL(tcp_timewait_state_process);
|
||||
EXPORT_SYMBOL(inet_twsk_deschedule);
|
||||
|
|
Loading…
Reference in a new issue