[ICSK]: Move TCP congestion avoidance members to icsk

This changeset basically moves tcp_sk()->{ca_ops,ca_state,etc} to inet_csk(),
with only minimal renaming/moving done in this changeset to ease review.

Most of it is just changes of struct tcp_sock * to struct sock * parameters.

With this we move to a state closer to two interesting goals:

1. Generalisation of net/ipv4/tcp_diag.c, becoming inet_diag.c, to be used
   for any INET transport protocol that has a struct inet_hashinfo and is
   derived from struct inet_connection_sock. This keeps the userspace API,
   which will just not display DCCP sockets, while newer versions of tools
   can support DCCP.

2. INET generic transport pluggable Congestion Avoidance infrastructure, using
   the current TCP CA infrastructure with DCCP.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Arnaldo Carvalho de Melo 2005-08-10 04:03:31 -03:00 committed by David S. Miller
parent 64ce207306
commit 6687e988d9
20 changed files with 412 additions and 334 deletions

View file

@ -258,19 +258,15 @@ struct tcp_sock {
__u32 mss_cache; /* Cached effective mss, not including SACKS */
__u16 xmit_size_goal; /* Goal for segmenting output packets */
__u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
__u8 ca_state; /* State of fast-retransmit machine */
__u8 keepalive_probes; /* num of allowed keep alive probes */
__u16 advmss; /* Advertised MSS */
__u32 window_clamp; /* Maximal window to advertise */
__u32 rcv_ssthresh; /* Current window clamp */
__u32 frto_highmark; /* snd_nxt when RTO occurred */
__u8 reordering; /* Packet reordering metric. */
__u8 frto_counter; /* Number of new acks after RTO */
__u8 nonagle; /* Disable Nagle algorithm? */
/* ONE BYTE HOLE, TRY TO PACK */
__u8 keepalive_probes; /* num of allowed keep alive probes */
/* RTT measurement */
__u32 srtt; /* smoothed round trip time << 3 */
@ -311,8 +307,7 @@ struct tcp_sock {
struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
__u8 probes_out; /* unanswered 0 window probes */
__u8 ecn_flags; /* ECN status bits. */
__u16 advmss; /* Advertised MSS */
__u16 prior_ssthresh; /* ssthresh saved at recovery start */
__u32 lost_out; /* Lost packets */
__u32 sacked_out; /* SACK'd packets */
@ -327,7 +322,7 @@ struct tcp_sock {
__u32 urg_seq; /* Seq of received urgent pointer */
__u16 urg_data; /* Saved octet of OOB data and control flags */
__u8 urg_mode; /* In urgent mode */
/* ONE BYTE HOLE, TRY TO PACK! */
__u8 ecn_flags; /* ECN status bits. */
__u32 snd_up; /* Urgent pointer */
__u32 total_retrans; /* Total retransmits for entire connection */
@ -351,11 +346,6 @@ struct tcp_sock {
__u32 seq;
__u32 time;
} rcvq_space;
/* Pluggable TCP congestion control hook */
struct tcp_congestion_ops *ca_ops;
u32 ca_priv[16];
#define TCP_CA_PRIV_SIZE (16*sizeof(u32))
};
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@ -377,11 +367,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
return (struct tcp_timewait_sock *)sk;
}
/* Return tp->ca_priv, the congestion control private data, as an untyped pointer. */
static inline void *tcp_ca(const struct tcp_sock *tp)
{
return (void *) tp->ca_priv;
}
#endif
#endif /* _LINUX_TCP_H */

View file

@ -27,6 +27,7 @@
struct inet_bind_bucket;
struct inet_hashinfo;
struct tcp_congestion_ops;
/** inet_connection_sock - INET connection oriented sock
*
@ -35,10 +36,13 @@ struct inet_hashinfo;
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout
* @icsk_ca_ops: Pluggable congestion control hook
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
* @icsk_pending: Scheduled timer event
* @icsk_backoff: Backoff
* @icsk_syn_retries: Number of allowed SYN (or equivalent) retries
* @icsk_probes_out: unanswered 0 window probes
* @icsk_ack: Delayed ACK control data
*/
struct inet_connection_sock {
@ -50,10 +54,14 @@ struct inet_connection_sock {
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
__u32 icsk_rto;
struct tcp_congestion_ops *icsk_ca_ops;
__u8 icsk_ca_state;
__u8 icsk_retransmits;
__u8 icsk_pending;
__u8 icsk_backoff;
__u8 icsk_syn_retries;
__u8 icsk_probes_out;
/* 2 BYTES HOLE, TRY TO PACK! */
struct {
__u8 pending; /* ACK is pending */
__u8 quick; /* Scheduled number of quick acks */
@ -65,6 +73,8 @@ struct inet_connection_sock {
__u16 last_seg_size; /* Size of last incoming segment */
__u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack;
u32 icsk_ca_priv[16];
#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32))
};
#define ICSK_TIME_RETRANS 1 /* Retransmit timer */
@ -77,6 +87,11 @@ static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
return (struct inet_connection_sock *)sk;
}
/*
 * Return the icsk_ca_priv array of sk's inet_connection_sock as an untyped
 * pointer, so a caller can view it as its own private state structure.
 */
static inline void *inet_csk_ca(const struct sock *sk)
{
return (void *)inet_csk(sk)->icsk_ca_priv;
}
extern struct sock *inet_csk_clone(struct sock *sk,
const struct request_sock *req,
const unsigned int __nocast priority);

View file

@ -669,29 +669,29 @@ struct tcp_congestion_ops {
struct list_head list;
/* initialize private data (optional) */
void (*init)(struct tcp_sock *tp);
void (*init)(struct sock *sk);
/* cleanup private data (optional) */
void (*release)(struct tcp_sock *tp);
void (*release)(struct sock *sk);
/* return slow start threshold (required) */
u32 (*ssthresh)(struct tcp_sock *tp);
u32 (*ssthresh)(struct sock *sk);
/* lower bound for congestion window (optional) */
u32 (*min_cwnd)(struct tcp_sock *tp);
u32 (*min_cwnd)(struct sock *sk);
/* do new cwnd calculation (required) */
void (*cong_avoid)(struct tcp_sock *tp, u32 ack,
void (*cong_avoid)(struct sock *sk, u32 ack,
u32 rtt, u32 in_flight, int good_ack);
/* round trip time sample per acked packet (optional) */
void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt);
void (*rtt_sample)(struct sock *sk, u32 usrtt);
/* call before changing ca_state (optional) */
void (*set_state)(struct tcp_sock *tp, u8 new_state);
void (*set_state)(struct sock *sk, u8 new_state);
/* call when cwnd event occurs (optional) */
void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev);
void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
/* new value of cwnd after loss (optional) */
u32 (*undo_cwnd)(struct tcp_sock *tp);
u32 (*undo_cwnd)(struct sock *sk);
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked);
void (*pkts_acked)(struct sock *sk, u32 num_acked);
/* get info for tcp_diag (optional) */
void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb);
void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
char name[TCP_CA_NAME_MAX];
struct module *owner;
@ -700,30 +700,34 @@ struct tcp_congestion_ops {
extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_init_congestion_control(struct tcp_sock *tp);
extern void tcp_cleanup_congestion_control(struct tcp_sock *tp);
extern void tcp_init_congestion_control(struct sock *sk);
extern void tcp_cleanup_congestion_control(struct sock *sk);
extern int tcp_set_default_congestion_control(const char *name);
extern void tcp_get_default_congestion_control(char *name);
extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name);
extern int tcp_set_congestion_control(struct sock *sk, const char *name);
extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct tcp_sock *tp);
extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack,
extern u32 tcp_reno_ssthresh(struct sock *sk);
extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
u32 rtt, u32 in_flight, int flag);
extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp);
extern u32 tcp_reno_min_cwnd(struct sock *sk);
extern struct tcp_congestion_ops tcp_reno;
static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state)
static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
{
if (tp->ca_ops->set_state)
tp->ca_ops->set_state(tp, ca_state);
tp->ca_state = ca_state;
struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->set_state)
icsk->icsk_ca_ops->set_state(sk, ca_state);
icsk->icsk_ca_state = ca_state;
}
static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event)
static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
if (tp->ca_ops->cwnd_event)
tp->ca_ops->cwnd_event(tp, event);
const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->cwnd_event)
icsk->icsk_ca_ops->cwnd_event(sk, event);
}
/* This determines how many packets are "in the network" to the best
@ -749,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
* The exception is rate halving phase, when cwnd is decreasing towards
* ssthresh.
*/
static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp)
static inline __u32 tcp_current_ssthresh(const struct sock *sk)
{
if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
const struct tcp_sock *tp = tcp_sk(sk);
if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
return tp->snd_ssthresh;
else
return max(tp->snd_ssthresh,
@ -768,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
}
/* Set slow start threshold and cwnd not falling to slow start */
static inline void __tcp_enter_cwr(struct tcp_sock *tp)
static inline void __tcp_enter_cwr(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
tp->undo_marker = 0;
tp->snd_ssthresh = tp->ca_ops->ssthresh(tp);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tp->snd_cwnd = min(tp->snd_cwnd,
tcp_packets_in_flight(tp) + 1U);
tp->snd_cwnd_cnt = 0;
@ -780,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp)
TCP_ECN_queue_cwr(tp);
}
static inline void tcp_enter_cwr(struct tcp_sock *tp)
static inline void tcp_enter_cwr(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
tp->prior_ssthresh = 0;
if (tp->ca_state < TCP_CA_CWR) {
__tcp_enter_cwr(tp);
tcp_set_ca_state(tp, TCP_CA_CWR);
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
__tcp_enter_cwr(sk);
tcp_set_ca_state(sk, TCP_CA_CWR);
}
}

View file

@ -508,7 +508,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
newsk->sk_write_space = sk_stream_write_space;
newicsk->icsk_retransmits = 0;
newicsk->icsk_backoff = 0;
newicsk->icsk_backoff = 0;
newicsk->icsk_probes_out = 0;
/* Deinitialize accept_queue to trap illegal accesses. */
memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));

View file

@ -1671,11 +1671,11 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->write_seq = 1;
icsk->icsk_backoff = 0;
tp->snd_cwnd = 2;
tp->probes_out = 0;
icsk->icsk_probes_out = 0;
tp->packets_out = 0;
tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(tp, TCP_CA_Open);
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
sk->sk_send_head = NULL;
@ -1718,7 +1718,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
name[val] = 0;
lock_sock(sk);
err = tcp_set_congestion_control(tp, name);
err = tcp_set_congestion_control(sk, name);
release_sock(sk);
return err;
}
@ -1886,9 +1886,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
memset(info, 0, sizeof(*info));
info->tcpi_state = sk->sk_state;
info->tcpi_ca_state = tp->ca_state;
info->tcpi_ca_state = icsk->icsk_ca_state;
info->tcpi_retransmits = icsk->icsk_retransmits;
info->tcpi_probes = tp->probes_out;
info->tcpi_probes = icsk->icsk_probes_out;
info->tcpi_backoff = icsk->icsk_backoff;
if (tp->rx_opt.tstamp_ok)
@ -2016,7 +2016,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
if (put_user(len, optlen))
return -EFAULT;
if (copy_to_user(optval, tp->ca_ops->name, len))
if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
return -EFAULT;
return 0;
default:

View file

@ -86,11 +86,11 @@ static inline void bictcp_reset(struct bictcp *ca)
ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
}
static void bictcp_init(struct tcp_sock *tp)
static void bictcp_init(struct sock *sk)
{
bictcp_reset(tcp_ca(tp));
bictcp_reset(inet_csk_ca(sk));
if (initial_ssthresh)
tp->snd_ssthresh = initial_ssthresh;
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
/*
@ -156,9 +156,10 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
/* Detect low utilization in congestion avoidance */
static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag)
static inline void bictcp_low_utilization(struct sock *sk, int flag)
{
struct bictcp *ca = tcp_ca(tp);
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
u32 dist, delay;
/* No time stamp */
@ -208,12 +209,13 @@ static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag)
}
static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack,
static void bictcp_cong_avoid(struct sock *sk, u32 ack,
u32 seq_rtt, u32 in_flight, int data_acked)
{
struct bictcp *ca = tcp_ca(tp);
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
bictcp_low_utilization(tp, data_acked);
bictcp_low_utilization(sk, data_acked);
if (in_flight < tp->snd_cwnd)
return;
@ -242,9 +244,10 @@ static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack,
* behave like Reno until low_window is reached,
* then increase congestion window slowly
*/
static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp)
static u32 bictcp_recalc_ssthresh(struct sock *sk)
{
struct bictcp *ca = tcp_ca(tp);
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
ca->epoch_start = 0; /* end of epoch */
@ -269,31 +272,34 @@ static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp)
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
static u32 bictcp_undo_cwnd(struct tcp_sock *tp)
static u32 bictcp_undo_cwnd(struct sock *sk)
{
struct bictcp *ca = tcp_ca(tp);
const struct tcp_sock *tp = tcp_sk(sk);
const struct bictcp *ca = inet_csk_ca(sk);
return max(tp->snd_cwnd, ca->last_max_cwnd);
}
static u32 bictcp_min_cwnd(struct tcp_sock *tp)
static u32 bictcp_min_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
return tp->snd_ssthresh;
}
static void bictcp_state(struct tcp_sock *tp, u8 new_state)
static void bictcp_state(struct sock *sk, u8 new_state)
{
if (new_state == TCP_CA_Loss)
bictcp_reset(tcp_ca(tp));
bictcp_reset(inet_csk_ca(sk));
}
/* Track delayed acknowledgement ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
static void bictcp_acked(struct tcp_sock *tp, u32 cnt)
static void bictcp_acked(struct sock *sk, u32 cnt)
{
if (cnt > 0 && tp->ca_state == TCP_CA_Open) {
struct bictcp *ca = tcp_ca(tp);
const struct inet_connection_sock *icsk = inet_csk(sk);
if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
struct bictcp *ca = inet_csk_ca(sk);
cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
ca->delayed_ack += cnt;
}
@ -314,7 +320,7 @@ static struct tcp_congestion_ops bictcp = {
static int __init bictcp_register(void)
{
BUG_ON(sizeof(struct bictcp) > TCP_CA_PRIV_SIZE);
BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&bictcp);
}

View file

@ -73,33 +73,36 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
/* Assign choice of congestion control. */
void tcp_init_congestion_control(struct tcp_sock *tp)
void tcp_init_congestion_control(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_congestion_ops *ca;
if (tp->ca_ops != &tcp_init_congestion_ops)
if (icsk->icsk_ca_ops != &tcp_init_congestion_ops)
return;
rcu_read_lock();
list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
if (try_module_get(ca->owner)) {
tp->ca_ops = ca;
icsk->icsk_ca_ops = ca;
break;
}
}
rcu_read_unlock();
if (tp->ca_ops->init)
tp->ca_ops->init(tp);
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
}
/* Manage refcounts on socket close. */
void tcp_cleanup_congestion_control(struct tcp_sock *tp)
void tcp_cleanup_congestion_control(struct sock *sk)
{
if (tp->ca_ops->release)
tp->ca_ops->release(tp);
module_put(tp->ca_ops->owner);
struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
module_put(icsk->icsk_ca_ops->owner);
}
/* Used by sysctl to change default congestion control */
@ -143,14 +146,15 @@ void tcp_get_default_congestion_control(char *name)
}
/* Change congestion control for socket */
int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
int tcp_set_congestion_control(struct sock *sk, const char *name)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_congestion_ops *ca;
int err = 0;
rcu_read_lock();
ca = tcp_ca_find(name);
if (ca == tp->ca_ops)
if (ca == icsk->icsk_ca_ops)
goto out;
if (!ca)
@ -160,10 +164,10 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
err = -EBUSY;
else {
tcp_cleanup_congestion_control(tp);
tp->ca_ops = ca;
if (tp->ca_ops->init)
tp->ca_ops->init(tp);
tcp_cleanup_congestion_control(sk);
icsk->icsk_ca_ops = ca;
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
}
out:
rcu_read_unlock();
@ -177,9 +181,11 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
/* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328.
*/
void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight,
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
if (in_flight < tp->snd_cwnd)
return;
@ -202,15 +208,17 @@ void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight,
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
/* Slow start threshold is half the congestion window (min 2) */
u32 tcp_reno_ssthresh(struct tcp_sock *tp)
u32 tcp_reno_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
return max(tp->snd_cwnd >> 1U, 2U);
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
/* Lower bound on congestion window. */
u32 tcp_reno_min_cwnd(struct tcp_sock *tp)
u32 tcp_reno_min_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
return tp->snd_ssthresh/2;
}
EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);

View file

@ -66,10 +66,10 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
if (ext & (1<<(TCPDIAG_INFO-1)))
info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info));
if (ext & (1<<(TCPDIAG_CONG-1))) {
size_t len = strlen(tp->ca_ops->name);
if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) {
size_t len = strlen(icsk->icsk_ca_ops->name);
strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1),
tp->ca_ops->name);
icsk->icsk_ca_ops->name);
}
}
r->tcpdiag_family = sk->sk_family;
@ -136,18 +136,17 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
r->tcpdiag_timer = 4;
r->tcpdiag_retrans = tp->probes_out;
r->tcpdiag_retrans = icsk->icsk_probes_out;
r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
} else if (timer_pending(&sk->sk_timer)) {
r->tcpdiag_timer = 2;
r->tcpdiag_retrans = tp->probes_out;
r->tcpdiag_retrans = icsk->icsk_probes_out;
r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
} else {
r->tcpdiag_timer = 0;
r->tcpdiag_expires = 0;
}
#undef EXPIRES_IN_MS
r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq;
r->tcpdiag_wqueue = tp->write_seq - tp->snd_una;
r->tcpdiag_uid = sock_i_uid(sk);
@ -163,8 +162,9 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
if (info)
tcp_get_info(sk, info);
if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info)
tp->ca_ops->get_info(tp, ext, skb);
if (sk->sk_state < TCP_TIME_WAIT &&
icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
icsk->icsk_ca_ops->get_info(sk, ext, skb);
nlh->nlmsg_len = skb->tail - b;
return skb->len;

View file

@ -98,9 +98,10 @@ struct hstcp {
u32 ai;
};
static void hstcp_init(struct tcp_sock *tp)
static void hstcp_init(struct sock *sk)
{
struct hstcp *ca = tcp_ca(tp);
struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
ca->ai = 0;
@ -109,10 +110,11 @@ static void hstcp_init(struct tcp_sock *tp)
tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
}
static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt,
static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
u32 in_flight, int good)
{
struct hstcp *ca = tcp_ca(tp);
struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
if (in_flight < tp->snd_cwnd)
return;
@ -143,9 +145,10 @@ static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt,
}
}
static u32 hstcp_ssthresh(struct tcp_sock *tp)
static u32 hstcp_ssthresh(struct sock *sk)
{
struct hstcp *ca = tcp_ca(tp);
const struct tcp_sock *tp = tcp_sk(sk);
const struct hstcp *ca = inet_csk_ca(sk);
/* Do multiplicative decrease */
return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
@ -164,7 +167,7 @@ static struct tcp_congestion_ops tcp_highspeed = {
static int __init hstcp_register(void)
{
BUG_ON(sizeof(struct hstcp) > TCP_CA_PRIV_SIZE);
BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_highspeed);
}

View file

@ -55,18 +55,21 @@ static inline void htcp_reset(struct htcp *ca)
ca->snd_cwnd_cnt2 = 0;
}
static u32 htcp_cwnd_undo(struct tcp_sock *tp)
static u32 htcp_cwnd_undo(struct sock *sk)
{
struct htcp *ca = tcp_ca(tp);
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
ca->ccount = ca->undo_ccount;
ca->maxRTT = ca->undo_maxRTT;
ca->old_maxB = ca->undo_old_maxB;
return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta);
}
static inline void measure_rtt(struct tcp_sock *tp)
static inline void measure_rtt(struct sock *sk)
{
struct htcp *ca = tcp_ca(tp);
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
u32 srtt = tp->srtt>>3;
/* keep track of minimum RTT seen so far, minRTT is zero at first */
@ -74,7 +77,7 @@ static inline void measure_rtt(struct tcp_sock *tp)
ca->minRTT = srtt;
/* max RTT */
if (tp->ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) {
if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) {
if (ca->maxRTT < ca->minRTT)
ca->maxRTT = ca->minRTT;
if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50)
@ -82,13 +85,16 @@ static inline void measure_rtt(struct tcp_sock *tp)
}
}
static void measure_achieved_throughput(struct tcp_sock *tp, u32 pkts_acked)
static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
{
struct htcp *ca = tcp_ca(tp);
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
u32 now = tcp_time_stamp;
/* achieved throughput calculations */
if (tp->ca_state != TCP_CA_Open && tp->ca_state != TCP_CA_Disorder) {
if (icsk->icsk_ca_state != TCP_CA_Open &&
icsk->icsk_ca_state != TCP_CA_Disorder) {
ca->packetcount = 0;
ca->lasttime = now;
return;
@ -173,9 +179,9 @@ static inline void htcp_alpha_update(struct htcp *ca)
* that point do we really have a real sense of maxRTT (the queues en route
* were getting just too full now).
*/
static void htcp_param_update(struct tcp_sock *tp)
static void htcp_param_update(struct sock *sk)
{
struct htcp *ca = tcp_ca(tp);
struct htcp *ca = inet_csk_ca(sk);
u32 minRTT = ca->minRTT;
u32 maxRTT = ca->maxRTT;
@ -187,17 +193,19 @@ static void htcp_param_update(struct tcp_sock *tp)
ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100;
}
static u32 htcp_recalc_ssthresh(struct tcp_sock *tp)
static u32 htcp_recalc_ssthresh(struct sock *sk)
{
struct htcp *ca = tcp_ca(tp);
htcp_param_update(tp);
const struct tcp_sock *tp = tcp_sk(sk);
const struct htcp *ca = inet_csk_ca(sk);
htcp_param_update(sk);
return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
}
static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
u32 in_flight, int data_acked)
{
struct htcp *ca = tcp_ca(tp);
struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
if (in_flight < tp->snd_cwnd)
return;
@ -207,7 +215,7 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
} else {
measure_rtt(tp);
measure_rtt(sk);
/* keep track of number of round-trip times since last backoff event */
if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) {
@ -229,28 +237,29 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
}
/* Lower bound on congestion window. */
static u32 htcp_min_cwnd(struct tcp_sock *tp)
static u32 htcp_min_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
return tp->snd_ssthresh;
}
static void htcp_init(struct tcp_sock *tp)
static void htcp_init(struct sock *sk)
{
struct htcp *ca = tcp_ca(tp);
struct htcp *ca = inet_csk_ca(sk);
memset(ca, 0, sizeof(struct htcp));
ca->alpha = ALPHA_BASE;
ca->beta = BETA_MIN;
}
static void htcp_state(struct tcp_sock *tp, u8 new_state)
static void htcp_state(struct sock *sk, u8 new_state)
{
switch (new_state) {
case TCP_CA_CWR:
case TCP_CA_Recovery:
case TCP_CA_Loss:
htcp_reset(tcp_ca(tp));
htcp_reset(inet_csk_ca(sk));
break;
}
}
@ -269,7 +278,7 @@ static struct tcp_congestion_ops htcp = {
static int __init htcp_register(void)
{
BUG_ON(sizeof(struct htcp) > TCP_CA_PRIV_SIZE);
BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
if (!use_bandwidth_switch)
htcp.pkts_acked = NULL;

View file

@ -33,19 +33,20 @@ MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)");
/* This is called to refresh values for hybla parameters */
static inline void hybla_recalc_param (struct tcp_sock *tp)
static inline void hybla_recalc_param (struct sock *sk)
{
struct hybla *ca = tcp_ca(tp);
struct hybla *ca = inet_csk_ca(sk);
ca->rho_3ls = max_t(u32, tp->srtt / msecs_to_jiffies(rtt0), 8);
ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8);
ca->rho = ca->rho_3ls >> 3;
ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1;
ca->rho2 = ca->rho2_7ls >>7;
}
static void hybla_init(struct tcp_sock *tp)
static void hybla_init(struct sock *sk)
{
struct hybla *ca = tcp_ca(tp);
struct tcp_sock *tp = tcp_sk(sk);
struct hybla *ca = inet_csk_ca(sk);
ca->rho = 0;
ca->rho2 = 0;
@ -57,17 +58,16 @@ static void hybla_init(struct tcp_sock *tp)
tp->snd_cwnd_clamp = 65535;
/* 1st Rho measurement based on initial srtt */
hybla_recalc_param(tp);
hybla_recalc_param(sk);
/* set minimum rtt as this is the 1st ever seen */
ca->minrtt = tp->srtt;
tp->snd_cwnd = ca->rho;
}
static void hybla_state(struct tcp_sock *tp, u8 ca_state)
static void hybla_state(struct sock *sk, u8 ca_state)
{
struct hybla *ca = tcp_ca(tp);
struct hybla *ca = inet_csk_ca(sk);
ca->hybla_en = (ca_state == TCP_CA_Open);
}
@ -86,27 +86,28 @@ static inline u32 hybla_fraction(u32 odds)
* o Give cwnd a new value based on the model proposed
* o remember increments <1
*/
static void hybla_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
u32 in_flight, int flag)
{
struct hybla *ca = tcp_ca(tp);
struct tcp_sock *tp = tcp_sk(sk);
struct hybla *ca = inet_csk_ca(sk);
u32 increment, odd, rho_fractions;
int is_slowstart = 0;
/* Recalculate rho only if this srtt is the lowest */
if (tp->srtt < ca->minrtt){
hybla_recalc_param(tp);
hybla_recalc_param(sk);
ca->minrtt = tp->srtt;
}
if (!ca->hybla_en)
return tcp_reno_cong_avoid(tp, ack, rtt, in_flight, flag);
return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
if (in_flight < tp->snd_cwnd)
return;
if (ca->rho == 0)
hybla_recalc_param(tp);
hybla_recalc_param(sk);
rho_fractions = ca->rho_3ls - (ca->rho << 3);
@ -170,7 +171,7 @@ static struct tcp_congestion_ops tcp_hybla = {
static int __init hybla_register(void)
{
BUG_ON(sizeof(struct hybla) > TCP_CA_PRIV_SIZE);
BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_hybla);
}

View file

@ -325,11 +325,12 @@ static void tcp_init_buffer_space(struct sock *sk)
/* 5. Recalculate window clamp after socket hit its memory bounds. */
static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_buff *skb;
unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
int ofo_win = 0;
inet_csk(sk)->icsk_ack.quick = 0;
icsk->icsk_ack.quick = 0;
skb_queue_walk(&tp->out_of_order_queue, skb) {
ofo_win += skb->len;
@ -350,8 +351,8 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
app_win += ofo_win;
if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
app_win >>= 1;
if (app_win > inet_csk(sk)->icsk_ack.rcv_mss)
app_win -= inet_csk(sk)->icsk_ack.rcv_mss;
if (app_win > icsk->icsk_ack.rcv_mss)
app_win -= icsk->icsk_ack.rcv_mss;
app_win = max(app_win, 2U*tp->advmss);
if (!ofo_win)
@ -549,8 +550,10 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
* To save cycles in the RFC 1323 implementation it was better to break
* it up into three procedures. -- erics
*/
static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt)
static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
long m = mrtt; /* RTT */
/* The following amusing code comes from Jacobson's
@ -610,8 +613,8 @@ static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt)
tp->rtt_seq = tp->snd_nxt;
}
if (tp->ca_ops->rtt_sample)
tp->ca_ops->rtt_sample(tp, *usrtt);
if (icsk->icsk_ca_ops->rtt_sample)
icsk->icsk_ca_ops->rtt_sample(sk, *usrtt);
}
/* Calculate rto without backoff. This is the second half of Van Jacobson's
@ -663,9 +666,10 @@ void tcp_update_metrics(struct sock *sk)
dst_confirm(dst);
if (dst && (dst->flags&DST_HOST)) {
const struct inet_connection_sock *icsk = inet_csk(sk);
int m;
if (inet_csk(sk)->icsk_backoff || !tp->srtt) {
if (icsk->icsk_backoff || !tp->srtt) {
/* This session failed to estimate rtt. Why?
* Probably, no packets returned in time.
* Reset our results.
@ -714,7 +718,7 @@ void tcp_update_metrics(struct sock *sk)
tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
dst->metrics[RTAX_CWND-1] = tp->snd_cwnd;
} else if (tp->snd_cwnd > tp->snd_ssthresh &&
tp->ca_state == TCP_CA_Open) {
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
if (!dst_metric_locked(dst, RTAX_SSTHRESH))
dst->metrics[RTAX_SSTHRESH-1] =
@ -828,8 +832,10 @@ static void tcp_init_metrics(struct sock *sk)
}
}
static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts)
static void tcp_update_reordering(struct sock *sk, const int metric,
const int ts)
{
struct tcp_sock *tp = tcp_sk(sk);
if (metric > tp->reordering) {
tp->reordering = min(TCP_MAX_REORDERING, metric);
@ -844,7 +850,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts)
NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
#if FASTRETRANS_DEBUG > 1
printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, tp->ca_state,
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
tp->reordering,
tp->fackets_out,
tp->sacked_out,
@ -906,6 +912,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts)
static int
tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
@ -1071,7 +1078,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
* we have to account for reordering! Ugly,
* but should help.
*/
if (lost_retrans && tp->ca_state == TCP_CA_Recovery) {
if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) {
struct sk_buff *skb;
sk_stream_for_retrans_queue(skb, sk) {
@ -1100,8 +1107,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
tp->left_out = tp->sacked_out + tp->lost_out;
if ((reord < tp->fackets_out) && tp->ca_state != TCP_CA_Loss)
tcp_update_reordering(tp, ((tp->fackets_out + 1) - reord), 0);
if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss)
tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
#if FASTRETRANS_DEBUG > 0
BUG_TRAP((int)tp->sacked_out >= 0);
@ -1118,17 +1125,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
*/
void tcp_enter_frto(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
tp->frto_counter = 1;
if (tp->ca_state <= TCP_CA_Disorder ||
if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
tp->snd_una == tp->high_seq ||
(tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(tp);
tp->snd_ssthresh = tp->ca_ops->ssthresh(tp);
tcp_ca_event(tp, CA_EVENT_FRTO);
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_FRTO);
}
/* Have to clear retransmission markers here to keep the bookkeeping
@ -1145,7 +1153,7 @@ void tcp_enter_frto(struct sock *sk)
}
tcp_sync_left_out(tp);
tcp_set_ca_state(tp, TCP_CA_Open);
tcp_set_ca_state(sk, TCP_CA_Open);
tp->frto_highmark = tp->snd_nxt;
}
@ -1191,7 +1199,7 @@ static void tcp_enter_frto_loss(struct sock *sk)
tp->reordering = min_t(unsigned int, tp->reordering,
sysctl_tcp_reordering);
tcp_set_ca_state(tp, TCP_CA_Loss);
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->frto_highmark;
TCP_ECN_queue_cwr(tp);
}
@ -1215,16 +1223,17 @@ void tcp_clear_retrans(struct tcp_sock *tp)
*/
void tcp_enter_loss(struct sock *sk, int how)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int cnt = 0;
/* Reduce ssthresh if it has not yet been made inside this window. */
if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
(tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(tp);
tp->snd_ssthresh = tp->ca_ops->ssthresh(tp);
tcp_ca_event(tp, CA_EVENT_LOSS);
if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
}
tp->snd_cwnd = 1;
tp->snd_cwnd_cnt = 0;
@ -1255,7 +1264,7 @@ void tcp_enter_loss(struct sock *sk, int how)
tp->reordering = min_t(unsigned int, tp->reordering,
sysctl_tcp_reordering);
tcp_set_ca_state(tp, TCP_CA_Loss);
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
TCP_ECN_queue_cwr(tp);
}
@ -1272,13 +1281,14 @@ static int tcp_check_sack_reneging(struct sock *sk)
*/
if ((skb = skb_peek(&sk->sk_write_queue)) != NULL &&
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
struct inet_connection_sock *icsk = inet_csk(sk);
NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
tcp_enter_loss(sk, 1);
inet_csk(sk)->icsk_retransmits++;
icsk->icsk_retransmits++;
tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
icsk->icsk_rto, TCP_RTO_MAX);
return 1;
}
return 0;
@ -1431,8 +1441,9 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
* in assumption of absent reordering, interpret this as reordering.
* The only another reason could be bug in receiver TCP.
*/
static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend)
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 holes;
holes = max(tp->lost_out, 1U);
@ -1440,16 +1451,17 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend)
if ((tp->sacked_out + holes) > tp->packets_out) {
tp->sacked_out = tp->packets_out - holes;
tcp_update_reordering(tp, tp->packets_out+addend, 0);
tcp_update_reordering(sk, tp->packets_out + addend, 0);
}
}
/* Emulate SACKs for SACKless connection: account for a new dupack.
 *
 * Increments tp->sacked_out, runs tcp_check_reno_reordering() with an
 * addend of 0, then calls tcp_sync_left_out().
 *
 * Diff view: the lines that take "tp" as the function argument are the
 * pre-change form; the "sk" line directly below each one replaces it.
 */
static void tcp_add_reno_sack(struct tcp_sock *tp)
static void tcp_add_reno_sack(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
tp->sacked_out++;
tcp_check_reno_reordering(tp, 0);
tcp_check_reno_reordering(sk, 0);
tcp_sync_left_out(tp);
}
@ -1464,7 +1476,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke
else
tp->sacked_out -= acked-1;
}
tcp_check_reno_reordering(tp, acked);
tcp_check_reno_reordering(sk, acked);
tcp_sync_left_out(tp);
}
@ -1538,14 +1550,16 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
}
/* Decrease cwnd each second ack. */
static void tcp_cwnd_down(struct tcp_sock *tp)
static void tcp_cwnd_down(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int decr = tp->snd_cwnd_cnt + 1;
tp->snd_cwnd_cnt = decr&1;
decr >>= 1;
if (decr && tp->snd_cwnd > tp->ca_ops->min_cwnd(tp))
if (decr && tp->snd_cwnd > icsk->icsk_ca_ops->min_cwnd(sk))
tp->snd_cwnd -= decr;
tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
@ -1579,11 +1593,15 @@ static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg)
#define DBGUNDO(x...) do { } while (0)
#endif
static void tcp_undo_cwr(struct tcp_sock *tp, int undo)
static void tcp_undo_cwr(struct sock *sk, const int undo)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tp->prior_ssthresh) {
if (tp->ca_ops->undo_cwnd)
tp->snd_cwnd = tp->ca_ops->undo_cwnd(tp);
const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->undo_cwnd)
tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
else
tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
@ -1611,9 +1629,9 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
/* Happy end! We did not retransmit anything
* or our original transmission succeeded.
*/
DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans");
tcp_undo_cwr(tp, 1);
if (tp->ca_state == TCP_CA_Loss)
DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
tcp_undo_cwr(sk, 1);
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
else
NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO);
@ -1626,7 +1644,7 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
tcp_moderate_cwnd(tp);
return 1;
}
tcp_set_ca_state(tp, TCP_CA_Open);
tcp_set_ca_state(sk, TCP_CA_Open);
return 0;
}
@ -1635,7 +1653,7 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
{
if (tp->undo_marker && !tp->undo_retrans) {
DBGUNDO(sk, tp, "D-SACK");
tcp_undo_cwr(tp, 1);
tcp_undo_cwr(sk, 1);
tp->undo_marker = 0;
NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
}
@ -1656,10 +1674,10 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
if (tp->retrans_out == 0)
tp->retrans_stamp = 0;
tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1);
tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
DBGUNDO(sk, tp, "Hoe");
tcp_undo_cwr(tp, 0);
tcp_undo_cwr(sk, 0);
NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
/* So... Do not make Hoe's retransmit yet.
@ -1682,22 +1700,23 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
DBGUNDO(sk, tp, "partial loss");
tp->lost_out = 0;
tp->left_out = tp->sacked_out;
tcp_undo_cwr(tp, 1);
tcp_undo_cwr(sk, 1);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
inet_csk(sk)->icsk_retransmits = 0;
tp->undo_marker = 0;
if (!IsReno(tp))
tcp_set_ca_state(tp, TCP_CA_Open);
tcp_set_ca_state(sk, TCP_CA_Open);
return 1;
}
return 0;
}
/* Finish a congestion-window reduction.
 *
 * Clamps snd_cwnd so it does not exceed snd_ssthresh, records
 * tcp_time_stamp in snd_cwnd_stamp, and signals CA_EVENT_COMPLETE_CWR
 * through tcp_ca_event().
 *
 * Diff view: the signature and tcp_ca_event() call that take "tp" are the
 * pre-change form; the "sk" line directly below each one replaces it.
 */
static inline void tcp_complete_cwr(struct tcp_sock *tp)
static inline void tcp_complete_cwr(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
tp->snd_cwnd_stamp = tcp_time_stamp;
tcp_ca_event(tp, CA_EVENT_COMPLETE_CWR);
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}
static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
@ -1708,21 +1727,21 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
tp->retrans_stamp = 0;
if (flag&FLAG_ECE)
tcp_enter_cwr(tp);
tcp_enter_cwr(sk);
if (tp->ca_state != TCP_CA_CWR) {
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
int state = TCP_CA_Open;
if (tp->left_out || tp->retrans_out || tp->undo_marker)
state = TCP_CA_Disorder;
if (tp->ca_state != state) {
tcp_set_ca_state(tp, state);
if (inet_csk(sk)->icsk_ca_state != state) {
tcp_set_ca_state(sk, state);
tp->high_seq = tp->snd_nxt;
}
tcp_moderate_cwnd(tp);
} else {
tcp_cwnd_down(tp);
tcp_cwnd_down(sk);
}
}
@ -1741,6 +1760,7 @@ static void
tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
int prior_packets, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP));
@ -1764,7 +1784,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
/* C. Process data loss notification, provided it is valid. */
if ((flag&FLAG_DATA_LOST) &&
before(tp->snd_una, tp->high_seq) &&
tp->ca_state != TCP_CA_Open &&
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
@ -1775,14 +1795,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
/* E. Check state exit conditions. State can be terminated
* when high_seq is ACKed. */
if (tp->ca_state == TCP_CA_Open) {
if (icsk->icsk_ca_state == TCP_CA_Open) {
if (!sysctl_tcp_frto)
BUG_TRAP(tp->retrans_out == 0);
tp->retrans_stamp = 0;
} else if (!before(tp->snd_una, tp->high_seq)) {
switch (tp->ca_state) {
switch (icsk->icsk_ca_state) {
case TCP_CA_Loss:
inet_csk(sk)->icsk_retransmits = 0;
icsk->icsk_retransmits = 0;
if (tcp_try_undo_recovery(sk, tp))
return;
break;
@ -1791,8 +1811,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
/* CWR is to be held something *above* high_seq
* is ACKed for CWR bit to reach receiver. */
if (tp->snd_una != tp->high_seq) {
tcp_complete_cwr(tp);
tcp_set_ca_state(tp, TCP_CA_Open);
tcp_complete_cwr(sk);
tcp_set_ca_state(sk, TCP_CA_Open);
}
break;
@ -1803,7 +1823,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
* catching for all duplicate ACKs. */
IsReno(tp) || tp->snd_una != tp->high_seq) {
tp->undo_marker = 0;
tcp_set_ca_state(tp, TCP_CA_Open);
tcp_set_ca_state(sk, TCP_CA_Open);
}
break;
@ -1812,17 +1832,17 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
tcp_reset_reno_sack(tp);
if (tcp_try_undo_recovery(sk, tp))
return;
tcp_complete_cwr(tp);
tcp_complete_cwr(sk);
break;
}
}
/* F. Process state. */
switch (tp->ca_state) {
switch (icsk->icsk_ca_state) {
case TCP_CA_Recovery:
if (prior_snd_una == tp->snd_una) {
if (IsReno(tp) && is_dupack)
tcp_add_reno_sack(tp);
tcp_add_reno_sack(sk);
} else {
int acked = prior_packets - tp->packets_out;
if (IsReno(tp))
@ -1832,13 +1852,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
break;
case TCP_CA_Loss:
if (flag&FLAG_DATA_ACKED)
inet_csk(sk)->icsk_retransmits = 0;
icsk->icsk_retransmits = 0;
if (!tcp_try_undo_loss(sk, tp)) {
tcp_moderate_cwnd(tp);
tcp_xmit_retransmit_queue(sk);
return;
}
if (tp->ca_state != TCP_CA_Open)
if (icsk->icsk_ca_state != TCP_CA_Open)
return;
/* Loss is undone; fall through to processing in Open state. */
default:
@ -1846,10 +1866,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
if (tp->snd_una != prior_snd_una)
tcp_reset_reno_sack(tp);
if (is_dupack)
tcp_add_reno_sack(tp);
tcp_add_reno_sack(sk);
}
if (tp->ca_state == TCP_CA_Disorder)
if (icsk->icsk_ca_state == TCP_CA_Disorder)
tcp_try_undo_dsack(sk, tp);
if (!tcp_time_to_recover(sk, tp)) {
@ -1869,20 +1889,20 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
tp->undo_marker = tp->snd_una;
tp->undo_retrans = tp->retrans_out;
if (tp->ca_state < TCP_CA_CWR) {
if (icsk->icsk_ca_state < TCP_CA_CWR) {
if (!(flag&FLAG_ECE))
tp->prior_ssthresh = tcp_current_ssthresh(tp);
tp->snd_ssthresh = tp->ca_ops->ssthresh(tp);
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
TCP_ECN_queue_cwr(tp);
}
tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(tp, TCP_CA_Recovery);
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
if (is_dupack || tcp_head_timedout(sk, tp))
tcp_update_scoreboard(sk, tp);
tcp_cwnd_down(tp);
tcp_cwnd_down(sk);
tcp_xmit_retransmit_queue(sk);
}
@ -1908,7 +1928,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
*/
struct tcp_sock *tp = tcp_sk(sk);
const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
tcp_rtt_estimator(tp, seq_rtt, usrtt);
tcp_rtt_estimator(sk, seq_rtt, usrtt);
tcp_set_rto(sk);
inet_csk(sk)->icsk_backoff = 0;
tcp_bound_rto(sk);
@ -1928,7 +1948,7 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag
if (flag & FLAG_RETRANS_DATA_ACKED)
return;
tcp_rtt_estimator(tcp_sk(sk), seq_rtt, usrtt);
tcp_rtt_estimator(sk, seq_rtt, usrtt);
tcp_set_rto(sk);
inet_csk(sk)->icsk_backoff = 0;
tcp_bound_rto(sk);
@ -1945,11 +1965,12 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag);
}
/* Run the socket's congestion-avoidance hook and timestamp the cwnd.
 *
 * Forwards ack, rtt, in_flight and good unchanged to the cong_avoid
 * callback of the attached congestion ops, then stores tcp_time_stamp in
 * snd_cwnd_stamp. The callback pointer is called without a NULL check.
 *
 * Diff view: the first signature line and the two tp-> statements are the
 * pre-change form (ops reached via tp->ca_ops); the remaining lines are
 * the post-change form (ops reached via inet_csk(sk)->icsk_ca_ops).
 */
static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
u32 in_flight, int good)
{
tp->ca_ops->cong_avoid(tp, ack, rtt, in_flight, good);
tp->snd_cwnd_stamp = tcp_time_stamp;
const struct inet_connection_sock *icsk = inet_csk(sk);
icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
}
/* Restart timer after forward progress on connection.
@ -2098,11 +2119,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
}
if (acked&FLAG_ACKED) {
const struct inet_connection_sock *icsk = inet_csk(sk);
tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt);
tcp_ack_packets_out(sk, tp);
if (tp->ca_ops->pkts_acked)
tp->ca_ops->pkts_acked(tp, pkts_acked);
if (icsk->icsk_ca_ops->pkts_acked)
icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
}
#if FASTRETRANS_DEBUG > 0
@ -2110,19 +2132,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
BUG_TRAP((int)tp->lost_out >= 0);
BUG_TRAP((int)tp->retrans_out >= 0);
if (!tp->packets_out && tp->rx_opt.sack_ok) {
const struct inet_connection_sock *icsk = inet_csk(sk);
if (tp->lost_out) {
printk(KERN_DEBUG "Leak l=%u %d\n",
tp->lost_out, tp->ca_state);
tp->lost_out, icsk->icsk_ca_state);
tp->lost_out = 0;
}
if (tp->sacked_out) {
printk(KERN_DEBUG "Leak s=%u %d\n",
tp->sacked_out, tp->ca_state);
tp->sacked_out, icsk->icsk_ca_state);
tp->sacked_out = 0;
}
if (tp->retrans_out) {
printk(KERN_DEBUG "Leak r=%u %d\n",
tp->retrans_out, tp->ca_state);
tp->retrans_out, icsk->icsk_ca_state);
tp->retrans_out = 0;
}
}
@ -2152,16 +2175,17 @@ static void tcp_ack_probe(struct sock *sk)
}
}
/* Decide whether an incoming ACK needs the slow-path treatment.
 *
 * Returns non-zero when any of these holds:
 *   - FLAG_NOT_DUP is clear in flag,
 *   - FLAG_CA_ALERT is set in flag,
 *   - the congestion-avoidance state is not TCP_CA_Open.
 * tcp_ack() calls tcp_fastretrans_alert() when this returns non-zero.
 *
 * Diff view: the tp-based signature and tp->ca_state line are the
 * pre-change form; the sk-based line below each one replaces it.
 */
static inline int tcp_ack_is_dubious(struct tcp_sock *tp, int flag)
static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
{
return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
tp->ca_state != TCP_CA_Open);
inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
}
/* Tell whether cwnd may grow on this (dubious) ACK.
 *
 * Returns non-zero only if both conditions hold:
 *   - FLAG_ECE is not set in flag, or snd_cwnd is still below snd_ssthresh;
 *   - the bit (1 << ca_state) is not contained in
 *     TCPF_CA_Recovery | TCPF_CA_CWR.
 *
 * Diff view: the tp-based signature and the tp->ca_state line are the
 * pre-change form; the sk-based line below each one replaces it.
 */
static inline int tcp_may_raise_cwnd(struct tcp_sock *tp, int flag)
static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
const struct tcp_sock *tp = tcp_sk(sk);
return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
!((1<<tp->ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR));
!((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
}
/* Check that window update is acceptable.
@ -2251,6 +2275,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
/* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_snd_una = tp->snd_una;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
@ -2278,7 +2303,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
tp->snd_una = ack;
flag |= FLAG_WIN_UPDATE;
tcp_ca_event(tp, CA_EVENT_FAST_ACK);
tcp_ca_event(sk, CA_EVENT_FAST_ACK);
NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS);
} else {
@ -2295,7 +2320,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
flag |= FLAG_ECE;
tcp_ca_event(tp, CA_EVENT_SLOW_ACK);
tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
}
/* We passed data and got it acked, remove any soft error
@ -2311,19 +2336,19 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, &seq_rtt,
tp->ca_ops->rtt_sample ? &seq_usrtt : NULL);
icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL);
if (tp->frto_counter)
tcp_process_frto(sk, prior_snd_una);
if (tcp_ack_is_dubious(tp, flag)) {
if (tcp_ack_is_dubious(sk, flag)) {
/* Advance CWND, if state allows this. */
if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(tp, flag))
tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 0);
if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
} else {
if ((flag & FLAG_DATA_ACKED))
tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 1);
tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
}
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
@ -2332,7 +2357,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
return 1;
no_queue:
tp->probes_out = 0;
icsk->icsk_probes_out = 0;
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
@ -3301,12 +3326,12 @@ void tcp_cwnd_application_limited(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tp->ca_state == TCP_CA_Open &&
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
/* Limited by application or receiver window. */
u32 win_used = max(tp->snd_cwnd_used, 2U);
if (win_used < tp->snd_cwnd) {
tp->snd_ssthresh = tcp_current_ssthresh(tp);
tp->snd_ssthresh = tcp_current_ssthresh(sk);
tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
}
tp->snd_cwnd_used = 0;
@ -3935,7 +3960,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_init_metrics(sk);
tcp_init_congestion_control(tp);
tcp_init_congestion_control(sk);
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
@ -4212,7 +4237,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tcp_init_metrics(sk);
tcp_init_congestion_control(tp);
tcp_init_congestion_control(sk);
/* Prevent spurious tcp_cwnd_restart() on
* first data packet.

View file

@ -1409,13 +1409,14 @@ struct tcp_func ipv4_specific = {
*/
static int tcp_v4_init_sock(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
skb_queue_head_init(&tp->out_of_order_queue);
tcp_init_xmit_timers(sk);
tcp_prequeue_init(tp);
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev = TCP_TIMEOUT_INIT;
/* So many TCP implementations out there (incorrectly) count the
@ -1433,7 +1434,7 @@ static int tcp_v4_init_sock(struct sock *sk)
tp->mss_cache = 536;
tp->reordering = sysctl_tcp_reordering;
tp->ca_ops = &tcp_init_congestion_ops;
icsk->icsk_ca_ops = &tcp_init_congestion_ops;
sk->sk_state = TCP_CLOSE;
@ -1456,7 +1457,7 @@ int tcp_v4_destroy_sock(struct sock *sk)
tcp_clear_xmit_timers(sk);
tcp_cleanup_congestion_control(tp);
tcp_cleanup_congestion_control(sk);
/* Cleanup up the write buffer. */
sk_stream_writequeue_purge(sk);
@ -1883,7 +1884,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
jiffies_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
sock_i_uid(sp),
tp->probes_out,
icsk->icsk_probes_out,
sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp,
icsk->icsk_rto,

View file

@ -384,9 +384,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
newtp->ca_ops = &tcp_reno;
newicsk->icsk_ca_ops = &tcp_reno;
tcp_set_ca_state(newtp, TCP_CA_Open);
tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
skb_queue_head_init(&newtp->out_of_order_queue);
newtp->rcv_wup = treq->rcv_isn + 1;
@ -399,7 +399,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rx_opt.dsack = 0;
newtp->rx_opt.eff_sacks = 0;
newtp->probes_out = 0;
newtp->rx_opt.num_sacks = 0;
newtp->urg_data = 0;

View file

@ -112,9 +112,9 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
u32 restart_cwnd = tcp_init_cwnd(tp, dst);
u32 cwnd = tp->snd_cwnd;
tcp_ca_event(tp, CA_EVENT_CWND_RESTART);
tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
tp->snd_ssthresh = tcp_current_ssthresh(tp);
tp->snd_ssthresh = tcp_current_ssthresh(sk);
restart_cwnd = min(restart_cwnd, cwnd);
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
@ -265,6 +265,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
if (skb != NULL) {
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@ -280,7 +281,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
#define SYSCTL_FLAG_SACK 0x4
/* If congestion control is doing timestamping */
if (tp->ca_ops->rtt_sample)
if (icsk->icsk_ca_ops->rtt_sample)
do_gettimeofday(&skb->stamp);
sysctl_flags = 0;
@ -308,7 +309,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
}
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(tp, CA_EVENT_TX_START);
tcp_ca_event(sk, CA_EVENT_TX_START);
th = (struct tcphdr *) skb_push(skb, tcp_header_size);
skb->h.th = th;
@ -366,7 +367,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
if (err <= 0)
return err;
tcp_enter_cwr(tp);
tcp_enter_cwr(sk);
/* NET_XMIT_CN is special. It does not guarantee,
* that this packet is lost. It tells that device
@ -905,12 +906,13 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
*/
static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 send_win, cong_win, limit, in_flight;
if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
return 0;
if (tp->ca_state != TCP_CA_Open)
if (icsk->icsk_ca_state != TCP_CA_Open)
return 0;
in_flight = tcp_packets_in_flight(tp);
@ -1287,6 +1289,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
*/
void tcp_simple_retransmit(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
unsigned int mss = tcp_current_mss(sk, 0);
@ -1317,12 +1320,12 @@ void tcp_simple_retransmit(struct sock *sk)
* in network, but units changed and effective
* cwnd/ssthresh really reduced now.
*/
if (tp->ca_state != TCP_CA_Loss) {
if (icsk->icsk_ca_state != TCP_CA_Loss) {
tp->high_seq = tp->snd_nxt;
tp->snd_ssthresh = tcp_current_ssthresh(tp);
tp->snd_ssthresh = tcp_current_ssthresh(sk);
tp->prior_ssthresh = 0;
tp->undo_marker = 0;
tcp_set_ca_state(tp, TCP_CA_Loss);
tcp_set_ca_state(sk, TCP_CA_Loss);
}
tcp_xmit_retransmit_queue(sk);
}
@ -1462,6 +1465,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
*/
void tcp_xmit_retransmit_queue(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int packet_cnt = tp->lost_out;
@ -1485,7 +1489,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
if (tcp_retransmit_skb(sk, skb))
return;
if (tp->ca_state != TCP_CA_Loss)
if (icsk->icsk_ca_state != TCP_CA_Loss)
NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
else
NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
@ -1507,7 +1511,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
/* OK, demanded retransmission is finished. */
/* Forward retransmissions are possible only during Recovery. */
if (tp->ca_state != TCP_CA_Recovery)
if (icsk->icsk_ca_state != TCP_CA_Recovery)
return;
/* No forward retransmissions in Reno are possible. */
@ -2028,7 +2032,7 @@ void tcp_send_probe0(struct sock *sk)
if (tp->packets_out || !sk->sk_send_head) {
/* Cancel probe timer, if it is not required. */
tp->probes_out = 0;
icsk->icsk_probes_out = 0;
icsk->icsk_backoff = 0;
return;
}
@ -2036,19 +2040,19 @@ void tcp_send_probe0(struct sock *sk)
if (err <= 0) {
if (icsk->icsk_backoff < sysctl_tcp_retries2)
icsk->icsk_backoff++;
tp->probes_out++;
icsk->icsk_probes_out++;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
TCP_RTO_MAX);
} else {
/* If packet was not sent due to local congestion,
* do not backoff and do not remember probes_out.
* do not backoff and do not remember icsk_probes_out.
* Let local senders to fight for local resources.
*
* Use accumulated backoff yet.
*/
if (!tp->probes_out)
tp->probes_out=1;
if (!icsk->icsk_probes_out)
icsk->icsk_probes_out = 1;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
min(icsk->icsk_rto << icsk->icsk_backoff,
TCP_RESOURCE_PROBE_INTERVAL),

View file

@ -16,9 +16,10 @@
#define TCP_SCALABLE_AI_CNT 50U
#define TCP_SCALABLE_MD_SCALE 3
static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
u32 in_flight, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
if (in_flight < tp->snd_cwnd)
return;
@ -35,8 +36,9 @@ static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
tp->snd_cwnd_stamp = tcp_time_stamp;
}
/* Compute the slow-start threshold for Scalable TCP.
 *
 * Returns snd_cwnd minus (snd_cwnd >> TCP_SCALABLE_MD_SCALE), with a floor
 * of 2. TCP_SCALABLE_MD_SCALE is defined as 3 above, so the reduction is
 * one eighth of the current window (integer shift).
 *
 * Diff view: the "struct tcp_sock *tp" signature is the pre-change form;
 * the "struct sock *sk" signature below it replaces it.
 */
static u32 tcp_scalable_ssthresh(struct tcp_sock *tp)
static u32 tcp_scalable_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
}

View file

@ -233,11 +233,12 @@ static void tcp_delack_timer(unsigned long data)
static void tcp_probe_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
if (tp->packets_out || !sk->sk_send_head) {
tp->probes_out = 0;
icsk->icsk_probes_out = 0;
return;
}
@ -248,7 +249,7 @@ static void tcp_probe_timer(struct sock *sk)
* FIXME: We ought not to do it, Solaris 2.5 actually has fixing
* this behaviour in Solaris down as a bug fix. [AC]
*
* Let me to explain. probes_out is zeroed by incoming ACKs
* Let me to explain. icsk_probes_out is zeroed by incoming ACKs
* even if they advertise zero window. Hence, connection is killed only
* if we received no ACKs for normal connection timeout. It is not killed
* only because window stays zero for some time, window may be zero
@ -259,16 +260,15 @@ static void tcp_probe_timer(struct sock *sk)
max_probes = sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
const struct inet_connection_sock *icsk = inet_csk(sk);
const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX);
max_probes = tcp_orphan_retries(sk, alive);
if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes))
return;
}
if (tp->probes_out > max_probes) {
if (icsk->icsk_probes_out > max_probes) {
tcp_write_err(sk);
} else {
/* Only send another probe if we didn't close things up. */
@ -319,19 +319,20 @@ static void tcp_retransmit_timer(struct sock *sk)
goto out;
if (icsk->icsk_retransmits == 0) {
if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
if (icsk->icsk_ca_state == TCP_CA_Disorder ||
icsk->icsk_ca_state == TCP_CA_Recovery) {
if (tp->rx_opt.sack_ok) {
if (tp->ca_state == TCP_CA_Recovery)
if (icsk->icsk_ca_state == TCP_CA_Recovery)
NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
else
NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
} else {
if (tp->ca_state == TCP_CA_Recovery)
if (icsk->icsk_ca_state == TCP_CA_Recovery)
NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
else
NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
}
} else if (tp->ca_state == TCP_CA_Loss) {
} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
} else {
NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
@ -449,6 +450,7 @@ void tcp_set_keepalive(struct sock *sk, int val)
static void tcp_keepalive_timer (unsigned long data)
{
struct sock *sk = (struct sock *) data;
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u32 elapsed;
@ -490,14 +492,14 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = tcp_time_stamp - tp->rcv_tstamp;
if (elapsed >= keepalive_time_when(tp)) {
if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
(tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) ||
(tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) {
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_write_err(sk);
goto out;
}
if (tcp_write_wakeup(sk) <= 0) {
tp->probes_out++;
icsk->icsk_probes_out++;
elapsed = keepalive_intvl_when(tp);
} else {
/* If keepalive was lost due to local congestion,

View file

@ -82,9 +82,10 @@ struct vegas {
* Instead we must wait until the completion of an RTT during
* which we actually receive ACKs.
*/
static inline void vegas_enable(struct tcp_sock *tp)
static inline void vegas_enable(struct sock *sk)
{
struct vegas *vegas = tcp_ca(tp);
const struct tcp_sock *tp = tcp_sk(sk);
struct vegas *vegas = inet_csk_ca(sk);
/* Begin taking Vegas samples next time we send something. */
vegas->doing_vegas_now = 1;
@ -97,19 +98,19 @@ static inline void vegas_enable(struct tcp_sock *tp)
}
/* Stop taking Vegas samples for now.
 *
 * Clears doing_vegas_now in the socket's private Vegas state. While the
 * flag is zero, tcp_vegas_cong_avoid() hands the ACK to
 * tcp_reno_cong_avoid() instead.
 *
 * Diff view: the tp-based signature and the tcp_ca(tp) lookup are the
 * pre-change form; the sk-based signature and inet_csk_ca(sk) lookup
 * below them replace them.
 */
static inline void vegas_disable(struct tcp_sock *tp)
static inline void vegas_disable(struct sock *sk)
{
struct vegas *vegas = tcp_ca(tp);
struct vegas *vegas = inet_csk_ca(sk);
vegas->doing_vegas_now = 0;
}
/* (Re)initialise the per-socket Vegas state.
 *
 * Sets baseRTT to 0x7fffffff and then calls vegas_enable(). The large
 * starting value presumably lets the first RTT sample win the min-filter
 * in tcp_vegas_rtt_calc() -- confirm against the elided filter code.
 * Also invoked from tcp_vegas_cwnd_event() on CWND_RESTART / TX_START.
 *
 * Diff view: each line using "tp" as the argument is the pre-change form;
 * the "sk" line directly below it replaces it.
 */
static void tcp_vegas_init(struct tcp_sock *tp)
static void tcp_vegas_init(struct sock *sk)
{
struct vegas *vegas = tcp_ca(tp);
struct vegas *vegas = inet_csk_ca(sk);
vegas->baseRTT = 0x7fffffff;
vegas_enable(tp);
vegas_enable(sk);
}
/* Do RTT sampling needed for Vegas.
@ -120,9 +121,9 @@ static void tcp_vegas_init(struct tcp_sock *tp)
* o min-filter RTT samples from a much longer window (forever for now)
* to find the propagation delay (baseRTT)
*/
static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt)
static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
{
struct vegas *vegas = tcp_ca(tp);
struct vegas *vegas = inet_csk_ca(sk);
u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
/* Filter to find propagation delay: */
@ -136,13 +137,13 @@ static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt)
vegas->cntRTT++;
}
/* React to a congestion-avoidance state change.
 *
 * Calls vegas_enable() when the new ca_state is TCP_CA_Open and
 * vegas_disable() for every other state.
 *
 * Diff view: each line using "tp" as the argument is the pre-change form;
 * the "sk" line directly below it replaces it.
 */
static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state)
static void tcp_vegas_state(struct sock *sk, u8 ca_state)
{
if (ca_state == TCP_CA_Open)
vegas_enable(tp);
vegas_enable(sk);
else
vegas_disable(tp);
vegas_disable(sk);
}
/*
@ -154,20 +155,21 @@ static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state)
* packets, _then_ we can make Vegas calculations
* again.
*/
static void tcp_vegas_cwnd_event(struct tcp_sock *tp, enum tcp_ca_event event)
static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_CWND_RESTART ||
event == CA_EVENT_TX_START)
tcp_vegas_init(tp);
tcp_vegas_init(sk);
}
static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack,
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
u32 seq_rtt, u32 in_flight, int flag)
{
struct vegas *vegas = tcp_ca(tp);
struct tcp_sock *tp = tcp_sk(sk);
struct vegas *vegas = inet_csk_ca(sk);
if (!vegas->doing_vegas_now)
return tcp_reno_cong_avoid(tp, ack, seq_rtt, in_flight, flag);
return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag);
/* The key players are v_beg_snd_una and v_beg_snd_nxt.
*
@ -219,7 +221,7 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack,
* but that's not too awful, since we're taking the min,
* rather than averaging.
*/
tcp_vegas_rtt_calc(tp, seq_rtt*1000);
tcp_vegas_rtt_calc(sk, seq_rtt * 1000);
/* We do the Vegas calculations only if we got enough RTT
* samples that we can be reasonably sure that we got
@ -359,10 +361,10 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack,
}
/* Extract info for TCP socket info provided via netlink. */
static void tcp_vegas_get_info(struct tcp_sock *tp, u32 ext,
static void tcp_vegas_get_info(struct sock *sk, u32 ext,
struct sk_buff *skb)
{
const struct vegas *ca = tcp_ca(tp);
const struct vegas *ca = inet_csk_ca(sk);
if (ext & (1<<(TCPDIAG_VEGASINFO-1))) {
struct tcpvegas_info *info;
@ -393,7 +395,7 @@ static struct tcp_congestion_ops tcp_vegas = {
static int __init tcp_vegas_register(void)
{
BUG_ON(sizeof(struct vegas) > TCP_CA_PRIV_SIZE);
BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
tcp_register_congestion_control(&tcp_vegas);
return 0;
}

View file

@ -40,9 +40,9 @@ struct westwood {
* way as soon as possible. It will reasonably happen within the first
* RTT period of the connection lifetime.
*/
static void tcp_westwood_init(struct tcp_sock *tp)
static void tcp_westwood_init(struct sock *sk)
{
struct westwood *w = tcp_ca(tp);
struct westwood *w = inet_csk_ca(sk);
w->bk = 0;
w->bw_ns_est = 0;
@ -51,7 +51,7 @@ static void tcp_westwood_init(struct tcp_sock *tp)
w->cumul_ack = 0;
w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT;
w->rtt_win_sx = tcp_time_stamp;
w->snd_una = tp->snd_una;
w->snd_una = tcp_sk(sk)->snd_una;
}
/*
@ -74,11 +74,11 @@ static inline void westwood_filter(struct westwood *w, u32 delta)
* Called after processing a group of packets,
* but all Westwood needs is the last sample of srtt.
*/
static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt)
static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
{
struct westwood *w = tcp_ca(tp);
struct westwood *w = inet_csk_ca(sk);
if (cnt > 0)
w->rtt = tp->srtt >> 3;
w->rtt = tcp_sk(sk)->srtt >> 3;
}
/*
@ -86,9 +86,9 @@ static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt)
* It updates the RTT evaluation window if it is the right moment to do
* so. If so, it calls the filter to evaluate the bandwidth.
*/
static void westwood_update_window(struct tcp_sock *tp)
static void westwood_update_window(struct sock *sk)
{
struct westwood *w = tcp_ca(tp);
struct westwood *w = inet_csk_ca(sk);
s32 delta = tcp_time_stamp - w->rtt_win_sx;
/*
@ -114,11 +114,12 @@ static void westwood_update_window(struct tcp_sock *tp)
* header prediction is successful. In that case the update is
* straightforward and doesn't need any particular care.
*/
static inline void westwood_fast_bw(struct tcp_sock *tp)
static inline void westwood_fast_bw(struct sock *sk)
{
struct westwood *w = tcp_ca(tp);
const struct tcp_sock *tp = tcp_sk(sk);
struct westwood *w = inet_csk_ca(sk);
westwood_update_window(tp);
westwood_update_window(sk);
w->bk += tp->snd_una - w->snd_una;
w->snd_una = tp->snd_una;
@ -130,9 +131,10 @@ static inline void westwood_fast_bw(struct tcp_sock *tp)
* This function computes cumul_ack, which is used to evaluate bk in
* case of delayed or partial acks.
*/
static inline u32 westwood_acked_count(struct tcp_sock *tp)
static inline u32 westwood_acked_count(struct sock *sk)
{
struct westwood *w = tcp_ca(tp);
const struct tcp_sock *tp = tcp_sk(sk);
struct westwood *w = inet_csk_ca(sk);
w->cumul_ack = tp->snd_una - w->snd_una;
@ -160,9 +162,10 @@ static inline u32 westwood_acked_count(struct tcp_sock *tp)
return w->cumul_ack;
}
static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp)
static inline u32 westwood_bw_rttmin(const struct sock *sk)
{
struct westwood *w = tcp_ca(tp);
const struct tcp_sock *tp = tcp_sk(sk);
const struct westwood *w = inet_csk_ca(sk);
return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2);
}
@ -172,31 +175,32 @@ static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp)
* in packets we use mss_cache). The westwood_bw_rttmin() result is
* clamped to be >= 2, so this never returns 0.
*/
static u32 tcp_westwood_cwnd_min(struct tcp_sock *tp)
static u32 tcp_westwood_cwnd_min(struct sock *sk)
{
return westwood_bw_rttmin(tp);
return westwood_bw_rttmin(sk);
}
static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event)
static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
{
struct westwood *w = tcp_ca(tp);
struct tcp_sock *tp = tcp_sk(sk);
struct westwood *w = inet_csk_ca(sk);
switch(event) {
case CA_EVENT_FAST_ACK:
westwood_fast_bw(tp);
westwood_fast_bw(sk);
break;
case CA_EVENT_COMPLETE_CWR:
tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(tp);
tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(sk);
break;
case CA_EVENT_FRTO:
tp->snd_ssthresh = westwood_bw_rttmin(tp);
tp->snd_ssthresh = westwood_bw_rttmin(sk);
break;
case CA_EVENT_SLOW_ACK:
westwood_update_window(tp);
w->bk += westwood_acked_count(tp);
westwood_update_window(sk);
w->bk += westwood_acked_count(sk);
w->rtt_min = min(w->rtt, w->rtt_min);
break;
@ -208,10 +212,10 @@ static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event)
/* Extract info for TCP socket info provided via netlink. */
static void tcp_westwood_info(struct tcp_sock *tp, u32 ext,
static void tcp_westwood_info(struct sock *sk, u32 ext,
struct sk_buff *skb)
{
const struct westwood *ca = tcp_ca(tp);
const struct westwood *ca = inet_csk_ca(sk);
if (ext & (1<<(TCPDIAG_VEGASINFO-1))) {
struct rtattr *rta;
struct tcpvegas_info *info;
@ -242,7 +246,7 @@ static struct tcp_congestion_ops tcp_westwood = {
static int __init tcp_westwood_register(void)
{
BUG_ON(sizeof(struct westwood) > TCP_CA_PRIV_SIZE);
BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_westwood);
}

View file

@ -2010,13 +2010,14 @@ static struct tcp_func ipv6_mapped = {
*/
static int tcp_v6_init_sock(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
skb_queue_head_init(&tp->out_of_order_queue);
tcp_init_xmit_timers(sk);
tcp_prequeue_init(tp);
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev = TCP_TIMEOUT_INIT;
/* So many TCP implementations out there (incorrectly) count the
@ -2038,7 +2039,7 @@ static int tcp_v6_init_sock(struct sock *sk)
sk->sk_state = TCP_CLOSE;
tp->af_specific = &ipv6_specific;
tp->ca_ops = &tcp_init_congestion_ops;
icsk->icsk_ca_ops = &tcp_init_congestion_ops;
sk->sk_write_space = sk_stream_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
@ -2135,7 +2136,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
sock_i_uid(sp),
tp->probes_out,
icsk->icsk_probes_out,
sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp,
icsk->icsk_rto,