Route all accesses to tp->highest_sack through helpers.

include/net/tcp.h gains tcp_highest_sack(), tcp_advance_highest_sack(),
tcp_highest_sack_reset() and tcp_highest_sack_combine(); the open-coded
users in net/ipv4/tcp_input.c and net/ipv4/tcp_output.c are converted.

Behaviour visible in the hunks below:
  * tcp_advance_highest_sack() stores the skb that FOLLOWS the given skb
    in the write queue, or NULL when the given skb is the last one.
  * tcp_highest_sack_seq() returns tp->snd_nxt when highest_sack is NULL
    (and still returns tp->snd_una when nothing is SACKed).
  * tcp_add_write_queue_tail() fills in a NULL highest_sack when it sets
    a new send head.
  * Sequence comparisons against tcp_highest_sack_seq() are shifted to
    match (after() <-> !before()), and callers that used to step to
    tcp_write_queue_next(sk, tp->highest_sack) now take tcp_highest_sack()
    directly and stop when it is NULL.
  * tcp_fragment() no longer moves highest_sack to the new fragment, and
    tcp_retrans_try_collapse() re-points it via tcp_highest_sack_combine()
    instead of bailing out with WARN_ON.

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6e392babda4a..5ec1cacca8a1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1267,8 +1267,12 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb
 	__tcp_add_write_queue_tail(sk, skb);
 
 	/* Queue it, remembering where we must start sending. */
-	if (sk->sk_send_head == NULL)
+	if (sk->sk_send_head == NULL) {
 		sk->sk_send_head = skb;
+
+		if (tcp_sk(sk)->highest_sack == NULL)
+			tcp_sk(sk)->highest_sack = skb;
+	}
 }
 
 static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
@@ -1318,9 +1322,38 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
 {
 	if (!tp->sacked_out)
 		return tp->snd_una;
+
+	if (tp->highest_sack == NULL)
+		return tp->snd_nxt;
+
 	return TCP_SKB_CB(tp->highest_sack)->seq;
 }
 
+static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
+{
+	tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
+						tcp_write_queue_next(sk, skb);
+}
+
+static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
+{
+	return tcp_sk(sk)->highest_sack;
+}
+
+static inline void tcp_highest_sack_reset(struct sock *sk)
+{
+	tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
+}
+
+/* Called when old skb is about to be deleted (to be combined with new skb) */
+static inline void tcp_highest_sack_combine(struct sock *sk,
+					    struct sk_buff *old,
+					    struct sk_buff *new)
+{
+	if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack))
+		tcp_sk(sk)->highest_sack = new;
+}
+
 /* /proc */
 enum tcp_seq_states {
 	TCP_SEQ_STATE_LISTENING,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 263c536def5c..bc2d5f70966e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1125,7 +1125,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
 	struct sk_buff *skb;
 	int cnt = 0;
 	u32 new_low_seq = tp->snd_nxt;
-	u32 received_upto = TCP_SKB_CB(tp->highest_sack)->end_seq;
+	u32 received_upto = tcp_highest_sack_seq(tp);
 
 	if (!tcp_is_fack(tp) || !tp->retrans_out ||
 	    !after(received_upto, tp->lost_retrans_low) ||
@@ -1236,9 +1236,10 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
-static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
+static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 			   int *reord, int dup_sack, int fack_count)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	u8 sacked = TCP_SKB_CB(skb)->sacked;
 	int flag = 0;
 
@@ -1307,8 +1308,8 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
 		if (fack_count > tp->fackets_out)
 			tp->fackets_out = fack_count;
 
-		if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
-			tp->highest_sack = skb;
+		if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+			tcp_advance_highest_sack(sk, skb);
 	}
 
 	/* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1330,8 +1331,6 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 					int dup_sack_in, int *fack_count,
 					int *reord, int *flag)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-
 	tcp_for_write_queue_from(skb, sk) {
 		int in_sack = 0;
 		int dup_sack = dup_sack_in;
@@ -1358,7 +1357,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 			break;
 
 		if (in_sack)
-			*flag |= tcp_sacktag_one(skb, tp, reord, dup_sack, *fack_count);
+			*flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, *fack_count);
 
 		*fack_count += tcp_skb_pcount(skb);
 	}
@@ -1429,7 +1428,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
 			tp->fackets_out = 0;
-		tp->highest_sack = tcp_write_queue_head(sk);
+		tcp_highest_sack_reset(sk);
 	}
 
 	found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
@@ -1552,9 +1551,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 						       &fack_count, &reord, &flag);
 
 			/* ...tail remains todo... */
-			if (TCP_SKB_CB(tp->highest_sack)->end_seq == cache->end_seq) {
+			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
 				/* ...but better entrypoint exists! */
-				skb = tcp_write_queue_next(sk, tp->highest_sack);
+				skb = tcp_highest_sack(sk);
+				if (skb == NULL)
+					break;
 				fack_count = tp->fackets_out;
 				cache++;
 				goto walk;
@@ -1566,8 +1567,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			continue;
 		}
 
-		if (tp->sacked_out && !before(start_seq, tcp_highest_sack_seq(tp))) {
-			skb = tcp_write_queue_next(sk, tp->highest_sack);
+		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
+			skb = tcp_highest_sack(sk);
+			if (skb == NULL)
+				break;
 			fack_count = tp->fackets_out;
 		}
 		skb = tcp_sacktag_skip(skb, sk, start_seq);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7d8583a15d02..9a985b55e7d8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -667,7 +667,7 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
 	if (!tp->sacked_out || tcp_is_reno(tp))
 		return;
 
-	if (!before(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
+	if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
 		tp->fackets_out -= decr;
 }
 
@@ -711,9 +711,6 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
 
-	if (tcp_is_sack(tp) && tp->sacked_out && (skb == tp->highest_sack))
-		tp->highest_sack = buff;
-
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
 	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
@@ -1707,9 +1704,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		BUG_ON(tcp_skb_pcount(skb) != 1 ||
 		       tcp_skb_pcount(next_skb) != 1);
 
-		if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out &&
-		    (next_skb == tp->highest_sack)))
-			return;
+		tcp_highest_sack_combine(sk, next_skb, skb);
 
 		/* Ok. We will be able to collapse the packet. */
 		tcp_unlink_write_queue(next_skb, sk);
@@ -2019,7 +2014,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			break;
 		tp->forward_skb_hint = skb;
 
-		if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+		if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
 			break;
 
 		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)