From 59a08cba6a604a265e45e9b970e372554cf46627 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 28 Feb 2009 04:44:28 +0000 Subject: tcp: fix lost_cnt_hint miscounts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible that lost_cnt_hint gets underflow in tcp_clean_rtx_queue because the cumulative ACK can cover the segment where lost_skb_hint points to only partially, which means that the hint is not cleared, opposite to what my (earlier) comment claimed. Also I don't agree what I ended up writing about non-trivial case there to be what I intented to say. It was not supposed to happen that the hint won't get cleared and we underflow in any scenario. In general, this is quite hard to trigger in practice. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c28976a7e59..3f2f09091bc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3273,18 +3273,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (tcp_is_reno(tp)) { tcp_remove_reno_sacks(sk, pkts_acked); } else { + int delta; + /* Non-retransmitted hole got filled? That's reordering */ if (reord < prior_fackets) tcp_update_reordering(sk, tp->fackets_out - reord, 0); - /* No need to care for underflows here because - * the lost_skb_hint gets NULLed if we're past it - * (or something non-trivial happened) - */ - if (tcp_is_fack(tp)) - tp->lost_cnt_hint -= pkts_acked; - else - tp->lost_cnt_hint -= prior_sacked - tp->sacked_out; + delta = tcp_is_fack(tp) ? pkts_acked : + prior_sacked - tp->sacked_out; + tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); } tp->fackets_out -= min(pkts_acked, tp->fackets_out); -- cgit From d0af4160d19ff2849386140881e729f9ba86f2aa Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 28 Feb 2009 04:44:32 +0000 Subject: tcp: remove redundant code from tcp_mark_lost_retrans MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arnd Hannemann noticed and was puzzled by the fact that !tcp_is_fack(tp) leads to early return near the beginning and the later on tcp_is_fack(tp) was still used in an if condition. The later check was a left-over from RFC3517 SACK stuff (== !tcp_is_fack(tp) behavior nowadays) as there wasn't clear way how to handle this particular check cheaply in the spirit of RFC3517 (using only SACK blocks, not holes + SACK blocks as with FACK). I sort of left it there as a reminder but since it's confusing other people just remove it and comment the missing-feature stuff instead. Signed-off-by: Ilpo Järvinen Cc: Arnd Hannemann Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f2f09091bc..125b4517f36 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1178,10 +1178,18 @@ static void tcp_mark_lost_retrans(struct sock *sk) if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) continue; - if (after(received_upto, ack_seq) && - (tcp_is_fack(tp) || - !before(received_upto, - ack_seq + tp->reordering * tp->mss_cache))) { + /* TODO: We would like to get rid of tcp_is_fack(tp) only + * constraint here (see above) but figuring out that at + * least tp->reordering SACK blocks reside between ack_seq + * and received_upto is not easy task to do cheaply with + * the available datastructures. + * + * Whether FACK should check here for tp->reordering segs + * in-between one could argue for either way (it would be + * rather simple to implement as we could count fack_count + * during the walk and do tp->fackets_out - fack_count). + */ + if (after(received_upto, ack_seq)) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); -- cgit From 7363a5b233734dba339f2874ff6ed6c489d3d865 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 28 Feb 2009 04:44:33 +0000 Subject: tcp: separate timeout marking loop to it's own function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some comment about its current state added. So far I have seen very few cases where the thing is actually useful, usually just marginally (though admittedly I don't usually see top of window losses where it seems possible that there could be some gain), instead, more often the cases suffer from L-marking spike which is certainly not desirable (I'll bury improving it to my todo list, but on a low prio position). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 63 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 24 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 125b4517f36..03f5ede8722 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2461,6 +2461,44 @@ static int tcp_time_to_recover(struct sock *sk) return 0; } +/* New heuristics: it is possible only after we switched to restart timer + * each time when something is ACKed. Hence, we can detect timed out packets + * during fast retransmit without falling to slow start. + * + * Usefulness of this as is very questionable, since we should know which of + * the segments is the next to timeout which is relatively expensive to find + * in general case unless we add some data structure just for that. The + * current approach certainly won't find the right one too often and when it + * finally does find _something_ it usually marks large part of the window + * right away (because a retransmission with a larger timestamp blocks the + * loop from advancing). -ij + */ +static void tcp_timeout_skbs(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + + if (!tcp_is_fack(tp) || !tcp_head_timedout(sk)) + return; + + skb = tp->scoreboard_skb_hint; + if (tp->scoreboard_skb_hint == NULL) + skb = tcp_write_queue_head(sk); + + tcp_for_write_queue_from(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + if (!tcp_skb_timedout(sk, skb)) + break; + + tcp_skb_mark_lost(tp, skb); + } + + tp->scoreboard_skb_hint = skb; + + tcp_verify_left_out(tp); +} + /* Mark head of queue up as lost. With RFC3517 SACK, the packets is * is against sacked "cnt", otherwise it's against facked "cnt" */ @@ -2533,30 +2571,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) tcp_mark_head_lost(sk, sacked_upto); } - /* New heuristics: it is possible only after we switched - * to restart timer each time when something is ACKed. - * Hence, we can detect timed out packets during fast - * retransmit without falling to slow start. - */ - if (tcp_is_fack(tp) && tcp_head_timedout(sk)) { - struct sk_buff *skb; - - skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint - : tcp_write_queue_head(sk); - - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (!tcp_skb_timedout(sk, skb)) - break; - - tcp_skb_mark_lost(tp, skb); - } - - tp->scoreboard_skb_hint = skb; - - tcp_verify_left_out(tp); - } + tcp_timeout_skbs(sk); } /* CWND moderation, preventing bursts due to too big ACKs -- cgit From cabeccbd172cc305f4383f5a4808ae254745275f Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 28 Feb 2009 04:44:38 +0000 Subject: tcp: kill eff_sacks "cache", the sole user can calculate itself MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also fixes insignificant bug that would cause sending of stale SACK block (would occur in some corner cases). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 03f5ede8722..e4442a293eb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4099,7 +4099,6 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) tp->rx_opt.dsack = 1; tp->duplicate_sack[0].start_seq = seq; tp->duplicate_sack[0].end_seq = end_seq; - tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1; } } @@ -4154,8 +4153,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) * Decrease num_sacks. */ tp->rx_opt.num_sacks--; - tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + - tp->rx_opt.dsack; for (i = this_sack; i < tp->rx_opt.num_sacks; i++) sp[i] = sp[i + 1]; continue; @@ -4218,7 +4215,6 @@ new_sack: sp->start_seq = seq; sp->end_seq = end_seq; tp->rx_opt.num_sacks++; - tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; } /* RCV.NXT advances, some SACKs should be eaten. */ @@ -4232,7 +4228,6 @@ static void tcp_sack_remove(struct tcp_sock *tp) /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ if (skb_queue_empty(&tp->out_of_order_queue)) { tp->rx_opt.num_sacks = 0; - tp->rx_opt.eff_sacks = tp->rx_opt.dsack; return; } @@ -4253,11 +4248,8 @@ static void tcp_sack_remove(struct tcp_sock *tp) this_sack++; sp++; } - if (num_sacks != tp->rx_opt.num_sacks) { + if (num_sacks != tp->rx_opt.num_sacks) tp->rx_opt.num_sacks = num_sacks; - tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + - tp->rx_opt.dsack; - } } /* This one checks to see if we can put data from the @@ -4333,10 +4325,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) TCP_ECN_accept_cwr(tp, skb); - if (tp->rx_opt.dsack) { + if (tp->rx_opt.dsack) tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; - } /* Queue data for delivery to the user. * Packets in sequence go to the receive queue. @@ -4456,7 +4446,6 @@ drop: if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks = 1; tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq; -- cgit From ee7537b63a28b42b22e48842dfeedc66d96b71f1 Mon Sep 17 00:00:00 2001 From: Hantzis Fotis Date: Mon, 2 Mar 2009 22:42:02 -0800 Subject: tcp: tcp_init_wl / tcp_update_wl argument cleanup The above functions from include/net/tcp.h have been defined with an argument that they never use. The argument is 'u32 ack' which is never used inside the function body, and thus it can be removed. The rest of the patch involves the necessary changes to the function callers of the above two functions. Signed-off-by: Hantzis Fotis Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e4442a293eb..5ecd7aa2597 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3416,7 +3416,7 @@ static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack, if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { flag |= FLAG_WIN_UPDATE; - tcp_update_wl(tp, ack, ack_seq); + tcp_update_wl(tp, ack_seq); if (tp->snd_wnd != nwin) { tp->snd_wnd = nwin; @@ -3621,7 +3621,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) * No more checks are required. * Note, we use the fact that SND.UNA>=SND.WL2. */ - tcp_update_wl(tp, ack, ack_seq); + tcp_update_wl(tp, ack_seq); tp->snd_una = ack; flag |= FLAG_WIN_UPDATE; @@ -5418,7 +5418,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * never scaled. */ tp->snd_wnd = ntohs(th->window); - tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); if (!tp->rx_opt.wscale_ok) { tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; @@ -5679,8 +5679,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; - tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, - TCP_SKB_CB(skb)->seq); + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); /* tcp_ack considers this ACK as duplicate * and does not calculate rtt. -- cgit From 5861f8e58dd84fc34b691c2e8d4824dea68c360e Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 14 Mar 2009 14:23:01 +0000 Subject: tcp: remove pointless .dsack/.num_sacks code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the pure assignment case, the earlier zeroing is still in effect. David S. Miller raised concerns if the ifs are there to avoid dirtying cachelines. I came to these conclusions: > We'll be dirty it anyway (now that I check), the first "real" statement > in tcp_rcv_established is: > > tp->rx_opt.saw_tstamp = 0; > > ...that'll land on the same dword. :-/ > > I suppose the blocks are there just because they had more complexity > inside when they had to calculate the eff_sacks too (maybe it would > have been better to just remove them in that drop-patch so you would > have had less head-ache :-)). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5ecd7aa2597..cd39d1d02dc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4248,8 +4248,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) this_sack++; sp++; } - if (num_sacks != tp->rx_opt.num_sacks) - tp->rx_opt.num_sacks = num_sacks; + tp->rx_opt.num_sacks = num_sacks; } /* This one checks to see if we can put data from the @@ -4325,8 +4324,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) TCP_ECN_accept_cwr(tp, skb); - if (tp->rx_opt.dsack) - tp->rx_opt.dsack = 0; + tp->rx_opt.dsack = 0; /* Queue data for delivery to the user. * Packets in sequence go to the receive queue. @@ -4445,7 +4443,6 @@ drop: /* Initial out of order segment, build 1 SACK. */ if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; - tp->rx_opt.dsack = 0; tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq; -- cgit From c43d558a5139a3b22dcac3f19f64ecb39130b02e Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 14 Mar 2009 14:23:02 +0000 Subject: tcp: kill dead end_seq variable in clean_rtx_queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've already forgotten what for this was necessary, anyway it's no longer used (if it ever was). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cd39d1d02dc..f527a16a7b3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3201,7 +3201,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - u32 end_seq; u32 acked_pcount; u8 sacked = scb->sacked; @@ -3216,10 +3215,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, break; fully_acked = 0; - end_seq = tp->snd_una; } else { acked_pcount = tcp_skb_pcount(skb); - end_seq = scb->end_seq; } /* MTU probing checks */ -- cgit From c887e6d2d9aee56ee7c9f2af4cec3a5efdcc4c72 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 14 Mar 2009 14:23:03 +0000 Subject: tcp: consolidate paws check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wow, it was quite tricky to merge that stream of negations but I think I finally got it right: check & replace_ts_recent: (s32)(rcv_tsval - ts_recent) >= 0 => 0 (s32)(ts_recent - rcv_tsval) <= 0 => 0 discard: (s32)(ts_recent - rcv_tsval) > TCP_PAWS_WINDOW => 1 (s32)(ts_recent - rcv_tsval) <= TCP_PAWS_WINDOW => 0 I toggled the return values of tcp_paws_check around since the old encoding added yet-another negation making tracking of truth-values really complicated. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f527a16a7b3..b7d02c5dd6d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3883,8 +3883,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) * Not only, also it occurs for expired timestamps. */ - if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 || - get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS) + if (tcp_paws_check(&tp->rx_opt, 0)) tcp_store_ts_recent(tp); } } @@ -3936,9 +3935,9 @@ static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb) { const struct tcp_sock *tp = tcp_sk(sk); - return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && - get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && - !tcp_disordered_ack(sk, skb)); + + return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) && + !tcp_disordered_ack(sk, skb); } /* Check segment sequence number for validity. @@ -5513,7 +5512,7 @@ discard: /* PAWS check. */ if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && - tcp_paws_check(&tp->rx_opt, 0)) + tcp_paws_reject(&tp->rx_opt, 0)) goto discard_and_undo; if (th->syn) { -- cgit From 72211e90501f954f586481c25521c3724cda3cc7 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 14 Mar 2009 14:23:04 +0000 Subject: tcp: don't check mtu probe completion in the loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that no variables clash such that we couldn't do the check just once later on. Therefore move it. Also kill dead obvious comment, dead argument and add unlikely since this mtu probe does not happen too often. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b7d02c5dd6d..311c30f73ee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2836,7 +2836,7 @@ static void tcp_mtup_probe_failed(struct sock *sk) icsk->icsk_mtup.probe_size = 0; } -static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) +static void tcp_mtup_probe_success(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -3219,12 +3219,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, acked_pcount = tcp_skb_pcount(skb); } - /* MTU probing checks */ - if (fully_acked && icsk->icsk_mtup.probe_size && - !after(tp->mtu_probe.probe_seq_end, scb->end_seq)) { - tcp_mtup_probe_success(sk, skb); - } - if (sacked & TCPCB_RETRANS) { if (sacked & TCPCB_SACKED_RETRANS) tp->retrans_out -= acked_pcount; @@ -3287,6 +3281,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + if (unlikely(icsk->icsk_mtup.probe_size && + !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { + tcp_mtup_probe_success(sk); + } + tcp_ack_update_rtt(sk, flag, seq_rtt); tcp_rearm_rto(sk); -- cgit From 0c54b85f2828128274f319a1eb3ce7f604fe2a53 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 14 Mar 2009 14:23:05 +0000 Subject: tcp: simplify tcp_current_mss MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's very little need for most of the callsites to get tp->xmit_goal_size updated. That will cost us divide as is, so slice the function in two. Also, the only users of the tp->xmit_goal_size are directly behind tcp_current_mss(), so there's no need to store that variable into tcp_sock at all! The drop of xmit_goal_size currently leaves 16-bit hole and some reorganization would again be necessary to change that (but I'm aiming to fill that hole with u16 xmit_goal_size_segs to cache the results of the remaining divide to get that tso on regression). Bring xmit_goal_size parts into tcp.c Signed-off-by: Ilpo Järvinen Cc: Evgeniy Polyakov Cc: Ingo Molnar Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 311c30f73ee..fae78e3eccc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2864,7 +2864,7 @@ void tcp_simple_retransmit(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - unsigned int mss = tcp_current_mss(sk, 0); + unsigned int mss = tcp_current_mss(sk); u32 prior_lost = tp->lost_out; tcp_for_write_queue(skb, sk) { -- cgit From a0bffffc148cd8e75a48a89ad2ddb74e4081a20a Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 21 Mar 2009 13:36:17 -0700 Subject: net/*: use linux/kernel.h swap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tcp_sack_swap seems unnecessary so I pushed swap to the caller. Also removed comment that seemed then pointless, and added include when not already there. Compile tested. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fae78e3eccc..8ac82b3703a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include #include @@ -1802,11 +1803,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, for (i = used_sacks - 1; i > 0; i--) { for (j = 0; j < i; j++) { if (after(sp[j].start_seq, sp[j + 1].start_seq)) { - struct tcp_sack_block tmp; - - tmp = sp[j]; - sp[j] = sp[j + 1]; - sp[j + 1] = tmp; + swap(sp[j], sp[j + 1]); /* Track where the first SACK block goes to */ if (j == first_sack_index) @@ -4156,20 +4153,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) } } -static inline void tcp_sack_swap(struct tcp_sack_block *sack1, - struct tcp_sack_block *sack2) -{ - __u32 tmp; - - tmp = sack1->start_seq; - sack1->start_seq = sack2->start_seq; - sack2->start_seq = tmp; - - tmp = sack1->end_seq; - sack1->end_seq = sack2->end_seq; - sack2->end_seq = tmp; -} - static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); @@ -4184,7 +4167,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) if (tcp_sack_extend(sp, seq, end_seq)) { /* Rotate this_sack to the first one. */ for (; this_sack > 0; this_sack--, sp--) - tcp_sack_swap(sp, sp - 1); + swap(*sp, *(sp - 1)); if (cur_sacks > 1) tcp_sack_maybe_coalesce(tp); return; -- cgit From 96e0bf4b5193d0d97d139f99e2dd128763d55521 Mon Sep 17 00:00:00 2001 From: John Dykstra Date: Sun, 22 Mar 2009 21:49:57 -0700 Subject: tcp: Discard segments that ack data not yet sent Discard incoming packets whose ack field iincludes data not yet sent. This is consistent with RFC 793 Section 3.9. Change tcp_ack() to distinguish between too-small and too-large ack field values. Keep segments with too-large ack fields out of the fast path, and change slow path to discard them. Reported-by: Oliver Zheng Signed-off-by: John Dykstra Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8ac82b3703a..2bc8e27a163 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3585,15 +3585,18 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) int prior_packets; int frto_cwnd = 0; - /* If the ack is newer than sent or older than previous acks + /* If the ack is older than previous acks * then we can probably ignore it. */ - if (after(ack, tp->snd_nxt)) - goto uninteresting_ack; - if (before(ack, prior_snd_una)) goto old_ack; + /* If the ack includes data we haven't sent yet, discard + * this segment (RFC793 Section 3.9). + */ + if (after(ack, tp->snd_nxt)) + goto invalid_ack; + if (after(ack, prior_snd_una)) flag |= FLAG_SND_UNA_ADVANCED; @@ -3683,6 +3686,10 @@ no_queue: tcp_ack_probe(sk); return 1; +invalid_ack: + SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt); + return -1; + old_ack: if (TCP_SKB_CB(skb)->sacked) { tcp_sacktag_write_queue(sk, skb, prior_snd_una); @@ -3690,8 +3697,7 @@ old_ack: tcp_try_keep_open(sk); } -uninteresting_ack: - SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); + SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); return 0; } @@ -5141,7 +5147,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, */ if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && - TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { + TCP_SKB_CB(skb)->seq == tp->rcv_nxt && + !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { int tcp_header_len = tp->tcp_header_len; /* Timestamp header prediction: tcp_header_len @@ -5294,8 +5301,8 @@ slow_path: return -res; step5: - if (th->ack) - tcp_ack(sk, skb, FLAG_SLOWPATH); + if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) + goto discard; tcp_rcv_rtt_measure_ts(sk, skb); @@ -5632,7 +5639,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* step 5: check the ACK field */ if (th->ack) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH); + int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; switch (sk->sk_state) { case TCP_SYN_RECV: -- cgit