summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorIlpo Järvinen <ilpo.jarvinen@helsinki.fi>2007-10-11 17:35:41 -0700
committerDavid S. Miller <davem@davemloft.net>2007-10-11 17:35:41 -0700
commitf785a8e28b9d103c7473655743b6ac1bc3cd3a58 (patch)
tree44f3b88cca49ffa696a7e082cdb89e2f30550c63 /net/ipv4
parent4cd829995b86e0359796780d43d2f210cb5cf021 (diff)
downloadkernel-crypto-f785a8e28b9d103c7473655743b6ac1bc3cd3a58.tar.gz
kernel-crypto-f785a8e28b9d103c7473655743b6ac1bc3cd3a58.tar.xz
kernel-crypto-f785a8e28b9d103c7473655743b6ac1bc3cd3a58.zip
[TCP]: Fix lost_retrans loop vs fastpath problems
Detection implemented with lost_retrans must work also when fastpath is taken, yet most of the queue is skipped including (very likely) those retransmitted skb's we're interested in. This problem appeared when the hints got added, which removed a need to always walk over the whole write queue head. Therefore decicion for the lost_retrans worker loop entry must be separated from the sacktag processing more than it was necessary before. It turns out to be problematic to optimize the worker loop very heavily because ack_seqs of skb may have a number of discontinuity points. Maybe similar approach as currently is implemented could be attempted but that's becoming more and more complex because the trend is towards less skb walking in sacktag marker. Trying a simple work until all rexmitted skbs heve been processed approach. Maybe after(highest_sack_end_seq, tp->high_seq) checking is not sufficiently accurate and causes entry too often in no-work-to-do cases. Since that's not known, I've separated solution to that from this patch. Noticed because of report against a related problem from TAKANO Ryousei <takano@axe-inc.co.jp>. He also provided a patch to that part of the problem. This patch includes solution to it (though this patch has to use somewhat different placement). TAKANO's description and patch is available here: http://marc.info/?l=linux-netdev&m=119149311913288&w=2 ...In short, TAKANO's problem is that end_seq the loop is using not necessarily the largest SACK block's end_seq because the current ACK may still have higher SACK blocks which are later by the loop. Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/tcp_input.c37
1 files changed, 22 insertions, 15 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a0660391213..d5e0fcc22a3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1109,27 +1109,34 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
/* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
* Event "C". Later note: FACK people cheated me again 8), we have to account
* for reordering! Ugly, but should help.
+ *
+ * Search retransmitted skbs from write_queue that were sent when snd_nxt was
+ * less than what is now known to be received by the other end (derived from
+ * SACK blocks by the caller).
*/
-static int tcp_mark_lost_retrans(struct sock *sk, u32 lost_retrans)
+static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int flag = 0;
+ int cnt = 0;
tcp_for_write_queue(skb, sk) {
u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
if (skb == tcp_send_head(sk))
break;
- if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
+ if (cnt == tp->retrans_out)
break;
if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
continue;
- if ((TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) &&
- after(lost_retrans, ack_seq) &&
+ if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
+ continue;
+
+ if (after(received_upto, ack_seq) &&
(tcp_is_fack(tp) ||
- !before(lost_retrans,
+ !before(received_upto,
ack_seq + tp->reordering * tp->mss_cache))) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out -= tcp_skb_pcount(skb);
@@ -1143,6 +1150,8 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 lost_retrans)
flag |= FLAG_DATA_SACKED;
NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
}
+ } else {
+ cnt += tcp_skb_pcount(skb);
}
}
return flag;
@@ -1225,7 +1234,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
int reord = tp->packets_out;
int prior_fackets;
- u32 lost_retrans = 0;
+ u32 highest_sack_end_seq = 0;
int flag = 0;
int found_dup_sack = 0;
int cached_fack_count;
@@ -1396,11 +1405,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
continue;
}
- if ((sacked&TCPCB_SACKED_RETRANS) &&
- after(end_seq, TCP_SKB_CB(skb)->ack_seq) &&
- (!lost_retrans || after(end_seq, lost_retrans)))
- lost_retrans = end_seq;
-
if (!in_sack)
continue;
@@ -1454,9 +1458,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (fack_count > tp->fackets_out)
tp->fackets_out = fack_count;
- if (after(TCP_SKB_CB(skb)->seq,
- tp->highest_sack))
+ if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) {
tp->highest_sack = TCP_SKB_CB(skb)->seq;
+ highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq;
+ }
} else {
if (dup_sack && (sacked&TCPCB_RETRANS))
reord = min(fack_count, reord);
@@ -1476,8 +1481,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
}
}
- if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery)
- flag |= tcp_mark_lost_retrans(sk, lost_retrans);
+ if (tp->retrans_out && highest_sack_end_seq &&
+ after(highest_sack_end_seq, tp->high_seq) &&
+ icsk->icsk_ca_state == TCP_CA_Recovery)
+ flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);
tcp_verify_left_out(tp);