summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorJames Bottomley <jejb@mulgrave.il.steeleye.com>2007-05-30 23:57:05 -0500
committerJames Bottomley <jejb@mulgrave.il.steeleye.com>2007-05-30 23:57:05 -0500
commit5bc65793cbf8da0d35f19ef025dda22887e79e80 (patch)
tree8291998abd73055de6f487fafa174ee2a5d3afee /net/ipv4
parent6edae708bf77e012d855a7e2c7766f211d234f4f (diff)
parent3f0a6766e0cc5a577805732e5adb50a585c58175 (diff)
downloadkernel-crypto-5bc65793cbf8da0d35f19ef025dda22887e79e80.tar.gz
kernel-crypto-5bc65793cbf8da0d35f19ef025dda22887e79e80.tar.xz
kernel-crypto-5bc65793cbf8da0d35f19ef025dda22887e79e80.zip
[SCSI] Merge up to linux-2.6 head
Conflicts: drivers/scsi/jazz_esp.c Same changes made by both SCSI and SPARC trees: problem with UTF-8 conversion in the copyright. Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig7
-rw-r--r--net/ipv4/fib_hash.c6
-rw-r--r--net/ipv4/fib_lookup.h3
-rw-r--r--net/ipv4/fib_semantics.c5
-rw-r--r--net/ipv4/fib_trie.c6
-rw-r--r--net/ipv4/icmp.c5
-rw-r--r--net/ipv4/ipvs/Kconfig30
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c20
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c6
-rw-r--r--net/ipv4/route.c75
-rw-r--r--net/ipv4/tcp_cong.c40
-rw-r--r--net/ipv4/tcp_input.c3
13 files changed, 132 insertions, 76 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index c68196cc56a..010fbb2d45e 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -43,11 +43,11 @@ config IP_ADVANCED_ROUTER
asymmetric routing (packets from you to a host take a different path
than packets from that host to you) or if you operate a non-routing
host which has several IP addresses on different interfaces. To turn
- rp_filter off use:
+ rp_filter on use:
- echo 0 > /proc/sys/net/ipv4/conf/<device>/rp_filter
+ echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter
or
- echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
+ echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter
If unsure, say N here.
@@ -577,6 +577,7 @@ config TCP_CONG_VENO
config TCP_CONG_YEAH
tristate "YeAH TCP"
depends on EXPERIMENTAL
+ select TCP_CONG_VEGAS
default n
---help---
YeAH-TCP is a sender-side high-speed enabled TCP congestion control
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 9cfecf1215c..07e843a47dd 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -456,6 +456,8 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
rt_cache_flush(-1);
+ rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
+ &cfg->fc_nlinfo, NLM_F_REPLACE);
return 0;
}
@@ -523,7 +525,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
rt_cache_flush(-1);
rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
- &cfg->fc_nlinfo);
+ &cfg->fc_nlinfo, 0);
return 0;
out_free_new_fa:
@@ -589,7 +591,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
fa = fa_to_delete;
rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
- tb->tb_id, &cfg->fc_nlinfo);
+ tb->tb_id, &cfg->fc_nlinfo, 0);
kill_fn = 0;
write_lock_bh(&fib_hash_lock);
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 0e8b70bad4e..eef9eec17e0 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -30,7 +30,8 @@ extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
int dst_len, u8 tos, struct fib_info *fi,
unsigned int);
extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
- int dst_len, u32 tb_id, struct nl_info *info);
+ int dst_len, u32 tb_id, struct nl_info *info,
+ unsigned int nlm_flags);
extern struct fib_alias *fib_find_alias(struct list_head *fah,
u8 tos, u32 prio);
extern int fib_detect_death(struct fib_info *fi, int order,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 406ea7050ae..bb94550d95c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -301,7 +301,8 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
}
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
- int dst_len, u32 tb_id, struct nl_info *info)
+ int dst_len, u32 tb_id, struct nl_info *info,
+ unsigned int nlm_flags)
{
struct sk_buff *skb;
u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
@@ -313,7 +314,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
err = fib_dump_info(skb, info->pid, seq, event, tb_id,
fa->fa_type, fa->fa_scope, key, dst_len,
- fa->fa_tos, fa->fa_info, 0);
+ fa->fa_tos, fa->fa_info, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 9be7da7c3a8..30e332ade61 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1226,6 +1226,8 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
rt_cache_flush(-1);
+ rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
+ tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
goto succeeded;
}
@@ -1278,7 +1280,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
rt_cache_flush(-1);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
- &cfg->fc_nlinfo);
+ &cfg->fc_nlinfo, 0);
succeeded:
return 0;
@@ -1624,7 +1626,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
fa = fa_to_delete;
rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
- &cfg->fc_nlinfo);
+ &cfg->fc_nlinfo, 0);
l = fib_find_node(t, key);
li = find_leaf_info(l, plen);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index d38cbba92a4..e238b17f554 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -514,7 +514,10 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
saddr = iph->daddr;
if (!(rt->rt_flags & RTCF_LOCAL)) {
- if (sysctl_icmp_errors_use_inbound_ifaddr)
+ /* This is broken, skb_in->dev points to the outgoing device
+ * after the packet passes through ip_output().
+ */
+ if (skb_in->dev && sysctl_icmp_errors_use_inbound_ifaddr)
saddr = inet_select_addr(skb_in->dev, 0, RT_SCOPE_LINK);
else
saddr = 0;
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 891b9355cf9..09d0c3f3566 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -1,10 +1,7 @@
#
# IP Virtual Server configuration
#
-menu "IP: Virtual Server Configuration"
- depends on NETFILTER
-
-config IP_VS
+menuconfig IP_VS
tristate "IP virtual server support (EXPERIMENTAL)"
depends on NETFILTER
---help---
@@ -25,9 +22,10 @@ config IP_VS
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+if IP_VS
+
config IP_VS_DEBUG
bool "IP virtual server debugging"
- depends on IP_VS
---help---
Say Y here if you want to get additional messages useful in
debugging the IP virtual server code. You can change the debug
@@ -35,7 +33,6 @@ config IP_VS_DEBUG
config IP_VS_TAB_BITS
int "IPVS connection table size (the Nth power of 2)"
- depends on IP_VS
default "12"
---help---
The IPVS connection hash table uses the chaining scheme to handle
@@ -61,42 +58,35 @@ config IP_VS_TAB_BITS
needed for your box.
comment "IPVS transport protocol load balancing support"
- depends on IP_VS
config IP_VS_PROTO_TCP
bool "TCP load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing TCP transport
protocol. Say Y if unsure.
config IP_VS_PROTO_UDP
bool "UDP load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing UDP transport
protocol. Say Y if unsure.
config IP_VS_PROTO_ESP
bool "ESP load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing ESP (Encapsulation
Security Payload) transport protocol. Say Y if unsure.
config IP_VS_PROTO_AH
bool "AH load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.
comment "IPVS scheduler"
- depends on IP_VS
config IP_VS_RR
tristate "round-robin scheduling"
- depends on IP_VS
---help---
The robin-robin scheduling algorithm simply directs network
connections to different real servers in a round-robin manner.
@@ -106,7 +96,6 @@ config IP_VS_RR
config IP_VS_WRR
tristate "weighted round-robin scheduling"
- depends on IP_VS
---help---
The weighted robin-robin scheduling algorithm directs network
connections to different real servers based on server weights
@@ -120,7 +109,6 @@ config IP_VS_WRR
config IP_VS_LC
tristate "least-connection scheduling"
- depends on IP_VS
---help---
The least-connection scheduling algorithm directs network
connections to the server with the least number of active
@@ -131,7 +119,6 @@ config IP_VS_LC
config IP_VS_WLC
tristate "weighted least-connection scheduling"
- depends on IP_VS
---help---
The weighted least-connection scheduling algorithm directs network
connections to the server with the least active connections
@@ -142,7 +129,6 @@ config IP_VS_WLC
config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
- depends on IP_VS
---help---
The locality-based least-connection scheduling algorithm is for
destination IP load balancing. It is usually used in cache cluster.
@@ -157,7 +143,6 @@ config IP_VS_LBLC
config IP_VS_LBLCR
tristate "locality-based least-connection with replication scheduling"
- depends on IP_VS
---help---
The locality-based least-connection with replication scheduling
algorithm is also for destination IP load balancing. It is
@@ -176,7 +161,6 @@ config IP_VS_LBLCR
config IP_VS_DH
tristate "destination hashing scheduling"
- depends on IP_VS
---help---
The destination hashing scheduling algorithm assigns network
connections to the servers through looking up a statically assigned
@@ -187,7 +171,6 @@ config IP_VS_DH
config IP_VS_SH
tristate "source hashing scheduling"
- depends on IP_VS
---help---
The source hashing scheduling algorithm assigns network
connections to the servers through looking up a statically assigned
@@ -198,7 +181,6 @@ config IP_VS_SH
config IP_VS_SED
tristate "shortest expected delay scheduling"
- depends on IP_VS
---help---
The shortest expected delay scheduling algorithm assigns network
connections to the server with the shortest expected delay. The
@@ -212,7 +194,6 @@ config IP_VS_SED
config IP_VS_NQ
tristate "never queue scheduling"
- depends on IP_VS
---help---
The never queue scheduling algorithm adopts a two-speed model.
When there is an idle server available, the job will be sent to
@@ -225,11 +206,10 @@ config IP_VS_NQ
module, choose M here. If unsure, say N.
comment 'IPVS application helper'
- depends on IP_VS
config IP_VS_FTP
tristate "FTP protocol helper"
- depends on IP_VS && IP_VS_PROTO_TCP
+ depends on IP_VS_PROTO_TCP
---help---
FTP is a protocol that transfers IP address and/or port number in
the payload. In the virtual server via Network Address Translation,
@@ -241,4 +221,4 @@ config IP_VS_FTP
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
-endmenu
+endif # IP_VS
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 0654eaae70c..fd62a41d69c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -154,12 +154,10 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
-#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
if ((*pskb)->nfct)
return NF_ACCEPT;
-#endif
/* Gather fragments. */
if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index 751b5980175..e6bc8e5a72f 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -40,8 +40,7 @@ mangle_rfc959_packet(struct sk_buff **pskb,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
+ enum ip_conntrack_info ctinfo)
{
char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
@@ -50,7 +49,6 @@ mangle_rfc959_packet(struct sk_buff **pskb,
DEBUGP("calling nf_nat_mangle_tcp_packet\n");
- *seq += strlen(buffer) - matchlen;
return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
@@ -63,8 +61,7 @@ mangle_eprt_packet(struct sk_buff **pskb,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
+ enum ip_conntrack_info ctinfo)
{
char buffer[sizeof("|1|255.255.255.255|65535|")];
@@ -72,7 +69,6 @@ mangle_eprt_packet(struct sk_buff **pskb,
DEBUGP("calling nf_nat_mangle_tcp_packet\n");
- *seq += strlen(buffer) - matchlen;
return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
@@ -85,8 +81,7 @@ mangle_epsv_packet(struct sk_buff **pskb,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
+ enum ip_conntrack_info ctinfo)
{
char buffer[sizeof("|||65535|")];
@@ -94,14 +89,13 @@ mangle_epsv_packet(struct sk_buff **pskb,
DEBUGP("calling nf_nat_mangle_tcp_packet\n");
- *seq += strlen(buffer) - matchlen;
return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
unsigned int, unsigned int, struct nf_conn *,
- enum ip_conntrack_info, u32 *seq)
+ enum ip_conntrack_info)
= {
[NF_CT_FTP_PORT] = mangle_rfc959_packet,
[NF_CT_FTP_PASV] = mangle_rfc959_packet,
@@ -116,8 +110,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb,
enum nf_ct_ftp_type type,
unsigned int matchoff,
unsigned int matchlen,
- struct nf_conntrack_expect *exp,
- u32 *seq)
+ struct nf_conntrack_expect *exp)
{
__be32 newip;
u_int16_t port;
@@ -145,8 +138,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb,
if (port == 0)
return NF_DROP;
- if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
- seq)) {
+ if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) {
nf_conntrack_unexpect_related(exp);
return NF_DROP;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index fcebc968d37..c5d2a2d690b 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -455,9 +455,9 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
if (idx > 0 &&
get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
(ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
- set_h225_addr_hook(pskb, data, 0, &taddr[0],
- &ct->tuplehash[!dir].tuple.dst.u3,
- info->sig_port[!dir]);
+ set_h225_addr(pskb, data, 0, &taddr[0],
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ info->sig_port[!dir]);
}
} else {
nf_conntrack_unexpect_related(exp);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cb76e3c725a..8603cfb271f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2396,7 +2396,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
dev_out = ip_dev_find(oldflp->fl4_src);
- if ((dev_out == NULL) && !(sysctl_ip_nonlocal_bind))
+ if (dev_out == NULL)
goto out;
/* I removed check for oif == dev_out->oif here.
@@ -2407,7 +2407,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
of another iface. --ANK
*/
- if (dev_out && oldflp->oif == 0
+ if (oldflp->oif == 0
&& (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
/* Special hack: user can direct multicasts
and limited broadcast via necessary interface
@@ -2598,6 +2598,69 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
EXPORT_SYMBOL_GPL(__ip_route_output_key);
+static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+}
+
+static struct dst_ops ipv4_dst_blackhole_ops = {
+ .family = AF_INET,
+ .protocol = __constant_htons(ETH_P_IP),
+ .destroy = ipv4_dst_destroy,
+ .check = ipv4_dst_check,
+ .update_pmtu = ipv4_rt_blackhole_update_pmtu,
+ .entry_size = sizeof(struct rtable),
+};
+
+
+static int ipv4_blackhole_output(struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return 0;
+}
+
+static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk)
+{
+ struct rtable *ort = *rp;
+ struct rtable *rt = (struct rtable *)
+ dst_alloc(&ipv4_dst_blackhole_ops);
+
+ if (rt) {
+ struct dst_entry *new = &rt->u.dst;
+
+ atomic_set(&new->__refcnt, 1);
+ new->__use = 1;
+ new->input = ipv4_blackhole_output;
+ new->output = ipv4_blackhole_output;
+ memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
+
+ new->dev = ort->u.dst.dev;
+ if (new->dev)
+ dev_hold(new->dev);
+
+ rt->fl = ort->fl;
+
+ rt->idev = ort->idev;
+ if (rt->idev)
+ in_dev_hold(rt->idev);
+ rt->rt_flags = ort->rt_flags;
+ rt->rt_type = ort->rt_type;
+ rt->rt_dst = ort->rt_dst;
+ rt->rt_src = ort->rt_src;
+ rt->rt_iif = ort->rt_iif;
+ rt->rt_gateway = ort->rt_gateway;
+ rt->rt_spec_dst = ort->rt_spec_dst;
+ rt->peer = ort->peer;
+ if (rt->peer)
+ atomic_inc(&rt->peer->refcnt);
+
+ dst_free(new);
+ }
+
+ dst_release(&(*rp)->u.dst);
+ *rp = rt;
+ return (rt ? 0 : -ENOMEM);
+}
+
int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
{
int err;
@@ -2610,7 +2673,11 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
flp->fl4_src = (*rp)->rt_src;
if (!flp->fl4_dst)
flp->fl4_dst = (*rp)->rt_dst;
- return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
+ err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
+ if (err == -EREMOTE)
+ err = ipv4_dst_blackhole(rp, flp, sk);
+
+ return err;
}
return 0;
@@ -3139,6 +3206,8 @@ int __init ip_rt_init(void)
kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
+
rt_hash_table = (struct rt_hash_bucket *)
alloc_large_system_hash("IP route cache",
sizeof(struct rt_hash_bucket),
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 86b26539e54..1260e52ad77 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -276,30 +276,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
/*
- * Slow start (exponential increase) with
- * RFC3742 Limited Slow Start (fast linear increase) support.
+ * Slow start is used when congestion window is less than slow start
+ * threshold. This version implements the basic RFC2581 version
+ * and optionally supports:
+ * RFC3742 Limited Slow Start - growth limited to max_ssthresh
+ * RFC3465 Appropriate Byte Counting - growth limited by bytes acknowledged
*/
void tcp_slow_start(struct tcp_sock *tp)
{
- int cnt = 0;
-
- if (sysctl_tcp_abc) {
- /* RFC3465: Slow Start
- * TCP sender SHOULD increase cwnd by the number of
- * previously unacknowledged bytes ACKed by each incoming
- * acknowledgment, provided the increase is not more than L
- */
- if (tp->bytes_acked < tp->mss_cache)
- return;
- }
+ int cnt; /* increase in packets */
+
+ /* RFC3465: ABC Slow start
+ * Increase only after a full MSS of bytes is acked
+ *
+ * TCP sender SHOULD increase cwnd by the number of
+ * previously unacknowledged bytes ACKed by each incoming
+ * acknowledgment, provided the increase is not more than L
+ */
+ if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache)
+ return;
- if (sysctl_tcp_max_ssthresh > 0 &&
- tp->snd_cwnd > sysctl_tcp_max_ssthresh)
- cnt += sysctl_tcp_max_ssthresh>>1;
+ if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
+ cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */
else
- cnt += tp->snd_cwnd;
+ cnt = tp->snd_cwnd; /* exponential increase */
- /* RFC3465: We MAY increase by 2 if discovered delayed ack */
+ /* RFC3465: ABC
+ * We MAY increase by 2 if discovered delayed ack
+ */
if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
cnt <<= 1;
tp->bytes_acked = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7641b2761a1..38cb25b48bf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1501,6 +1501,8 @@ void tcp_enter_loss(struct sock *sk, int how)
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
TCP_ECN_queue_cwr(tp);
+ /* Abort FRTO algorithm if one is in progress */
+ tp->frto_counter = 0;
clear_all_retrans_hints(tp);
}
@@ -2608,6 +2610,7 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
{
tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
tp->snd_cwnd_cnt = 0;
+ TCP_ECN_queue_cwr(tp);
tcp_moderate_cwnd(tp);
}