From af2681828af5f2b42e12e8b16ba0cf113cf486c8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 3 Apr 2008 12:52:19 -0700 Subject: [ICMP]: Ensure that ICMP relookup maintains status quo The ICMP relookup path is only meant to modify behaviour when appropriate IPsec policies are in place and marked as requiring relookups. It is certainly not meant to modify behaviour when IPsec policies don't exist at all. However, due to an oversight on the error paths existing behaviour may in fact change should one of the relookup steps fail. This patch corrects this by redirecting all errors on relookup failures to the previous code path. That is, if the initial xfrm_lookup let the packet pass, we will stand by that decision should the relookup fail due to an error. This should be safe from a security point-of-view because compliant systems must install a default deny policy so the packet wouldn't have passed in that case. Many thanks to Julian Anastasov for pointing out this error. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 24 +++++++++++++----------- net/ipv6/icmp.c | 22 ++++++++++++---------- 2 files changed, 25 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a944e8053e2..40508babad8 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -591,7 +591,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET)) - goto ende; + goto relookup_failed; if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) err = __ip_route_output_key(net, &rt2, &fl); @@ -601,7 +601,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) fl2.fl4_dst = fl.fl4_src; if (ip_route_output_key(net, &rt2, &fl2)) - goto ende; + goto relookup_failed; /* Ugh! 
*/ odst = skb_in->dst; @@ -614,21 +614,23 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } if (err) - goto ende; + goto relookup_failed; err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL, XFRM_LOOKUP_ICMP); - if (err == -ENOENT) { + switch (err) { + case 0: + dst_release(&rt->u.dst); + rt = rt2; + break; + case -EPERM: + goto ende; + default: +relookup_failed: if (!rt) goto out_unlock; - goto route_done; + break; } - - dst_release(&rt->u.dst); - rt = rt2; - - if (err) - goto out_unlock; } route_done: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f204a7275a0..893287ecc62 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -436,24 +436,26 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, } if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6)) - goto out_dst_release; + goto relookup_failed; if (ip6_dst_lookup(sk, &dst2, &fl)) - goto out_dst_release; + goto relookup_failed; err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP); - if (err == -ENOENT) { + switch (err) { + case 0: + dst_release(dst); + dst = dst2; + break; + case -EPERM: + goto out_dst_release; + default: +relookup_failed: if (!dst) goto out; - goto route_done; + break; } - dst_release(dst); - dst = dst2; - - if (err) - goto out; - route_done: if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; -- cgit From 439e23857a21c3a953826eed23c818697a97de1a Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 3 Apr 2008 13:30:17 -0700 Subject: [IPV6]: Event type in addrconf_ifdown is mis-used. addrconf_ifdown is broken with respect to the usage of the 'how' parameter. This function is called with (event != NETDEV_DOWN) and (2) on the IPv6 stop. In the latter case inet6_dev from loopback device should be destroyed. Signed-off-by: Denis V. Lunev Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e7a1882db04..4fa9da0be19 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2469,7 +2469,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Step 1: remove reference to ipv6 device from parent device. Do not dev_put! */ - if (how == 1) { + if (how) { idev->dead = 1; /* protected by rtnl_lock */ @@ -2501,12 +2501,12 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); /* Step 3: clear flags for stateless addrconf */ - if (how != 1) + if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); /* Step 4: clear address list */ #ifdef CONFIG_IPV6_PRIVACY - if (how == 1 && del_timer(&idev->regen_timer)) + if (how && del_timer(&idev->regen_timer)) in6_dev_put(idev); /* clear tempaddr list */ @@ -2543,7 +2543,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Step 5: Discard multicast list */ - if (how == 1) + if (how) ipv6_mc_destroy_dev(idev); else ipv6_mc_down(idev); @@ -2552,7 +2552,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Shot the device (if unregistered) */ - if (how == 1) { + if (how) { addrconf_sysctl_unregister(idev); neigh_parms_release(&nd_tbl, idev->nd_parms); neigh_ifdown(&nd_tbl, dev); -- cgit From eb867579311a9c1e998d6911af056772c400122a Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 3 Apr 2008 13:31:53 -0700 Subject: [IPV6]: inet6_dev on loopback should be kept until namespace stop. In the other case it will be destroyed when last address will be removed from lo inside a namespace. This will break IPv6 in several places. The most obvious one is ip6_dst_ifdown. Signed-off-by: Denis V. Lunev Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4fa9da0be19..a65935a9afd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2456,7 +2456,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) ASSERT_RTNL(); - if (dev == init_net.loopback_dev && how == 1) + if ((dev->flags & IFF_LOOPBACK) && how == 1) how = 0; rt6_ifdown(dev); -- cgit From 84f59370c519449c70dcc813b050f5cbbf0098e7 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 3 Apr 2008 13:33:00 -0700 Subject: [IPV6]: Fix refcounting for anycast dst entries. Anycast DST entries allocated inside ipv6_dev_ac_inc are leaked when network device is stopped without removing IPv6 addresses from it. The bug has been observed in reality on 2.6.18-rhel5 kernel. In the above case addrconf_ifdown marks all entries as obsolete and ip6_del_rt called from __ipv6_dev_ac_dec returns ENOENT. The reference is not dropped. The fix is simple. DST entry should not keep reference when stored in the FIB6 tree. Signed-off-by: Denis V. Lunev Signed-off-by: David S. 
Miller --- net/ipv6/anycast.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 9c7f83fbc3a..e5f56c953b5 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -334,9 +334,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) idev->ac_list = aca; write_unlock_bh(&idev->lock); - dst_hold(&rt->u.dst); - if (ip6_ins_rt(rt)) - dst_release(&rt->u.dst); + ip6_ins_rt(rt); addrconf_join_solict(dev, &aca->aca_addr); @@ -378,10 +376,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) addrconf_leave_solict(idev, &aca->aca_addr); dst_hold(&aca->aca_rt->u.dst); - if (ip6_del_rt(aca->aca_rt)) - dst_free(&aca->aca_rt->u.dst); - else - dst_release(&aca->aca_rt->u.dst); + ip6_del_rt(aca->aca_rt); aca_put(aca); return 0; -- cgit From 23556323b22fef35bdc36465b7e7439ba3748c9f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 4 Apr 2008 12:45:12 -0700 Subject: [VLAN]: Fix egress priority mappings leak. These entries are allocated in vlan_dev_set_egress_priority, but are never released and leaks on vlan device removal. Drop these in vlan's ->uninit callback - after the device is brought down and everyone is notified about it is going to be unregistered. Found during testing vlan netnsization patchset. Signed-off-by: Pavel Emelyanov Acked-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/8021q/vlan_dev.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 480ea90e7dc..41a76a05e6f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -692,6 +692,20 @@ static int vlan_dev_init(struct net_device *dev) return 0; } +static void vlan_dev_uninit(struct net_device *dev) +{ + struct vlan_priority_tci_mapping *pm; + struct vlan_dev_info *vlan = vlan_dev_info(dev); + int i; + + for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { + while ((pm = vlan->egress_priority_map[i]) != NULL) { + vlan->egress_priority_map[i] = pm->next; + kfree(pm); + } + } +} + void vlan_setup(struct net_device *dev) { ether_setup(dev); @@ -701,6 +715,7 @@ void vlan_setup(struct net_device *dev) dev->change_mtu = vlan_dev_change_mtu; dev->init = vlan_dev_init; + dev->uninit = vlan_dev_uninit; dev->open = vlan_dev_open; dev->stop = vlan_dev_stop; dev->set_mac_address = vlan_dev_set_mac_address; -- cgit From 16f2e85d3151efa643879fa5aa87c9d77d60f57e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Apr 2008 14:35:46 +0200 Subject: nl80211: fix STA AID bug This fixes the STA AID setting and actually makes hostapd/mac80211 work properly in presence of power-saving stations. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e3a214f63f9..f68a5c8f214 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -945,7 +945,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS], &params.station_flags)) -- cgit From 1b69d745397eac12b3f8a2eb6b799cd476aef282 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 7 Apr 2008 22:31:38 -0700 Subject: [TCP]: Restore 2.6.24 mark_head_lost behavior for newreno/fack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fast retransmission can be forced locally to the rfc3517 branch in tcp_update_scoreboard instead of making such fragile constructs deeper in tcp_mark_head_lost. This is necessary for the next patch which must not have loopholes for cnt > packets check. As one can notice, readability got some improvements too because of this :-). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7facdb0f696..5573202f086 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2134,7 +2134,7 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) /* Mark head of queue up as lost. 
With RFC3517 SACK, the packets is * is against sacked "cnt", otherwise it's against facked "cnt" */ -static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit) +static void tcp_mark_head_lost(struct sock *sk, int packets) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -2161,7 +2161,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit) (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) cnt += tcp_skb_pcount(skb); - if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) || + if ((cnt > packets) || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) break; if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { @@ -2180,17 +2180,17 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) struct tcp_sock *tp = tcp_sk(sk); if (tcp_is_reno(tp)) { - tcp_mark_head_lost(sk, 1, fast_rexmit); + tcp_mark_head_lost(sk, 1); } else if (tcp_is_fack(tp)) { int lost = tp->fackets_out - tp->reordering; if (lost <= 0) lost = 1; - tcp_mark_head_lost(sk, lost, fast_rexmit); + tcp_mark_head_lost(sk, lost); } else { int sacked_upto = tp->sacked_out - tp->reordering; - if (sacked_upto < 0) - sacked_upto = 0; - tcp_mark_head_lost(sk, sacked_upto, fast_rexmit); + if (sacked_upto < fast_rexmit) + sacked_upto = fast_rexmit; + tcp_mark_head_lost(sk, sacked_upto); } /* New heuristics: it is possible only after we switched @@ -2524,7 +2524,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) before(tp->snd_una, tp->high_seq) && icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0); + tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } -- cgit From c137f3dda04b0aee1bc6889cdc69185f53df8a82 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 7 Apr 2008 22:32:38 -0700 Subject: [TCP]: Fix NewReno's fast rexmit/recovery problems with GSOed skb MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a long-standing bug which makes NewReno recovery crippled. With GSO the whole head skb was marked as LOST which is in violation of NewReno procedure that only wants to mark one packet and ended up breaking our TCP code by causing counter overflow because our code was built on top of the assumption of a valid NewReno procedure. This manifested as triggering a WARN_ON for the overflow in a number of places. It seems a relatively safe alternative to just do nothing if tcp_fragment fails due to oom because another duplicate ACK is likely to be received soon and the fragmentation will be retried. Special thanks go to Soeren Sonnenburg who was lucky enough to be able to reproduce this so that the warning for the overflow was hit. It's not as easy a task as it seems even if this bug happens quite often because the amount of outstanding data is pretty significant for the mismarkings to lead to an overflow. Because it's very late in the 2.6.25-rc cycle (if this even makes it in time), I didn't want to touch anything with SACK enabled here. Fragmenting might be useful for it as well but it's more or less a policy decision rather than a mandatory fix. Thus there's no need to rush and we can postpone considering tcp_fragment with SACK for 2.6.26. In 2.6.24 and earlier, this very same bug existed but the effect is slightly different because of small changes in the if conditions that fit to the patch's context. With them nothing got the lost marker and thus no retransmissions happened. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5573202f086..7d0958785bf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2138,7 +2138,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int cnt; + int cnt, oldcnt; + int err; + unsigned int mss; BUG_TRAP(packets <= tp->packets_out); if (tp->lost_skb_hint) { @@ -2157,13 +2159,25 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) tp->lost_skb_hint = skb; tp->lost_cnt_hint = cnt; + if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) + break; + + oldcnt = cnt; if (tcp_is_fack(tp) || tcp_is_reno(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) cnt += tcp_skb_pcount(skb); - if ((cnt > packets) || - after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) - break; + if (cnt > packets) { + if (tcp_is_sack(tp) || (oldcnt >= packets)) + break; + + mss = skb_shinfo(skb)->gso_size; + err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss); + if (err < 0) + break; + cnt = packets; + } + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); -- cgit From 882bebaaca4bb1484078d44ef011f918c0e1e14e Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 7 Apr 2008 22:33:07 -0700 Subject: [TCP]: tcp_simple_retransmit can cause S+L MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes Bugzilla #10384 tcp_simple_retransmit does L increment without any checking whatsoever for overflowing S+L when Reno is in use. The simplest scenario I can currently think of is rather complex in practice (there might be some more straightforward cases though). 
Ie., if mss is reduced during mtu probing, it may end up marking everything lost and if some duplicate ACKs arrived prior to that sacked_out will be non-zero as well, leading to S+L > packets_out, tcp_clean_rtx_queue on the next cumulative ACK or tcp_fastretrans_alert on the next duplicate ACK will fix the S counter. More straightforward (but questionable) solution would be to just call tcp_reset_reno_sack() in tcp_simple_retransmit but it would negatively impact the probe's retransmission, ie., the retransmissions would not occur if some duplicate ACKs had arrived. So I had to add reno sacked_out reseting to CA_Loss state when the first cumulative ACK arrives (this stale sacked_out might actually be the explanation for the reports of left_out overflows in kernel prior to 2.6.23 and S+L overflow reports of 2.6.24). However, this alone won't be enough to fix kernel before 2.6.24 because it is building on top of the commit 1b6d427bb7e ([TCP]: Reduce sacked_out with reno when purging write_queue) to keep the sacked_out from overflowing. Signed-off-by: Ilpo Järvinen Reported-by: Alessandro Suardi Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 24 ++++++++++++++++++------ net/ipv4/tcp_output.c | 3 +++ 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7d0958785bf..b4812c3cbbc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1625,13 +1625,11 @@ out: return flag; } -/* If we receive more dupacks than we expected counting segments - * in assumption of absent reordering, interpret this as reordering. - * The only another reason could be bug in receiver TCP. +/* Limits sacked_out so that sum with lost_out isn't ever larger than + * packets_out. Returns zero if sacked_out adjustement wasn't necessary. 
*/ -static void tcp_check_reno_reordering(struct sock *sk, const int addend) +int tcp_limit_reno_sacked(struct tcp_sock *tp) { - struct tcp_sock *tp = tcp_sk(sk); u32 holes; holes = max(tp->lost_out, 1U); @@ -1639,8 +1637,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) if ((tp->sacked_out + holes) > tp->packets_out) { tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(sk, tp->packets_out + addend, 0); + return 1; } + return 0; +} + +/* If we receive more dupacks than we expected counting segments + * in assumption of absent reordering, interpret this as reordering. + * The only another reason could be bug in receiver TCP. + */ +static void tcp_check_reno_reordering(struct sock *sk, const int addend) +{ + struct tcp_sock *tp = tcp_sk(sk); + if (tcp_limit_reno_sacked(tp)) + tcp_update_reordering(sk, tp->packets_out + addend, 0); } /* Emulate SACKs for SACKless connection: account for a new dupack. */ @@ -2600,6 +2610,8 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) case TCP_CA_Loss: if (flag & FLAG_DATA_ACKED) icsk->icsk_retransmits = 0; + if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED) + tcp_reset_reno_sack(tp); if (!tcp_try_undo_loss(sk)) { tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 72b9350006f..d29ef79c00c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1808,6 +1808,9 @@ void tcp_simple_retransmit(struct sock *sk) if (!lost) return; + if (tcp_is_reno(tp)) + tcp_limit_reno_sacked(tp); + tcp_verify_left_out(tp); /* Don't muck with the congestion window here. 
-- cgit From 6adb4f733e9996b4fd68a6db50dd51bd2463ccac Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 7 Apr 2008 22:33:57 -0700 Subject: [TCP]: Don't allow FRTO to take place while MTU is being probed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MTU probe can cause some remedies for FRTO because the normal packet ordering may be violated allowing FRTO to make a wrong decision (it might not be that serious a threat for anything though). Thus it's safer to not run FRTO while MTU probe is underway. It seems that the basic FRTO variant should also look for an skb at probe_seq.start to check if that's a retransmitted one but I didn't implement it now (plain seqno in window check isn't robust against wraparounds). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b4812c3cbbc..5119856017a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1691,11 +1691,16 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) int tcp_use_frto(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb; if (!sysctl_tcp_frto) return 0; + /* MTU probe and F-RTO won't really play nicely along currently */ + if (icsk->icsk_mtup.probe_size) + return 0; + if (IsSackFrto()) return 1; -- cgit From 21f644f3eabde637f255f75ad05d0821a7a36b7f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Apr 2008 16:50:44 -0700 Subject: [NET]: Undo code bloat in hot paths due to print_mac(). If print_mac() is used inside of a pr_debug() the compiler can't see that the call is redundant so still performs it even if pr_debug() ends up being a nop. So don't use print_mac() in such cases in hot code paths, use MAC_FMT et al. instead. 
As noted by Joe Perches, pr_debug() could be modified to handle this better, but that is a change to an interface used by the entire kernel and thus needs to be validated carefully. This here is thus the less risky fix for 2.6.25 Signed-off-by: David S. Miller --- net/atm/lec.c | 29 +++++++++++++++++++---------- net/ieee80211/ieee80211_rx.c | 43 +++++++++++++++++++++++++++++-------------- 2 files changed, 48 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/atm/lec.c b/net/atm/lec.c index a2efa7ff41f..3235c57615e 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -266,7 +266,6 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) char buf[300]; int i = 0; #endif /* DUMP_PACKETS >0 */ - DECLARE_MAC_BUF(mac); pr_debug("lec_start_xmit called\n"); if (!priv->lecd) { @@ -374,15 +373,19 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) { pr_debug("%s:lec_start_xmit: queuing packet, ", dev->name); - pr_debug("MAC address %s\n", - print_mac(mac, lec_h->h_dest)); + pr_debug("MAC address " MAC_FMT "\n", + lec_h->h_dest[0], lec_h->h_dest[1], + lec_h->h_dest[2], lec_h->h_dest[3], + lec_h->h_dest[4], lec_h->h_dest[5]); skb_queue_tail(&entry->tx_wait, skb); } else { pr_debug ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", dev->name); - pr_debug("MAC address %s\n", - print_mac(mac, lec_h->h_dest)); + pr_debug("MAC address " MAC_FMT "\n", + lec_h->h_dest[0], lec_h->h_dest[1], + lec_h->h_dest[2], lec_h->h_dest[3], + lec_h->h_dest[4], lec_h->h_dest[5]); priv->stats.tx_dropped++; dev_kfree_skb(skb); } @@ -394,8 +397,10 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { pr_debug("lec.c: emptying tx queue, "); - pr_debug("MAC address %s\n", - print_mac(mac, lec_h->h_dest)); + pr_debug("MAC address " MAC_FMT "\n", + lec_h->h_dest[0], lec_h->h_dest[1], + lec_h->h_dest[2], 
lec_h->h_dest[3], + lec_h->h_dest[4], lec_h->h_dest[5]); lec_send(vcc, skb2, priv); } @@ -449,7 +454,6 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) struct lec_arp_table *entry; int i; char *tmp; /* FIXME */ - DECLARE_MAC_BUF(mac); atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); mesg = (struct atmlec_msg *)skb->data; @@ -536,9 +540,14 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) struct net_bridge_fdb_entry *f; pr_debug - ("%s: bridge zeppelin asks about %s\n", + ("%s: bridge zeppelin asks about " MAC_FMT "\n", dev->name, - print_mac(mac, mesg->content.proxy.mac_addr)); + mesg->content.proxy.mac_addr[0], + mesg->content.proxy.mac_addr[1], + mesg->content.proxy.mac_addr[2], + mesg->content.proxy.mac_addr[3], + mesg->content.proxy.mac_addr[4], + mesg->content.proxy.mac_addr[5]); if (br_fdb_get_hook == NULL || dev->br_port == NULL) break; diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 1e3f87c8c01..200ee1e6372 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -271,7 +271,6 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, { struct ieee80211_hdr_3addr *hdr; int res, hdrlen; - DECLARE_MAC_BUF(mac); if (crypt == NULL || crypt->ops->decrypt_mpdu == NULL) return 0; @@ -283,8 +282,12 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, res = crypt->ops->decrypt_mpdu(skb, hdrlen, crypt->priv); atomic_dec(&crypt->refcnt); if (res < 0) { - IEEE80211_DEBUG_DROP("decryption failed (SA=%s" - ") res=%d\n", print_mac(mac, hdr->addr2), res); + IEEE80211_DEBUG_DROP("decryption failed (SA=" MAC_FMT + ") res=%d\n", + hdr->addr2[0], hdr->addr2[1], + hdr->addr2[2], hdr->addr2[3], + hdr->addr2[4], hdr->addr2[5], + res); if (res == -2) IEEE80211_DEBUG_DROP("Decryption failed ICV " "mismatch (key %d)\n", @@ -304,7 +307,6 @@ ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, { struct ieee80211_hdr_3addr 
*hdr; int res, hdrlen; - DECLARE_MAC_BUF(mac); if (crypt == NULL || crypt->ops->decrypt_msdu == NULL) return 0; @@ -317,8 +319,12 @@ ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, atomic_dec(&crypt->refcnt); if (res < 0) { printk(KERN_DEBUG "%s: MSDU decryption/MIC verification failed" - " (SA=%s keyidx=%d)\n", - ieee->dev->name, print_mac(mac, hdr->addr2), keyidx); + " (SA=" MAC_FMT " keyidx=%d)\n", + ieee->dev->name, + hdr->addr2[0], hdr->addr2[1], + hdr->addr2[2], hdr->addr2[3], + hdr->addr2[4], hdr->addr2[5], + keyidx); return -1; } @@ -462,8 +468,10 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, * frames silently instead of filling system log with * these reports. */ IEEE80211_DEBUG_DROP("Decryption failed (not set)" - " (SA=%s)\n", - print_mac(mac, hdr->addr2)); + " (SA=" MAC_FMT ")\n", + hdr->addr2[0], hdr->addr2[1], + hdr->addr2[2], hdr->addr2[3], + hdr->addr2[4], hdr->addr2[5]); ieee->ieee_stats.rx_discards_undecryptable++; goto rx_dropped; } @@ -474,8 +482,10 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, fc & IEEE80211_FCTL_PROTECTED && ieee->host_decrypt && (keyidx = hostap_rx_frame_decrypt(ieee, skb, crypt)) < 0) { printk(KERN_DEBUG "%s: failed to decrypt mgmt::auth " - "from %s\n", dev->name, - print_mac(mac, hdr->addr2)); + "from " MAC_FMT "\n", dev->name, + hdr->addr2[0], hdr->addr2[1], + hdr->addr2[2], hdr->addr2[3], + hdr->addr2[4], hdr->addr2[5]); /* TODO: could inform hostapd about this so that it * could send auth failure report */ goto rx_dropped; @@ -653,8 +663,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, * configured */ } else { IEEE80211_DEBUG_DROP("encryption configured, but RX " - "frame not encrypted (SA=%s" - ")\n", print_mac(mac, hdr->addr2)); + "frame not encrypted (SA=" + MAC_FMT ")\n", + hdr->addr2[0], hdr->addr2[1], + hdr->addr2[2], hdr->addr2[3], + hdr->addr2[4], hdr->addr2[5]); goto rx_dropped; } } @@ -662,9 +675,11 @@ int 
ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep && !ieee80211_is_eapol_frame(ieee, skb)) { IEEE80211_DEBUG_DROP("dropped unencrypted RX data " - "frame from %s" + "frame from " MAC_FMT " (drop_unencrypted=1)\n", - print_mac(mac, hdr->addr2)); + hdr->addr2[0], hdr->addr2[1], + hdr->addr2[2], hdr->addr2[3], + hdr->addr2[4], hdr->addr2[5]); goto rx_dropped; } -- cgit From 216bce90b811a35eb5cd2ed8216bdbb1753e9b2b Mon Sep 17 00:00:00 2001 From: Vladimir Koutny Date: Mon, 31 Mar 2008 17:05:10 +0200 Subject: mac80211: use short_preamble mode from capability if ERP IE not present When associating to a b-only AP where there is no ERP IE, short preamble mode is left at previous state (probably also protection mode). In this case, disable protection and use short preamble mode as specified in capability field. The same is done if capability field is changed on-the-fly. Signed-off-by: Vladimir Koutny Signed-off-by: John W. 
Linville --- net/mac80211/ieee80211_sta.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index e0c72d04584..c1706855460 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -312,14 +312,12 @@ static void ieee80211_sta_wmm_params(struct net_device *dev, } } - -static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata, - u8 erp_value) +static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, + bool use_protection, + bool use_short_preamble) { struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf; struct ieee80211_if_sta *ifsta = &sdata->u.sta; - bool use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; - bool use_short_preamble = (erp_value & WLAN_ERP_BARKER_PREAMBLE) == 0; DECLARE_MAC_BUF(mac); u32 changed = 0; @@ -350,6 +348,32 @@ static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata, return changed; } +static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata, + u8 erp_value) +{ + bool use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; + bool use_short_preamble = (erp_value & WLAN_ERP_BARKER_PREAMBLE) == 0; + + return ieee80211_handle_protect_preamb(sdata, + use_protection, use_short_preamble); +} + +static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta_bss *bss) +{ + u32 changed = 0; + + if (bss->has_erp_value) + changed |= ieee80211_handle_erp_ie(sdata, bss->erp_value); + else { + u16 capab = bss->capability; + changed |= ieee80211_handle_protect_preamb(sdata, false, + (capab & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0); + } + + return changed; +} + int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, struct ieee80211_ht_info *ht_info) { @@ -468,9 +492,7 @@ static void ieee80211_set_associated(struct net_device *dev, local->hw.conf.channel, 
ifsta->ssid, ifsta->ssid_len); if (bss) { - if (bss->has_erp_value) - changed |= ieee80211_handle_erp_ie( - sdata, bss->erp_value); + changed |= ieee80211_handle_bss_capability(sdata, bss); ieee80211_rx_bss_put(dev, bss); } @@ -2116,6 +2138,11 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, if (elems.erp_info && elems.erp_info_len >= 1) changed |= ieee80211_handle_erp_ie(sdata, elems.erp_info[0]); + else { + u16 capab = le16_to_cpu(mgmt->u.beacon.capab_info); + changed |= ieee80211_handle_protect_preamb(sdata, false, + (capab & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0); + } if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param && local->ops->conf_ht && -- cgit From bcf0dda8d2408fe1c1040cdec5a98e5fcad2ac72 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 9 Apr 2008 15:08:24 -0700 Subject: [XFRM]: xfrm_user: fix selector family initialization Commit df9dcb45 ([IPSEC]: Fix inter address family IPsec tunnel handling) broke openswan by removing the selector initialization for tunnel mode in case it is uninitialized. This patch restores the initialization, fixing openswan, but probably breaking inter-family tunnels again (unknown since the patch author disappeared). The correct thing for inter-family tunnels is probably to simply initialize the selector family explicitly. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5d96f2728dc..019d21de19b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -288,7 +288,7 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; - if (x->props.mode == XFRM_MODE_TRANSPORT) + if (!x->sel.family) x->sel.family = p->family; } -- cgit From 1b9b70ea2ebaab26c3e4fed385dfab6fc16359ed Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 9 Apr 2008 15:14:18 -0700 Subject: [NETFILTER]: xt_hashlimit: fix mask calculation Shifts larger than the data type are undefined, don't try to shift an u32 by 32. Also remove some special-casing of bitmasks divisible by 32. Based on patch by Jan Engelhardt . Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_hashlimit.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index dc29007c52c..40d344b2145 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -466,38 +466,25 @@ static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) static inline __be32 maskl(__be32 a, unsigned int l) { - return htonl(ntohl(a) & ~(~(u_int32_t)0 >> l)); + return l ? htonl(ntohl(a) & ~0 << (32 - l)) : 0; } #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) static void hashlimit_ipv6_mask(__be32 *i, unsigned int p) { switch (p) { - case 0: - i[0] = i[1] = 0; - i[2] = i[3] = 0; - break; - case 1 ... 31: + case 0 ... 31: i[0] = maskl(i[0], p); i[1] = i[2] = i[3] = 0; break; - case 32: - i[1] = i[2] = i[3] = 0; - break; - case 33 ... 63: + case 32 ... 
63: i[1] = maskl(i[1], p - 32); i[2] = i[3] = 0; break; - case 64: - i[2] = i[3] = 0; - break; - case 65 ... 95: + case 64 ... 95: i[2] = maskl(i[2], p - 64); i[3] = 0; - case 96: - i[3] = 0; - break; - case 97 ... 127: + case 96 ... 127: i[3] = maskl(i[3], p - 96); break; case 128: -- cgit From 475959d4773e53a2700e523dd30acebbd47556a5 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 9 Apr 2008 15:14:58 -0700 Subject: [NETFILTER]: nf_nat: autoload IPv4 connection tracking Without this patch, the generic L3 tracker would kick in if nf_conntrack_ipv4 was not loaded before nf_nat, which would lead to translation problems with ICMP errors. NAT does not make sense without IPv4 connection tracking anyway, so just add a call to need_ipv4_conntrack(). Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 0d5fa3a54d0..36b4e3bb056 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -629,6 +629,8 @@ static int __init nf_nat_init(void) size_t i; int ret; + need_ipv4_conntrack(); + ret = nf_ct_extend_register(&nat_extend); if (ret < 0) { printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); -- cgit From 951e07c930f5f66b676eaa4c32a1b0d8e2d7d06a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Apr 2008 01:29:36 -0700 Subject: [IPV4]: Fix byte value boundary check in do_ip_getsockopt(). This fixes kernel bugzilla 10371. 
As reported by M.Piechaczek@osmosys.tv, if we try to grab a char sized socket option value, as in: unsigned char ttl = 255; socklen_t len = sizeof(ttl); setsockopt(socket, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, len); getsockopt(socket, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, &len); The ttl returned will be wrong on big-endian, and on both little-endian and big-endian the next three bytes in userspace are written with garbage. It's because of this test in do_ip_getsockopt(): if (len < sizeof(int) && len > 0 && val>=0 && val<255) { It should allow a 'val' of 255 to pass here, but it doesn't, so it copies a full 'int' back to userspace. On little-endian that will write the correct value into the location but it spams on the next three bytes in userspace. On big-endian it writes the wrong value into the location and spams the next three bytes. Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index f72457b4b0a..c2921d01e92 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1132,7 +1132,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, } release_sock(sk); - if (len < sizeof(int) && len > 0 && val>=0 && val<255) { + if (len < sizeof(int) && len > 0 && val>=0 && val<=255) { unsigned char ucval = (unsigned char)val; len = 1; if (put_user(len, optlen)) -- cgit From 7951f0b03a63d657c72c7d54d306ef3357e7e604 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Apr 2008 20:53:10 -0700 Subject: [NETNS][IPV6] tcp - assign the netns for timewait sockets Copy the network namespace from the socket to the timewait socket. Signed-off-by: Daniel Lezcano Acked-by: Mark Lord Signed-off-by: David S.
Miller --- net/ipv4/inet_timewait_sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 876169f3a52..717c411a5c6 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -124,6 +124,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; tw->tw_prot = sk->sk_prot_creator; + tw->tw_net = sk->sk_net; atomic_set(&tw->tw_refcnt, 1); inet_twsk_dead_node_init(tw); __module_get(tw->tw_prot->owner); -- cgit From ae1b6a31b1f9ef2c7ba5ef89799f210a9ba6937c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 12 Apr 2008 18:33:50 -0700 Subject: [AX25]: Potential ax25_uid_assoc-s leaks on module unload. The ax25_uid_free call walks the ax25_uid_list and releases entries from it. The problem is that after the first call to hlist_del_init the hlist_for_each_entry (which hides behind the ax25_uid_for_each) will consider the current position to be the last and will return. Thus, the whole list will be left not freed. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ax25/ax25_uid.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 5f4eb73fb9d..57aeba729ba 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -218,9 +218,11 @@ void __exit ax25_uid_free(void) struct hlist_node *node; write_lock(&ax25_uid_lock); +again: ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { hlist_del_init(&ax25_uid->uid_node); ax25_uid_put(ax25_uid); + goto again; } write_unlock(&ax25_uid_lock); } -- cgit From 028b027524b162eef90839a92ba4b8bddf23e06c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 12 Apr 2008 18:35:41 -0700 Subject: [DCCP]: Fix skb->cb conflicts with IP dev_queue_xmit() and the other IP output functions expect to get a skb with clear or properly initialized skb->cb.
Unlike TCP and UDP, the dccp_skb_cb doesn't contain a struct inet_skb_parm at the beginning, so the DCCP-specific data is interpreted by the IP output functions. This can cause false negatives for the conditional POST_ROUTING hook invocation, making the packet bypass the hook. Add a inet_skb_parm/inet6_skb_parm union to the beginning of dccp_skb_cb to avoid clashes. Also add a BUILD_BUG_ON to make sure it fits in the cb. [ Combined with patch from Gerrit Renker to remove two now unnecessary memsets of IPCB(skb)->opt ] Signed-off-by: Patrick McHardy Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/dccp.h | 6 ++++++ net/dccp/ipv4.c | 1 - net/dccp/output.c | 1 - net/dccp/proto.c | 3 +++ 4 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 287a62bc2e0..ba2ef94a230 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -325,6 +325,12 @@ static inline int dccp_bad_service_code(const struct sock *sk, * This is used for transmission as well as for reception. 
*/ struct dccp_skb_cb { + union { + struct inet_skb_parm h4; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + struct inet6_skb_parm h6; +#endif + } header; __u8 dccpd_type:4; __u8 dccpd_ccval:4; __u8 dccpd_reset_code, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 474075adbde..b3370441555 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -489,7 +489,6 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr, ireq->rmt_addr); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, ireq->rmt_addr, ireq->opt); diff --git a/net/dccp/output.c b/net/dccp/output.c index 3b763db3d86..3d7d628d870 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -126,7 +126,6 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = icsk->icsk_af_ops->queue_xmit(skb, 0); return net_xmit_eval(err); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index e3f5d37b84b..c91d3c1fd30 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1057,6 +1057,9 @@ static int __init dccp_init(void) int ehash_order, bhash_order, i; int rc = -ENOBUFS; + BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > + FIELD_SIZEOF(struct sk_buff, cb)); + dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, -- cgit From e56cfad132f2ae269082359d279c17230c987e74 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sat, 12 Apr 2008 18:37:13 -0700 Subject: [NET_SCHED] cls_u32: refcounting fix for u32_delete() Deleting of nonroot hnodes mostly doesn't work in u32_delete(): refcnt == 1 is expected, but such hnodes' refcnts are initialized with 0 and charged only with "link" nodes. Now they'll start with 1 like usual. Thanks to Patrick McHardy for an improving suggestion. 
Signed-off-by: Jarek Poplawski Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c5c16b4b6e9..4d755444c44 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -411,8 +411,10 @@ static void u32_destroy(struct tcf_proto *tp) } } - for (ht=tp_c->hlist; ht; ht = ht->next) + for (ht = tp_c->hlist; ht; ht = ht->next) { + ht->refcnt--; u32_clear_hnode(tp, ht); + } while ((ht = tp_c->hlist) != NULL) { tp_c->hlist = ht->next; @@ -441,8 +443,12 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) if (tp->root == ht) return -EINVAL; - if (--ht->refcnt == 0) + if (ht->refcnt == 1) { + ht->refcnt--; u32_destroy_hnode(tp, ht); + } else { + return -EBUSY; + } return 0; } @@ -568,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, if (ht == NULL) return -ENOBUFS; ht->tp_c = tp_c; - ht->refcnt = 0; + ht->refcnt = 1; ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; -- cgit From 72da7b3860cabf427590b4982bc880bafab4d5c8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 12 Apr 2008 18:39:19 -0700 Subject: [SCTP]: Add check for hmac_algo parameter in sctp_verify_param() RFC 4895 has the following text: The HMAC algorithm based on SHA-1 MUST be supported and included in the HMAC-ALGO parameter. As a result, we need to check in sctp_verify_param() that HMAC_SHA1 is present in the list. If not, we should probably treat this as a protocol violation. It should also be a protocol violation if the HMAC parameter is empty. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S.
Miller --- net/sctp/sm_make_chunk.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 578630e8e00..36ebb392472 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1982,7 +1982,10 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_chunk **err_chunk) { + struct sctp_hmac_algo_param *hmacs; int retval = SCTP_IERROR_NO_ERROR; + __u16 n_elt, id = 0; + int i; /* FIXME - This routine is not looking at each parameter per the * chunk type, i.e., unrecognized parameters should be further @@ -2056,9 +2059,29 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, break; case SCTP_PARAM_HMAC_ALGO: - if (sctp_auth_enable) - break; - /* Fall Through */ + if (!sctp_auth_enable) + goto fallthrough; + + hmacs = (struct sctp_hmac_algo_param *)param.p; + n_elt = (ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) >> 1; + + /* SCTP-AUTH: Section 6.1 + * The HMAC algorithm based on SHA-1 MUST be supported and + * included in the HMAC-ALGO parameter. + */ + for (i = 0; i < n_elt; i++) { + id = ntohs(hmacs->hmac_ids[i]); + + if (id == SCTP_AUTH_HMAC_ID_SHA1) + break; + } + + if (id != SCTP_AUTH_HMAC_ID_SHA1) { + sctp_process_inv_paramlength(asoc, param.p, chunk, + err_chunk); + retval = SCTP_IERROR_ABORT; + } + break; fallthrough: default: SCTP_DEBUG_PRINTK("Unrecognized param: %d for chunk %d.\n", -- cgit From f4ad85ca3ef8a1ede76c5020a28a8f4057b4d24f Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Sat, 12 Apr 2008 18:39:34 -0700 Subject: [SCTP]: Fix protocol violation when receiving an error length INIT-ACK When receiving an error length INIT-ACK during COOKIE-WAIT, a 0-vtag ABORT is sent in response. This action violates the protocol apparently. This patch achieves the following things.
1 If the INIT-ACK contains all the fixed parameters, use init-tag recorded from INIT-ACK as vtag. 2 If the INIT-ACK doesn't contain all the fixed parameters, just reflect its vtag. Signed-off-by: Gui Jianfeng Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 3 +++ net/sctp/sm_sideeffect.c | 3 +++ net/sctp/sm_statefuns.c | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 1bb3c5c35d2..c0714469233 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -793,6 +793,9 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) break; case SCTP_CID_ABORT: + if (sctp_test_T_bit(chunk)) { + packet->vtag = asoc->c.my_vtag; + } case SCTP_CID_SACK: case SCTP_CID_HEARTBEAT: case SCTP_CID_HEARTBEAT_ACK: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 28eb38eb608..a4763fd24fd 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1536,6 +1536,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, error = sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC); break; + case SCTP_CMD_UPDATE_INITTAG: + asoc->peer.i.init_tag = cmd->obj.u32; + break; default: printk(KERN_WARNING "Impossible command: %u, %p\n", diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index f2ed6473fee..3ef97499df0 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4144,6 +4144,24 @@ static sctp_disposition_t sctp_sf_abort_violation( goto nomem; if (asoc) { + /* Treat INIT-ACK as a special case during COOKIE-WAIT. 
*/ + if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK && + !asoc->peer.i.init_tag) { + sctp_initack_chunk_t *initack; + + initack = (sctp_initack_chunk_t *)chunk->chunk_hdr; + if (!sctp_chunk_length_valid(chunk, + sizeof(sctp_initack_chunk_t))) + abort->chunk_hdr->flags |= SCTP_CHUNK_FLAG_T; + else { + unsigned int inittag; + + inittag = ntohl(initack->init_hdr.init_tag); + sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_INITTAG, + SCTP_U32(inittag)); + } + } + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); -- cgit From ab38fb04c9f8928cfaf6f4966633d783419906a1 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 12 Apr 2008 18:40:06 -0700 Subject: [SCTP]: Fix compiler warning about const qualifiers Fix 3 warnings about discarding const qualifiers: net/sctp/ulpevent.c:862: warning: passing argument 1 of 'sctp_event2skb' discards qualifiers from pointer target type net/sctp/sm_statefuns.c:4393: warning: passing argument 1 of 'SCTP_ASOC' discards qualifiers from pointer target type net/sctp/socket.c:5874: warning: passing argument 1 of 'cmsg_nxthdr' discards qualifiers from pointer target type Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/sm_statefuns.c | 5 +++-- net/sctp/socket.c | 5 +++-- net/sctp/ulpevent.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 3ef97499df0..07194c2a32d 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4367,6 +4367,7 @@ sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_chunk *repl; + struct sctp_association* my_asoc; /* The comment below says that we enter COOKIE-WAIT AFTER * sending the INIT, but that doesn't actually work in our @@ -4390,8 +4391,8 @@ sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep, /* Cast away the const modifier, as we want to just * rerun it through as a sideffect. */ - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, - SCTP_ASOC((struct sctp_association *) asoc)); + my_asoc = (struct sctp_association *)asoc; + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(my_asoc)); /* Choose transport for INIT. */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d994d822900..998e63a3131 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5868,11 +5868,12 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) { struct cmsghdr *cmsg; + struct msghdr *my_msg = (struct msghdr *)msg; for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; - cmsg = CMSG_NXTHDR((struct msghdr*)msg, cmsg)) { - if (!CMSG_OK(msg, cmsg)) + cmsg = CMSG_NXTHDR(my_msg, cmsg)) { + if (!CMSG_OK(my_msg, cmsg)) return -EINVAL; /* Should we parse this header or ignore? 
*/ diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index b43f1f110f8..ce6cda6b699 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -859,7 +859,7 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event) union sctp_notification *notification; struct sk_buff *skb; - skb = sctp_event2skb((struct sctp_ulpevent *)event); + skb = sctp_event2skb(event); notification = (union sctp_notification *) skb->data; return notification->sn_header.sn_type; } -- cgit From a40a7d15ba602b547f56b7b19e0282fe4fc3dee3 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 12 Apr 2008 18:40:38 -0700 Subject: [SCTP]: IPv4 vs IPv6 addresses mess in sctp_inet[6]addr_event. All IP addresses that are present in a system are duplicated on struct sctp_sockaddr_entry. They are linked in the global list called sctp_local_addr_list. And this struct unions IPv4 and IPv6 addresses. So, there can be rare case, when a sockaddr_in.sin_addr coincides with the corresponding part of the sockaddr_in6 and the notifier for IPv4 will carry away an IPv6 entry. The fix is to check the family before comparing the addresses. Signed-off-by: Pavel Emelyanov Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/ipv6.c | 5 +++-- net/sctp/protocol.c | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index b1e05d719f9..85f1495e0ed 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -110,8 +110,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, spin_lock_bh(&sctp_local_addr_lock); list_for_each_entry_safe(addr, temp, &sctp_local_addr_list, list) { - if (ipv6_addr_equal(&addr->a.v6.sin6_addr, - &ifa->addr)) { + if (addr->a.sa.sa_family == AF_INET6 && + ipv6_addr_equal(&addr->a.v6.sin6_addr, + &ifa->addr)) { found = 1; addr->valid = 0; list_del_rcu(&addr->list); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f90091a1b9c..c2dd65d9f38 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -647,7 +647,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, spin_lock_bh(&sctp_local_addr_lock); list_for_each_entry_safe(addr, temp, &sctp_local_addr_list, list) { - if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { + if (addr->a.sa.sa_family == AF_INET && + addr->a.v4.sin_addr.s_addr == + ifa->ifa_local) { found = 1; addr->valid = 0; list_del_rcu(&addr->list); -- cgit From f37f0afb2916ccf287428983026261db78c7661a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 13 Apr 2008 21:39:26 -0700 Subject: [SOCK] sk_stamp: should be initialized to ktime_set(-1L, 0) Problem spotted by Andrew Brampton Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 2654c147c00..7a0567b4b2c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1725,7 +1725,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; - sk->sk_stamp = ktime_set(-1L, -1L); + sk->sk_stamp = ktime_set(-1L, 0); atomic_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_drops, 0); -- cgit From 4c821d753d5c097babd6609bcd85f08e254a3505 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 13 Apr 2008 21:52:48 -0700 Subject: [NET]: Fix kernel-doc for skb_segment The kernel-doc comment for skb_segment is clearly wrong. This states what it actually does. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0d0fd28a904..60870133962 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2131,8 +2131,8 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); * @features: features for the output path (see dev->features) * * This function performs segmentation on the given skb. It returns - * the segment at the given position. It returns NULL if there are - * no more segments to generate, or when an error is encountered. + * a pointer to the first in a list of new skbs for the segments. + * In case of error it returns ERR_PTR(err). */ struct sk_buff *skb_segment(struct sk_buff *skb, int features) { -- cgit From 2ed9926e16094ad143b96b09c64cba8bcba05ee1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 13 Apr 2008 22:45:40 -0700 Subject: [NET]: Return more appropriate error from eth_validate_addr(). 
Paul Bolle wrote: > http://bugzilla.kernel.org/show_bug.cgi?id=9923 would have been much easier to > track down if eth_validate_addr() would somehow complain aloud if an address > is invalid. Shouldn't it make at least some noise? I guess it should return -EADDRNOTAVAIL similar to eth_mac_addr() when validation fails. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ethernet/eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a7b417523e9..a80839b02e3 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -301,7 +301,7 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu) static int eth_validate_addr(struct net_device *dev) { if (!is_valid_ether_addr(dev->dev_addr)) - return -EINVAL; + return -EADDRNOTAVAIL; return 0; } -- cgit From b45e9189c058bfa495073951ff461ee0eea968be Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 13 Apr 2008 23:14:15 -0700 Subject: [IPV6]: Fix ipv6 address fetching in raw6_icmp_error(). Fixes kernel bugzilla 10437 Based almost entirely upon a patch by Dmitry Butskoy. When deciding what raw sockets to deliver the ICMPv6 to, we should use the addresses in the ICMPv6 quoted IPV6 header, not the top-level one. Signed-off-by: David S. 
Miller --- net/ipv6/raw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8897ccf8086..0a6fbc1d1a5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -372,8 +372,10 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); if (sk != NULL) { - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; + struct ipv6hdr *hdr = (struct ipv6hdr *) skb->data; + + saddr = &hdr->saddr; + daddr = &hdr->daddr; net = skb->dev->nd_net; while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, -- cgit From b077d7ababdb5433aef18c62bf1f785e8729f49a Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sun, 13 Apr 2008 23:42:18 -0700 Subject: [IPV6] ADDRCONF: Ensure disabling multicast RS even if privacy extensions are disabled. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a65935a9afd..b9eeb4f51d4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -371,6 +371,15 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) */ in6_dev_hold(ndev); +#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) + if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { + printk(KERN_INFO + "%s: Disabled Multicast RS\n", + dev->name); + ndev->cnf.rtr_solicits = 0; + } +#endif + #ifdef CONFIG_IPV6_PRIVACY setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || @@ -383,13 +392,6 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) "%s: Disabled Privacy Extensions\n", dev->name); ndev->cnf.use_tempaddr = -1; - - if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { - printk(KERN_INFO - "%s: Disabled Multicast RS\n", - dev->name); - 
ndev->cnf.rtr_solicits = 0; - } } else { in6_dev_hold(ndev); ipv6_regen_rndid((unsigned long) ndev); -- cgit From 9625ed72e8bd619c3984f3024bd37143b7f0c7b0 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sun, 13 Apr 2008 23:47:11 -0700 Subject: [IPV6] ADDRCONF: Don't generate temporary address for ip6-ip6 interface. As far as I can remember, I was going to disable privacy extensions on all "tunnel" interfaces. Disable it on ip6-ip6 interface as well. Also, just remove ifdefs for SIT for simplicity. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b9eeb4f51d4..e08955baedf 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -384,9 +384,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) + dev->type == ARPHRD_TUNNEL6 || dev->type == ARPHRD_SIT || -#endif dev->type == ARPHRD_NONE) { printk(KERN_INFO "%s: Disabled Privacy Extensions\n", -- cgit From 4dee959723e2bf3a0f9343a46841cd2f0029d424 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 14 Apr 2008 00:44:52 -0700 Subject: [NETFILTER]: ipt_CLUSTERIP: fix race between clusterip_config_find_get and _entry_put Consider we are putting a clusterip_config entry with the "entries" count == 1, and on the other CPU there's a clusterip_config_find_get in progress: CPU1: CPU2: clusterip_config_entry_put: clusterip_config_find_get: if (atomic_dec_and_test(&c->entries)) { /* true */ read_lock_bh(&clusterip_lock); c = __clusterip_config_find(clusterip); /* found - it's still in list */ ... 
atomic_inc(&c->entries); read_unlock_bh(&clusterip_lock); write_lock_bh(&clusterip_lock); list_del(&c->list); write_unlock_bh(&clusterip_lock); ... dev_put(c->dev); Oops! We have an entry returned by the clusterip_config_find_get, which is a) not in list b) has a stale dev pointer. The problems will happen when the CPU2 will release the entry - it will remove it from the list for the 2nd time, thus spoiling it, and will put a stale dev pointer. The fix is to make atomic_dec_and_test under the clusterip_lock. Signed-off-by: Pavel Emelyanov Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 52926c8e3cc..a12dd329e20 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -82,8 +82,8 @@ clusterip_config_put(struct clusterip_config *c) static inline void clusterip_config_entry_put(struct clusterip_config *c) { + write_lock_bh(&clusterip_lock); if (atomic_dec_and_test(&c->entries)) { - write_lock_bh(&clusterip_lock); list_del(&c->list); write_unlock_bh(&clusterip_lock); @@ -96,7 +96,9 @@ clusterip_config_entry_put(struct clusterip_config *c) #ifdef CONFIG_PROC_FS remove_proc_entry(c->pde->name, c->pde->parent); #endif + return; } + write_unlock_bh(&clusterip_lock); } static struct clusterip_config * -- cgit From 159d83363b629c91d020734207c1bc788b96af5a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Apr 2008 00:46:01 -0700 Subject: [BRIDGE]: Fix crash in __ip_route_output_key with bridge netfilter The bridge netfilter code attaches a fake dst_entry with a pointer to a fake net_device structure to skbs it passes up to IPv4 netfilter. This leads to crashes when the skb is passed to __ip_route_output_key when dereferencing the namespace pointer. 
Since bridging can currently only operate in the init_net namespace, the easiest fix for now is to initialize the nd_net pointer of the fake net_device struct to &init_net. Should fix bugzilla 10323: http://bugzilla.kernel.org/show_bug.cgi?id=10323 Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1c0efd8ad9f..af7e8be8d8d 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -110,7 +110,8 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) * ipt_REJECT needs it. Future netfilter modules might * require us to fill additional fields. */ static struct net_device __fake_net_device = { - .hard_header_len = ETH_HLEN + .hard_header_len = ETH_HLEN, + .nd_net = &init_net, }; static struct rtable __fake_rtable = { -- cgit