From 20c2df83d25c6a95affe6157a4c9cac4cf5ffaac Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 20 Jul 2007 10:11:58 +0900 Subject: mm: Remove slab destructors from kmem_cache_create(). Slab destructors were no longer supported after Christoph's c59def9f222d44bb7e2f0a559f2906191a0862d7 change. They've been BUGs for both slab and slub, and slob never supported them either. This rips out support for the dtor pointer from kmem_cache_create() completely and fixes up every single callsite in the kernel (there were about 224, not including the slab allocator definitions themselves, or the documentation references). Signed-off-by: Paul Mundt --- net/sctp/protocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sctp/protocol.c') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 34bab36637a..e98579b788b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -980,14 +980,14 @@ SCTP_STATIC __init int sctp_init(void) sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket", sizeof(struct sctp_bind_bucket), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!sctp_bucket_cachep) goto out; sctp_chunk_cachep = kmem_cache_create("sctp_chunk", sizeof(struct sctp_chunk), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!sctp_chunk_cachep) goto err_chunk_cachep; -- cgit From 293035479942400a7fe8e4f72465d4e4e466b91a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sun, 16 Sep 2007 16:02:12 -0700 Subject: [SCTP]: Add RCU synchronization around sctp_localaddr_list sctp_localaddr_list is modified dynamically via NETDEV_UP and NETDEV_DOWN events, but there is not synchronization between writer (even handler) and readers. As a result, the readers can access an entry that has been freed and crash the sytem. Signed-off-by: Vlad Yasevich Acked-by: Paul E. McKenney Acked-by: Sridhar Samdurala Signed-off-by: David S. Miller --- net/sctp/protocol.c | 54 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 15 deletions(-) (limited to 'net/sctp/protocol.c') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e98579b788b..7ee120e8591 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -153,6 +153,9 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; + addr->valid = 1; + INIT_LIST_HEAD(&addr->list); + INIT_RCU_HEAD(&addr->rcu); list_add_tail(&addr->list, addrlist); } } @@ -192,16 +195,24 @@ static void sctp_free_local_addr_list(void) } } +void sctp_local_addr_free(struct rcu_head *head) +{ + struct sctp_sockaddr_entry *e = container_of(head, + struct sctp_sockaddr_entry, rcu); + kfree(e); +} + /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; - struct list_head *pos, *temp; - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + if (!addr->valid) + continue; if (sctp_in_scope(&addr->a, scope)) { /* Now that the address is in scope, check to see if * the address type is really supported by the local @@ -221,6 +232,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, } end_copy: + rcu_read_unlock(); return error; } @@ -600,13 +612,18 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); } -/* Event handler for inet address addition/deletion events. */ +/* Event handler for inet address addition/deletion events. + * The sctp_local_addr_list needs to be protocted by a spin lock since + * multiple notifiers (say IPv4 and IPv6) may be running at the same + * time and thus corrupt the list. + * The reader side is protected with RCU. + */ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; - struct sctp_sockaddr_entry *addr; - struct list_head *pos, *temp; + struct sctp_sockaddr_entry *addr = NULL; + struct sctp_sockaddr_entry *temp; switch (ev) { case NETDEV_UP: @@ -615,19 +632,25 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; - list_add_tail(&addr->list, &sctp_local_addr_list); + addr->valid = 1; + spin_lock_bh(&sctp_local_addr_lock); + list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + spin_unlock_bh(&sctp_local_addr_lock); } break; case NETDEV_DOWN: - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + spin_lock_bh(&sctp_local_addr_lock); + list_for_each_entry_safe(addr, temp, + &sctp_local_addr_list, list) { if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { - list_del(pos); - kfree(addr); + addr->valid = 0; + list_del_rcu(&addr->list); break; } } - + spin_unlock_bh(&sctp_local_addr_lock); + if (addr && !addr->valid) + call_rcu(&addr->rcu, sctp_local_addr_free); break; } @@ -1160,6 +1183,7 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the local address list. */ INIT_LIST_HEAD(&sctp_local_addr_list); + spin_lock_init(&sctp_local_addr_lock); sctp_get_local_addr_list(); /* Register notifier for inet address additions/deletions. */ @@ -1227,6 +1251,9 @@ SCTP_STATIC __exit void sctp_exit(void) sctp_v6_del_protocol(); inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); + /* Unregister notifier for inet address additions/deletions. */ + unregister_inetaddr_notifier(&sctp_inetaddr_notifier); + /* Free the local address list. */ sctp_free_local_addr_list(); @@ -1240,9 +1267,6 @@ SCTP_STATIC __exit void sctp_exit(void) inet_unregister_protosw(&sctp_stream_protosw); inet_unregister_protosw(&sctp_seqpacket_protosw); - /* Unregister notifier for inet address additions/deletions. */ - unregister_inetaddr_notifier(&sctp_inetaddr_notifier); - sctp_sysctl_unregister(); list_del(&sctp_ipv4_specific.list); -- cgit From 559cf710b07c5e2cfa3fb8d8f4a1320fd84c53f9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sun, 16 Sep 2007 16:03:28 -0700 Subject: [SCTP]: Convert bind_addr_list locking to RCU Since the sctp_sockaddr_entry is now RCU enabled as part of the patch to synchronize sctp_localaddr_list, it makes sense to change all handling of these entries to RCU. This includes the sctp_bind_addrs structure and it's list of bound addresses. This list is currently protected by an external rw_lock and that looks like an overkill. There are only 2 writers to the list: bind()/bindx() calls, and BH processing of ASCONF-ACK chunks. These are already seriealized via the socket lock, so they will not step on each other. These are also relatively rare, so we should be good with RCU. The readers are varied and they are easily converted to RCU. Signed-off-by: Vlad Yasevich Acked-by: Paul E. McKenney Acked-by: Sridhar Samdurala Signed-off-by: David S. Miller --- net/sctp/protocol.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'net/sctp/protocol.c') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 7ee120e8591..3d036cdfae4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -224,7 +224,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, (copy_flags & SCTP_ADDR6_ALLOWED) && (copy_flags & SCTP_ADDR6_PEERSUPP)))) { error = sctp_add_bind_addr(bp, &addr->a, 1, - GFP_ATOMIC); + GFP_ATOMIC); if (error) goto end_copy; } @@ -428,9 +428,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, struct rtable *rt; struct flowi fl; struct sctp_bind_addr *bp; - rwlock_t *addr_lock; struct sctp_sockaddr_entry *laddr; - struct list_head *pos; struct dst_entry *dst = NULL; union sctp_addr dst_saddr; @@ -459,23 +457,20 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, goto out; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; if (dst) { /* Walk through the bind address list and look for a bind * address that matches the source address of the returned dst. */ - sctp_read_lock(addr_lock); - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, - list); - if (!laddr->use_as_src) + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid || !laddr->use_as_src) continue; sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; } - sctp_read_unlock(addr_lock); + rcu_read_unlock(); /* None of the bound addresses match the source address of the * dst. So release it. @@ -487,10 +482,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, /* Walk through the bind address list and try to get a dst that * matches a bind address as the source address. */ - sctp_read_lock(addr_lock); - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; if ((laddr->use_as_src) && (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; @@ -502,7 +497,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, } out_unlock: - sctp_read_unlock(addr_lock); + rcu_read_unlock(); out: if (dst) SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n", -- cgit From 4d93df0abd50b9c9e2d4561439a1a1d21ec5e68f Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 15 Aug 2007 16:07:44 -0700 Subject: [SCTP]: Rewrite of sctp buffer management code This patch introduces autotuning to the sctp buffer management code similar to the TCP. The buffer space can be grown if the advertised receive window still has room. This might happen if small message sizes are used, which is common in telecom environmens. New tunables are introduced that provide limits to buffer growth and memory pressure is entered if to much buffer spaces is used. Signed-off-by: Neil Horman Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/protocol.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'net/sctp/protocol.c') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3d036cdfae4..957c118a606 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -82,6 +83,10 @@ static struct sctp_af *sctp_af_v6_specific; struct kmem_cache *sctp_chunk_cachep __read_mostly; struct kmem_cache *sctp_bucket_cachep __read_mostly; +extern int sysctl_sctp_mem[3]; +extern int sysctl_sctp_rmem[3]; +extern int sysctl_sctp_wmem[3]; + /* Return the address of the control sock. */ struct sock *sctp_get_ctl_sock(void) { @@ -987,6 +992,8 @@ SCTP_STATIC __init int sctp_init(void) int i; int status = -EINVAL; unsigned long goal; + unsigned long limit; + int max_share; int order; /* SCTP_DEBUG sanity check. */ @@ -1077,6 +1084,31 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); + /* Set the pressure threshold to be a fraction of global memory that + * is up to 1/2 at 256 MB, decreasing toward zero with the amount of + * memory, with a floor of 128 pages. + * Note this initalizes the data in sctpv6_prot too + * Unabashedly stolen from tcp_init + */ + limit = min(num_physpages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (num_physpages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = max(limit, 128UL); + sysctl_sctp_mem[0] = limit / 4 * 3; + sysctl_sctp_mem[1] = limit; + sysctl_sctp_mem[2] = sysctl_sctp_mem[0] * 2; + + /* Set per-socket limits to no more than 1/128 the pressure threshold*/ + limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7); + max_share = min(4UL*1024*1024, limit); + + sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */ + sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); + sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); + + sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_sctp_wmem[1] = 16*1024; + sysctl_sctp_wmem[2] = max(64*1024, max_share); + /* Size and allocate the association hash table. * The methodology is similar to that of the tcp hash tables. */ -- cgit From 457c4cbc5a3dde259d2a1f15d5f9785290397267 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 12 Sep 2007 12:01:34 +0200 Subject: [NET]: Make /proc/net per network namespace This patch makes /proc/net per network namespace. It modifies the global variables proc_net and proc_net_stat to be per network namespace. The proc_net file helpers are modified to take a network namespace argument, and all of their callers are fixed to pass &init_net for that argument. This ensures that all of the /proc/net files are only visible and usable in the initial network namespace until the code behind them has been updated to be handle multiple network namespaces. Making /proc/net per namespace is necessary as at least some files in /proc/net depend upon the set of network devices which is per network namespace, and even more files in /proc/net have contents that are relevant to a single network namespace. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/sctp/protocol.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/sctp/protocol.c') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 957c118a606..30929e3ca05 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -98,7 +99,7 @@ static __init int sctp_proc_init(void) { if (!proc_net_sctp) { struct proc_dir_entry *ent; - ent = proc_mkdir("net/sctp", NULL); + ent = proc_mkdir("sctp", init_net.proc_net); if (ent) { ent->owner = THIS_MODULE; proc_net_sctp = ent; @@ -131,7 +132,7 @@ static void sctp_proc_exit(void) if (proc_net_sctp) { proc_net_sctp = NULL; - remove_proc_entry("net/sctp", NULL); + remove_proc_entry("sctp", init_net.proc_net); } } -- cgit From 1b8d7ae42d02e483ad94035cca851e4f7fbecb40 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 8 Oct 2007 23:24:22 -0700 Subject: [NET]: Make socket creation namespace safe. This patch passes in the namespace a new socket should be created in and has the socket code do the appropriate reference counting. By virtue of this all socket create methods are touched. In addition the socket create methods are modified so that they will fail if you attempt to create a socket in a non-default network namespace. Failing if we attempt to create a socket outside of the default network namespace ensures that as we incrementally make the network stack network namespace aware we will not export functionality that someone has not audited and made certain is network namespace safe. Allowing us to partially enable network namespaces before all of the exotic protocols are supported. Any protocol layers I have missed will fail to compile because I now pass an extra parameter into the socket creation code. [ Integrated AF_IUCV build fixes from Andrew Morton... -DaveM ] Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/sctp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp/protocol.c') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 30929e3ca05..af67c839ef9 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -552,7 +552,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, { struct inet_sock *inet = inet_sk(sk); struct inet_sock *newinet; - struct sock *newsk = sk_alloc(PF_INET, GFP_KERNEL, sk->sk_prot, 1); + struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, sk->sk_prot, 1); if (!newsk) goto out; -- cgit From 881d966b48b035ab3f3aeaae0f3d3f9b584f45b2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 17 Sep 2007 11:56:21 -0700 Subject: [NET]: Make the device list and device lookups per namespace. This patch makes most of the generic device layer network namespace safe. This patch makes dev_base_head a network namespace variable, and then it picks up a few associated variables. The functions: dev_getbyhwaddr dev_getfirsthwbytype dev_get_by_flags dev_get_by_name __dev_get_by_name dev_get_by_index __dev_get_by_index dev_ioctl dev_ethtool dev_load wireless_process_ioctl were modified to take a network namespace argument, and deal with it. vlan_ioctl_set and brioctl_set were modified so their hooks will receive a network namespace argument. So basically anthing in the core of the network stack that was affected to by the change of dev_base was modified to handle multiple network namespaces. The rest of the network stack was simply modified to explicitly use &init_net the initial network namespace. This can be fixed when those components of the network stack are modified to handle multiple network namespaces. For now the ifindex generator is left global. Fundametally ifindex numbers are per namespace, or else we will have corner case problems with migration when we get that far. At the same time there are assumptions in the network stack that the ifindex of a network device won't change. Making the ifindex number global seems a good compromise until the network stack can cope with ifindex changes when you change namespaces, and the like. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/sctp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp/protocol.c') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index af67c839ef9..54edcd978f7 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -179,7 +179,7 @@ static void sctp_get_local_addr_list(void) struct sctp_af *af; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); -- cgit From 007e3936bdaaa012483c9fe06ca71c272458c710 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sun, 16 Sep 2007 16:04:37 -0700 Subject: [SCTP]: Move sysctl_sctp_[rw]mem definitions to protocol.c The sctp_[rw]mem definitions should really be in protocol.c since that is where they are initialized. This also allows one to build a kernel without sysctl support. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/protocol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/sctp/protocol.c') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 54edcd978f7..3ec8b12b6da 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -84,9 +84,9 @@ static struct sctp_af *sctp_af_v6_specific; struct kmem_cache *sctp_chunk_cachep __read_mostly; struct kmem_cache *sctp_bucket_cachep __read_mostly; -extern int sysctl_sctp_mem[3]; -extern int sysctl_sctp_rmem[3]; -extern int sysctl_sctp_wmem[3]; +int sysctl_sctp_mem[3]; +int sysctl_sctp_rmem[3]; +int sysctl_sctp_wmem[3]; /* Return the address of the control sock. */ struct sock *sctp_get_ctl_sock(void) -- cgit From a29a5bd4f5c3e8ba2e89688feab8b01c44f1654f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sun, 16 Sep 2007 19:31:35 -0700 Subject: [SCTP]: Implement SCTP-AUTH initializations. The patch initializes AUTH related members of the generic SCTP structures and provides a way to enable/disable auth extension. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/protocol.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/sctp/protocol.c') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3ec8b12b6da..4e6b59e8b69 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1185,6 +1185,9 @@ SCTP_STATIC __init int sctp_init(void) /* Enable PR-SCTP by default. */ sctp_prsctp_enable = 1; + /* Disable AUTH by default. */ + sctp_auth_enable = 0; + sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); -- cgit From 06393009000779b00a558fd2f280882cc7dc2008 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 10 Oct 2007 17:30:18 -0700 Subject: [SCTP]: port randomization Add port randomization rather than a simple fixed rover for use with SCTP. This makes it act similar to TCP, UDP, DCCP when allocating ports. No longer need port_alloc_lock as well (suggestion by Brian Haley). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sctp/protocol.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net/sctp/protocol.c') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 4e6b59e8b69..81b26c5ffd4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1172,9 +1172,6 @@ SCTP_STATIC __init int sctp_init(void) sctp_port_hashtable[i].chain = NULL; } - spin_lock_init(&sctp_port_alloc_lock); - sctp_port_rover = sysctl_local_port_range[0] - 1; - printk(KERN_INFO "SCTP: Hash tables configured " "(established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); -- cgit