From 830a1e5c212fb3fdc83b66359c780c3b3a294897 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Tue, 13 Dec 2005 23:22:32 -0800 Subject: [AF_UNIX]: Remove superfluous reference counting in unix_stream_sendmsg AF_UNIX stream socket performance on P4 CPUs tends to suffer due to a lot of pipeline flushes from atomic operations. The patch below removes the sock_hold() and sock_put() in unix_stream_sendmsg(). This should be safe as the socket still holds a reference to its peer which is only released after the file descriptor's final user invokes unix_release_sock(). The only consideration is that we must add a memory barrier before setting the peer initially. Signed-off-by: Benjamin LaHaise Signed-off-by: David S. Miller --- net/unix/af_unix.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index acc73ba8bad..1dc3685048f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1063,10 +1063,12 @@ restart: /* Set credentials */ sk->sk_peercred = other->sk_peercred; - sock_hold(newsk); - unix_peer(sk) = newsk; sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; + sock_hold(newsk); + + smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */ + unix_peer(sk) = newsk; unix_state_wunlock(sk); @@ -1414,7 +1416,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, } else { sunaddr = NULL; err = -ENOTCONN; - other = unix_peer_get(sk); + other = unix_peer(sk); if (!other) goto out_err; } @@ -1476,7 +1478,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, other->sk_data_ready(other, size); sent+=size; } - sock_put(other); scm_destroy(siocb->scm); siocb->scm = NULL; @@ -1491,8 +1492,6 @@ pipe_err: send_sig(SIGPIPE,current,0); err = -EPIPE; out_err: - if (other) - sock_put(other); scm_destroy(siocb->scm); siocb->scm = NULL; return sent ? : err; -- cgit From fbe9cc4a87030d5cad5f944ffaef6af7efd119e4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Dec 2005 23:26:29 -0800 Subject: [AF_UNIX]: Use spinlock for unix_table_lock This lock is actually taken mostly as a writer, so using a rwlock actually just makes performance worse especially on chips like the Intel P4. Signed-off-by: David S. Miller --- net/unix/af_unix.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1dc3685048f..04e850e04e3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -121,7 +121,7 @@ int sysctl_unix_max_dgram_qlen = 10; struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; -DEFINE_RWLOCK(unix_table_lock); +DEFINE_SPINLOCK(unix_table_lock); static atomic_t unix_nr_socks = ATOMIC_INIT(0); #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) @@ -130,7 +130,7 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0); /* * SMP locking strategy: - * hash table is protected with rwlock unix_table_lock + * hash table is protected with spinlock unix_table_lock * each socket state is protected by separate rwlock. */ @@ -214,16 +214,16 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) static inline void unix_remove_socket(struct sock *sk) { - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); __unix_remove_socket(sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) { - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); __unix_insert_socket(list, sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, @@ -250,11 +250,11 @@ static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, { struct sock *s; - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); s = __unix_find_socket_byname(sunname, len, type, hash); if (s) sock_hold(s); - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); return s; } @@ -263,7 +263,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) struct sock *s; struct hlist_node *node; - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); sk_for_each(s, node, &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; @@ -276,7 +276,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) } s = NULL; found: - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); return s; } @@ -642,12 +642,12 @@ retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; if (__unix_find_socket_byname(addr->name, addr->len, sock->type, addr->hash)) { - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); /* Sanity yield. It is unusual case, but yet... */ if (!(ordernum&0xFF)) yield(); @@ -658,7 +658,7 @@ retry: __unix_remove_socket(sk); u->addr = addr; __unix_insert_socket(&unix_socket_table[addr->hash], sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); err = 0; out: up(&u->readsem); @@ -791,7 +791,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) addr->hash = UNIX_HASH_SIZE; } - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); if (!sunaddr->sun_path[0]) { err = -EADDRINUSE; @@ -814,7 +814,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) __unix_insert_socket(list, sk); out_unlock: - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); out_up: up(&u->readsem); out: @@ -1916,7 +1916,7 @@ static struct sock *unix_seq_idx(int *iter, loff_t pos) static void *unix_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); } @@ -1931,7 +1931,7 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void unix_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static int unix_seq_show(struct seq_file *seq, void *v) -- cgit From 90ddc4f0470427df306f308ad03db6b6b21644b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Dec 2005 12:49:22 -0800 Subject: [NET]: move struct proto_ops to const I noticed that some of 'struct proto_ops' used in the kernel may share a cache line used by locks or other heavily modified data. (default linker alignement is 32 bytes, and L1_CACHE_LINE is 64 or 128 at least) This patch makes sure a 'struct proto_ops' can be declared as const, so that all cpus can share all parts of it without false sharing. This is not mandatory : a driver can still use a read/write structure if it needs to (and eventually a __read_mostly) I made a global stubstitute to change all existing occurences to make them const. This should reduce the possibility of false sharing on SMP, and speedup some socket system calls. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 04e850e04e3..7d3fe6aebcd 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -473,7 +473,7 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *, static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); -static struct proto_ops unix_stream_ops = { +static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -494,7 +494,7 @@ static struct proto_ops unix_stream_ops = { .sendpage = sock_no_sendpage, }; -static struct proto_ops unix_dgram_ops = { +static const struct proto_ops unix_dgram_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -515,7 +515,7 @@ static struct proto_ops unix_dgram_ops = { .sendpage = sock_no_sendpage, }; -static struct proto_ops unix_seqpacket_ops = { +static const struct proto_ops unix_seqpacket_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, -- cgit From fd19f329a32bdc4eb07885e0b3889567cfe00aa7 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Tue, 3 Jan 2006 14:10:46 -0800 Subject: [AF_UNIX]: Convert to use a spinlock instead of rwlock From: Benjamin LaHaise In af_unix, a rwlock is used to protect internal state. At least on my P4 with HT it is faster to use a spinlock due to the simpler memory barrier used to unlock. This patch raises bw_unix to ~690K/s. Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7d3fe6aebcd..1ddd36d5009 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -564,7 +564,7 @@ static struct sock * unix_create1(struct socket *sock) u = unix_sk(sk); u->dentry = NULL; u->mnt = NULL; - rwlock_init(&u->lock); + spin_lock_init(&u->lock); atomic_set(&u->inflight, sock ? 0 : -1); init_MUTEX(&u->readsem); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); -- cgit From b5e5fa5e093e42cab4ee3d6dcbc4f450ad29a723 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Jan 2006 14:18:33 -0800 Subject: [NET]: Add a dev_ioctl() fallback to sock_ioctl() Currently all network protocols need to call dev_ioctl as the default fallback in their ioctl implementations. This patch adds a fallback to dev_ioctl to sock_ioctl if the protocol returned -ENOIOCTLCMD. This way all the procotol ioctl handlers can be simplified and we don't need to export dev_ioctl. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1ddd36d5009..5f6ae79b8b1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1859,7 +1859,7 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } default: - err = dev_ioctl(cmd, (void __user *)arg); + err = -ENOIOCTLCMD; break; } return err; -- cgit