diff options
Diffstat (limited to 'net')
247 files changed, 7837 insertions, 7068 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c index 0549317b935..f1611a1e06a 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -167,23 +167,27 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(fddi_type_trans); -static int fddi_change_mtu(struct net_device *dev, int new_mtu) +int fddi_change_mtu(struct net_device *dev, int new_mtu) { if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN)) return(-EINVAL); dev->mtu = new_mtu; return(0); } +EXPORT_SYMBOL(fddi_change_mtu); static const struct header_ops fddi_header_ops = { .create = fddi_header, .rebuild = fddi_rebuild_header, }; + static void fddi_setup(struct net_device *dev) { - dev->change_mtu = fddi_change_mtu; dev->header_ops = &fddi_header_ops; +#ifdef CONFIG_COMPAT_NET_DEV_OPS + dev->change_mtu = fddi_change_mtu, +#endif dev->type = ARPHRD_FDDI; dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ diff --git a/net/802/hippi.c b/net/802/hippi.c index e35dc1e0915..313b9ebf92e 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -144,7 +144,7 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(hippi_type_trans); -static int hippi_change_mtu(struct net_device *dev, int new_mtu) +int hippi_change_mtu(struct net_device *dev, int new_mtu) { /* * HIPPI's got these nice large MTUs. @@ -154,12 +154,13 @@ static int hippi_change_mtu(struct net_device *dev, int new_mtu) dev->mtu = new_mtu; return(0); } +EXPORT_SYMBOL(hippi_change_mtu); /* * For HIPPI we will actually use the lower 4 bytes of the hardware * address as the I-FIELD rather than the actual hardware address. 
*/ -static int hippi_mac_addr(struct net_device *dev, void *p) +int hippi_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; if (netif_running(dev)) @@ -167,8 +168,9 @@ static int hippi_mac_addr(struct net_device *dev, void *p) memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); return 0; } +EXPORT_SYMBOL(hippi_mac_addr); -static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) +int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) { /* Never send broadcast/multicast ARP messages */ p->mcast_probes = 0; @@ -181,6 +183,7 @@ static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) p->ucast_probes = 0; return 0; } +EXPORT_SYMBOL(hippi_neigh_setup_dev); static const struct header_ops hippi_header_ops = { .create = hippi_header, @@ -190,11 +193,12 @@ static const struct header_ops hippi_header_ops = { static void hippi_setup(struct net_device *dev) { - dev->set_multicast_list = NULL; +#ifdef CONFIG_COMPAT_NET_DEV_OPS dev->change_mtu = hippi_change_mtu; - dev->header_ops = &hippi_header_ops; dev->set_mac_address = hippi_mac_addr; dev->neigh_setup = hippi_neigh_setup_dev; +#endif + dev->header_ops = &hippi_header_ops; /* * We don't support HIPPI `ARP' for the time being, and probably diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 72326b9c759..41e8f65bd3f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -144,6 +144,7 @@ void unregister_vlan_dev(struct net_device *dev) { struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; struct vlan_group *grp; u16 vlan_id = vlan->vlan_id; @@ -156,7 +157,7 @@ void unregister_vlan_dev(struct net_device *dev) * HW accelerating devices or SW vlan input packet processing. 
*/ if (real_dev->features & NETIF_F_HW_VLAN_FILTER) - real_dev->vlan_rx_kill_vid(real_dev, vlan_id); + ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id); vlan_group_set_device(grp, vlan_id, NULL); grp->nr_vlans--; @@ -170,7 +171,7 @@ void unregister_vlan_dev(struct net_device *dev) vlan_gvrp_uninit_applicant(real_dev); if (real_dev->features & NETIF_F_HW_VLAN_RX) - real_dev->vlan_rx_register(real_dev, NULL); + ops->ndo_vlan_rx_register(real_dev, NULL); hlist_del_rcu(&grp->hlist); @@ -205,21 +206,21 @@ static void vlan_transfer_operstate(const struct net_device *dev, int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) { - char *name = real_dev->name; + const char *name = real_dev->name; + const struct net_device_ops *ops = real_dev->netdev_ops; if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { pr_info("8021q: VLANs not supported on %s\n", name); return -EOPNOTSUPP; } - if ((real_dev->features & NETIF_F_HW_VLAN_RX) && - !real_dev->vlan_rx_register) { + if ((real_dev->features & NETIF_F_HW_VLAN_RX) && !ops->ndo_vlan_rx_register) { pr_info("8021q: device %s has buggy VLAN hw accel\n", name); return -EOPNOTSUPP; } if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && - (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) { + (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) { pr_info("8021q: Device %s has buggy VLAN hw accel\n", name); return -EOPNOTSUPP; } @@ -240,6 +241,7 @@ int register_vlan_dev(struct net_device *dev) { struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; u16 vlan_id = vlan->vlan_id; struct vlan_group *grp, *ngrp = NULL; int err; @@ -275,9 +277,9 @@ int register_vlan_dev(struct net_device *dev) grp->nr_vlans++; if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX) - real_dev->vlan_rx_register(real_dev, ngrp); + ops->ndo_vlan_rx_register(real_dev, ngrp); if (real_dev->features & NETIF_F_HW_VLAN_FILTER) - 
real_dev->vlan_rx_add_vid(real_dev, vlan_id); + ops->ndo_vlan_rx_add_vid(real_dev, vlan_id); return 0; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 118adef476c..dd86a1dc4cd 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -3,11 +3,20 @@ #include <linux/if_vlan.h> #include "vlan.h" +struct vlan_hwaccel_cb { + struct net_device *dev; +}; + +static inline struct vlan_hwaccel_cb *vlan_hwaccel_cb(struct sk_buff *skb) +{ + return (struct vlan_hwaccel_cb *)skb->cb; +} + /* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling) { - struct net_device_stats *stats; + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); if (skb_bond_should_drop(skb)) { dev_kfree_skb_any(skb); @@ -15,22 +24,33 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, } skb->vlan_tci = vlan_tci; + cb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + + return (polling ? netif_receive_skb(skb) : netif_rx(skb)); +} +EXPORT_SYMBOL(__vlan_hwaccel_rx); + +int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); + struct net_device *dev = cb->dev; + struct net_device_stats *stats; + netif_nit_deliver(skb); - skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); - if (skb->dev == NULL) { - dev_kfree_skb_any(skb); - /* Not NET_RX_DROP, this is not being dropped - * due to congestion. 
*/ - return NET_RX_SUCCESS; + if (dev == NULL) { + kfree_skb(skb); + return -1; } + + skb->dev = dev; + skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; - stats = &skb->dev->stats; + stats = &dev->stats; stats->rx_packets++; stats->rx_bytes += skb->len; - skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); switch (skb->pkt_type) { case PACKET_BROADCAST: break; @@ -42,13 +62,12 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly. */ if (!compare_ether_addr(eth_hdr(skb)->h_dest, - skb->dev->dev_addr)) + dev->dev_addr)) skb->pkt_type = PACKET_HOST; break; }; - return (polling ? netif_receive_skb(skb) : netif_rx(skb)); + return 0; } -EXPORT_SYMBOL(__vlan_hwaccel_rx); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e4bf39f8703..71193a6b10e 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -524,6 +524,7 @@ out: static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; struct ifreq ifrr; int err = -EOPNOTSUPP; @@ -534,8 +535,8 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (real_dev->do_ioctl && netif_device_present(real_dev)) - err = real_dev->do_ioctl(real_dev, &ifrr, cmd); + if (netif_device_present(real_dev) && ops->ndo_do_ioctl) + err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); break; } @@ -697,6 +698,20 @@ static const struct ethtool_ops vlan_ethtool_ops = { .get_flags = vlan_ethtool_get_flags, }; +static const struct net_device_ops vlan_netdev_ops = { + .ndo_change_mtu = vlan_dev_change_mtu, + .ndo_init = vlan_dev_init, + .ndo_uninit = vlan_dev_uninit, + .ndo_open = 
vlan_dev_open, + .ndo_stop = vlan_dev_stop, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = vlan_dev_set_mac_address, + .ndo_set_rx_mode = vlan_dev_set_rx_mode, + .ndo_set_multicast_list = vlan_dev_set_rx_mode, + .ndo_change_rx_flags = vlan_dev_change_rx_flags, + .ndo_do_ioctl = vlan_dev_ioctl, +}; + void vlan_setup(struct net_device *dev) { ether_setup(dev); @@ -704,16 +719,7 @@ void vlan_setup(struct net_device *dev) dev->priv_flags |= IFF_802_1Q_VLAN; dev->tx_queue_len = 0; - dev->change_mtu = vlan_dev_change_mtu; - dev->init = vlan_dev_init; - dev->uninit = vlan_dev_uninit; - dev->open = vlan_dev_open; - dev->stop = vlan_dev_stop; - dev->set_mac_address = vlan_dev_set_mac_address; - dev->set_rx_mode = vlan_dev_set_rx_mode; - dev->set_multicast_list = vlan_dev_set_rx_mode; - dev->change_rx_flags = vlan_dev_change_rx_flags; - dev->do_ioctl = vlan_dev_ioctl; + dev->netdev_ops = &vlan_netdev_ops; dev->destructor = free_netdev; dev->ethtool_ops = &vlan_ethtool_ops; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 0feefa4e1a4..3628e0a81b4 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -314,7 +314,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) dev_info->ingress_priority_map[6], dev_info->ingress_priority_map[7]); - seq_printf(seq, "EGRESSS priority Mappings: "); + seq_printf(seq, " EGRESS priority mappings: "); for (i = 0; i < 16; i++) { const struct vlan_priority_tci_mapping *mp = dev_info->egress_priority_map[i]; diff --git a/net/9p/Kconfig b/net/9p/Kconfig index c42c0c400bf..0663f99e977 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -13,22 +13,24 @@ menuconfig NET_9P If unsure, say N. +if NET_9P + config NET_9P_VIRTIO - depends on NET_9P && EXPERIMENTAL && VIRTIO + depends on EXPERIMENTAL && VIRTIO tristate "9P Virtio Transport (Experimental)" help This builds support for a transports between guest partitions and a host partition. 
config NET_9P_RDMA - depends on NET_9P && INFINIBAND && EXPERIMENTAL + depends on INET && INFINIBAND && EXPERIMENTAL tristate "9P RDMA Transport (Experimental)" help - This builds support for a RDMA transport. + This builds support for an RDMA transport. config NET_9P_DEBUG bool "Debug information" - depends on NET_9P help Say Y if you want the 9P subsystem to log debug information. +endif diff --git a/net/9p/client.c b/net/9p/client.c index 67717f69412..4b529454616 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -189,6 +189,9 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; return ERR_PTR(-ENOMEM); } req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); @@ -311,12 +314,6 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) r->status = REQ_STATUS_IDLE; if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) p9_idpool_put(tag, c->tagpool); - - /* if this was a flush request we have to free response fcall */ - if (r->rc->id == P9_RFLUSH) { - kfree(r->tc); - kfree(r->rc); - } } /** @@ -611,19 +608,21 @@ reterr: static struct p9_fid *p9_fid_create(struct p9_client *clnt) { - int err; + int ret; struct p9_fid *fid; + unsigned long flags; P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt); fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); if (!fid) return ERR_PTR(-ENOMEM); - fid->fid = p9_idpool_get(clnt->fidpool); + ret = p9_idpool_get(clnt->fidpool); if (fid->fid < 0) { - err = -ENOSPC; + ret = -ENOSPC; goto error; } + fid->fid = ret; memset(&fid->qid, 0, sizeof(struct p9_qid)); fid->mode = -1; @@ -632,27 +631,28 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) fid->clnt = clnt; fid->aux = NULL; - spin_lock(&clnt->lock); + spin_lock_irqsave(&clnt->lock, flags); list_add(&fid->flist, &clnt->fidlist); - spin_unlock(&clnt->lock); + spin_unlock_irqrestore(&clnt->lock, flags); 
return fid; error: kfree(fid); - return ERR_PTR(err); + return ERR_PTR(ret); } static void p9_fid_destroy(struct p9_fid *fid) { struct p9_client *clnt; + unsigned long flags; P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid); clnt = fid->clnt; p9_idpool_put(fid->fid, clnt->fidpool); - spin_lock(&clnt->lock); + spin_lock_irqsave(&clnt->lock, flags); list_del(&fid->flist); - spin_unlock(&clnt->lock); + spin_unlock_irqrestore(&clnt->lock, flags); kfree(fid); } @@ -818,7 +818,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, } P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -865,7 +867,9 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) } P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&afid->qid, &qid, sizeof(struct p9_qid)); p9_free_req(clnt, req); @@ -930,7 +934,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, for (count = 0; count < nwqids; count++) P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", - count, wqids[count].type, wqids[count].path, + count, wqids[count].type, + (unsigned long long)wqids[count].path, wqids[count].version); if (nwname) @@ -980,7 +985,9 @@ int p9_client_open(struct p9_fid *fid, int mode) } P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1023,7 +1030,9 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, } P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = 
mode; fid->iounit = iounit; @@ -1230,9 +1239,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" "<<< uid=%d gid=%d n_muid=%d\n", ret->size, ret->type, ret->dev, ret->qid.type, - ret->qid.path, ret->qid.version, ret->mode, - ret->atime, ret->mtime, ret->length, ret->name, - ret->uid, ret->gid, ret->muid, ret->extension, + (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, (unsigned long long)ret->length, + ret->name, ret->uid, ret->gid, ret->muid, ret->extension, ret->n_uid, ret->n_gid, ret->n_muid); free_and_error: @@ -1255,9 +1264,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" " uid=%d gid=%d n_muid=%d\n", wst->size, wst->type, wst->dev, wst->qid.type, - wst->qid.path, wst->qid.version, wst->mode, - wst->atime, wst->mtime, wst->length, wst->name, - wst->uid, wst->gid, wst->muid, wst->extension, + (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, (unsigned long long)wst->length, + wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); err = 0; clnt = fid->clnt; diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index a7fe63f704d..7fa0eb20b2f 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -528,8 +528,6 @@ static void rdma_close(struct p9_client *client) /** * alloc_rdma - Allocate and initialize the rdma transport structure - * @msize: MTU - * @dotu: Extension attribute * @opts: Mount options structure */ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) @@ -588,6 +586,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) if (IS_ERR(rdma->cm_id)) goto error; + /* Associate the client with the transport */ + client->trans = rdma; + /* Resolve the server's address */ rdma->addr.sin_family = AF_INET; rdma->addr.sin_addr.s_addr = in_aton(addr); @@ -668,7 +669,6 @@ 
rdma_create_trans(struct p9_client *client, const char *addr, char *args) if (err || (rdma->state != P9_RDMA_CONNECTED)) goto error; - client->trans = rdma; client->status = Connected; return 0; diff --git a/net/Kconfig b/net/Kconfig index 8c3d97ca0d9..6ec2cce7c16 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -32,6 +32,9 @@ config NET_NS Allow user space to create what appear to be multiple instances of the network stack. +config COMPAT_NET_DEV_OPS + def_bool y + source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" @@ -191,6 +194,7 @@ source "net/lapb/Kconfig" source "net/econet/Kconfig" source "net/wanrouter/Kconfig" source "net/sched/Kconfig" +source "net/dcb/Kconfig" menu "Network testing" @@ -247,7 +251,6 @@ if WIRELESS source "net/wireless/Kconfig" source "net/mac80211/Kconfig" -source "net/ieee80211/Kconfig" endif # WIRELESS diff --git a/net/Makefile b/net/Makefile index 27d1f10dc0e..ba4460432b7 100644 --- a/net/Makefile +++ b/net/Makefile @@ -51,12 +51,14 @@ obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-y += wireless/ obj-$(CONFIG_MAC80211) += mac80211/ -obj-$(CONFIG_IEEE80211) += ieee80211/ obj-$(CONFIG_TIPC) += tipc/ obj-$(CONFIG_NETLABEL) += netlabel/ obj-$(CONFIG_IUCV) += iucv/ obj-$(CONFIG_RFKILL) += rfkill/ obj-$(CONFIG_NET_9P) += 9p/ +ifneq ($(CONFIG_DCB),) +obj-y += dcb/ +endif ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index d3134e7e6ee..c3f00271737 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1284,7 +1284,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb) skb->dev = dev; skb_reset_transport_header(skb); - stats = dev->priv; + stats = netdev_priv(dev); stats->rx_packets++; stats->rx_bytes += skb->len + 13; netif_rx(skb); /* Send the SKB up to a higher place. 
*/ diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index 1b88311f213..b5674dc2083 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -149,7 +149,7 @@ int atm_register_sysfs(struct atm_dev *adev) cdev->class = &atm_class; dev_set_drvdata(cdev, adev); - snprintf(cdev->bus_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number); + dev_set_name(cdev, "%s%d", adev->type, adev->number); err = device_register(cdev); if (err < 0) return err; diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 29ef8dc6921..ea9438fc685 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -101,7 +101,7 @@ static LIST_HEAD(br2684_devs); static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev) { - return (struct br2684_dev *)net_dev->priv; + return (struct br2684_dev *)netdev_priv(net_dev); } static inline struct net_device *list_entry_brdev(const struct list_head *le) diff --git a/net/atm/lec.c b/net/atm/lec.c index 1def62d1773..e5e301550e8 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -152,7 +152,7 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) buff += 4; mesg->content.normal.flag = *buff & 0x01; /* 0x01 is topology change */ - priv = (struct lec_priv *)dev->priv; + priv = netdev_priv(dev); atm_force_charge(priv->lecd, skb2->truesize); sk = sk_atm(priv->lecd); skb_queue_tail(&sk->sk_receive_queue, skb2); @@ -218,7 +218,7 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc) static int lec_open(struct net_device *dev) { - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); netif_start_queue(dev); memset(&priv->stats, 0, sizeof(struct net_device_stats)); @@ -252,7 +252,7 @@ static void lec_tx_timeout(struct net_device *dev) static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct sk_buff *skb2; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); struct lecdatahdr_8023 *lec_h; struct 
atm_vcc *vcc; struct lec_arp_table *entry; @@ -433,14 +433,14 @@ static int lec_close(struct net_device *dev) */ static struct net_device_stats *lec_get_stats(struct net_device *dev) { - return &((struct lec_priv *)dev->priv)->stats; + return &((struct lec_priv *)netdev_priv(dev))->stats; } static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); struct atmlec_msg *mesg; struct lec_arp_table *entry; int i; @@ -580,7 +580,7 @@ static void lec_atm_close(struct atm_vcc *vcc) { struct sk_buff *skb; struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); priv->lecd = NULL; /* Do something needful? */ @@ -711,7 +711,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); #if DUMP_PACKETS >0 int i = 0; @@ -858,7 +858,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) vpriv->old_pop = vcc->pop; vcc->user_back = vpriv; vcc->pop = lec_pop; - lec_vcc_added(dev_lec[ioc_data.dev_num]->priv, + lec_vcc_added(netdev_priv(dev_lec[ioc_data.dev_num]), &ioc_data, vcc, vcc->push); vcc->proto_data = dev_lec[ioc_data.dev_num]; vcc->push = lec_push; @@ -870,7 +870,8 @@ static int lec_mcast_attach(struct atm_vcc *vcc, int arg) if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; - return (lec_mcast_make((struct lec_priv *)dev_lec[arg]->priv, vcc)); + return lec_mcast_make((struct lec_priv *)netdev_priv(dev_lec[arg]), + vcc); } /* Initialize device. 
*/ @@ -912,11 +913,11 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) return -EINVAL; } - priv = dev_lec[i]->priv; + priv = netdev_priv(dev_lec[i]); priv->is_trdev = is_trdev; lec_init(dev_lec[i]); } else { - priv = dev_lec[i]->priv; + priv = netdev_priv(dev_lec[i]); if (priv->lecd) return -EADDRINUSE; } @@ -1077,7 +1078,8 @@ static void *lec_itf_walk(struct lec_state *state, loff_t *l) void *v; dev = state->dev ? state->dev : dev_lec[state->itf]; - v = (dev && dev->priv) ? lec_priv_walk(state, l, dev->priv) : NULL; + v = (dev && netdev_priv(dev)) ? + lec_priv_walk(state, l, netdev_priv(dev)) : NULL; if (!v && dev) { dev_put(dev); /* Partial state reset for the next time we get called */ @@ -1239,7 +1241,7 @@ static void __exit lane_module_cleanup(void) for (i = 0; i < MAX_LEC_ITF; i++) { if (dev_lec[i] != NULL) { - priv = (struct lec_priv *)dev_lec[i]->priv; + priv = netdev_priv(dev_lec[i]); unregister_netdev(dev_lec[i]); free_netdev(dev_lec[i]); dev_lec[i] = NULL; @@ -1263,7 +1265,7 @@ static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force, u8 **tlvs, u32 *sizeoftlvs) { unsigned long flags; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); struct lec_arp_table *table; struct sk_buff *skb; int retval; @@ -1310,7 +1312,7 @@ static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst, { int retval; struct sk_buff *skb; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); if (compare_ether_addr(lan_dst, dev->dev_addr)) return (0); /* not our mac address */ @@ -1347,7 +1349,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, #if 0 int i = 0; #endif - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); #if 0 /* * Why have the TLVs in LE_ARP entries * since we do not use them? 
When you diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 010b1d2a5a8..12e9ea371db 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -785,7 +785,7 @@ static int atm_mpoa_mpoad_attach (struct atm_vcc *vcc, int arg) } if (mpc->dev) { /* check if the lec is LANE2 capable */ - priv = (struct lec_priv *)mpc->dev->priv; + priv = netdev_priv(mpc->dev); if (priv->lane_version < 2) { dev_put(mpc->dev); mpc->dev = NULL; @@ -845,7 +845,7 @@ static void mpoad_close(struct atm_vcc *vcc) mpc->mpoad_vcc = NULL; if (mpc->dev) { - struct lec_priv *priv = (struct lec_priv *)mpc->dev->priv; + struct lec_priv *priv = netdev_priv(mpc->dev); priv->lane2_ops->associate_indicator = NULL; stop_mpc(mpc); dev_put(mpc->dev); @@ -976,7 +976,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo switch (event) { case NETDEV_REGISTER: /* a new lec device was allocated */ - priv = (struct lec_priv *)dev->priv; + priv = netdev_priv(dev); if (priv->lane_version < 2) break; priv->lane2_ops->associate_indicator = lane2_assoc_ind; @@ -1324,7 +1324,7 @@ static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, struct mpoa_client *m dprintk("\n"); if (mpc->dev) { - priv = (struct lec_priv *)mpc->dev->priv; + priv = netdev_priv(mpc->dev); retval = priv->lane2_ops->associate_req(mpc->dev, mpc->dev->dev_addr, tlv, sizeof(tlv)); if (retval == 0) printk("mpoa: (%s) MPOA device type TLV association failed\n", mpc->dev->name); @@ -1474,7 +1474,7 @@ static void __exit atm_mpoa_cleanup(void) tmp = mpc->next; if (mpc->dev != NULL) { stop_mpc(mpc); - priv = (struct lec_priv *)mpc->dev->priv; + priv = netdev_priv(mpc->dev); if (priv->lane2_ops != NULL) priv->lane2_ops->associate_indicator = NULL; } diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index c1d877bb5df..62ee3fb3473 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -24,7 +24,9 @@ static int min_idle[1], max_idle[] = {65535000}; static int min_n2[] = {1}, max_n2[] = {31}; static 
int min_paclen[] = {1}, max_paclen[] = {512}; static int min_proto[1], max_proto[] = { AX25_PROTO_MAX }; +#ifdef CONFIG_AX25_DAMA_SLAVE static int min_ds_timeout[1], max_ds_timeout[] = {65535000}; +#endif static struct ctl_table_header *ax25_table_header; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 7a4d5303b4f..f8efaf35293 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -565,7 +565,7 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) goto failed; } - s = dev->priv; + s = netdev_priv(dev); /* This is rx header therefore addresses are swapped. * ie eh.h_dest is our local address. */ diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index d9fa0ab2c87..47e179f62e8 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -62,14 +62,14 @@ static int bnep_net_close(struct net_device *dev) static struct net_device_stats *bnep_net_get_stats(struct net_device *dev) { - struct bnep_session *s = dev->priv; + struct bnep_session *s = netdev_priv(dev); return &s->stats; } static void bnep_net_set_mc_list(struct net_device *dev) { #ifdef CONFIG_BT_BNEP_MC_FILTER - struct bnep_session *s = dev->priv; + struct bnep_session *s = netdev_priv(dev); struct sock *sk = s->sock->sk; struct bnep_set_filter_req *r; struct sk_buff *skb; @@ -183,7 +183,7 @@ static inline int bnep_net_proto_filter(struct sk_buff *skb, struct bnep_session static int bnep_net_xmit(struct sk_buff *skb, struct net_device *dev) { - struct bnep_session *s = dev->priv; + struct bnep_session *s = netdev_priv(dev); struct sock *sk = s->sock->sk; BT_DBG("skb %p, dev %p", skb, dev); diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index f4f6615cad9..f2bbb2f6543 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -113,8 +113,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn) conn->dev.class = bt_class; conn->dev.parent = &hdev->dev; - 
snprintf(conn->dev.bus_id, BUS_ID_SIZE, "%s:%d", - hdev->name, conn->handle); + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); dev_set_drvdata(&conn->dev, conn); @@ -132,7 +131,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn) */ static int __match_tty(struct device *dev, void *data) { - return !strncmp(dev->bus_id, "rfcomm", 6); + return !strncmp(dev_name(dev), "rfcomm", 6); } static void del_conn(struct work_struct *work) @@ -421,7 +420,7 @@ int hci_register_sysfs(struct hci_dev *hdev) dev->class = bt_class; dev->parent = hdev->parent; - strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE); + dev_set_name(dev, hdev->name); dev_set_drvdata(dev, hdev); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8a972b6ba85..bc0d4a7ce6a 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -792,7 +792,9 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { +#if defined(CONFIG_BT_RFCOMM_TTY) || defined(CONFIG_BT_RFCOMM_DEBUG) struct sock *sk = sock->sk; +#endif int err; BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 6c023f0f825..18538d7460d 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -147,7 +147,7 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) return 0; } -static struct ethtool_ops br_ethtool_ops = { +static const struct ethtool_ops br_ethtool_ops = { .get_drvinfo = br_getinfo, .get_link = ethtool_op_get_link, .get_tx_csum = ethtool_op_get_tx_csum, @@ -160,21 +160,25 @@ static struct ethtool_ops br_ethtool_ops = { .get_flags = ethtool_op_get_flags, }; +static const struct net_device_ops br_netdev_ops = { + .ndo_open = br_dev_open, + .ndo_stop = br_dev_stop, + .ndo_start_xmit = br_dev_xmit, + .ndo_set_mac_address = br_set_mac_address, + .ndo_set_multicast_list = br_dev_set_multicast_list, + 
.ndo_change_mtu = br_change_mtu, + .ndo_do_ioctl = br_dev_ioctl, +}; + void br_dev_setup(struct net_device *dev) { random_ether_addr(dev->dev_addr); ether_setup(dev); - dev->do_ioctl = br_dev_ioctl; - dev->hard_start_xmit = br_dev_xmit; - dev->open = br_dev_open; - dev->set_multicast_list = br_dev_set_multicast_list; - dev->change_mtu = br_change_mtu; + dev->netdev_ops = &br_netdev_ops; dev->destructor = free_netdev; SET_ETHTOOL_OPS(dev, &br_ethtool_ops); - dev->stop = br_dev_stop; dev->tx_queue_len = 0; - dev->set_mac_address = br_set_mac_address; dev->priv_flags = IFF_EBRIDGE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 0a09ccf68c1..727c5c510a6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -373,7 +373,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) return -EINVAL; - if (dev->hard_start_xmit == br_dev_xmit) + if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) return -ELOOP; if (dev->br_port != NULL) @@ -460,7 +460,7 @@ void br_net_exit(struct net *net) restart: for_each_netdev(net, dev) { if (dev->priv_flags & IFF_EBRIDGE) { - del_br(dev->priv); + del_br(netdev_priv(dev)); goto restart; } } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index bf9d6af9628..a65e43a17fb 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -101,6 +101,17 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) +static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) +{ +} + +static struct dst_ops fake_dst_ops = { + .family = AF_INET, + .protocol = __constant_htons(ETH_P_IP), + .update_pmtu = fake_update_pmtu, + .entries = ATOMIC_INIT(0), +}; + /* * Initialize bogus route table used to keep netfilter happy. 
* Currently, we fill in the PMTU entry because netfilter @@ -117,6 +128,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) rt->u.dst.path = &rt->u.dst; rt->u.dst.metrics[RTAX_MTU - 1] = 1500; rt->u.dst.flags = DST_NOXFRM; + rt->u.dst.ops = &fake_dst_ops; } static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 158dee8b496..603d89248e7 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -22,7 +22,7 @@ #include "br_private.h" #define to_dev(obj) container_of(obj, struct device, kobj) -#define to_bridge(cd) ((struct net_bridge *)(to_net_dev(cd)->priv)) +#define to_bridge(cd) ((struct net_bridge *)netdev_priv(to_net_dev(cd))) /* * Common code for storing bridge parameters. diff --git a/net/compat.c b/net/compat.c index 67fb6a3834a..a3a2ba0fac0 100644 --- a/net/compat.c +++ b/net/compat.c @@ -226,14 +226,14 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat return 0; /* XXX: return error? check spec. 
*/ } - if (level == SOL_SOCKET && type == SO_TIMESTAMP) { + if (level == SOL_SOCKET && type == SCM_TIMESTAMP) { struct timeval *tv = (struct timeval *)data; ctv.tv_sec = tv->tv_sec; ctv.tv_usec = tv->tv_usec; data = &ctv; len = sizeof(ctv); } - if (level == SOL_SOCKET && type == SO_TIMESTAMPNS) { + if (level == SOL_SOCKET && type == SCM_TIMESTAMPNS) { struct timespec *ts = (struct timespec *)data; cts.tv_sec = ts->tv_sec; cts.tv_nsec = ts->tv_nsec; @@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt); static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(6)}; + AL(4)}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const compat_sigset_t __user *sigmask, - compat_size_t sigsetsize, int flags) -{ - compat_sigset_t ss32; - sigset_t ksigmask, sigsaved; - int ret; - - if (sigmask) { - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - if (copy_from_user(&ss32, sigmask, sizeof(ss32))) - return -EFAULT; - sigset_from_compat(&ksigmask, &ss32); - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); - - if (ret == -ERESTARTNOHAND) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. 
- */ - if (sigmask) { - memcpy(&current->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return ret; -} - asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_PACCEPT) + if (call < SYS_SOCKET || call > SYS_ACCEPT4) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ret = sys_listen(a0, a1); break; case SYS_ACCEPT: - ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0); + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); @@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; - case SYS_PACCEPT: - ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]), - compat_ptr(a[3]), a[4], a[5]); + case SYS_ACCEPT4: + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); break; default: ret = -EINVAL; diff --git a/net/core/datagram.c b/net/core/datagram.c index ee631843c2f..5e2ac0c4b07 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -209,7 +209,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); - sk_mem_reclaim(sk); + sk_mem_reclaim_partial(sk); } /** @@ -248,8 +248,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) spin_unlock_bh(&sk->sk_receive_queue.lock); } - kfree_skb(skb); - sk_mem_reclaim(sk); + skb_free_datagram(sk, skb); return err; } diff --git a/net/core/dev.c b/net/core/dev.c index 8f9d3b38a44..4615e9a443a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -108,7 +108,6 @@ #include <linux/init.h> #include 
<linux/kmod.h> #include <linux/module.h> -#include <linux/kallsyms.h> #include <linux/netpoll.h> #include <linux/rcupdate.h> #include <linux/delay.h> @@ -1060,6 +1059,7 @@ void dev_load(struct net *net, const char *name) */ int dev_open(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; int ret = 0; ASSERT_RTNL(); @@ -1082,11 +1082,11 @@ int dev_open(struct net_device *dev) */ set_bit(__LINK_STATE_START, &dev->state); - if (dev->validate_addr) - ret = dev->validate_addr(dev); + if (ops->ndo_validate_addr) + ret = ops->ndo_validate_addr(dev); - if (!ret && dev->open) - ret = dev->open(dev); + if (!ret && ops->ndo_open) + ret = ops->ndo_open(dev); /* * If it went open OK then: @@ -1130,6 +1130,7 @@ int dev_open(struct net_device *dev) */ int dev_close(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; ASSERT_RTNL(); might_sleep(); @@ -1162,8 +1163,8 @@ int dev_close(struct net_device *dev) * We allow it to be called even after a DETACH hot-plug * event. */ - if (dev->stop) - dev->stop(dev); + if (ops->ndo_stop) + ops->ndo_stop(dev); /* * Device is now down. 
@@ -1659,6 +1660,9 @@ static int dev_gso_segment(struct sk_buff *skb) int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { + const struct net_device_ops *ops = dev->netdev_ops; + + prefetch(&dev->netdev_ops->ndo_start_xmit); if (likely(!skb->next)) { if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); @@ -1670,7 +1674,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto gso; } - return dev->hard_start_xmit(skb, dev); + return ops->ndo_start_xmit(skb, dev); } gso: @@ -1680,7 +1684,7 @@ gso: skb->next = nskb->next; nskb->next = NULL; - rc = dev->hard_start_xmit(nskb, dev); + rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc)) { nskb->next = skb->next; skb->next = nskb; @@ -1754,10 +1758,11 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { + const struct net_device_ops *ops = dev->netdev_ops; u16 queue_index = 0; - if (dev->select_queue) - queue_index = dev->select_queue(dev, skb); + if (ops->ndo_select_queue) + queue_index = ops->ndo_select_queue(dev, skb); else if (dev->real_num_tx_queues > 1) queue_index = simple_tx_hash(dev, skb); @@ -2223,6 +2228,9 @@ int netif_receive_skb(struct sk_buff *skb) int ret = NET_RX_DROP; __be16 type; + if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) + return NET_RX_SUCCESS; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) return NET_RX_DROP; @@ -2253,8 +2261,10 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); /* Don't receive packets in an exiting network namespace */ - if (!net_alive(dev_net(skb->dev))) + if (!net_alive(dev_net(skb->dev))) { + kfree_skb(skb); goto out; + } #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { @@ -2614,7 +2624,7 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { - struct 
net_device_stats *stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", @@ -2796,31 +2806,6 @@ static void ptype_seq_stop(struct seq_file *seq, void *v) rcu_read_unlock(); } -static void ptype_seq_decode(struct seq_file *seq, void *sym) -{ -#ifdef CONFIG_KALLSYMS - unsigned long offset = 0, symsize; - const char *symname; - char *modname; - char namebuf[128]; - - symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset, - &modname, namebuf); - - if (symname) { - char *delim = ":"; - - if (!modname) - modname = delim = ""; - seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim, - symname, offset); - return; - } -#endif - - seq_printf(seq, "[%p]", sym); -} - static int ptype_seq_show(struct seq_file *seq, void *v) { struct packet_type *pt = v; @@ -2833,10 +2818,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) else seq_printf(seq, "%04x", ntohs(pt->type)); - seq_printf(seq, " %-8s ", - pt->dev ? pt->dev->name : ""); - ptype_seq_decode(seq, pt->func); - seq_putc(seq, '\n'); + seq_printf(seq, " %-8s %pF\n", + pt->dev ? pt->dev->name : "", pt->func); } return 0; @@ -2953,8 +2936,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) static void dev_change_rx_flags(struct net_device *dev, int flags) { - if (dev->flags & IFF_UP && dev->change_rx_flags) - dev->change_rx_flags(dev, flags); + const struct net_device_ops *ops = dev->netdev_ops; + + if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + ops->ndo_change_rx_flags(dev, flags); } static int __dev_set_promiscuity(struct net_device *dev, int inc) @@ -3074,6 +3059,8 @@ int dev_set_allmulti(struct net_device *dev, int inc) */ void __dev_set_rx_mode(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; + /* dev_open will call this function so the list will stay sane. 
*/ if (!(dev->flags&IFF_UP)) return; @@ -3081,8 +3068,8 @@ void __dev_set_rx_mode(struct net_device *dev) if (!netif_device_present(dev)) return; - if (dev->set_rx_mode) - dev->set_rx_mode(dev); + if (ops->ndo_set_rx_mode) + ops->ndo_set_rx_mode(dev); else { /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. @@ -3095,8 +3082,8 @@ void __dev_set_rx_mode(struct net_device *dev) dev->uc_promisc = 0; } - if (dev->set_multicast_list) - dev->set_multicast_list(dev); + if (ops->ndo_set_multicast_list) + ops->ndo_set_multicast_list(dev); } } @@ -3455,6 +3442,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) */ int dev_set_mtu(struct net_device *dev, int new_mtu) { + const struct net_device_ops *ops = dev->netdev_ops; int err; if (new_mtu == dev->mtu) @@ -3468,10 +3456,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) return -ENODEV; err = 0; - if (dev->change_mtu) - err = dev->change_mtu(dev, new_mtu); + if (ops->ndo_change_mtu) + err = ops->ndo_change_mtu(dev, new_mtu); else dev->mtu = new_mtu; + if (!err && dev->flags & IFF_UP) call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; @@ -3486,15 +3475,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) */ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) { + const struct net_device_ops *ops = dev->netdev_ops; int err; - if (!dev->set_mac_address) + if (!ops->ndo_set_mac_address) return -EOPNOTSUPP; if (sa->sa_family != dev->type) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - err = dev->set_mac_address(dev, sa); + err = ops->ndo_set_mac_address(dev, sa); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; @@ -3574,6 +3564,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + const struct net_device_ops *ops = dev->netdev_ops; if (!dev) return -ENODEV; @@ 
-3601,15 +3592,15 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return 0; case SIOCSIFMAP: - if (dev->set_config) { + if (ops->ndo_set_config) { if (!netif_device_present(dev)) return -ENODEV; - return dev->set_config(dev, &ifr->ifr_map); + return ops->ndo_set_config(dev, &ifr->ifr_map); } return -EOPNOTSUPP; case SIOCADDMULTI: - if ((!dev->set_multicast_list && !dev->set_rx_mode) || + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3618,7 +3609,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) dev->addr_len, 1); case SIOCDELMULTI: - if ((!dev->set_multicast_list && !dev->set_rx_mode) || + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3656,10 +3647,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCBRDELIF || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; - if (dev->do_ioctl) { + if (ops->ndo_do_ioctl) { if (netif_device_present(dev)) - err = dev->do_ioctl(dev, ifr, - cmd); + err = ops->ndo_do_ioctl(dev, ifr, cmd); else err = -ENODEV; } @@ -3920,8 +3910,8 @@ static void rollback_registered(struct net_device *dev) */ dev_addr_discard(dev); - if (dev->uninit) - dev->uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); /* Notifier chain MUST detach us from master device. */ WARN_ON(dev->master); @@ -4011,7 +4001,7 @@ int register_netdevice(struct net_device *dev) struct hlist_head *head; struct hlist_node *p; int ret; - struct net *net; + struct net *net = dev_net(dev); BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -4020,8 +4010,7 @@ int register_netdevice(struct net_device *dev) /* When net_device's are persistent, this will be fatal. 
*/ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); - BUG_ON(!dev_net(dev)); - net = dev_net(dev); + BUG_ON(!net); spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -4029,9 +4018,46 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; +#ifdef CONFIG_COMPAT_NET_DEV_OPS + /* Netdevice_ops API compatiability support. + * This is temporary until all network devices are converted. + */ + if (dev->netdev_ops) { + const struct net_device_ops *ops = dev->netdev_ops; + + dev->init = ops->ndo_init; + dev->uninit = ops->ndo_uninit; + dev->open = ops->ndo_open; + dev->change_rx_flags = ops->ndo_change_rx_flags; + dev->set_rx_mode = ops->ndo_set_rx_mode; + dev->set_multicast_list = ops->ndo_set_multicast_list; + dev->set_mac_address = ops->ndo_set_mac_address; + dev->validate_addr = ops->ndo_validate_addr; + dev->do_ioctl = ops->ndo_do_ioctl; + dev->set_config = ops->ndo_set_config; + dev->change_mtu = ops->ndo_change_mtu; + dev->tx_timeout = ops->ndo_tx_timeout; + dev->get_stats = ops->ndo_get_stats; + dev->vlan_rx_register = ops->ndo_vlan_rx_register; + dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = ops->ndo_poll_controller; +#endif + } else { + char drivername[64]; + pr_info("%s (%s): not using net_device_ops yet\n", + dev->name, netdev_drivername(dev, drivername, 64)); + + /* This works only because net_device_ops and the + compatiablity structure are the same. 
*/ + dev->netdev_ops = (void *) &(dev->init); + } +#endif + /* Init, if this function is available */ - if (dev->init) { - ret = dev->init(dev); + if (dev->netdev_ops->ndo_init) { + ret = dev->netdev_ops->ndo_init(dev); if (ret) { if (ret > 0) ret = -EIO; @@ -4109,8 +4135,8 @@ out: return ret; err_uninit: - if (dev->uninit) - dev->uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); goto out; } @@ -4266,10 +4292,24 @@ void netdev_run_todo(void) } } -static struct net_device_stats *internal_stats(struct net_device *dev) -{ - return &dev->stats; +/** + * dev_get_stats - get network device statistics + * @dev: device to get statistics from + * + * Get network statistics from device. The device driver may provide + * its own method by setting dev->netdev_ops->get_stats; otherwise + * the internal statistics structure is used. + */ +const struct net_device_stats *dev_get_stats(struct net_device *dev) + { + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_get_stats) + return ops->ndo_get_stats(dev); + else + return &dev->stats; } +EXPORT_SYMBOL(dev_get_stats); static void netdev_init_one_queue(struct net_device *dev, struct netdev_queue *queue, @@ -4348,7 +4388,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); - dev->get_stats = internal_stats; netpoll_netdev_init(dev); setup(dev); strcpy(dev->name, name); @@ -4852,6 +4891,12 @@ static void __net_exit default_device_exit(struct net *net) if (dev->features & NETIF_F_NETNS_LOCAL) continue; + /* Delete virtual devices */ + if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { + dev->rtnl_link_ops->dellink(dev); + continue; + } + /* Push remaing network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); err = dev_change_net_namespace(dev, &init_net, fb_name); @@ -4898,9 +4943,6 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; - if 
(register_pernet_device(&default_device_ops)) - goto out; - /* * Initialise the packet receive queues. */ @@ -4917,10 +4959,25 @@ static int __init net_dev_init(void) queue->backlog.weight = weight_p; } - netdev_dma_register(); - dev_boot_phase = 0; + /* The loopback device is special if any other network devices + * is present in a network namespace the loopback device must + * be present. Since we now dynamically allocate and free the + * loopback device ensure this invariant is maintained by + * keeping the loopback device as the first device on the + * list of network devices. Ensuring the loopback devices + * is the first device that appears and the last network device + * that disappears. + */ + if (register_pernet_device(&loopback_net_ops)) + goto out; + + if (register_pernet_device(&default_device_ops)) + goto out; + + netdev_dma_register(); + open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action); diff --git a/net/core/dst.c b/net/core/dst.c index 09c1530f468..57bc4d5b8d0 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -263,9 +263,11 @@ again: void dst_release(struct dst_entry *dst) { if (dst) { - WARN_ON(atomic_read(&dst->__refcnt) < 1); + int newrefcnt; + smp_mb__before_atomic_dec(); - atomic_dec(&dst->__refcnt); + newrefcnt = atomic_dec_return(&dst->__refcnt); + WARN_ON(newrefcnt < 0); } } EXPORT_SYMBOL(dst_release); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 79de3b14a8d..32b3a0152d7 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -664,17 +664,18 @@ static int __init fib_rules_init(void) rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL); rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule); - err = register_netdevice_notifier(&fib_rules_notifier); + err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) goto fail; - err = register_pernet_subsys(&fib_rules_net_ops); + err = register_netdevice_notifier(&fib_rules_notifier); if (err < 0) goto 
fail_unregister; + return 0; fail_unregister: - unregister_netdevice_notifier(&fib_rules_notifier); + unregister_pernet_subsys(&fib_rules_net_ops); fail: rtnl_unregister(PF_UNSPEC, RTM_NEWRULE); rtnl_unregister(PF_UNSPEC, RTM_DELRULE); diff --git a/net/core/filter.c b/net/core/filter.c index df374435583..d1d779ca096 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -319,6 +319,25 @@ load_b: A = 0; continue; } + case SKF_AD_NLATTR_NEST: { + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) + return 0; + if (A > skb->len - sizeof(struct nlattr)) + return 0; + + nla = (struct nlattr *)&skb->data[A]; + if (nla->nla_len > A - skb->len) + return 0; + + nla = nla_find_nested(nla, X); + if (nla) + A = (void *)nla - (void *)skb->data; + else + A = 0; + continue; + } default: return 0; } diff --git a/net/core/flow.c b/net/core/flow.c index 5cf81052d04..96015871ece 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -165,7 +165,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, +void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -225,7 +225,7 @@ nocache: void *obj; atomic_t *obj_ref; - err = resolver(key, family, dir, &obj, &obj_ref); + err = resolver(net, key, family, dir, &obj, &obj_ref); if (fle && !err) { fle->genid = atomic_read(&flow_cache_genid); @@ -307,7 +307,7 @@ void flow_cache_flush(void) put_online_cpus(); } -static void __devinit flow_cache_cpu_prepare(int cpu) +static void __init flow_cache_cpu_prepare(int cpu) { struct tasklet_struct *tasklet; unsigned long order; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 57abe8266be..9cc9f95b109 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -31,6 +31,7 @@ #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/init.h> +#include <linux/rbtree.h> #include 
<net/sock.h> #include <net/gen_stats.h> @@ -89,6 +90,7 @@ struct gen_estimator u32 avpps; u32 avbps; struct rcu_head e_rcu; + struct rb_node node; }; struct gen_estimator_head @@ -102,6 +104,9 @@ static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; /* Protects against NULL dereference */ static DEFINE_RWLOCK(est_lock); +/* Protects against soft lockup during large deletion */ +static struct rb_root est_root = RB_ROOT; + static void est_timer(unsigned long arg) { int idx = (int)arg; @@ -139,6 +144,46 @@ skip: rcu_read_unlock(); } +static void gen_add_node(struct gen_estimator *est) +{ + struct rb_node **p = &est_root.rb_node, *parent = NULL; + + while (*p) { + struct gen_estimator *e; + + parent = *p; + e = rb_entry(parent, struct gen_estimator, node); + + if (est->bstats > e->bstats) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + rb_link_node(&est->node, parent, p); + rb_insert_color(&est->node, &est_root); +} + +static +struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats, + const struct gnet_stats_rate_est *rate_est) +{ + struct rb_node *p = est_root.rb_node; + + while (p) { + struct gen_estimator *e; + + e = rb_entry(p, struct gen_estimator, node); + + if (bstats > e->bstats) + p = p->rb_right; + else if (bstats < e->bstats || rate_est != e->rate_est) + p = p->rb_left; + else + return e; + } + return NULL; +} + /** * gen_new_estimator - create a new rate estimator * @bstats: basic statistics @@ -194,8 +239,11 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx)); list_add_rcu(&est->list, &elist[idx].list); + gen_add_node(est); + return 0; } +EXPORT_SYMBOL(gen_new_estimator); static void __gen_kill_estimator(struct rcu_head *head) { @@ -209,36 +257,27 @@ static void __gen_kill_estimator(struct rcu_head *head) * @bstats: basic statistics * @rate_est: rate estimator statistics * - * Removes the rate estimator specified by &bstats and &rate_est - * and deletes the 
timer. + * Removes the rate estimator specified by &bstats and &rate_est. * * NOTE: Called under rtnl_mutex */ void gen_kill_estimator(struct gnet_stats_basic *bstats, - struct gnet_stats_rate_est *rate_est) + struct gnet_stats_rate_est *rate_est) { - int idx; - struct gen_estimator *e, *n; - - for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { - - /* Skip non initialized indexes */ - if (!elist[idx].timer.function) - continue; + struct gen_estimator *e; - list_for_each_entry_safe(e, n, &elist[idx].list, list) { - if (e->rate_est != rate_est || e->bstats != bstats) - continue; + while ((e = gen_find_node(bstats, rate_est))) { + rb_erase(&e->node, &est_root); - write_lock_bh(&est_lock); - e->bstats = NULL; - write_unlock_bh(&est_lock); + write_lock_bh(&est_lock); + e->bstats = NULL; + write_unlock_bh(&est_lock); - list_del_rcu(&e->list); - call_rcu(&e->e_rcu, __gen_kill_estimator); - } + list_del_rcu(&e->list); + call_rcu(&e->e_rcu, __gen_kill_estimator); } } +EXPORT_SYMBOL(gen_kill_estimator); /** * gen_replace_estimator - replace rate estimator configuration @@ -259,8 +298,20 @@ int gen_replace_estimator(struct gnet_stats_basic *bstats, gen_kill_estimator(bstats, rate_est); return gen_new_estimator(bstats, rate_est, stats_lock, opt); } +EXPORT_SYMBOL(gen_replace_estimator); +/** + * gen_estimator_active - test if estimator is currently in use + * @bstats: basic statistics + * @rate_est: rate estimator statistics + * + * Returns true if estimator is active, and false if not. 
+ */ +bool gen_estimator_active(const struct gnet_stats_basic *bstats, + const struct gnet_stats_rate_est *rate_est) +{ + ASSERT_RTNL(); -EXPORT_SYMBOL(gen_kill_estimator); -EXPORT_SYMBOL(gen_new_estimator); -EXPORT_SYMBOL(gen_replace_estimator); + return gen_find_node(bstats, rate_est) != NULL; +} +EXPORT_SYMBOL(gen_estimator_active); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d9bbe010e0e..9c3717a23cf 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -531,9 +531,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (!n) goto out; -#ifdef CONFIG_NET_NS - n->net = hold_net(net); -#endif + write_pnet(&n->net, hold_net(net)); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -1329,9 +1327,9 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { struct neigh_parms *p, *ref; - struct net *net; + struct net *net = dev_net(dev); + const struct net_device_ops *ops = dev->netdev_ops; - net = dev_net(dev); ref = lookup_neigh_params(tbl, net, 0); if (!ref) return NULL; @@ -1343,16 +1341,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); - if (dev->neigh_setup && dev->neigh_setup(dev, p)) { + if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { kfree(p); return NULL; } dev_hold(dev); p->dev = dev; -#ifdef CONFIG_NET_NS - p->net = hold_net(net); -#endif + write_pnet(&p->net, hold_net(net)); p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; @@ -1407,9 +1403,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) unsigned long now = jiffies; unsigned long phsize; -#ifdef CONFIG_NET_NS - tbl->parms.net = &init_net; -#endif + write_pnet(&tbl->parms.net, &init_net); atomic_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); @@ -1424,9 +1418,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) 
panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = proc_create_data(tbl->id, 0, init_net.proc_net_stat, - &neigh_stat_seq_fops, tbl); - if (!tbl->pde) + if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat, + &neigh_stat_seq_fops, tbl)) panic("cannot create neighbour proc dir entry"); #endif diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 85cb8bdcfb8..6ac29a46e23 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -270,7 +270,6 @@ static ssize_t netstat_show(const struct device *d, unsigned long offset) { struct net_device *dev = to_net_dev(d); - struct net_device_stats *stats; ssize_t ret = -EINVAL; WARN_ON(offset > sizeof(struct net_device_stats) || @@ -278,7 +277,7 @@ static ssize_t netstat_show(const struct device *d, read_lock(&dev_base_lock); if (dev_isalive(dev)) { - stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); ret = sprintf(buf, fmt_ulong, *(unsigned long *)(((u8 *) stats) + offset)); } @@ -428,6 +427,9 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) struct net_device *dev = to_net_dev(d); int retval; + if (!net_eq(dev_net(dev), &init_net)) + return 0; + /* pass interface to uevent. 
*/ retval = add_uevent_var(env, "INTERFACE=%s", dev->name); if (retval) @@ -494,7 +496,7 @@ int netdev_register_kobject(struct net_device *net) dev->groups = groups; BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); - strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); + dev_set_name(dev, net->name); #ifdef CONFIG_SYSFS *groups++ = &netstat_group; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 34f5d072f16..96fb0519eb7 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -58,6 +58,7 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; + const struct net_device_ops *ops = dev->netdev_ops; struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { @@ -71,7 +72,7 @@ static void queue_process(struct work_struct *work) __netif_tx_lock(txq, smp_processor_id()); if (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); local_irq_restore(flags); @@ -172,12 +173,13 @@ static void service_arp_queue(struct netpoll_info *npi) void netpoll_poll(struct netpoll *np) { struct net_device *dev = np->dev; + const struct net_device_ops *ops = dev->netdev_ops; - if (!dev || !netif_running(dev) || !dev->poll_controller) + if (!dev || !netif_running(dev) || !ops->ndo_poll_controller) return; /* Process pending work on NIC */ - dev->poll_controller(dev); + ops->ndo_poll_controller(dev); poll_napi(dev); @@ -272,6 +274,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) int status = NETDEV_TX_BUSY; unsigned long tries; struct net_device *dev = np->dev; + const struct net_device_ops *ops = dev->netdev_ops; struct netpoll_info *npinfo = np->dev->npinfo; if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { @@ -292,7 +295,7 @@ static void netpoll_send_skb(struct netpoll *np, struct 
sk_buff *skb) tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_tx_queue_stopped(txq)) - status = dev->hard_start_xmit(skb, dev); + status = ops->ndo_start_xmit(skb, dev); __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) @@ -343,7 +346,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->check = csum_tcpudp_magic(htonl(np->local_ip), htonl(np->remote_ip), udp_len, IPPROTO_UDP, - csum_partial((unsigned char *)udph, udp_len, 0)); + csum_partial(udph, udp_len, 0)); if (udph->check == 0) udph->check = CSUM_MANGLED_0; @@ -694,7 +697,7 @@ int netpoll_setup(struct netpoll *np) atomic_inc(&npinfo->refcnt); } - if (!ndev->poll_controller) { + if (!ndev->netdev_ops->ndo_poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); err = -ENOTSUPP; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index fa4973bf73e..65498483325 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -422,6 +422,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char *ifname); static int pktgen_device_event(struct notifier_block *, unsigned long, void *); static void pktgen_run_all_threads(void); +static void pktgen_reset_all_threads(void); static void pktgen_stop_all_threads_ifs(void); static int pktgen_stop_device(struct pktgen_dev *pkt_dev); static void pktgen_stop(struct pktgen_thread *t); @@ -480,6 +481,9 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf, else if (!strcmp(data, "start")) pktgen_run_all_threads(); + else if (!strcmp(data, "reset")) + pktgen_reset_all_threads(); + else printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); @@ -1972,13 +1976,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) /* make sure that we don't pick a non-existing transmit queue */ ntxq = pkt_dev->odev->real_num_tx_queues; - if (ntxq > num_online_cpus() && (pkt_dev->flags & F_QUEUE_MAP_CPU)) { - printk(KERN_WARNING "pktgen: WARNING: 
QUEUE_MAP_CPU " - "disabled because CPU count (%d) exceeds number " - "of tx queues (%d) on %s\n", num_online_cpus(), ntxq, - pkt_dev->odev->name); - pkt_dev->flags &= ~F_QUEUE_MAP_CPU; - } + if (ntxq <= pkt_dev->queue_map_min) { printk(KERN_WARNING "pktgen: WARNING: Requested " "queue_map_min (zero-based) (%d) exceeds valid range " @@ -2167,7 +2165,8 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) struct xfrm_state *x = pkt_dev->flows[flow].x; if (!x) { /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr, + x = xfrm_stateonly_find(&init_net, + (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, pkt_dev->ipsmode, @@ -2201,6 +2200,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev) } pkt_dev->cur_queue_map = t; } + pkt_dev->cur_queue_map = pkt_dev->cur_queue_map % pkt_dev->odev->real_num_tx_queues; } /* Increment/randomize headers according to flags and current values @@ -3173,6 +3173,24 @@ static void pktgen_run_all_threads(void) pktgen_wait_all_threads_run(); } +static void pktgen_reset_all_threads(void) +{ + struct pktgen_thread *t; + + pr_debug("pktgen: entering pktgen_reset_all_threads.\n"); + + mutex_lock(&pktgen_thread_lock); + + list_for_each_entry(t, &pktgen_threads, th_list) + t->control |= (T_REMDEVALL); + + mutex_unlock(&pktgen_thread_lock); + + schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ + + pktgen_wait_all_threads_run(); +} + static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) { __u64 total_us, bps, mbps, pps, idle; @@ -3335,14 +3353,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) { - struct net_device *odev = NULL; + struct net_device *odev = pkt_dev->odev; + int (*xmit)(struct sk_buff *, struct net_device *) + = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; __u64 idle_start = 0; u16 
queue_map; int ret; - odev = pkt_dev->odev; - if (pkt_dev->delay_us || pkt_dev->delay_ns) { u64 now; @@ -3423,7 +3441,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) atomic_inc(&(pkt_dev->skb->users)); retry_now: - ret = odev->hard_start_xmit(pkt_dev->skb, odev); + ret = (*xmit)(pkt_dev->skb, odev); if (likely(ret == NETDEV_TX_OK)) { pkt_dev->last_ok = 1; pkt_dev->sofar++; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 31f29d2989f..790dd205bb5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -551,7 +551,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - struct net_device_stats *b) + const struct net_device_stats *b) { a->rx_packets = b->rx_packets; a->tx_packets = b->tx_packets; @@ -609,7 +609,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; - struct net_device_stats *stats; + const struct net_device_stats *stats; struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -666,7 +666,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (attr == NULL) goto nla_put_failure; - stats = dev->get_stats(dev); + stats = dev_get_stats(dev); copy_rtnl_link_stats(nla_data(attr), stats); if (dev->rtnl_link_ops) { @@ -762,6 +762,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { + const struct net_device_ops *ops = dev->netdev_ops; int send_addr_notify = 0; int err; @@ -783,7 +784,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct rtnl_link_ifmap *u_map; struct ifmap k_map; - if (!dev->set_config) { + if (!ops->ndo_set_config) { err = -EOPNOTSUPP; goto errout; } @@ -801,7 +802,7 @@ static int do_setlink(struct 
net_device *dev, struct ifinfomsg *ifm, k_map.dma = (unsigned char) u_map->dma; k_map.port = (unsigned char) u_map->port; - err = dev->set_config(dev, &k_map); + err = ops->ndo_set_config(dev, &k_map); if (err < 0) goto errout; @@ -812,7 +813,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct sockaddr *sa; int len; - if (!dev->set_mac_address) { + if (!ops->ndo_set_mac_address) { err = -EOPNOTSUPP; goto errout; } @@ -831,7 +832,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = dev->set_mac_address(dev, sa); + err = ops->ndo_set_mac_address(dev, sa); kfree(sa); if (err) goto errout; @@ -878,7 +879,9 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (ifm->ifi_change) flags = (flags & ifm->ifi_change) | (dev->flags & ~ifm->ifi_change); - dev_change_flags(dev, flags); + err = dev_change_flags(dev, flags); + if (err < 0) + goto errout; } if (tb[IFLA_TXQLEN]) diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a4..b12303dd39d 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -106,9 +106,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c4c8a33f341..b1f628741f4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -149,7 +149,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) void 
skb_truesize_bug(struct sk_buff *skb) { - printk(KERN_ERR "SKB BUG: Invalid truesize (%u) " + WARN(net_ratelimit(), KERN_ERR "SKB BUG: Invalid truesize (%u) " "len=%u, sizeof(sk_buff)=%Zd\n", skb->truesize, skb->len, sizeof(struct sk_buff)); } @@ -449,6 +449,18 @@ void kfree_skb(struct sk_buff *skb) __kfree_skb(skb); } +/** + * skb_recycle_check - check if skb can be reused for receive + * @skb: buffer + * @skb_size: minimum receive buffer size + * + * Checks that the skb passed in is not shared or cloned, and + * that it is linear and its head portion at least as large as + * skb_size so that it can be recycled as a receive buffer. + * If these conditions are met, this function does any necessary + * reference count dropping and cleans up the skbuff as if it + * just came from __alloc_skb(). + */ int skb_recycle_check(struct sk_buff *skb, int skb_size) { struct skb_shared_info *shinfo; @@ -474,8 +486,8 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) shinfo->frag_list = NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); - skb_reset_tail_pointer(skb); skb->data = skb->head + NET_SKB_PAD; + skb_reset_tail_pointer(skb); return 1; } @@ -2006,6 +2018,148 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) skb_split_no_header(skb, skb1, len, pos); } +/* Shifting from/to a cloned skb is a no-go. + * + * Caller cannot keep skb_shinfo related pointers past calling here! + */ +static int skb_prepare_for_shift(struct sk_buff *skb) +{ + return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} + +/** + * skb_shift - Shifts paged data partially from skb to another + * @tgt: buffer into which tail data gets added + * @skb: buffer from which the paged data comes from + * @shiftlen: shift up to this many bytes + * + * Attempts to shift up to shiftlen worth of bytes, which may be less than + * the length of the skb, from skb to tgt. Returns the number of bytes shifted. + * It's up to caller to free skb if everything was shifted.
+ * + * If @tgt runs out of frags, the whole operation is aborted. + * + * Skb cannot include anything else but paged data while tgt is allowed + * to have non-paged data as well. + * + * TODO: full sized shift could be optimized but that would need + * specialized skb free'er to handle frags without up-to-date nr_frags. + */ +int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) +{ + int from, to, merge, todo; + struct skb_frag_struct *fragfrom, *fragto; + + BUG_ON(shiftlen > skb->len); + BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ + + todo = shiftlen; + from = 0; + to = skb_shinfo(tgt)->nr_frags; + fragfrom = &skb_shinfo(skb)->frags[from]; + + /* Actual merge is delayed until the point when we know we can + * commit all, so that we don't have to undo partial changes + */ + if (!to || + !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { + merge = -1; + } else { + merge = to - 1; + + todo -= fragfrom->size; + if (todo < 0) { + if (skb_prepare_for_shift(skb) || + skb_prepare_for_shift(tgt)) + return 0; + + /* All previous frag pointers might be stale! 
*/ + fragfrom = &skb_shinfo(skb)->frags[from]; + fragto = &skb_shinfo(tgt)->frags[merge]; + + fragto->size += shiftlen; + fragfrom->size -= shiftlen; + fragfrom->page_offset += shiftlen; + + goto onlymerged; + } + + from++; + } + + /* Skip full, not-fitting skb to avoid expensive operations */ + if ((shiftlen == skb->len) && + (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) + return 0; + + if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) + return 0; + + while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { + if (to == MAX_SKB_FRAGS) + return 0; + + fragfrom = &skb_shinfo(skb)->frags[from]; + fragto = &skb_shinfo(tgt)->frags[to]; + + if (todo >= fragfrom->size) { + *fragto = *fragfrom; + todo -= fragfrom->size; + from++; + to++; + + } else { + get_page(fragfrom->page); + fragto->page = fragfrom->page; + fragto->page_offset = fragfrom->page_offset; + fragto->size = todo; + + fragfrom->page_offset += todo; + fragfrom->size -= todo; + todo = 0; + + to++; + break; + } + } + + /* Ready to "commit" this state change to tgt */ + skb_shinfo(tgt)->nr_frags = to; + + if (merge >= 0) { + fragfrom = &skb_shinfo(skb)->frags[0]; + fragto = &skb_shinfo(tgt)->frags[merge]; + + fragto->size += fragfrom->size; + put_page(fragfrom->page); + } + + /* Reposition in the original skb */ + to = 0; + while (from < skb_shinfo(skb)->nr_frags) + skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; + skb_shinfo(skb)->nr_frags = to; + + BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); + +onlymerged: + /* Most likely the tgt won't ever need its checksum anymore, skb on + * the other hand might need it if it needs to be resent + */ + tgt->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_PARTIAL; + + /* Yak, is it really working this way? Some helper please? 
*/ + skb->len -= shiftlen; + skb->data_len -= shiftlen; + skb->truesize -= shiftlen; + tgt->len += shiftlen; + tgt->data_len += shiftlen; + tgt->truesize += shiftlen; + + return shiftlen; +} + /** * skb_prepare_seq_read - Prepare a sequential read of skb data * @skb: the buffer to read diff --git a/net/core/sock.c b/net/core/sock.c index ded1eb5d2fd..ac4f0e79226 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -136,7 +136,6 @@ static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; -#ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket @@ -187,7 +186,6 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_MAX" }; -#endif /* * sk_callback_lock locking rules are per-address-family, @@ -291,7 +289,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) skb->dev = NULL; skb_set_owner_r(skb, sk); - + /* + * release dst right now while its hot + */ + dst_release(skb->dst); + skb->dst = NULL; /* Cache the SKB length before we tack it onto the receive * queue. 
Once it is added it no longer belongs to us and * may be freed by other threads of control pulling packets @@ -1073,7 +1075,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_sleep = NULL; if (newsk->sk_prot->sockets_allocated) - atomic_inc(newsk->sk_prot->sockets_allocated); + percpu_counter_inc(newsk->sk_prot->sockets_allocated); } out: return newsk; @@ -1465,8 +1467,12 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) } if (prot->memory_pressure) { - if (!*prot->memory_pressure || - prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) * + int alloc; + + if (!*prot->memory_pressure) + return 1; + alloc = percpu_counter_read_positive(prot->sockets_allocated); + if (prot->sysctl_mem[2] > alloc * sk_mem_pages(sk->sk_wmem_queued + atomic_read(&sk->sk_rmem_alloc) + sk->sk_forward_alloc)) @@ -2037,9 +2043,6 @@ static inline void release_proto_idx(struct proto *prot) int proto_register(struct proto *prot, int alloc_slab) { - char *request_sock_slab_name = NULL; - char *timewait_sock_slab_name; - if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN | prot->slab_flags, @@ -2054,12 +2057,12 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->rsk_prot != NULL) { static const char mask[] = "request_sock_%s"; - request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); - if (request_sock_slab_name == NULL) + prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + if (prot->rsk_prot->slab_name == NULL) goto out_free_sock_slab; - sprintf(request_sock_slab_name, mask, prot->name); - prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name, + sprintf(prot->rsk_prot->slab_name, mask, prot->name); + prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, prot->rsk_prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -2073,16 +2076,18 @@ int proto_register(struct proto *prot, int alloc_slab) if 
(prot->twsk_prot != NULL) { static const char mask[] = "tw_sock_%s"; - timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); - if (timewait_sock_slab_name == NULL) + if (prot->twsk_prot->twsk_slab_name == NULL) goto out_free_request_sock_slab; - sprintf(timewait_sock_slab_name, mask, prot->name); + sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name); prot->twsk_prot->twsk_slab = - kmem_cache_create(timewait_sock_slab_name, + kmem_cache_create(prot->twsk_prot->twsk_slab_name, prot->twsk_prot->twsk_obj_size, - 0, SLAB_HWCACHE_ALIGN, + 0, + SLAB_HWCACHE_ALIGN | + prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; @@ -2096,14 +2101,14 @@ int proto_register(struct proto *prot, int alloc_slab) return 0; out_free_timewait_sock_slab_name: - kfree(timewait_sock_slab_name); + kfree(prot->twsk_prot->twsk_slab_name); out_free_request_sock_slab: if (prot->rsk_prot && prot->rsk_prot->slab) { kmem_cache_destroy(prot->rsk_prot->slab); prot->rsk_prot->slab = NULL; } out_free_request_sock_slab_name: - kfree(request_sock_slab_name); + kfree(prot->rsk_prot->slab_name); out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; @@ -2126,18 +2131,14 @@ void proto_unregister(struct proto *prot) } if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) { - const char *name = kmem_cache_name(prot->rsk_prot->slab); - kmem_cache_destroy(prot->rsk_prot->slab); - kfree(name); + kfree(prot->rsk_prot->slab_name); prot->rsk_prot->slab = NULL; } if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { - const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab); - kmem_cache_destroy(prot->twsk_prot->twsk_slab); - kfree(name); + kfree(prot->twsk_prot->twsk_slab_name); prot->twsk_prot->twsk_slab = NULL; } } @@ -2174,7 +2175,7 @@ static void proto_seq_printf(struct seq_file *seq, 
struct proto *proto) "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", proto->name, proto->obj_size, - proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1, + sock_prot_inuse_get(seq_file_net(seq), proto), proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1, proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", proto->max_header, @@ -2228,7 +2229,8 @@ static const struct seq_operations proto_seq_ops = { static int proto_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &proto_seq_ops); + return seq_open_net(inode, file, &proto_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations proto_seq_fops = { @@ -2236,13 +2238,31 @@ static const struct file_operations proto_seq_fops = { .open = proto_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, +}; + +static __net_init int proto_init_net(struct net *net) +{ + if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) + return -ENOMEM; + + return 0; +} + +static __net_exit void proto_exit_net(struct net *net) +{ + proc_net_remove(net, "protocols"); +} + + +static __net_initdata struct pernet_operations proto_net_ops = { + .init = proto_init_net, + .exit = proto_exit_net, }; static int __init proto_init(void) { - /* register /proc/net/protocols */ - return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? 
-ENOBUFS : 0; + return register_pernet_subsys(&proto_net_ops); } subsys_initcall(proto_init); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 2bc0384b044..83d3398559e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -12,7 +12,6 @@ #include <linux/netdevice.h> #include <linux/init.h> #include <net/sock.h> -#include <net/xfrm.h> static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET @@ -89,40 +88,6 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, -#ifdef CONFIG_XFRM - { - .ctl_name = NET_CORE_AEVENT_ETIME, - .procname = "xfrm_aevent_etime", - .data = &sysctl_xfrm_aevent_etime, - .maxlen = sizeof(u32), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .ctl_name = NET_CORE_AEVENT_RSEQTH, - .procname = "xfrm_aevent_rseqth", - .data = &sysctl_xfrm_aevent_rseqth, - .maxlen = sizeof(u32), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "xfrm_larval_drop", - .data = &sysctl_xfrm_larval_drop, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "xfrm_acq_expires", - .data = &sysctl_xfrm_acq_expires, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, -#endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ { .ctl_name = NET_CORE_BUDGET, @@ -155,7 +120,7 @@ static struct ctl_table netns_core_table[] = { { .ctl_name = 0 } }; -static __net_initdata struct ctl_path net_core_path[] = { +__net_initdata struct ctl_path net_core_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "core", .ctl_name = NET_CORE, }, { }, @@ -207,8 +172,11 @@ static __net_initdata struct pernet_operations sysctl_core_ops = { static __init int sysctl_core_init(void) { + static struct ctl_table empty[1]; + + register_sysctl_paths(net_core_path, empty); register_net_sysctl_rotable(net_core_path, net_core_table); return 
register_pernet_subsys(&sysctl_core_ops); } -__initcall(sysctl_core_init); +fs_initcall(sysctl_core_init); diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig new file mode 100644 index 00000000000..4066d59c8de --- /dev/null +++ b/net/dcb/Kconfig @@ -0,0 +1,22 @@ +config DCB + bool "Data Center Bridging support" + default n + ---help--- + This enables support for configuring Data Center Bridging (DCB) + features on DCB capable Ethernet adapters via rtnetlink. Say 'Y' + if you have a DCB capable Ethernet adapter which supports this + interface and you are connected to a DCB capable switch. + + DCB is a collection of Ethernet enhancements which allow DCB capable + NICs and switches to support network traffic with differing + requirements (highly reliable, no drops vs. best effort vs. low + latency) to co-exist on Ethernet. + + DCB features include: + Enhanced Transmission Selection (aka Priority Grouping) - provides a + framework for assigning bandwidth guarantees to traffic classes. + Priority-based Flow Control (PFC) - a MAC control pause frame which + works at the granularity of the 802.1p priority instead of the + link (802.3x). + + If unsure, say N. diff --git a/net/dcb/Makefile b/net/dcb/Makefile new file mode 100644 index 00000000000..9930f4cde81 --- /dev/null +++ b/net/dcb/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_DCB) += dcbnl.o diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c new file mode 100644 index 00000000000..79a351d323a --- /dev/null +++ b/net/dcb/dcbnl.c @@ -0,0 +1,1121 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Lucy Liu <lucy.liu@intel.com> + */ + +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <net/netlink.h> +#include <net/rtnetlink.h> +#include <linux/dcbnl.h> +#include <linux/rtnetlink.h> +#include <net/sock.h> + +/** + * Data Center Bridging (DCB) is a collection of Ethernet enhancements + * intended to allow network traffic with differing requirements + * (highly reliable, no drops vs. best effort vs. low latency) to operate + * and co-exist on Ethernet. Current DCB features are: + * + * Enhanced Transmission Selection (aka Priority Grouping [PG]) - provides a + * framework for assigning bandwidth guarantees to traffic classes. + * + * Priority-based Flow Control (PFC) - provides a flow control mechanism which + * can work independently for each 802.1p priority. + * + * Congestion Notification - provides a mechanism for end-to-end congestion + * control for protocols which do not have built-in congestion management. + * + * More information about the emerging standards for these Ethernet features + * can be found at: http://www.ieee802.org/1/pages/dcbridges.html + * + * This file implements an rtnetlink interface to allow configuration of DCB + * features for capable devices. 
+ */ + +MODULE_AUTHOR("Lucy Liu, <lucy.liu@intel.com>"); +MODULE_DESCRIPTION("Data Center Bridging netlink interface"); +MODULE_LICENSE("GPL"); + +/**************** DCB attribute policies *************************************/ + +/* DCB netlink attributes policy */ +static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { + [DCB_ATTR_IFNAME] = {.type = NLA_NUL_STRING, .len = IFNAMSIZ - 1}, + [DCB_ATTR_STATE] = {.type = NLA_U8}, + [DCB_ATTR_PFC_CFG] = {.type = NLA_NESTED}, + [DCB_ATTR_PG_CFG] = {.type = NLA_NESTED}, + [DCB_ATTR_SET_ALL] = {.type = NLA_U8}, + [DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG}, + [DCB_ATTR_CAP] = {.type = NLA_NESTED}, + [DCB_ATTR_PFC_STATE] = {.type = NLA_U8}, + [DCB_ATTR_BCN] = {.type = NLA_NESTED}, +}; + +/* DCB priority flow control to User Priority nested attributes */ +static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { + [DCB_PFC_UP_ATTR_0] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_1] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_2] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_3] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_4] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_5] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_6] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_7] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG}, +}; + +/* DCB priority grouping nested attributes */ +static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { + [DCB_PG_ATTR_TC_0] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_1] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_2] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_3] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_4] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_5] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_6] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_7] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_ALL] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_BW_ID_0] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_1] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_2] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_3] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_4] = 
{.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_5] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_6] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_7] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_ALL] = {.type = NLA_FLAG}, +}; + +/* DCB traffic class nested attributes. */ +static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { + [DCB_TC_ATTR_PARAM_PGID] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_UP_MAPPING] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_STRICT_PRIO] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_BW_PCT] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_ALL] = {.type = NLA_FLAG}, +}; + +/* DCB capabilities nested attributes. */ +static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { + [DCB_CAP_ATTR_ALL] = {.type = NLA_FLAG}, + [DCB_CAP_ATTR_PG] = {.type = NLA_U8}, + [DCB_CAP_ATTR_PFC] = {.type = NLA_U8}, + [DCB_CAP_ATTR_UP2TC] = {.type = NLA_U8}, + [DCB_CAP_ATTR_PG_TCS] = {.type = NLA_U8}, + [DCB_CAP_ATTR_PFC_TCS] = {.type = NLA_U8}, + [DCB_CAP_ATTR_GSP] = {.type = NLA_U8}, + [DCB_CAP_ATTR_BCN] = {.type = NLA_U8}, +}; + +/* DCB number of traffic classes (numtcs) nested attributes. */ +static struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = { + [DCB_NUMTCS_ATTR_ALL] = {.type = NLA_FLAG}, + [DCB_NUMTCS_ATTR_PG] = {.type = NLA_U8}, + [DCB_NUMTCS_ATTR_PFC] = {.type = NLA_U8}, +}; + +/* DCB BCN nested attributes.
*/ +static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { + [DCB_BCN_ATTR_RP_0] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_1] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_2] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_3] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_4] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_5] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_6] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_7] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_ALL] = {.type = NLA_FLAG}, + [DCB_BCN_ATTR_ALPHA] = {.type = NLA_U32}, + [DCB_BCN_ATTR_BETA] = {.type = NLA_U32}, + [DCB_BCN_ATTR_GD] = {.type = NLA_U32}, + [DCB_BCN_ATTR_GI] = {.type = NLA_U32}, + [DCB_BCN_ATTR_TMAX] = {.type = NLA_U32}, + [DCB_BCN_ATTR_TD] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RMIN] = {.type = NLA_U32}, + [DCB_BCN_ATTR_W] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RD] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RU] = {.type = NLA_U32}, + [DCB_BCN_ATTR_WRTT] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RI] = {.type = NLA_U32}, + [DCB_BCN_ATTR_C] = {.type = NLA_U32}, + [DCB_BCN_ATTR_ALL] = {.type = NLA_FLAG}, +}; + +/* standard netlink reply call */ +static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, + u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct dcbmsg *dcb; + struct nlmsghdr *nlh; + int ret = -EINVAL; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + return ret; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, event, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = cmd; + dcb->dcb_pad = 0; + + ret = nla_put_u8(dcbnl_skb, attr, value); + if (ret) + goto err; + + /* end the message, assign the nlmsg_len. 
*/
+	nlmsg_end(dcbnl_skb, nlh);
+	/*
+	 * rtnl_unicast() consumes the skb whether or not delivery succeeds,
+	 * so the freeing error path below must not be taken after this call.
+	 */
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		return ret;
+
+	return 0;
+nlmsg_failure:
+err:
+	/* An sk_buff has to be released with kfree_skb(), not kfree(). */
+	kfree_skb(dcbnl_skb);
+	return ret;
+}
+
+/* DCB_CMD_GSTATE: reply with the value returned by the driver's getstate op. */
+static int dcbnl_getstate(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+
+	/* if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->getstate) */
+	if (!netdev->dcbnl_ops->getstate)
+		return ret;
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->getstate(netdev), RTM_GETDCB,
+			  DCB_CMD_GSTATE, DCB_ATTR_STATE, pid, seq, flags);
+
+	return ret;
+}
+
+/*
+ * DCB_CMD_PFC_GCFG: build a reply holding one u8 per requested user priority
+ * (DCB_PFC_UP_ATTR_0..7, or all of them when DCB_PFC_UP_ATTR_ALL is present),
+ * each obtained from the driver's getpfccfg op, nested in DCB_ATTR_PFC_CFG.
+ *
+ * Returns 0 on success and -EINVAL on any failure.
+ */
+static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb,
+			   u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1], *nest;
+	u8 value;
+	int ret = -EINVAL;
+	int i;
+	int getall = 0;
+
+	if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->getpfccfg)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
+			       tb[DCB_ATTR_PFC_CFG],
+			       dcbnl_pfc_up_nest);
+	if (ret)
+		goto err_out;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_PFC_GCFG;
+	/* Clear the pad as dcbnl_reply() does, so no stale bytes are sent. */
+	dcb->dcb_pad = 0;
+
+	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PFC_CFG);
+	if (!nest)
+		goto err;
+
+	if (data[DCB_PFC_UP_ATTR_ALL])
+		getall = 1;
+
+	for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
+		if (!getall && !data[i])
+			continue;
+
+		netdev->dcbnl_ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0,
+					     &value);
+		ret = nla_put_u8(dcbnl_skb, i, value);
+
+		if (ret) {
+			nla_nest_cancel(dcbnl_skb, nest);
+			goto err;
+		}
+	}
+	nla_nest_end(dcbnl_skb, nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/* The skb is consumed by rtnl_unicast() even on error: do not free. */
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err_out;
+
+	return 0;
+nlmsg_failure:
+err:
+	kfree_skb(dcbnl_skb);
+err_out:
+	return -EINVAL;
+}
+
+static int
dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb,
+		     u32 pid, u32 seq, u16 flags)
+{
+	/*
+	 * DCB_CMD_GPERM_HWADDR: reply with MAX_ADDR_LEN bytes filled in by the
+	 * driver's getpermhwaddr op. Returns 0 on success, -EINVAL otherwise.
+	 */
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	/*
+	 * Zeroed up front: the whole array is copied into the reply, so any
+	 * byte the driver leaves untouched must not carry stack contents.
+	 */
+	u8 perm_addr[MAX_ADDR_LEN] = { 0 };
+	int ret = -EINVAL;
+
+	if (!netdev->dcbnl_ops->getpermhwaddr)
+		return ret;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_GPERM_HWADDR;
+	/* Clear the pad as dcbnl_reply() does, so no stale bytes are sent. */
+	dcb->dcb_pad = 0;
+
+	netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr);
+
+	ret = nla_put(dcbnl_skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr),
+		      perm_addr);
+	/* Do not send a reply that is missing its only attribute. */
+	if (ret)
+		goto err;
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/* The skb is consumed by rtnl_unicast() even on error: do not free. */
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err_out;
+
+	return 0;
+
+nlmsg_failure:
+err:
+	/* An sk_buff has to be released with kfree_skb(), not kfree(). */
+	kfree_skb(dcbnl_skb);
+err_out:
+	return -EINVAL;
+}
+
+/*
+ * DCB_CMD_GCAP: reply with one u8 per requested capability attribute (or all
+ * of them when DCB_CAP_ATTR_ALL is present), as reported by the driver's
+ * getcap op; attributes for which the op returns non-zero are left out.
+ */
+static int dcbnl_getcap(struct net_device *netdev, struct nlattr **tb,
+			u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *data[DCB_CAP_ATTR_MAX + 1], *nest;
+	u8 value;
+	int ret = -EINVAL;
+	int i;
+	int getall = 0;
+
+	if (!tb[DCB_ATTR_CAP] || !netdev->dcbnl_ops->getcap)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP],
+			       dcbnl_cap_nest);
+	if (ret)
+		goto err_out;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_GCAP;
+	/* Clear the pad as dcbnl_reply() does, so no stale bytes are sent. */
+	dcb->dcb_pad = 0;
+
+	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_CAP);
+	if (!nest)
+		goto err;
+
+	if (data[DCB_CAP_ATTR_ALL])
+		getall = 1;
+
+	for (i = DCB_CAP_ATTR_ALL+1; i <= DCB_CAP_ATTR_MAX; i++) {
+		if (!getall && !data[i])
+			continue;
+
+		if (!netdev->dcbnl_ops->getcap(netdev, i, &value)) {
+			ret = nla_put_u8(dcbnl_skb, i, value);
+
+			if (ret) {
+				nla_nest_cancel(dcbnl_skb, nest);
+				goto
err;
+			}
+		}
+	}
+	nla_nest_end(dcbnl_skb, nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/* The skb is consumed by rtnl_unicast() even on error: do not free. */
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err_out;
+
+	return 0;
+nlmsg_failure:
+err:
+	/* An sk_buff has to be released with kfree_skb(), not kfree(). */
+	kfree_skb(dcbnl_skb);
+err_out:
+	return -EINVAL;
+}
+
+/*
+ * DCB_CMD_GNUMTCS: reply with one u8 per requested DCB_NUMTCS_ATTR_* attribute
+ * (or all of them when DCB_NUMTCS_ATTR_ALL is present), as reported by the
+ * driver's getnumtcs op.
+ *
+ * Returns 0 on success, the op's non-zero return value if the op fails, and
+ * -EINVAL for every other failure.
+ */
+static int dcbnl_getnumtcs(struct net_device *netdev, struct nlattr **tb,
+			   u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *data[DCB_NUMTCS_ATTR_MAX + 1], *nest;
+	u8 value;
+	int ret = -EINVAL;
+	int i;
+	int getall = 0;
+
+	if (!tb[DCB_ATTR_NUMTCS] || !netdev->dcbnl_ops->getnumtcs)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS],
+			       dcbnl_numtcs_nest);
+	if (ret) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	/*
+	 * NLMSG_NEW() jumps to nlmsg_failure when the header does not fit;
+	 * ret is 0 at this point, so set the error code before it can jump.
+	 */
+	ret = -EINVAL;
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_GNUMTCS;
+	/* Clear the pad as dcbnl_reply() does, so no stale bytes are sent. */
+	dcb->dcb_pad = 0;
+
+	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_NUMTCS);
+	if (!nest) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (data[DCB_NUMTCS_ATTR_ALL])
+		getall = 1;
+
+	for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) {
+		if (!getall && !data[i])
+			continue;
+
+		ret = netdev->dcbnl_ops->getnumtcs(netdev, i, &value);
+		if (!ret) {
+			ret = nla_put_u8(dcbnl_skb, i, value);
+
+			if (ret) {
+				nla_nest_cancel(dcbnl_skb, nest);
+				ret = -EINVAL;
+				goto err;
+			}
+		} else {
+			goto err;
+		}
+	}
+	nla_nest_end(dcbnl_skb, nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/* The skb is consumed by rtnl_unicast() even on error: do not free. */
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	return 0;
+nlmsg_failure:
+err:
+	/* An sk_buff has to be released with kfree_skb(), not kfree(). */
+	kfree_skb(dcbnl_skb);
+err_out:
+	return ret;
+}
+
+static int dcbnl_setnumtcs(struct net_device *netdev, struct nlattr **tb,
+			   u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *data[DCB_NUMTCS_ATTR_MAX + 1];
+	int ret = -EINVAL;
+	u8 value;
+	int i;
+
+	if
(!tb[DCB_ATTR_NUMTCS] || !netdev->dcbnl_ops->setnumtcs) /* the op called in the loop below */
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS],
+			       dcbnl_numtcs_nest);
+
+	if (ret) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	/* Hand every supplied attribute to the driver; stop at the first failure. */
+	for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) {
+		if (data[i] == NULL)
+			continue;
+
+		value = nla_get_u8(data[i]);
+
+		ret = netdev->dcbnl_ops->setnumtcs(netdev, i, value);
+
+		if (ret)
+			goto operr;
+	}
+
+operr:
+	/* The reply carries 1 if the driver reported a failure, 0 otherwise. */
+	ret = dcbnl_reply(!!ret, RTM_SETDCB, DCB_CMD_SNUMTCS,
+			  DCB_ATTR_NUMTCS, pid, seq, flags);
+
+err:
+	return ret;
+}
+
+/* DCB_CMD_PFC_GSTATE: reply with the value returned by the getpfcstate op. */
+static int dcbnl_getpfcstate(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+
+	if (!netdev->dcbnl_ops->getpfcstate)
+		return ret;
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->getpfcstate(netdev), RTM_GETDCB,
+			  DCB_CMD_PFC_GSTATE, DCB_ATTR_PFC_STATE,
+			  pid, seq, flags);
+
+	return ret;
+}
+
+/*
+ * DCB_CMD_PFC_SSTATE: pass the u8 in DCB_ATTR_PFC_STATE to the setpfcstate op
+ * and acknowledge with a reply value of 0.
+ */
+static int dcbnl_setpfcstate(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_PFC_STATE] || !netdev->dcbnl_ops->setpfcstate)
+		return ret;
+
+	value = nla_get_u8(tb[DCB_ATTR_PFC_STATE]);
+
+	netdev->dcbnl_ops->setpfcstate(netdev, value);
+
+	ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SSTATE, DCB_ATTR_PFC_STATE,
+			  pid, seq, flags);
+
+	return ret;
+}
+
+static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags, int dir)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *pg_nest, *param_nest, *data;
+	struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
+	struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
+	u8 prio, pgid, tc_pct, up_map;
+	int ret = -EINVAL;
+	int getall = 0;
+	int i;
+
+	if (!tb[DCB_ATTR_PG_CFG] ||
+	    !netdev->dcbnl_ops->getpgtccfgtx ||
+	    !netdev->dcbnl_ops->getpgtccfgrx ||
+	    !netdev->dcbnl_ops->getpgbwgcfgtx ||
+	    !netdev->dcbnl_ops->getpgbwgcfgrx)
+		return ret;
+
+	ret =
nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, + tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); + + if (ret) + goto err_out; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + goto err_out; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = (dir) ? DCB_CMD_PGRX_GCFG : DCB_CMD_PGTX_GCFG; + + pg_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PG_CFG); + if (!pg_nest) + goto err; + + if (pg_tb[DCB_PG_ATTR_TC_ALL]) + getall = 1; + + for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { + if (!getall && !pg_tb[i]) + continue; + + if (pg_tb[DCB_PG_ATTR_TC_ALL]) + data = pg_tb[DCB_PG_ATTR_TC_ALL]; + else + data = pg_tb[i]; + ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, + data, dcbnl_tc_param_nest); + if (ret) + goto err_pg; + + param_nest = nla_nest_start(dcbnl_skb, i); + if (!param_nest) + goto err_pg; + + pgid = DCB_ATTR_VALUE_UNDEFINED; + prio = DCB_ATTR_VALUE_UNDEFINED; + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + up_map = DCB_ATTR_VALUE_UNDEFINED; + + if (dir) { + /* Rx */ + netdev->dcbnl_ops->getpgtccfgrx(netdev, + i - DCB_PG_ATTR_TC_0, &prio, + &pgid, &tc_pct, &up_map); + } else { + /* Tx */ + netdev->dcbnl_ops->getpgtccfgtx(netdev, + i - DCB_PG_ATTR_TC_0, &prio, + &pgid, &tc_pct, &up_map); + } + + if (param_tb[DCB_TC_ATTR_PARAM_PGID] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, + DCB_TC_ATTR_PARAM_PGID, pgid); + if (ret) + goto err_param; + } + if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, + DCB_TC_ATTR_PARAM_UP_MAPPING, up_map); + if (ret) + goto err_param; + } + if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, + DCB_TC_ATTR_PARAM_STRICT_PRIO, prio); + if (ret) + goto err_param; + } + if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, 
DCB_TC_ATTR_PARAM_BW_PCT,
+					 tc_pct);
+			if (ret)
+				goto err_param;
+		}
+		nla_nest_end(dcbnl_skb, param_nest);
+	}
+
+	if (pg_tb[DCB_PG_ATTR_BW_ID_ALL])
+		getall = 1;
+	else
+		getall = 0;
+
+	/* One u8 bandwidth value per requested bandwidth group id. */
+	for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
+		if (!getall && !pg_tb[i])
+			continue;
+
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->getpgbwgcfgrx(netdev,
+					i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->getpgbwgcfgtx(netdev,
+					i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+		}
+		ret = nla_put_u8(dcbnl_skb, i, tc_pct);
+
+		if (ret)
+			goto err_pg;
+	}
+
+	nla_nest_end(dcbnl_skb, pg_nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/* The skb is consumed by rtnl_unicast() even on error: do not free. */
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err_out;
+
+	return 0;
+
+err_param:
+	nla_nest_cancel(dcbnl_skb, param_nest);
+err_pg:
+	nla_nest_cancel(dcbnl_skb, pg_nest);
+nlmsg_failure:
+err:
+	/* An sk_buff has to be released with kfree_skb(), not kfree(). */
+	kfree_skb(dcbnl_skb);
+err_out:
+	ret = -EINVAL;
+	return ret;
+}
+
+/* DCB_CMD_PGTX_GCFG: priority-group query with dir = 0 (Tx). */
+static int dcbnl_pgtx_getcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 0);
+}
+
+/* DCB_CMD_PGRX_GCFG: priority-group query with dir = 1 (Rx). */
+static int dcbnl_pgrx_getcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 1);
+}
+
+/*
+ * DCB_CMD_SSTATE: pass the u8 in DCB_ATTR_STATE to the driver's setstate op
+ * and acknowledge with a reply value of 0.
+ */
+static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->setstate)
+		return ret;
+
+	value = nla_get_u8(tb[DCB_ATTR_STATE]);
+
+	netdev->dcbnl_ops->setstate(netdev, value);
+
+	ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE,
+			  pid, seq, flags);
+
+	return ret;
+}
+
+static int dcbnl_setpfccfg(struct net_device *netdev, struct nlattr **tb,
+			   u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1];
+	int i;
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->setpfccfg)
+ return ret; + + ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, + tb[DCB_ATTR_PFC_CFG], + dcbnl_pfc_up_nest); + if (ret) + goto err; + + for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) { + if (data[i] == NULL) + continue; + value = nla_get_u8(data[i]); + netdev->dcbnl_ops->setpfccfg(netdev, + data[i]->nla_type - DCB_PFC_UP_ATTR_0, value); + } + + ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SCFG, DCB_ATTR_PFC_CFG, + pid, seq, flags); +err: + return ret; +} + +static int dcbnl_setall(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + + if (!tb[DCB_ATTR_SET_ALL] || !netdev->dcbnl_ops->setall) + return ret; + + ret = dcbnl_reply(netdev->dcbnl_ops->setall(netdev), RTM_SETDCB, + DCB_CMD_SET_ALL, DCB_ATTR_SET_ALL, pid, seq, flags); + + return ret; +} + +static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags, int dir) +{ + struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1]; + struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1]; + int ret = -EINVAL; + int i; + u8 pgid; + u8 up_map; + u8 prio; + u8 tc_pct; + + if (!tb[DCB_ATTR_PG_CFG] || + !netdev->dcbnl_ops->setpgtccfgtx || + !netdev->dcbnl_ops->setpgtccfgrx || + !netdev->dcbnl_ops->setpgbwgcfgtx || + !netdev->dcbnl_ops->setpgbwgcfgrx) + return ret; + + ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, + tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); + if (ret) + goto err; + + for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { + if (!pg_tb[i]) + continue; + + ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, + pg_tb[i], dcbnl_tc_param_nest); + if (ret) + goto err; + + pgid = DCB_ATTR_VALUE_UNDEFINED; + prio = DCB_ATTR_VALUE_UNDEFINED; + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + up_map = DCB_ATTR_VALUE_UNDEFINED; + + if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]) + prio = + nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]); + + if (param_tb[DCB_TC_ATTR_PARAM_PGID]) + pgid = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_PGID]); + + 
if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT]) + tc_pct = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_BW_PCT]); + + if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]) + up_map = + nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]); + + /* dir: Tx = 0, Rx = 1 */ + if (dir) { + /* Rx */ + netdev->dcbnl_ops->setpgtccfgrx(netdev, + i - DCB_PG_ATTR_TC_0, + prio, pgid, tc_pct, up_map); + } else { + /* Tx */ + netdev->dcbnl_ops->setpgtccfgtx(netdev, + i - DCB_PG_ATTR_TC_0, + prio, pgid, tc_pct, up_map); + } + } + + for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) { + if (!pg_tb[i]) + continue; + + tc_pct = nla_get_u8(pg_tb[i]); + + /* dir: Tx = 0, Rx = 1 */ + if (dir) { + /* Rx */ + netdev->dcbnl_ops->setpgbwgcfgrx(netdev, + i - DCB_PG_ATTR_BW_ID_0, tc_pct); + } else { + /* Tx */ + netdev->dcbnl_ops->setpgbwgcfgtx(netdev, + i - DCB_PG_ATTR_BW_ID_0, tc_pct); + } + } + + ret = dcbnl_reply(0, RTM_SETDCB, + (dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG), + DCB_ATTR_PG_CFG, pid, seq, flags); + +err: + return ret; +} + +static int dcbnl_pgtx_setcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 0); +} + +static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 1); +} + +static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *bcn_nest; + struct nlattr *bcn_tb[DCB_BCN_ATTR_MAX + 1]; + u8 value_byte; + u32 value_integer; + int ret = -EINVAL; + bool getall = false; + int i; + + if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->getbcnrp || + !netdev->dcbnl_ops->getbcncfg) + return ret; + + ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, + tb[DCB_ATTR_BCN], dcbnl_bcn_nest); + + if (ret) + goto err_out; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, 
GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_BCN_GCFG;
+	/* Clear the pad as dcbnl_reply() does, so no stale bytes are sent. */
+	dcb->dcb_pad = 0;
+
+	bcn_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_BCN);
+	if (!bcn_nest)
+		goto err;
+
+	if (bcn_tb[DCB_BCN_ATTR_ALL])
+		getall = true;
+
+	/* Per-priority u8 values from the getbcnrp op. */
+	for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) {
+		if (!getall && !bcn_tb[i])
+			continue;
+
+		netdev->dcbnl_ops->getbcnrp(netdev, i - DCB_BCN_ATTR_RP_0,
+					    &value_byte);
+		ret = nla_put_u8(dcbnl_skb, i, value_byte);
+		if (ret)
+			goto err_bcn;
+	}
+
+	/* u32 parameters from the getbcncfg op. */
+	for (i = DCB_BCN_ATTR_ALPHA; i <= DCB_BCN_ATTR_RI; i++) {
+		if (!getall && !bcn_tb[i])
+			continue;
+
+		netdev->dcbnl_ops->getbcncfg(netdev, i,
+					     &value_integer);
+		ret = nla_put_u32(dcbnl_skb, i, value_integer);
+		if (ret)
+			goto err_bcn;
+	}
+
+	nla_nest_end(dcbnl_skb, bcn_nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/* The skb is consumed by rtnl_unicast() even on error: do not free. */
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err_out;
+
+	return 0;
+
+err_bcn:
+	nla_nest_cancel(dcbnl_skb, bcn_nest);
+nlmsg_failure:
+err:
+	/* An sk_buff has to be released with kfree_skb(), not kfree(). */
+	kfree_skb(dcbnl_skb);
+err_out:
+	ret = -EINVAL;
+	return ret;
+}
+
+/*
+ * DCB_CMD_BCN_SCFG: apply the BCN attributes nested in DCB_ATTR_BCN through
+ * the driver's setbcnrp (u8, per priority) and setbcncfg (u32) ops, then
+ * acknowledge with a reply value of 0.
+ */
+static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlattr **tb,
+			    u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *data[DCB_BCN_ATTR_MAX + 1];
+	int i;
+	int ret = -EINVAL;
+	u8 value_byte;
+	u32 value_int;
+
+	if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->setbcncfg
+	    || !netdev->dcbnl_ops->setbcnrp)
+		return ret;
+
+	/*
+	 * Validate against the BCN policy: it is the table sized for
+	 * DCB_BCN_ATTR_MAX and it declares the u32 parameters read below.
+	 */
+	ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX,
+			       tb[DCB_ATTR_BCN],
+			       dcbnl_bcn_nest);
+	if (ret)
+		goto err;
+
+	for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) {
+		if (data[i] == NULL)
+			continue;
+		value_byte = nla_get_u8(data[i]);
+		netdev->dcbnl_ops->setbcnrp(netdev,
+			data[i]->nla_type - DCB_BCN_ATTR_RP_0, value_byte);
+	}
+
+	for (i = DCB_BCN_ATTR_ALPHA; i <= DCB_BCN_ATTR_RI; i++) {
+		if (data[i] == NULL)
+			continue;
+		value_int = nla_get_u32(data[i]);
+		netdev->dcbnl_ops->setbcncfg(netdev,
+					     
i, value_int); + } + + ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_BCN_SCFG, DCB_ATTR_BCN, + pid, seq, flags); +err: + return ret; +} + +static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = sock_net(skb->sk); + struct net_device *netdev; + struct dcbmsg *dcb = (struct dcbmsg *)NLMSG_DATA(nlh); + struct nlattr *tb[DCB_ATTR_MAX + 1]; + u32 pid = skb ? NETLINK_CB(skb).pid : 0; + int ret = -EINVAL; + + if (net != &init_net) + return -EINVAL; + + ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, + dcbnl_rtnl_policy); + if (ret < 0) + return ret; + + if (!tb[DCB_ATTR_IFNAME]) + return -EINVAL; + + netdev = dev_get_by_name(&init_net, nla_data(tb[DCB_ATTR_IFNAME])); + if (!netdev) + return -EINVAL; + + if (!netdev->dcbnl_ops) + goto errout; + + switch (dcb->cmd) { + case DCB_CMD_GSTATE: + ret = dcbnl_getstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PFC_GCFG: + ret = dcbnl_getpfccfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_GPERM_HWADDR: + ret = dcbnl_getperm_hwaddr(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGTX_GCFG: + ret = dcbnl_pgtx_getcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGRX_GCFG: + ret = dcbnl_pgrx_getcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_BCN_GCFG: + ret = dcbnl_bcn_getcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_SSTATE: + ret = dcbnl_setstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PFC_SCFG: + ret = dcbnl_setpfccfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + + case DCB_CMD_SET_ALL: + ret = dcbnl_setall(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGTX_SCFG: + ret = dcbnl_pgtx_setcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGRX_SCFG: 
+ ret = dcbnl_pgrx_setcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_GCAP: + ret = dcbnl_getcap(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_GNUMTCS: + ret = dcbnl_getnumtcs(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_SNUMTCS: + ret = dcbnl_setnumtcs(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PFC_GSTATE: + ret = dcbnl_getpfcstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PFC_SSTATE: + ret = dcbnl_setpfcstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_BCN_SCFG: + ret = dcbnl_bcn_setcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + default: + goto errout; + } +errout: + ret = -EINVAL; +out: + dev_put(netdev); + return ret; +} + +static int __init dcbnl_init(void) +{ + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL); + rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL); + + return 0; +} +module_init(dcbnl_init); + +static void __exit dcbnl_exit(void) +{ + rtnl_unregister(PF_UNSPEC, RTM_GETDCB); + rtnl_unregister(PF_UNSPEC, RTM_SETDCB); +} +module_exit(dcbnl_exit); + + diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 1e8be246ad1..01e4d39fa23 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -12,7 +12,6 @@ #include "ackvec.h" #include "dccp.h" -#include <linux/dccp.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -68,7 +67,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; /* Figure out how many options do we need to represent the ackvec */ - const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN); + const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN); u16 len = av->av_vec_len + 2 * nr_opts, i; u32 elapsed_time; const unsigned 
char *tail, *from; @@ -100,8 +99,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) for (i = 0; i < nr_opts; ++i) { int copylen = len; - if (len > DCCP_MAX_ACKVEC_OPT_LEN) - copylen = DCCP_MAX_ACKVEC_OPT_LEN; + if (len > DCCP_SINGLE_OPT_MAXLEN) + copylen = DCCP_SINGLE_OPT_MAXLEN; *to++ = DCCPO_ACK_VECTOR_0; *to++ = copylen + 2; @@ -432,7 +431,7 @@ found: int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, u64 *ackno, const u8 opt, const u8 *value, const u8 len) { - if (len > DCCP_MAX_ACKVEC_OPT_LEN) + if (len > DCCP_SINGLE_OPT_MAXLEN) return -1; /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index bcb64fb4ace..4ccee030524 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -11,15 +11,14 @@ * published by the Free Software Foundation. */ +#include <linux/dccp.h> #include <linux/compiler.h> #include <linux/ktime.h> #include <linux/list.h> #include <linux/types.h> -/* Read about the ECN nonce to see why it is 253 */ -#define DCCP_MAX_ACKVEC_OPT_LEN 253 /* We can spread an ack vector across multiple options */ -#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2) +#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2) #define DCCP_ACKVEC_STATE_RECEIVED 0 #define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 8fe931a3d7a..647cb0614f8 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -13,6 +13,13 @@ #include "ccid.h" +static u8 builtin_ccids[] = { + DCCPC_CCID2, /* CCID2 is supported by default */ +#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE) + DCCPC_CCID3, +#endif +}; + static struct ccid_operations *ccids[CCID_MAX]; #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) static atomic_t ccids_lockct = ATOMIC_INIT(0); @@ -86,6 +93,47 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab) } } +/* check that up to @array_len members in @ccid_array are supported */ 
+bool ccid_support_check(u8 const *ccid_array, u8 array_len) +{ + u8 i, j, found; + + for (i = 0, found = 0; i < array_len; i++, found = 0) { + for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++) + found = (ccid_array[i] == builtin_ccids[j]); + if (!found) + return false; + } + return true; +} + +/** + * ccid_get_builtin_ccids - Provide copy of `builtin' CCID array + * @ccid_array: pointer to copy into + * @array_len: value to return length into + * This function allocates memory - caller must see that it is freed after use. + */ +int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) +{ + *ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any()); + if (*ccid_array == NULL) + return -ENOBUFS; + *array_len = ARRAY_SIZE(builtin_ccids); + return 0; +} + +int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + if (len < sizeof(builtin_ccids)) + return -EINVAL; + + if (put_user(sizeof(builtin_ccids), optlen) || + copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids))) + return -EFAULT; + return 0; +} + int ccid_register(struct ccid_operations *ccid_ops) { int err = -ENOBUFS; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index fdeae7b5731..803343aed00 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -103,6 +103,11 @@ static inline void *ccid_priv(const struct ccid *ccid) return (void *)ccid->ccid_priv; } +extern bool ccid_support_check(u8 const *ccid_array, u8 array_len); +extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); +extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, + char __user *, int __user *); + extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp); @@ -111,6 +116,24 @@ extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk, gfp_t gfp); +static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp) +{ + struct ccid *ccid = 
dp->dccps_hc_rx_ccid; + + if (ccid == NULL || ccid->ccid_ops == NULL) + return -1; + return ccid->ccid_ops->ccid_id; +} + +static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp) +{ + struct ccid *ccid = dp->dccps_hc_tx_ccid; + + if (ccid == NULL || ccid->ccid_ops == NULL) + return -1; + return ccid->ccid_ops->ccid_id; +} + extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 9a430734530..c9ea19a4d85 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -25,7 +25,7 @@ /* * This implementation should follow RFC 4341 */ - +#include "../feat.h" #include "../ccid.h" #include "../dccp.h" #include "ccid2.h" @@ -147,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); val = max_ratio; } - if (val > 0xFFFF) /* RFC 4340, 11.3 */ - val = 0xFFFF; + if (val > DCCPF_ACK_RATIO_MAX) + val = DCCPF_ACK_RATIO_MAX; if (val == dp->dccps_l_ack_ratio) return; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index b4bc6e095a0..33a1127270c 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -49,7 +49,7 @@ extern int dccp_debug; extern struct inet_hashinfo dccp_hashinfo; -extern atomic_t dccp_orphan_count; +extern struct percpu_counter dccp_orphan_count; extern void dccp_time_wait(struct sock *sk, int state, int timeo); @@ -98,7 +98,6 @@ extern int sysctl_dccp_retries2; extern int sysctl_dccp_feat_sequence_window; extern int sysctl_dccp_feat_rx_ccid; extern int sysctl_dccp_feat_tx_ccid; -extern int sysctl_dccp_feat_ack_ratio; extern int sysctl_dccp_feat_send_ack_vector; extern int sysctl_dccp_feat_send_ndp_count; extern int sysctl_dccp_tx_qlen; @@ -252,7 +251,8 @@ extern const char *dccp_state_name(const int state); extern void dccp_set_state(struct sock *sk, const int state); extern void dccp_done(struct sock *sk); -extern void 
dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb); +extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp, + struct sk_buff const *skb); extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); @@ -441,6 +441,10 @@ static inline int dccp_ack_pending(const struct sock *sk) inet_csk_ack_scheduled(sk); } +extern int dccp_feat_finalise_settings(struct dccp_sock *dp); +extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); +extern void dccp_feat_list_purge(struct list_head *fn_list); + extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); extern int dccp_insert_option_elapsed_time(struct sock *sk, diff --git a/net/dccp/diag.c b/net/dccp/diag.c index d8a3509b26f..d1e100395ef 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -45,7 +45,7 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, dccp_get_info(sk, _info); } -static struct inet_diag_handler dccp_diag_handler = { +static const struct inet_diag_handler dccp_diag_handler = { .idiag_hashinfo = &dccp_hashinfo, .idiag_get_info = dccp_diag_get_info, .idiag_type = DCCPDIAG_GETSOCK, diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 933a0ecf8d4..2c2216f64b1 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -6,6 +6,8 @@ * * ASSUMPTIONS * ----------- + * o Feature negotiation is coordinated with connection setup (as in TCP), wild + * changes of parameters of an established connection are not supported. * o All currently known SP features have 1-byte quantities. If in the future * extensions of RFCs 4340..42 define features with item lengths larger than * one byte, a feature-specific extension of the code will be required. 
@@ -23,53 +25,560 @@ #define DCCP_FEAT_SP_NOAGREE (-123) -int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, - u8 *val, u8 len, gfp_t gfp) +static const struct { + u8 feat_num; /* DCCPF_xxx */ + enum dccp_feat_type rxtx; /* RX or TX */ + enum dccp_feat_type reconciliation; /* SP or NN */ + u8 default_value; /* as in 6.4 */ +/* + * Lookup table for location and type of features (from RFC 4340/4342) + * +--------------------------+----+-----+----+----+---------+-----------+ + * | Feature | Location | Reconc. | Initial | Section | + * | | RX | TX | SP | NN | Value | Reference | + * +--------------------------+----+-----+----+----+---------+-----------+ + * | DCCPF_CCID | | X | X | | 2 | 10 | + * | DCCPF_SHORT_SEQNOS | | X | X | | 0 | 7.6.1 | + * | DCCPF_SEQUENCE_WINDOW | | X | | X | 100 | 7.5.2 | + * | DCCPF_ECN_INCAPABLE | X | | X | | 0 | 12.1 | + * | DCCPF_ACK_RATIO | | X | | X | 2 | 11.3 | + * | DCCPF_SEND_ACK_VECTOR | X | | X | | 0 | 11.5 | + * | DCCPF_SEND_NDP_COUNT | | X | X | | 0 | 7.7.2 | + * | DCCPF_MIN_CSUM_COVER | X | | X | | 0 | 9.2.1 | + * | DCCPF_DATA_CHECKSUM | X | | X | | 0 | 9.3.1 | + * | DCCPF_SEND_LEV_RATE | X | | X | | 0 | 4342/8.4 | + * +--------------------------+----+-----+----+----+---------+-----------+ + */ +} dccp_feat_table[] = { + { DCCPF_CCID, FEAT_AT_TX, FEAT_SP, 2 }, + { DCCPF_SHORT_SEQNOS, FEAT_AT_TX, FEAT_SP, 0 }, + { DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100 }, + { DCCPF_ECN_INCAPABLE, FEAT_AT_RX, FEAT_SP, 0 }, + { DCCPF_ACK_RATIO, FEAT_AT_TX, FEAT_NN, 2 }, + { DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0 }, + { DCCPF_SEND_NDP_COUNT, FEAT_AT_TX, FEAT_SP, 0 }, + { DCCPF_MIN_CSUM_COVER, FEAT_AT_RX, FEAT_SP, 0 }, + { DCCPF_DATA_CHECKSUM, FEAT_AT_RX, FEAT_SP, 0 }, + { DCCPF_SEND_LEV_RATE, FEAT_AT_RX, FEAT_SP, 0 }, +}; +#define DCCP_FEAT_SUPPORTED_MAX ARRAY_SIZE(dccp_feat_table) + +/** + * dccp_feat_index - Hash function to map feature number into array position + * Returns consecutive array index or -1 if the 
feature is not understood. + */ +static int dccp_feat_index(u8 feat_num) { - struct dccp_opt_pend *opt; + /* The first 9 entries are occupied by the types from RFC 4340, 6.4 */ + if (feat_num > DCCPF_RESERVED && feat_num <= DCCPF_DATA_CHECKSUM) + return feat_num - 1; - dccp_feat_debug(type, feature, *val); + /* + * Other features: add cases for new feature types here after adding + * them to the above table. + */ + switch (feat_num) { + case DCCPF_SEND_LEV_RATE: + return DCCP_FEAT_SUPPORTED_MAX - 1; + } + return -1; +} - if (len > 3) { - DCCP_WARN("invalid length %d\n", len); - return -EINVAL; +static u8 dccp_feat_type(u8 feat_num) +{ + int idx = dccp_feat_index(feat_num); + + if (idx < 0) + return FEAT_UNKNOWN; + return dccp_feat_table[idx].reconciliation; +} + +static int dccp_feat_default_value(u8 feat_num) +{ + int idx = dccp_feat_index(feat_num); + /* + * There are no default values for unknown features, so encountering a + * negative index here indicates a serious problem somewhere else. + */ + DCCP_BUG_ON(idx < 0); + + return idx < 0 ? 
0 : dccp_feat_table[idx].default_value; +} + +/* copy constructor, fval must not already contain allocated memory */ +static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len) +{ + fval->sp.len = len; + if (fval->sp.len > 0) { + fval->sp.vec = kmemdup(val, len, gfp_any()); + if (fval->sp.vec == NULL) { + fval->sp.len = 0; + return -ENOBUFS; + } } - /* XXX add further sanity checks */ + return 0; +} - /* check if that feature is already being negotiated */ - list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { - /* ok we found a negotiation for this option already */ - if (opt->dccpop_feat == feature && opt->dccpop_type == type) { - dccp_pr_debug("Replacing old\n"); - /* replace */ - BUG_ON(opt->dccpop_val == NULL); - kfree(opt->dccpop_val); - opt->dccpop_val = val; - opt->dccpop_len = len; - opt->dccpop_conf = 0; - return 0; +static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val) +{ + if (unlikely(val == NULL)) + return; + if (dccp_feat_type(feat_num) == FEAT_SP) + kfree(val->sp.vec); + memset(val, 0, sizeof(*val)); +} + +static struct dccp_feat_entry * + dccp_feat_clone_entry(struct dccp_feat_entry const *original) +{ + struct dccp_feat_entry *new; + u8 type = dccp_feat_type(original->feat_num); + + if (type == FEAT_UNKNOWN) + return NULL; + + new = kmemdup(original, sizeof(struct dccp_feat_entry), gfp_any()); + if (new == NULL) + return NULL; + + if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val, + original->val.sp.vec, + original->val.sp.len)) { + kfree(new); + return NULL; + } + return new; +} + +static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry) +{ + if (entry != NULL) { + dccp_feat_val_destructor(entry->feat_num, &entry->val); + kfree(entry); + } +} + +/* + * List management functions + * + * Feature negotiation lists rely on and maintain the following invariants: + * - each feat_num in the list is known, i.e. 
we know its type and default value + * - each feat_num/is_local combination is unique (old entries are overwritten) + * - SP values are always freshly allocated + * - list is sorted in increasing order of feature number (faster lookup) + */ +static struct dccp_feat_entry *dccp_feat_list_lookup(struct list_head *fn_list, + u8 feat_num, bool is_local) +{ + struct dccp_feat_entry *entry; + + list_for_each_entry(entry, fn_list, node) { + if (entry->feat_num == feat_num && entry->is_local == is_local) + return entry; + else if (entry->feat_num > feat_num) + break; + } + return NULL; +} + +/** + * dccp_feat_entry_new - Central list update routine (called by all others) + * @head: list to add to + * @feat: feature number + * @local: whether the local (1) or remote feature with number @feat is meant + * This is the only constructor and serves to ensure the above invariants. + */ +static struct dccp_feat_entry * + dccp_feat_entry_new(struct list_head *head, u8 feat, bool local) +{ + struct dccp_feat_entry *entry; + + list_for_each_entry(entry, head, node) + if (entry->feat_num == feat && entry->is_local == local) { + dccp_feat_val_destructor(entry->feat_num, &entry->val); + return entry; + } else if (entry->feat_num > feat) { + head = &entry->node; + break; } + + entry = kmalloc(sizeof(*entry), gfp_any()); + if (entry != NULL) { + entry->feat_num = feat; + entry->is_local = local; + list_add_tail(&entry->node, head); } + return entry; +} - /* negotiation for a new feature */ - opt = kmalloc(sizeof(*opt), gfp); - if (opt == NULL) +/** + * dccp_feat_push_change - Add/overwrite a Change option in the list + * @fn_list: feature-negotiation list to update + * @feat: one of %dccp_feature_numbers + * @local: whether local (1) or remote (0) @feat_num is meant + * @needs_mandatory: whether to use Mandatory feature negotiation options + * @fval: pointer to NN/SP value to be inserted (will be copied) + */ +static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local, 
+ u8 mandatory, dccp_feat_val *fval) +{ + struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local); + + if (new == NULL) return -ENOMEM; - opt->dccpop_type = type; - opt->dccpop_feat = feature; - opt->dccpop_len = len; - opt->dccpop_val = val; - opt->dccpop_conf = 0; - opt->dccpop_sc = NULL; + new->feat_num = feat; + new->is_local = local; + new->state = FEAT_INITIALISING; + new->needs_confirm = 0; + new->empty_confirm = 0; + new->val = *fval; + new->needs_mandatory = mandatory; + + return 0; +} + +static inline void dccp_feat_list_pop(struct dccp_feat_entry *entry) +{ + list_del(&entry->node); + dccp_feat_entry_destructor(entry); +} + +void dccp_feat_list_purge(struct list_head *fn_list) +{ + struct dccp_feat_entry *entry, *next; + + list_for_each_entry_safe(entry, next, fn_list, node) + dccp_feat_entry_destructor(entry); + INIT_LIST_HEAD(fn_list); +} +EXPORT_SYMBOL_GPL(dccp_feat_list_purge); + +/* generate @to as full clone of @from - @to must not contain any nodes */ +int dccp_feat_clone_list(struct list_head const *from, struct list_head *to) +{ + struct dccp_feat_entry *entry, *new; + + INIT_LIST_HEAD(to); + list_for_each_entry(entry, from, node) { + new = dccp_feat_clone_entry(entry); + if (new == NULL) + goto cloning_failed; + list_add_tail(&new->node, to); + } + return 0; + +cloning_failed: + dccp_feat_list_purge(to); + return -ENOMEM; +} + +static u8 dccp_feat_is_valid_nn_val(u8 feat_num, u64 val) +{ + switch (feat_num) { + case DCCPF_ACK_RATIO: + return val <= DCCPF_ACK_RATIO_MAX; + case DCCPF_SEQUENCE_WINDOW: + return val >= DCCPF_SEQ_WMIN && val <= DCCPF_SEQ_WMAX; + } + return 0; /* feature unknown - so we can't tell */ +} + +/* check that SP values are within the ranges defined in RFC 4340 */ +static u8 dccp_feat_is_valid_sp_val(u8 feat_num, u8 val) +{ + switch (feat_num) { + case DCCPF_CCID: + return val == DCCPC_CCID2 || val == DCCPC_CCID3; + /* Type-check Boolean feature values: */ + case DCCPF_SHORT_SEQNOS: + case 
DCCPF_ECN_INCAPABLE: + case DCCPF_SEND_ACK_VECTOR: + case DCCPF_SEND_NDP_COUNT: + case DCCPF_DATA_CHECKSUM: + case DCCPF_SEND_LEV_RATE: + return val < 2; + case DCCPF_MIN_CSUM_COVER: + return val < 16; + } + return 0; /* feature unknown */ +} + +static u8 dccp_feat_sp_list_ok(u8 feat_num, u8 const *sp_list, u8 sp_len) +{ + if (sp_list == NULL || sp_len < 1) + return 0; + while (sp_len--) + if (!dccp_feat_is_valid_sp_val(feat_num, *sp_list++)) + return 0; + return 1; +} + +/** + * __feat_register_nn - Register new NN value on socket + * @fn: feature-negotiation list to register with + * @feat: an NN feature from %dccp_feature_numbers + * @mandatory: use Mandatory option if 1 + * @nn_val: value to register (restricted to 4 bytes) + * Note that NN features are local by definition (RFC 4340, 6.3.2). + */ +static int __feat_register_nn(struct list_head *fn, u8 feat, + u8 mandatory, u64 nn_val) +{ + dccp_feat_val fval = { .nn = nn_val }; + + if (dccp_feat_type(feat) != FEAT_NN || + !dccp_feat_is_valid_nn_val(feat, nn_val)) + return -EINVAL; + + /* Don't bother with default values, they will be activated anyway. 
*/ + if (nn_val - (u64)dccp_feat_default_value(feat) == 0) + return 0; + + return dccp_feat_push_change(fn, feat, 1, mandatory, &fval); +} + +/** + * __feat_register_sp - Register new SP value/list on socket + * @fn: feature-negotiation list to register with + * @feat: an SP feature from %dccp_feature_numbers + * @is_local: whether the local (1) or the remote (0) @feat is meant + * @mandatory: use Mandatory option if 1 + * @sp_val: SP value followed by optional preference list + * @sp_len: length of @sp_val in bytes + */ +static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, + u8 mandatory, u8 const *sp_val, u8 sp_len) +{ + dccp_feat_val fval; + + if (dccp_feat_type(feat) != FEAT_SP || + !dccp_feat_sp_list_ok(feat, sp_val, sp_len)) + return -EINVAL; + + /* Avoid negotiating alien CCIDs by only advertising supported ones */ + if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len)) + return -EOPNOTSUPP; - BUG_ON(opt->dccpop_val == NULL); + if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) + return -ENOMEM; - list_add_tail(&opt->dccpop_node, &dmsk->dccpms_pending); + return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval); +} + +/** + * dccp_feat_register_sp - Register requests to change SP feature values + * @sk: client or listening socket + * @feat: one of %dccp_feature_numbers + * @is_local: whether the local (1) or remote (0) @feat is meant + * @list: array of preferred values, in descending order of preference + * @len: length of @list in bytes + */ +int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, + u8 const *list, u8 len) +{ /* any changes must be registered before establishing the connection */ + if (sk->sk_state != DCCP_CLOSED) + return -EISCONN; + if (dccp_feat_type(feat) != FEAT_SP) + return -EINVAL; + return __feat_register_sp(&dccp_sk(sk)->dccps_featneg, feat, is_local, + 0, list, len); +} + +/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */ +int dccp_feat_register_nn(struct 
sock *sk, u8 feat, u64 val) +{ + /* any changes must be registered before establishing the connection */ + if (sk->sk_state != DCCP_CLOSED) + return -EISCONN; + if (dccp_feat_type(feat) != FEAT_NN) + return -EINVAL; + return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val); +} + +/* + * Tracking features whose value depend on the choice of CCID + * + * This is designed with an extension in mind so that a list walk could be done + * before activating any features. However, the existing framework was found to + * work satisfactorily up until now, the automatic verification is left open. + * When adding new CCIDs, add a corresponding dependency table here. + */ +static const struct ccid_dependency *dccp_feat_ccid_deps(u8 ccid, bool is_local) +{ + static const struct ccid_dependency ccid2_dependencies[2][2] = { + /* + * CCID2 mandates Ack Vectors (RFC 4341, 4.): as CCID is a TX + * feature and Send Ack Vector is an RX feature, `is_local' + * needs to be reversed. + */ + { /* Dependencies of the receiver-side (remote) CCID2 */ + { + .dependent_feat = DCCPF_SEND_ACK_VECTOR, + .is_local = true, + .is_mandatory = true, + .val = 1 + }, + { 0, 0, 0, 0 } + }, + { /* Dependencies of the sender-side (local) CCID2 */ + { + .dependent_feat = DCCPF_SEND_ACK_VECTOR, + .is_local = false, + .is_mandatory = true, + .val = 1 + }, + { 0, 0, 0, 0 } + } + }; + static const struct ccid_dependency ccid3_dependencies[2][5] = { + { /* + * Dependencies of the receiver-side CCID3 + */ + { /* locally disable Ack Vectors */ + .dependent_feat = DCCPF_SEND_ACK_VECTOR, + .is_local = true, + .is_mandatory = false, + .val = 0 + }, + { /* see below why Send Loss Event Rate is on */ + .dependent_feat = DCCPF_SEND_LEV_RATE, + .is_local = true, + .is_mandatory = true, + .val = 1 + }, + { /* NDP Count is needed as per RFC 4342, 6.1.1 */ + .dependent_feat = DCCPF_SEND_NDP_COUNT, + .is_local = false, + .is_mandatory = true, + .val = 1 + }, + { 0, 0, 0, 0 }, + }, + { /* + * CCID3 at the TX side: 
we request that the HC-receiver + * will not send Ack Vectors (they will be ignored, so + * Mandatory is not set); we enable Send Loss Event Rate + * (Mandatory since the implementation does not support + * the Loss Intervals option of RFC 4342, 8.6). + * The last two options are for peer's information only. + */ + { + .dependent_feat = DCCPF_SEND_ACK_VECTOR, + .is_local = false, + .is_mandatory = false, + .val = 0 + }, + { + .dependent_feat = DCCPF_SEND_LEV_RATE, + .is_local = false, + .is_mandatory = true, + .val = 1 + }, + { /* this CCID does not support Ack Ratio */ + .dependent_feat = DCCPF_ACK_RATIO, + .is_local = true, + .is_mandatory = false, + .val = 0 + }, + { /* tell receiver we are sending NDP counts */ + .dependent_feat = DCCPF_SEND_NDP_COUNT, + .is_local = true, + .is_mandatory = false, + .val = 1 + }, + { 0, 0, 0, 0 } + } + }; + switch (ccid) { + case DCCPC_CCID2: + return ccid2_dependencies[is_local]; + case DCCPC_CCID3: + return ccid3_dependencies[is_local]; + default: + return NULL; + } +} + +/** + * dccp_feat_propagate_ccid - Resolve dependencies of features on choice of CCID + * @fn: feature-negotiation list to update + * @id: CCID number to track + * @is_local: whether TX CCID (1) or RX CCID (0) is meant + * This function needs to be called after registering all other features. 
+ */ +static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local) +{ + const struct ccid_dependency *table = dccp_feat_ccid_deps(id, is_local); + int i, rc = (table == NULL); + + for (i = 0; rc == 0 && table[i].dependent_feat != DCCPF_RESERVED; i++) + if (dccp_feat_type(table[i].dependent_feat) == FEAT_SP) + rc = __feat_register_sp(fn, table[i].dependent_feat, + table[i].is_local, + table[i].is_mandatory, + &table[i].val, 1); + else + rc = __feat_register_nn(fn, table[i].dependent_feat, + table[i].is_mandatory, + table[i].val); + return rc; +} + +/** + * dccp_feat_finalise_settings - Finalise settings before starting negotiation + * @dp: client or listening socket (settings will be inherited) + * This is called after all registrations (socket initialisation, sysctls, and + * sockopt calls), and before sending the first packet containing Change options + * (ie. client-Request or server-Response), to ensure internal consistency. + */ +int dccp_feat_finalise_settings(struct dccp_sock *dp) +{ + struct list_head *fn = &dp->dccps_featneg; + struct dccp_feat_entry *entry; + int i = 2, ccids[2] = { -1, -1 }; + + /* + * Propagating CCIDs: + * 1) not useful to propagate CCID settings if this host advertises more + * than one CCID: the choice of CCID may still change - if this is + * the client, or if this is the server and the client sends + * singleton CCID values. + * 2) since is that propagate_ccid changes the list, we defer changing + * the sorted list until after the traversal. + */ + list_for_each_entry(entry, fn, node) + if (entry->feat_num == DCCPF_CCID && entry->val.sp.len == 1) + ccids[entry->is_local] = entry->val.sp.vec[0]; + while (i--) + if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i)) + return -1; return 0; } -EXPORT_SYMBOL_GPL(dccp_feat_change); +/** + * dccp_feat_server_ccid_dependencies - Resolve CCID-dependent features + * It is the server which resolves the dependencies once the CCID has been + * fully negotiated. 
If no CCID has been negotiated, it uses the default CCID. + */ +int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq) +{ + struct list_head *fn = &dreq->dreq_featneg; + struct dccp_feat_entry *entry; + u8 is_local, ccid; + + for (is_local = 0; is_local <= 1; is_local++) { + entry = dccp_feat_list_lookup(fn, DCCPF_CCID, is_local); + + if (entry != NULL && !entry->empty_confirm) + ccid = entry->val.sp.vec[0]; + else + ccid = dccp_feat_default_value(DCCPF_CCID); + + if (dccp_feat_propagate_ccid(fn, ccid, is_local)) + return -1; + } + return 0; +} static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr) { @@ -377,6 +886,9 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) { int rc; + /* Ignore Change requests other than during connection setup */ + if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) + return 0; dccp_feat_debug(type, feature, *val); /* figure out if it's SP or NN feature */ @@ -426,6 +938,9 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, int found = 0; int all_confirmed = 1; + /* Ignore Confirm options other than during connection setup */ + if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) + return 0; dccp_feat_debug(type, feature, *val); /* locate our change request */ @@ -460,17 +975,6 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, all_confirmed = 0; } - /* fix re-transmit timer */ - /* XXX gotta make sure that no option negotiation occurs during - * connection shutdown. Consider that the CLOSEREQ is sent and timer is - * on. if all options are confirmed it might kill timer which should - * remain alive until close is received. - */ - if (all_confirmed) { - dccp_pr_debug("clear feat negotiation timer %p\n", sk); - inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); - } - if (!found) dccp_pr_debug("%s(%d, ...) 
never requested\n", dccp_feat_typename(type), feature); @@ -564,42 +1068,30 @@ out_clean: EXPORT_SYMBOL_GPL(dccp_feat_clone); -static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat, - u8 *val, u8 len) -{ - int rc = -ENOMEM; - u8 *copy = kmemdup(val, len, GFP_KERNEL); - - if (copy != NULL) { - rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL); - if (rc) - kfree(copy); - } - return rc; -} - -int dccp_feat_init(struct dccp_minisock *dmsk) +int dccp_feat_init(struct sock *sk) { + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_minisock *dmsk = dccp_msk(sk); int rc; - INIT_LIST_HEAD(&dmsk->dccpms_pending); - INIT_LIST_HEAD(&dmsk->dccpms_conf); + INIT_LIST_HEAD(&dmsk->dccpms_pending); /* XXX no longer used */ + INIT_LIST_HEAD(&dmsk->dccpms_conf); /* XXX no longer used */ /* CCID L */ - rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_CCID, - &dmsk->dccpms_tx_ccid, 1); + rc = __feat_register_sp(&dp->dccps_featneg, DCCPF_CCID, 1, 0, + &dmsk->dccpms_tx_ccid, 1); if (rc) goto out; /* CCID R */ - rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_R, DCCPF_CCID, - &dmsk->dccpms_rx_ccid, 1); + rc = __feat_register_sp(&dp->dccps_featneg, DCCPF_CCID, 0, 0, + &dmsk->dccpms_rx_ccid, 1); if (rc) goto out; /* Ack ratio */ - rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_ACK_RATIO, - &dmsk->dccpms_ack_ratio, 1); + rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0, + dp->dccps_l_ack_ratio); out: return rc; } @@ -639,6 +1131,8 @@ const char *dccp_feat_name(const u8 feat) if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC) return feature_names[DCCPF_RESERVED]; + if (feat == DCCPF_SEND_LEV_RATE) + return "Send Loss Event Rate"; if (feat >= DCCPF_MIN_CCID_SPECIFIC) return "CCID-specific"; diff --git a/net/dccp/feat.h b/net/dccp/feat.h index e272222c7ac..7efb2025f6b 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -14,6 +14,92 @@ #include <linux/types.h> #include "dccp.h" +/* + * Known limit values + */ +/* Ack Ratio takes 2-byte 
integer values (11.3) */ +#define DCCPF_ACK_RATIO_MAX 0xFFFF +/* Wmin=32 and Wmax=2^46-1 from 7.5.2 */ +#define DCCPF_SEQ_WMIN 32 +#define DCCPF_SEQ_WMAX 0x3FFFFFFFFFFFull +/* Maximum number of SP values that fit in a single (Confirm) option */ +#define DCCP_FEAT_MAX_SP_VALS (DCCP_SINGLE_OPT_MAXLEN - 2) + +enum dccp_feat_type { + FEAT_AT_RX = 1, /* located at RX side of half-connection */ + FEAT_AT_TX = 2, /* located at TX side of half-connection */ + FEAT_SP = 4, /* server-priority reconciliation (6.3.1) */ + FEAT_NN = 8, /* non-negotiable reconciliation (6.3.2) */ + FEAT_UNKNOWN = 0xFF /* not understood or invalid feature */ +}; + +enum dccp_feat_state { + FEAT_DEFAULT = 0, /* using default values from 6.4 */ + FEAT_INITIALISING, /* feature is being initialised */ + FEAT_CHANGING, /* Change sent but not confirmed yet */ + FEAT_UNSTABLE, /* local modification in state CHANGING */ + FEAT_STABLE /* both ends (think they) agree */ +}; + +/** + * dccp_feat_val - Container for SP or NN feature values + * @nn: single NN value + * @sp.vec: single SP value plus optional preference list + * @sp.len: length of @sp.vec in bytes + */ +typedef union { + u64 nn; + struct { + u8 *vec; + u8 len; + } sp; +} dccp_feat_val; + +/** + * struct feat_entry - Data structure to perform feature negotiation + * @val: feature's current value (SP features may have preference list) + * @state: feature's current state + * @feat_num: one of %dccp_feature_numbers + * @needs_mandatory: whether Mandatory options should be sent + * @needs_confirm: whether to send a Confirm instead of a Change + * @empty_confirm: whether to send an empty Confirm (depends on @needs_confirm) + * @is_local: feature location (1) or feature-remote (0) + * @node: list pointers, entries arranged in FIFO order + */ +struct dccp_feat_entry { + dccp_feat_val val; + enum dccp_feat_state state:8; + u8 feat_num; + + bool needs_mandatory, + needs_confirm, + empty_confirm, + is_local; + + struct list_head node; +}; + +static inline 
u8 dccp_feat_genopt(struct dccp_feat_entry *entry) +{ + if (entry->needs_confirm) + return entry->is_local ? DCCPO_CONFIRM_L : DCCPO_CONFIRM_R; + return entry->is_local ? DCCPO_CHANGE_L : DCCPO_CHANGE_R; +} + +/** + * struct ccid_dependency - Track changes resulting from choosing a CCID + * @dependent_feat: one of %dccp_feature_numbers + * @is_local: local (1) or remote (0) @dependent_feat + * @is_mandatory: whether presence of @dependent_feat is mission-critical or not + * @val: corresponding default value for @dependent_feat (u8 is sufficient here) + */ +struct ccid_dependency { + u8 dependent_feat; + bool is_local:1, + is_mandatory:1; + u8 val; +}; + #ifdef CONFIG_IP_DCCP_DEBUG extern const char *dccp_feat_typename(const u8 type); extern const char *dccp_feat_name(const u8 feat); @@ -27,14 +113,34 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) #define dccp_feat_debug(type, feat, val) #endif /* CONFIG_IP_DCCP_DEBUG */ -extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, - u8 *val, u8 len, gfp_t gfp); +extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, + u8 const *list, u8 len); +extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); extern int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len); extern int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len); extern void dccp_feat_clean(struct dccp_minisock *dmsk); extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk); -extern int dccp_feat_init(struct dccp_minisock *dmsk); +extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); +extern int dccp_feat_init(struct sock *sk); + +/* + * Encoding variable-length options and their maximum length. + * + * This affects NN options (SP options are all u8) and other variable-length + * options (see table 3 in RFC 4340). The limit is currently given the Sequence + * Window NN value (sec. 
7.5.2) and the NDP count (sec. 7.7) option, all other + * options consume less than 6 bytes (timestamps are 4 bytes). + * When updating this constant (e.g. due to new internet drafts / RFCs), make + * sure that you also update all code which refers to it. + */ +#define DCCP_OPTVAL_MAXLEN 6 + +extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); +extern u64 dccp_decode_value_var(const u8 *bf, const u8 len); +extern int dccp_insert_option_mandatory(struct sk_buff *skb); +extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, + u8 *val, u8 len, bool repeat_first); #endif /* _DCCP_FEAT_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 779d0ed9ae9..3070015edc7 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -590,8 +590,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0) return 1; - - /* FIXME: do congestion control initialization */ goto discard; } if (dh->dccph_type == DCCP_PKT_RESET) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 01e3e020625..d1dd95289b8 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -545,6 +545,7 @@ out: static void dccp_v4_reqsk_destructor(struct request_sock *req) { + dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); kfree(inet_rsk(req)->opt); } @@ -595,7 +596,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - dccp_reqsk_init(req, skb); + if (dccp_reqsk_init(req, dccp_sk(sk), skb)) + goto drop_and_free; dreq = dccp_rsk(req); if (dccp_parse_options(sk, dreq, skb)) @@ -936,6 +938,7 @@ static struct proto dccp_v4_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d4ce1224e00..b963f35c65f 100644 --- 
a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -168,7 +168,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(net, &dst, &fl, sk, 0); if (err < 0) { sk->sk_err_soft = -err; goto out; @@ -279,7 +279,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0); if (err < 0) goto done; @@ -304,6 +304,7 @@ done: static void dccp_v6_reqsk_destructor(struct request_sock *req) { + dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); if (inet6_rsk(req)->pktopts != NULL) kfree_skb(inet6_rsk(req)->pktopts); } @@ -342,7 +343,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(ctl_sk, &skb->dst, &fl)) { - if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { + if (xfrm_lookup(net, &skb->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, skb, &fl, NULL, 0); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); @@ -426,7 +427,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - dccp_reqsk_init(req, skb); + if (dccp_reqsk_init(req, dccp_sk(sk), skb)) + goto drop_and_free; dreq = dccp_rsk(req); if (dccp_parse_options(sk, dreq, skb)) @@ -567,7 +569,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } @@ -1002,7 +1004,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT); + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); if (err < 0) { if (err == 
-EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); @@ -1138,6 +1140,7 @@ static struct proto dccp_v6_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp6_request_sock_ops, .twsk_prot = &dccp6_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e6bf99e3e41..ed61bc58e41 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -47,7 +47,6 @@ void dccp_minisock_init(struct dccp_minisock *dmsk) dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; - dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; } @@ -125,6 +124,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + INIT_LIST_HEAD(&newdp->dccps_featneg); if (dccp_feat_clone(sk, newsk)) goto out_free; @@ -304,7 +304,8 @@ void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); -void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) +int dccp_reqsk_init(struct request_sock *req, + struct dccp_sock const *dp, struct sk_buff const *skb) { struct dccp_request_sock *dreq = dccp_rsk(req); @@ -313,6 +314,9 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) inet_rsk(req)->acked = 0; req->rcv_wnd = sysctl_dccp_feat_sequence_window; dreq->dreq_timestamp_echo = 0; + + /* inherit feature negotiation options from listening socket */ + return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg); } EXPORT_SYMBOL_GPL(dccp_reqsk_init); diff --git a/net/dccp/options.c b/net/dccp/options.c index 0809b63cb05..20d5c72d1fd 100644 --- 
a/net/dccp/options.c +++ b/net/dccp/options.c @@ -26,20 +26,23 @@ int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; -int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO; int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; -static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) +u64 dccp_decode_value_var(const u8 *bf, const u8 len) { - u32 value = 0; + u64 value = 0; + if (len >= DCCP_OPTVAL_MAXLEN) + value += ((u64)*bf++) << 40; + if (len > 4) + value += ((u64)*bf++) << 32; if (len > 3) - value += *bf++ << 24; + value += ((u64)*bf++) << 24; if (len > 2) - value += *bf++ << 16; + value += ((u64)*bf++) << 16; if (len > 1) - value += *bf++ << 8; + value += ((u64)*bf++) << 8; if (len > 0) value += *bf; @@ -64,7 +67,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, (dh->dccph_doff * 4); struct dccp_options_received *opt_recv = &dp->dccps_options_received; unsigned char opt, len; - unsigned char *value; + unsigned char *uninitialized_var(value); u32 elapsed_time; __be32 opt_val; int rc; @@ -299,9 +302,12 @@ out_invalid_option: EXPORT_SYMBOL_GPL(dccp_parse_options); -static void dccp_encode_value_var(const u32 value, unsigned char *to, - const unsigned int len) +void dccp_encode_value_var(const u64 value, u8 *to, const u8 len) { + if (len >= DCCP_OPTVAL_MAXLEN) + *to++ = (value & 0xFF0000000000ull) >> 40; + if (len > 4) + *to++ = (value & 0xFF00000000ull) >> 32; if (len > 3) *to++ = (value & 0xFF000000) >> 24; if (len > 2) @@ -461,23 +467,61 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, return 0; } -static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, - u8 *val, u8 len) +/** + * dccp_insert_option_mandatory - Mandatory option (5.8.2) + * Note that since we are using skb_push, 
this function needs to be called + * _after_ inserting the option it is supposed to influence (stack order). + */ +int dccp_insert_option_mandatory(struct sk_buff *skb) { - u8 *to; + if (DCCP_SKB_CB(skb)->dccpd_opt_len >= DCCP_MAX_OPT_LEN) + return -1; - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) { - DCCP_WARN("packet too small for feature %d option!\n", feat); + DCCP_SKB_CB(skb)->dccpd_opt_len++; + *skb_push(skb, 1) = DCCPO_MANDATORY; + return 0; +} + +/** + * dccp_insert_fn_opt - Insert single Feature-Negotiation option into @skb + * @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R + * @feat: one out of %dccp_feature_numbers + * @val: NN value or SP array (preferred element first) to copy + * @len: true length of @val in bytes (excluding first element repetition) + * @repeat_first: whether to copy the first element of @val twice + * The last argument is used to construct Confirm options, where the preferred + * value and the preference list appear separately (RFC 4340, 6.3.1). Preference + * lists are kept such that the preferred entry is always first, so we only need + * to copy twice, and avoid the overhead of cloning into a bigger array. 
+ */ +int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, + u8 *val, u8 len, bool repeat_first) +{ + u8 tot_len, *to; + + /* take the `Feature' field and possible repetition into account */ + if (len > (DCCP_SINGLE_OPT_MAXLEN - 2)) { + DCCP_WARN("length %u for feature %u too large\n", len, feat); return -1; } - DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3; + if (unlikely(val == NULL || len == 0)) + len = repeat_first = 0; + tot_len = 3 + repeat_first + len; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) { + DCCP_WARN("packet too small for feature %d option!\n", feat); + return -1; + } + DCCP_SKB_CB(skb)->dccpd_opt_len += tot_len; - to = skb_push(skb, len + 3); + to = skb_push(skb, tot_len); *to++ = type; - *to++ = len + 3; + *to++ = tot_len; *to++ = feat; + if (repeat_first) + *to++ = *val; if (len) memcpy(to, val, len); @@ -487,69 +531,6 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, return 0; } -static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); - struct dccp_opt_pend *opt, *next; - int change = 0; - - /* confirm any options [NN opts] */ - list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) { - dccp_insert_feat_opt(skb, opt->dccpop_type, - opt->dccpop_feat, opt->dccpop_val, - opt->dccpop_len); - /* fear empty confirms */ - if (opt->dccpop_val) - kfree(opt->dccpop_val); - kfree(opt); - } - INIT_LIST_HEAD(&dmsk->dccpms_conf); - - /* see which features we need to send */ - list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { - /* see if we need to send any confirm */ - if (opt->dccpop_sc) { - dccp_insert_feat_opt(skb, opt->dccpop_type + 1, - opt->dccpop_feat, - opt->dccpop_sc->dccpoc_val, - opt->dccpop_sc->dccpoc_len); - - BUG_ON(!opt->dccpop_sc->dccpoc_val); - kfree(opt->dccpop_sc->dccpoc_val); - kfree(opt->dccpop_sc); - opt->dccpop_sc = NULL; - } - - /* any option not 
confirmed, re-send it */ - if (!opt->dccpop_conf) { - dccp_insert_feat_opt(skb, opt->dccpop_type, - opt->dccpop_feat, opt->dccpop_val, - opt->dccpop_len); - change++; - } - } - - /* Retransmit timer. - * If this is the master listening sock, we don't set a timer on it. It - * should be fine because if the dude doesn't receive our RESPONSE - * [which will contain the CHANGE] he will send another REQUEST which - * will "retrnasmit" the change. - */ - if (change && dp->dccps_role != DCCP_ROLE_LISTEN) { - dccp_pr_debug("reset feat negotiation timer %p\n", sk); - - /* XXX don't reset the timer on re-transmissions. I.e. reset it - * only when sending new stuff i guess. Currently the timer - * never backs off because on re-transmission it just resets it! - */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); - } - - return 0; -} - /* The length of all options needs to be a multiple of 4 (5.8) */ static void dccp_insert_option_padding(struct sk_buff *skb) { @@ -586,13 +567,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) dp->dccps_hc_rx_insert_options = 0; } - /* Feature negotiation */ - /* Data packets can't do feat negotiation */ - if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA && - DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK && - dccp_insert_options_feat(sk, skb)) - return -1; - /* * Obtain RTT sample from Request/Response exchange. * This is currently used in CCID 3 initialisation. 
diff --git a/net/dccp/output.c b/net/dccp/output.c index 809d803d500..fea30cdc0be 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -339,10 +339,12 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; - if (dccp_insert_options_rsk(dreq, skb)) { - kfree_skb(skb); - return NULL; - } + /* Resolve feature dependencies resulting from choice of CCID */ + if (dccp_feat_server_ccid_dependencies(dreq)) + goto response_failed; + + if (dccp_insert_options_rsk(dreq, skb)) + goto response_failed; /* Build and checksum header */ dh = dccp_zeroed_hdr(skb, dccp_header_size); @@ -363,6 +365,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, inet_rsk(req)->acked = 1; DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; +response_failed: + kfree_skb(skb); + return NULL; } EXPORT_SYMBOL_GPL(dccp_make_response); @@ -469,6 +474,10 @@ int dccp_connect(struct sock *sk) struct sk_buff *skb; struct inet_connection_sock *icsk = inet_csk(sk); + /* do not connect if feature negotiation setup fails */ + if (dccp_feat_finalise_settings(dccp_sk(sk))) + return -EPROTO; + dccp_connect_init(sk); skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 49ba8d5ec4b..37731da4148 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -74,14 +74,11 @@ static void printl(const char *fmt, ...) 
static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { - const struct dccp_minisock *dmsk = dccp_msk(sk); const struct inet_sock *inet = inet_sk(sk); - const struct ccid3_hc_tx_sock *hctx; + struct ccid3_hc_tx_sock *hctx = NULL; - if (dmsk->dccpms_tx_ccid == DCCPC_CCID3) + if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3) hctx = ccid3_hc_tx_sk(sk); - else - hctx = NULL; if (port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) { diff --git a/net/dccp/proto.c b/net/dccp/proto.c index d0bd3481976..db225f93cd5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -40,16 +40,10 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); -atomic_t dccp_orphan_count = ATOMIC_INIT(0); - +struct percpu_counter dccp_orphan_count; EXPORT_SYMBOL_GPL(dccp_orphan_count); -struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { - .lhash_lock = RW_LOCK_UNLOCKED, - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), -}; - +struct inet_hashinfo dccp_hashinfo; EXPORT_SYMBOL_GPL(dccp_hashinfo); /* the maximum queue length for tx in packets. 0 is no limit */ @@ -193,6 +187,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dccp_init_xmit_timers(sk); + INIT_LIST_HEAD(&dp->dccps_featneg); /* * FIXME: We're hardcoding the CCID, and doing this at this point makes * the listening (master) sock get CCID control blocks, which is not @@ -201,7 +196,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) * setsockopt(CCIDs-I-want/accept). 
-acme */ if (likely(ctl_sock_initialized)) { - int rc = dccp_feat_init(dmsk); + int rc = dccp_feat_init(sk); if (rc) return rc; @@ -267,7 +262,7 @@ void dccp_destroy_sock(struct sock *sk) dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; /* clean up feature negotiation state */ - dccp_feat_clean(dmsk); + dccp_feat_list_purge(&dp->dccps_featneg); } EXPORT_SYMBOL_GPL(dccp_destroy_sock); @@ -277,6 +272,9 @@ static inline int dccp_listen_start(struct sock *sk, int backlog) struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; + /* do not start to listen if feature negotiation setup fails */ + if (dccp_feat_finalise_settings(dp)) + return -EPROTO; return inet_csk_listen_start(sk, backlog); } @@ -466,42 +464,70 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service, return 0; } -/* byte 1 is feature. the rest is the preference list */ -static int dccp_setsockopt_change(struct sock *sk, int type, - struct dccp_so_feat __user *optval) +static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx) { - struct dccp_so_feat opt; - u8 *val; - int rc; + u8 *list, len; + int i, rc; - if (copy_from_user(&opt, optval, sizeof(opt))) - return -EFAULT; + if (cscov < 0 || cscov > 15) + return -EINVAL; /* - * rfc4340: 6.1. Change Options + * Populate a list of permissible values, in the range cscov...15. This + * is necessary since feature negotiation of single values only works if + * both sides incidentally choose the same value. Since the list starts + * lowest-value first, negotiation will pick the smallest shared value. 
*/ - if (opt.dccpsf_len < 1) + if (cscov == 0) + return 0; + len = 16 - cscov; + + list = kmalloc(len, GFP_KERNEL); + if (list == NULL) + return -ENOBUFS; + + for (i = 0; i < len; i++) + list[i] = cscov++; + + rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len); + + if (rc == 0) { + if (rx) + dccp_sk(sk)->dccps_pcrlen = cscov; + else + dccp_sk(sk)->dccps_pcslen = cscov; + } + kfree(list); + return rc; +} + +static int dccp_setsockopt_ccid(struct sock *sk, int type, + char __user *optval, int optlen) +{ + u8 *val; + int rc = 0; + + if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) return -EINVAL; - val = kmalloc(opt.dccpsf_len, GFP_KERNEL); - if (!val) + val = kmalloc(optlen, GFP_KERNEL); + if (val == NULL) return -ENOMEM; - if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) { - rc = -EFAULT; - goto out_free_val; + if (copy_from_user(val, optval, optlen)) { + kfree(val); + return -EFAULT; } - rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat, - val, opt.dccpsf_len, GFP_KERNEL); - if (rc) - goto out_free_val; + lock_sock(sk); + if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) + rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen); -out: - return rc; + if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID)) + rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen); + release_sock(sk); -out_free_val: kfree(val); - goto out; + return rc; } static int do_dccp_setsockopt(struct sock *sk, int level, int optname, @@ -510,7 +536,21 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, struct dccp_sock *dp = dccp_sk(sk); int val, err = 0; - if (optlen < sizeof(int)) + switch (optname) { + case DCCP_SOCKOPT_PACKET_SIZE: + DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); + return 0; + case DCCP_SOCKOPT_CHANGE_L: + case DCCP_SOCKOPT_CHANGE_R: + DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n"); + return 0; + case DCCP_SOCKOPT_CCID: + case DCCP_SOCKOPT_RX_CCID: + 
case DCCP_SOCKOPT_TX_CCID: + return dccp_setsockopt_ccid(sk, optname, optval, optlen); + } + + if (optlen < (int)sizeof(int)) return -EINVAL; if (get_user(val, (int __user *)optval)) @@ -521,53 +561,24 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, lock_sock(sk); switch (optname) { - case DCCP_SOCKOPT_PACKET_SIZE: - DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); - err = 0; - break; - case DCCP_SOCKOPT_CHANGE_L: - if (optlen != sizeof(struct dccp_so_feat)) - err = -EINVAL; - else - err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L, - (struct dccp_so_feat __user *) - optval); - break; - case DCCP_SOCKOPT_CHANGE_R: - if (optlen != sizeof(struct dccp_so_feat)) - err = -EINVAL; - else - err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R, - (struct dccp_so_feat __user *) - optval); - break; case DCCP_SOCKOPT_SERVER_TIMEWAIT: if (dp->dccps_role != DCCP_ROLE_SERVER) err = -EOPNOTSUPP; else dp->dccps_server_timewait = (val != 0); break; - case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ - if (val < 0 || val > 15) - err = -EINVAL; - else - dp->dccps_pcslen = val; + case DCCP_SOCKOPT_SEND_CSCOV: + err = dccp_setsockopt_cscov(sk, val, false); break; - case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 
9.2.1 */ - if (val < 0 || val > 15) - err = -EINVAL; - else { - dp->dccps_pcrlen = val; - /* FIXME: add feature negotiation, - * ChangeL(MinimumChecksumCoverage, val) */ - } + case DCCP_SOCKOPT_RECV_CSCOV: + err = dccp_setsockopt_cscov(sk, val, true); break; default: err = -ENOPROTOOPT; break; } - release_sock(sk); + return err; } @@ -648,6 +659,18 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_GET_CUR_MPS: val = dp->dccps_mss_cache; break; + case DCCP_SOCKOPT_AVAILABLE_CCIDS: + return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen); + case DCCP_SOCKOPT_TX_CCID: + val = ccid_get_current_tx_ccid(dp); + if (val < 0) + return -ENOPROTOOPT; + break; + case DCCP_SOCKOPT_RX_CCID: + val = ccid_get_current_rx_ccid(dp); + if (val < 0) + return -ENOPROTOOPT; + break; case DCCP_SOCKOPT_SERVER_TIMEWAIT: val = dp->dccps_server_timewait; break; @@ -976,7 +999,7 @@ adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); - atomic_inc(sk->sk_prot->orphan_count); + percpu_counter_inc(sk->sk_prot->orphan_count); /* * It is the last release_sock in its life. It will remove backlog. 
@@ -1040,17 +1063,21 @@ static int __init dccp_init(void) { unsigned long goal; int ehash_order, bhash_order, i; - int rc = -ENOBUFS; + int rc; BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); - + rc = percpu_counter_init(&dccp_orphan_count, 0); + if (rc) + goto out; + rc = -ENOBUFS; + inet_hashinfo_init(&dccp_hashinfo); dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL); if (!dccp_hashinfo.bind_bucket_cachep) - goto out; + goto out_free_percpu; /* * Size and allocate the main established and bind bucket @@ -1084,8 +1111,8 @@ static int __init dccp_init(void) } for (i = 0; i < dccp_hashinfo.ehash_size; i++) { - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); + INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); + INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i); } if (inet_ehash_locks_alloc(&dccp_hashinfo)) @@ -1143,6 +1170,8 @@ out_free_dccp_ehash: out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_hashinfo.bind_bucket_cachep = NULL; +out_free_percpu: + percpu_counter_destroy(&dccp_orphan_count); goto out; } diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 21295993fdb..f6e54f433e2 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -41,13 +41,6 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec, }, { - .procname = "ack_ratio", - .data = &sysctl_dccp_feat_ack_ratio, - .maxlen = sizeof(sysctl_dccp_feat_ack_ratio), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "send_ackvec", .data = &sysctl_dccp_feat_send_ack_vector, .maxlen = sizeof(sysctl_dccp_feat_send_ack_vector), diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 54b3c7e9e01..162d1e683c3 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -87,17 +87,6 @@ static void dccp_retransmit_timer(struct sock *sk) { struct 
inet_connection_sock *icsk = inet_csk(sk); - /* retransmit timer is used for feature negotiation throughout - * connection. In this case, no packet is re-transmitted, but rather an - * ack is generated and pending changes are placed into its options. - */ - if (sk->sk_send_head == NULL) { - dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk); - if (sk->sk_state == DCCP_OPEN) - dccp_send_ack(sk); - goto backoff; - } - /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was * sent, no need to retransmit, this sock is dead. @@ -126,7 +115,6 @@ static void dccp_retransmit_timer(struct sock *sk) return; } -backoff: icsk->icsk_backoff++; icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 3c23ab33dbc..cf0e1849929 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -167,7 +167,7 @@ static struct hlist_head *dn_find_list(struct sock *sk) if (scp->addr.sdn_flags & SDF_WILD) return hlist_empty(&dn_wild_sk) ? 
&dn_wild_sk : NULL; - return &dn_sk_hash[dn_ntohs(scp->addrloc) & DN_SK_HASH_MASK]; + return &dn_sk_hash[le16_to_cpu(scp->addrloc) & DN_SK_HASH_MASK]; } /* @@ -181,7 +181,7 @@ static int check_port(__le16 port) if (port == 0) return -1; - sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { struct dn_scp *scp = DN_SK(sk); if (scp->addrloc == port) return -1; @@ -195,12 +195,12 @@ static unsigned short port_alloc(struct sock *sk) static unsigned short port = 0x2000; unsigned short i_port = port; - while(check_port(dn_htons(++port)) != 0) { + while(check_port(cpu_to_le16(++port)) != 0) { if (port == i_port) return 0; } - scp->addrloc = dn_htons(port); + scp->addrloc = cpu_to_le16(port); return 1; } @@ -255,7 +255,7 @@ static struct hlist_head *listen_hash(struct sockaddr_dn *addr) if (hash == 0) { hash = addr->sdn_objnamel; - for(i = 0; i < dn_ntohs(addr->sdn_objnamel); i++) { + for(i = 0; i < le16_to_cpu(addr->sdn_objnamel); i++) { hash ^= addr->sdn_objname[i]; hash ^= (hash << 3); } @@ -297,16 +297,16 @@ int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned c break; case 1: *buf++ = 0; - *buf++ = dn_ntohs(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, dn_ntohs(sdn->sdn_objnamel)); - len = 3 + dn_ntohs(sdn->sdn_objnamel); + *buf++ = le16_to_cpu(sdn->sdn_objnamel); + memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); + len = 3 + le16_to_cpu(sdn->sdn_objnamel); break; case 2: memset(buf, 0, 5); buf += 5; - *buf++ = dn_ntohs(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, dn_ntohs(sdn->sdn_objnamel)); - len = 7 + dn_ntohs(sdn->sdn_objnamel); + *buf++ = le16_to_cpu(sdn->sdn_objnamel); + memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); + len = 7 + le16_to_cpu(sdn->sdn_objnamel); break; } @@ -327,7 +327,7 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, int namel = 12; sdn->sdn_objnum = 
0; - sdn->sdn_objnamel = dn_htons(0); + sdn->sdn_objnamel = cpu_to_le16(0); memset(sdn->sdn_objname, 0, DN_MAXOBJL); if (len < 2) @@ -361,13 +361,13 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, if (len < 0) return -1; - sdn->sdn_objnamel = dn_htons(*data++); - len -= dn_ntohs(sdn->sdn_objnamel); + sdn->sdn_objnamel = cpu_to_le16(*data++); + len -= le16_to_cpu(sdn->sdn_objnamel); - if ((len < 0) || (dn_ntohs(sdn->sdn_objnamel) > namel)) + if ((len < 0) || (le16_to_cpu(sdn->sdn_objnamel) > namel)) return -1; - memcpy(sdn->sdn_objname, data, dn_ntohs(sdn->sdn_objnamel)); + memcpy(sdn->sdn_objname, data, le16_to_cpu(sdn->sdn_objnamel)); return size - len; } @@ -391,7 +391,7 @@ struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr) continue; if (scp->addr.sdn_objnamel != addr->sdn_objnamel) continue; - if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, dn_ntohs(addr->sdn_objnamel)) != 0) + if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, le16_to_cpu(addr->sdn_objnamel)) != 0) continue; } sock_hold(sk); @@ -419,7 +419,7 @@ struct sock *dn_find_by_skb(struct sk_buff *skb) struct dn_scp *scp; read_lock(&dn_hash_lock); - sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(cb->dst_port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { scp = DN_SK(sk); if (cb->src != dn_saddr2dn(&scp->peer)) continue; @@ -734,10 +734,10 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (saddr->sdn_family != AF_DECnet) return -EINVAL; - if (dn_ntohs(saddr->sdn_nodeaddrl) && (dn_ntohs(saddr->sdn_nodeaddrl) != 2)) + if (le16_to_cpu(saddr->sdn_nodeaddrl) && (le16_to_cpu(saddr->sdn_nodeaddrl) != 2)) return -EINVAL; - if (dn_ntohs(saddr->sdn_objnamel) > DN_MAXOBJL) + if (le16_to_cpu(saddr->sdn_objnamel) > DN_MAXOBJL) return -EINVAL; if (saddr->sdn_flags & ~SDF_WILD) @@ -748,7 +748,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 
return -EACCES; if (!(saddr->sdn_flags & SDF_WILD)) { - if (dn_ntohs(saddr->sdn_nodeaddrl)) { + if (le16_to_cpu(saddr->sdn_nodeaddrl)) { read_lock(&dev_base_lock); ldev = NULL; for_each_netdev(&init_net, dev) { @@ -799,15 +799,15 @@ static int dn_auto_bind(struct socket *sock) if ((scp->accessdata.acc_accl != 0) && (scp->accessdata.acc_accl <= 12)) { - scp->addr.sdn_objnamel = dn_htons(scp->accessdata.acc_accl); - memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, dn_ntohs(scp->addr.sdn_objnamel)); + scp->addr.sdn_objnamel = cpu_to_le16(scp->accessdata.acc_accl); + memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, le16_to_cpu(scp->addr.sdn_objnamel)); scp->accessdata.acc_accl = 0; memset(scp->accessdata.acc_acc, 0, 40); } /* End of compatibility stuff */ - scp->addr.sdn_add.a_len = dn_htons(2); + scp->addr.sdn_add.a_len = cpu_to_le16(2); rv = dn_dev_bind_default((__le16 *)scp->addr.sdn_add.a_addr); if (rv == 0) { rv = dn_hash_sock(sk); @@ -1027,7 +1027,7 @@ static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt) u16 len = *ptr++; /* yes, it's 8bit on the wire */ BUG_ON(len > 16); /* we've checked the contents earlier */ - opt->opt_optl = dn_htons(len); + opt->opt_optl = cpu_to_le16(len); opt->opt_status = 0; memcpy(opt->opt_data, ptr, len); skb_pull(skb, len + 1); @@ -1375,7 +1375,7 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us if (optlen != sizeof(struct optdata_dn)) return -EINVAL; - if (dn_ntohs(u.opt.opt_optl) > 16) + if (le16_to_cpu(u.opt.opt_optl) > 16) return -EINVAL; memcpy(&scp->conndata_out, &u.opt, optlen); @@ -1388,7 +1388,7 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us if (optlen != sizeof(struct optdata_dn)) return -EINVAL; - if (dn_ntohs(u.opt.opt_optl) > 16) + if (le16_to_cpu(u.opt.opt_optl) > 16) return -EINVAL; memcpy(&scp->discdata_out, &u.opt, optlen); @@ -2213,12 +2213,12 @@ static void dn_printable_object(struct sockaddr_dn *dn, unsigned char 
*buf) { int i; - switch (dn_ntohs(dn->sdn_objnamel)) { + switch (le16_to_cpu(dn->sdn_objnamel)) { case 0: sprintf(buf, "%d", dn->sdn_objnum); break; default: - for (i = 0; i < dn_ntohs(dn->sdn_objnamel); i++) { + for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) { buf[i] = dn->sdn_objname[i]; if (IS_NOT_PRINTABLE(buf[i])) buf[i] = '.'; @@ -2281,7 +2281,7 @@ static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk) seq_printf(seq, "%6s/%04X %04d:%04d %04d:%04d %01d %-16s " "%6s/%04X %04d:%04d %04d:%04d %01d %-16s %4s %s\n", - dn_addr2asc(dn_ntohs(dn_saddr2dn(&scp->addr)), buf1), + dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->addr)), buf1), scp->addrloc, scp->numdat, scp->numoth, @@ -2289,7 +2289,7 @@ static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk) scp->ackxmt_oth, scp->flowloc_sw, local_object, - dn_addr2asc(dn_ntohs(dn_saddr2dn(&scp->peer)), buf2), + dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->peer)), buf2), scp->addrrem, scp->numdat_rcv, scp->numoth_rcv, diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 28e26bd08e2..424d86ac12f 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -885,7 +885,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) memcpy(msg->tiver, dn_eco_version, 3); dn_dn2eth(msg->id, ifa->ifa_local); msg->iinfo = DN_RT_INFO_ENDN; - msg->blksize = dn_htons(mtu2blksize(dev)); + msg->blksize = cpu_to_le16(mtu2blksize(dev)); msg->area = 0x00; memset(msg->seed, 0, 8); memcpy(msg->neighbor, dn_hiord, ETH_ALEN); @@ -895,13 +895,13 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) dn_dn2eth(msg->neighbor, dn->addr); } - msg->timer = dn_htons((unsigned short)dn_db->parms.t3); + msg->timer = cpu_to_le16((unsigned short)dn_db->parms.t3); msg->mpd = 0x00; msg->datalen = 0x02; memset(msg->data, 0xAA, 2); pktlen = (__le16 *)skb_push(skb,2); - *pktlen = dn_htons(skb->len - 2); + *pktlen = cpu_to_le16(skb->len - 2); 
skb_reset_network_header(skb); @@ -929,7 +929,7 @@ static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn if (dn->priority != dn_db->parms.priority) return 0; - if (dn_ntohs(dn->addr) < dn_ntohs(ifa->ifa_local)) + if (le16_to_cpu(dn->addr) < le16_to_cpu(ifa->ifa_local)) return 1; return 0; @@ -973,11 +973,11 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) ptr += ETH_ALEN; *ptr++ = dn_db->parms.forwarding == 1 ? DN_RT_INFO_L1RT : DN_RT_INFO_L2RT; - *((__le16 *)ptr) = dn_htons(mtu2blksize(dev)); + *((__le16 *)ptr) = cpu_to_le16(mtu2blksize(dev)); ptr += 2; *ptr++ = dn_db->parms.priority; /* Priority */ *ptr++ = 0; /* Area: Reserved */ - *((__le16 *)ptr) = dn_htons((unsigned short)dn_db->parms.t3); + *((__le16 *)ptr) = cpu_to_le16((unsigned short)dn_db->parms.t3); ptr += 2; *ptr++ = 0; /* MPD: Reserved */ i1 = ptr++; @@ -993,7 +993,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) skb_trim(skb, (27 + *i2)); pktlen = (__le16 *)skb_push(skb, 2); - *pktlen = dn_htons(skb->len - 2); + *pktlen = cpu_to_le16(skb->len - 2); skb_reset_network_header(skb); @@ -1401,8 +1401,8 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v) mtu2blksize(dev), dn_db->parms.priority, dn_db->parms.state, dn_db->parms.name, - dn_db->router ? dn_addr2asc(dn_ntohs(*(__le16 *)dn_db->router->primary_key), router_buf) : "", - dn_db->peer ? dn_addr2asc(dn_ntohs(*(__le16 *)dn_db->peer->primary_key), peer_buf) : ""); + dn_db->router ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->router->primary_key), router_buf) : "", + dn_db->peer ? 
dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->peer->primary_key), peer_buf) : ""); } return 0; } @@ -1445,7 +1445,7 @@ void __init dn_dev_init(void) return; } - decnet_address = dn_htons((addr[0] << 10) | addr[1]); + decnet_address = cpu_to_le16((addr[0] << 10) | addr[1]); dn_dev_devices_on(); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 1ca13b17974..05b5aa05e50 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -250,7 +250,7 @@ static int dn_long_output(struct sk_buff *skb) data = skb_push(skb, sizeof(struct dn_long_packet) + 3); lp = (struct dn_long_packet *)(data+3); - *((__le16 *)data) = dn_htons(skb->len - 2); + *((__le16 *)data) = cpu_to_le16(skb->len - 2); *(data + 2) = 1 | DN_RT_F_PF; /* Padding */ lp->msgflg = DN_RT_PKT_LONG|(cb->rt_flags&(DN_RT_F_IE|DN_RT_F_RQR|DN_RT_F_RTS)); @@ -294,7 +294,7 @@ static int dn_short_output(struct sk_buff *skb) } data = skb_push(skb, sizeof(struct dn_short_packet) + 2); - *((__le16 *)data) = dn_htons(skb->len - 2); + *((__le16 *)data) = cpu_to_le16(skb->len - 2); sp = (struct dn_short_packet *)(data+2); sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS)); @@ -336,12 +336,12 @@ static int dn_phase3_output(struct sk_buff *skb) } data = skb_push(skb, sizeof(struct dn_short_packet) + 2); - *((__le16 *)data) = dn_htons(skb->len - 2); + *((__le16 *)data) = cpu_to_le16(skb->len - 2); sp = (struct dn_short_packet *)(data + 2); sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS)); - sp->dstnode = cb->dst & dn_htons(0x03ff); - sp->srcnode = cb->src & dn_htons(0x03ff); + sp->dstnode = cb->dst & cpu_to_le16(0x03ff); + sp->srcnode = cb->src & cpu_to_le16(0x03ff); sp->forward = cb->hops & 0x3f; skb_reset_network_header(skb); @@ -394,7 +394,7 @@ int dn_neigh_router_hello(struct sk_buff *skb) if (neigh->dev->type == ARPHRD_ETHER) memcpy(neigh->ha, ð_hdr(skb)->h_source, ETH_ALEN); - dn->blksize = dn_ntohs(msg->blksize); + dn->blksize = le16_to_cpu(msg->blksize); dn->priority 
= msg->priority; dn->flags &= ~DN_NDFLAG_P3; @@ -410,7 +410,7 @@ int dn_neigh_router_hello(struct sk_buff *skb) } /* Only use routers in our area */ - if ((dn_ntohs(src)>>10) == (dn_ntohs((decnet_address))>>10)) { + if ((le16_to_cpu(src)>>10) == (le16_to_cpu((decnet_address))>>10)) { if (!dn_db->router) { dn_db->router = neigh_clone(neigh); } else { @@ -453,7 +453,7 @@ int dn_neigh_endnode_hello(struct sk_buff *skb) if (neigh->dev->type == ARPHRD_ETHER) memcpy(neigh->ha, ð_hdr(skb)->h_source, ETH_ALEN); dn->flags &= ~(DN_NDFLAG_R1 | DN_NDFLAG_R2); - dn->blksize = dn_ntohs(msg->blksize); + dn->blksize = le16_to_cpu(msg->blksize); dn->priority = 0; } @@ -543,7 +543,7 @@ static inline void dn_neigh_format_entry(struct seq_file *seq, read_lock(&n->lock); seq_printf(seq, "%-7s %s%s%s %02x %02d %07ld %-8s\n", - dn_addr2asc(dn_ntohs(dn->addr), buf), + dn_addr2asc(le16_to_cpu(dn->addr), buf), (dn->flags&DN_NDFLAG_R1) ? "1" : "-", (dn->flags&DN_NDFLAG_R2) ? "2" : "-", (dn->flags&DN_NDFLAG_P3) ? "3" : "-", diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 4074a6e5d0d..5d8a2a56fd3 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -83,7 +83,9 @@ static void dn_log_martian(struct sk_buff *skb, const char *msg) if (decnet_log_martians && net_ratelimit()) { char *devname = skb->dev ? 
skb->dev->name : "???"; struct dn_skb_cb *cb = DN_SKB_CB(skb); - printk(KERN_INFO "DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", msg, devname, dn_ntohs(cb->src), dn_ntohs(cb->dst), dn_ntohs(cb->src_port), dn_ntohs(cb->dst_port)); + printk(KERN_INFO "DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", + msg, devname, le16_to_cpu(cb->src), le16_to_cpu(cb->dst), + le16_to_cpu(cb->src_port), le16_to_cpu(cb->dst_port)); } } @@ -133,7 +135,7 @@ static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth) if (skb->len < 2) return len; - if ((ack = dn_ntohs(*ptr)) & 0x8000) { + if ((ack = le16_to_cpu(*ptr)) & 0x8000) { skb_pull(skb, 2); ptr++; len += 2; @@ -147,7 +149,7 @@ static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth) if (skb->len < 2) return len; - if ((ack = dn_ntohs(*ptr)) & 0x8000) { + if ((ack = le16_to_cpu(*ptr)) & 0x8000) { skb_pull(skb, 2); len += 2; if ((ack & 0x4000) == 0) { @@ -237,7 +239,7 @@ static struct sock *dn_find_listener(struct sk_buff *skb, unsigned short *reason cb->dst_port = msg->dstaddr; cb->services = msg->services; cb->info = msg->info; - cb->segsize = dn_ntohs(msg->segsize); + cb->segsize = le16_to_cpu(msg->segsize); if (!pskb_may_pull(skb, sizeof(*msg))) goto err_out; @@ -344,7 +346,7 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb) ptr = skb->data; cb->services = *ptr++; cb->info = *ptr++; - cb->segsize = dn_ntohs(*(__le16 *)ptr); + cb->segsize = le16_to_cpu(*(__le16 *)ptr); if ((scp->state == DN_CI) || (scp->state == DN_CD)) { scp->persist = 0; @@ -361,7 +363,7 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb) if (skb->len > 0) { u16 dlen = *skb->data; if ((dlen <= 16) && (dlen <= skb->len)) { - scp->conndata_in.opt_optl = dn_htons(dlen); + scp->conndata_in.opt_optl = cpu_to_le16(dlen); skb_copy_from_linear_data_offset(skb, 1, scp->conndata_in.opt_data, dlen); } @@ -396,17 
+398,17 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb) if (skb->len < 2) goto out; - reason = dn_ntohs(*(__le16 *)skb->data); + reason = le16_to_cpu(*(__le16 *)skb->data); skb_pull(skb, 2); - scp->discdata_in.opt_status = dn_htons(reason); + scp->discdata_in.opt_status = cpu_to_le16(reason); scp->discdata_in.opt_optl = 0; memset(scp->discdata_in.opt_data, 0, 16); if (skb->len > 0) { u16 dlen = *skb->data; if ((dlen <= 16) && (dlen <= skb->len)) { - scp->discdata_in.opt_optl = dn_htons(dlen); + scp->discdata_in.opt_optl = cpu_to_le16(dlen); skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen); } } @@ -463,7 +465,7 @@ static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb) if (skb->len != 2) goto out; - reason = dn_ntohs(*(__le16 *)skb->data); + reason = le16_to_cpu(*(__le16 *)skb->data); sk->sk_state = TCP_CLOSE; @@ -512,7 +514,7 @@ static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb) if (skb->len != 4) goto out; - segnum = dn_ntohs(*(__le16 *)ptr); + segnum = le16_to_cpu(*(__le16 *)ptr); ptr += 2; lsflags = *(unsigned char *)ptr++; fcval = *ptr; @@ -620,7 +622,7 @@ static void dn_nsp_otherdata(struct sock *sk, struct sk_buff *skb) if (skb->len < 2) goto out; - cb->segnum = segnum = dn_ntohs(*(__le16 *)skb->data); + cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data); skb_pull(skb, 2); if (seq_next(scp->numoth_rcv, segnum)) { @@ -648,7 +650,7 @@ static void dn_nsp_data(struct sock *sk, struct sk_buff *skb) if (skb->len < 2) goto out; - cb->segnum = segnum = dn_ntohs(*(__le16 *)skb->data); + cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data); skb_pull(skb, 2); if (seq_next(scp->numdat_rcv, segnum)) { diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 1964faf203e..2013c25b7f5 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -230,7 +230,6 @@ static inline unsigned dn_nsp_clone_and_send(struct sk_buff *skb, /** * dn_nsp_output - Try and send something 
from socket queues * @sk: The socket whose queues are to be investigated - * @gfp: The memory allocation flags * * Try and send the packet on the end of the data and other data queues. * Other data gets priority over data, and if we retransmit a packet we @@ -326,8 +325,8 @@ static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned c ptr = (__le16 *)dn_mk_common_header(scp, skb, msgflag, hlen); - *ptr++ = dn_htons(acknum); - *ptr++ = dn_htons(ackcrs); + *ptr++ = cpu_to_le16(acknum); + *ptr++ = cpu_to_le16(ackcrs); return ptr; } @@ -345,7 +344,7 @@ static __le16 *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *skb, int o cb->segnum = scp->numdat; seq_add(&scp->numdat, 1); } - *(ptr++) = dn_htons(cb->segnum); + *(ptr++) = cpu_to_le16(cb->segnum); return ptr; } @@ -523,7 +522,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp) struct dn_scp *scp = DN_SK(sk); struct sk_buff *skb = NULL; struct nsp_conn_init_msg *msg; - __u8 len = (__u8)dn_ntohs(scp->conndata_out.opt_optl); + __u8 len = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL) return; @@ -534,7 +533,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp) msg->srcaddr = scp->addrloc; msg->services = scp->services_loc; msg->info = scp->info_loc; - msg->segsize = dn_htons(scp->segsize_loc); + msg->segsize = cpu_to_le16(scp->segsize_loc); *skb_put(skb,1) = len; @@ -560,7 +559,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, if ((dst == NULL) || (rem == 0)) { if (net_ratelimit()) - printk(KERN_DEBUG "DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n", dn_ntohs(rem), dst); + printk(KERN_DEBUG "DECnet: dn_nsp_do_disc: BUG! 
Please report this to SteveW@ACM.org rem=%u dst=%p\n", le16_to_cpu(rem), dst); return; } @@ -573,7 +572,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, msg += 2; *(__le16 *)msg = loc; msg += 2; - *(__le16 *)msg = dn_htons(reason); + *(__le16 *)msg = cpu_to_le16(reason); msg += 2; if (msgflg == NSP_DISCINIT) *msg++ = ddl; @@ -599,10 +598,10 @@ void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, int ddl = 0; if (msgflg == NSP_DISCINIT) - ddl = dn_ntohs(scp->discdata_out.opt_optl); + ddl = le16_to_cpu(scp->discdata_out.opt_optl); if (reason == 0) - reason = dn_ntohs(scp->discdata_out.opt_status); + reason = le16_to_cpu(scp->discdata_out.opt_status); dn_nsp_do_disc(sk, msgflg, reason, gfp, sk->sk_dst_cache, ddl, scp->discdata_out.opt_data, scp->addrrem, scp->addrloc); @@ -676,7 +675,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) msg->srcaddr = scp->addrloc; msg->services = scp->services_loc; /* Requested flow control */ msg->info = scp->info_loc; /* Version Number */ - msg->segsize = dn_htons(scp->segsize_loc); /* Max segment size */ + msg->segsize = cpu_to_le16(scp->segsize_loc); /* Max segment size */ if (scp->peer.sdn_objnum) type = 0; @@ -709,7 +708,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) if (aux > 0) memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux); - aux = (__u8)dn_ntohs(scp->conndata_out.opt_optl); + aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); *skb_put(skb, 1) = aux; if (aux > 0) memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 821bd1cdec0..b33b254c52c 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -131,7 +131,6 @@ static struct dst_ops dn_dst_ops = { .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, - .entry_size = sizeof(struct dn_route), .entries = ATOMIC_INIT(0), }; @@ -476,7 +475,7 @@ static 
int dn_route_rx_packet(struct sk_buff *skb) printk(KERN_DEBUG "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n", (int)cb->rt_flags, devname, skb->len, - dn_ntohs(cb->src), dn_ntohs(cb->dst), + le16_to_cpu(cb->src), le16_to_cpu(cb->dst), err, skb->pkt_type); } @@ -576,7 +575,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type { struct dn_skb_cb *cb; unsigned char flags = 0; - __u16 len = dn_ntohs(*(__le16 *)skb->data); + __u16 len = le16_to_cpu(*(__le16 *)skb->data); struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; unsigned char padlen = 0; @@ -774,7 +773,7 @@ static int dn_rt_bug(struct sk_buff *skb) struct dn_skb_cb *cb = DN_SKB_CB(skb); printk(KERN_DEBUG "dn_rt_bug: skb from:%04x to:%04x\n", - dn_ntohs(cb->src), dn_ntohs(cb->dst)); + le16_to_cpu(cb->src), le16_to_cpu(cb->dst)); } kfree_skb(skb); @@ -817,7 +816,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) static inline int dn_match_addr(__le16 addr1, __le16 addr2) { - __u16 tmp = dn_ntohs(addr1) ^ dn_ntohs(addr2); + __u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2); int match = 16; while(tmp) { tmp >>= 1; @@ -887,8 +886,8 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old if (decnet_debug_level & 16) printk(KERN_DEBUG "dn_route_output_slow: dst=%04x src=%04x mark=%d" - " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst), - dn_ntohs(oldflp->fld_src), + " iif=%d oif=%d\n", le16_to_cpu(oldflp->fld_dst), + le16_to_cpu(oldflp->fld_src), oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif); /* If we have an output interface, verify its a DECnet device */ @@ -960,7 +959,7 @@ source_ok: printk(KERN_DEBUG "dn_route_output_slow: initial checks complete." 
" dst=%o4x src=%04x oif=%d try_hard=%d\n", - dn_ntohs(fl.fld_dst), dn_ntohs(fl.fld_src), + le16_to_cpu(fl.fld_dst), le16_to_cpu(fl.fld_src), fl.oif, try_hard); /* @@ -1185,7 +1184,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f err = __dn_route_output_key(pprt, flp, flags); if (err == 0 && flp->proto) { - err = xfrm_lookup(pprt, flp, NULL, 0); + err = xfrm_lookup(&init_net, pprt, flp, NULL, 0); } return err; } @@ -1196,8 +1195,8 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); if (err == 0 && fl->proto) { - err = xfrm_lookup(pprt, fl, sk, (flags & MSG_DONTWAIT) ? - 0 : XFRM_LOOKUP_WAIT); + err = xfrm_lookup(&init_net, pprt, fl, sk, + (flags & MSG_DONTWAIT) ? 0 : XFRM_LOOKUP_WAIT); } return err; } @@ -1712,8 +1711,8 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", rt->u.dst.dev ? rt->u.dst.dev->name : "*", - dn_addr2asc(dn_ntohs(rt->rt_daddr), buf1), - dn_addr2asc(dn_ntohs(rt->rt_saddr), buf2), + dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), + dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), atomic_read(&rt->u.dst.__refcnt), rt->u.dst.__use, (int) dst_metric(&rt->u.dst, RTAX_RTT)); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 3a2830ac89c..69ad9280c69 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -85,7 +85,7 @@ static int dn_fib_hash_zombies; static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) { - u16 h = dn_ntohs(key.datum)>>(16 - dz->dz_order); + u16 h = le16_to_cpu(key.datum)>>(16 - dz->dz_order); h ^= (h >> 10); h ^= (h >> 6); h &= DZ_HASHMASK(dz); diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 2f360a1e5e4..965397af9a8 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -126,7 +126,7 @@ static int parse_addr(__le16 *addr, char *str) if 
(INVALID_END_CHAR(*str)) return -1; - *addr = dn_htons((area << 10) | node); + *addr = cpu_to_le16((area << 10) | node); return 0; } @@ -201,7 +201,7 @@ static int dn_node_address_handler(ctl_table *table, int write, return 0; } - dn_addr2asc(dn_ntohs(decnet_address), addr); + dn_addr2asc(le16_to_cpu(decnet_address), addr); len = strlen(addr); addr[len++] = '\n'; diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c index 54068ef251e..bfb4a643c86 100644 --- a/net/dsa/mv88e6060.c +++ b/net/dsa/mv88e6060.c @@ -222,7 +222,7 @@ static void mv88e6060_poll_link(struct dsa_switch *ds) for (i = 0; i < DSA_MAX_PORTS; i++) { struct net_device *dev; - int port_status; + int uninitialized_var(port_status); int link; int speed; int duplex; diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c index aa6c609c59f..4e4d8b5ad03 100644 --- a/net/dsa/mv88e6xxx.c +++ b/net/dsa/mv88e6xxx.c @@ -358,7 +358,7 @@ void mv88e6xxx_poll_link(struct dsa_switch *ds) for (i = 0; i < DSA_MAX_PORTS; i++) { struct net_device *dev; - int port_status; + int uninitialized_var(port_status); int link; int speed; int duplex; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 37616884b8a..a3a410d20da 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -10,6 +10,7 @@ #include <linux/list.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/phy.h> #include "dsa_priv.h" @@ -49,11 +50,57 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) /* slave device handling ****************************************************/ static int dsa_slave_open(struct net_device *dev) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + int err; + + if (!(master->flags & IFF_UP)) + return -ENETDOWN; + + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) { + err = dev_unicast_add(master, dev->dev_addr, ETH_ALEN); + if (err < 0) + goto out; + } + + if (dev->flags & IFF_ALLMULTI) { + err = dev_set_allmulti(master, 1); + if (err < 0) + goto 
del_unicast; + } + if (dev->flags & IFF_PROMISC) { + err = dev_set_promiscuity(master, 1); + if (err < 0) + goto clear_allmulti; + } + return 0; + +clear_allmulti: + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(master, -1); +del_unicast: + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) + dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); +out: + return err; } static int dsa_slave_close(struct net_device *dev) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + dev_mc_unsync(master, dev); + dev_unicast_unsync(master, dev); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(master, -1); + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(master, -1); + + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) + dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); + return 0; } @@ -77,9 +124,30 @@ static void dsa_slave_set_rx_mode(struct net_device *dev) dev_unicast_sync(master, dev); } -static int dsa_slave_set_mac_address(struct net_device *dev, void *addr) +static int dsa_slave_set_mac_address(struct net_device *dev, void *a) { - memcpy(dev->dev_addr, addr + 2, 6); + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + struct sockaddr *addr = a; + int err; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + if (!(dev->flags & IFF_UP)) + goto out; + + if (compare_ether_addr(addr->sa_data, master->dev_addr)) { + err = dev_unicast_add(master, addr->sa_data, ETH_ALEN); + if (err < 0) + return err; + } + + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) + dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); + +out: + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); return 0; } @@ -284,7 +352,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, netif_carrier_off(slave_dev); if (p->phy != NULL) { - phy_attach(slave_dev, p->phy->dev.bus_id, + phy_attach(slave_dev, dev_name(&p->phy->dev), 0, 
PHY_INTERFACE_MODE_GMII); p->phy->autoneg = AUTONEG_ENABLE; diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index ff55823a653..f99a019b939 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -159,6 +159,7 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port]; skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); skb->dev->stats.rx_packets++; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 24b1c76fa7a..328ec957f78 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -178,6 +178,7 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port]; skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); skb->dev->stats.rx_packets++; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 3bfd2e55877..b59132878ad 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -95,6 +95,7 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port]; skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); skb->dev->stats.rx_packets++; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index b9d85af2dd3..280352aba40 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -165,8 +165,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); eth = eth_hdr(skb); - if (is_multicast_ether_addr(eth->h_dest)) { - if (!compare_ether_addr(eth->h_dest, dev->broadcast)) + if (unlikely(is_multicast_ether_addr(eth->h_dest))) { + if (!compare_ether_addr_64bits(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else skb->pkt_type = PACKET_MULTICAST; @@ -181,7 +181,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) */ else if (1 /*dev->flags&IFF_PROMISC */ ) { - if (unlikely(compare_ether_addr(eth->h_dest, 
dev->dev_addr))) + if (unlikely(compare_ether_addr_64bits(eth->h_dest, dev->dev_addr))) skb->pkt_type = PACKET_OTHERHOST; } @@ -282,7 +282,7 @@ EXPORT_SYMBOL(eth_header_cache_update); * This doesn't change hardware matching, so needs to be overridden * for most real devices. */ -static int eth_mac_addr(struct net_device *dev, void *p) +int eth_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; @@ -293,6 +293,7 @@ static int eth_mac_addr(struct net_device *dev, void *p) memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); return 0; } +EXPORT_SYMBOL(eth_mac_addr); /** * eth_change_mtu - set new MTU size @@ -302,21 +303,23 @@ static int eth_mac_addr(struct net_device *dev, void *p) * Allow changing MTU size. Needs to be overridden for devices * supporting jumbo frames. */ -static int eth_change_mtu(struct net_device *dev, int new_mtu) +int eth_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) return -EINVAL; dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL(eth_change_mtu); -static int eth_validate_addr(struct net_device *dev) +int eth_validate_addr(struct net_device *dev) { if (!is_valid_ether_addr(dev->dev_addr)) return -EADDRNOTAVAIL; return 0; } +EXPORT_SYMBOL(eth_validate_addr); const struct header_ops eth_header_ops ____cacheline_aligned = { .create = eth_header, @@ -334,11 +337,11 @@ const struct header_ops eth_header_ops ____cacheline_aligned = { void ether_setup(struct net_device *dev) { dev->header_ops = ð_header_ops; - +#ifdef CONFIG_COMPAT_NET_DEV_OPS dev->change_mtu = eth_change_mtu; dev->set_mac_address = eth_mac_addr; dev->validate_addr = eth_validate_addr; - +#endif dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig deleted file mode 100644 index d2282bb2e4f..00000000000 --- a/net/ieee80211/Kconfig +++ /dev/null @@ -1,50 +0,0 @@ -config IEEE80211 - tristate - select WIRELESS_EXT - select CRYPTO - select 
CRYPTO_ARC4 - select CRYPTO_ECB - select CRYPTO_AES - select CRYPTO_MICHAEL_MIC - select CRYPTO_ECB - select CRC32 - select IEEE80211_CRYPT_WEP - select IEEE80211_CRYPT_TKIP - select IEEE80211_CRYPT_CCMP - select LIB80211 - ---help--- - This option enables the hardware independent IEEE 802.11 - networking stack. This component is deprecated in favor of the - mac80211 component. - -config IEEE80211_DEBUG - bool "Full debugging output for the old IEEE80211 stack" - depends on IEEE80211 - ---help--- - This option will enable debug tracing output for the - ieee80211 network stack. - - This will result in the kernel module being ~70k larger. You - can control which debug output is sent to the kernel log by - setting the value in - - /proc/net/ieee80211/debug_level - - For example: - - % echo 0x00000FFO > /proc/net/ieee80211/debug_level - - For a list of values you can assign to debug_level, you - can look at the bit mask values in <net/ieee80211.h> - - If you are not trying to debug or develop the ieee80211 - subsystem, you most likely want to say N here. 
- -config IEEE80211_CRYPT_WEP - tristate - -config IEEE80211_CRYPT_CCMP - tristate - -config IEEE80211_CRYPT_TKIP - tristate diff --git a/net/ieee80211/Makefile b/net/ieee80211/Makefile deleted file mode 100644 index f988417121d..00000000000 --- a/net/ieee80211/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -obj-$(CONFIG_IEEE80211) += ieee80211.o -obj-$(CONFIG_IEEE80211) += ieee80211_crypt.o -obj-$(CONFIG_IEEE80211_CRYPT_WEP) += ieee80211_crypt_wep.o -obj-$(CONFIG_IEEE80211_CRYPT_CCMP) += ieee80211_crypt_ccmp.o -obj-$(CONFIG_IEEE80211_CRYPT_TKIP) += ieee80211_crypt_tkip.o -ieee80211-objs := \ - ieee80211_module.o \ - ieee80211_tx.o \ - ieee80211_rx.o \ - ieee80211_wx.o \ - ieee80211_geo.o - diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c deleted file mode 100644 index df5592c9339..00000000000 --- a/net/ieee80211/ieee80211_crypt.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Host AP crypto routines - * - * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi> - * Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. See README and COPYING for - * more details. 
- * - */ - -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <net/ieee80211.h> - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("HostAP crypto"); -MODULE_LICENSE("GPL"); - -struct ieee80211_crypto_alg { - struct list_head list; - struct ieee80211_crypto_ops *ops; -}; - -static LIST_HEAD(ieee80211_crypto_algs); -static DEFINE_SPINLOCK(ieee80211_crypto_lock); - -void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, int force) -{ - struct ieee80211_crypt_data *entry, *next; - unsigned long flags; - - spin_lock_irqsave(&ieee->lock, flags); - list_for_each_entry_safe(entry, next, &ieee->crypt_deinit_list, list) { - if (atomic_read(&entry->refcnt) != 0 && !force) - continue; - - list_del(&entry->list); - - if (entry->ops) { - entry->ops->deinit(entry->priv); - module_put(entry->ops->owner); - } - kfree(entry); - } - spin_unlock_irqrestore(&ieee->lock, flags); -} - -/* After this, crypt_deinit_list won't accept new members */ -void ieee80211_crypt_quiescing(struct ieee80211_device *ieee) -{ - unsigned long flags; - - spin_lock_irqsave(&ieee->lock, flags); - ieee->crypt_quiesced = 1; - spin_unlock_irqrestore(&ieee->lock, flags); -} - -void ieee80211_crypt_deinit_handler(unsigned long data) -{ - struct ieee80211_device *ieee = (struct ieee80211_device *)data; - unsigned long flags; - - ieee80211_crypt_deinit_entries(ieee, 0); - - spin_lock_irqsave(&ieee->lock, flags); - if (!list_empty(&ieee->crypt_deinit_list) && !ieee->crypt_quiesced) { - printk(KERN_DEBUG "%s: entries remaining in delayed crypt " - "deletion list\n", ieee->dev->name); - ieee->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&ieee->crypt_deinit_timer); - } - spin_unlock_irqrestore(&ieee->lock, flags); -} - -void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee, - struct ieee80211_crypt_data **crypt) -{ - struct ieee80211_crypt_data *tmp; - unsigned long flags; - - if (*crypt == 
NULL) - return; - - tmp = *crypt; - *crypt = NULL; - - /* must not run ops->deinit() while there may be pending encrypt or - * decrypt operations. Use a list of delayed deinits to avoid needing - * locking. */ - - spin_lock_irqsave(&ieee->lock, flags); - if (!ieee->crypt_quiesced) { - list_add(&tmp->list, &ieee->crypt_deinit_list); - if (!timer_pending(&ieee->crypt_deinit_timer)) { - ieee->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&ieee->crypt_deinit_timer); - } - } - spin_unlock_irqrestore(&ieee->lock, flags); -} - -int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops) -{ - unsigned long flags; - struct ieee80211_crypto_alg *alg; - - alg = kzalloc(sizeof(*alg), GFP_KERNEL); - if (alg == NULL) - return -ENOMEM; - - alg->ops = ops; - - spin_lock_irqsave(&ieee80211_crypto_lock, flags); - list_add(&alg->list, &ieee80211_crypto_algs); - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - - printk(KERN_DEBUG "ieee80211_crypt: registered algorithm '%s'\n", - ops->name); - - return 0; -} - -int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops) -{ - struct ieee80211_crypto_alg *alg; - unsigned long flags; - - spin_lock_irqsave(&ieee80211_crypto_lock, flags); - list_for_each_entry(alg, &ieee80211_crypto_algs, list) { - if (alg->ops == ops) - goto found; - } - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - return -EINVAL; - - found: - printk(KERN_DEBUG "ieee80211_crypt: unregistered algorithm " - "'%s'\n", ops->name); - list_del(&alg->list); - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - kfree(alg); - return 0; -} - -struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name) -{ - struct ieee80211_crypto_alg *alg; - unsigned long flags; - - spin_lock_irqsave(&ieee80211_crypto_lock, flags); - list_for_each_entry(alg, &ieee80211_crypto_algs, list) { - if (strcmp(alg->ops->name, name) == 0) - goto found; - } - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - return NULL; - - found: - 
spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - return alg->ops; -} - -static void *ieee80211_crypt_null_init(int keyidx) -{ - return (void *)1; -} - -static void ieee80211_crypt_null_deinit(void *priv) -{ -} - -static struct ieee80211_crypto_ops ieee80211_crypt_null = { - .name = "NULL", - .init = ieee80211_crypt_null_init, - .deinit = ieee80211_crypt_null_deinit, - .owner = THIS_MODULE, -}; - -static int __init ieee80211_crypto_init(void) -{ - return ieee80211_register_crypto_ops(&ieee80211_crypt_null); -} - -static void __exit ieee80211_crypto_deinit(void) -{ - ieee80211_unregister_crypto_ops(&ieee80211_crypt_null); - BUG_ON(!list_empty(&ieee80211_crypto_algs)); -} - -EXPORT_SYMBOL(ieee80211_crypt_deinit_entries); -EXPORT_SYMBOL(ieee80211_crypt_deinit_handler); -EXPORT_SYMBOL(ieee80211_crypt_delayed_deinit); -EXPORT_SYMBOL(ieee80211_crypt_quiescing); - -EXPORT_SYMBOL(ieee80211_register_crypto_ops); -EXPORT_SYMBOL(ieee80211_unregister_crypto_ops); -EXPORT_SYMBOL(ieee80211_get_crypto_ops); - -module_init(ieee80211_crypto_init); -module_exit(ieee80211_crypto_deinit); diff --git a/net/ieee80211/ieee80211_geo.c b/net/ieee80211/ieee80211_geo.c deleted file mode 100644 index 960ad13f5e9..00000000000 --- a/net/ieee80211/ieee80211_geo.c +++ /dev/null @@ -1,195 +0,0 @@ -/****************************************************************************** - - Copyright(c) 2005 Intel Corporation. All rights reserved. - - This program is free software; you can redistribute it and/or modify it - under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. 
- - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 - Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - The full GNU General Public License is included in this distribution in the - file called LICENSE. - - Contact Information: - James P. Ketrenos <ipw2100-admin@linux.intel.com> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -******************************************************************************/ -#include <linux/compiler.h> -#include <linux/errno.h> -#include <linux/if_arp.h> -#include <linux/in6.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/tcp.h> -#include <linux/types.h> -#include <linux/wireless.h> -#include <linux/etherdevice.h> -#include <asm/uaccess.h> - -#include <net/ieee80211.h> - -int ieee80211_is_valid_channel(struct ieee80211_device *ieee, u8 channel) -{ - int i; - - /* Driver needs to initialize the geography map before using - * these helper functions */ - if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) - return 0; - - if (ieee->freq_band & IEEE80211_24GHZ_BAND) - for (i = 0; i < ieee->geo.bg_channels; i++) - /* NOTE: If G mode is currently supported but - * this is a B only channel, we don't see it - * as valid. 
*/ - if ((ieee->geo.bg[i].channel == channel) && - !(ieee->geo.bg[i].flags & IEEE80211_CH_INVALID) && - (!(ieee->mode & IEEE_G) || - !(ieee->geo.bg[i].flags & IEEE80211_CH_B_ONLY))) - return IEEE80211_24GHZ_BAND; - - if (ieee->freq_band & IEEE80211_52GHZ_BAND) - for (i = 0; i < ieee->geo.a_channels; i++) - if ((ieee->geo.a[i].channel == channel) && - !(ieee->geo.a[i].flags & IEEE80211_CH_INVALID)) - return IEEE80211_52GHZ_BAND; - - return 0; -} - -int ieee80211_channel_to_index(struct ieee80211_device *ieee, u8 channel) -{ - int i; - - /* Driver needs to initialize the geography map before using - * these helper functions */ - if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) - return -1; - - if (ieee->freq_band & IEEE80211_24GHZ_BAND) - for (i = 0; i < ieee->geo.bg_channels; i++) - if (ieee->geo.bg[i].channel == channel) - return i; - - if (ieee->freq_band & IEEE80211_52GHZ_BAND) - for (i = 0; i < ieee->geo.a_channels; i++) - if (ieee->geo.a[i].channel == channel) - return i; - - return -1; -} - -u32 ieee80211_channel_to_freq(struct ieee80211_device * ieee, u8 channel) -{ - const struct ieee80211_channel * ch; - - /* Driver needs to initialize the geography map before using - * these helper functions */ - if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) - return 0; - - ch = ieee80211_get_channel(ieee, channel); - if (!ch->channel) - return 0; - return ch->freq; -} - -u8 ieee80211_freq_to_channel(struct ieee80211_device * ieee, u32 freq) -{ - int i; - - /* Driver needs to initialize the geography map before using - * these helper functions */ - if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) - return 0; - - freq /= 100000; - - if (ieee->freq_band & IEEE80211_24GHZ_BAND) - for (i = 0; i < ieee->geo.bg_channels; i++) - if (ieee->geo.bg[i].freq == freq) - return ieee->geo.bg[i].channel; - - if (ieee->freq_band & IEEE80211_52GHZ_BAND) - for (i = 0; i < ieee->geo.a_channels; i++) - if (ieee->geo.a[i].freq == freq) - return 
ieee->geo.a[i].channel; - - return 0; -} - -int ieee80211_set_geo(struct ieee80211_device *ieee, - const struct ieee80211_geo *geo) -{ - memcpy(ieee->geo.name, geo->name, 3); - ieee->geo.name[3] = '\0'; - ieee->geo.bg_channels = geo->bg_channels; - ieee->geo.a_channels = geo->a_channels; - memcpy(ieee->geo.bg, geo->bg, geo->bg_channels * - sizeof(struct ieee80211_channel)); - memcpy(ieee->geo.a, geo->a, ieee->geo.a_channels * - sizeof(struct ieee80211_channel)); - return 0; -} - -const struct ieee80211_geo *ieee80211_get_geo(struct ieee80211_device *ieee) -{ - return &ieee->geo; -} - -u8 ieee80211_get_channel_flags(struct ieee80211_device * ieee, u8 channel) -{ - int index = ieee80211_channel_to_index(ieee, channel); - - if (index == -1) - return IEEE80211_CH_INVALID; - - if (channel <= IEEE80211_24GHZ_CHANNELS) - return ieee->geo.bg[index].flags; - - return ieee->geo.a[index].flags; -} - -static const struct ieee80211_channel bad_channel = { - .channel = 0, - .flags = IEEE80211_CH_INVALID, - .max_power = 0, -}; - -const struct ieee80211_channel *ieee80211_get_channel(struct ieee80211_device - *ieee, u8 channel) -{ - int index = ieee80211_channel_to_index(ieee, channel); - - if (index == -1) - return &bad_channel; - - if (channel <= IEEE80211_24GHZ_CHANNELS) - return &ieee->geo.bg[index]; - - return &ieee->geo.a[index]; -} - -EXPORT_SYMBOL(ieee80211_get_channel); -EXPORT_SYMBOL(ieee80211_get_channel_flags); -EXPORT_SYMBOL(ieee80211_is_valid_channel); -EXPORT_SYMBOL(ieee80211_freq_to_channel); -EXPORT_SYMBOL(ieee80211_channel_to_freq); -EXPORT_SYMBOL(ieee80211_channel_to_index); -EXPORT_SYMBOL(ieee80211_set_geo); -EXPORT_SYMBOL(ieee80211_get_geo); diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c deleted file mode 100644 index d34d4e79b6f..00000000000 --- a/net/ieee80211/ieee80211_module.c +++ /dev/null @@ -1,312 +0,0 @@ -/******************************************************************************* - - Copyright(c) 2004-2005 Intel 
Corporation. All rights reserved. - - Portions of this file are based on the WEP enablement code provided by the - Host AP project hostap-drivers v0.1.3 - Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen - <j@w1.fi> - Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi> - - This program is free software; you can redistribute it and/or modify it - under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 - Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - The full GNU General Public License is included in this distribution in the - file called LICENSE. - - Contact Information: - James P. Ketrenos <ipw2100-admin@linux.intel.com> - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include <linux/compiler.h> -#include <linux/errno.h> -#include <linux/if_arp.h> -#include <linux/in6.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/tcp.h> -#include <linux/types.h> -#include <linux/wireless.h> -#include <linux/etherdevice.h> -#include <asm/uaccess.h> -#include <net/net_namespace.h> -#include <net/arp.h> - -#include <net/ieee80211.h> - -#define DRV_DESCRIPTION "802.11 data/management/control stack" -#define DRV_NAME "ieee80211" -#define DRV_VERSION IEEE80211_VERSION -#define DRV_COPYRIGHT "Copyright (C) 2004-2005 Intel Corporation <jketreno@linux.intel.com>" - -MODULE_VERSION(DRV_VERSION); -MODULE_DESCRIPTION(DRV_DESCRIPTION); -MODULE_AUTHOR(DRV_COPYRIGHT); -MODULE_LICENSE("GPL"); - -static int ieee80211_networks_allocate(struct ieee80211_device *ieee) -{ - if (ieee->networks) - return 0; - - ieee->networks = - kzalloc(MAX_NETWORK_COUNT * sizeof(struct ieee80211_network), - GFP_KERNEL); - if (!ieee->networks) { - printk(KERN_WARNING "%s: Out of memory allocating beacons\n", - ieee->dev->name); - return -ENOMEM; - } - - return 0; -} - -void ieee80211_network_reset(struct ieee80211_network *network) -{ - if (!network) - return; - - if (network->ibss_dfs) { - kfree(network->ibss_dfs); - network->ibss_dfs = NULL; - } -} - -static inline void ieee80211_networks_free(struct ieee80211_device *ieee) -{ - int i; - - if (!ieee->networks) - return; - - for (i = 0; i < MAX_NETWORK_COUNT; i++) - if (ieee->networks[i].ibss_dfs) - kfree(ieee->networks[i].ibss_dfs); - - kfree(ieee->networks); - ieee->networks = NULL; -} - -static void ieee80211_networks_initialize(struct ieee80211_device *ieee) -{ - int i; - - INIT_LIST_HEAD(&ieee->network_free_list); - 
INIT_LIST_HEAD(&ieee->network_list); - for (i = 0; i < MAX_NETWORK_COUNT; i++) - list_add_tail(&ieee->networks[i].list, - &ieee->network_free_list); -} - -static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > IEEE80211_DATA_LEN)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - -static struct net_device_stats *ieee80211_generic_get_stats( - struct net_device *dev) -{ - struct ieee80211_device *ieee = netdev_priv(dev); - return &ieee->stats; -} - -struct net_device *alloc_ieee80211(int sizeof_priv) -{ - struct ieee80211_device *ieee; - struct net_device *dev; - int err; - - IEEE80211_DEBUG_INFO("Initializing...\n"); - - dev = alloc_etherdev(sizeof(struct ieee80211_device) + sizeof_priv); - if (!dev) { - IEEE80211_ERROR("Unable to allocate network device.\n"); - goto failed; - } - ieee = netdev_priv(dev); - dev->hard_start_xmit = ieee80211_xmit; - dev->change_mtu = ieee80211_change_mtu; - - /* Drivers are free to override this if the generic implementation - * does not meet their needs. */ - dev->get_stats = ieee80211_generic_get_stats; - - ieee->dev = dev; - - err = ieee80211_networks_allocate(ieee); - if (err) { - IEEE80211_ERROR("Unable to allocate beacon storage: %d\n", err); - goto failed_free_netdev; - } - ieee80211_networks_initialize(ieee); - - /* Default fragmentation threshold is maximum payload size */ - ieee->fts = DEFAULT_FTS; - ieee->rts = DEFAULT_FTS; - ieee->scan_age = DEFAULT_MAX_SCAN_AGE; - ieee->open_wep = 1; - - /* Default to enabling full open WEP with host based encrypt/decrypt */ - ieee->host_encrypt = 1; - ieee->host_decrypt = 1; - ieee->host_mc_decrypt = 1; - - /* Host fragementation in Open mode. Default is enabled. - * Note: host fragmentation is always enabled if host encryption - * is enabled. For cards can do hardware encryption, they must do - * hardware fragmentation as well. So we don't need a variable - * like host_enc_frag. 
*/ - ieee->host_open_frag = 1; - ieee->ieee802_1x = 1; /* Default to supporting 802.1x */ - - INIT_LIST_HEAD(&ieee->crypt_deinit_list); - setup_timer(&ieee->crypt_deinit_timer, ieee80211_crypt_deinit_handler, - (unsigned long)ieee); - ieee->crypt_quiesced = 0; - - spin_lock_init(&ieee->lock); - - ieee->wpa_enabled = 0; - ieee->drop_unencrypted = 0; - ieee->privacy_invoked = 0; - - return dev; - -failed_free_netdev: - free_netdev(dev); -failed: - return NULL; -} - -void free_ieee80211(struct net_device *dev) -{ - struct ieee80211_device *ieee = netdev_priv(dev); - - int i; - - ieee80211_crypt_quiescing(ieee); - del_timer_sync(&ieee->crypt_deinit_timer); - ieee80211_crypt_deinit_entries(ieee, 1); - - for (i = 0; i < WEP_KEYS; i++) { - struct ieee80211_crypt_data *crypt = ieee->crypt[i]; - if (crypt) { - if (crypt->ops) { - crypt->ops->deinit(crypt->priv); - module_put(crypt->ops->owner); - } - kfree(crypt); - ieee->crypt[i] = NULL; - } - } - - ieee80211_networks_free(ieee); - free_netdev(dev); -} - -#ifdef CONFIG_IEEE80211_DEBUG - -static int debug = 0; -u32 ieee80211_debug_level = 0; -EXPORT_SYMBOL_GPL(ieee80211_debug_level); -static struct proc_dir_entry *ieee80211_proc = NULL; - -static int show_debug_level(char *page, char **start, off_t offset, - int count, int *eof, void *data) -{ - return snprintf(page, count, "0x%08X\n", ieee80211_debug_level); -} - -static int store_debug_level(struct file *file, const char __user * buffer, - unsigned long count, void *data) -{ - char buf[] = "0x00000000\n"; - unsigned long len = min((unsigned long)sizeof(buf) - 1, count); - unsigned long val; - - if (copy_from_user(buf, buffer, len)) - return count; - buf[len] = 0; - if (sscanf(buf, "%li", &val) != 1) - printk(KERN_INFO DRV_NAME - ": %s is not in hex or decimal form.\n", buf); - else - ieee80211_debug_level = val; - - return strnlen(buf, len); -} -#endif /* CONFIG_IEEE80211_DEBUG */ - -static int __init ieee80211_init(void) -{ -#ifdef CONFIG_IEEE80211_DEBUG - struct 
proc_dir_entry *e; - - ieee80211_debug_level = debug; - ieee80211_proc = proc_mkdir(DRV_NAME, init_net.proc_net); - if (ieee80211_proc == NULL) { - IEEE80211_ERROR("Unable to create " DRV_NAME - " proc directory\n"); - return -EIO; - } - e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR, - ieee80211_proc); - if (!e) { - remove_proc_entry(DRV_NAME, init_net.proc_net); - ieee80211_proc = NULL; - return -EIO; - } - e->read_proc = show_debug_level; - e->write_proc = store_debug_level; - e->data = NULL; -#endif /* CONFIG_IEEE80211_DEBUG */ - - printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION ", " DRV_VERSION "\n"); - printk(KERN_INFO DRV_NAME ": " DRV_COPYRIGHT "\n"); - - return 0; -} - -static void __exit ieee80211_exit(void) -{ -#ifdef CONFIG_IEEE80211_DEBUG - if (ieee80211_proc) { - remove_proc_entry("debug_level", ieee80211_proc); - remove_proc_entry(DRV_NAME, init_net.proc_net); - ieee80211_proc = NULL; - } -#endif /* CONFIG_IEEE80211_DEBUG */ -} - -#ifdef CONFIG_IEEE80211_DEBUG -#include <linux/moduleparam.h> -module_param(debug, int, 0444); -MODULE_PARM_DESC(debug, "debug output mask"); -#endif /* CONFIG_IEEE80211_DEBUG */ - -module_exit(ieee80211_exit); -module_init(ieee80211_init); - -EXPORT_SYMBOL(alloc_ieee80211); -EXPORT_SYMBOL(free_ieee80211); diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c deleted file mode 100644 index 3dd58b594f6..00000000000 --- a/net/ieee80211/ieee80211_rx.c +++ /dev/null @@ -1,1799 +0,0 @@ -/* - * Original code based Host AP (software wireless LAN access point) driver - * for Intersil Prism2/2.5/3 - hostap.o module, common routines - * - * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen - * <j@w1.fi> - * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi> - * Copyright (c) 2004-2005, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free 
Software Foundation. See README and COPYING for - * more details. - */ - -#include <linux/compiler.h> -#include <linux/errno.h> -#include <linux/if_arp.h> -#include <linux/in6.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/tcp.h> -#include <linux/types.h> -#include <linux/wireless.h> -#include <linux/etherdevice.h> -#include <asm/uaccess.h> -#include <linux/ctype.h> - -#include <net/lib80211.h> -#include <net/ieee80211.h> - -static void ieee80211_monitor_rx(struct ieee80211_device *ieee, - struct sk_buff *skb, - struct ieee80211_rx_stats *rx_stats) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - u16 fc = le16_to_cpu(hdr->frame_control); - - skb->dev = ieee->dev; - skb_reset_mac_header(skb); - skb_pull(skb, ieee80211_get_hdrlen(fc)); - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = htons(ETH_P_80211_RAW); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); -} - -/* Called only as a tasklet (software IRQ) */ -static struct ieee80211_frag_entry *ieee80211_frag_cache_find(struct - ieee80211_device - *ieee, - unsigned int seq, - unsigned int frag, - u8 * src, - u8 * dst) -{ - struct ieee80211_frag_entry *entry; - int i; - - for (i = 0; i < IEEE80211_FRAG_CACHE_LEN; i++) { - entry = &ieee->frag_cache[i]; - if (entry->skb != NULL && - time_after(jiffies, entry->first_frag_time + 2 * HZ)) { - IEEE80211_DEBUG_FRAG("expiring fragment cache entry " - "seq=%u last_frag=%u\n", - entry->seq, entry->last_frag); - dev_kfree_skb_any(entry->skb); - entry->skb = NULL; - } - - if (entry->skb != NULL && entry->seq == seq && - (entry->last_frag + 1 == frag || frag == -1) && - !compare_ether_addr(entry->src_addr, src) && - !compare_ether_addr(entry->dst_addr, dst)) - return entry; - } - - return NULL; -} - -/* Called only as a tasklet (software IRQ) */ -static struct sk_buff 
*ieee80211_frag_cache_get(struct ieee80211_device *ieee, - struct ieee80211_hdr_4addr *hdr) -{ - struct sk_buff *skb = NULL; - u16 sc; - unsigned int frag, seq; - struct ieee80211_frag_entry *entry; - - sc = le16_to_cpu(hdr->seq_ctl); - frag = WLAN_GET_SEQ_FRAG(sc); - seq = WLAN_GET_SEQ_SEQ(sc); - - if (frag == 0) { - /* Reserve enough space to fit maximum frame length */ - skb = dev_alloc_skb(ieee->dev->mtu + - sizeof(struct ieee80211_hdr_4addr) + - 8 /* LLC */ + - 2 /* alignment */ + - 8 /* WEP */ + ETH_ALEN /* WDS */ ); - if (skb == NULL) - return NULL; - - entry = &ieee->frag_cache[ieee->frag_next_idx]; - ieee->frag_next_idx++; - if (ieee->frag_next_idx >= IEEE80211_FRAG_CACHE_LEN) - ieee->frag_next_idx = 0; - - if (entry->skb != NULL) - dev_kfree_skb_any(entry->skb); - - entry->first_frag_time = jiffies; - entry->seq = seq; - entry->last_frag = frag; - entry->skb = skb; - memcpy(entry->src_addr, hdr->addr2, ETH_ALEN); - memcpy(entry->dst_addr, hdr->addr1, ETH_ALEN); - } else { - /* received a fragment of a frame for which the head fragment - * should have already been received */ - entry = ieee80211_frag_cache_find(ieee, seq, frag, hdr->addr2, - hdr->addr1); - if (entry != NULL) { - entry->last_frag = frag; - skb = entry->skb; - } - } - - return skb; -} - -/* Called only as a tasklet (software IRQ) */ -static int ieee80211_frag_cache_invalidate(struct ieee80211_device *ieee, - struct ieee80211_hdr_4addr *hdr) -{ - u16 sc; - unsigned int seq; - struct ieee80211_frag_entry *entry; - - sc = le16_to_cpu(hdr->seq_ctl); - seq = WLAN_GET_SEQ_SEQ(sc); - - entry = ieee80211_frag_cache_find(ieee, seq, -1, hdr->addr2, - hdr->addr1); - - if (entry == NULL) { - IEEE80211_DEBUG_FRAG("could not invalidate fragment cache " - "entry (seq=%u)\n", seq); - return -1; - } - - entry->skb = NULL; - return 0; -} - -#ifdef NOT_YET -/* ieee80211_rx_frame_mgtmt - * - * Responsible for handling management control frames - * - * Called by ieee80211_rx */ -static int 
-ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, struct sk_buff *skb, - struct ieee80211_rx_stats *rx_stats, u16 type, - u16 stype) -{ - if (ieee->iw_mode == IW_MODE_MASTER) { - printk(KERN_DEBUG "%s: Master mode not yet suppported.\n", - ieee->dev->name); - return 0; -/* - hostap_update_sta_ps(ieee, (struct hostap_ieee80211_hdr_4addr *) - skb->data);*/ - } - - if (ieee->hostapd && type == WLAN_FC_TYPE_MGMT) { - if (stype == WLAN_FC_STYPE_BEACON && - ieee->iw_mode == IW_MODE_MASTER) { - struct sk_buff *skb2; - /* Process beacon frames also in kernel driver to - * update STA(AP) table statistics */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2) - hostap_rx(skb2->dev, skb2, rx_stats); - } - - /* send management frames to the user space daemon for - * processing */ - ieee->apdevstats.rx_packets++; - ieee->apdevstats.rx_bytes += skb->len; - prism2_rx_80211(ieee->apdev, skb, rx_stats, PRISM2_RX_MGMT); - return 0; - } - - if (ieee->iw_mode == IW_MODE_MASTER) { - if (type != WLAN_FC_TYPE_MGMT && type != WLAN_FC_TYPE_CTRL) { - printk(KERN_DEBUG "%s: unknown management frame " - "(type=0x%02x, stype=0x%02x) dropped\n", - skb->dev->name, type, stype); - return -1; - } - - hostap_rx(skb->dev, skb, rx_stats); - return 0; - } - - printk(KERN_DEBUG "%s: hostap_rx_frame_mgmt: management frame " - "received in non-Host AP mode\n", skb->dev->name); - return -1; -} -#endif - -/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ -/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -static unsigned char rfc1042_header[] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; - -/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ -static unsigned char bridge_tunnel_header[] = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; -/* No encapsulation header if EtherType < 0x600 (=length) */ - -/* Called by ieee80211_rx_frame_decrypt */ -static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee, - struct sk_buff *skb) -{ - struct net_device *dev = ieee->dev; - 
u16 fc, ethertype; - struct ieee80211_hdr_3addr *hdr; - u8 *pos; - - if (skb->len < 24) - return 0; - - hdr = (struct ieee80211_hdr_3addr *)skb->data; - fc = le16_to_cpu(hdr->frame_ctl); - - /* check that the frame is unicast frame to us */ - if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - IEEE80211_FCTL_TODS && - !compare_ether_addr(hdr->addr1, dev->dev_addr) && - !compare_ether_addr(hdr->addr3, dev->dev_addr)) { - /* ToDS frame with own addr BSSID and DA */ - } else if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - IEEE80211_FCTL_FROMDS && - !compare_ether_addr(hdr->addr1, dev->dev_addr)) { - /* FromDS frame with own addr as DA */ - } else - return 0; - - if (skb->len < 24 + 8) - return 0; - - /* check for port access entity Ethernet type */ - pos = skb->data + 24; - ethertype = (pos[6] << 8) | pos[7]; - if (ethertype == ETH_P_PAE) - return 1; - - return 0; -} - -/* Called only as a tasklet (software IRQ), by ieee80211_rx */ -static int -ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, - struct ieee80211_crypt_data *crypt) -{ - struct ieee80211_hdr_3addr *hdr; - int res, hdrlen; - - if (crypt == NULL || crypt->ops->decrypt_mpdu == NULL) - return 0; - - hdr = (struct ieee80211_hdr_3addr *)skb->data; - hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); - - atomic_inc(&crypt->refcnt); - res = crypt->ops->decrypt_mpdu(skb, hdrlen, crypt->priv); - atomic_dec(&crypt->refcnt); - if (res < 0) { - IEEE80211_DEBUG_DROP("decryption failed (SA=%pM) res=%d\n", - hdr->addr2, res); - if (res == -2) - IEEE80211_DEBUG_DROP("Decryption failed ICV " - "mismatch (key %d)\n", - skb->data[hdrlen + 3] >> 6); - ieee->ieee_stats.rx_discards_undecryptable++; - return -1; - } - - return res; -} - -/* Called only as a tasklet (software IRQ), by ieee80211_rx */ -static int -ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, - struct sk_buff *skb, int keyidx, - struct ieee80211_crypt_data *crypt) -{ - struct 
ieee80211_hdr_3addr *hdr; - int res, hdrlen; - - if (crypt == NULL || crypt->ops->decrypt_msdu == NULL) - return 0; - - hdr = (struct ieee80211_hdr_3addr *)skb->data; - hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); - - atomic_inc(&crypt->refcnt); - res = crypt->ops->decrypt_msdu(skb, keyidx, hdrlen, crypt->priv); - atomic_dec(&crypt->refcnt); - if (res < 0) { - printk(KERN_DEBUG "%s: MSDU decryption/MIC verification failed" - " (SA=%pM keyidx=%d)\n", ieee->dev->name, hdr->addr2, - keyidx); - return -1; - } - - return 0; -} - -/* All received frames are sent to this function. @skb contains the frame in - * IEEE 802.11 format, i.e., in the format it was sent over air. - * This function is called only as a tasklet (software IRQ). */ -int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, - struct ieee80211_rx_stats *rx_stats) -{ - struct net_device *dev = ieee->dev; - struct ieee80211_hdr_4addr *hdr; - size_t hdrlen; - u16 fc, type, stype, sc; - struct net_device_stats *stats; - unsigned int frag; - u8 *payload; - u16 ethertype; -#ifdef NOT_YET - struct net_device *wds = NULL; - struct sk_buff *skb2 = NULL; - struct net_device *wds = NULL; - int frame_authorized = 0; - int from_assoc_ap = 0; - void *sta = NULL; -#endif - u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN]; - struct ieee80211_crypt_data *crypt = NULL; - int keyidx = 0; - int can_be_decrypted = 0; - - hdr = (struct ieee80211_hdr_4addr *)skb->data; - stats = &ieee->stats; - - if (skb->len < 10) { - printk(KERN_INFO "%s: SKB length < 10\n", dev->name); - goto rx_dropped; - } - - fc = le16_to_cpu(hdr->frame_ctl); - type = WLAN_FC_GET_TYPE(fc); - stype = WLAN_FC_GET_STYPE(fc); - sc = le16_to_cpu(hdr->seq_ctl); - frag = WLAN_GET_SEQ_FRAG(sc); - hdrlen = ieee80211_get_hdrlen(fc); - - if (skb->len < hdrlen) { - printk(KERN_INFO "%s: invalid SKB length %d\n", - dev->name, skb->len); - goto rx_dropped; - } - - /* Put this code here so that we avoid duplicating it in all - * Rx paths. 
- Jean II */ -#ifdef CONFIG_WIRELESS_EXT -#ifdef IW_WIRELESS_SPY /* defined in iw_handler.h */ - /* If spy monitoring on */ - if (ieee->spy_data.spy_number > 0) { - struct iw_quality wstats; - - wstats.updated = 0; - if (rx_stats->mask & IEEE80211_STATMASK_RSSI) { - wstats.level = rx_stats->signal; - wstats.updated |= IW_QUAL_LEVEL_UPDATED; - } else - wstats.updated |= IW_QUAL_LEVEL_INVALID; - - if (rx_stats->mask & IEEE80211_STATMASK_NOISE) { - wstats.noise = rx_stats->noise; - wstats.updated |= IW_QUAL_NOISE_UPDATED; - } else - wstats.updated |= IW_QUAL_NOISE_INVALID; - - if (rx_stats->mask & IEEE80211_STATMASK_SIGNAL) { - wstats.qual = rx_stats->signal; - wstats.updated |= IW_QUAL_QUAL_UPDATED; - } else - wstats.updated |= IW_QUAL_QUAL_INVALID; - - /* Update spy records */ - wireless_spy_update(ieee->dev, hdr->addr2, &wstats); - } -#endif /* IW_WIRELESS_SPY */ -#endif /* CONFIG_WIRELESS_EXT */ - -#ifdef NOT_YET - hostap_update_rx_stats(local->ap, hdr, rx_stats); -#endif - - if (ieee->iw_mode == IW_MODE_MONITOR) { - stats->rx_packets++; - stats->rx_bytes += skb->len; - ieee80211_monitor_rx(ieee, skb, rx_stats); - return 1; - } - - can_be_decrypted = (is_multicast_ether_addr(hdr->addr1) || - is_broadcast_ether_addr(hdr->addr2)) ? - ieee->host_mc_decrypt : ieee->host_decrypt; - - if (can_be_decrypted) { - if (skb->len >= hdrlen + 3) { - /* Top two-bits of byte 3 are the key index */ - keyidx = skb->data[hdrlen + 3] >> 6; - } - - /* ieee->crypt[] is WEP_KEY (4) in length. Given that keyidx - * is only allowed 2-bits of storage, no value of keyidx can - * be provided via above code that would result in keyidx - * being out of range */ - crypt = ieee->crypt[keyidx]; - -#ifdef NOT_YET - sta = NULL; - - /* Use station specific key to override default keys if the - * receiver address is a unicast address ("individual RA"). 
If - * bcrx_sta_key parameter is set, station specific key is used - * even with broad/multicast targets (this is against IEEE - * 802.11, but makes it easier to use different keys with - * stations that do not support WEP key mapping). */ - - if (!(hdr->addr1[0] & 0x01) || local->bcrx_sta_key) - (void)hostap_handle_sta_crypto(local, hdr, &crypt, - &sta); -#endif - - /* allow NULL decrypt to indicate an station specific override - * for default encryption */ - if (crypt && (crypt->ops == NULL || - crypt->ops->decrypt_mpdu == NULL)) - crypt = NULL; - - if (!crypt && (fc & IEEE80211_FCTL_PROTECTED)) { - /* This seems to be triggered by some (multicast?) - * frames from other than current BSS, so just drop the - * frames silently instead of filling system log with - * these reports. */ - IEEE80211_DEBUG_DROP("Decryption failed (not set)" - " (SA=%pM)\n", hdr->addr2); - ieee->ieee_stats.rx_discards_undecryptable++; - goto rx_dropped; - } - } -#ifdef NOT_YET - if (type != WLAN_FC_TYPE_DATA) { - if (type == WLAN_FC_TYPE_MGMT && stype == WLAN_FC_STYPE_AUTH && - fc & IEEE80211_FCTL_PROTECTED && ieee->host_decrypt && - (keyidx = hostap_rx_frame_decrypt(ieee, skb, crypt)) < 0) { - printk(KERN_DEBUG "%s: failed to decrypt mgmt::auth " - "from %pM\n", dev->name, hdr->addr2); - /* TODO: could inform hostapd about this so that it - * could send auth failure report */ - goto rx_dropped; - } - - if (ieee80211_rx_frame_mgmt(ieee, skb, rx_stats, type, stype)) - goto rx_dropped; - else - goto rx_exit; - } -#endif - /* drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 
9.29) */ - if (sc == ieee->prev_seq_ctl) - goto rx_dropped; - else - ieee->prev_seq_ctl = sc; - - /* Data frame - extract src/dst addresses */ - if (skb->len < IEEE80211_3ADDR_LEN) - goto rx_dropped; - - switch (fc & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { - case IEEE80211_FCTL_FROMDS: - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr3, ETH_ALEN); - break; - case IEEE80211_FCTL_TODS: - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - break; - case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS: - if (skb->len < IEEE80211_4ADDR_LEN) - goto rx_dropped; - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr4, ETH_ALEN); - break; - case 0: - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - break; - } - -#ifdef NOT_YET - if (hostap_rx_frame_wds(ieee, hdr, fc, &wds)) - goto rx_dropped; - if (wds) { - skb->dev = dev = wds; - stats = hostap_get_stats(dev); - } - - if (ieee->iw_mode == IW_MODE_MASTER && !wds && - (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - IEEE80211_FCTL_FROMDS && ieee->stadev - && !compare_ether_addr(hdr->addr2, ieee->assoc_ap_addr)) { - /* Frame from BSSID of the AP for which we are a client */ - skb->dev = dev = ieee->stadev; - stats = hostap_get_stats(dev); - from_assoc_ap = 1; - } -#endif - -#ifdef NOT_YET - if ((ieee->iw_mode == IW_MODE_MASTER || - ieee->iw_mode == IW_MODE_REPEAT) && !from_assoc_ap) { - switch (hostap_handle_sta_rx(ieee, dev, skb, rx_stats, - wds != NULL)) { - case AP_RX_CONTINUE_NOT_AUTHORIZED: - frame_authorized = 0; - break; - case AP_RX_CONTINUE: - frame_authorized = 1; - break; - case AP_RX_DROP: - goto rx_dropped; - case AP_RX_EXIT: - goto rx_exit; - } - } -#endif - - /* Nullfunc frames may have PS-bit set, so they must be passed to - * hostap_handle_sta_rx() before being dropped here. 
*/ - - stype &= ~IEEE80211_STYPE_QOS_DATA; - - if (stype != IEEE80211_STYPE_DATA && - stype != IEEE80211_STYPE_DATA_CFACK && - stype != IEEE80211_STYPE_DATA_CFPOLL && - stype != IEEE80211_STYPE_DATA_CFACKPOLL) { - if (stype != IEEE80211_STYPE_NULLFUNC) - IEEE80211_DEBUG_DROP("RX: dropped data frame " - "with no data (type=0x%02x, " - "subtype=0x%02x, len=%d)\n", - type, stype, skb->len); - goto rx_dropped; - } - - /* skb: hdr + (possibly fragmented, possibly encrypted) payload */ - - if ((fc & IEEE80211_FCTL_PROTECTED) && can_be_decrypted && - (keyidx = ieee80211_rx_frame_decrypt(ieee, skb, crypt)) < 0) - goto rx_dropped; - - hdr = (struct ieee80211_hdr_4addr *)skb->data; - - /* skb: hdr + (possibly fragmented) plaintext payload */ - // PR: FIXME: hostap has additional conditions in the "if" below: - // ieee->host_decrypt && (fc & IEEE80211_FCTL_PROTECTED) && - if ((frag != 0) || (fc & IEEE80211_FCTL_MOREFRAGS)) { - int flen; - struct sk_buff *frag_skb = ieee80211_frag_cache_get(ieee, hdr); - IEEE80211_DEBUG_FRAG("Rx Fragment received (%u)\n", frag); - - if (!frag_skb) { - IEEE80211_DEBUG(IEEE80211_DL_RX | IEEE80211_DL_FRAG, - "Rx cannot get skb from fragment " - "cache (morefrag=%d seq=%u frag=%u)\n", - (fc & IEEE80211_FCTL_MOREFRAGS) != 0, - WLAN_GET_SEQ_SEQ(sc), frag); - goto rx_dropped; - } - - flen = skb->len; - if (frag != 0) - flen -= hdrlen; - - if (frag_skb->tail + flen > frag_skb->end) { - printk(KERN_WARNING "%s: host decrypted and " - "reassembled frame did not fit skb\n", - dev->name); - ieee80211_frag_cache_invalidate(ieee, hdr); - goto rx_dropped; - } - - if (frag == 0) { - /* copy first fragment (including full headers) into - * beginning of the fragment cache skb */ - skb_copy_from_linear_data(skb, skb_put(frag_skb, flen), flen); - } else { - /* append frame payload to the end of the fragment - * cache skb */ - skb_copy_from_linear_data_offset(skb, hdrlen, - skb_put(frag_skb, flen), flen); - } - dev_kfree_skb_any(skb); - skb = NULL; - - if (fc & 
IEEE80211_FCTL_MOREFRAGS) { - /* more fragments expected - leave the skb in fragment - * cache for now; it will be delivered to upper layers - * after all fragments have been received */ - goto rx_exit; - } - - /* this was the last fragment and the frame will be - * delivered, so remove skb from fragment cache */ - skb = frag_skb; - hdr = (struct ieee80211_hdr_4addr *)skb->data; - ieee80211_frag_cache_invalidate(ieee, hdr); - } - - /* skb: hdr + (possible reassembled) full MSDU payload; possibly still - * encrypted/authenticated */ - if ((fc & IEEE80211_FCTL_PROTECTED) && can_be_decrypted && - ieee80211_rx_frame_decrypt_msdu(ieee, skb, keyidx, crypt)) - goto rx_dropped; - - hdr = (struct ieee80211_hdr_4addr *)skb->data; - if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep) { - if ( /*ieee->ieee802_1x && */ - ieee80211_is_eapol_frame(ieee, skb)) { - /* pass unencrypted EAPOL frames even if encryption is - * configured */ - } else { - IEEE80211_DEBUG_DROP("encryption configured, but RX " - "frame not encrypted (SA=%pM)\n", - hdr->addr2); - goto rx_dropped; - } - } - - if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep && - !ieee80211_is_eapol_frame(ieee, skb)) { - IEEE80211_DEBUG_DROP("dropped unencrypted RX data " - "frame from %pM (drop_unencrypted=1)\n", - hdr->addr2); - goto rx_dropped; - } - - /* If the frame was decrypted in hardware, we may need to strip off - * any security data (IV, ICV, etc) that was left behind */ - if (!can_be_decrypted && (fc & IEEE80211_FCTL_PROTECTED) && - ieee->host_strip_iv_icv) { - int trimlen = 0; - - /* Top two-bits of byte 3 are the key index */ - if (skb->len >= hdrlen + 3) - keyidx = skb->data[hdrlen + 3] >> 6; - - /* To strip off any security data which appears before the - * payload, we simply increase hdrlen (as the header gets - * chopped off immediately below). For the security data which - * appears after the payload, we use skb_trim. 
*/ - - switch (ieee->sec.encode_alg[keyidx]) { - case SEC_ALG_WEP: - /* 4 byte IV */ - hdrlen += 4; - /* 4 byte ICV */ - trimlen = 4; - break; - case SEC_ALG_TKIP: - /* 4 byte IV, 4 byte ExtIV */ - hdrlen += 8; - /* 8 byte MIC, 4 byte ICV */ - trimlen = 12; - break; - case SEC_ALG_CCMP: - /* 8 byte CCMP header */ - hdrlen += 8; - /* 8 byte MIC */ - trimlen = 8; - break; - } - - if (skb->len < trimlen) - goto rx_dropped; - - __skb_trim(skb, skb->len - trimlen); - - if (skb->len < hdrlen) - goto rx_dropped; - } - - /* skb: hdr + (possible reassembled) full plaintext payload */ - - payload = skb->data + hdrlen; - ethertype = (payload[6] << 8) | payload[7]; - -#ifdef NOT_YET - /* If IEEE 802.1X is used, check whether the port is authorized to send - * the received frame. */ - if (ieee->ieee802_1x && ieee->iw_mode == IW_MODE_MASTER) { - if (ethertype == ETH_P_PAE) { - printk(KERN_DEBUG "%s: RX: IEEE 802.1X frame\n", - dev->name); - if (ieee->hostapd && ieee->apdev) { - /* Send IEEE 802.1X frames to the user - * space daemon for processing */ - prism2_rx_80211(ieee->apdev, skb, rx_stats, - PRISM2_RX_MGMT); - ieee->apdevstats.rx_packets++; - ieee->apdevstats.rx_bytes += skb->len; - goto rx_exit; - } - } else if (!frame_authorized) { - printk(KERN_DEBUG "%s: dropped frame from " - "unauthorized port (IEEE 802.1X): " - "ethertype=0x%04x\n", dev->name, ethertype); - goto rx_dropped; - } - } -#endif - - /* convert hdr + possible LLC headers into Ethernet header */ - if (skb->len - hdrlen >= 8 && - ((memcmp(payload, rfc1042_header, SNAP_SIZE) == 0 && - ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || - memcmp(payload, bridge_tunnel_header, SNAP_SIZE) == 0)) { - /* remove RFC1042 or Bridge-Tunnel encapsulation and - * replace EtherType */ - skb_pull(skb, hdrlen + SNAP_SIZE); - memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); - } else { - __be16 len; - /* Leave Ethernet header part of hdr and full payload */ - skb_pull(skb, 
hdrlen); - len = htons(skb->len); - memcpy(skb_push(skb, 2), &len, 2); - memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); - } - -#ifdef NOT_YET - if (wds && ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - IEEE80211_FCTL_TODS) && skb->len >= ETH_HLEN + ETH_ALEN) { - /* Non-standard frame: get addr4 from its bogus location after - * the payload */ - skb_copy_to_linear_data_offset(skb, ETH_ALEN, - skb->data + skb->len - ETH_ALEN, - ETH_ALEN); - skb_trim(skb, skb->len - ETH_ALEN); - } -#endif - - stats->rx_packets++; - stats->rx_bytes += skb->len; - -#ifdef NOT_YET - if (ieee->iw_mode == IW_MODE_MASTER && !wds && ieee->ap->bridge_packets) { - if (dst[0] & 0x01) { - /* copy multicast frame both to the higher layers and - * to the wireless media */ - ieee->ap->bridged_multicast++; - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - printk(KERN_DEBUG "%s: skb_clone failed for " - "multicast frame\n", dev->name); - } else if (hostap_is_sta_assoc(ieee->ap, dst)) { - /* send frame directly to the associated STA using - * wireless media and not passing to higher layers */ - ieee->ap->bridged_unicast++; - skb2 = skb; - skb = NULL; - } - } - - if (skb2 != NULL) { - /* send to wireless media */ - skb2->dev = dev; - skb2->protocol = htons(ETH_P_802_3); - skb_reset_mac_header(skb2); - skb_reset_network_header(skb2); - /* skb2->network_header += ETH_HLEN; */ - dev_queue_xmit(skb2); - } -#endif - - if (skb) { - skb->protocol = eth_type_trans(skb, dev); - memset(skb->cb, 0, sizeof(skb->cb)); - skb->ip_summed = CHECKSUM_NONE; /* 802.11 crc not sufficient */ - if (netif_rx(skb) == NET_RX_DROP) { - /* netif_rx always succeeds, but it might drop - * the packet. If it drops the packet, we log that - * in our stats. 
*/ - IEEE80211_DEBUG_DROP - ("RX: netif_rx dropped the packet\n"); - stats->rx_dropped++; - } - } - - rx_exit: -#ifdef NOT_YET - if (sta) - hostap_handle_sta_release(sta); -#endif - return 1; - - rx_dropped: - stats->rx_dropped++; - - /* Returning 0 indicates to caller that we have not handled the SKB-- - * so it is still allocated and can be used again by underlying - * hardware as a DMA target */ - return 0; -} - -/* Filter out unrelated packets, call ieee80211_rx[_mgt] - * This function takes over the skb, it should not be used again after calling - * this function. */ -void ieee80211_rx_any(struct ieee80211_device *ieee, - struct sk_buff *skb, struct ieee80211_rx_stats *stats) -{ - struct ieee80211_hdr_4addr *hdr; - int is_packet_for_us; - u16 fc; - - if (ieee->iw_mode == IW_MODE_MONITOR) { - if (!ieee80211_rx(ieee, skb, stats)) - dev_kfree_skb_irq(skb); - return; - } - - if (skb->len < sizeof(struct ieee80211_hdr)) - goto drop_free; - - hdr = (struct ieee80211_hdr_4addr *)skb->data; - fc = le16_to_cpu(hdr->frame_ctl); - - if ((fc & IEEE80211_FCTL_VERS) != 0) - goto drop_free; - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_MGMT: - if (skb->len < sizeof(struct ieee80211_hdr_3addr)) - goto drop_free; - ieee80211_rx_mgt(ieee, hdr, stats); - dev_kfree_skb_irq(skb); - return; - case IEEE80211_FTYPE_DATA: - break; - case IEEE80211_FTYPE_CTL: - return; - default: - return; - } - - is_packet_for_us = 0; - switch (ieee->iw_mode) { - case IW_MODE_ADHOC: - /* our BSS and not from/to DS */ - if (memcmp(hdr->addr3, ieee->bssid, ETH_ALEN) == 0) - if ((fc & (IEEE80211_FCTL_TODS+IEEE80211_FCTL_FROMDS)) == 0) { - /* promisc: get all */ - if (ieee->dev->flags & IFF_PROMISC) - is_packet_for_us = 1; - /* to us */ - else if (memcmp(hdr->addr1, ieee->dev->dev_addr, ETH_ALEN) == 0) - is_packet_for_us = 1; - /* mcast */ - else if (is_multicast_ether_addr(hdr->addr1)) - is_packet_for_us = 1; - } - break; - case IW_MODE_INFRA: - /* our BSS (== from our AP) and from DS 
*/ - if (memcmp(hdr->addr2, ieee->bssid, ETH_ALEN) == 0) - if ((fc & (IEEE80211_FCTL_TODS+IEEE80211_FCTL_FROMDS)) == IEEE80211_FCTL_FROMDS) { - /* promisc: get all */ - if (ieee->dev->flags & IFF_PROMISC) - is_packet_for_us = 1; - /* to us */ - else if (memcmp(hdr->addr1, ieee->dev->dev_addr, ETH_ALEN) == 0) - is_packet_for_us = 1; - /* mcast */ - else if (is_multicast_ether_addr(hdr->addr1)) { - /* not our own packet bcasted from AP */ - if (memcmp(hdr->addr3, ieee->dev->dev_addr, ETH_ALEN)) - is_packet_for_us = 1; - } - } - break; - default: - /* ? */ - break; - } - - if (is_packet_for_us) - if (!ieee80211_rx(ieee, skb, stats)) - dev_kfree_skb_irq(skb); - return; - -drop_free: - dev_kfree_skb_irq(skb); - ieee->stats.rx_dropped++; - return; -} - -#define MGMT_FRAME_FIXED_PART_LENGTH 0x24 - -static u8 qos_oui[QOS_OUI_LEN] = { 0x00, 0x50, 0xF2 }; - -/* -* Make ther structure we read from the beacon packet has -* the right values -*/ -static int ieee80211_verify_qos_info(struct ieee80211_qos_information_element - *info_element, int sub_type) -{ - - if (info_element->qui_subtype != sub_type) - return -1; - if (memcmp(info_element->qui, qos_oui, QOS_OUI_LEN)) - return -1; - if (info_element->qui_type != QOS_OUI_TYPE) - return -1; - if (info_element->version != QOS_VERSION_1) - return -1; - - return 0; -} - -/* - * Parse a QoS parameter element - */ -static int ieee80211_read_qos_param_element(struct ieee80211_qos_parameter_info - *element_param, struct ieee80211_info_element - *info_element) -{ - int ret = 0; - u16 size = sizeof(struct ieee80211_qos_parameter_info) - 2; - - if ((info_element == NULL) || (element_param == NULL)) - return -1; - - if (info_element->id == QOS_ELEMENT_ID && info_element->len == size) { - memcpy(element_param->info_element.qui, info_element->data, - info_element->len); - element_param->info_element.elementID = info_element->id; - element_param->info_element.length = info_element->len; - } else - ret = -1; - if (ret == 0) - ret = 
ieee80211_verify_qos_info(&element_param->info_element, - QOS_OUI_PARAM_SUB_TYPE); - return ret; -} - -/* - * Parse a QoS information element - */ -static int ieee80211_read_qos_info_element(struct - ieee80211_qos_information_element - *element_info, struct ieee80211_info_element - *info_element) -{ - int ret = 0; - u16 size = sizeof(struct ieee80211_qos_information_element) - 2; - - if (element_info == NULL) - return -1; - if (info_element == NULL) - return -1; - - if ((info_element->id == QOS_ELEMENT_ID) && (info_element->len == size)) { - memcpy(element_info->qui, info_element->data, - info_element->len); - element_info->elementID = info_element->id; - element_info->length = info_element->len; - } else - ret = -1; - - if (ret == 0) - ret = ieee80211_verify_qos_info(element_info, - QOS_OUI_INFO_SUB_TYPE); - return ret; -} - -/* - * Write QoS parameters from the ac parameters. - */ -static int ieee80211_qos_convert_ac_to_parameters(struct - ieee80211_qos_parameter_info - *param_elm, struct - ieee80211_qos_parameters - *qos_param) -{ - int rc = 0; - int i; - struct ieee80211_qos_ac_parameter *ac_params; - u32 txop; - u8 cw_min; - u8 cw_max; - - for (i = 0; i < QOS_QUEUE_NUM; i++) { - ac_params = &(param_elm->ac_params_record[i]); - - qos_param->aifs[i] = (ac_params->aci_aifsn) & 0x0F; - qos_param->aifs[i] -= (qos_param->aifs[i] < 2) ? 0 : 2; - - cw_min = ac_params->ecw_min_max & 0x0F; - qos_param->cw_min[i] = cpu_to_le16((1 << cw_min) - 1); - - cw_max = (ac_params->ecw_min_max & 0xF0) >> 4; - qos_param->cw_max[i] = cpu_to_le16((1 << cw_max) - 1); - - qos_param->flag[i] = - (ac_params->aci_aifsn & 0x10) ? 0x01 : 0x00; - - txop = le16_to_cpu(ac_params->tx_op_limit) * 32; - qos_param->tx_op_limit[i] = cpu_to_le16(txop); - } - return rc; -} - -/* - * we have a generic data element which it may contain QoS information or - * parameters element. 
check the information element length to decide - * which type to read - */ -static int ieee80211_parse_qos_info_param_IE(struct ieee80211_info_element - *info_element, - struct ieee80211_network *network) -{ - int rc = 0; - struct ieee80211_qos_parameters *qos_param = NULL; - struct ieee80211_qos_information_element qos_info_element; - - rc = ieee80211_read_qos_info_element(&qos_info_element, info_element); - - if (rc == 0) { - network->qos_data.param_count = qos_info_element.ac_info & 0x0F; - network->flags |= NETWORK_HAS_QOS_INFORMATION; - } else { - struct ieee80211_qos_parameter_info param_element; - - rc = ieee80211_read_qos_param_element(¶m_element, - info_element); - if (rc == 0) { - qos_param = &(network->qos_data.parameters); - ieee80211_qos_convert_ac_to_parameters(¶m_element, - qos_param); - network->flags |= NETWORK_HAS_QOS_PARAMETERS; - network->qos_data.param_count = - param_element.info_element.ac_info & 0x0F; - } - } - - if (rc == 0) { - IEEE80211_DEBUG_QOS("QoS is supported\n"); - network->qos_data.supported = 1; - } - return rc; -} - -#ifdef CONFIG_IEEE80211_DEBUG -#define MFIE_STRING(x) case MFIE_TYPE_ ##x: return #x - -static const char *get_info_element_string(u16 id) -{ - switch (id) { - MFIE_STRING(SSID); - MFIE_STRING(RATES); - MFIE_STRING(FH_SET); - MFIE_STRING(DS_SET); - MFIE_STRING(CF_SET); - MFIE_STRING(TIM); - MFIE_STRING(IBSS_SET); - MFIE_STRING(COUNTRY); - MFIE_STRING(HOP_PARAMS); - MFIE_STRING(HOP_TABLE); - MFIE_STRING(REQUEST); - MFIE_STRING(CHALLENGE); - MFIE_STRING(POWER_CONSTRAINT); - MFIE_STRING(POWER_CAPABILITY); - MFIE_STRING(TPC_REQUEST); - MFIE_STRING(TPC_REPORT); - MFIE_STRING(SUPP_CHANNELS); - MFIE_STRING(CSA); - MFIE_STRING(MEASURE_REQUEST); - MFIE_STRING(MEASURE_REPORT); - MFIE_STRING(QUIET); - MFIE_STRING(IBSS_DFS); - MFIE_STRING(ERP_INFO); - MFIE_STRING(RSN); - MFIE_STRING(RATES_EX); - MFIE_STRING(GENERIC); - MFIE_STRING(QOS_PARAMETER); - default: - return "UNKNOWN"; - } -} -#endif - -static int 
ieee80211_parse_info_param(struct ieee80211_info_element - *info_element, u16 length, - struct ieee80211_network *network) -{ - DECLARE_SSID_BUF(ssid); - u8 i; -#ifdef CONFIG_IEEE80211_DEBUG - char rates_str[64]; - char *p; -#endif - - while (length >= sizeof(*info_element)) { - if (sizeof(*info_element) + info_element->len > length) { - IEEE80211_DEBUG_MGMT("Info elem: parse failed: " - "info_element->len + 2 > left : " - "info_element->len+2=%zd left=%d, id=%d.\n", - info_element->len + - sizeof(*info_element), - length, info_element->id); - /* We stop processing but don't return an error here - * because some misbehaviour APs break this rule. ie. - * Orinoco AP1000. */ - break; - } - - switch (info_element->id) { - case MFIE_TYPE_SSID: - network->ssid_len = min(info_element->len, - (u8) IW_ESSID_MAX_SIZE); - memcpy(network->ssid, info_element->data, - network->ssid_len); - if (network->ssid_len < IW_ESSID_MAX_SIZE) - memset(network->ssid + network->ssid_len, 0, - IW_ESSID_MAX_SIZE - network->ssid_len); - - IEEE80211_DEBUG_MGMT("MFIE_TYPE_SSID: '%s' len=%d.\n", - print_ssid(ssid, network->ssid, - network->ssid_len), - network->ssid_len); - break; - - case MFIE_TYPE_RATES: -#ifdef CONFIG_IEEE80211_DEBUG - p = rates_str; -#endif - network->rates_len = min(info_element->len, - MAX_RATES_LENGTH); - for (i = 0; i < network->rates_len; i++) { - network->rates[i] = info_element->data[i]; -#ifdef CONFIG_IEEE80211_DEBUG - p += snprintf(p, sizeof(rates_str) - - (p - rates_str), "%02X ", - network->rates[i]); -#endif - if (ieee80211_is_ofdm_rate - (info_element->data[i])) { - network->flags |= NETWORK_HAS_OFDM; - if (info_element->data[i] & - IEEE80211_BASIC_RATE_MASK) - network->flags &= - ~NETWORK_HAS_CCK; - } - } - - IEEE80211_DEBUG_MGMT("MFIE_TYPE_RATES: '%s' (%d)\n", - rates_str, network->rates_len); - break; - - case MFIE_TYPE_RATES_EX: -#ifdef CONFIG_IEEE80211_DEBUG - p = rates_str; -#endif - network->rates_ex_len = min(info_element->len, - MAX_RATES_EX_LENGTH); - 
for (i = 0; i < network->rates_ex_len; i++) { - network->rates_ex[i] = info_element->data[i]; -#ifdef CONFIG_IEEE80211_DEBUG - p += snprintf(p, sizeof(rates_str) - - (p - rates_str), "%02X ", - network->rates[i]); -#endif - if (ieee80211_is_ofdm_rate - (info_element->data[i])) { - network->flags |= NETWORK_HAS_OFDM; - if (info_element->data[i] & - IEEE80211_BASIC_RATE_MASK) - network->flags &= - ~NETWORK_HAS_CCK; - } - } - - IEEE80211_DEBUG_MGMT("MFIE_TYPE_RATES_EX: '%s' (%d)\n", - rates_str, network->rates_ex_len); - break; - - case MFIE_TYPE_DS_SET: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_DS_SET: %d\n", - info_element->data[0]); - network->channel = info_element->data[0]; - break; - - case MFIE_TYPE_FH_SET: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_FH_SET: ignored\n"); - break; - - case MFIE_TYPE_CF_SET: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_CF_SET: ignored\n"); - break; - - case MFIE_TYPE_TIM: - network->tim.tim_count = info_element->data[0]; - network->tim.tim_period = info_element->data[1]; - IEEE80211_DEBUG_MGMT("MFIE_TYPE_TIM: partially ignored\n"); - break; - - case MFIE_TYPE_ERP_INFO: - network->erp_value = info_element->data[0]; - network->flags |= NETWORK_HAS_ERP_VALUE; - IEEE80211_DEBUG_MGMT("MFIE_TYPE_ERP_SET: %d\n", - network->erp_value); - break; - - case MFIE_TYPE_IBSS_SET: - network->atim_window = info_element->data[0]; - IEEE80211_DEBUG_MGMT("MFIE_TYPE_IBSS_SET: %d\n", - network->atim_window); - break; - - case MFIE_TYPE_CHALLENGE: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_CHALLENGE: ignored\n"); - break; - - case MFIE_TYPE_GENERIC: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_GENERIC: %d bytes\n", - info_element->len); - if (!ieee80211_parse_qos_info_param_IE(info_element, - network)) - break; - - if (info_element->len >= 4 && - info_element->data[0] == 0x00 && - info_element->data[1] == 0x50 && - info_element->data[2] == 0xf2 && - info_element->data[3] == 0x01) { - network->wpa_ie_len = min(info_element->len + 2, - MAX_WPA_IE_LEN); - memcpy(network->wpa_ie, info_element, - 
network->wpa_ie_len); - } - break; - - case MFIE_TYPE_RSN: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_RSN: %d bytes\n", - info_element->len); - network->rsn_ie_len = min(info_element->len + 2, - MAX_WPA_IE_LEN); - memcpy(network->rsn_ie, info_element, - network->rsn_ie_len); - break; - - case MFIE_TYPE_QOS_PARAMETER: - printk(KERN_ERR - "QoS Error need to parse QOS_PARAMETER IE\n"); - break; - /* 802.11h */ - case MFIE_TYPE_POWER_CONSTRAINT: - network->power_constraint = info_element->data[0]; - network->flags |= NETWORK_HAS_POWER_CONSTRAINT; - break; - - case MFIE_TYPE_CSA: - network->power_constraint = info_element->data[0]; - network->flags |= NETWORK_HAS_CSA; - break; - - case MFIE_TYPE_QUIET: - network->quiet.count = info_element->data[0]; - network->quiet.period = info_element->data[1]; - network->quiet.duration = info_element->data[2]; - network->quiet.offset = info_element->data[3]; - network->flags |= NETWORK_HAS_QUIET; - break; - - case MFIE_TYPE_IBSS_DFS: - if (network->ibss_dfs) - break; - network->ibss_dfs = kmemdup(info_element->data, - info_element->len, - GFP_ATOMIC); - if (!network->ibss_dfs) - return 1; - network->flags |= NETWORK_HAS_IBSS_DFS; - break; - - case MFIE_TYPE_TPC_REPORT: - network->tpc_report.transmit_power = - info_element->data[0]; - network->tpc_report.link_margin = info_element->data[1]; - network->flags |= NETWORK_HAS_TPC_REPORT; - break; - - default: - IEEE80211_DEBUG_MGMT - ("Unsupported info element: %s (%d)\n", - get_info_element_string(info_element->id), - info_element->id); - break; - } - - length -= sizeof(*info_element) + info_element->len; - info_element = - (struct ieee80211_info_element *)&info_element-> - data[info_element->len]; - } - - return 0; -} - -static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct ieee80211_assoc_response - *frame, struct ieee80211_rx_stats *stats) -{ - struct ieee80211_network network_resp = { - .ibss_dfs = NULL, - }; - struct ieee80211_network *network = &network_resp; - 
struct net_device *dev = ieee->dev; - - network->flags = 0; - network->qos_data.active = 0; - network->qos_data.supported = 0; - network->qos_data.param_count = 0; - network->qos_data.old_param_count = 0; - - //network->atim_window = le16_to_cpu(frame->aid) & (0x3FFF); - network->atim_window = le16_to_cpu(frame->aid); - network->listen_interval = le16_to_cpu(frame->status); - memcpy(network->bssid, frame->header.addr3, ETH_ALEN); - network->capability = le16_to_cpu(frame->capability); - network->last_scanned = jiffies; - network->rates_len = network->rates_ex_len = 0; - network->last_associate = 0; - network->ssid_len = 0; - network->erp_value = - (network->capability & WLAN_CAPABILITY_IBSS) ? 0x3 : 0x0; - - if (stats->freq == IEEE80211_52GHZ_BAND) { - /* for A band (No DS info) */ - network->channel = stats->received_channel; - } else - network->flags |= NETWORK_HAS_CCK; - - network->wpa_ie_len = 0; - network->rsn_ie_len = 0; - - if (ieee80211_parse_info_param - (frame->info_element, stats->len - sizeof(*frame), network)) - return 1; - - network->mode = 0; - if (stats->freq == IEEE80211_52GHZ_BAND) - network->mode = IEEE_A; - else { - if (network->flags & NETWORK_HAS_OFDM) - network->mode |= IEEE_G; - if (network->flags & NETWORK_HAS_CCK) - network->mode |= IEEE_B; - } - - memcpy(&network->stats, stats, sizeof(network->stats)); - - if (ieee->handle_assoc_response != NULL) - ieee->handle_assoc_response(dev, frame, network); - - return 0; -} - -/***************************************************/ - -static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee80211_probe_response - *beacon, - struct ieee80211_network *network, - struct ieee80211_rx_stats *stats) -{ - DECLARE_SSID_BUF(ssid); - - network->qos_data.active = 0; - network->qos_data.supported = 0; - network->qos_data.param_count = 0; - network->qos_data.old_param_count = 0; - - /* Pull out fixed field data */ - memcpy(network->bssid, beacon->header.addr3, ETH_ALEN); - network->capability = 
le16_to_cpu(beacon->capability); - network->last_scanned = jiffies; - network->time_stamp[0] = le32_to_cpu(beacon->time_stamp[0]); - network->time_stamp[1] = le32_to_cpu(beacon->time_stamp[1]); - network->beacon_interval = le16_to_cpu(beacon->beacon_interval); - /* Where to pull this? beacon->listen_interval; */ - network->listen_interval = 0x0A; - network->rates_len = network->rates_ex_len = 0; - network->last_associate = 0; - network->ssid_len = 0; - network->flags = 0; - network->atim_window = 0; - network->erp_value = (network->capability & WLAN_CAPABILITY_IBSS) ? - 0x3 : 0x0; - - if (stats->freq == IEEE80211_52GHZ_BAND) { - /* for A band (No DS info) */ - network->channel = stats->received_channel; - } else - network->flags |= NETWORK_HAS_CCK; - - network->wpa_ie_len = 0; - network->rsn_ie_len = 0; - - if (ieee80211_parse_info_param - (beacon->info_element, stats->len - sizeof(*beacon), network)) - return 1; - - network->mode = 0; - if (stats->freq == IEEE80211_52GHZ_BAND) - network->mode = IEEE_A; - else { - if (network->flags & NETWORK_HAS_OFDM) - network->mode |= IEEE_G; - if (network->flags & NETWORK_HAS_CCK) - network->mode |= IEEE_B; - } - - if (network->mode == 0) { - IEEE80211_DEBUG_SCAN("Filtered out '%s (%pM)' " - "network.\n", - print_ssid(ssid, network->ssid, - network->ssid_len), - network->bssid); - return 1; - } - - memcpy(&network->stats, stats, sizeof(network->stats)); - - return 0; -} - -static inline int is_same_network(struct ieee80211_network *src, - struct ieee80211_network *dst) -{ - /* A network is only a duplicate if the channel, BSSID, and ESSID - * all match. 
We treat all <hidden> with the same BSSID and channel - * as one network */ - return ((src->ssid_len == dst->ssid_len) && - (src->channel == dst->channel) && - !compare_ether_addr(src->bssid, dst->bssid) && - !memcmp(src->ssid, dst->ssid, src->ssid_len)); -} - -static void update_network(struct ieee80211_network *dst, - struct ieee80211_network *src) -{ - int qos_active; - u8 old_param; - - ieee80211_network_reset(dst); - dst->ibss_dfs = src->ibss_dfs; - - /* We only update the statistics if they were created by receiving - * the network information on the actual channel the network is on. - * - * This keeps beacons received on neighbor channels from bringing - * down the signal level of an AP. */ - if (dst->channel == src->stats.received_channel) - memcpy(&dst->stats, &src->stats, - sizeof(struct ieee80211_rx_stats)); - else - IEEE80211_DEBUG_SCAN("Network %pM info received " - "off channel (%d vs. %d)\n", src->bssid, - dst->channel, src->stats.received_channel); - - dst->capability = src->capability; - memcpy(dst->rates, src->rates, src->rates_len); - dst->rates_len = src->rates_len; - memcpy(dst->rates_ex, src->rates_ex, src->rates_ex_len); - dst->rates_ex_len = src->rates_ex_len; - - dst->mode = src->mode; - dst->flags = src->flags; - dst->time_stamp[0] = src->time_stamp[0]; - dst->time_stamp[1] = src->time_stamp[1]; - - dst->beacon_interval = src->beacon_interval; - dst->listen_interval = src->listen_interval; - dst->atim_window = src->atim_window; - dst->erp_value = src->erp_value; - dst->tim = src->tim; - - memcpy(dst->wpa_ie, src->wpa_ie, src->wpa_ie_len); - dst->wpa_ie_len = src->wpa_ie_len; - memcpy(dst->rsn_ie, src->rsn_ie, src->rsn_ie_len); - dst->rsn_ie_len = src->rsn_ie_len; - - dst->last_scanned = jiffies; - qos_active = src->qos_data.active; - old_param = dst->qos_data.old_param_count; - if (dst->flags & NETWORK_HAS_QOS_MASK) - memcpy(&dst->qos_data, &src->qos_data, - sizeof(struct ieee80211_qos_data)); - else { - dst->qos_data.supported = 
src->qos_data.supported; - dst->qos_data.param_count = src->qos_data.param_count; - } - - if (dst->qos_data.supported == 1) { - if (dst->ssid_len) - IEEE80211_DEBUG_QOS - ("QoS the network %s is QoS supported\n", - dst->ssid); - else - IEEE80211_DEBUG_QOS - ("QoS the network is QoS supported\n"); - } - dst->qos_data.active = qos_active; - dst->qos_data.old_param_count = old_param; - - /* dst->last_associate is not overwritten */ -} - -static inline int is_beacon(__le16 fc) -{ - return (WLAN_FC_GET_STYPE(le16_to_cpu(fc)) == IEEE80211_STYPE_BEACON); -} - -static void ieee80211_process_probe_response(struct ieee80211_device - *ieee, struct - ieee80211_probe_response - *beacon, struct ieee80211_rx_stats - *stats) -{ - struct net_device *dev = ieee->dev; - struct ieee80211_network network = { - .ibss_dfs = NULL, - }; - struct ieee80211_network *target; - struct ieee80211_network *oldest = NULL; -#ifdef CONFIG_IEEE80211_DEBUG - struct ieee80211_info_element *info_element = beacon->info_element; -#endif - unsigned long flags; - DECLARE_SSID_BUF(ssid); - - IEEE80211_DEBUG_SCAN("'%s' (%pM" - "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n", - print_ssid(ssid, info_element->data, info_element->len), - beacon->header.addr3, - (beacon->capability & cpu_to_le16(1 << 0xf)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0xe)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0xd)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0xc)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0xb)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0xa)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x9)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x8)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x7)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x6)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x5)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x4)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x3)) ? 
'1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x2)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x1)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x0)) ? '1' : '0'); - - if (ieee80211_network_init(ieee, beacon, &network, stats)) { - IEEE80211_DEBUG_SCAN("Dropped '%s' (%pM) via %s.\n", - print_ssid(ssid, info_element->data, - info_element->len), - beacon->header.addr3, - is_beacon(beacon->header.frame_ctl) ? - "BEACON" : "PROBE RESPONSE"); - return; - } - - /* The network parsed correctly -- so now we scan our known networks - * to see if we can find it in our list. - * - * NOTE: This search is definitely not optimized. Once its doing - * the "right thing" we'll optimize it for efficiency if - * necessary */ - - /* Search for this entry in the list and update it if it is - * already there. */ - - spin_lock_irqsave(&ieee->lock, flags); - - list_for_each_entry(target, &ieee->network_list, list) { - if (is_same_network(target, &network)) - break; - - if ((oldest == NULL) || - (target->last_scanned < oldest->last_scanned)) - oldest = target; - } - - /* If we didn't find a match, then get a new network slot to initialize - * with this beacon's information */ - if (&target->list == &ieee->network_list) { - if (list_empty(&ieee->network_free_list)) { - /* If there are no more slots, expire the oldest */ - list_del(&oldest->list); - target = oldest; - IEEE80211_DEBUG_SCAN("Expired '%s' (%pM) from " - "network list.\n", - print_ssid(ssid, target->ssid, - target->ssid_len), - target->bssid); - ieee80211_network_reset(target); - } else { - /* Otherwise just pull from the free list */ - target = list_entry(ieee->network_free_list.next, - struct ieee80211_network, list); - list_del(ieee->network_free_list.next); - } - -#ifdef CONFIG_IEEE80211_DEBUG - IEEE80211_DEBUG_SCAN("Adding '%s' (%pM) via %s.\n", - print_ssid(ssid, network.ssid, - network.ssid_len), - network.bssid, - is_beacon(beacon->header.frame_ctl) ? 
- "BEACON" : "PROBE RESPONSE"); -#endif - memcpy(target, &network, sizeof(*target)); - network.ibss_dfs = NULL; - list_add_tail(&target->list, &ieee->network_list); - } else { - IEEE80211_DEBUG_SCAN("Updating '%s' (%pM) via %s.\n", - print_ssid(ssid, target->ssid, - target->ssid_len), - target->bssid, - is_beacon(beacon->header.frame_ctl) ? - "BEACON" : "PROBE RESPONSE"); - update_network(target, &network); - network.ibss_dfs = NULL; - } - - spin_unlock_irqrestore(&ieee->lock, flags); - - if (is_beacon(beacon->header.frame_ctl)) { - if (ieee->handle_beacon != NULL) - ieee->handle_beacon(dev, beacon, target); - } else { - if (ieee->handle_probe_response != NULL) - ieee->handle_probe_response(dev, beacon, target); - } -} - -void ieee80211_rx_mgt(struct ieee80211_device *ieee, - struct ieee80211_hdr_4addr *header, - struct ieee80211_rx_stats *stats) -{ - switch (WLAN_FC_GET_STYPE(le16_to_cpu(header->frame_ctl))) { - case IEEE80211_STYPE_ASSOC_RESP: - IEEE80211_DEBUG_MGMT("received ASSOCIATION RESPONSE (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - ieee80211_handle_assoc_resp(ieee, - (struct ieee80211_assoc_response *) - header, stats); - break; - - case IEEE80211_STYPE_REASSOC_RESP: - IEEE80211_DEBUG_MGMT("received REASSOCIATION RESPONSE (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - break; - - case IEEE80211_STYPE_PROBE_REQ: - IEEE80211_DEBUG_MGMT("received auth (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - - if (ieee->handle_probe_request != NULL) - ieee->handle_probe_request(ieee->dev, - (struct - ieee80211_probe_request *) - header, stats); - break; - - case IEEE80211_STYPE_PROBE_RESP: - IEEE80211_DEBUG_MGMT("received PROBE RESPONSE (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - IEEE80211_DEBUG_SCAN("Probe response\n"); - ieee80211_process_probe_response(ieee, - (struct - ieee80211_probe_response *) - header, stats); - break; - - case IEEE80211_STYPE_BEACON: - 
IEEE80211_DEBUG_MGMT("received BEACON (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - IEEE80211_DEBUG_SCAN("Beacon\n"); - ieee80211_process_probe_response(ieee, - (struct - ieee80211_probe_response *) - header, stats); - break; - case IEEE80211_STYPE_AUTH: - - IEEE80211_DEBUG_MGMT("received auth (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - - if (ieee->handle_auth != NULL) - ieee->handle_auth(ieee->dev, - (struct ieee80211_auth *)header); - break; - - case IEEE80211_STYPE_DISASSOC: - if (ieee->handle_disassoc != NULL) - ieee->handle_disassoc(ieee->dev, - (struct ieee80211_disassoc *) - header); - break; - - case IEEE80211_STYPE_ACTION: - IEEE80211_DEBUG_MGMT("ACTION\n"); - if (ieee->handle_action) - ieee->handle_action(ieee->dev, - (struct ieee80211_action *) - header, stats); - break; - - case IEEE80211_STYPE_REASSOC_REQ: - IEEE80211_DEBUG_MGMT("received reassoc (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - - IEEE80211_DEBUG_MGMT("%s: IEEE80211_REASSOC_REQ received\n", - ieee->dev->name); - if (ieee->handle_reassoc_request != NULL) - ieee->handle_reassoc_request(ieee->dev, - (struct ieee80211_reassoc_request *) - header); - break; - - case IEEE80211_STYPE_ASSOC_REQ: - IEEE80211_DEBUG_MGMT("received assoc (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - - IEEE80211_DEBUG_MGMT("%s: IEEE80211_ASSOC_REQ received\n", - ieee->dev->name); - if (ieee->handle_assoc_request != NULL) - ieee->handle_assoc_request(ieee->dev); - break; - - case IEEE80211_STYPE_DEAUTH: - IEEE80211_DEBUG_MGMT("DEAUTH\n"); - if (ieee->handle_deauth != NULL) - ieee->handle_deauth(ieee->dev, - (struct ieee80211_deauth *) - header); - break; - default: - IEEE80211_DEBUG_MGMT("received UNKNOWN (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - IEEE80211_DEBUG_MGMT("%s: Unknown management packet: %d\n", - ieee->dev->name, - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - break; - } -} - 
-EXPORT_SYMBOL_GPL(ieee80211_rx_any); -EXPORT_SYMBOL(ieee80211_rx_mgt); -EXPORT_SYMBOL(ieee80211_rx); diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c deleted file mode 100644 index d996547f7a6..00000000000 --- a/net/ieee80211/ieee80211_tx.c +++ /dev/null @@ -1,545 +0,0 @@ -/****************************************************************************** - - Copyright(c) 2003 - 2005 Intel Corporation. All rights reserved. - - This program is free software; you can redistribute it and/or modify it - under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 - Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - The full GNU General Public License is included in this distribution in the - file called LICENSE. - - Contact Information: - James P. Ketrenos <ipw2100-admin@linux.intel.com> - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -******************************************************************************/ -#include <linux/compiler.h> -#include <linux/errno.h> -#include <linux/if_arp.h> -#include <linux/in6.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/tcp.h> -#include <linux/types.h> -#include <linux/wireless.h> -#include <linux/etherdevice.h> -#include <asm/uaccess.h> - -#include <net/ieee80211.h> - -/* - -802.11 Data Frame - - ,-------------------------------------------------------------------. -Bytes | 2 | 2 | 6 | 6 | 6 | 2 | 0..2312 | 4 | - |------|------|---------|---------|---------|------|---------|------| -Desc. | ctrl | dura | DA/RA | TA | SA | Sequ | Frame | fcs | - | | tion | (BSSID) | | | ence | data | | - `--------------------------------------------------| |------' -Total: 28 non-data bytes `----.----' - | - .- 'Frame data' expands, if WEP enabled, to <----------' - | - V - ,-----------------------. -Bytes | 4 | 0-2296 | 4 | - |-----|-----------|-----| -Desc. | IV | Encrypted | ICV | - | | Packet | | - `-----| |-----' - `-----.-----' - | - .- 'Encrypted Packet' expands to - | - V - ,---------------------------------------------------. -Bytes | 1 | 1 | 1 | 3 | 2 | 0-2304 | - |------|------|---------|----------|------|---------| -Desc. | SNAP | SNAP | Control |Eth Tunnel| Type | IP | - | DSAP | SSAP | | | | Packet | - | 0xAA | 0xAA |0x03 (UI)|0x00-00-F8| | | - `---------------------------------------------------- -Total: 8 non-data bytes - -802.3 Ethernet Data Frame - - ,-----------------------------------------. -Bytes | 6 | 6 | 2 | Variable | 4 | - |-------|-------|------|-----------|------| -Desc. | Dest. 
| Source| Type | IP Packet | fcs | - | MAC | MAC | | | | - `-----------------------------------------' -Total: 18 non-data bytes - -In the event that fragmentation is required, the incoming payload is split into -N parts of size ieee->fts. The first fragment contains the SNAP header and the -remaining packets are just data. - -If encryption is enabled, each fragment payload size is reduced by enough space -to add the prefix and postfix (IV and ICV totalling 8 bytes in the case of WEP) -So if you have 1500 bytes of payload with ieee->fts set to 500 without -encryption it will take 3 frames. With WEP it will take 4 frames as the -payload of each frame is reduced to 492 bytes. - -* SKB visualization -* -* ,- skb->data -* | -* | ETHERNET HEADER ,-<-- PAYLOAD -* | | 14 bytes from skb->data -* | 2 bytes for Type --> ,T. | (sizeof ethhdr) -* | | | | -* |,-Dest.--. ,--Src.---. | | | -* | 6 bytes| | 6 bytes | | | | -* v | | | | | | -* 0 | v 1 | v | v 2 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 -* ^ | ^ | ^ | -* | | | | | | -* | | | | `T' <---- 2 bytes for Type -* | | | | -* | | '---SNAP--' <-------- 6 bytes for SNAP -* | | -* `-IV--' <-------------------- 4 bytes for IV (WEP) -* -* SNAP HEADER -* -*/ - -static u8 P802_1H_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0xf8 }; -static u8 RFC1042_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0x00 }; - -static int ieee80211_copy_snap(u8 * data, __be16 h_proto) -{ - struct ieee80211_snap_hdr *snap; - u8 *oui; - - snap = (struct ieee80211_snap_hdr *)data; - snap->dsap = 0xaa; - snap->ssap = 0xaa; - snap->ctrl = 0x03; - - if (h_proto == htons(ETH_P_AARP) || h_proto == htons(ETH_P_IPX)) - oui = P802_1H_OUI; - else - oui = RFC1042_OUI; - snap->oui[0] = oui[0]; - snap->oui[1] = oui[1]; - snap->oui[2] = oui[2]; - - memcpy(data + SNAP_SIZE, &h_proto, sizeof(u16)); - - return SNAP_SIZE + sizeof(u16); -} - -static int ieee80211_encrypt_fragment(struct ieee80211_device *ieee, - struct sk_buff *frag, int hdr_len) -{ - struct ieee80211_crypt_data 
*crypt = ieee->crypt[ieee->tx_keyidx]; - int res; - - if (crypt == NULL) - return -1; - - /* To encrypt, frame format is: - * IV (4 bytes), clear payload (including SNAP), ICV (4 bytes) */ - atomic_inc(&crypt->refcnt); - res = 0; - if (crypt->ops && crypt->ops->encrypt_mpdu) - res = crypt->ops->encrypt_mpdu(frag, hdr_len, crypt->priv); - - atomic_dec(&crypt->refcnt); - if (res < 0) { - printk(KERN_INFO "%s: Encryption failed: len=%d.\n", - ieee->dev->name, frag->len); - ieee->ieee_stats.tx_discards++; - return -1; - } - - return 0; -} - -void ieee80211_txb_free(struct ieee80211_txb *txb) -{ - int i; - if (unlikely(!txb)) - return; - for (i = 0; i < txb->nr_frags; i++) - if (txb->fragments[i]) - dev_kfree_skb_any(txb->fragments[i]); - kfree(txb); -} - -static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, - int headroom, gfp_t gfp_mask) -{ - struct ieee80211_txb *txb; - int i; - txb = kmalloc(sizeof(struct ieee80211_txb) + (sizeof(u8 *) * nr_frags), - gfp_mask); - if (!txb) - return NULL; - - memset(txb, 0, sizeof(struct ieee80211_txb)); - txb->nr_frags = nr_frags; - txb->frag_size = txb_size; - - for (i = 0; i < nr_frags; i++) { - txb->fragments[i] = __dev_alloc_skb(txb_size + headroom, - gfp_mask); - if (unlikely(!txb->fragments[i])) { - i--; - break; - } - skb_reserve(txb->fragments[i], headroom); - } - if (unlikely(i != nr_frags)) { - while (i >= 0) - dev_kfree_skb_any(txb->fragments[i--]); - kfree(txb); - return NULL; - } - return txb; -} - -static int ieee80211_classify(struct sk_buff *skb) -{ - struct ethhdr *eth; - struct iphdr *ip; - - eth = (struct ethhdr *)skb->data; - if (eth->h_proto != htons(ETH_P_IP)) - return 0; - - ip = ip_hdr(skb); - switch (ip->tos & 0xfc) { - case 0x20: - return 2; - case 0x40: - return 1; - case 0x60: - return 3; - case 0x80: - return 4; - case 0xa0: - return 5; - case 0xc0: - return 6; - case 0xe0: - return 7; - default: - return 0; - } -} - -/* Incoming skb is converted to a txb which consists of - * a 
block of 802.11 fragment packets (stored as skbs) */ -int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee80211_device *ieee = netdev_priv(dev); - struct ieee80211_txb *txb = NULL; - struct ieee80211_hdr_3addrqos *frag_hdr; - int i, bytes_per_frag, nr_frags, bytes_last_frag, frag_size, - rts_required; - unsigned long flags; - struct net_device_stats *stats = &ieee->stats; - int encrypt, host_encrypt, host_encrypt_msdu, host_build_iv; - __be16 ether_type; - int bytes, fc, hdr_len; - struct sk_buff *skb_frag; - struct ieee80211_hdr_3addrqos header = {/* Ensure zero initialized */ - .duration_id = 0, - .seq_ctl = 0, - .qos_ctl = 0 - }; - u8 dest[ETH_ALEN], src[ETH_ALEN]; - struct ieee80211_crypt_data *crypt; - int priority = skb->priority; - int snapped = 0; - - if (ieee->is_queue_full && (*ieee->is_queue_full) (dev, priority)) - return NETDEV_TX_BUSY; - - spin_lock_irqsave(&ieee->lock, flags); - - /* If there is no driver handler to take the TXB, dont' bother - * creating it... 
*/ - if (!ieee->hard_start_xmit) { - printk(KERN_WARNING "%s: No xmit handler.\n", ieee->dev->name); - goto success; - } - - if (unlikely(skb->len < SNAP_SIZE + sizeof(u16))) { - printk(KERN_WARNING "%s: skb too small (%d).\n", - ieee->dev->name, skb->len); - goto success; - } - - ether_type = ((struct ethhdr *)skb->data)->h_proto; - - crypt = ieee->crypt[ieee->tx_keyidx]; - - encrypt = !(ether_type == htons(ETH_P_PAE) && ieee->ieee802_1x) && - ieee->sec.encrypt; - - host_encrypt = ieee->host_encrypt && encrypt && crypt; - host_encrypt_msdu = ieee->host_encrypt_msdu && encrypt && crypt; - host_build_iv = ieee->host_build_iv && encrypt && crypt; - - if (!encrypt && ieee->ieee802_1x && - ieee->drop_unencrypted && ether_type != htons(ETH_P_PAE)) { - stats->tx_dropped++; - goto success; - } - - /* Save source and destination addresses */ - skb_copy_from_linear_data(skb, dest, ETH_ALEN); - skb_copy_from_linear_data_offset(skb, ETH_ALEN, src, ETH_ALEN); - - if (host_encrypt || host_build_iv) - fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA | - IEEE80211_FCTL_PROTECTED; - else - fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; - - if (ieee->iw_mode == IW_MODE_INFRA) { - fc |= IEEE80211_FCTL_TODS; - /* To DS: Addr1 = BSSID, Addr2 = SA, Addr3 = DA */ - memcpy(header.addr1, ieee->bssid, ETH_ALEN); - memcpy(header.addr2, src, ETH_ALEN); - memcpy(header.addr3, dest, ETH_ALEN); - } else if (ieee->iw_mode == IW_MODE_ADHOC) { - /* not From/To DS: Addr1 = DA, Addr2 = SA, Addr3 = BSSID */ - memcpy(header.addr1, dest, ETH_ALEN); - memcpy(header.addr2, src, ETH_ALEN); - memcpy(header.addr3, ieee->bssid, ETH_ALEN); - } - hdr_len = IEEE80211_3ADDR_LEN; - - if (ieee->is_qos_active && ieee->is_qos_active(dev, skb)) { - fc |= IEEE80211_STYPE_QOS_DATA; - hdr_len += 2; - - skb->priority = ieee80211_classify(skb); - header.qos_ctl |= cpu_to_le16(skb->priority & IEEE80211_QCTL_TID); - } - header.frame_ctl = cpu_to_le16(fc); - - /* Advance the SKB to the start of the payload */ - 
skb_pull(skb, sizeof(struct ethhdr)); - - /* Determine total amount of storage required for TXB packets */ - bytes = skb->len + SNAP_SIZE + sizeof(u16); - - /* Encrypt msdu first on the whole data packet. */ - if ((host_encrypt || host_encrypt_msdu) && - crypt && crypt->ops && crypt->ops->encrypt_msdu) { - int res = 0; - int len = bytes + hdr_len + crypt->ops->extra_msdu_prefix_len + - crypt->ops->extra_msdu_postfix_len; - struct sk_buff *skb_new = dev_alloc_skb(len); - - if (unlikely(!skb_new)) - goto failed; - - skb_reserve(skb_new, crypt->ops->extra_msdu_prefix_len); - memcpy(skb_put(skb_new, hdr_len), &header, hdr_len); - snapped = 1; - ieee80211_copy_snap(skb_put(skb_new, SNAP_SIZE + sizeof(u16)), - ether_type); - skb_copy_from_linear_data(skb, skb_put(skb_new, skb->len), skb->len); - res = crypt->ops->encrypt_msdu(skb_new, hdr_len, crypt->priv); - if (res < 0) { - IEEE80211_ERROR("msdu encryption failed\n"); - dev_kfree_skb_any(skb_new); - goto failed; - } - dev_kfree_skb_any(skb); - skb = skb_new; - bytes += crypt->ops->extra_msdu_prefix_len + - crypt->ops->extra_msdu_postfix_len; - skb_pull(skb, hdr_len); - } - - if (host_encrypt || ieee->host_open_frag) { - /* Determine fragmentation size based on destination (multicast - * and broadcast are not fragmented) */ - if (is_multicast_ether_addr(dest) || - is_broadcast_ether_addr(dest)) - frag_size = MAX_FRAG_THRESHOLD; - else - frag_size = ieee->fts; - - /* Determine amount of payload per fragment. Regardless of if - * this stack is providing the full 802.11 header, one will - * eventually be affixed to this fragment -- so we must account - * for it when determining the amount of payload space. 
*/ - bytes_per_frag = frag_size - hdr_len; - if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) - bytes_per_frag -= IEEE80211_FCS_LEN; - - /* Each fragment may need to have room for encryptiong - * pre/postfix */ - if (host_encrypt) - bytes_per_frag -= crypt->ops->extra_mpdu_prefix_len + - crypt->ops->extra_mpdu_postfix_len; - - /* Number of fragments is the total - * bytes_per_frag / payload_per_fragment */ - nr_frags = bytes / bytes_per_frag; - bytes_last_frag = bytes % bytes_per_frag; - if (bytes_last_frag) - nr_frags++; - else - bytes_last_frag = bytes_per_frag; - } else { - nr_frags = 1; - bytes_per_frag = bytes_last_frag = bytes; - frag_size = bytes + hdr_len; - } - - rts_required = (frag_size > ieee->rts - && ieee->config & CFG_IEEE80211_RTS); - if (rts_required) - nr_frags++; - - /* When we allocate the TXB we allocate enough space for the reserve - * and full fragment bytes (bytes_per_frag doesn't include prefix, - * postfix, header, FCS, etc.) */ - txb = ieee80211_alloc_txb(nr_frags, frag_size, - ieee->tx_headroom, GFP_ATOMIC); - if (unlikely(!txb)) { - printk(KERN_WARNING "%s: Could not allocate TXB\n", - ieee->dev->name); - goto failed; - } - txb->encrypted = encrypt; - if (host_encrypt) - txb->payload_size = frag_size * (nr_frags - 1) + - bytes_last_frag; - else - txb->payload_size = bytes; - - if (rts_required) { - skb_frag = txb->fragments[0]; - frag_hdr = - (struct ieee80211_hdr_3addrqos *)skb_put(skb_frag, hdr_len); - - /* - * Set header frame_ctl to the RTS. - */ - header.frame_ctl = - cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS); - memcpy(frag_hdr, &header, hdr_len); - - /* - * Restore header frame_ctl to the original data setting. 
- */ - header.frame_ctl = cpu_to_le16(fc); - - if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) - skb_put(skb_frag, 4); - - txb->rts_included = 1; - i = 1; - } else - i = 0; - - for (; i < nr_frags; i++) { - skb_frag = txb->fragments[i]; - - if (host_encrypt || host_build_iv) - skb_reserve(skb_frag, - crypt->ops->extra_mpdu_prefix_len); - - frag_hdr = - (struct ieee80211_hdr_3addrqos *)skb_put(skb_frag, hdr_len); - memcpy(frag_hdr, &header, hdr_len); - - /* If this is not the last fragment, then add the MOREFRAGS - * bit to the frame control */ - if (i != nr_frags - 1) { - frag_hdr->frame_ctl = - cpu_to_le16(fc | IEEE80211_FCTL_MOREFRAGS); - bytes = bytes_per_frag; - } else { - /* The last fragment takes the remaining length */ - bytes = bytes_last_frag; - } - - if (i == 0 && !snapped) { - ieee80211_copy_snap(skb_put - (skb_frag, SNAP_SIZE + sizeof(u16)), - ether_type); - bytes -= SNAP_SIZE + sizeof(u16); - } - - skb_copy_from_linear_data(skb, skb_put(skb_frag, bytes), bytes); - - /* Advance the SKB... 
*/ - skb_pull(skb, bytes); - - /* Encryption routine will move the header forward in order - * to insert the IV between the header and the payload */ - if (host_encrypt) - ieee80211_encrypt_fragment(ieee, skb_frag, hdr_len); - else if (host_build_iv) { - atomic_inc(&crypt->refcnt); - if (crypt->ops->build_iv) - crypt->ops->build_iv(skb_frag, hdr_len, - ieee->sec.keys[ieee->sec.active_key], - ieee->sec.key_sizes[ieee->sec.active_key], - crypt->priv); - atomic_dec(&crypt->refcnt); - } - - if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) - skb_put(skb_frag, 4); - } - - success: - spin_unlock_irqrestore(&ieee->lock, flags); - - dev_kfree_skb_any(skb); - - if (txb) { - int ret = (*ieee->hard_start_xmit) (txb, dev, priority); - if (ret == 0) { - stats->tx_packets++; - stats->tx_bytes += txb->payload_size; - return 0; - } - - ieee80211_txb_free(txb); - } - - return 0; - - failed: - spin_unlock_irqrestore(&ieee->lock, flags); - netif_stop_queue(dev); - stats->tx_errors++; - return 1; -} - -EXPORT_SYMBOL(ieee80211_txb_free); diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c deleted file mode 100644 index 29eb41695a8..00000000000 --- a/net/ieee80211/ieee80211_wx.c +++ /dev/null @@ -1,756 +0,0 @@ -/****************************************************************************** - - Copyright(c) 2004-2005 Intel Corporation. All rights reserved. - - Portions of this file are based on the WEP enablement code provided by the - Host AP project hostap-drivers v0.1.3 - Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen - <j@w1.fi> - Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi> - - This program is free software; you can redistribute it and/or modify it - under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. 
- - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 - Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - The full GNU General Public License is included in this distribution in the - file called LICENSE. - - Contact Information: - James P. Ketrenos <ipw2100-admin@linux.intel.com> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -******************************************************************************/ - -#include <linux/kmod.h> -#include <linux/module.h> -#include <linux/jiffies.h> - -#include <net/lib80211.h> -#include <net/ieee80211.h> -#include <linux/wireless.h> - -static const char *ieee80211_modes[] = { - "?", "a", "b", "ab", "g", "ag", "bg", "abg" -}; - -#define MAX_CUSTOM_LEN 64 -static char *ieee80211_translate_scan(struct ieee80211_device *ieee, - char *start, char *stop, - struct ieee80211_network *network, - struct iw_request_info *info) -{ - char custom[MAX_CUSTOM_LEN]; - char *p; - struct iw_event iwe; - int i, j; - char *current_val; /* For rates */ - u8 rate; - - /* First entry *MUST* be the AP MAC address */ - iwe.cmd = SIOCGIWAP; - iwe.u.ap_addr.sa_family = ARPHRD_ETHER; - memcpy(iwe.u.ap_addr.sa_data, network->bssid, ETH_ALEN); - start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_ADDR_LEN); - - /* Remaining entries will be displayed in the order we provide them */ - - /* Add the ESSID */ - iwe.cmd = SIOCGIWESSID; - iwe.u.data.flags = 1; - iwe.u.data.length = min(network->ssid_len, (u8) 32); - start = iwe_stream_add_point(info, start, stop, - &iwe, network->ssid); - - /* Add the protocol name */ - iwe.cmd = SIOCGIWNAME; - snprintf(iwe.u.name, IFNAMSIZ, "IEEE 
802.11%s", - ieee80211_modes[network->mode]); - start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_CHAR_LEN); - - /* Add mode */ - iwe.cmd = SIOCGIWMODE; - if (network->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)) { - if (network->capability & WLAN_CAPABILITY_ESS) - iwe.u.mode = IW_MODE_MASTER; - else - iwe.u.mode = IW_MODE_ADHOC; - - start = iwe_stream_add_event(info, start, stop, - &iwe, IW_EV_UINT_LEN); - } - - /* Add channel and frequency */ - /* Note : userspace automatically computes channel using iwrange */ - iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = ieee80211_channel_to_freq(ieee, network->channel); - iwe.u.freq.e = 6; - iwe.u.freq.i = 0; - start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_FREQ_LEN); - - /* Add encryption capability */ - iwe.cmd = SIOCGIWENCODE; - if (network->capability & WLAN_CAPABILITY_PRIVACY) - iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY; - else - iwe.u.data.flags = IW_ENCODE_DISABLED; - iwe.u.data.length = 0; - start = iwe_stream_add_point(info, start, stop, - &iwe, network->ssid); - - /* Add basic and extended rates */ - /* Rate : stuffing multiple values in a single event require a bit - * more of magic - Jean II */ - current_val = start + iwe_stream_lcp_len(info); - iwe.cmd = SIOCGIWRATE; - /* Those two flags are ignored... 
*/ - iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; - - for (i = 0, j = 0; i < network->rates_len;) { - if (j < network->rates_ex_len && - ((network->rates_ex[j] & 0x7F) < - (network->rates[i] & 0x7F))) - rate = network->rates_ex[j++] & 0x7F; - else - rate = network->rates[i++] & 0x7F; - /* Bit rate given in 500 kb/s units (+ 0x80) */ - iwe.u.bitrate.value = ((rate & 0x7f) * 500000); - /* Add new value to event */ - current_val = iwe_stream_add_value(info, start, current_val, - stop, &iwe, IW_EV_PARAM_LEN); - } - for (; j < network->rates_ex_len; j++) { - rate = network->rates_ex[j] & 0x7F; - /* Bit rate given in 500 kb/s units (+ 0x80) */ - iwe.u.bitrate.value = ((rate & 0x7f) * 500000); - /* Add new value to event */ - current_val = iwe_stream_add_value(info, start, current_val, - stop, &iwe, IW_EV_PARAM_LEN); - } - /* Check if we added any rate */ - if ((current_val - start) > iwe_stream_lcp_len(info)) - start = current_val; - - /* Add quality statistics */ - iwe.cmd = IWEVQUAL; - iwe.u.qual.updated = IW_QUAL_QUAL_UPDATED | IW_QUAL_LEVEL_UPDATED | - IW_QUAL_NOISE_UPDATED; - - if (!(network->stats.mask & IEEE80211_STATMASK_RSSI)) { - iwe.u.qual.updated |= IW_QUAL_QUAL_INVALID | - IW_QUAL_LEVEL_INVALID; - iwe.u.qual.qual = 0; - } else { - if (ieee->perfect_rssi == ieee->worst_rssi) - iwe.u.qual.qual = 100; - else - iwe.u.qual.qual = - (100 * - (ieee->perfect_rssi - ieee->worst_rssi) * - (ieee->perfect_rssi - ieee->worst_rssi) - - (ieee->perfect_rssi - network->stats.rssi) * - (15 * (ieee->perfect_rssi - ieee->worst_rssi) + - 62 * (ieee->perfect_rssi - - network->stats.rssi))) / - ((ieee->perfect_rssi - - ieee->worst_rssi) * (ieee->perfect_rssi - - ieee->worst_rssi)); - if (iwe.u.qual.qual > 100) - iwe.u.qual.qual = 100; - else if (iwe.u.qual.qual < 1) - iwe.u.qual.qual = 0; - } - - if (!(network->stats.mask & IEEE80211_STATMASK_NOISE)) { - iwe.u.qual.updated |= IW_QUAL_NOISE_INVALID; - iwe.u.qual.noise = 0; - } else { - iwe.u.qual.noise = network->stats.noise; 
- } - - if (!(network->stats.mask & IEEE80211_STATMASK_SIGNAL)) { - iwe.u.qual.updated |= IW_QUAL_LEVEL_INVALID; - iwe.u.qual.level = 0; - } else { - iwe.u.qual.level = network->stats.signal; - } - - start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_QUAL_LEN); - - iwe.cmd = IWEVCUSTOM; - p = custom; - - iwe.u.data.length = p - custom; - if (iwe.u.data.length) - start = iwe_stream_add_point(info, start, stop, &iwe, custom); - - memset(&iwe, 0, sizeof(iwe)); - if (network->wpa_ie_len) { - char buf[MAX_WPA_IE_LEN]; - memcpy(buf, network->wpa_ie, network->wpa_ie_len); - iwe.cmd = IWEVGENIE; - iwe.u.data.length = network->wpa_ie_len; - start = iwe_stream_add_point(info, start, stop, &iwe, buf); - } - - memset(&iwe, 0, sizeof(iwe)); - if (network->rsn_ie_len) { - char buf[MAX_WPA_IE_LEN]; - memcpy(buf, network->rsn_ie, network->rsn_ie_len); - iwe.cmd = IWEVGENIE; - iwe.u.data.length = network->rsn_ie_len; - start = iwe_stream_add_point(info, start, stop, &iwe, buf); - } - - /* Add EXTRA: Age to display seconds since last beacon/probe response - * for given network. 
*/ - iwe.cmd = IWEVCUSTOM; - p = custom; - p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), - " Last beacon: %dms ago", - jiffies_to_msecs(jiffies - network->last_scanned)); - iwe.u.data.length = p - custom; - if (iwe.u.data.length) - start = iwe_stream_add_point(info, start, stop, &iwe, custom); - - /* Add spectrum management information */ - iwe.cmd = -1; - p = custom; - p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), " Channel flags: "); - - if (ieee80211_get_channel_flags(ieee, network->channel) & - IEEE80211_CH_INVALID) { - iwe.cmd = IWEVCUSTOM; - p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), "INVALID "); - } - - if (ieee80211_get_channel_flags(ieee, network->channel) & - IEEE80211_CH_RADAR_DETECT) { - iwe.cmd = IWEVCUSTOM; - p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), "DFS "); - } - - if (iwe.cmd == IWEVCUSTOM) { - iwe.u.data.length = p - custom; - start = iwe_stream_add_point(info, start, stop, &iwe, custom); - } - - return start; -} - -#define SCAN_ITEM_SIZE 128 - -int ieee80211_wx_get_scan(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *extra) -{ - struct ieee80211_network *network; - unsigned long flags; - int err = 0; - - char *ev = extra; - char *stop = ev + wrqu->data.length; - int i = 0; - DECLARE_SSID_BUF(ssid); - - IEEE80211_DEBUG_WX("Getting scan\n"); - - spin_lock_irqsave(&ieee->lock, flags); - - list_for_each_entry(network, &ieee->network_list, list) { - i++; - if (stop - ev < SCAN_ITEM_SIZE) { - err = -E2BIG; - break; - } - - if (ieee->scan_age == 0 || - time_after(network->last_scanned + ieee->scan_age, jiffies)) - ev = ieee80211_translate_scan(ieee, ev, stop, network, - info); - else - IEEE80211_DEBUG_SCAN("Not showing network '%s (" - "%pM)' due to age (%dms).\n", - print_ssid(ssid, network->ssid, - network->ssid_len), - network->bssid, - jiffies_to_msecs(jiffies - - network-> - last_scanned)); - } - - spin_unlock_irqrestore(&ieee->lock, flags); - - wrqu->data.length = ev - extra; - 
wrqu->data.flags = 0; - - IEEE80211_DEBUG_WX("exit: %d networks returned.\n", i); - - return err; -} - -int ieee80211_wx_set_encode(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *keybuf) -{ - struct iw_point *erq = &(wrqu->encoding); - struct net_device *dev = ieee->dev; - struct ieee80211_security sec = { - .flags = 0 - }; - int i, key, key_provided, len; - struct ieee80211_crypt_data **crypt; - int host_crypto = ieee->host_encrypt || ieee->host_decrypt || ieee->host_build_iv; - DECLARE_SSID_BUF(ssid); - - IEEE80211_DEBUG_WX("SET_ENCODE\n"); - - key = erq->flags & IW_ENCODE_INDEX; - if (key) { - if (key > WEP_KEYS) - return -EINVAL; - key--; - key_provided = 1; - } else { - key_provided = 0; - key = ieee->tx_keyidx; - } - - IEEE80211_DEBUG_WX("Key: %d [%s]\n", key, key_provided ? - "provided" : "default"); - - crypt = &ieee->crypt[key]; - - if (erq->flags & IW_ENCODE_DISABLED) { - if (key_provided && *crypt) { - IEEE80211_DEBUG_WX("Disabling encryption on key %d.\n", - key); - ieee80211_crypt_delayed_deinit(ieee, crypt); - } else - IEEE80211_DEBUG_WX("Disabling encryption.\n"); - - /* Check all the keys to see if any are still configured, - * and if no key index was provided, de-init them all */ - for (i = 0; i < WEP_KEYS; i++) { - if (ieee->crypt[i] != NULL) { - if (key_provided) - break; - ieee80211_crypt_delayed_deinit(ieee, - &ieee->crypt[i]); - } - } - - if (i == WEP_KEYS) { - sec.enabled = 0; - sec.encrypt = 0; - sec.level = SEC_LEVEL_0; - sec.flags |= SEC_ENABLED | SEC_LEVEL | SEC_ENCRYPT; - } - - goto done; - } - - sec.enabled = 1; - sec.encrypt = 1; - sec.flags |= SEC_ENABLED | SEC_ENCRYPT; - - if (*crypt != NULL && (*crypt)->ops != NULL && - strcmp((*crypt)->ops->name, "WEP") != 0) { - /* changing to use WEP; deinit previously used algorithm - * on this key */ - ieee80211_crypt_delayed_deinit(ieee, crypt); - } - - if (*crypt == NULL && host_crypto) { - struct ieee80211_crypt_data *new_crypt; - - /* take WEP 
into use */ - new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data), - GFP_KERNEL); - if (new_crypt == NULL) - return -ENOMEM; - new_crypt->ops = ieee80211_get_crypto_ops("WEP"); - if (!new_crypt->ops) { - request_module("ieee80211_crypt_wep"); - new_crypt->ops = ieee80211_get_crypto_ops("WEP"); - } - - if (new_crypt->ops && try_module_get(new_crypt->ops->owner)) - new_crypt->priv = new_crypt->ops->init(key); - - if (!new_crypt->ops || !new_crypt->priv) { - kfree(new_crypt); - new_crypt = NULL; - - printk(KERN_WARNING "%s: could not initialize WEP: " - "load module ieee80211_crypt_wep\n", dev->name); - return -EOPNOTSUPP; - } - *crypt = new_crypt; - } - - /* If a new key was provided, set it up */ - if (erq->length > 0) { - len = erq->length <= 5 ? 5 : 13; - memcpy(sec.keys[key], keybuf, erq->length); - if (len > erq->length) - memset(sec.keys[key] + erq->length, 0, - len - erq->length); - IEEE80211_DEBUG_WX("Setting key %d to '%s' (%d:%d bytes)\n", - key, print_ssid(ssid, sec.keys[key], len), - erq->length, len); - sec.key_sizes[key] = len; - if (*crypt) - (*crypt)->ops->set_key(sec.keys[key], len, NULL, - (*crypt)->priv); - sec.flags |= (1 << key); - /* This ensures a key will be activated if no key is - * explicitly set */ - if (key == sec.active_key) - sec.flags |= SEC_ACTIVE_KEY; - - } else { - if (host_crypto) { - len = (*crypt)->ops->get_key(sec.keys[key], WEP_KEY_LEN, - NULL, (*crypt)->priv); - if (len == 0) { - /* Set a default key of all 0 */ - IEEE80211_DEBUG_WX("Setting key %d to all " - "zero.\n", key); - memset(sec.keys[key], 0, 13); - (*crypt)->ops->set_key(sec.keys[key], 13, NULL, - (*crypt)->priv); - sec.key_sizes[key] = 13; - sec.flags |= (1 << key); - } - } - /* No key data - just set the default TX key index */ - if (key_provided) { - IEEE80211_DEBUG_WX("Setting key %d to default Tx " - "key.\n", key); - ieee->tx_keyidx = key; - sec.active_key = key; - sec.flags |= SEC_ACTIVE_KEY; - } - } - if (erq->flags & (IW_ENCODE_OPEN | 
IW_ENCODE_RESTRICTED)) { - ieee->open_wep = !(erq->flags & IW_ENCODE_RESTRICTED); - sec.auth_mode = ieee->open_wep ? WLAN_AUTH_OPEN : - WLAN_AUTH_SHARED_KEY; - sec.flags |= SEC_AUTH_MODE; - IEEE80211_DEBUG_WX("Auth: %s\n", - sec.auth_mode == WLAN_AUTH_OPEN ? - "OPEN" : "SHARED KEY"); - } - - /* For now we just support WEP, so only set that security level... - * TODO: When WPA is added this is one place that needs to change */ - sec.flags |= SEC_LEVEL; - sec.level = SEC_LEVEL_1; /* 40 and 104 bit WEP */ - sec.encode_alg[key] = SEC_ALG_WEP; - - done: - if (ieee->set_security) - ieee->set_security(dev, &sec); - - /* Do not reset port if card is in Managed mode since resetting will - * generate new IEEE 802.11 authentication which may end up in looping - * with IEEE 802.1X. If your hardware requires a reset after WEP - * configuration (for example... Prism2), implement the reset_port in - * the callbacks structures used to initialize the 802.11 stack. */ - if (ieee->reset_on_keychange && - ieee->iw_mode != IW_MODE_INFRA && - ieee->reset_port && ieee->reset_port(dev)) { - printk(KERN_DEBUG "%s: reset_port failed\n", dev->name); - return -EINVAL; - } - return 0; -} - -int ieee80211_wx_get_encode(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *keybuf) -{ - struct iw_point *erq = &(wrqu->encoding); - int len, key; - struct ieee80211_crypt_data *crypt; - struct ieee80211_security *sec = &ieee->sec; - - IEEE80211_DEBUG_WX("GET_ENCODE\n"); - - key = erq->flags & IW_ENCODE_INDEX; - if (key) { - if (key > WEP_KEYS) - return -EINVAL; - key--; - } else - key = ieee->tx_keyidx; - - crypt = ieee->crypt[key]; - erq->flags = key + 1; - - if (!sec->enabled) { - erq->length = 0; - erq->flags |= IW_ENCODE_DISABLED; - return 0; - } - - len = sec->key_sizes[key]; - memcpy(keybuf, sec->keys[key], len); - - erq->length = len; - erq->flags |= IW_ENCODE_ENABLED; - - if (ieee->open_wep) - erq->flags |= IW_ENCODE_OPEN; - else - erq->flags |= 
IW_ENCODE_RESTRICTED; - - return 0; -} - -int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *extra) -{ - struct net_device *dev = ieee->dev; - struct iw_point *encoding = &wrqu->encoding; - struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; - int i, idx, ret = 0; - int group_key = 0; - const char *alg, *module; - struct ieee80211_crypto_ops *ops; - struct ieee80211_crypt_data **crypt; - - struct ieee80211_security sec = { - .flags = 0, - }; - - idx = encoding->flags & IW_ENCODE_INDEX; - if (idx) { - if (idx < 1 || idx > WEP_KEYS) - return -EINVAL; - idx--; - } else - idx = ieee->tx_keyidx; - - if (ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) { - crypt = &ieee->crypt[idx]; - group_key = 1; - } else { - /* some Cisco APs use idx>0 for unicast in dynamic WEP */ - if (idx != 0 && ext->alg != IW_ENCODE_ALG_WEP) - return -EINVAL; - if (ieee->iw_mode == IW_MODE_INFRA) - crypt = &ieee->crypt[idx]; - else - return -EINVAL; - } - - sec.flags |= SEC_ENABLED | SEC_ENCRYPT; - if ((encoding->flags & IW_ENCODE_DISABLED) || - ext->alg == IW_ENCODE_ALG_NONE) { - if (*crypt) - ieee80211_crypt_delayed_deinit(ieee, crypt); - - for (i = 0; i < WEP_KEYS; i++) - if (ieee->crypt[i] != NULL) - break; - - if (i == WEP_KEYS) { - sec.enabled = 0; - sec.encrypt = 0; - sec.level = SEC_LEVEL_0; - sec.flags |= SEC_LEVEL; - } - goto done; - } - - sec.enabled = 1; - sec.encrypt = 1; - - if (group_key ? 
!ieee->host_mc_decrypt : - !(ieee->host_encrypt || ieee->host_decrypt || - ieee->host_encrypt_msdu)) - goto skip_host_crypt; - - switch (ext->alg) { - case IW_ENCODE_ALG_WEP: - alg = "WEP"; - module = "ieee80211_crypt_wep"; - break; - case IW_ENCODE_ALG_TKIP: - alg = "TKIP"; - module = "ieee80211_crypt_tkip"; - break; - case IW_ENCODE_ALG_CCMP: - alg = "CCMP"; - module = "ieee80211_crypt_ccmp"; - break; - default: - IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n", - dev->name, ext->alg); - ret = -EINVAL; - goto done; - } - - ops = ieee80211_get_crypto_ops(alg); - if (ops == NULL) { - request_module(module); - ops = ieee80211_get_crypto_ops(alg); - } - if (ops == NULL) { - IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n", - dev->name, ext->alg); - ret = -EINVAL; - goto done; - } - - if (*crypt == NULL || (*crypt)->ops != ops) { - struct ieee80211_crypt_data *new_crypt; - - ieee80211_crypt_delayed_deinit(ieee, crypt); - - new_crypt = kzalloc(sizeof(*new_crypt), GFP_KERNEL); - if (new_crypt == NULL) { - ret = -ENOMEM; - goto done; - } - new_crypt->ops = ops; - if (new_crypt->ops && try_module_get(new_crypt->ops->owner)) - new_crypt->priv = new_crypt->ops->init(idx); - if (new_crypt->priv == NULL) { - kfree(new_crypt); - ret = -EINVAL; - goto done; - } - *crypt = new_crypt; - } - - if (ext->key_len > 0 && (*crypt)->ops->set_key && - (*crypt)->ops->set_key(ext->key, ext->key_len, ext->rx_seq, - (*crypt)->priv) < 0) { - IEEE80211_DEBUG_WX("%s: key setting failed\n", dev->name); - ret = -EINVAL; - goto done; - } - - skip_host_crypt: - if (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY) { - ieee->tx_keyidx = idx; - sec.active_key = idx; - sec.flags |= SEC_ACTIVE_KEY; - } - - if (ext->alg != IW_ENCODE_ALG_NONE) { - memcpy(sec.keys[idx], ext->key, ext->key_len); - sec.key_sizes[idx] = ext->key_len; - sec.flags |= (1 << idx); - if (ext->alg == IW_ENCODE_ALG_WEP) { - sec.encode_alg[idx] = SEC_ALG_WEP; - sec.flags |= SEC_LEVEL; - sec.level = SEC_LEVEL_1; - } else if (ext->alg == 
IW_ENCODE_ALG_TKIP) { - sec.encode_alg[idx] = SEC_ALG_TKIP; - sec.flags |= SEC_LEVEL; - sec.level = SEC_LEVEL_2; - } else if (ext->alg == IW_ENCODE_ALG_CCMP) { - sec.encode_alg[idx] = SEC_ALG_CCMP; - sec.flags |= SEC_LEVEL; - sec.level = SEC_LEVEL_3; - } - /* Don't set sec level for group keys. */ - if (group_key) - sec.flags &= ~SEC_LEVEL; - } - done: - if (ieee->set_security) - ieee->set_security(ieee->dev, &sec); - - /* - * Do not reset port if card is in Managed mode since resetting will - * generate new IEEE 802.11 authentication which may end up in looping - * with IEEE 802.1X. If your hardware requires a reset after WEP - * configuration (for example... Prism2), implement the reset_port in - * the callbacks structures used to initialize the 802.11 stack. - */ - if (ieee->reset_on_keychange && - ieee->iw_mode != IW_MODE_INFRA && - ieee->reset_port && ieee->reset_port(dev)) { - IEEE80211_DEBUG_WX("%s: reset_port failed\n", dev->name); - return -EINVAL; - } - - return ret; -} - -int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *extra) -{ - struct iw_point *encoding = &wrqu->encoding; - struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; - struct ieee80211_security *sec = &ieee->sec; - int idx, max_key_len; - - max_key_len = encoding->length - sizeof(*ext); - if (max_key_len < 0) - return -EINVAL; - - idx = encoding->flags & IW_ENCODE_INDEX; - if (idx) { - if (idx < 1 || idx > WEP_KEYS) - return -EINVAL; - idx--; - } else - idx = ieee->tx_keyidx; - - if (!(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) && - ext->alg != IW_ENCODE_ALG_WEP) - if (idx != 0 || ieee->iw_mode != IW_MODE_INFRA) - return -EINVAL; - - encoding->flags = idx + 1; - memset(ext, 0, sizeof(*ext)); - - if (!sec->enabled) { - ext->alg = IW_ENCODE_ALG_NONE; - ext->key_len = 0; - encoding->flags |= IW_ENCODE_DISABLED; - } else { - if (sec->encode_alg[idx] == SEC_ALG_WEP) - ext->alg = IW_ENCODE_ALG_WEP; - else if 
(sec->encode_alg[idx] == SEC_ALG_TKIP) - ext->alg = IW_ENCODE_ALG_TKIP; - else if (sec->encode_alg[idx] == SEC_ALG_CCMP) - ext->alg = IW_ENCODE_ALG_CCMP; - else - return -EINVAL; - - ext->key_len = sec->key_sizes[idx]; - memcpy(ext->key, sec->keys[idx], ext->key_len); - encoding->flags |= IW_ENCODE_ENABLED; - if (ext->key_len && - (ext->alg == IW_ENCODE_ALG_TKIP || - ext->alg == IW_ENCODE_ALG_CCMP)) - ext->ext_flags |= IW_ENCODE_EXT_TX_SEQ_VALID; - - } - - return 0; -} - -EXPORT_SYMBOL(ieee80211_wx_set_encodeext); -EXPORT_SYMBOL(ieee80211_wx_get_encodeext); - -EXPORT_SYMBOL(ieee80211_wx_get_scan); -EXPORT_SYMBOL(ieee80211_wx_set_encode); -EXPORT_SYMBOL(ieee80211_wx_get_encode); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e3286814c8d..fe03048c130 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -245,7 +245,7 @@ static inline int inet_netns_ok(struct net *net, int protocol) int hash; struct net_protocol *ipprot; - if (net == &init_net) + if (net_eq(net, &init_net)) return 1; hash = protocol & (MAX_INET_PROTOS - 1); @@ -272,10 +272,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) int try_loading_module = 0; int err; - if (sock->type != SOCK_RAW && - sock->type != SOCK_DGRAM && - !inet_ehash_secret) - build_ehash_secret(); + if (unlikely(!inet_ehash_secret)) + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) + build_ehash_secret(); sock->state = SS_UNCONNECTED; @@ -1114,6 +1113,7 @@ int inet_sk_rebuild_header(struct sock *sk) }, }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 3f205181712..e878e494296 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -201,6 +201,7 @@ out: static void ah4_err(struct sk_buff *skb, u32 info) { + struct net *net = dev_net(skb->dev); struct iphdr *iph = (struct iphdr *)skb->data; struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); struct 
xfrm_state *x; @@ -209,7 +210,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", @@ -293,9 +294,7 @@ static void ah_destroy(struct xfrm_state *x) return; kfree(ahp->work_icv); - ahp->work_icv = NULL; crypto_free_hash(ahp->tfm); - ahp->tfm = NULL; kfree(ahp); } @@ -316,6 +315,7 @@ static struct net_protocol ah4_protocol = { .handler = xfrm4_rcv, .err_handler = ah4_err, .no_policy = 1, + .netns_ok = 1, }; static int __init ah4_init(void) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 957c87dc8e1..29a74c01d8d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -818,18 +818,18 @@ static int arp_process(struct sk_buff *skb) addr_type = rt->rt_type; if (addr_type == RTN_LOCAL) { - n = neigh_event_ns(&arp_tbl, sha, &sip, dev); - if (n) { - int dont_send = 0; - - if (!dont_send) - dont_send |= arp_ignore(in_dev, sip, tip); - if (!dont_send && IN_DEV_ARPFILTER(in_dev)) - dont_send |= arp_filter(sip, tip, dev); - if (!dont_send) - arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); + int dont_send = 0; - neigh_release(n); + if (!dont_send) + dont_send |= arp_ignore(in_dev,sip,tip); + if (!dont_send && IN_DEV_ARPFILTER(in_dev)) + dont_send |= arp_filter(sip,tip,dev); + if (!dont_send) { + n = neigh_event_ns(&arp_tbl, sha, &sip, dev); + if (n) { + arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); + neigh_release(n); + } } goto out; } else if (IN_DEV_FORWARD(in_dev)) { diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 95a9c65003f..18bb383ea39 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -413,6 +413,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) static void esp4_err(struct sk_buff *skb, u32 info) { + struct net *net = 
dev_net(skb->dev); struct iphdr *iph = (struct iphdr *)skb->data; struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; @@ -421,7 +422,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", @@ -618,6 +619,7 @@ static struct net_protocol esp4_protocol = { .handler = xfrm4_rcv, .err_handler = esp4_err, .no_policy = 1, + .netns_ok = 1, }; static int __init esp4_init(void) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 21e497efbd7..705b33b184a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -321,12 +321,12 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, } static void icmp_push_reply(struct icmp_bxm *icmp_param, - struct ipcm_cookie *ipc, struct rtable *rt) + struct ipcm_cookie *ipc, struct rtable **rt) { struct sock *sk; struct sk_buff *skb; - sk = icmp_sk(dev_net(rt->u.dst.dev)); + sk = icmp_sk(dev_net((*rt)->u.dst.dev)); if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, @@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) } if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, icmp_param->data.icmph.code)) - icmp_push_reply(icmp_param, &ipc, rt); + icmp_push_reply(icmp_param, &ipc, &rt); ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); @@ -562,7 +562,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* No need to clone since we're just using its address. 
*/ rt2 = rt; - err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); + err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); switch (err) { case 0: if (rt != rt2) @@ -601,7 +601,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (err) goto relookup_failed; - err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL, + err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, XFRM_LOOKUP_ICMP); switch (err) { case 0: @@ -635,7 +635,7 @@ route_done: icmp_param.data_len = room; icmp_param.head_len = sizeof(struct icmphdr); - icmp_push_reply(&icmp_param, &ipc, rt); + icmp_push_reply(&icmp_param, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 36f4cbc7da3..1ccdbba528b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -109,7 +109,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->ib_net == net && tb->port == rover) + if (ib_net(tb) == net && tb->port == rover) goto next; break; next: @@ -137,7 +137,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->ib_net == net && tb->port == snum) + if (ib_net(tb) == net && tb->port == snum) goto tb_found; } tb = NULL; @@ -561,7 +561,7 @@ void inet_csk_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); - atomic_dec(sk->sk_prot->orphan_count); + percpu_counter_dec(sk->sk_prot->orphan_count); sock_put(sk); } @@ -641,7 +641,7 @@ void inet_csk_listen_stop(struct sock *sk) sock_orphan(child); - atomic_inc(sk->sk_prot->orphan_count); + percpu_counter_inc(sk->sk_prot->orphan_count); inet_csk_destroy_sock(child); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 564230dabcb..588a7796e3e 100644 --- a/net/ipv4/inet_diag.c +++ 
b/net/ipv4/inet_diag.c @@ -718,13 +718,15 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) goto skip_listen_ht; - inet_listen_lock(hashinfo); for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; + struct inet_listen_hashbucket *ilb; num = 0; - sk_for_each(sk, node, &hashinfo->listening_hash[i]) { + ilb = &hashinfo->listening_hash[i]; + spin_lock_bh(&ilb->lock); + sk_nulls_for_each(sk, node, &ilb->head) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) { @@ -742,7 +744,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto syn_recv; if (inet_csk_diag_dump(sk, skb, cb) < 0) { - inet_listen_unlock(hashinfo); + spin_unlock_bh(&ilb->lock); goto done; } @@ -751,7 +753,7 @@ syn_recv: goto next_listen; if (inet_diag_dump_reqs(skb, sk, cb) < 0) { - inet_listen_unlock(hashinfo); + spin_unlock_bh(&ilb->lock); goto done; } @@ -760,12 +762,12 @@ next_listen: cb->args[4] = 0; ++num; } + spin_unlock_bh(&ilb->lock); s_num = 0; cb->args[3] = 0; cb->args[4] = 0; } - inet_listen_unlock(hashinfo); skip_listen_ht: cb->args[0] = 1; s_i = num = s_num = 0; @@ -776,20 +778,21 @@ skip_listen_ht: for (i = s_i; i < hashinfo->ehash_size; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; - rwlock_t *lock = inet_ehash_lockp(hashinfo, i); + spinlock_t *lock = inet_ehash_lockp(hashinfo, i); struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; num = 0; - if (hlist_empty(&head->chain) && hlist_empty(&head->twchain)) + if (hlist_nulls_empty(&head->chain) && + hlist_nulls_empty(&head->twchain)) continue; if (i > s_i) s_num = 0; - read_lock_bh(lock); - sk_for_each(sk, node, &head->chain) { + spin_lock_bh(lock); + sk_nulls_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) @@ -803,7 +806,7 @@ skip_listen_ht: r->id.idiag_dport) goto 
next_normal; if (inet_csk_diag_dump(sk, skb, cb) < 0) { - read_unlock_bh(lock); + spin_unlock_bh(lock); goto done; } next_normal: @@ -825,14 +828,14 @@ next_normal: r->id.idiag_dport) goto next_dying; if (inet_twsk_diag_dump(tw, skb, cb) < 0) { - read_unlock_bh(lock); + spin_unlock_bh(lock); goto done; } next_dying: ++num; } } - read_unlock_bh(lock); + spin_unlock_bh(lock); } done: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 44981906fb9..6a1045da48d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -35,7 +35,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { - tb->ib_net = hold_net(net); + write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; INIT_HLIST_HEAD(&tb->owners); @@ -51,7 +51,7 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); - release_net(tb->ib_net); + release_net(ib_net(tb)); kmem_cache_free(cachep, tb); } } @@ -110,33 +110,29 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) EXPORT_SYMBOL_GPL(__inet_inherit_port); -/* - * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. - * Look, when several writers sleep and reader wakes them up, all but one - * immediately hit write lock and grab all the cpus. Exclusive sleep solves - * this, _but_ remember, it adds useless work on UP machines (wake up each - * exclusive lock release). It should be ifdefed really. 
- */ -void inet_listen_wlock(struct inet_hashinfo *hashinfo) - __acquires(hashinfo->lhash_lock) +static inline int compute_score(struct sock *sk, struct net *net, + const unsigned short hnum, const __be32 daddr, + const int dif) { - write_lock(&hashinfo->lhash_lock); - - if (atomic_read(&hashinfo->lhash_users)) { - DEFINE_WAIT(wait); + int score = -1; + struct inet_sock *inet = inet_sk(sk); - for (;;) { - prepare_to_wait_exclusive(&hashinfo->lhash_wait, - &wait, TASK_UNINTERRUPTIBLE); - if (!atomic_read(&hashinfo->lhash_users)) - break; - write_unlock_bh(&hashinfo->lhash_lock); - schedule(); - write_lock_bh(&hashinfo->lhash_lock); + if (net_eq(sock_net(sk), net) && inet->num == hnum && + !ipv6_only_sock(sk)) { + __be32 rcv_saddr = inet->rcv_saddr; + score = sk->sk_family == PF_INET ? 1 : 0; + if (rcv_saddr) { + if (rcv_saddr != daddr) + return -1; + score += 2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 2; } - - finish_wait(&hashinfo->lhash_wait, &wait); } + return score; } /* @@ -145,72 +141,48 @@ void inet_listen_wlock(struct inet_hashinfo *hashinfo) * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -static struct sock *inet_lookup_listener_slow(struct net *net, - const struct hlist_head *head, - const __be32 daddr, - const unsigned short hnum, - const int dif) -{ - struct sock *result = NULL, *sk; - const struct hlist_node *node; - int hiscore = -1; - - sk_for_each(sk, node, head) { - const struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && inet->num == hnum && - !ipv6_only_sock(sk)) { - const __be32 rcv_saddr = inet->rcv_saddr; - int score = sk->sk_family == PF_INET ? 
1 : 0; - - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; - score += 2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score += 2; - } - if (score == 5) - return sk; - if (score > hiscore) { - hiscore = score; - result = sk; - } - } - } - return result; -} -/* Optimize the common listener case. */ + struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, const int dif) { - struct sock *sk = NULL; - const struct hlist_head *head; - - read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; - if (!hlist_empty(head)) { - const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); - - if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && - (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) - goto sherry_cache; - sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); + struct sock *sk, *result; + struct hlist_nulls_node *node; + unsigned int hash = inet_lhashfn(net, hnum); + struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + int score, hiscore; + + rcu_read_lock(); +begin: + result = NULL; + hiscore = -1; + sk_nulls_for_each_rcu(sk, node, &ilb->head) { + score = compute_score(sk, net, hnum, daddr, dif); + if (score > hiscore) { + result = sk; + hiscore = score; + } } - if (sk) { -sherry_cache: - sock_hold(sk); + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. 
+ */ + if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) + goto begin; + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, daddr, + dif) < hiscore)) { + sock_put(result); + goto begin; + } } - read_unlock(&hashinfo->lhash_lock); - return sk; + rcu_read_unlock(); + return result; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); @@ -223,35 +195,65 @@ struct sock * __inet_lookup_established(struct net *net, INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { if (INET_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (unlikely(!INET_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begin; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begin; +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ - sk_for_each(sk, node, &head->twchain) { + sk_nulls_for_each_rcu(sk, node, &head->twchain) { if (INET_TW_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begintw; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begintw; sk = NULL; out: - read_unlock(lock); + rcu_read_unlock(); return sk; -hit: - sock_hold(sk); - goto out; } EXPORT_SYMBOL_GPL(__inet_lookup_established); @@ -270,16 +272,15 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hinfo, hash); + spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - prefetch(head->chain.first); - write_lock(lock); + spin_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET_TW_MATCH(sk2, net, hash, acookie, @@ -293,7 +294,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... 
*/ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET_MATCH(sk2, net, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; @@ -306,9 +307,9 @@ unique: inet->sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); if (twp) { *twp = tw; @@ -324,7 +325,7 @@ unique: return 0; not_unique: - write_unlock(lock); + spin_unlock(lock); return -EADDRNOTAVAIL; } @@ -338,8 +339,8 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) void __inet_hash_nolisten(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; - rwlock_t *lock; + struct hlist_nulls_head *list; + spinlock_t *lock; struct inet_ehash_bucket *head; WARN_ON(!sk_unhashed(sk)); @@ -349,18 +350,17 @@ void __inet_hash_nolisten(struct sock *sk) list = &head->chain; lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - write_lock(lock); - __sk_add_node(sk, list); + spin_lock(lock); + __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_hash_nolisten); static void __inet_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; - rwlock_t *lock; + struct inet_listen_hashbucket *ilb; if (sk->sk_state != TCP_LISTEN) { __inet_hash_nolisten(sk); @@ -368,14 +368,12 @@ static void __inet_hash(struct sock *sk) } WARN_ON(!sk_unhashed(sk)); - list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &hashinfo->lhash_lock; + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - inet_listen_wlock(hashinfo); - __sk_add_node(sk, list); + spin_lock(&ilb->lock); + __sk_nulls_add_node_rcu(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); 
- wake_up(&hashinfo->lhash_wait); + spin_unlock(&ilb->lock); } void inet_hash(struct sock *sk) @@ -390,27 +388,23 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { - rwlock_t *lock; struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + spinlock_t *lock; + int done; if (sk_unhashed(sk)) - goto out; + return; - if (sk->sk_state == TCP_LISTEN) { - local_bh_disable(); - inet_listen_wlock(hashinfo); - lock = &hashinfo->lhash_lock; - } else { + if (sk->sk_state == TCP_LISTEN) + lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; + else lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - write_lock_bh(lock); - } - if (__sk_del_node_init(sk)) + spin_lock_bh(lock); + done =__sk_nulls_del_node_init_rcu(sk); + if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(lock); -out: - if (sk->sk_state == TCP_LISTEN) - wake_up(&hashinfo->lhash_wait); + spin_unlock_bh(lock); } EXPORT_SYMBOL_GPL(inet_unhash); @@ -449,7 +443,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * unique enough. 
*/ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->ib_net == net && tb->port == port) { + if (ib_net(tb) == net && tb->port == port) { WARN_ON(hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; @@ -524,3 +518,16 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, } EXPORT_SYMBOL_GPL(inet_hash_connect); + +void inet_hashinfo_init(struct inet_hashinfo *h) +{ + int i; + + for (i = 0; i < INET_LHTABLE_SIZE; i++) { + spin_lock_init(&h->listening_hash[i].lock); + INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, + i + LISTENING_NULLS_BASE); + } +} + +EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index cfd034a2b96..6a667dae315 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -120,7 +120,7 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); tcph->check = 0; - tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0); + tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, lro_desc->ip_tot_len - @@ -135,7 +135,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) __wsum tcp_ps_hdr_csum; tcp_csum = ~csum_unfold(tcph->check); - tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum); + tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum); tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, len + TCP_HDR_LEN(tcph), diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1c5fd38f882..8554d0ea171 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. 
*/ - rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); - write_lock(lock); - if (hlist_unhashed(&tw->tw_node)) { - write_unlock(lock); + spin_lock(lock); + if (hlist_nulls_unhashed(&tw->tw_node)) { + spin_unlock(lock); return; } - __hlist_del(&tw->tw_node); - sk_node_init(&tw->tw_node); - write_unlock(lock); + hlist_nulls_del_rcu(&tw->tw_node); + sk_nulls_node_init(&tw->tw_node); + spin_unlock(lock); /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, @@ -76,7 +76,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in @@ -90,17 +90,21 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); - write_lock(lock); + spin_lock(lock); - /* Step 2: Remove SK from established hash. */ - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - - /* Step 3: Hash TW into TIMEWAIT chain. */ - inet_twsk_add_node(tw, &ehead->twchain); + /* + * Step 2: Hash TW into TIMEWAIT chain. + * Should be done before removing sk from established chain + * because readers are lockless and search established first. + */ atomic_inc(&tw->tw_refcnt); + inet_twsk_add_node_rcu(tw, &ehead->twchain); - write_unlock(lock); + /* Step 3: Remove SK from established hash. 
*/ + if (__sk_nulls_del_node_init_rcu(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + + spin_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); @@ -416,17 +420,17 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, { struct inet_timewait_sock *tw; struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; int h; local_bh_disable(); for (h = 0; h < (hashinfo->ehash_size); h++) { struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, h); - rwlock_t *lock = inet_ehash_lockp(hashinfo, h); + spinlock_t *lock = inet_ehash_lockp(hashinfo, h); restart: - write_lock(lock); - sk_for_each(sk, node, &head->twchain) { + spin_lock(lock); + sk_nulls_for_each(sk, node, &head->twchain) { tw = inet_twsk(sk); if (!net_eq(twsk_net(tw), net) || @@ -434,13 +438,13 @@ restart: continue; atomic_inc(&tw->tw_refcnt); - write_unlock(lock); + spin_unlock(lock); inet_twsk_deschedule(tw, twdr); inet_twsk_put(tw); goto restart; } - write_unlock(lock); + spin_unlock(lock); } local_bh_enable(); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 191ef758813..0101521f366 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -126,8 +126,6 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev); /* Fallback tunnel: no source, no destination, no key, no options */ -static int ipgre_fb_tunnel_init(struct net_device *dev); - #define HASH_SIZE 16 static int ipgre_net_id; @@ -1142,6 +1140,7 @@ static int ipgre_open(struct net_device *dev) static int ipgre_close(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); + if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev; in_dev = inetdev_by_index(dev_net(dev), t->mlink); @@ -1155,14 +1154,22 @@ static int ipgre_close(struct net_device *dev) #endif +static const struct net_device_ops ipgre_netdev_ops = { + .ndo_init = ipgre_tunnel_init, + .ndo_uninit = ipgre_tunnel_uninit, +#ifdef CONFIG_NET_IPGRE_BROADCAST + .ndo_open = ipgre_open, + 
.ndo_stop = ipgre_close, +#endif + .ndo_start_xmit = ipgre_tunnel_xmit, + .ndo_do_ioctl = ipgre_tunnel_ioctl, + .ndo_change_mtu = ipgre_tunnel_change_mtu, +}; + static void ipgre_tunnel_setup(struct net_device *dev) { - dev->init = ipgre_tunnel_init; - dev->uninit = ipgre_tunnel_uninit; + dev->netdev_ops = &ipgre_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ipgre_tunnel_xmit; - dev->do_ioctl = ipgre_tunnel_ioctl; - dev->change_mtu = ipgre_tunnel_change_mtu; dev->type = ARPHRD_IPGRE; dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; @@ -1194,8 +1201,6 @@ static int ipgre_tunnel_init(struct net_device *dev) return -EINVAL; dev->flags = IFF_BROADCAST; dev->header_ops = &ipgre_header_ops; - dev->open = ipgre_open; - dev->stop = ipgre_close; } #endif } else @@ -1204,7 +1209,7 @@ static int ipgre_tunnel_init(struct net_device *dev) return 0; } -static int ipgre_fb_tunnel_init(struct net_device *dev) +static void ipgre_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -1220,7 +1225,6 @@ static int ipgre_fb_tunnel_init(struct net_device *dev) dev_hold(dev); ign->tunnels_wc[0] = tunnel; - return 0; } @@ -1264,9 +1268,9 @@ static int ipgre_init_net(struct net *net) err = -ENOMEM; goto err_alloc_dev; } - - ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init; dev_net_set(ign->fb_tunnel_dev, net); + + ipgre_fb_tunnel_init(ign->fb_tunnel_dev); ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; if ((err = register_netdev(ign->fb_tunnel_dev))) @@ -1397,16 +1401,22 @@ static int ipgre_tap_init(struct net_device *dev) return 0; } +static const struct net_device_ops ipgre_tap_netdev_ops = { + .ndo_init = ipgre_tap_init, + .ndo_uninit = ipgre_tunnel_uninit, + .ndo_start_xmit = ipgre_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ipgre_tunnel_change_mtu, +}; + static void ipgre_tap_setup(struct net_device 
*dev) { ether_setup(dev); - dev->init = ipgre_tap_init; - dev->uninit = ipgre_tunnel_uninit; + dev->netdev_ops = &ipgre_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ipgre_tunnel_xmit; - dev->change_mtu = ipgre_tunnel_change_mtu; dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 70bedab03b0..1a58a6fa1dc 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -209,9 +209,17 @@ static int ip_local_deliver_finish(struct sk_buff *skb) hash = protocol & (MAX_INET_PROTOS - 1); ipprot = rcu_dereference(inet_protos[hash]); - if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) { + if (ipprot != NULL) { int ret; + if (!net_eq(net, &init_net) && !ipprot->netns_ok) { + if (net_ratelimit()) + printk("%s: proto %d isn't netns-ready\n", + __func__, protocol); + kfree_skb(skb); + goto out; + } + if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { kfree_skb(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 46d7be233ea..8ebe86dd72a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -778,7 +778,7 @@ int ip_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - struct ipcm_cookie *ipc, struct rtable *rt, + struct ipcm_cookie *ipc, struct rtable **rtp, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); @@ -793,6 +793,7 @@ int ip_append_data(struct sock *sk, int offset = 0; unsigned int maxfraglen, fragheaderlen; int csummode = CHECKSUM_NONE; + struct rtable *rt; if (flags&MSG_PROBE) return 0; @@ -812,7 +813,11 @@ int ip_append_data(struct sock *sk, inet->cork.flags |= IPCORK_OPT; inet->cork.addr = ipc->addr; } - dst_hold(&rt->u.dst); + rt = *rtp; + /* + * We steal reference to this route, caller should not release it + */ + *rtp = NULL; inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? 
rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); @@ -1279,7 +1284,12 @@ int ip_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb->dst = dst_clone(&rt->u.dst); + /* + * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec + * on dst refcount + */ + inet->cork.dst = NULL; + skb->dst = &rt->u.dst; if (iph->protocol == IPPROTO_ICMP) icmp_out_count(net, ((struct icmphdr *) @@ -1391,7 +1401,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, - &ipc, rt, MSG_DONTWAIT); + &ipc, &rt, MSG_DONTWAIT); if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { if (arg->csumoffset >= 0) *((__sum16 *)skb_transport_header(skb) + diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index e976efeb145..43c05854d75 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -48,6 +48,7 @@ #define IP_CMSG_RECVOPTS 8 #define IP_CMSG_RETOPTS 16 #define IP_CMSG_PASSSEC 32 +#define IP_CMSG_ORIGDSTADDR 64 /* * SOL_IP control messages. @@ -126,6 +127,27 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) security_release_secctx(secdata, seclen); } +static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) +{ + struct sockaddr_in sin; + struct iphdr *iph = ip_hdr(skb); + __be16 *ports = (__be16 *)skb_transport_header(skb); + + if (skb_transport_offset(skb) + 4 > skb->len) + return; + + /* All current transport protocols have the port numbers in the + * first four bytes of the transport header and this function is + * written with this assumption in mind. 
+ */ + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = iph->daddr; + sin.sin_port = ports[1]; + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); + + put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); +} void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { @@ -160,6 +182,12 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) if (flags & 1) ip_cmsg_recv_security(msg, skb); + + if ((flags>>=1) == 0) + return; + if (flags & 1) + ip_cmsg_recv_dstaddr(msg, skb); + } int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) @@ -421,7 +449,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || optname == IP_MULTICAST_TTL || - optname == IP_MULTICAST_LOOP) { + optname == IP_MULTICAST_LOOP || + optname == IP_RECVORIGDSTADDR) { if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -509,6 +538,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, else inet->cmsg_flags &= ~IP_CMSG_PASSSEC; break; + case IP_RECVORIGDSTADDR: + if (val) + inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR; + else + inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~3; @@ -1022,6 +1057,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_PASSSEC: val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; break; + case IP_RECVORIGDSTADDR: + val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; + break; case IP_TOS: val = inet->tos; break; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index ec8264ae45c..3262ce06294 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -35,7 +35,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; spi = htonl(ntohs(ipch->cpi)); - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, 
AF_INET); if (!x) return; @@ -49,7 +49,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t; - t = xfrm_state_alloc(); + t = xfrm_state_alloc(&init_net); if (t == NULL) goto out; @@ -85,7 +85,7 @@ static int ipcomp_tunnel_attach(struct xfrm_state *x) int err = 0; struct xfrm_state *t; - t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4, + t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr.a4, x->props.saddr.a4, IPPROTO_IPIP, AF_INET); if (!t) { t = ipcomp_tunnel_create(x); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index b3c3d7b0d11..5079dfbc6f3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -130,8 +130,8 @@ struct ipip_net { struct net_device *fb_tunnel_dev; }; -static int ipip_fb_tunnel_init(struct net_device *dev); -static int ipip_tunnel_init(struct net_device *dev); +static void ipip_fb_tunnel_init(struct net_device *dev); +static void ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); static DEFINE_RWLOCK(ipip_lock); @@ -245,9 +245,10 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, } nt = netdev_priv(dev); - dev->init = ipip_tunnel_init; nt->parms = *parms; + ipip_tunnel_init(dev); + if (register_netdevice(dev) < 0) goto failed_free; @@ -691,12 +692,17 @@ static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static const struct net_device_ops ipip_netdev_ops = { + .ndo_uninit = ipip_tunnel_uninit, + .ndo_start_xmit = ipip_tunnel_xmit, + .ndo_do_ioctl = ipip_tunnel_ioctl, + .ndo_change_mtu = ipip_tunnel_change_mtu, + +}; + static void ipip_tunnel_setup(struct net_device *dev) { - dev->uninit = ipip_tunnel_uninit; - dev->hard_start_xmit = ipip_tunnel_xmit; - dev->do_ioctl = ipip_tunnel_ioctl; - dev->change_mtu = ipip_tunnel_change_mtu; + dev->netdev_ops = &ipip_netdev_ops; dev->destructor = free_netdev; dev->type = ARPHRD_TUNNEL; @@ -708,11 +714,9 @@ static void ipip_tunnel_setup(struct net_device 
*dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static int ipip_tunnel_init(struct net_device *dev) +static void ipip_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - - tunnel = netdev_priv(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; strcpy(tunnel->parms.name, dev->name); @@ -721,11 +725,9 @@ static int ipip_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip_tunnel_bind_dev(dev); - - return 0; } -static int ipip_fb_tunnel_init(struct net_device *dev) +static void ipip_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -740,7 +742,6 @@ static int ipip_fb_tunnel_init(struct net_device *dev) dev_hold(dev); ipn->tunnels_wc[0] = tunnel; - return 0; } static struct xfrm_tunnel ipip_handler = { @@ -792,10 +793,10 @@ static int ipip_init_net(struct net *net) err = -ENOMEM; goto err_alloc_dev; } - - ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init; dev_net_set(ipn->fb_tunnel_dev, net); + ipip_fb_tunnel_init(ipn->fb_tunnel_dev); + if ((err = register_netdev(ipn->fb_tunnel_dev))) goto err_reg_dev; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 05ed336f798..77fc4d3fdf6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -124,8 +124,8 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) dev = __dev_get_by_name(&init_net, "tunl0"); if (dev) { + const struct net_device_ops *ops = dev->netdev_ops; struct ifreq ifr; - mm_segment_t oldfs; struct ip_tunnel_parm p; memset(&p, 0, sizeof(p)); @@ -137,9 +137,13 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) sprintf(p.name, "dvmrp%d", v->vifc_vifi); ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - oldfs = get_fs(); set_fs(KERNEL_DS); - dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL); - set_fs(oldfs); + if (ops->ndo_do_ioctl) { + mm_segment_t oldfs = get_fs(); + + set_fs(KERNEL_DS); + ops->ndo_do_ioctl(dev, &ifr, 
SIOCDELTUNNEL); + set_fs(oldfs); + } } } @@ -151,9 +155,9 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) dev = __dev_get_by_name(&init_net, "tunl0"); if (dev) { + const struct net_device_ops *ops = dev->netdev_ops; int err; struct ifreq ifr; - mm_segment_t oldfs; struct ip_tunnel_parm p; struct in_device *in_dev; @@ -166,9 +170,14 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) sprintf(p.name, "dvmrp%d", v->vifc_vifi); ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); + if (ops->ndo_do_ioctl) { + mm_segment_t oldfs = get_fs(); + + set_fs(KERNEL_DS); + err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + } else + err = -EOPNOTSUPP; dev = NULL; @@ -213,12 +222,16 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } +static const struct net_device_ops reg_vif_netdev_ops = { + .ndo_start_xmit = reg_vif_xmit, +}; + static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; - dev->hard_start_xmit = reg_vif_xmit; + dev->netdev_ops = &reg_vif_netdev_ops, dev->destructor = free_netdev; } @@ -1945,13 +1958,14 @@ int __init ip_mr_init(void) goto proc_cache_fail; #endif return 0; -reg_notif_fail: - kmem_cache_destroy(mrt_cachep); #ifdef CONFIG_PROC_FS -proc_vif_fail: - unregister_netdevice_notifier(&ip_mr_notifier); proc_cache_fail: proc_net_remove(&init_net, "ip_mr_vif"); +proc_vif_fail: + unregister_netdevice_notifier(&ip_mr_notifier); #endif +reg_notif_fail: + del_timer(&ipmr_expire_timer); + kmem_cache_destroy(mrt_cachep); return err; } diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 7c145d76384..fdf6811c31a 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -66,7 +66,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & 
IPSKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET) == 0) - if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) return -1; #endif @@ -97,7 +97,7 @@ int ip_xfrm_me_harder(struct sk_buff *skb) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0) + if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) return -1; dst_release(skb->dst); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8f5a403f6f6..614958b7c27 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -54,8 +54,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", sock_prot_inuse_get(net, &tcp_prot), - atomic_read(&tcp_orphan_count), - tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), + (int)percpu_counter_sum_positive(&tcp_orphan_count), + tcp_death_row.tw_count, + (int)percpu_counter_sum_positive(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(net, &udp_prot), @@ -234,46 +235,51 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), + SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), + SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), + SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), SNMP_MIB_SENTINEL }; +static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals, + unsigned short *type, int count) +{ + int j; + + if (count) { + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %sType%u", + type[j] & 0x100 ? 
"Out" : "In", + type[j] & 0xff); + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %lu", vals[j]); + } +} + static void icmpmsg_put(struct seq_file *seq) { #define PERLINE 16 - int j, i, count; - static int out[PERLINE]; + int i, count; + unsigned short type[PERLINE]; + unsigned long vals[PERLINE], val; struct net *net = seq->private; count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - - if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i)) - out[count++] = i; - if (count < PERLINE) - continue; - - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < PERLINE; ++j) - seq_printf(seq, " %sType%u", i & 0x100 ? "Out" : "In", - i & 0xff); - seq_printf(seq, "\nIcmpMsg: "); - for (j = 0; j < PERLINE; ++j) - seq_printf(seq, " %lu", - snmp_fold_field((void **) net->mib.icmpmsg_statistics, - out[j])); - seq_putc(seq, '\n'); - } - if (count) { - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < count; ++j) - seq_printf(seq, " %sType%u", out[j] & 0x100 ? "Out" : - "In", out[j] & 0xff); - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < count; ++j) - seq_printf(seq, " %lu", snmp_fold_field((void **) - net->mib.icmpmsg_statistics, out[j])); + val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); + if (val) { + type[count] = i; + vals[count++] = val; + } + if (count == PERLINE) { + icmpmsg_put_line(seq, vals, type, count); + count = 0; + } } + icmpmsg_put_line(seq, vals, type, count); #undef PERLINE } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 998fcffc9e1..dff8bc4e0fa 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -572,7 +572,7 @@ back_from_confirm: ipc.addr = rt->rt_dst; lock_sock(sk); err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, - &ipc, rt, msg->msg_flags); + &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0dc0c382676..77bfba97595 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ 
-160,7 +160,6 @@ static struct dst_ops ipv4_dst_ops = { .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, .local_out = __ip_local_out, - .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; @@ -2701,7 +2700,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .destroy = ipv4_dst_destroy, .check = ipv4_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, - .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; @@ -2763,7 +2761,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, flp->fl4_src = (*rp)->rt_src; if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; - err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, + err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, flags ? XFRM_LOOKUP_WAIT : 0); if (err == -EREMOTE) err = ipv4_dst_blackhole(net, rp, flp); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 60c28add96b..01924340862 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -277,8 +277,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; -atomic_t tcp_orphan_count = ATOMIC_INIT(0); - +struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); int sysctl_tcp_mem[3] __read_mostly; @@ -290,9 +289,12 @@ EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); atomic_t tcp_memory_allocated; /* Current allocated memory. */ -atomic_t tcp_sockets_allocated; /* Current number of TCP sockets. */ - EXPORT_SYMBOL(tcp_memory_allocated); + +/* + * Current number of TCP sockets. 
+ */ +struct percpu_counter tcp_sockets_allocated; EXPORT_SYMBOL(tcp_sockets_allocated); /* @@ -1374,8 +1376,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || - signal_pending(current) || - (flags & MSG_PEEK)) + signal_pending(current)) break; } else { if (sock_flag(sk, SOCK_DONE)) @@ -1835,7 +1836,7 @@ adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); - atomic_inc(sk->sk_prot->orphan_count); + percpu_counter_inc(sk->sk_prot->orphan_count); /* It is the last release_sock in its life. It will remove backlog. */ release_sock(sk); @@ -1886,9 +1887,11 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { + int orphan_count = percpu_counter_read_positive( + sk->sk_prot->orphan_count); + sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, - atomic_read(sk->sk_prot->orphan_count))) { + if (tcp_too_many_orphans(sk, orphan_count)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); @@ -2686,6 +2689,8 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); + percpu_counter_init(&tcp_sockets_allocated, 0); + percpu_counter_init(&tcp_orphan_count, 0); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, @@ -2708,8 +2713,8 @@ void __init tcp_init(void) thash_entries ? 
0 : 512 * 1024); tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; for (i = 0; i < tcp_hashinfo.ehash_size; i++) { - INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); - INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); + INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); + INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); } if (inet_ehash_locks_alloc(&tcp_hashinfo)) panic("TCP: failed to alloc ehash_locks"); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 838d491dfda..fcbcd4ff6c5 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -34,7 +34,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, tcp_get_info(sk, info); } -static struct inet_diag_handler tcp_diag_handler = { +static const struct inet_diag_handler tcp_diag_handler = { .idiag_hashinfo = &tcp_hashinfo, .idiag_get_info = tcp_diag_get_info, .idiag_type = TCPDIAG_GETSOCK, diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index af99776146f..937549b8a92 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -69,9 +69,12 @@ static u32 htcp_cwnd_undo(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - ca->last_cong = ca->undo_last_cong; - ca->maxRTT = ca->undo_maxRTT; - ca->old_maxB = ca->undo_old_maxB; + if (ca->undo_last_cong) { + ca->last_cong = ca->undo_last_cong; + ca->maxRTT = ca->undo_maxRTT; + ca->old_maxB = ca->undo_old_maxB; + ca->undo_last_cong = 0; + } return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta); } @@ -268,7 +271,10 @@ static void htcp_state(struct sock *sk, u8 new_state) case TCP_CA_Open: { struct htcp *ca = inet_csk_ca(sk); - ca->last_cong = jiffies; + if (ca->undo_last_cong) { + ca->last_cong = jiffies; + ca->undo_last_cong = 0; + } } break; case TCP_CA_CWR: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 097294b7da3..d67b6e9cc54 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1002,7 +1002,8 @@ static void tcp_skb_mark_lost(struct 
tcp_sock *tp, struct sk_buff *skb) } } -void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) +static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, + struct sk_buff *skb) { tcp_verify_retransmit_hint(tp, skb); @@ -1241,26 +1242,47 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, * aligned portion of it that matches. Therefore we might need to fragment * which may fail and creates some hassle (caller must handle error case * returns). + * + * FIXME: this could be merged to shift decision code */ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, u32 start_seq, u32 end_seq) { int in_sack, err; unsigned int pkt_len; + unsigned int mss; in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && !before(end_seq, TCP_SKB_CB(skb)->end_seq); if (tcp_skb_pcount(skb) > 1 && !in_sack && after(TCP_SKB_CB(skb)->end_seq, start_seq)) { - + mss = tcp_skb_mss(skb); in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); - if (!in_sack) + if (!in_sack) { pkt_len = start_seq - TCP_SKB_CB(skb)->seq; - else + if (pkt_len < mss) + pkt_len = mss; + } else { pkt_len = end_seq - TCP_SKB_CB(skb)->seq; - err = tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size); + if (pkt_len < mss) + return -EINVAL; + } + + /* Round if necessary so that SACKs cover only full MSSes + * and/or the remaining small portion (if present) + */ + if (pkt_len > mss) { + unsigned int new_len = (pkt_len / mss) * mss; + if (!in_sack && new_len < pkt_len) { + new_len += mss; + if (new_len > skb->len) + return 0; + } + pkt_len = new_len; + } + err = tcp_fragment(sk, skb, pkt_len, mss); if (err < 0) return err; } @@ -1269,7 +1291,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, } static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, - int *reord, int dup_sack, int fack_count) + int *reord, int dup_sack, int fack_count, + u8 *sackedto, int pcount) { struct tcp_sock *tp = tcp_sk(sk); u8 sacked = 
TCP_SKB_CB(skb)->sacked; @@ -1294,10 +1317,9 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, * that retransmission is still in flight. */ if (sacked & TCPCB_LOST) { - TCP_SKB_CB(skb)->sacked &= - ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); - tp->lost_out -= tcp_skb_pcount(skb); - tp->retrans_out -= tcp_skb_pcount(skb); + *sackedto &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); + tp->lost_out -= pcount; + tp->retrans_out -= pcount; } } else { if (!(sacked & TCPCB_RETRANS)) { @@ -1314,48 +1336,280 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, } if (sacked & TCPCB_LOST) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - tp->lost_out -= tcp_skb_pcount(skb); + *sackedto &= ~TCPCB_LOST; + tp->lost_out -= pcount; } } - TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; + *sackedto |= TCPCB_SACKED_ACKED; flag |= FLAG_DATA_SACKED; - tp->sacked_out += tcp_skb_pcount(skb); + tp->sacked_out += pcount; - fack_count += tcp_skb_pcount(skb); + fack_count += pcount; /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) - tp->lost_cnt_hint += tcp_skb_pcount(skb); + tp->lost_cnt_hint += pcount; if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; - - if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) - tcp_advance_highest_sack(sk, skb); } /* D-SACK. We can detect redundant retransmission in S|R and plain R * frames and clear it. undo_retrans is decreased above, L|R frames * are accounted above as well. 
*/ - if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); + if (dup_sack && (*sackedto & TCPCB_SACKED_RETRANS)) { + *sackedto &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= pcount; } return flag; } +static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, + struct sk_buff *skb, unsigned int pcount, + int shifted, int fack_count, int *reord, + int *flag, int mss) +{ + struct tcp_sock *tp = tcp_sk(sk); + u8 dummy_sacked = TCP_SKB_CB(skb)->sacked; /* We discard results */ + + BUG_ON(!pcount); + + /* Tweak before seqno plays */ + if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint && + !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) + tp->lost_cnt_hint += pcount; + + TCP_SKB_CB(prev)->end_seq += shifted; + TCP_SKB_CB(skb)->seq += shifted; + + skb_shinfo(prev)->gso_segs += pcount; + BUG_ON(skb_shinfo(skb)->gso_segs < pcount); + skb_shinfo(skb)->gso_segs -= pcount; + + /* When we're adding to gso_segs == 1, gso_size will be zero, + * in theory this shouldn't be necessary but as long as DSACK + * code can come after this skb later on it's better to keep + * setting gso_size to something. + */ + if (!skb_shinfo(prev)->gso_size) { + skb_shinfo(prev)->gso_size = mss; + skb_shinfo(prev)->gso_type = sk->sk_gso_type; + } + + /* CHECKME: To clear or not to clear? Mimics normal skb currently */ + if (skb_shinfo(skb)->gso_segs <= 1) { + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; + } + + *flag |= tcp_sacktag_one(skb, sk, reord, 0, fack_count, &dummy_sacked, + pcount); + + /* Difference in this won't matter, both ACKed by the same cumul. 
ACK */ + TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); + + if (skb->len > 0) { + BUG_ON(!tcp_skb_pcount(skb)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); + return 0; + } + + /* Whole SKB was eaten :-) */ + + if (skb == tp->retransmit_skb_hint) + tp->retransmit_skb_hint = prev; + if (skb == tp->scoreboard_skb_hint) + tp->scoreboard_skb_hint = prev; + if (skb == tp->lost_skb_hint) { + tp->lost_skb_hint = prev; + tp->lost_cnt_hint -= tcp_skb_pcount(prev); + } + + TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; + if (skb == tcp_highest_sack(sk)) + tcp_advance_highest_sack(sk, skb); + + tcp_unlink_write_queue(skb, sk); + sk_wmem_free_skb(sk, skb); + + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); + + return 1; +} + +/* I wish gso_size would have a bit more sane initialization than + * something-or-zero which complicates things + */ +static int tcp_shift_mss(struct sk_buff *skb) +{ + int mss = tcp_skb_mss(skb); + + if (!mss) + mss = skb->len; + + return mss; +} + +/* Shifting pages past head area doesn't work */ +static int skb_can_shift(struct sk_buff *skb) +{ + return !skb_headlen(skb) && skb_is_nonlinear(skb); +} + +/* Try collapsing SACK blocks spanning across multiple skbs to a single + * skb. + */ +static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, + u32 start_seq, u32 end_seq, + int dup_sack, int *fack_count, + int *reord, int *flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *prev; + int mss; + int pcount = 0; + int len; + int in_sack; + + if (!sk_can_gso(sk)) + goto fallback; + + /* Normally R but no L won't result in plain S */ + if (!dup_sack && + (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) == TCPCB_SACKED_RETRANS) + goto fallback; + if (!skb_can_shift(skb)) + goto fallback; + /* This frame is about to be dropped (was ACKed). 
*/ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) + goto fallback; + + /* Can only happen with delayed DSACK + discard craziness */ + if (unlikely(skb == tcp_write_queue_head(sk))) + goto fallback; + prev = tcp_write_queue_prev(sk, skb); + + if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) + goto fallback; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && + !before(end_seq, TCP_SKB_CB(skb)->end_seq); + + if (in_sack) { + len = skb->len; + pcount = tcp_skb_pcount(skb); + mss = tcp_shift_mss(skb); + + /* TODO: Fix DSACKs to not fragment already SACKed and we can + * drop this restriction as unnecessary + */ + if (mss != tcp_shift_mss(prev)) + goto fallback; + } else { + if (!after(TCP_SKB_CB(skb)->end_seq, start_seq)) + goto noop; + /* CHECKME: This is non-MSS split case only?, this will + * cause skipped skbs due to advancing loop btw, original + * has that feature too + */ + if (tcp_skb_pcount(skb) <= 1) + goto noop; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); + if (!in_sack) { + /* TODO: head merge to next could be attempted here + * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)), + * though it might not be worth of the additional hassle + * + * ...we can probably just fallback to what was done + * previously. We could try merging non-SACKed ones + * as well but it probably isn't going to buy off + * because later SACKs might again split them, and + * it would make skb timestamp tracking considerably + * harder problem. + */ + goto fallback; + } + + len = end_seq - TCP_SKB_CB(skb)->seq; + BUG_ON(len < 0); + BUG_ON(len > skb->len); + + /* MSS boundaries should be honoured or else pcount will + * severely break even though it makes things bit trickier. 
+ * Optimize common case to avoid most of the divides + */ + mss = tcp_skb_mss(skb); + + /* TODO: Fix DSACKs to not fragment already SACKed and we can + * drop this restriction as unnecessary + */ + if (mss != tcp_shift_mss(prev)) + goto fallback; + + if (len == mss) { + pcount = 1; + } else if (len < mss) { + goto noop; + } else { + pcount = len / mss; + len = pcount * mss; + } + } + + if (!skb_shift(prev, skb, len)) + goto fallback; + if (!tcp_shifted_skb(sk, prev, skb, pcount, len, *fack_count, reord, + flag, mss)) + goto out; + + /* Hole filled allows collapsing with the next as well, this is very + * useful when hole on every nth skb pattern happens + */ + if (prev == tcp_write_queue_tail(sk)) + goto out; + skb = tcp_write_queue_next(sk, prev); + + if (!skb_can_shift(skb)) + goto out; + if (skb == tcp_send_head(sk)) + goto out; + if ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) + goto out; + + len = skb->len; + if (skb_shift(prev, skb, len)) { + pcount += tcp_skb_pcount(skb); + tcp_shifted_skb(sk, prev, skb, tcp_skb_pcount(skb), len, + *fack_count, reord, flag, mss); + } + +out: + *fack_count += pcount; + return prev; + +noop: + return skb; + +fallback: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); + return NULL; +} + static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, struct tcp_sack_block *next_dup, u32 start_seq, u32 end_seq, int dup_sack_in, int *fack_count, int *reord, int *flag) { + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *tmp; + tcp_for_write_queue_from(skb, sk) { int in_sack = 0; int dup_sack = dup_sack_in; @@ -1376,15 +1630,41 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, dup_sack = 1; } - if (in_sack <= 0) - in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, - end_seq); + /* skb reference here is a bit tricky to get right, since + * shifting can eat and free both this skb and the next, + * so not even _safe variant of the loop is enough. 
+ */ + if (in_sack <= 0) { + tmp = tcp_shift_skb_data(sk, skb, start_seq, + end_seq, dup_sack, + fack_count, reord, flag); + if (tmp != NULL) { + if (tmp != skb) { + skb = tmp; + continue; + } + + in_sack = 0; + } else { + in_sack = tcp_match_skb_to_sack(sk, skb, + start_seq, + end_seq); + } + } + if (unlikely(in_sack < 0)) break; - if (in_sack) + if (in_sack) { *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, - *fack_count); + *fack_count, + &(TCP_SKB_CB(skb)->sacked), + tcp_skb_pcount(skb)); + + if (!before(TCP_SKB_CB(skb)->seq, + tcp_highest_sack_seq(tp))) + tcp_advance_highest_sack(sk, skb); + } *fack_count += tcp_skb_pcount(skb); } @@ -1401,7 +1681,7 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, if (skb == tcp_send_head(sk)) break; - if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) + if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) break; *fack_count += tcp_skb_pcount(skb); @@ -1660,7 +1940,7 @@ out: /* Limits sacked_out so that sum with lost_out isn't ever larger than * packets_out. Returns zero if sacked_out adjustement wasn't necessary. */ -int tcp_limit_reno_sacked(struct tcp_sock *tp) +static int tcp_limit_reno_sacked(struct tcp_sock *tp) { u32 holes; @@ -2559,6 +2839,56 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } +/* Do a simple retransmit without using the backoff mechanisms in + * tcp_timer. This is used for path mtu discovery. + * The socket is already locked here. 
+ */ +void tcp_simple_retransmit(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + unsigned int mss = tcp_current_mss(sk, 0); + u32 prior_lost = tp->lost_out; + + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + if (skb->len > mss && + !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= tcp_skb_pcount(skb); + } + tcp_skb_mark_lost_uncond_verify(tp, skb); + } + } + + tcp_clear_retrans_hints_partial(tp); + + if (prior_lost == tp->lost_out) + return; + + if (tcp_is_reno(tp)) + tcp_limit_reno_sacked(tp); + + tcp_verify_left_out(tp); + + /* Don't muck with the congestion window here. + * Reason is that we do not increase amount of _data_ + * in network, but units changed and effective + * cwnd/ssthresh really reduced now. + */ + if (icsk->icsk_ca_state != TCP_CA_Loss) { + tp->high_seq = tp->snd_nxt; + tp->snd_ssthresh = tcp_current_ssthresh(sk); + tp->prior_ssthresh = 0; + tp->undo_marker = 0; + tcp_set_ca_state(sk, TCP_CA_Loss); + } + tcp_xmit_retransmit_queue(sk); +} + /* Process an event, which can update packets-in-flight not trivially. 
* Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d49233f409b..26b9030747c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,11 +97,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) } #endif -struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { - .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), -}; +struct inet_hashinfo tcp_hashinfo; static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) { @@ -492,7 +488,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = tcp_v4_check(len, inet->saddr, inet->daddr, - csum_partial((char *)th, + csum_partial(th, th->doff << 2, skb->csum)); } @@ -726,7 +722,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, th->check = tcp_v4_check(skb->len, ireq->loc_addr, ireq->rmt_addr, - csum_partial((char *)th, skb->len, + csum_partial(th, skb->len, skb->csum)); err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, @@ -1801,7 +1797,7 @@ static int tcp_v4_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - atomic_inc(&tcp_sockets_allocated); + percpu_counter_inc(&tcp_sockets_allocated); return 0; } @@ -1849,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } - atomic_dec(&tcp_sockets_allocated); + percpu_counter_dec(&tcp_sockets_allocated); } EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -1857,32 +1853,35 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. 
*/ -static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) +static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) { - return hlist_empty(head) ? NULL : + return hlist_nulls_empty(head) ? NULL : list_entry(head->first, struct inet_timewait_sock, tw_node); } static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) { - return tw->tw_node.next ? - hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; + return !is_a_nulls(tw->tw_node.next) ? + hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; } static void *listening_get_next(struct seq_file *seq, void *cur) { struct inet_connection_sock *icsk; - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *sk = cur; + struct inet_listen_hashbucket *ilb; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); if (!sk) { st->bucket = 0; - sk = sk_head(&tcp_hashinfo.listening_hash[0]); + ilb = &tcp_hashinfo.listening_hash[0]; + spin_lock_bh(&ilb->lock); + sk = sk_nulls_head(&ilb->head); goto get_sk; } - + ilb = &tcp_hashinfo.listening_hash[st->bucket]; ++st->num; if (st->state == TCP_SEQ_STATE_OPENREQ) { @@ -1915,7 +1914,7 @@ get_req: sk = sk_next(sk); } get_sk: - sk_for_each_from(sk, node) { + sk_nulls_for_each_from(sk, node) { if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { cur = sk; goto out; @@ -1932,8 +1931,11 @@ start_req: } read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } + spin_unlock_bh(&ilb->lock); if (++st->bucket < INET_LHTABLE_SIZE) { - sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); + ilb = &tcp_hashinfo.listening_hash[st->bucket]; + spin_lock_bh(&ilb->lock); + sk = sk_nulls_head(&ilb->head); goto get_sk; } cur = NULL; @@ -1954,8 +1956,8 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) static inline int empty_bucket(struct tcp_iter_state *st) { - return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && - 
hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); + return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && + hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); } static void *established_get_first(struct seq_file *seq) @@ -1966,16 +1968,16 @@ static void *established_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); + spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); /* Lockless fast path for the common case of empty buckets */ if (empty_bucket(st)) continue; - read_lock_bh(lock); - sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { + spin_lock_bh(lock); + sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family || !net_eq(sock_net(sk), net)) { continue; @@ -1993,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq) rc = tw; goto out; } - read_unlock_bh(lock); + spin_unlock_bh(lock); st->state = TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2004,7 +2006,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) { struct sock *sk = cur; struct inet_timewait_sock *tw; - struct hlist_node *node; + struct hlist_nulls_node *node; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); @@ -2021,7 +2023,7 @@ get_tw: cur = tw; goto out; } - read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); st->state = TCP_SEQ_STATE_ESTABLISHED; /* Look for next non empty bucket */ @@ -2031,12 +2033,12 @@ get_tw: if (st->bucket >= tcp_hashinfo.ehash_size) return NULL; - read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); - sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); + spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + sk = 
sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); } else - sk = sk_next(sk); + sk = sk_nulls_next(sk); - sk_for_each_from(sk, node) { + sk_nulls_for_each_from(sk, node) { if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) goto found; } @@ -2066,12 +2068,10 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) void *rc; struct tcp_iter_state *st = seq->private; - inet_listen_lock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_LISTENING; rc = listening_get_idx(seq, &pos); if (!rc) { - inet_listen_unlock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_idx(seq, pos); } @@ -2103,7 +2103,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) case TCP_SEQ_STATE_LISTENING: rc = listening_get_next(seq, v); if (!rc) { - inet_listen_unlock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_first(seq); } @@ -2130,12 +2129,12 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - inet_listen_unlock(&tcp_hashinfo); + spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); break; case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); break; } } @@ -2375,6 +2374,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, @@ -2404,6 +2404,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { void __init tcp_v4_init(void) { + inet_hashinfo_init(&tcp_hashinfo); if (register_pernet_device(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a524627923a..76f840917bc 100644 --- 
a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -722,7 +722,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk)) { + if (skb->len <= mss_now || !sk_can_gso(sk) || + tcp_urg_mode(tcp_sk(sk))) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1163,7 +1164,9 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, { int tso_segs = tcp_skb_pcount(skb); - if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { + if (!tso_segs || + (tso_segs > 1 && (tcp_skb_mss(skb) != mss_now || + tcp_urg_mode(tcp_sk(sk))))) { tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } @@ -1766,46 +1769,22 @@ u32 __tcp_select_window(struct sock *sk) return window; } -/* Attempt to collapse two adjacent SKB's during retransmission. */ -static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, - int mss_now) +/* Collapses two adjacent SKB's during retransmission. */ +static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); int skb_size, next_skb_size; u16 flags; - /* The first test we must make is that neither of these two - * SKB's are still referenced by someone else. - */ - if (skb_cloned(skb) || skb_cloned(next_skb)) - return; - skb_size = skb->len; next_skb_size = next_skb->len; flags = TCP_SKB_CB(skb)->flags; - /* Also punt if next skb has been SACK'd. */ - if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) - return; - - /* Next skb is out of window. */ - if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp))) - return; - - /* Punt if not enough space exists in the first SKB for - * the data in the second, or the total combined payload - * would exceed the MSS. 
- */ - if ((next_skb_size > skb_tailroom(skb)) || - ((skb_size + next_skb_size) > mss_now)) - return; - BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); tcp_highest_sack_combine(sk, next_skb, skb); - /* Ok. We will be able to collapse the packet. */ tcp_unlink_write_queue(next_skb, sk); skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), @@ -1847,54 +1826,60 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, sk_wmem_free_skb(sk, next_skb); } -/* Do a simple retransmit without using the backoff mechanisms in - * tcp_timer. This is used for path mtu discovery. - * The socket is already locked here. - */ -void tcp_simple_retransmit(struct sock *sk) +static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) +{ + if (tcp_skb_pcount(skb) > 1) + return 0; + /* TODO: SACK collapsing could be used to remove this condition */ + if (skb_shinfo(skb)->nr_frags != 0) + return 0; + if (skb_cloned(skb)) + return 0; + if (skb == tcp_send_head(sk)) + return 0; + /* Some heurestics for collapsing over SACK'd could be invented */ + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + return 0; + + return 1; +} + +static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, + int space) { - const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - unsigned int mss = tcp_current_mss(sk, 0); - u32 prior_lost = tp->lost_out; + struct sk_buff *skb = to, *tmp; + int first = 1; - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) + if (!sysctl_tcp_retrans_collapse) + return; + if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) + return; + + tcp_for_write_queue_from_safe(skb, tmp, sk) { + if (!tcp_can_collapse(sk, skb)) break; - if (skb->len > mss && - !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } 
- tcp_skb_mark_lost_uncond_verify(tp, skb); - } - } - tcp_clear_retrans_hints_partial(tp); + space -= skb->len; - if (prior_lost == tp->lost_out) - return; + if (first) { + first = 0; + continue; + } - if (tcp_is_reno(tp)) - tcp_limit_reno_sacked(tp); + if (space < 0) + break; + /* Punt if not enough space exists in the first SKB for + * the data in the second + */ + if (skb->len > skb_tailroom(to)) + break; - tcp_verify_left_out(tp); + if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) + break; - /* Don't muck with the congestion window here. - * Reason is that we do not increase amount of _data_ - * in network, but units changed and effective - * cwnd/ssthresh really reduced now. - */ - if (icsk->icsk_ca_state != TCP_CA_Loss) { - tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->prior_ssthresh = 0; - tp->undo_marker = 0; - tcp_set_ca_state(sk, TCP_CA_Loss); + tcp_collapse_retrans(sk, to); } - tcp_xmit_retransmit_queue(sk); } /* This retransmits one SKB. Policy decisions and retransmit queue @@ -1946,17 +1931,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return -ENOMEM; /* We'll try again later. */ } - /* Collapse two adjacent packets if worthwhile and we can. */ - if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && - (skb->len < (cur_mss >> 1)) && - (!tcp_skb_is_last(sk, skb)) && - (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && - (skb_shinfo(skb)->nr_frags == 0 && - skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && - (tcp_skb_pcount(skb) == 1 && - tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && - (sysctl_tcp_retrans_collapse != 0)) - tcp_retrans_try_collapse(sk, skb, cur_mss); + tcp_retrans_try_collapse(sk, skb, cur_mss); /* Some Solaris stacks overoptimize and ignore the FIN on a * retransmit when old data is attached. 
So strip it off diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3df339e3e36..cc4e6d27ded 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -65,7 +65,7 @@ static void tcp_write_err(struct sock *sk) static int tcp_out_of_resources(struct sock *sk, int do_reset) { struct tcp_sock *tp = tcp_sk(sk); - int orphans = atomic_read(&tcp_orphan_count); + int orphans = percpu_counter_read_positive(&tcp_orphan_count); /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7e4d9c87115..cf5ab0581eb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -127,9 +127,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct sock *sk2)) { struct sock *sk2; - struct hlist_node *node; + struct hlist_nulls_node *node; - sk_for_each(sk2, node, &hslot->head) + sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && sk2->sk_hash == num && @@ -189,12 +189,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - /* - * We need that previous write to sk->sk_hash committed - * before write to sk->next done in following add_node() variant - */ - smp_wmb(); - sk_add_node_rcu(sk, &hslot->head); + sk_nulls_add_node_rcu(sk, &hslot->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; @@ -261,7 +256,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node, *next; + struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; @@ -271,13 +266,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, begin: result = NULL; badness = -1; - sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { - /* - * lockless 
reader, and SLAB_DESTROY_BY_RCU items: - * We must check this item was not moved to another chain - */ - if (udp_hashfn(net, sk->sk_hash) != hash) - goto begin; + sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { @@ -285,6 +274,14 @@ begin: badness = score; } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash) + goto begin; + if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; @@ -320,19 +317,20 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, } EXPORT_SYMBOL_GPL(udp4_lib_lookup); -static inline struct sock *udp_v4_mcast_next(struct sock *sk, +static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *s = sk; unsigned short hnum = ntohs(loc_port); - sk_for_each_from(s, node) { + sk_nulls_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); - if (s->sk_hash != hnum || + if (!net_eq(sock_net(s), net) || + s->sk_hash != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || @@ -668,6 +666,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .saddr = saddr, .tos = tos } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; @@ -720,7 +719,7 @@ do_append_data: up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), &ipc, rt, + sizeof(struct udphdr), &ipc, &rt, corkreq ? 
msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_flush_pending_frames(sk); @@ -971,16 +970,18 @@ int udp_disconnect(struct sock *sk, int flags) void udp_lib_unhash(struct sock *sk) { - struct udp_table *udptable = sk->sk_prot->h.udp_table; - unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); - struct udp_hslot *hslot = &udptable->hash[hash]; + if (sk_hashed(sk)) { + struct udp_table *udptable = sk->sk_prot->h.udp_table; + unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); + struct udp_hslot *hslot = &udptable->hash[hash]; - spin_lock_bh(&hslot->lock); - if (sk_del_node_init_rcu(sk)) { - inet_sk(sk)->num = 0; - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + spin_lock_bh(&hslot->lock); + if (sk_nulls_del_node_init_rcu(sk)) { + inet_sk(sk)->num = 0; + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + } + spin_unlock_bh(&hslot->lock); } - spin_unlock_bh(&hslot->lock); } EXPORT_SYMBOL(udp_lib_unhash); @@ -1129,17 +1130,18 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif; spin_lock(&hslot->lock); - sk = sk_head(&hslot->head); + sk = sk_nulls_head(&hslot->head); dif = skb->dev->ifindex; - sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { struct sock *sknext = NULL; do { struct sk_buff *skb1 = skb; - sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, - uh->source, saddr, dif); + sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, + daddr, uh->source, saddr, + dif); if (sknext) skb1 = skb_clone(skb, GFP_ATOMIC); @@ -1558,10 +1560,10 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) struct net *net = seq_file_net(seq); for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; spin_lock_bh(&hslot->lock); - sk_for_each(sk, 
node, &hslot->head) { + sk_nulls_for_each(sk, node, &hslot->head) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == state->family) @@ -1580,7 +1582,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) struct net *net = seq_file_net(seq); do { - sk = sk_next(sk); + sk = sk_nulls_next(sk); } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); if (!sk) { @@ -1751,7 +1753,7 @@ void __init udp_table_init(struct udp_table *table) int i; for (i = 0; i < UDP_HTABLE_SIZE; i++) { - INIT_HLIST_HEAD(&table->hash[i].head); + INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); spin_lock_init(&table->hash[i].lock); } } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index f9a775b7e79..2ad24ba31f9 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -18,7 +18,8 @@ static struct dst_ops xfrm4_dst_ops; static struct xfrm_policy_afinfo xfrm4_policy_afinfo; -static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr, +static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct flowi fl = { @@ -36,19 +37,20 @@ static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) fl.fl4_src = saddr->a4; - err = __ip_route_output_key(&init_net, &rt, &fl); + err = __ip_route_output_key(net, &rt, &fl); dst = &rt->u.dst; if (err) dst = ERR_PTR(err); return dst; } -static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +static int xfrm4_get_saddr(struct net *net, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct rtable *rt; - dst = xfrm4_dst_lookup(0, NULL, daddr); + dst = xfrm4_dst_lookup(net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; @@ -187,7 +189,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm4_garbage_collect(struct dst_ops *ops) { - xfrm4_policy_afinfo.garbage_collect(); + 
xfrm4_policy_afinfo.garbage_collect(&init_net); return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); } @@ -246,7 +248,6 @@ static struct dst_ops xfrm4_dst_ops = { .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, .gc_thresh = 1024, - .entry_size = sizeof(struct xfrm_dst), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 07735ed280d..1ef1366a0a0 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -13,8 +13,6 @@ #include <linux/ipsec.h> #include <linux/netfilter_ipv4.h> -static struct xfrm_state_afinfo xfrm4_state_afinfo; - static int xfrm4_init_flags(struct xfrm_state *x) { if (ipv4_config.no_pmtu_disc) @@ -33,6 +31,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); x->sel.sport_mask = htons(0xffff); + x->sel.family = AF_INET; x->sel.prefixlen_d = 32; x->sel.prefixlen_s = 32; x->sel.proto = fl->proto; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 07ee758de9e..e92ad8455c6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2031,8 +2031,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT) { + const struct net_device_ops *ops = dev->netdev_ops; struct ifreq ifr; - mm_segment_t oldfs; struct ip_tunnel_parm p; err = -EADDRNOTAVAIL; @@ -2048,9 +2048,14 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) p.iph.ttl = 64; ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); + if (ops->ndo_do_ioctl) { + mm_segment_t oldfs = get_fs(); + + set_fs(KERNEL_DS); + err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + } else + err = -EOPNOTSUPP; if (err == 0) { err = -ENOBUFS; @@ -2483,8 +2488,10 @@ static int addrconf_notify(struct notifier_block 
*this, unsigned long event, if (!idev && dev->mtu >= IPV6_MIN_MTU) idev = ipv6_add_dev(dev); - if (idev) + if (idev) { idev->if_flags |= IF_READY; + run_pending = 1; + } } else { if (!addrconf_qdisc_ok(dev)) { /* device is still not ready. */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 01edac88851..437b750b98f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -637,7 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; return err; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 7a8a01369e5..52449f7a1b7 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -407,6 +407,7 @@ out: static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset); struct xfrm_state *x; @@ -415,7 +416,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; @@ -509,9 +510,7 @@ static void ah6_destroy(struct xfrm_state *x) return; kfree(ahp->work_icv); - ahp->work_icv = NULL; crypto_free_hash(ahp->tfm); - ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 410046a8cc9..e2bdc6d83a4 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -175,7 +175,8 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == 
-EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -661,6 +662,11 @@ int datagram_send_ctl(struct net *net, switch (rthdr->type) { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: + if (rthdr->hdrlen != 2 || + rthdr->segments_left != 1) { + err = -EINVAL; + goto exit_f; + } break; #endif default: diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c02a6308def..c2f250150db 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -356,6 +356,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset); struct xfrm_state *x; @@ -364,7 +365,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index be351009fd0..4f433847d95 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -233,7 +233,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6h->icmp6_cksum = 0; if (skb_queue_len(&sk->sk_write_queue) == 1) { - skb->csum = csum_partial((char *)icmp6h, + skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, @@ -246,7 +246,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct tmp_csum = csum_add(tmp_csum, skb->csum); } - tmp_csum = csum_partial((char *)icmp6h, + tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); icmp6h->icmp6_cksum = 
csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, @@ -427,7 +427,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, /* No need to clone since we're just using its address. */ dst2 = dst; - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(net, &dst, &fl, sk, 0); switch (err) { case 0: if (dst != dst2) @@ -446,7 +446,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (ip6_dst_lookup(sk, &dst2, &fl)) goto relookup_failed; - err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP); + err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP); switch (err) { case 0: dst_release(dst); @@ -552,7 +552,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto out; - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 16d43f20b32..3c3732d50c1 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -219,7 +219,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_route_caps = 0; kfree_skb(skb); return err; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 1646a565825..8fe267feb81 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -25,26 +25,30 @@ void __inet6_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; - rwlock_t *lock; WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { - list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &hashinfo->lhash_lock; - inet_listen_wlock(hashinfo); + struct inet_listen_hashbucket *ilb; + + ilb = 
&hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + spin_lock(&ilb->lock); + __sk_nulls_add_node_rcu(sk, &ilb->head); + spin_unlock(&ilb->lock); } else { unsigned int hash; + struct hlist_nulls_head *list; + spinlock_t *lock; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); - write_lock(lock); + spin_lock(lock); + __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); } - __sk_add_node(sk, list); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); } EXPORT_SYMBOL(__inet6_hash); @@ -63,77 +67,122 @@ struct sock *__inet6_lookup_established(struct net *net, const int dif) { struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begin; + } + goto out; + } } + if (get_nulls_value(node) != slot) + goto begin; + +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ - sk_for_each(sk, node, &head->twchain) { - if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; + sk_nulls_for_each_rcu(sk, node, &head->twchain) { + if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begintw; + } + goto out; + } } - read_unlock(lock); - return NULL; - -hit: - sock_hold(sk); - read_unlock(lock); + if (get_nulls_value(node) != slot) + goto begintw; + sk = NULL; +out: + rcu_read_unlock(); return sk; } EXPORT_SYMBOL(__inet6_lookup_established); +static int inline compute_score(struct sock *sk, struct net *net, + const unsigned short hnum, + const struct in6_addr *daddr, + const int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && + sk->sk_family == PF_INET6) { + const struct ipv6_pinfo *np = inet6_sk(sk); + + score = 1; + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + } + return score; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { struct sock *sk; - const struct hlist_node *node; - struct sock *result = NULL; - int score, hiscore = 0; - - read_lock(&hashinfo->lhash_lock); - sk_for_each(sk, node, - &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) { - if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && - sk->sk_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - - score = 1; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 3) { - result = 
sk; - break; - } - if (score > hiscore) { - hiscore = score; - result = sk; - } + const struct hlist_nulls_node *node; + struct sock *result; + int score, hiscore; + unsigned int hash = inet_lhashfn(net, hnum); + struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + + rcu_read_lock(); +begin: + result = NULL; + hiscore = -1; + sk_nulls_for_each(sk, node, &ilb->head) { + score = compute_score(sk, net, hnum, daddr, dif); + if (score > hiscore) { + hiscore = score; + result = sk; + } + } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) + goto begin; + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, daddr, + dif) < hiscore)) { + sock_put(result); + goto begin; } } - if (result) - sock_hold(result); - read_unlock(&hashinfo->lhash_lock); + rcu_read_unlock(); return result; } @@ -170,16 +219,15 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hinfo, hash); + spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - prefetch(head->chain.first); - write_lock(lock); + spin_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { @@ -192,7 +240,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... 
*/ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) goto not_unique; } @@ -203,10 +251,10 @@ unique: inet->num = lport; inet->sport = htons(lport); WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); sk->sk_hash = hash; + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); if (twp != NULL) { *twp = tw; @@ -221,7 +269,7 @@ unique: return 0; not_unique: - write_unlock(lock); + spin_unlock(lock); return -EADDRNOTAVAIL; } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 7927a8498d1..5656e8aa47d 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -464,7 +464,7 @@ static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) { - int err; + int uninitialized_var(err); struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 64ce3d33d9c..58e2b0d9375 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -74,8 +74,8 @@ MODULE_LICENSE("GPL"); (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ (HASH_SIZE - 1)) -static int ip6_fb_tnl_dev_init(struct net_device *dev); -static int ip6_tnl_dev_init(struct net_device *dev); +static void ip6_fb_tnl_dev_init(struct net_device *dev); +static void ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); static int ip6_tnl_net_id; @@ -249,7 +249,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) } t = netdev_priv(dev); - dev->init = ip6_tnl_dev_init; + ip6_tnl_dev_init(dev); t->parms = *p; if ((err = register_netdevice(dev)) < 0) @@ -846,6 +846,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int encap_limit, __u32 *pmtu) { + 
struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -861,9 +862,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { - dst = ip6_route_output(dev_net(dev), NULL, fl); + dst = ip6_route_output(net, NULL, fl); - if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0) + if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) goto tx_err_link_failure; } @@ -1150,7 +1151,6 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) * ip6_tnl_change - update the tunnel parameters * @t: tunnel to be changed * @p: tunnel configuration parameters - * @active: != 0 if tunnel is ready for use * * Description: * ip6_tnl_change() updates the tunnel parameters @@ -1306,6 +1306,14 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) return 0; } + +static const struct net_device_ops ip6_tnl_netdev_ops = { + .ndo_uninit = ip6_tnl_dev_uninit, + .ndo_start_xmit = ip6_tnl_xmit, + .ndo_do_ioctl = ip6_tnl_ioctl, + .ndo_change_mtu = ip6_tnl_change_mtu, +}; + /** * ip6_tnl_dev_setup - setup virtual tunnel device * @dev: virtual device associated with tunnel @@ -1316,11 +1324,8 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) static void ip6_tnl_dev_setup(struct net_device *dev) { - dev->uninit = ip6_tnl_dev_uninit; + dev->netdev_ops = &ip6_tnl_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ip6_tnl_xmit; - dev->do_ioctl = ip6_tnl_ioctl; - dev->change_mtu = ip6_tnl_change_mtu; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); @@ -1349,13 +1354,11 @@ ip6_tnl_dev_init_gen(struct net_device *dev) * @dev: virtual device associated with tunnel **/ -static int -ip6_tnl_dev_init(struct net_device *dev) +static void ip6_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); ip6_tnl_dev_init_gen(dev); ip6_tnl_link_config(t); - return 0; 
} /** @@ -1365,8 +1368,7 @@ ip6_tnl_dev_init(struct net_device *dev) * Return: 0 **/ -static int -ip6_fb_tnl_dev_init(struct net_device *dev) +static void ip6_fb_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); @@ -1376,7 +1378,6 @@ ip6_fb_tnl_dev_init(struct net_device *dev) t->parms.proto = IPPROTO_IPV6; dev_hold(dev); ip6n->tnls_wc[0] = t; - return 0; } static struct xfrm6_tunnel ip4ip6_handler = { @@ -1428,10 +1429,10 @@ static int ip6_tnl_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; - - ip6n->fb_tnl_dev->init = ip6_fb_tnl_dev_init; dev_net_set(ip6n->fb_tnl_dev, net); + ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); + err = register_netdev(ip6n->fb_tnl_dev); if (err < 0) goto err_register; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index c491fb98a5e..dfba9fd0c24 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -224,7 +224,7 @@ static struct file_operations ip6mr_vif_fops = { .open = ip6mr_vif_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) @@ -337,7 +337,7 @@ static struct file_operations ip6mr_mfc_fops = { .open = ipmr_mfc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; #endif @@ -416,12 +416,16 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } +static const struct net_device_ops reg_vif_netdev_ops = { + .ndo_start_xmit = reg_vif_xmit, +}; + static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; dev->flags = IFF_NOARP; - dev->hard_start_xmit = reg_vif_xmit; + dev->netdev_ops = ®_vif_netdev_ops; dev->destructor = free_netdev; } @@ -980,14 +984,15 @@ int __init ip6_mr_init(void) goto proc_cache_fail; #endif return 0; -reg_notif_fail: - kmem_cache_destroy(mrt_cachep); #ifdef CONFIG_PROC_FS 
-proc_vif_fail: - unregister_netdevice_notifier(&ip6_mr_notifier); proc_cache_fail: proc_net_remove(&init_net, "ip6_mr_vif"); +proc_vif_fail: + unregister_netdevice_notifier(&ip6_mr_notifier); #endif +reg_notif_fail: + del_timer(&ipmr_expire_timer); + kmem_cache_destroy(mrt_cachep); return err; } diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index d4576a9c154..3a0b3be7ece 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -63,7 +63,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; spi = htonl(ntohs(ipcomph->cpi)); - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return; @@ -76,7 +76,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t = NULL; - t = xfrm_state_alloc(); + t = xfrm_state_alloc(&init_net); if (!t) goto out; @@ -114,7 +114,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr); if (spi) - t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr, + t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4e5eac301f9..2aa294be0c7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -366,11 +366,16 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } /* routing header option needs extra check */ + retv = -EINVAL; if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: + if (rthdr->hdrlen != 2 || + rthdr->segments_left != 1) + goto sticky_done; + break; #endif default: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 
a76199ecad2..0f389603283 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1466,7 +1466,7 @@ static void mld_sendpack(struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; @@ -1817,7 +1817,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len, IPPROTO_ICMPV6, - csum_partial((__u8 *) hdr, len, 0)); + csum_partial(hdr, len, 0)); idev = in6_dev_get(skb->dev); @@ -1831,7 +1831,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 31295c8f619..f995e19c87a 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -205,6 +205,7 @@ static inline int mip6_report_rl_allow(struct timeval *stamp, static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) { + struct net *net = xs_net(x); struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; struct ipv6_destopt_hao *hao = NULL; struct xfrm_selector sel; @@ -247,7 +248,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct sel.sport_mask = htons(~0); sel.ifindex = fl->oif; - err = km_report(IPPROTO_DSTOPTS, &sel, + err = km_report(net, IPPROTO_DSTOPTS, &sel, (hao ? 
(xfrm_address_t *)&hao->addr : NULL)); out: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2a6752dae09..e4acc212345 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -437,38 +437,20 @@ static void pndisc_destructor(struct pneigh_entry *n) ipv6_dev_mc_dec(dev, &maddr); } -/* - * Send a Neighbour Advertisement - */ -static void __ndisc_send(struct net_device *dev, - struct neighbour *neigh, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, const struct in6_addr *target, - int llinfo) +struct sk_buff *ndisc_build_skb(struct net_device *dev, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h, + const struct in6_addr *target, + int llinfo) { - struct flowi fl; - struct dst_entry *dst; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; struct sk_buff *skb; struct icmp6hdr *hdr; - struct inet6_dev *idev; int len; int err; - u8 *opt, type; - - type = icmp6h->icmp6_type; - - icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); - - dst = icmp6_dst_alloc(dev, neigh, daddr); - if (!dst) - return; - - err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) - return; + u8 *opt; if (!dev->addr_len) llinfo = 0; @@ -485,8 +467,7 @@ static void __ndisc_send(struct net_device *dev, ND_PRINTK0(KERN_ERR "ICMPv6 ND: %s() failed to allocate an skb.\n", __func__); - dst_release(dst); - return; + return NULL; } skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -510,9 +491,45 @@ static void __ndisc_send(struct net_device *dev, hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len, IPPROTO_ICMPV6, - csum_partial((__u8 *) hdr, + csum_partial(hdr, len, 0)); + return skb; +} + +EXPORT_SYMBOL(ndisc_build_skb); + +void ndisc_send_skb(struct sk_buff *skb, + struct net_device *dev, + struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h) +{ + struct flowi fl; + struct dst_entry *dst; + struct net *net = dev_net(dev); + struct 
sock *sk = net->ipv6.ndisc_sk; + struct inet6_dev *idev; + int err; + u8 type; + + type = icmp6h->icmp6_type; + + icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); + + dst = icmp6_dst_alloc(dev, neigh, daddr); + if (!dst) { + kfree_skb(skb); + return; + } + + err = xfrm_lookup(net, &dst, &fl, NULL, 0); + if (err < 0) { + kfree_skb(skb); + return; + } + skb->dst = dst; idev = in6_dev_get(dst->dev); @@ -529,6 +546,27 @@ static void __ndisc_send(struct net_device *dev, in6_dev_put(idev); } +EXPORT_SYMBOL(ndisc_send_skb); + +/* + * Send a Neighbour Discover packet + */ +static void __ndisc_send(struct net_device *dev, + struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h, const struct in6_addr *target, + int llinfo) +{ + struct sk_buff *skb; + + skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo); + if (!skb) + return; + + ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h); +} + static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, @@ -1486,7 +1524,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, if (dst == NULL) return; - err = xfrm_lookup(&dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); if (err) return; @@ -1574,7 +1612,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr, len, IPPROTO_ICMPV6, - csum_partial((u8 *) icmph, len, 0)); + csum_partial(icmph, len, 0)); buff->dst = dst; idev = in6_dev_get(dst->dev); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 0b88c563279..834cea69fb5 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -29,7 +29,7 @@ int ip6_route_me_harder(struct sk_buff *skb) #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET6) == 0) - if 
(xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) return -1; #endif diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 0981b4ccb8b..5a2d0a41694 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -97,7 +97,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return; - if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) + if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0)) return; hh_len = (dst->dev->hard_header_len + 15)&~15; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 07f0b76e742..97c17fdd6f7 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -132,7 +132,7 @@ static struct snmp_mib snmp6_udplite6_list[] = { static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) { - static char name[32]; + char name[32]; int i; /* print by name -- deprecated items */ @@ -144,7 +144,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) p = icmp6type2name[icmptype]; if (!p) /* don't print un-named types here */ continue; - (void) snprintf(name, sizeof(name)-1, "Icmp6%s%s", + snprintf(name, sizeof(name), "Icmp6%s%s", i & 0x100 ? "Out" : "In", p); seq_printf(seq, "%-32s\t%lu\n", name, snmp_fold_field(mib, i)); @@ -157,7 +157,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) val = snmp_fold_field(mib, i); if (!val) continue; - (void) snprintf(name, sizeof(name)-1, "Icmp6%sType%u", + snprintf(name, sizeof(name), "Icmp6%sType%u", i & 0x100 ? 
"Out" : "In", i & 0xff); seq_printf(seq, "%-32s\t%lu\n", name, val); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2ba04d41dc2..61f6827e590 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -860,7 +860,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4d40dc214b2..9da1ece466a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -108,7 +108,6 @@ static struct dst_ops ip6_dst_ops_template = { .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, .local_out = __ip6_local_out, - .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; @@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = { .destroy = ip6_dst_destroy, .check = ip6_dst_check, .update_pmtu = ip6_rt_blackhole_update_pmtu, - .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b7a50e96850..d3467e563f0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -62,8 +62,8 @@ #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) -static int ipip6_fb_tunnel_init(struct net_device *dev); -static int ipip6_tunnel_init(struct net_device *dev); +static void ipip6_fb_tunnel_init(struct net_device *dev); +static void ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static int sit_net_id; @@ -188,7 +188,8 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, } nt = netdev_priv(dev); - dev->init = ipip6_tunnel_init; + ipip6_tunnel_init(dev); + nt->parms = *parms; if (parms->i_flags & SIT_ISATAP) @@ -926,13 +927,17 @@ static int ipip6_tunnel_change_mtu(struct net_device 
*dev, int new_mtu) return 0; } +static const struct net_device_ops ipip6_netdev_ops = { + .ndo_uninit = ipip6_tunnel_uninit, + .ndo_start_xmit = ipip6_tunnel_xmit, + .ndo_do_ioctl = ipip6_tunnel_ioctl, + .ndo_change_mtu = ipip6_tunnel_change_mtu, +}; + static void ipip6_tunnel_setup(struct net_device *dev) { - dev->uninit = ipip6_tunnel_uninit; + dev->netdev_ops = &ipip6_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ipip6_tunnel_xmit; - dev->do_ioctl = ipip6_tunnel_ioctl; - dev->change_mtu = ipip6_tunnel_change_mtu; dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); @@ -943,11 +948,9 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static int ipip6_tunnel_init(struct net_device *dev) +static void ipip6_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - - tunnel = netdev_priv(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; strcpy(tunnel->parms.name, dev->name); @@ -956,11 +959,9 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); - - return 0; } -static int ipip6_fb_tunnel_init(struct net_device *dev) +static void ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -977,7 +978,6 @@ static int ipip6_fb_tunnel_init(struct net_device *dev) dev_hold(dev); sitn->tunnels_wc[0] = tunnel; - return 0; } static struct xfrm_tunnel sit_handler = { @@ -1025,16 +1025,17 @@ static int sit_init_net(struct net *net) err = -ENOMEM; goto err_alloc_dev; } - - sitn->fb_tunnel_dev->init = ipip6_fb_tunnel_init; dev_net_set(sitn->fb_tunnel_dev, net); + ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); + if ((err = register_netdev(sitn->fb_tunnel_dev))) goto err_reg_dev; return 0; err_reg_dev: + dev_put(sitn->fb_tunnel_dev); free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: /* nothing */ 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 676c80b5b14..711175e0571 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -259,7 +259,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out_free; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 984276463a8..8702b06cb60 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -260,7 +260,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -390,7 +391,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; goto out; } @@ -492,7 +493,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) goto done; if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto done; skb = tcp_make_synack(sk, dst, req); @@ -501,7 +502,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) th->check = tcp_v6_check(th, skb->len, &treq->loc_addr, &treq->rmt_addr, - csum_partial((char *)th, skb->len, skb->csum)); + csum_partial(th, skb->len, skb->csum)); ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); err = ip6_xmit(sk, skb, &fl, opt, 0); @@ -915,7 +916,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = 
csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, - csum_partial((char *)th, th->doff<<2, + csum_partial(th, th->doff<<2, skb->csum)); } } @@ -997,7 +998,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - buff->csum = csum_partial((char *)t1, tot_len, 0); + buff->csum = csum_partial(t1, tot_len, 0); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); @@ -1018,7 +1019,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, * namespace */ if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { - if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { + if (xfrm_lookup(net, &buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) @@ -1316,7 +1317,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } @@ -1829,7 +1830,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - atomic_inc(&tcp_sockets_allocated); + percpu_counter_inc(&tcp_sockets_allocated); return 0; } @@ -2043,6 +2044,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 32d914db6c4..38390dd1963 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -98,7 +98,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node, *next; + struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); 
struct udp_hslot *hslot = &udptable->hash[hash]; @@ -108,19 +108,21 @@ static struct sock *__udp6_lib_lookup(struct net *net, begin: result = NULL; badness = -1; - sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { - /* - * lockless reader, and SLAB_DESTROY_BY_RCU items: - * We must check this item was not moved to another chain - */ - if (udp_hashfn(net, sk->sk_hash) != hash) - goto begin; + sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash) + goto begin; + if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; @@ -165,6 +167,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int peeked; int err; int is_udplite = IS_UDPLITE(sk); + int is_udp4; if (addr_len) *addr_len=sizeof(struct sockaddr_in6); @@ -185,6 +188,8 @@ try_again: else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; + is_udp4 = (skb->protocol == htons(ETH_P_IP)); + /* * If checksum is needed at all, try to do it while copying the * data. 
If the data is truncated, or if we only want a partial @@ -207,9 +212,14 @@ try_again: if (err) goto out_free; - if (!peeked) - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + if (!peeked) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + } sock_recv_timestamp(msg, sk, skb); @@ -223,7 +233,7 @@ try_again: sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; - if (skb->protocol == htons(ETH_P_IP)) + if (is_udp4) ipv6_addr_set(&sin6->sin6_addr, 0, 0, htonl(0xffff), ip_hdr(skb)->saddr); else { @@ -234,7 +244,7 @@ try_again: } } - if (skb->protocol == htons(ETH_P_IP)) { + if (is_udp4) { if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } else { @@ -255,8 +265,14 @@ out: csum_copy_err: lock_sock(sk); - if (!skb_kill_datagram(sk, skb, flags)) - UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + if (!skb_kill_datagram(sk, skb, flags)) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + } release_sock(sk); if (flags & MSG_DONTWAIT) @@ -355,19 +371,19 @@ drop: return -1; } -static struct sock *udp_v6_mcast_next(struct sock *sk, +static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, __be16 loc_port, struct in6_addr *loc_addr, __be16 rmt_port, struct in6_addr *rmt_addr, int dif) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *s = sk; unsigned short num = ntohs(loc_port); - sk_for_each_from(s, node) { + sk_nulls_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); - if (sock_net(s) != sock_net(sk)) + if (!net_eq(sock_net(s), net)) continue; if (s->sk_hash == num && s->sk_family == PF_INET6) { @@ -409,16 +425,16 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif; spin_lock(&hslot->lock); - sk = sk_head(&hslot->head); + sk = 
sk_nulls_head(&hslot->head); dif = inet6_iif(skb); - sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { kfree_skb(skb); goto out; } sk2 = sk; - while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, + while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); if (buff) { @@ -833,7 +849,8 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a71c7ddcb41..9084582d236 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -58,6 +58,7 @@ EXPORT_SYMBOL(xfrm6_rcv); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { + struct net *net = dev_net(skb->dev); struct xfrm_state *x = NULL; int i = 0; @@ -67,7 +68,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, sp = secpath_dup(skb->sp); if (!sp) { - XFRM_INC_STATS(LINUX_MIB_XFRMINERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } if (skb->sp) @@ -76,7 +77,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, } if (1 + skb->sp->len == XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } @@ -100,7 +101,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; } - x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); + x = xfrm_state_lookup_byaddr(net, dst, src, proto, AF_INET6); if (!x) continue; @@ -122,7 +123,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, } if (!x) { - 
XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound_simple(skb, AF_INET6); goto drop; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 604bc0a96c0..97ab068e8cc 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -27,7 +27,8 @@ static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; -static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, +static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct flowi fl = {}; @@ -38,7 +39,7 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); err = dst->error; if (dst->error) { @@ -49,12 +50,13 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, return dst; } -static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +static int xfrm6_get_saddr(struct net *net, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(0, NULL, daddr); + dst = xfrm6_dst_lookup(net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; @@ -220,7 +222,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm6_garbage_collect(struct dst_ops *ops) { - xfrm6_policy_afinfo.garbage_collect(); + xfrm6_policy_afinfo.garbage_collect(&init_net); return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); } @@ -277,7 +279,6 @@ static struct dst_ops xfrm6_dst_ops = { .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, .gc_thresh = 1024, - .entry_size = sizeof(struct xfrm_dst), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 89884a4f23a..0e685b05496 100644 --- 
a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -19,8 +19,6 @@ #include <net/ipv6.h> #include <net/addrconf.h> -static struct xfrm_state_afinfo xfrm6_state_afinfo; - static void __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, @@ -34,6 +32,7 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); x->sel.sport_mask = htons(0xffff); + x->sel.family = AF_INET6; x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; x->sel.proto = fl->proto; diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 9a1cd87e714..774d73a7685 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -207,7 +207,7 @@ static struct irlan_cb *irlan_open(__u32 saddr, __u32 daddr) if (!dev) return NULL; - self = dev->priv; + self = netdev_priv(dev); self->dev = dev; /* diff --git a/net/key/af_key.c b/net/key/af_key.c index e55e0441e4d..f8bd8df5e25 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -27,6 +27,7 @@ #include <linux/proc_fs.h> #include <linux/init.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/xfrm.h> #include <net/sock.h> @@ -34,15 +35,16 @@ #define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x)) #define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x)) - -/* List of all pfkey sockets. */ -static HLIST_HEAD(pfkey_table); +static int pfkey_net_id; +struct netns_pfkey { + /* List of all pfkey sockets. 
*/ + struct hlist_head table; + atomic_t socks_nr; +}; static DECLARE_WAIT_QUEUE_HEAD(pfkey_table_wait); static DEFINE_RWLOCK(pfkey_table_lock); static atomic_t pfkey_table_users = ATOMIC_INIT(0); -static atomic_t pfkey_socks_nr = ATOMIC_INIT(0); - struct pfkey_sock { /* struct sock must be the first member of struct pfkey_sock */ struct sock sk; @@ -89,6 +91,9 @@ static void pfkey_terminate_dump(struct pfkey_sock *pfk) static void pfkey_sock_destruct(struct sock *sk) { + struct net *net = sock_net(sk); + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); + pfkey_terminate_dump(pfkey_sk(sk)); skb_queue_purge(&sk->sk_receive_queue); @@ -100,7 +105,7 @@ static void pfkey_sock_destruct(struct sock *sk) WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(atomic_read(&sk->sk_wmem_alloc)); - atomic_dec(&pfkey_socks_nr); + atomic_dec(&net_pfkey->socks_nr); } static void pfkey_table_grab(void) @@ -151,8 +156,11 @@ static const struct proto_ops pfkey_ops; static void pfkey_insert(struct sock *sk) { + struct net *net = sock_net(sk); + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); + pfkey_table_grab(); - sk_add_node(sk, &pfkey_table); + sk_add_node(sk, &net_pfkey->table); pfkey_table_ungrab(); } @@ -171,12 +179,10 @@ static struct proto key_proto = { static int pfkey_create(struct net *net, struct socket *sock, int protocol) { + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; int err; - if (net != &init_net) - return -EAFNOSUPPORT; - if (!capable(CAP_NET_ADMIN)) return -EPERM; if (sock->type != SOCK_RAW) @@ -195,7 +201,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol) sk->sk_family = PF_KEY; sk->sk_destruct = pfkey_sock_destruct; - atomic_inc(&pfkey_socks_nr); + atomic_inc(&net_pfkey->socks_nr); pfkey_insert(sk); @@ -255,8 +261,10 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 static int 
pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, - int broadcast_flags, struct sock *one_sk) + int broadcast_flags, struct sock *one_sk, + struct net *net) { + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; struct hlist_node *node; struct sk_buff *skb2 = NULL; @@ -269,7 +277,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, return -ENOMEM; pfkey_lock_table(); - sk_for_each(sk, node, &pfkey_table) { + sk_for_each(sk, node, &net_pfkey->table) { struct pfkey_sock *pfk = pfkey_sk(sk); int err2; @@ -328,7 +336,7 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) hdr->sadb_msg_seq = 0; hdr->sadb_msg_errno = rc; pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, - &pfk->sk); + &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = NULL; } @@ -367,7 +375,7 @@ static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk); + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -645,7 +653,7 @@ int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, xfrm_address_t *xaddr) xaddr); } -static struct xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void **ext_hdrs) +static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_msg *hdr, void **ext_hdrs) { struct sadb_sa *sa; struct sadb_address *addr; @@ -683,7 +691,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void ** if (!xaddr) return NULL; - return xfrm_state_lookup(xaddr, sa->sadb_sa_spi, proto, family); + return xfrm_state_lookup(net, xaddr, sa->sadb_sa_spi, proto, family); } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) @@ -1058,7 +1066,8 @@ static inline struct sk_buff *pfkey_xfrm_state2msg_expire(struct xfrm_state *x, return __pfkey_xfrm_state2msg(x, 0, hsc); } -static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, +static 
struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, + struct sadb_msg *hdr, void **ext_hdrs) { struct xfrm_state *x; @@ -1122,7 +1131,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t))) return ERR_PTR(-EINVAL); - x = xfrm_state_alloc(); + x = xfrm_state_alloc(net); if (x == NULL) return ERR_PTR(-ENOBUFS); @@ -1298,6 +1307,7 @@ static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, struct sadb_msg static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); struct sk_buff *resp_skb; struct sadb_x_sa2 *sa2; struct sadb_address *saddr, *daddr; @@ -1348,7 +1358,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (hdr->sadb_msg_seq) { - x = xfrm_find_acq_byseq(hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, hdr->sadb_msg_seq); if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; @@ -1356,7 +1366,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (!x) - x = xfrm_find_acq(mode, reqid, proto, xdaddr, xsaddr, 1, family); + x = xfrm_find_acq(net, mode, reqid, proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; @@ -1389,13 +1399,14 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h xfrm_state_put(x); - pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk); + pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net); return 0; } static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); struct xfrm_state *x; if (hdr->sadb_msg_len != sizeof(struct sadb_msg)/8) @@ -1404,14 +1415,14 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg * if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; - x = 
xfrm_find_acq_byseq(hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, hdr->sadb_msg_seq); if (x == NULL) return 0; spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_ACQ) { x->km.state = XFRM_STATE_ERROR; - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); } spin_unlock_bh(&x->lock); xfrm_state_put(x); @@ -1476,18 +1487,19 @@ static int key_notify_sa(struct xfrm_state *x, struct km_event *c) hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->pid; - pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); return 0; } static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); struct xfrm_state *x; int err; struct km_event c; - x = pfkey_msg2xfrm_state(hdr, ext_hdrs); + x = pfkey_msg2xfrm_state(net, hdr, ext_hdrs); if (IS_ERR(x)) return PTR_ERR(x); @@ -1521,6 +1533,7 @@ out: static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); struct xfrm_state *x; struct km_event c; int err; @@ -1530,7 +1543,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; - x = pfkey_xfrm_state_lookup(hdr, ext_hdrs); + x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; @@ -1562,6 +1575,7 @@ out: static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); __u8 proto; struct sk_buff *out_skb; struct sadb_msg *out_hdr; @@ -1572,7 +1586,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; - x = pfkey_xfrm_state_lookup(hdr, ext_hdrs); + x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; @@ -1590,7 +1604,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg 
*hdr, out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1691,7 +1705,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg return -ENOBUFS; } - pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk); + pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, sock_net(sk)); return 0; } @@ -1713,13 +1727,14 @@ static int key_notify_sa_flush(struct km_event *c) hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); unsigned proto; struct km_event c; struct xfrm_audit audit_info; @@ -1732,13 +1747,14 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; - err = xfrm_state_flush(proto, &audit_info); + err = xfrm_state_flush(net, proto, &audit_info); if (err) return err; c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; + c.net = net; km_state_notify(NULL, &c); return 0; @@ -1768,7 +1784,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, - &pfk->sk); + &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; @@ -1776,7 +1792,8 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) static int pfkey_dump_sa(struct pfkey_sock *pfk) { - return xfrm_state_walk(&pfk->dump.u.state, dump_sa, 
(void *) pfk); + struct net *net = sock_net(&pfk->sk); + return xfrm_state_walk(net, &pfk->dump.u.state, dump_sa, (void *) pfk); } static void pfkey_dump_sa_done(struct pfkey_sock *pfk) @@ -1817,7 +1834,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg * return -EINVAL; pfk->promisc = satype; } - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL); + pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } @@ -1833,7 +1850,7 @@ static int check_reqid(struct xfrm_policy *xp, int dir, int count, void *ptr) return 0; } -static u32 gen_reqid(void) +static u32 gen_reqid(struct net *net) { struct xfrm_policy_walk walk; u32 start; @@ -1846,7 +1863,7 @@ static u32 gen_reqid(void) if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); - rc = xfrm_policy_walk(&walk, check_reqid, (void*)&reqid); + rc = xfrm_policy_walk(net, &walk, check_reqid, (void*)&reqid); xfrm_policy_walk_done(&walk); if (rc != -EEXIST) return reqid; @@ -1857,6 +1874,7 @@ static u32 gen_reqid(void) static int parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) { + struct net *net = xp_net(xp); struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr; int mode; @@ -1876,7 +1894,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) t->reqid = rq->sadb_x_ipsecrequest_reqid; if (t->reqid > IPSEC_MANUAL_REQID_MAX) t->reqid = 0; - if (!t->reqid && !(t->reqid = gen_reqid())) + if (!t->reqid && !(t->reqid = gen_reqid(net))) return -ENOBUFS; } @@ -2075,7 +2093,6 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in req_size += socklen * 2; } else { size -= 2*socklen; - socklen = 0; } rq = (void*)skb_put(skb, req_size); pol->sadb_x_policy_len += req_size/8; @@ -2148,7 +2165,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c out_hdr->sadb_msg_errno = 0; 
out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); out: return 0; @@ -2156,6 +2173,7 @@ out: static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); int err = 0; struct sadb_lifetime *lifetime; struct sadb_address *sa; @@ -2175,7 +2193,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) return -EINVAL; - xp = xfrm_policy_alloc(GFP_KERNEL); + xp = xfrm_policy_alloc(net, GFP_KERNEL); if (xp == NULL) return -ENOBUFS; @@ -2276,6 +2294,7 @@ out: static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); int err; struct sadb_address *sa; struct sadb_x_policy *pol; @@ -2325,7 +2344,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_bysel_ctx(net, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); @@ -2373,7 +2392,7 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp)); err = 0; out: @@ -2558,6 +2577,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); unsigned int dir; int err = 0, delete; struct sadb_x_policy *pol; @@ -2572,8 +2592,8 @@ static int pfkey_spdget(struct 
sock *sk, struct sk_buff *skb, struct sadb_msg *h return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, - delete, &err); + xp = xfrm_policy_byid(net, XFRM_POLICY_TYPE_MAIN, dir, + pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; @@ -2626,7 +2646,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, - &pfk->sk); + &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; @@ -2634,7 +2654,8 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) static int pfkey_dump_sp(struct pfkey_sock *pfk) { - return xfrm_policy_walk(&pfk->dump.u.policy, dump_sp, (void *) pfk); + struct net *net = sock_net(&pfk->sk); + return xfrm_policy_walk(net, &pfk->dump.u.policy, dump_sp, (void *) pfk); } static void pfkey_dump_sp_done(struct pfkey_sock *pfk) @@ -2673,13 +2694,14 @@ static int key_notify_policy_flush(struct km_event *c) hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL); + pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); struct km_event c; struct xfrm_audit audit_info; int err; @@ -2687,13 +2709,14 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; - err = xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); + err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); if (err) return err; c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = 
hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; + c.net = net; km_policy_notify(NULL, 0, &c); return 0; @@ -2733,7 +2756,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg * int err; pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, - BROADCAST_PROMISC_ONLY, NULL); + BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); @@ -2936,13 +2959,16 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) out_hdr->sadb_msg_seq = 0; out_hdr->sadb_msg_pid = 0; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); return 0; } static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) { - if (atomic_read(&pfkey_socks_nr) == 0) + struct net *net = x ? xs_net(x) : c->net; + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); + + if (atomic_read(&net_pfkey->socks_nr) == 0) return 0; switch (c->event) { @@ -3104,12 +3130,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_ctx->ctx_len); } - return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { + struct net *net = sock_net(sk); struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; @@ -3142,7 +3169,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir > IPSEC_DIR_OUTBOUND)) return NULL; - xp = xfrm_policy_alloc(GFP_ATOMIC); + xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; @@ -3189,6 +3216,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, return xp; out: 
+ xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } @@ -3300,7 +3328,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, n_port->sadb_x_nat_t_port_port = sport; n_port->sadb_x_nat_t_port_reserved = 0; - return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } #ifdef CONFIG_NET_KEY_MIGRATE @@ -3491,7 +3519,7 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } /* broadcast migrate message to sockets */ - pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); return 0; @@ -3645,6 +3673,8 @@ static int pfkey_seq_show(struct seq_file *f, void *v) static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) { + struct net *net = seq_file_net(f); + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *s; struct hlist_node *node; loff_t pos = *ppos; @@ -3653,7 +3683,7 @@ static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) if (pos == 0) return SEQ_START_TOKEN; - sk_for_each(s, node, &pfkey_table) + sk_for_each(s, node, &net_pfkey->table) if (pos-- == 1) return s; @@ -3662,9 +3692,12 @@ static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) { + struct net *net = seq_file_net(f); + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); + ++*ppos; return (v == SEQ_START_TOKEN) ? 
- sk_head(&pfkey_table) : + sk_head(&net_pfkey->table) : sk_next((struct sock *)v); } @@ -3682,38 +3715,39 @@ static struct seq_operations pfkey_seq_ops = { static int pfkey_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &pfkey_seq_ops); + return seq_open_net(inode, file, &pfkey_seq_ops, + sizeof(struct seq_net_private)); } static struct file_operations pfkey_proc_ops = { .open = pfkey_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; -static int pfkey_init_proc(void) +static int __net_init pfkey_init_proc(struct net *net) { struct proc_dir_entry *e; - e = proc_net_fops_create(&init_net, "pfkey", 0, &pfkey_proc_ops); + e = proc_net_fops_create(net, "pfkey", 0, &pfkey_proc_ops); if (e == NULL) return -ENOMEM; return 0; } -static void pfkey_exit_proc(void) +static void pfkey_exit_proc(struct net *net) { - proc_net_remove(&init_net, "pfkey"); + proc_net_remove(net, "pfkey"); } #else -static inline int pfkey_init_proc(void) +static int __net_init pfkey_init_proc(struct net *net) { return 0; } -static inline void pfkey_exit_proc(void) +static void pfkey_exit_proc(struct net *net) { } #endif @@ -3729,10 +3763,51 @@ static struct xfrm_mgr pfkeyv2_mgr = .migrate = pfkey_send_migrate, }; +static int __net_init pfkey_net_init(struct net *net) +{ + struct netns_pfkey *net_pfkey; + int rv; + + net_pfkey = kmalloc(sizeof(struct netns_pfkey), GFP_KERNEL); + if (!net_pfkey) { + rv = -ENOMEM; + goto out_kmalloc; + } + INIT_HLIST_HEAD(&net_pfkey->table); + atomic_set(&net_pfkey->socks_nr, 0); + rv = net_assign_generic(net, pfkey_net_id, net_pfkey); + if (rv < 0) + goto out_assign; + rv = pfkey_init_proc(net); + if (rv < 0) + goto out_proc; + return 0; + +out_proc: +out_assign: + kfree(net_pfkey); +out_kmalloc: + return rv; +} + +static void __net_exit pfkey_net_exit(struct net *net) +{ + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); + + pfkey_exit_proc(net); + 
BUG_ON(!hlist_empty(&net_pfkey->table)); + kfree(net_pfkey); +} + +static struct pernet_operations pfkey_net_ops = { + .init = pfkey_net_init, + .exit = pfkey_net_exit, +}; + static void __exit ipsec_pfkey_exit(void) { + unregister_pernet_gen_subsys(pfkey_net_id, &pfkey_net_ops); xfrm_unregister_km(&pfkeyv2_mgr); - pfkey_exit_proc(); sock_unregister(PF_KEY); proto_unregister(&key_proto); } @@ -3747,16 +3822,16 @@ static int __init ipsec_pfkey_init(void) err = sock_register(&pfkey_family_ops); if (err != 0) goto out_unregister_key_proto; - err = pfkey_init_proc(); + err = xfrm_register_km(&pfkeyv2_mgr); if (err != 0) goto out_sock_unregister; - err = xfrm_register_km(&pfkeyv2_mgr); + err = register_pernet_gen_subsys(&pfkey_net_id, &pfkey_net_ops); if (err != 0) - goto out_remove_proc_entry; + goto out_xfrm_unregister_km; out: return err; -out_remove_proc_entry: - pfkey_exit_proc(); +out_xfrm_unregister_km: + xfrm_unregister_km(&pfkeyv2_mgr); out_sock_unregister: sock_unregister(PF_KEY); out_unregister_key_proto: diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 5bcc452a247..56fd85ab358 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -103,7 +103,6 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) * llc_ui_send_data - send data via reliable llc2 connection * @sk: Connection the socket is using. * @skb: Data the user wishes to send. - * @addr: Source and destination fields provided by the user. * @noblock: can we block waiting for data? * * Send data via reliable llc2 connection. diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 7f710a27e91..60c16162474 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -16,20 +16,20 @@ menu "Rate control algorithm selection" config MAC80211_RC_PID bool "PID controller based rate control algorithm" if EMBEDDED - default y ---help--- This option enables a TX rate control algorithm for mac80211 that uses a PID controller to select the TX rate. 
config MAC80211_RC_MINSTREL - bool "Minstrel" + bool "Minstrel" if EMBEDDED + default y ---help--- This option enables the 'minstrel' TX rate control algorithm choice prompt "Default rate control algorithm" - default MAC80211_RC_DEFAULT_PID + default MAC80211_RC_DEFAULT_MINSTREL ---help--- This option selects the default rate control algorithm mac80211 will use. Note that this default can still be @@ -55,8 +55,8 @@ endchoice config MAC80211_RC_DEFAULT string - default "pid" if MAC80211_RC_DEFAULT_PID default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL + default "pid" if MAC80211_RC_DEFAULT_PID default "" endmenu diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 91f56a48e2b..16423f94801 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -26,6 +26,8 @@ static bool nl80211_type_check(enum nl80211_iftype type) #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: #endif + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: return true; default: @@ -1046,11 +1048,53 @@ static int ieee80211_change_bss(struct wiphy *wiphy, changed |= BSS_CHANGED_ERP_SLOT; } + if (params->basic_rates) { + int i, j; + u32 rates = 0; + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_supported_band *sband = + wiphy->bands[local->oper_channel->band]; + + for (i = 0; i < params->basic_rates_len; i++) { + int rate = (params->basic_rates[i] & 0x7f) * 5; + for (j = 0; j < sband->n_bitrates; j++) { + if (sband->bitrates[j].bitrate == rate) + rates |= BIT(j); + } + } + sdata->vif.bss_conf.basic_rates = rates; + changed |= BSS_CHANGED_BASIC_RATES; + } + ieee80211_bss_info_change_notify(sdata, changed); return 0; } +static int ieee80211_set_txq_params(struct wiphy *wiphy, + struct ieee80211_txq_params *params) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_tx_queue_params p; + + if (!local->ops->conf_tx) + return -EOPNOTSUPP; + + memset(&p, 0, sizeof(p)); + p.aifs = params->aifs; + p.cw_max = 
params->cwmax; + p.cw_min = params->cwmin; + p.txop = params->txop; + if (local->ops->conf_tx(local_to_hw(local), params->queue, &p)) { + printk(KERN_DEBUG "%s: failed to set TX queue " + "parameters for queue %d\n", local->mdev->name, + params->queue); + return -EINVAL; + } + + return 0; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1077,4 +1121,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_mesh_params = ieee80211_get_mesh_params, #endif .change_bss = ieee80211_change_bss, + .set_txq_params = ieee80211_set_txq_params, }; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 08009d4b7d6..a1eed7032c9 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -36,7 +36,7 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, ht_cap->ht_supported = true; - ht_cap->cap = ht_cap->cap & sband->ht_cap.cap; + ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & sband->ht_cap.cap; ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS; ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS; @@ -987,7 +987,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, { struct ieee80211_hw *hw = &local->hw; u16 capab; - u16 tid; + u16 tid, start_seq_num; u8 *state; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); @@ -1024,6 +1024,14 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, local->hw.ampdu_queues) ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + if (local->ops->ampdu_action) { + (void)local->ops->ampdu_action(hw, + IEEE80211_AMPDU_TX_RESUME, + &sta->sta, tid, &start_seq_num); + } +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Resuming TX aggregation for tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ spin_unlock_bh(&sta->lock); } else { sta->ampdu_mlme.addba_req_num[tid]++; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2c91108e390..155a2041001 100644 --- a/net/mac80211/ieee80211_i.h 
+++ b/net/mac80211/ieee80211_i.h @@ -212,9 +212,6 @@ struct ieee80211_if_ap { struct list_head vlans; - u8 ssid[IEEE80211_MAX_SSID_LEN]; - size_t ssid_len; - /* yes, this looks ugly, but guarantees that we can later use * bitmap_empty :) * NB: don't touch this bitmap, use sta_info_{set,clear}_tim_bit */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index cde145221b6..46082125f3e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -229,8 +229,14 @@ static int ieee80211_open(struct net_device *dev) if (res) goto err_stop; - if (ieee80211_vif_is_mesh(&sdata->vif)) + if (ieee80211_vif_is_mesh(&sdata->vif)) { + local->fif_other_bss++; + netif_addr_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_addr_unlock_bh(local->mdev); + ieee80211_start_mesh(sdata); + } changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); ieee80211_enable_keys(sdata); @@ -456,8 +462,15 @@ static int ieee80211_stop(struct net_device *dev) /* fall through */ case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) { - /* allmulti is always set on mesh ifaces */ + /* other_bss and allmulti are always set on mesh + * ifaces */ + local->fif_other_bss--; atomic_dec(&local->iff_allmultis); + + netif_addr_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_addr_unlock_bh(local->mdev); + ieee80211_stop_mesh(sdata); } /* fall through */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index fa0cc7a1e6b..cec9b6d3e1c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -171,19 +171,13 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) conf.changed = changed; if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) { + sdata->vif.type == NL80211_IFTYPE_ADHOC) conf.bssid = sdata->u.sta.bssid; - conf.ssid = sdata->u.sta.ssid; - conf.ssid_len = sdata->u.sta.ssid_len; - } else if (sdata->vif.type == NL80211_IFTYPE_AP) { + else if 
(sdata->vif.type == NL80211_IFTYPE_AP) conf.bssid = sdata->dev->dev_addr; - conf.ssid = sdata->u.ap.ssid; - conf.ssid_len = sdata->u.ap.ssid_len; - } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + else if (ieee80211_vif_is_mesh(&sdata->vif)) { u8 zero[ETH_ALEN] = { 0 }; conf.bssid = zero; - conf.ssid = zero; - conf.ssid_len = 0; } else { WARN_ON(1); return -EINVAL; @@ -192,9 +186,6 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID))) return -EINVAL; - if (WARN_ON(!conf.ssid && (changed & IEEE80211_IFCC_SSID))) - return -EINVAL; - return local->ops->config_interface(local_to_hw(local), &sdata->vif, &conf); } @@ -731,7 +722,6 @@ EXPORT_SYMBOL(ieee80211_alloc_hw); int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - const char *name; int result; enum ieee80211_band band; struct net_device *mdev; @@ -796,8 +786,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) mdev->header_ops = &ieee80211_header_ops; mdev->set_multicast_list = ieee80211_master_set_multicast_list; - name = wiphy_dev(local->hw.wiphy)->driver->name; - local->hw.workqueue = create_freezeable_workqueue(name); + local->hw.workqueue = + create_freezeable_workqueue(wiphy_name(local->hw.wiphy)); if (!local->hw.workqueue) { result = -ENOMEM; goto fail_workqueue; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index d3b6e1a648b..82f568e9436 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -238,7 +238,7 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) pos = skb_put(skb, 21); *pos++ = WLAN_EID_MESH_CONFIG; - *pos++ = MESH_CFG_LEN; + *pos++ = IEEE80211_MESH_CONFIG_LEN; /* Version */ *pos++ = 1; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index e10471c6ba4..c197ab545e5 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -145,9 +145,6 @@ struct mesh_rmc { }; -/* Mesh IEs constants */ -#define 
MESH_CFG_LEN 19 - /* * MESH_CFG_COMP_LEN Includes: * - Active path selection protocol ID. @@ -157,7 +154,7 @@ struct mesh_rmc { * Does not include mesh capabilities, which may vary across nodes in the same * mesh */ -#define MESH_CFG_CMP_LEN 17 +#define MESH_CFG_CMP_LEN (IEEE80211_MESH_CONFIG_LEN - 2) /* Default values, timeouts in ms */ #define MESH_TTL 5 diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index e8d573d592e..71fe6096123 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -763,7 +763,6 @@ enddiscovery: * * @skb: 802.11 frame to be sent * @sdata: network subif the frame will be sent through - * @fwd_frame: true if this frame was originally from a different host * * Returns: 0 if the next hop was found. Nonzero otherwise. If no next hop is * found, the function will start a path discovery and queue the frame so it is diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 29fafbe440b..7600ac9b87f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -14,7 +14,6 @@ #include <linux/delay.h> #include <linux/if_ether.h> #include <linux/skbuff.h> -#include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/wireless.h> #include <linux/random.h> @@ -803,6 +802,10 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); } +/* + * The disassoc 'reason' argument can be either our own reason + * if self disconnected or a reason code from the AP. 
+ */ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, bool deauth, bool self_disconnected, u16 reason) @@ -849,7 +852,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_sta_send_apinfo(sdata, ifsta); - if (self_disconnected) + if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) ifsta->state = IEEE80211_STA_MLME_DISABLED; sta_info_unlink(&sta); @@ -1123,7 +1126,8 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) - printk(KERN_DEBUG "%s: deauthenticated\n", sdata->dev->name); + printk(KERN_DEBUG "%s: deauthenticated (Reason: %u)\n", + sdata->dev->name, reason_code); if (ifsta->state == IEEE80211_STA_MLME_AUTHENTICATE || ifsta->state == IEEE80211_STA_MLME_ASSOCIATE || @@ -1154,7 +1158,8 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); if (ifsta->flags & IEEE80211_STA_ASSOCIATED) - printk(KERN_DEBUG "%s: disassociated\n", sdata->dev->name); + printk(KERN_DEBUG "%s: disassociated (Reason: %u)\n", + sdata->dev->name, reason_code); if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; @@ -1162,7 +1167,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, IEEE80211_RETRY_AUTH_INTERVAL); } - ieee80211_set_disassoc(sdata, ifsta, false, false, 0); + ieee80211_set_disassoc(sdata, ifsta, false, false, reason_code); } @@ -1289,29 +1294,35 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, for (i = 0; i < elems.supp_rates_len; i++) { int rate = (elems.supp_rates[i] & 0x7f) * 5; + bool is_basic = !!(elems.supp_rates[i] & 0x80); if (rate > 110) have_higher_than_11mbit = true; for (j = 0; j < sband->n_bitrates; j++) { - if (sband->bitrates[j].bitrate == rate) + if 
(sband->bitrates[j].bitrate == rate) { rates |= BIT(j); - if (elems.supp_rates[i] & 0x80) - basic_rates |= BIT(j); + if (is_basic) + basic_rates |= BIT(j); + break; + } } } for (i = 0; i < elems.ext_supp_rates_len; i++) { int rate = (elems.ext_supp_rates[i] & 0x7f) * 5; + bool is_basic = !!(elems.supp_rates[i] & 0x80); if (rate > 110) have_higher_than_11mbit = true; for (j = 0; j < sband->n_bitrates; j++) { - if (sband->bitrates[j].bitrate == rate) + if (sband->bitrates[j].bitrate == rate) { rates |= BIT(j); - if (elems.ext_supp_rates[i] & 0x80) - basic_rates |= BIT(j); + if (is_basic) + basic_rates |= BIT(j); + break; + } } } @@ -1729,6 +1740,13 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ap_ht_cap_flags); } + if (elems.country_elem) { + /* Note we are only reviewing this on beacons + * for the BSSID we are associated to */ + regulatory_hint_11d(local->hw.wiphy, + elems.country_elem, elems.country_elem_len); + } + ieee80211_bss_info_change_notify(sdata, changed); } @@ -2414,7 +2432,6 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) { struct ieee80211_if_sta *ifsta; - int res; if (len > IEEE80211_MAX_SSID_LEN) return -EINVAL; @@ -2426,19 +2443,6 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size memcpy(ifsta->ssid, ssid, len); ifsta->ssid_len = len; ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; - - res = 0; - /* - * Hack! 
MLME code needs to be cleaned up to have different - * entry points for configuration and internal selection change - */ - if (netif_running(sdata->dev)) - res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); - if (res) { - printk(KERN_DEBUG "%s: Failed to config new SSID to " - "the low-level driver\n", sdata->dev->name); - return res; - } } if (len) @@ -2566,25 +2570,3 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) ieee80211_restart_sta_timer(sdata); rcu_read_unlock(); } - -/* driver notification call */ -void ieee80211_notify_mac(struct ieee80211_hw *hw, - enum ieee80211_notification_types notif_type) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata; - - switch (notif_type) { - case IEEE80211_NOTIFY_RE_ASSOC: - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type != NL80211_IFTYPE_STATION) - continue; - - ieee80211_sta_req_auth(sdata, &sdata->u.sta); - } - rcu_read_unlock(); - break; - } -} -EXPORT_SYMBOL(ieee80211_notify_mac); diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index c643e373fc5..2b3b490a607 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -225,7 +225,7 @@ minstrel_get_next_sample(struct minstrel_sta_info *mi) return sample_ndx; } -void +static void minstrel_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) { diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index 2328ba56803..b16801cde06 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -256,7 +256,7 @@ static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_ba if (!(info->flags & IEEE80211_TX_STAT_ACK)) { spinfo->tx_num_failed += 2; spinfo->tx_num_xmit++; - } else if (info->status.rates[0].count) { + } else if (info->status.rates[0].count > 1) { spinfo->tx_num_failed++; 
spinfo->tx_num_xmit++; } @@ -403,11 +403,11 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw, S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sampling_period); de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR, - debugfsdir, &pinfo->coeff_p); + debugfsdir, (u32 *)&pinfo->coeff_p); de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR, - debugfsdir, &pinfo->coeff_i); + debugfsdir, (u32 *)&pinfo->coeff_i); de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR, - debugfsdir, &pinfo->coeff_d); + debugfsdir, (u32 *)&pinfo->coeff_d); de->smoothing_shift = debugfs_create_u32("smoothing_shift", S_IRUSR | S_IWUSR, debugfsdir, &pinfo->smoothing_shift); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 7372d7abb8c..f5c7c337192 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -159,7 +159,7 @@ ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_i { struct ieee80211_bss *bss; - if (mesh_config_len != MESH_CFG_LEN) + if (mesh_config_len != IEEE80211_MESH_CONFIG_LEN) return NULL; bss = kzalloc(sizeof(*bss), GFP_ATOMIC); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 5ad9250b63a..dc2606d0ae7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -169,9 +169,6 @@ struct sta_ampdu_mlme { * @lock: used for locking all fields that require locking, see comments * in the header file. * @flaglock: spinlock for flags accesses - * @addr: MAC address of this STA - * @aid: STA's unique AID (1..2007, 0 = not assigned yet), - * only used in AP (and IBSS?) 
mode * @listen_interval: listen interval of this station, when we're acting as AP * @pin_status: used internally for pinning a STA struct into memory * @flags: STA flags, see &enum ieee80211_sta_info_flags diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d6392af9cd2..0d81b2cfd1a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -698,7 +698,6 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) left = payload_len - per_fragm; for (i = 0; i < num_fragm - 1; i++) { struct ieee80211_hdr *fhdr; - struct ieee80211_tx_info *info; size_t copylen; if (left <= 0) diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 7bbb98e846a..7043ddc7549 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -17,6 +17,7 @@ #include <linux/err.h> #include <linux/mm.h> #include <linux/scatterlist.h> +#include <asm/unaligned.h> #include <net/mac80211.h> #include "ieee80211_i.h" @@ -125,10 +126,10 @@ void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, { struct blkcipher_desc desc = { .tfm = tfm }; struct scatterlist sg; - __le32 *icv; + __le32 icv; - icv = (__le32 *)(data + data_len); - *icv = cpu_to_le32(~crc32_le(~0, data, data_len)); + icv = cpu_to_le32(~crc32_le(~0, data, data_len)); + put_unaligned(icv, (__le32 *)(data + data_len)); crypto_blkcipher_setkey(tfm, rc4key, klen); sg_init_one(&sg, data, data_len + WEP_ICV_LEN); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 231cab57351..b3ce28d3561 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -271,6 +271,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, __u32 *mode, char *extra) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; int type; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -281,6 +282,13 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, type = NL80211_IFTYPE_STATION; break; case IW_MODE_ADHOC: + /* Setting ad-hoc mode on non ibss channel is not + * 
supported. + */ + if (local->oper_channel && + (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS)) + return -EOPNOTSUPP; + type = NL80211_IFTYPE_ADHOC; break; case IW_MODE_REPEAT: @@ -407,13 +415,6 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, return 0; } - if (sdata->vif.type == NL80211_IFTYPE_AP) { - memcpy(sdata->u.ap.ssid, ssid, len); - memset(sdata->u.ap.ssid + len, 0, - IEEE80211_MAX_SSID_LEN - len); - sdata->u.ap.ssid_len = len; - return ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); - } return -EOPNOTSUPP; } @@ -437,15 +438,6 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, return res; } - if (sdata->vif.type == NL80211_IFTYPE_AP) { - len = sdata->u.ap.ssid_len; - if (len > IW_ESSID_MAX_SIZE) - len = IW_ESSID_MAX_SIZE; - memcpy(ssid, sdata->u.ap.ssid, len); - data->length = len; - data->flags = 1; - return 0; - } return -EOPNOTSUPP; } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index dd4566ea2bf..8cba4180285 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -192,8 +192,8 @@ tcp_snat_handler(struct sk_buff *skb, /* Adjust TCP checksums */ if (skb->ip_summed == CHECKSUM_PARTIAL) { tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, - htonl(oldlen), - htonl(skb->len - tcphoff)); + htons(oldlen), + htons(skb->len - tcphoff)); } else if (!cp->app) { /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, @@ -267,8 +267,8 @@ tcp_dnat_handler(struct sk_buff *skb, */ if (skb->ip_summed == CHECKSUM_PARTIAL) { tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, - htonl(oldlen), - htonl(skb->len - tcphoff)); + htons(oldlen), + htons(skb->len - tcphoff)); } else if (!cp->app) { /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c 
b/net/netfilter/ipvs/ip_vs_proto_udp.c index 6eb6039d634..d2930a71084 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -203,8 +203,8 @@ udp_snat_handler(struct sk_buff *skb, */ if (skb->ip_summed == CHECKSUM_PARTIAL) { udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, - htonl(oldlen), - htonl(skb->len - udphoff)); + htons(oldlen), + htons(skb->len - udphoff)); } else if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, @@ -279,8 +279,8 @@ udp_dnat_handler(struct sk_buff *skb, */ if (skb->ip_summed == CHECKSUM_PARTIAL) { udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, - htonl(oldlen), - htonl(skb->len - udphoff)); + htons(oldlen), + htons(skb->len - udphoff)); } else if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 2f367219073..425ab144f15 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -711,7 +711,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, iph = ipv6_hdr(skb); iph->version = 6; iph->nexthdr = IPPROTO_IPV6; - iph->payload_len = old_iph->payload_len + sizeof(old_iph); + iph->payload_len = old_iph->payload_len; + be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); iph->priority = old_iph->priority; memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); iph->daddr = rt->rt6i_dst.addr; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 73419de3a93..7e83f74cd5d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -306,9 +306,7 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); repl_hash = 
hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - spin_lock_bh(&nf_conntrack_lock); __nf_conntrack_hash_insert(ct, hash, repl_hash); - spin_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 9e4b74b95ce..a51bdac9f3a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -21,6 +21,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/rculist.h> +#include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l3proto.h> @@ -195,10 +196,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) */ synchronize_rcu(); + rtnl_lock(); spin_lock_bh(&nf_conntrack_lock); for_each_net(net) __nf_conntrack_helper_unregister(me, net); spin_unlock_bh(&nf_conntrack_lock); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ccc5ef1d757..00e8c27130f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1133,7 +1133,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], int err = -EINVAL; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL); + ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); if (ct == NULL || IS_ERR(ct)) return -ENOMEM; @@ -1220,7 +1220,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], } } - nf_ct_acct_ext_add(ct, GFP_KERNEL); + nf_ct_acct_ext_add(ct, GFP_ATOMIC); #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) @@ -1297,7 +1297,6 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, nf_conntrack_get(&master_ct->ct_general); } - spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) err = ctnetlink_create_conntrack(cda, @@ -1306,6 +1305,7 @@ 
ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, master_ct, NETLINK_CB(skb).pid, nlmsg_report(nlh)); + spin_unlock_bh(&nf_conntrack_lock); if (err < 0 && master_ct) nf_ct_put(master_ct); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a59a307e685..592d73344d4 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -22,6 +22,7 @@ #include <linux/notifier.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l3proto.h> @@ -221,8 +222,10 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ + rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, kill_l3proto, proto); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); @@ -333,8 +336,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ + rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 0a0ef17b2a4..1821c5d50fb 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -596,7 +596,6 @@ listdef_failure: /** * netlbl_mgmt_protocols_cb - Write an individual PROTOCOL message response * @skb: the skb to write to - * @seq: the NETLINK sequence number * @cb: the NETLINK callback * @protocol: the NetLabel protocol to use in the message * diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 480184a857d..9eb895c7a2a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -452,6 +452,10 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol) if (err < 0) goto 
out_module; + local_bh_disable(); + sock_prot_inuse_add(net, &netlink_proto, 1); + local_bh_enable(); + nlk = nlk_sk(sock->sk); nlk->module = module; out: @@ -511,6 +515,9 @@ static int netlink_release(struct socket *sock) kfree(nlk->groups); nlk->groups = NULL; + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); + local_bh_enable(); sock_put(sk); return 0; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c718e7e3f7d..5f94db2f3e9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -872,6 +872,7 @@ static int packet_release(struct socket *sock) write_lock_bh(&net->packet.sklist_lock); sk_del_node_init(sk); + sock_prot_inuse_add(net, sk->sk_prot, -1); write_unlock_bh(&net->packet.sklist_lock); /* @@ -1084,6 +1085,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol) write_lock_bh(&net->packet.sklist_lock); sk_add_node(sk, &net->packet.sklist); + sock_prot_inuse_add(net, &packet_proto, 1); write_unlock_bh(&net->packet.sklist_lock); return(0); out: diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index defeb7a0d50..9d211f12582 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -33,9 +33,30 @@ #include <net/phonet/phonet.h> #include <net/phonet/pn_dev.h> -static struct net_proto_family phonet_proto_family; -static struct phonet_protocol *phonet_proto_get(int protocol); -static inline void phonet_proto_put(struct phonet_protocol *pp); +/* Transport protocol registration */ +static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; +static DEFINE_SPINLOCK(proto_tab_lock); + +static struct phonet_protocol *phonet_proto_get(int protocol) +{ + struct phonet_protocol *pp; + + if (protocol >= PHONET_NPROTO) + return NULL; + + spin_lock(&proto_tab_lock); + pp = proto_tab[protocol]; + if (pp && !try_module_get(pp->prot->owner)) + pp = NULL; + spin_unlock(&proto_tab_lock); + + return pp; +} + +static inline void phonet_proto_put(struct 
phonet_protocol *pp) +{ + module_put(pp->prot->owner); +} /* protocol family functions */ @@ -144,8 +165,8 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, struct phonethdr *ph; int err; - if (skb->len + 2 > 0xffff) { - /* Phonet length field would overflow */ + if (skb->len + 2 > 0xffff /* Phonet length field limit */ || + skb->len + sizeof(struct phonethdr) > dev->mtu) { err = -EMSGSIZE; goto drop; } @@ -375,10 +396,6 @@ static struct packet_type phonet_packet_type = { .func = phonet_rcv, }; -/* Transport protocol registration */ -static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; -static DEFINE_SPINLOCK(proto_tab_lock); - int __init_or_module phonet_proto_register(int protocol, struct phonet_protocol *pp) { @@ -412,27 +429,6 @@ void phonet_proto_unregister(int protocol, struct phonet_protocol *pp) } EXPORT_SYMBOL(phonet_proto_unregister); -static struct phonet_protocol *phonet_proto_get(int protocol) -{ - struct phonet_protocol *pp; - - if (protocol >= PHONET_NPROTO) - return NULL; - - spin_lock(&proto_tab_lock); - pp = proto_tab[protocol]; - if (pp && !try_module_get(pp->prot->owner)) - pp = NULL; - spin_unlock(&proto_tab_lock); - - return pp; -} - -static inline void phonet_proto_put(struct phonet_protocol *pp) -{ - module_put(pp->prot->owner); -} - /* Module registration */ static int __init phonet_init(void) { diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index 9978afbd9f2..e6e8e44852e 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -41,7 +41,6 @@ struct gprs_dev { void (*old_write_space)(struct sock *); struct net_device *net; - struct net_device_stats stats; struct sk_buff_head tx_queue; struct work_struct tx_work; @@ -49,14 +48,14 @@ struct gprs_dev { unsigned tx_max; }; -static int gprs_type_trans(struct sk_buff *skb) +static __be16 gprs_type_trans(struct sk_buff *skb) { const u8 *pvfc; u8 buf; pvfc = skb_header_pointer(skb, 0, 1, &buf); if (!pvfc) - return 0; + return htons(0); /* Look 
at IP version field */ switch (*pvfc >> 4) { case 4: @@ -64,7 +63,7 @@ static int gprs_type_trans(struct sk_buff *skb) case 6: return htons(ETH_P_IPV6); } - return 0; + return htons(0); } /* @@ -83,8 +82,9 @@ static void gprs_state_change(struct sock *sk) static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb) { + struct net_device *net = dev->net; int err = 0; - u16 protocol = gprs_type_trans(skb); + __be16 protocol = gprs_type_trans(skb); if (!protocol) { err = -EINVAL; @@ -99,7 +99,7 @@ static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb) * so wrap the IP packet as a single fragment of an head-less * socket buffer. The network stack will pull what it needs, * but at least, the whole IP payload is not memcpy'd. */ - rskb = netdev_alloc_skb(dev->net, 0); + rskb = netdev_alloc_skb(net, 0); if (!rskb) { err = -ENOBUFS; goto drop; @@ -123,11 +123,11 @@ static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb) skb->protocol = protocol; skb_reset_mac_header(skb); - skb->dev = dev->net; + skb->dev = net; - if (likely(dev->net->flags & IFF_UP)) { - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + if (likely(net->flags & IFF_UP)) { + net->stats.rx_packets++; + net->stats.rx_bytes += skb->len; netif_rx(skb); skb = NULL; } else @@ -136,7 +136,7 @@ static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb) drop: if (skb) { dev_kfree_skb(skb); - dev->stats.rx_dropped++; + net->stats.rx_dropped++; } return err; } @@ -199,14 +199,15 @@ static int gprs_xmit(struct sk_buff *skb, struct net_device *net) static void gprs_tx(struct work_struct *work) { struct gprs_dev *dev = container_of(work, struct gprs_dev, tx_work); + struct net_device *net = dev->net; struct sock *sk = dev->sk; struct sk_buff *skb; while ((skb = skb_dequeue(&dev->tx_queue)) != NULL) { int err; - dev->stats.tx_bytes += skb->len; - dev->stats.tx_packets++; + net->stats.tx_bytes += skb->len; + net->stats.tx_packets++; skb_orphan(skb); skb_set_owner_w(skb, sk); @@ -215,9 
+216,9 @@ static void gprs_tx(struct work_struct *work) err = pep_write(sk, skb); if (err) { LIMIT_NETDEBUG(KERN_WARNING"%s: TX error (%d)\n", - dev->net->name, err); - dev->stats.tx_aborted_errors++; - dev->stats.tx_errors++; + net->name, err); + net->stats.tx_aborted_errors++; + net->stats.tx_errors++; } release_sock(sk); } @@ -236,13 +237,6 @@ static int gprs_set_mtu(struct net_device *net, int new_mtu) return 0; } -static struct net_device_stats *gprs_get_stats(struct net_device *net) -{ - struct gprs_dev *dev = netdev_priv(net); - - return &dev->stats; -} - static void gprs_setup(struct net_device *net) { net->features = NETIF_F_FRAGLIST; @@ -256,7 +250,6 @@ static void gprs_setup(struct net_device *net) net->destructor = free_netdev; net->hard_start_xmit = gprs_xmit; /* mandatory */ net->change_mtu = gprs_set_mtu; - net->get_stats = gprs_get_stats; } /* diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index c9180c8771c..051d2c9ea66 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -565,16 +565,22 @@ static void rfkill_release(struct device *dev) #ifdef CONFIG_PM static int rfkill_suspend(struct device *dev, pm_message_t state) { + struct rfkill *rfkill = to_rfkill(dev); + /* mark class device as suspended */ if (dev->power.power_state.event != state.event) dev->power.power_state = state; + /* store state for the resume handler */ + rfkill->state_for_resume = rfkill->state; + return 0; } static int rfkill_resume(struct device *dev) { struct rfkill *rfkill = to_rfkill(dev); + enum rfkill_state newstate; if (dev->power.power_state.event != PM_EVENT_ON) { mutex_lock(&rfkill->mutex); @@ -582,6 +588,15 @@ static int rfkill_resume(struct device *dev) dev->power.power_state.event = PM_EVENT_ON; /* + * rfkill->state could have been modified before we got + * called, and won't be updated by rfkill_toggle_radio() + * in force mode. Sync it FIRST. 
+ */ + if (rfkill->get_state && + !rfkill->get_state(rfkill->data, &newstate)) + rfkill->state = newstate; + + /* * If we are under EPO, kick transmitter offline, * otherwise restore to pre-suspend state. * @@ -590,7 +605,7 @@ static int rfkill_resume(struct device *dev) rfkill_toggle_radio(rfkill, rfkill_epo_lock_active ? RFKILL_STATE_SOFT_BLOCKED : - rfkill->state, + rfkill->state_for_resume, 1); mutex_unlock(&rfkill->mutex); @@ -666,7 +681,7 @@ static int rfkill_check_duplicity(const struct rfkill *rfkill) } /* 0: first switch of its kind */ - return test_bit(rfkill->type, seen); + return (test_bit(rfkill->type, seen)) ? 1 : 0; } static int rfkill_add_switch(struct rfkill *rfkill) @@ -774,7 +789,7 @@ static void rfkill_led_trigger_register(struct rfkill *rfkill) int error; if (!rfkill->led_trigger.name) - rfkill->led_trigger.name = rfkill->dev.bus_id; + rfkill->led_trigger.name = dev_name(&rfkill->dev); if (!rfkill->led_trigger.activate) rfkill->led_trigger.activate = rfkill_led_trigger_activate; error = led_trigger_register(&rfkill->led_trigger); @@ -815,8 +830,7 @@ int __must_check rfkill_register(struct rfkill *rfkill) "badly initialized rfkill struct\n")) return -EINVAL; - snprintf(dev->bus_id, sizeof(dev->bus_id), - "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); + dev_set_name(dev, "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); rfkill_led_trigger_register(rfkill); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index a7f1ce11bc2..0c1cc761280 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1072,6 +1072,10 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int n, size, qbit = 0; + /* ROSE empty frame has no meaning : don't send */ + if (len == 0) + return 0; + if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; @@ -1265,6 +1269,12 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); copied = skb->len; + /* ROSE 
empty frame has no meaning : ignore it */ + if (copied == 0) { + skb_free_datagram(sk, skb); + return copied; + } + if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 6767e54155d..4f7ef0db302 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -194,6 +194,17 @@ config NET_SCH_NETEM If unsure, say N. +config NET_SCH_DRR + tristate "Deficit Round Robin scheduler (DRR)" + help + Say Y here if you want to use the Deficit Round Robin (DRR) packet + scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_drr. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT @@ -316,6 +327,17 @@ config NET_CLS_FLOW To compile this code as a module, choose M here: the module will be called cls_flow. +config NET_CLS_CGROUP + bool "Control Group Classifier" + select NET_CLS + depends on CGROUPS + ---help--- + Say Y here if you want to classify packets based on the control + cgroup of their process. + + To compile this code as a module, choose M here: the + module will be called cls_cgroup. 
+ config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index e60c9925b26..54d950cd4b8 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o +obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o @@ -38,6 +39,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o +obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8f457f1e0ac..9d03cc33b6c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -214,12 +214,14 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, } EXPORT_SYMBOL(tcf_hash_check); -struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) +struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, + struct tc_action *a, int size, int bind, + u32 *idx_gen, struct tcf_hashinfo *hinfo) { struct tcf_common *p = kzalloc(size, GFP_KERNEL); if (unlikely(!p)) - return p; + return ERR_PTR(-ENOMEM); p->tcfc_refcnt = 1; if (bind) p->tcfc_bindcnt = 1; @@ -228,9 +230,15 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_acti p->tcfc_index = index ? 
index : tcf_hash_new_index(idx_gen, hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; - if (est) - gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, - &p->tcfc_lock, est); + if (est) { + int err = gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, + &p->tcfc_lock, est); + if (err) { + kfree(p); + return ERR_PTR(err); + } + } + a->priv = (void *) p; return p; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index ac04289da5d..e7f796aec65 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -88,8 +88,8 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind, &gact_idx_gen, &gact_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 0453d79ebf5..082c520b0de 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -136,8 +136,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, &ipt_idx_gen, &ipt_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 70341c020b6..b9aaab4e035 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -105,8 +105,8 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, return -EINVAL; pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, &mirred_idx_gen, &mirred_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 7b39ed485bc..d885ba31156 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -68,8 +68,8 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr 
*est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &nat_idx_gen, &nat_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); p = to_tcf_nat(pc); ret = ACT_P_CREATED; } else { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index d5f4e340486..96c0ed115e2 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -68,8 +68,8 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, return -EINVAL; pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &pedit_idx_gen, &pedit_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 38015b49394..5c72a116b1a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -182,17 +182,32 @@ override: R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]); if (R_tab == NULL) goto failure; + + if (!est && (ret == ACT_P_CREATED || + !gen_estimator_active(&police->tcf_bstats, + &police->tcf_rate_est))) { + err = -EINVAL; + goto failure; + } + if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE]); - if (P_tab == NULL) { - qdisc_put_rtab(R_tab); + if (P_tab == NULL) goto failure; - } } } - /* No failure allowed after this point */ + spin_lock_bh(&police->tcf_lock); + if (est) { + err = gen_replace_estimator(&police->tcf_bstats, + &police->tcf_rate_est, + &police->tcf_lock, est); + if (err) + goto failure_unlock; + } + + /* No failure allowed after this point */ if (R_tab != NULL) { qdisc_put_rtab(police->tcfp_R_tab); police->tcfp_R_tab = R_tab; @@ -217,10 +232,6 @@ override: if (tb[TCA_POLICE_AVRATE]) police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]); - if (est) - gen_replace_estimator(&police->tcf_bstats, - &police->tcf_rate_est, - &police->tcf_lock, est); spin_unlock_bh(&police->tcf_lock); if (ret != 
ACT_P_CREATED) @@ -238,7 +249,13 @@ override: a->priv = police; return ret; +failure_unlock: + spin_unlock_bh(&police->tcf_lock); failure: + if (P_tab) + qdisc_put_rtab(P_tab); + if (R_tab) + qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) kfree(police); return err; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e7851ce92cf..8daa1ebc741 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -124,8 +124,8 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, &simp_idx_gen, &simp_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); d = to_defact(pc); ret = alloc_defdata(d, defdata); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index fe9777e77f3..4ab916b8074 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -104,8 +104,8 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, &skbedit_idx_gen, &skbedit_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); d = to_skbedit(pc); ret = ACT_P_CREATED; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 16e7ac9774e..173fcc4b050 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -531,7 +531,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, if (src->action) { struct tc_action *act; tcf_tree_lock(tp); - act = xchg(&dst->action, src->action); + act = dst->action; + dst->action = src->action; tcf_tree_unlock(tp); if (act) tcf_action_destroy(act, TCA_ACT_UNBIND); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 956915c217d..4e2bda85411 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -102,7 +102,7 @@ static inline void basic_delete_filter(struct tcf_proto *tp, static void basic_destroy(struct tcf_proto *tp) { - struct basic_head *head = (struct 
basic_head *) xchg(&tp->root, NULL); + struct basic_head *head = tp->root; struct basic_filter *f, *n; list_for_each_entry_safe(f, n, &head->flist, link) { diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c new file mode 100644 index 00000000000..0d68b197598 --- /dev/null +++ b/net/sched/cls_cgroup.c @@ -0,0 +1,288 @@ +/* + * net/sched/cls_cgroup.c Control Group Classifier + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf <tgraf@suug.ch> + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/cgroup.h> +#include <net/rtnetlink.h> +#include <net/pkt_cls.h> + +struct cgroup_cls_state +{ + struct cgroup_subsys_state css; + u32 classid; +}; + +static inline struct cgroup_cls_state *net_cls_state(struct cgroup *cgrp) +{ + return (struct cgroup_cls_state *) + cgroup_subsys_state(cgrp, net_cls_subsys_id); +} + +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp) +{ + struct cgroup_cls_state *cs; + + if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) + return ERR_PTR(-ENOMEM); + + if (cgrp->parent) + cs->classid = net_cls_state(cgrp->parent)->classid; + + return &cs->css; +} + +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + kfree(ss); +} + +static u64 read_classid(struct cgroup *cgrp, struct cftype *cft) +{ + return net_cls_state(cgrp)->classid; +} + +static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value) +{ + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + + net_cls_state(cgrp)->classid = (u32) value; + + cgroup_unlock(); + + return 0; +} + +static struct cftype ss_files[] = { + { + .name = "classid", + .read_u64 = read_classid, + 
.write_u64 = write_classid, + }, +}; + +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); +} + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .create = cgrp_create, + .destroy = cgrp_destroy, + .populate = cgrp_populate, + .subsys_id = net_cls_subsys_id, +}; + +struct cls_cgroup_head +{ + u32 handle; + struct tcf_exts exts; + struct tcf_ematch_tree ematches; +}; + +static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + struct cls_cgroup_head *head = tp->root; + struct cgroup_cls_state *cs; + int ret = 0; + + /* + * Due to the nature of the classifier it is required to ignore all + * packets originating from softirq context as accessing `current' + * would lead to false results. + * + * This test assumes that all callers of dev_queue_xmit() explicitely + * disable bh. Knowing this, it is possible to detect softirq based + * calls by looking at the number of nested bh disable calls because + * softirqs always disables bh. 
+ */ + if (softirq_count() != SOFTIRQ_OFFSET) + return -1; + + rcu_read_lock(); + cs = (struct cgroup_cls_state *) task_subsys_state(current, + net_cls_subsys_id); + if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) { + res->classid = cs->classid; + res->class = 0; + ret = tcf_exts_exec(skb, &head->exts, res); + } else + ret = -1; + + rcu_read_unlock(); + + return ret; +} + +static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) +{ + return 0UL; +} + +static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f) +{ +} + +static int cls_cgroup_init(struct tcf_proto *tp) +{ + return 0; +} + +static const struct tcf_ext_map cgroup_ext_map = { + .action = TCA_CGROUP_ACT, + .police = TCA_CGROUP_POLICE, +}; + +static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { + [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, +}; + +static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, + unsigned long *arg) +{ + struct nlattr *tb[TCA_CGROUP_MAX+1]; + struct cls_cgroup_head *head = tp->root; + struct tcf_ematch_tree t; + struct tcf_exts e; + int err; + + if (head == NULL) { + if (!handle) + return -EINVAL; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + + head->handle = handle; + + tcf_tree_lock(tp); + tp->root = head; + tcf_tree_unlock(tp); + } + + if (handle != head->handle) + return -ENOENT; + + err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], + cgroup_policy); + if (err < 0) + return err; + + err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map); + if (err < 0) + return err; + + err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); + if (err < 0) + return err; + + tcf_exts_change(tp, &head->exts, &e); + tcf_em_tree_change(tp, &head->ematches, &t); + + return 0; +} + +static void cls_cgroup_destroy(struct tcf_proto *tp) +{ + struct cls_cgroup_head *head = tp->root; + + if (head) { + tcf_exts_destroy(tp, 
&head->exts); + tcf_em_tree_destroy(tp, &head->ematches); + kfree(head); + } +} + +static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg) +{ + return -EOPNOTSUPP; +} + +static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct cls_cgroup_head *head = tp->root; + + if (arg->count < arg->skip) + goto skip; + + if (arg->fn(tp, (unsigned long) head, arg) < 0) { + arg->stop = 1; + return; + } +skip: + arg->count++; +} + +static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct cls_cgroup_head *head = tp->root; + unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nest; + + t->tcm_handle = head->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + + if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 || + tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { + .kind = "cgroup", + .init = cls_cgroup_init, + .change = cls_cgroup_change, + .classify = cls_cgroup_classify, + .destroy = cls_cgroup_destroy, + .get = cls_cgroup_get, + .put = cls_cgroup_put, + .delete = cls_cgroup_delete, + .walk = cls_cgroup_walk, + .dump = cls_cgroup_dump, + .owner = THIS_MODULE, +}; + +static int __init init_cgroup_cls(void) +{ + return register_tcf_proto_ops(&cls_cgroup_ops); +} + +static void __exit exit_cgroup_cls(void) +{ + unregister_tcf_proto_ops(&cls_cgroup_ops); +} + +module_init(init_cgroup_cls); +module_exit(exit_cgroup_cls); +MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index b0f90e593af..6d6e87585fb 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -148,7 +148,7 @@ 
fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) static void fw_destroy(struct tcf_proto *tp) { - struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL); + struct fw_head *head = tp->root; struct fw_filter *f; int h; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index e3d8455eebc..bdf1f4172ee 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -260,7 +260,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) static void route4_destroy(struct tcf_proto *tp) { - struct route4_head *head = xchg(&tp->root, NULL); + struct route4_head *head = tp->root; int h1, h2; if (head == NULL) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 7a7bff5ded2..e806f2314b5 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -13,12 +13,6 @@ #include <net/netlink.h> #include <net/pkt_cls.h> - -/* - * Not quite sure if we need all the xchgs Alexey uses when accessing things. - * Can always add them later ... :) - */ - /* * Passing parameters to the root seems to be done more awkwardly than really * necessary. At least, u32 doesn't seem to use such dirty hacks. 
To be diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 246f9065ce3..05d178008cb 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -387,7 +387,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) static void u32_destroy(struct tcf_proto *tp) { struct tc_u_common *tp_c = tp->data; - struct tc_u_hnode *root_ht = xchg(&tp->root, NULL); + struct tc_u_hnode *root_ht = tp->root; WARN_ON(root_ht == NULL); @@ -479,7 +479,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base, err = -EINVAL; if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); - struct tc_u_hnode *ht_down = NULL; + struct tc_u_hnode *ht_down = NULL, *ht_old; if (TC_U32_KEY(handle)) goto errout; @@ -493,11 +493,12 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base, } tcf_tree_lock(tp); - ht_down = xchg(&n->ht_down, ht_down); + ht_old = n->ht_down; + n->ht_down = ht_down; tcf_tree_unlock(tp); - if (ht_down) - ht_down->refcnt--; + if (ht_old) + ht_old->refcnt--; } if (tb[TCA_U32_CLASSID]) { n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]); diff --git a/net/sched/ematch.c b/net/sched/ematch.c index e82519e548d..aab59409728 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -71,7 +71,7 @@ * * static void __exit exit_my_ematch(void) * { - * return tcf_em_unregister(&my_ops); + * tcf_em_unregister(&my_ops); * } * * module_init(init_my_ematch); @@ -154,23 +154,11 @@ EXPORT_SYMBOL(tcf_em_register); * * Returns -ENOENT if no matching ematch was found. 
*/ -int tcf_em_unregister(struct tcf_ematch_ops *ops) +void tcf_em_unregister(struct tcf_ematch_ops *ops) { - int err = 0; - struct tcf_ematch_ops *e; - write_lock(&ematch_mod_lock); - list_for_each_entry(e, &ematch_ops, link) { - if (e == ops) { - list_del(&e->link); - goto out; - } - } - - err = -ENOENT; -out: + list_del(&ops->link); write_unlock(&ematch_mod_lock); - return err; } EXPORT_SYMBOL(tcf_em_unregister); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e5646614e88..f859dd5fabf 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -97,11 +97,6 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, Auxiliary routines: - ---requeue - - requeues once dequeued packet. It is used for non-standard or - just buggy devices, which can defer output even if netif_queue_stopped()=0. - ---peek like dequeue but without removing a packet from the queue @@ -151,8 +146,6 @@ int register_qdisc(struct Qdisc_ops *qops) if (qops->enqueue == NULL) qops->enqueue = noop_qdisc_ops.enqueue; - if (qops->requeue == NULL) - qops->requeue = noop_qdisc_ops.requeue; if (qops->peek == NULL) { if (qops->dequeue == NULL) { qops->peek = noop_qdisc_ops.peek; @@ -211,28 +204,16 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } -/* - * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen() - * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue() - */ -static DEFINE_SPINLOCK(qdisc_list_lock); - static void qdisc_list_add(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - spin_lock_bh(&qdisc_list_lock); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); - spin_unlock_bh(&qdisc_list_lock); - } } void qdisc_list_del(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - spin_lock_bh(&qdisc_list_lock); + if ((q->parent != TC_H_ROOT) && !(q->flags & 
TCQ_F_INGRESS)) list_del(&q->list); - spin_unlock_bh(&qdisc_list_lock); - } } EXPORT_SYMBOL(qdisc_list_del); @@ -241,22 +222,17 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) unsigned int i; struct Qdisc *q; - spin_lock_bh(&qdisc_list_lock); - for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); struct Qdisc *txq_root = txq->qdisc_sleeping; q = qdisc_match_from_root(txq_root, handle); if (q) - goto unlock; + goto out; } q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); - -unlock: - spin_unlock_bh(&qdisc_list_lock); - +out: return q; } @@ -429,6 +405,8 @@ static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) struct nlattr *nest; nest = nla_nest_start(skb, TCA_STAB); + if (nest == NULL) + goto nla_put_failure; NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); nla_nest_end(skb, nest); @@ -902,9 +880,12 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) sch->stab = stab; if (tca[TCA_RATE]) + /* NB: ignores errors from replace_estimator + because change can't be undone. 
*/ gen_replace_estimator(&sch->bstats, &sch->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + return 0; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 6eb9a650b63..2a8b83af7c4 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -62,7 +62,7 @@ struct atm_qdisc_data { struct atm_flow_data link; /* unclassified skbs go here */ struct atm_flow_data *flows; /* NB: "link" is also on this list */ - struct tasklet_struct task; /* requeue tasklet */ + struct tasklet_struct task; /* dequeue tasklet */ }; /* ------------------------- Class/flow operations ------------------------- */ @@ -102,7 +102,8 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, return -EINVAL; if (!new) new = &noop_qdisc; - *old = xchg(&flow->q, new); + *old = flow->q; + flow->q = new; if (*old) qdisc_reset(*old); return 0; @@ -534,23 +535,6 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch) return p->link.q->ops->peek(p->link.q); } -static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - int ret; - - pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - ret = p->link.q->ops->requeue(skb, p->link.q); - if (!ret) { - sch->q.qlen++; - sch->qstats.requeues++; - } else if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - p->link.qstats.drops++; - } - return ret; -} - static unsigned int atm_tc_drop(struct Qdisc *sch) { struct atm_qdisc_data *p = qdisc_priv(sch); @@ -707,7 +691,6 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = { .enqueue = atm_tc_enqueue, .dequeue = atm_tc_dequeue, .peek = atm_tc_peek, - .requeue = atm_tc_requeue, .drop = atm_tc_drop, .init = atm_tc_init, .reset = atm_tc_reset, diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 63efa70abbe..9e43ed94916 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -405,40 +405,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) 
return ret; } -static int -cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - int ret; - - if ((cl = q->tx_class) == NULL) { - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_CN; - } - q->tx_class = NULL; - - cbq_mark_toplevel(q, cl); - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = cl; - cl->q->__parent = sch; -#endif - if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - if (!cl->next_alive) - cbq_activate_class(cl); - return 0; - } - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - cl->qstats.drops++; - } - return ret; -} - /* Overlimit actions */ /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */ @@ -1669,7 +1635,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, #endif } sch_tree_lock(sch); - *old = xchg(&cl->q, new); + *old = cl->q; + cl->q = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -1798,11 +1765,23 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t } if (tb[TCA_CBQ_RATE]) { - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]); + rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), + tb[TCA_CBQ_RTAB]); if (rtab == NULL) return -EINVAL; } + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + if (rtab) + qdisc_put_rtab(rtab); + return err; + } + } + /* Change class parameters */ sch_tree_lock(sch); @@ -1810,8 +1789,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cbq_deactivate_class(cl); if (rtab) { - rtab = xchg(&cl->R_tab, rtab); - qdisc_put_rtab(rtab); + qdisc_put_rtab(cl->R_tab); + cl->R_tab = rtab; } if (tb[TCA_CBQ_LSSOPT]) @@ -1838,10 +1817,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t 
sch_tree_unlock(sch); - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); return 0; } @@ -1888,6 +1863,17 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl = kzalloc(sizeof(*cl), GFP_KERNEL); if (cl == NULL) goto failure; + + if (tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + kfree(cl); + goto failure; + } + } + cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; @@ -1929,10 +1915,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t qdisc_class_hash_grow(sch, &q->clhash); - if (tca[TCA_RATE]) - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); - *arg = (unsigned long)cl; return 0; @@ -2067,7 +2049,6 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { .enqueue = cbq_enqueue, .dequeue = cbq_dequeue, .peek = qdisc_peek_dequeued, - .requeue = cbq_requeue, .drop = cbq_drop, .init = cbq_init, .reset = cbq_reset, diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c new file mode 100644 index 00000000000..f6b4fa97df7 --- /dev/null +++ b/net/sched/sch_drr.c @@ -0,0 +1,519 @@ +/* + * net/sched/sch_drr.c Deficit Round Robin scheduler + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/pkt_sched.h> +#include <net/sch_generic.h> +#include <net/pkt_sched.h> +#include <net/pkt_cls.h> + +struct drr_class { + struct Qdisc_class_common common; + unsigned int refcnt; + unsigned int filter_cnt; + + struct gnet_stats_basic bstats; + struct gnet_stats_queue qstats; + struct gnet_stats_rate_est rate_est; + struct list_head alist; + struct Qdisc *qdisc; + + u32 quantum; + u32 deficit; +}; + +struct drr_sched { + struct list_head active; + struct tcf_proto *filter_list; + struct Qdisc_class_hash clhash; +}; + +static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) +{ + struct drr_sched *q = qdisc_priv(sch); + struct Qdisc_class_common *clc; + + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct drr_class, common); +} + +static void drr_purge_queue(struct drr_class *cl) +{ + unsigned int len = cl->qdisc->q.qlen; + + qdisc_reset(cl->qdisc); + qdisc_tree_decrease_qlen(cl->qdisc, len); +} + +static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { + [TCA_DRR_QUANTUM] = { .type = NLA_U32 }, +}; + +static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl = (struct drr_class *)*arg; + struct nlattr *tb[TCA_DRR_MAX + 1]; + u32 quantum; + int err; + + err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy); + if (err < 0) + return err; + + if (tb[TCA_DRR_QUANTUM]) { + quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]); + if (quantum == 0) + return -EINVAL; + } else + quantum = psched_mtu(qdisc_dev(sch)); + + if (cl != NULL) { + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } + + sch_tree_lock(sch); + if 
(tb[TCA_DRR_QUANTUM]) + cl->quantum = quantum; + sch_tree_unlock(sch); + + return 0; + } + + cl = kzalloc(sizeof(struct drr_class), GFP_KERNEL); + if (cl == NULL) + return -ENOBUFS; + + cl->refcnt = 1; + cl->common.classid = classid; + cl->quantum = quantum; + cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); + if (cl->qdisc == NULL) + cl->qdisc = &noop_qdisc; + + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + qdisc_destroy(cl->qdisc); + kfree(cl); + return err; + } + } + + sch_tree_lock(sch); + qdisc_class_hash_insert(&q->clhash, &cl->common); + sch_tree_unlock(sch); + + qdisc_class_hash_grow(sch, &q->clhash); + + *arg = (unsigned long)cl; + return 0; +} + +static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) +{ + gen_kill_estimator(&cl->bstats, &cl->rate_est); + qdisc_destroy(cl->qdisc); + kfree(cl); +} + +static int drr_delete_class(struct Qdisc *sch, unsigned long arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl = (struct drr_class *)arg; + + if (cl->filter_cnt > 0) + return -EBUSY; + + sch_tree_lock(sch); + + drr_purge_queue(cl); + qdisc_class_hash_remove(&q->clhash, &cl->common); + + if (--cl->refcnt == 0) + drr_destroy_class(sch, cl); + + sch_tree_unlock(sch); + return 0; +} + +static unsigned long drr_get_class(struct Qdisc *sch, u32 classid) +{ + struct drr_class *cl = drr_find_class(sch, classid); + + if (cl != NULL) + cl->refcnt++; + + return (unsigned long)cl; +} + +static void drr_put_class(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (--cl->refcnt == 0) + drr_destroy_class(sch, cl); +} + +static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl) +{ + struct drr_sched *q = qdisc_priv(sch); + + if (cl) + return NULL; + + return &q->filter_list; +} + +static unsigned long drr_bind_tcf(struct Qdisc *sch, 
unsigned long parent, + u32 classid) +{ + struct drr_class *cl = drr_find_class(sch, classid); + + if (cl != NULL) + cl->filter_cnt++; + + return (unsigned long)cl; +} + +static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + cl->filter_cnt--; +} + +static int drr_graft_class(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (new == NULL) { + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, cl->common.classid); + if (new == NULL) + new = &noop_qdisc; + } + + sch_tree_lock(sch); + drr_purge_queue(cl); + *old = cl->qdisc; + cl->qdisc = new; + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *drr_class_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + return cl->qdisc; +} + +static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); +} + +static int drr_dump_class(struct Qdisc *sch, unsigned long arg, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct drr_class *cl = (struct drr_class *)arg; + struct nlattr *nest; + + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle = cl->common.classid; + tcm->tcm_info = cl->qdisc->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + NLA_PUT_U32(skb, TCA_DRR_QUANTUM, cl->quantum); + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, + struct gnet_dump *d) +{ + struct drr_class *cl = (struct drr_class *)arg; + struct tc_drr_stats xstats; + + memset(&xstats, 0, sizeof(xstats)); + if (cl->qdisc->q.qlen) + xstats.deficit = cl->deficit; + + if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || + 
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) + return -1; + + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); +} + +static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n; + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, (unsigned long)cl, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + } +} + +static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct tcf_result res; + int result; + + if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { + cl = drr_find_class(sch, skb->priority); + if (cl != NULL) + return cl; + } + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return NULL; + } +#endif + cl = (struct drr_class *)res.class; + if (cl == NULL) + cl = drr_find_class(sch, res.classid); + return cl; + } + return NULL; +} + +static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + unsigned int len; + int err; + + cl = drr_classify(skb, sch, &err); + if (cl == NULL) { + if (err & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return err; + } + + len = qdisc_pkt_len(skb); + err = qdisc_enqueue(skb, cl->qdisc); + if (unlikely(err != NET_XMIT_SUCCESS)) { + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + sch->qstats.drops++; + } + return err; + } + + if (cl->qdisc->q.qlen == 
1) { + list_add_tail(&cl->alist, &q->active); + cl->deficit = cl->quantum; + } + + cl->bstats.packets++; + cl->bstats.bytes += len; + sch->bstats.packets++; + sch->bstats.bytes += len; + + sch->q.qlen++; + return err; +} + +static struct sk_buff *drr_dequeue(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct sk_buff *skb; + unsigned int len; + + if (list_empty(&q->active)) + goto out; + while (1) { + cl = list_first_entry(&q->active, struct drr_class, alist); + skb = cl->qdisc->ops->peek(cl->qdisc); + if (skb == NULL) + goto out; + + len = qdisc_pkt_len(skb); + if (len <= cl->deficit) { + cl->deficit -= len; + skb = qdisc_dequeue_peeked(cl->qdisc); + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); + sch->q.qlen--; + return skb; + } + + cl->deficit += cl->quantum; + list_move_tail(&cl->alist, &q->active); + } +out: + return NULL; +} + +static unsigned int drr_drop(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + unsigned int len; + + list_for_each_entry(cl, &q->active, alist) { + if (cl->qdisc->ops->drop) { + len = cl->qdisc->ops->drop(cl->qdisc); + if (len > 0) { + sch->q.qlen--; + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); + return len; + } + } + } + return 0; +} + +static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +{ + struct drr_sched *q = qdisc_priv(sch); + int err; + + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; + INIT_LIST_HEAD(&q->active); + return 0; +} + +static void drr_reset_qdisc(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n; + unsigned int i; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (cl->qdisc->q.qlen) + list_del(&cl->alist); + qdisc_reset(cl->qdisc); + } + } + sch->q.qlen = 0; +} + +static void drr_destroy_qdisc(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + 
struct drr_class *cl; + struct hlist_node *n, *next; + unsigned int i; + + tcf_destroy_chain(&q->filter_list); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + common.hnode) + drr_destroy_class(sch, cl); + } + qdisc_class_hash_destroy(&q->clhash); +} + +static const struct Qdisc_class_ops drr_class_ops = { + .change = drr_change_class, + .delete = drr_delete_class, + .get = drr_get_class, + .put = drr_put_class, + .tcf_chain = drr_tcf_chain, + .bind_tcf = drr_bind_tcf, + .unbind_tcf = drr_unbind_tcf, + .graft = drr_graft_class, + .leaf = drr_class_leaf, + .qlen_notify = drr_qlen_notify, + .dump = drr_dump_class, + .dump_stats = drr_dump_class_stats, + .walk = drr_walk, +}; + +static struct Qdisc_ops drr_qdisc_ops __read_mostly = { + .cl_ops = &drr_class_ops, + .id = "drr", + .priv_size = sizeof(struct drr_sched), + .enqueue = drr_enqueue, + .dequeue = drr_dequeue, + .peek = qdisc_peek_dequeued, + .drop = drr_drop, + .init = drr_init_qdisc, + .reset = drr_reset_qdisc, + .destroy = drr_destroy_qdisc, + .owner = THIS_MODULE, +}; + +static int __init drr_init(void) +{ + return register_qdisc(&drr_qdisc_ops); +} + +static void __exit drr_exit(void) +{ + unregister_qdisc(&drr_qdisc_ops); +} + +module_init(drr_init); +module_exit(drr_exit); +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 3e491479ea8..d303daa45d4 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -68,7 +68,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, } sch_tree_lock(sch); - *old = xchg(&p->q, new); + *old = p->q; + p->q = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -322,26 +323,6 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch) return p->q->ops->peek(p->q); } -static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int err; - - 
pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - - err = p->q->ops->requeue(skb, p->q); - if (err != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(err)) - sch->qstats.drops++; - return err; - } - - sch->q.qlen++; - sch->qstats.requeues++; - - return NET_XMIT_SUCCESS; -} - static unsigned int dsmark_drop(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); @@ -506,7 +487,6 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { .enqueue = dsmark_enqueue, .dequeue = dsmark_dequeue, .peek = dsmark_peek, - .requeue = dsmark_requeue, .drop = dsmark_drop, .init = dsmark_init, .reset = dsmark_reset, diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 8825e8806f4..92cfc9d7e3b 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -84,7 +84,6 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .enqueue = pfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, - .requeue = qdisc_requeue, .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, @@ -100,7 +99,6 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .enqueue = bfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, - .requeue = qdisc_requeue, .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 1192da22983..5f5efe4e607 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -224,7 +224,7 @@ static void dev_watchdog(unsigned long arg) char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", dev->name, netdev_drivername(dev, drivername, 64)); - dev->tx_timeout(dev); + dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + @@ -239,7 +239,7 @@ static void dev_watchdog(unsigned long arg) void __netdev_watchdog_up(struct net_device *dev) { - if (dev->tx_timeout) { + if (dev->netdev_ops->ndo_tx_timeout) { if 
(dev->watchdog_timeo <= 0) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, @@ -270,6 +270,8 @@ static void dev_watchdog_down(struct net_device *dev) void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -285,8 +287,11 @@ EXPORT_SYMBOL(netif_carrier_on); */ void netif_carrier_off(struct net_device *dev) { - if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) + if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); + } } EXPORT_SYMBOL(netif_carrier_off); @@ -306,22 +311,12 @@ static struct sk_buff *noop_dequeue(struct Qdisc * qdisc) return NULL; } -static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - if (net_ratelimit()) - printk(KERN_DEBUG "%s deferred output. It is buggy.\n", - skb->dev->name); - kfree_skb(skb); - return NET_XMIT_CN; -} - struct Qdisc_ops noop_qdisc_ops __read_mostly = { .id = "noop", .priv_size = 0, .enqueue = noop_enqueue, .dequeue = noop_dequeue, .peek = noop_dequeue, - .requeue = noop_requeue, .owner = THIS_MODULE, }; @@ -336,7 +331,6 @@ struct Qdisc noop_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), - .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, }; @@ -348,7 +342,6 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .enqueue = noop_enqueue, .dequeue = noop_dequeue, .peek = noop_dequeue, - .requeue = noop_requeue, .owner = THIS_MODULE, }; @@ -364,7 +357,6 @@ static struct Qdisc noqueue_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), - .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .q.lock = 
__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .dev_queue = &noqueue_netdev_queue, }; @@ -426,12 +418,6 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) return NULL; } -static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - qdisc->q.qlen++; - return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); -} - static void pfifo_fast_reset(struct Qdisc* qdisc) { int prio; @@ -473,7 +459,6 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .enqueue = pfifo_fast_enqueue, .dequeue = pfifo_fast_dequeue, .peek = pfifo_fast_peek, - .requeue = pfifo_fast_requeue, .init = pfifo_fast_init, .reset = pfifo_fast_reset, .dump = pfifo_fast_dump, @@ -499,7 +484,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->padded = (char *) sch - (char *) p; INIT_LIST_HEAD(&sch->list); - skb_queue_head_init(&sch->requeue); skb_queue_head_init(&sch->q); sch->ops = ops; sch->enqueue = ops->enqueue; @@ -571,8 +555,6 @@ void qdisc_destroy(struct Qdisc *qdisc) dev_put(qdisc_dev(qdisc)); kfree_skb(qdisc->gso_skb); - __skb_queue_purge(&qdisc->requeue); - kfree((char *) qdisc - qdisc->padded); } EXPORT_SYMBOL(qdisc_destroy); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index cb20ee3b9fc..40408d595c0 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -240,26 +240,6 @@ congestion_drop: return NET_XMIT_CN; } -static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct gred_sched *t = qdisc_priv(sch); - struct gred_sched_data *q; - u16 dp = tc_index_to_dp(skb); - - if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { - if (net_ratelimit()) - printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x " - "for requeue, screwing up backlog.\n", - tc_index_to_dp(skb)); - } else { - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - q->backlog += qdisc_pkt_len(skb); - } - - return qdisc_requeue(skb, sch); -} - static struct sk_buff *gred_dequeue(struct Qdisc* sch) { struct sk_buff *skb; @@ -603,7 +583,6 @@ 
static struct Qdisc_ops gred_qdisc_ops __read_mostly = { .enqueue = gred_enqueue, .dequeue = gred_dequeue, .peek = qdisc_peek_head, - .requeue = gred_requeue, .drop = gred_drop, .init = gred_init, .reset = gred_reset, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d90b1652f2a..45c31b1a4e1 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -184,7 +184,6 @@ struct hfsc_sched struct rb_root eligible; /* eligible tree */ struct list_head droplist; /* active leaf class list (for dropping) */ - struct sk_buff_head requeue; /* requeued packet */ struct qdisc_watchdog watchdog; /* watchdog timer */ }; @@ -1019,6 +1018,14 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } cur_time = psched_get_time(); + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } + sch_tree_lock(sch); if (rsc != NULL) hfsc_change_rsc(cl, rsc, cur_time); @@ -1035,10 +1042,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } sch_tree_unlock(sch); - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); return 0; } @@ -1064,6 +1067,16 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + if (tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + kfree(cl); + return err; + } + } + if (rsc != NULL) hfsc_change_rsc(cl, rsc, 0); if (fsc != NULL) @@ -1094,9 +1107,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, qdisc_class_hash_grow(sch, &q->clhash); - if (tca[TCA_RATE]) - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } @@ -1203,7 +1213,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); 
hfsc_purge_queue(sch, cl); - *old = xchg(&cl->qdisc, new); + *old = cl->qdisc; + cl->qdisc = new; sch_tree_unlock(sch); return 0; } @@ -1432,7 +1443,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); - skb_queue_head_init(&q->requeue); q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; @@ -1517,7 +1527,6 @@ hfsc_reset_qdisc(struct Qdisc *sch) hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } - __skb_queue_purge(&q->requeue); q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); qdisc_watchdog_cancel(&q->watchdog); @@ -1542,7 +1551,6 @@ hfsc_destroy_qdisc(struct Qdisc *sch) hfsc_destroy_class(sch, cl); } qdisc_class_hash_destroy(&q->clhash); - __skb_queue_purge(&q->requeue); qdisc_watchdog_cancel(&q->watchdog); } @@ -1566,7 +1574,7 @@ static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct hfsc_class *cl; - int err; + int uninitialized_var(err); cl = hfsc_classify(skb, sch, &err); if (cl == NULL) { @@ -1609,8 +1617,6 @@ hfsc_dequeue(struct Qdisc *sch) if (sch->q.qlen == 0) return NULL; - if ((skb = __skb_dequeue(&q->requeue))) - goto out; cur_time = psched_get_time(); @@ -1659,24 +1665,12 @@ hfsc_dequeue(struct Qdisc *sch) set_passive(cl); } - out: sch->flags &= ~TCQ_F_THROTTLED; sch->q.qlen--; return skb; } -static int -hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct hfsc_sched *q = qdisc_priv(sch); - - __skb_queue_head(&q->requeue, skb); - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; -} - static unsigned int hfsc_drop(struct Qdisc *sch) { @@ -1728,7 +1722,6 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = { .enqueue = hfsc_enqueue, .dequeue = hfsc_dequeue, .peek = qdisc_peek_dequeued, - .requeue = hfsc_requeue, .drop = hfsc_drop, .cl_ops = &hfsc_class_ops, .priv_size = sizeof(struct hfsc_sched), diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 3fda8199713..8a451998973 
100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -551,7 +551,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - int ret; + int uninitialized_var(ret); struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb, sch, &ret); @@ -591,47 +591,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -/* TODO: requeuing packet charges it to policers again !! */ -static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - int ret; - struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = htb_classify(skb, sch, &ret); - struct sk_buff *tskb; - - if (cl == HTB_DIRECT) { - /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_head(&q->direct_queue, skb); - } else { - __skb_queue_head(&q->direct_queue, skb); - tskb = __skb_dequeue_tail(&q->direct_queue); - kfree_skb(tskb); - sch->qstats.drops++; - return NET_XMIT_CN; - } -#ifdef CONFIG_NET_CLS_ACT - } else if (!cl) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; -#endif - } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) != - NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - cl->qstats.drops++; - } - return ret; - } else - htb_activate(q, cl); - - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; -} - /** * htb_charge_class - charges amount "bytes" to leaf and ancestors * @@ -1141,7 +1100,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, == NULL) return -ENOBUFS; sch_tree_lock(sch); - if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { + *old = cl->un.leaf.q; + cl->un.leaf.q = new; + if (*old != NULL) { qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); } @@ -1371,9 +1332,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if ((cl = 
kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) goto failure; - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE] ? : &est.nla); + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE] ? : &est.nla); + if (err) { + kfree(cl); + goto failure; + } + cl->refcnt = 1; cl->children = 0; INIT_LIST_HEAD(&cl->un.leaf.drop_list); @@ -1425,10 +1391,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (parent) parent->children++; } else { - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } sch_tree_lock(sch); } @@ -1566,7 +1535,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .enqueue = htb_enqueue, .dequeue = htb_dequeue, .peek = qdisc_peek_dequeued, - .requeue = htb_requeue, .drop = htb_drop, .init = htb_init, .reset = htb_reset, diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 155648d23b7..7e151861794 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -92,40 +92,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - -static int -multiq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct Qdisc *qdisc; - struct multiq_sched_data *q = qdisc_priv(sch); - int ret; - - qdisc = multiq_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_ACT - if (qdisc == NULL) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } -#endif - - ret = qdisc->ops->requeue(skb, qdisc); - if (ret == NET_XMIT_SUCCESS) { - sch->q.qlen++; - sch->qstats.requeues++; - if (q->curband) - q->curband--; - else - q->curband = q->bands - 1; - return NET_XMIT_SUCCESS; - } - if (net_xmit_drop_count(ret)) - sch->qstats.drops++; - return ret; -} - - static struct sk_buff 
*multiq_dequeue(struct Qdisc *sch) { struct multiq_sched_data *q = qdisc_priv(sch); @@ -140,7 +106,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) q->curband = 0; /* Check that target subqueue is available before - * pulling an skb to avoid excessive requeues + * pulling an skb to avoid head-of-line blocking. */ if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) { qdisc = q->queues[q->curband]; @@ -170,7 +136,7 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch) curband = 0; /* Check that target subqueue is available before - * pulling an skb to avoid excessive requeues + * pulling an skb to avoid head-of-line blocking. */ if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) { qdisc = q->queues[curband]; @@ -248,7 +214,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) q->bands = qopt->bands; for (i = q->bands; i < q->max_bands; i++) { if (q->queues[i] != &noop_qdisc) { - struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); + struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); } @@ -258,7 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < q->bands; i++) { if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child; + struct Qdisc *child, *old; child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, @@ -266,12 +233,13 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) i + 1)); if (child) { sch_tree_lock(sch); - child = xchg(&q->queues[i], child); + old = q->queues[i]; + q->queues[i] = child; - if (child != &noop_qdisc) { - qdisc_tree_decrease_qlen(child, - child->q.qlen); - qdisc_destroy(child); + if (old != &noop_qdisc) { + qdisc_tree_decrease_qlen(old, + old->q.qlen); + qdisc_destroy(old); } sch_tree_unlock(sch); } @@ -480,7 +448,6 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = { .enqueue = multiq_enqueue, .dequeue = multiq_dequeue, .peek = multiq_peek, 
- .requeue = multiq_requeue, .drop = multiq_drop, .init = multiq_init, .reset = multiq_reset, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index f69698ff88d..f840d6b27c6 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -252,20 +252,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -/* Requeue packets but don't change time stamp */ -static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct netem_sched_data *q = qdisc_priv(sch); - int ret; - - if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - } - - return ret; -} - static unsigned int netem_drop(struct Qdisc* sch) { struct netem_sched_data *q = qdisc_priv(sch); @@ -345,10 +331,9 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) root_lock = qdisc_root_sleeping_lock(sch); spin_lock_bh(root_lock); - d = xchg(&q->delay_dist, d); + kfree(q->delay_dist); + q->delay_dist = d; spin_unlock_bh(root_lock); - - kfree(d); return 0; } @@ -531,7 +516,6 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = { .enqueue = tfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, - .requeue = qdisc_requeue, .drop = qdisc_queue_drop, .init = tfifo_init, .reset = qdisc_reset_queue, @@ -620,7 +604,6 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .enqueue = netem_enqueue, .dequeue = netem_dequeue, .peek = qdisc_peek_dequeued, - .requeue = netem_requeue, .drop = netem_drop, .init = netem_init, .reset = netem_reset, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3651da3e280..94cecef7014 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -93,33 +93,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - -static int -prio_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct Qdisc *qdisc; - int ret; - - qdisc = prio_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_ACT - if (qdisc == NULL) { - if (ret & 
__NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } -#endif - - if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) { - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; - } - if (net_xmit_drop_count(ret)) - sch->qstats.drops++; - return ret; -} - static struct sk_buff *prio_peek(struct Qdisc *sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -214,7 +187,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { - struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); + struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; if (child != &noop_qdisc) { qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); @@ -224,18 +198,19 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) for (i=0; i<q->bands; i++) { if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child; + struct Qdisc *child, *old; child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); - child = xchg(&q->queues[i], child); + old = q->queues[i]; + q->queues[i] = child; - if (child != &noop_qdisc) { - qdisc_tree_decrease_qlen(child, - child->q.qlen); - qdisc_destroy(child); + if (old != &noop_qdisc) { + qdisc_tree_decrease_qlen(old, + old->q.qlen); + qdisc_destroy(old); } sch_tree_unlock(sch); } @@ -435,7 +410,6 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .enqueue = prio_enqueue, .dequeue = prio_dequeue, .peek = prio_peek, - .requeue = prio_requeue, .drop = prio_drop, .init = prio_init, .reset = prio_reset, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 7abc51454c2..2bdf241f631 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -108,23 +108,6 @@ congestion_drop: return NET_XMIT_CN; } -static int red_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct red_sched_data *q = 
qdisc_priv(sch); - struct Qdisc *child = q->qdisc; - int ret; - - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - - ret = child->ops->requeue(skb, child); - if (likely(ret == NET_XMIT_SUCCESS)) { - sch->qstats.requeues++; - sch->q.qlen++; - } - return ret; -} - static struct sk_buff * red_dequeue(struct Qdisc* sch) { struct sk_buff *skb; @@ -219,7 +202,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) q->limit = ctl->limit; if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); - qdisc_destroy(xchg(&q->qdisc, child)); + qdisc_destroy(q->qdisc); + q->qdisc = child; } red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, @@ -300,7 +284,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; sch_tree_lock(sch); - *old = xchg(&q->qdisc, new); + *old = q->qdisc; + q->qdisc = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -370,7 +355,6 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = { .enqueue = red_enqueue, .dequeue = red_dequeue, .peek = red_peek, - .requeue = red_requeue, .drop = red_drop, .init = red_init, .reset = red_reset, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 198b83d42ba..ab8cfee3c9c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -329,68 +329,6 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_CN; } -static int -sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct sfq_sched_data *q = qdisc_priv(sch); - unsigned int hash; - sfq_index x; - int ret; - - hash = sfq_classify(skb, sch, &ret); - if (hash == 0) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } - hash--; - - x = q->ht[hash]; - if (x == SFQ_DEPTH) { - q->ht[hash] = x = q->dep[SFQ_DEPTH].next; - q->hash[x] = hash; - } - - sch->qstats.backlog += qdisc_pkt_len(skb); - __skb_queue_head(&q->qs[x], skb); - /* If selected queue has length 
q->limit+1, this means that - * all another queues are empty and we do simple tail drop. - * This packet is still requeued at head of queue, tail packet - * is dropped. - */ - if (q->qs[x].qlen > q->limit) { - skb = q->qs[x].prev; - __skb_unlink(skb, &q->qs[x]); - sch->qstats.drops++; - sch->qstats.backlog -= qdisc_pkt_len(skb); - kfree_skb(skb); - return NET_XMIT_CN; - } - - sfq_inc(q, x); - if (q->qs[x].qlen == 1) { /* The flow is new */ - if (q->tail == SFQ_DEPTH) { /* It is the first flow */ - q->tail = x; - q->next[x] = x; - q->allot[x] = q->quantum; - } else { - q->next[x] = q->next[q->tail]; - q->next[q->tail] = x; - q->tail = x; - } - } - - if (++sch->q.qlen <= q->limit) { - sch->qstats.requeues++; - return 0; - } - - sch->qstats.drops++; - sfq_drop(sch); - return NET_XMIT_CN; -} - static struct sk_buff * sfq_peek(struct Qdisc *sch) { @@ -636,7 +574,6 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = { .enqueue = sfq_enqueue, .dequeue = sfq_dequeue, .peek = sfq_peek, - .requeue = sfq_requeue, .drop = sfq_drop, .init = sfq_init, .reset = sfq_reset, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 435076cf620..a2f93c09f3c 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -139,19 +139,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) return 0; } -static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct tbf_sched_data *q = qdisc_priv(sch); - int ret; - - if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - } - - return ret; -} - static unsigned int tbf_drop(struct Qdisc* sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -249,6 +236,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) struct tc_tbf_qopt *qopt; struct qdisc_rate_table *rtab = NULL; struct qdisc_rate_table *ptab = NULL; + struct qdisc_rate_table *tmp; struct Qdisc *child = NULL; int max_size,n; @@ -297,7 +285,8 @@ static int tbf_change(struct Qdisc* sch, struct 
nlattr *opt) sch_tree_lock(sch); if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); - qdisc_destroy(xchg(&q->qdisc, child)); + qdisc_destroy(q->qdisc); + q->qdisc = child; } q->limit = qopt->limit; q->mtu = qopt->mtu; @@ -305,8 +294,14 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) q->buffer = qopt->buffer; q->tokens = q->buffer; q->ptokens = q->mtu; - rtab = xchg(&q->R_tab, rtab); - ptab = xchg(&q->P_tab, ptab); + + tmp = q->R_tab; + q->R_tab = rtab; + rtab = tmp; + + tmp = q->P_tab; + q->P_tab = ptab; + ptab = tmp; sch_tree_unlock(sch); err = 0; done: @@ -396,7 +391,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; sch_tree_lock(sch); - *old = xchg(&q->qdisc, new); + *old = q->qdisc; + q->qdisc = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -468,7 +464,6 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { .enqueue = tbf_enqueue, .dequeue = tbf_dequeue, .peek = qdisc_peek_dequeued, - .requeue = tbf_requeue, .drop = tbf_drop, .init = tbf_init, .reset = tbf_reset, diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index bf03e7fa184..cfc8e7caba6 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -93,16 +93,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) return NET_XMIT_DROP; } -static int -teql_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct teql_sched_data *q = qdisc_priv(sch); - - __skb_queue_head(&q->q, skb); - sch->qstats.requeues++; - return 0; -} - static struct sk_buff * teql_dequeue(struct Qdisc* sch) { @@ -441,7 +431,6 @@ static __init void teql_master_setup(struct net_device *dev) ops->enqueue = teql_enqueue; ops->dequeue = teql_dequeue; ops->peek = teql_peek; - ops->requeue = teql_requeue; ops->init = teql_qdisc_init; ops->reset = teql_reset; ops->destroy = teql_destroy; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a8ca743241e..b78e3be6901 100644 --- 
a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -102,6 +102,8 @@ struct sock *sctp_get_ctl_sock(void) /* Set up the proc fs entry for the SCTP protocol. */ static __init int sctp_proc_init(void) { + if (percpu_counter_init(&sctp_sockets_allocated, 0)) + goto out_nomem; #ifdef CONFIG_PROC_FS if (!proc_net_sctp) { struct proc_dir_entry *ent; @@ -110,7 +112,7 @@ static __init int sctp_proc_init(void) ent->owner = THIS_MODULE; proc_net_sctp = ent; } else - goto out_nomem; + goto out_free_percpu; } if (sctp_snmp_proc_init()) @@ -135,11 +137,14 @@ out_snmp_proc_init: proc_net_sctp = NULL; remove_proc_entry("sctp", init_net.proc_net); } -out_nomem: - return -ENOMEM; +out_free_percpu: + percpu_counter_destroy(&sctp_sockets_allocated); #else return 0; #endif /* CONFIG_PROC_FS */ + +out_nomem: + return -ENOMEM; } /* Clean up the proc fs entry for the SCTP protocol. diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a1b904529d5..a2de585888d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -114,7 +114,7 @@ extern int sysctl_sctp_wmem[3]; static int sctp_memory_pressure; static atomic_t sctp_memory_allocated; -static atomic_t sctp_sockets_allocated; +struct percpu_counter sctp_sockets_allocated; static void sctp_enter_memory_pressure(struct sock *sk) { @@ -3613,7 +3613,12 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); - atomic_inc(&sctp_sockets_allocated); + percpu_counter_inc(&sctp_sockets_allocated); + + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + local_bh_enable(); + return 0; } @@ -3627,7 +3632,10 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) /* Release our hold on the endpoint. 
*/ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - atomic_dec(&sctp_sockets_allocated); + percpu_counter_dec(&sctp_sockets_allocated); + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + local_bh_enable(); } /* API 4.1.7 shutdown() - TCP Style Syntax diff --git a/net/socket.c b/net/socket.c index d23cdba10d7..e9d65ea2687 100644 --- a/net/socket.c +++ b/net/socket.c @@ -989,7 +989,6 @@ static int sock_close(struct inode *inode, struct file *filp) printk(KERN_DEBUG "sock_close: NULL inode\n"); return 0; } - sock_fasync(-1, filp, 0); sock_release(SOCKET_I(inode)); return 0; } @@ -1426,8 +1425,8 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags) +asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) { struct socket *sock, *newsock; struct file *newfile; @@ -1510,66 +1509,10 @@ out_fd: goto out_put; } -#if 0 -#ifdef HAVE_SET_RESTORE_SIGMASK -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const sigset_t __user *sigmask, - size_t sigsetsize, int flags) -{ - sigset_t ksigmask, sigsaved; - int ret; - - if (sigmask) { - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) - return -EFAULT; - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); - - if (ret < 0 && signal_pending(current)) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. 
- */ - if (sigmask) { - memcpy(&current->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return ret; -} -#else -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const sigset_t __user *sigmask, - size_t sigsetsize, int flags) -{ - /* The platform does not support restoring the signal mask in the - * return path. So we do not allow using paccept() with a signal - * mask. */ - if (sigmask) - return -EINVAL; - - return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); -} -#endif -#endif - asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) { - return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); + return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); } /* @@ -2096,7 +2039,7 @@ static const unsigned char nargs[19]={ AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(6) + AL(4) }; #undef AL @@ -2115,7 +2058,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) unsigned long a0, a1; int err; - if (call < 1 || call > SYS_PACCEPT) + if (call < 1 || call > SYS_ACCEPT4) return -EINVAL; /* copy_from_user should be SMP safe. 
*/ @@ -2143,9 +2086,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) err = sys_listen(a0, a1); break; case SYS_ACCEPT: - err = - do_accept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2], 0); + err = sys_accept4(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], 0); break; case SYS_GETSOCKNAME: err = @@ -2192,12 +2134,9 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; - case SYS_PACCEPT: - err = - sys_paccept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2], - (const sigset_t __user *) a[3], - a[4], a[5]); + case SYS_ACCEPT4: + err = sys_accept4(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], a[3]); break; default: err = -EINVAL; diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 744b79fdcb1..4028502f052 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -133,13 +133,29 @@ static int generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) { struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); + int i; if (gcred->acred.uid != acred->uid || gcred->acred.gid != acred->gid || - gcred->acred.group_info != acred->group_info || gcred->acred.machine_cred != acred->machine_cred) - return 0; + goto out_nomatch; + + /* Optimisation in the case where pointers are identical... */ + if (gcred->acred.group_info == acred->group_info) + goto out_match; + + /* Slow path... 
*/ + if (gcred->acred.group_info->ngroups != acred->group_info->ngroups) + goto out_nomatch; + for (i = 0; i < gcred->acred.group_info->ngroups; i++) { + if (GROUP_AT(gcred->acred.group_info, i) != + GROUP_AT(acred->group_info, i)) + goto out_nomatch; + } +out_match: return 1; +out_nomatch: + return 0; } void __init rpc_init_generic_auth(void) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f8500f018f8..8872914aa4c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -827,7 +827,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; - int dma_mr_acc; + int uninitialized_var(dma_mr_acc); int need_dma_mr; int ret; int i; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 78f7f728ef1..3b21e0cc5e6 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -276,7 +276,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) struct rpcrdma_xprt *xprt = id->context; struct rpcrdma_ia *ia = &xprt->rx_ia; struct rpcrdma_ep *ep = &xprt->rx_ep; +#ifdef RPC_DEBUG struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; +#endif struct ib_qp_attr attr; struct ib_qp_init_attr iattr; int connstate = 0; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index cd72e22b132..acab41a48d6 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -555,7 +555,7 @@ static struct name_seq *nametbl_find_seq(u32 type) struct name_seq *ns; dbg("find_seq %u,(%u,0x%x) table = %p, hash[type] = %u\n", - type, ntohl(type), type, table.types, hash(type)); + type, htonl(type), type, table.types, hash(type)); seq_head = &table.types[hash(type)]; hlist_for_each_entry(ns, seq_node, seq_head, ns_list) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0b80634b2b7..5aaf23e43f1 100644 --- 
a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -164,7 +164,7 @@ static inline int unix_our_peer(struct sock *sk, struct sock *osk) static inline int unix_may_send(struct sock *sk, struct sock *osk) { - return (unix_peer(osk) == NULL || unix_our_peer(sk, osk)); + return unix_peer(osk) == NULL || unix_our_peer(sk, osk); } static inline int unix_recvq_full(struct sock const *sk) @@ -197,7 +197,7 @@ static inline void unix_release_addr(struct unix_address *addr) * - if started by zero, it is abstract name. */ -static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp) +static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp) { if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; @@ -216,7 +216,7 @@ static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp) return len; } - *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0)); + *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); return len; } @@ -295,8 +295,7 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) if (!net_eq(sock_net(s), net)) continue; - if(dentry && dentry->d_inode == i) - { + if (dentry && dentry->d_inode == i) { sock_hold(s); goto found; } @@ -354,7 +353,7 @@ static void unix_sock_destructor(struct sock *sk) WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { - printk("Attempt to release alive unix socket: %p\n", sk); + printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk); return; } @@ -362,12 +361,16 @@ static void unix_sock_destructor(struct sock *sk) unix_release_addr(u->addr); atomic_dec(&unix_nr_socks); + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + local_bh_enable(); #ifdef UNIX_REFCNT_DEBUG - printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks)); + printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, + atomic_read(&unix_nr_socks)); #endif } 
-static int unix_release_sock (struct sock *sk, int embrion) +static int unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct dentry *dentry; @@ -453,11 +456,11 @@ static int unix_listen(struct socket *sock, int backlog) struct unix_sock *u = unix_sk(sk); err = -EOPNOTSUPP; - if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET) - goto out; /* Only stream/seqpacket sockets accept */ + if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) + goto out; /* Only stream/seqpacket sockets accept */ err = -EINVAL; if (!u->addr) - goto out; /* No listens on an unbound socket */ + goto out; /* No listens on an unbound socket */ unix_state_lock(sk); if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) goto out_unlock; @@ -566,9 +569,9 @@ static const struct proto_ops unix_seqpacket_ops = { }; static struct proto unix_proto = { - .name = "UNIX", - .owner = THIS_MODULE, - .obj_size = sizeof(struct unix_sock), + .name = "UNIX", + .owner = THIS_MODULE, + .obj_size = sizeof(struct unix_sock), }; /* @@ -579,7 +582,7 @@ static struct proto unix_proto = { */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock * unix_create1(struct net *net, struct socket *sock) +static struct sock *unix_create1(struct net *net, struct socket *sock) { struct sock *sk = NULL; struct unix_sock *u; @@ -592,7 +595,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock) if (!sk) goto out; - sock_init_data(sock,sk); + sock_init_data(sock, sk); lockdep_set_class(&sk->sk_receive_queue.lock, &af_unix_sk_receive_queue_lock_key); @@ -611,6 +614,11 @@ static struct sock * unix_create1(struct net *net, struct socket *sock) out: if (sk == NULL) atomic_dec(&unix_nr_socks); + else { + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + local_bh_enable(); + } return sk; } @@ -653,7 +661,7 @@ static int unix_release(struct socket *sock) sock->sk = NULL; - return unix_release_sock (sk, 
0); + return unix_release_sock(sk, 0); } static int unix_autobind(struct socket *sock) @@ -662,7 +670,7 @@ static int unix_autobind(struct socket *sock) struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); static u32 ordernum = 1; - struct unix_address * addr; + struct unix_address *addr; int err; mutex_lock(&u->readlock); @@ -681,7 +689,7 @@ static int unix_autobind(struct socket *sock) retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); - addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); + addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; @@ -768,7 +776,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; - struct dentry * dentry = NULL; + struct dentry *dentry = NULL; struct nameidata nd; int err; unsigned hash; @@ -1207,7 +1215,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) int err; err = -EOPNOTSUPP; - if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET) + if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto out; err = -EINVAL; @@ -1286,7 +1294,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) skb->destructor = sock_wfree; UNIXCB(skb).fp = NULL; - for (i=scm->fp->count-1; i>=0; i--) + for (i = scm->fp->count-1; i >= 0; i--) unix_notinflight(scm->fp->fp[i]); } @@ -1302,14 +1310,23 @@ static void unix_destruct_fds(struct sk_buff *skb) sock_wfree(skb); } -static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) +static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; - for (i=scm->fp->count-1; i>=0; i--) + + /* + * Need to duplicate file references for the sake of garbage + * collection. 
Otherwise a socket in the fps might become a + * candidate for GC while the skb is not yet queued. + */ + UNIXCB(skb).fp = scm_fp_dup(scm->fp); + if (!UNIXCB(skb).fp) + return -ENOMEM; + + for (i = scm->fp->count-1; i >= 0; i--) unix_inflight(scm->fp->fp[i]); - UNIXCB(skb).fp = scm->fp; skb->destructor = unix_destruct_fds; - scm->fp = NULL; + return 0; } /* @@ -1368,12 +1385,15 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) - unix_attach_fds(siocb->scm, skb); + if (siocb->scm->fp) { + err = unix_attach_fds(siocb->scm, skb); + if (err) + goto out_free; + } unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); - err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); + err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); if (err) goto out_free; @@ -1474,7 +1494,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock *sk = sock->sk; struct sock *other = NULL; struct sockaddr_un *sunaddr = msg->msg_name; - int err,size; + int err, size; struct sk_buff *skb; int sent = 0; struct scm_cookie tmp_scm; @@ -1503,8 +1523,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, if (sk->sk_shutdown & SEND_SHUTDOWN) goto pipe_err; - while(sent < len) - { + while (sent < len) { /* * Optimisation for the fact that under 0.01% of X * messages typically need breaking up. 
@@ -1523,7 +1542,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, * Grab a buffer */ - skb = sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err); + skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT, + &err); if (skb == NULL) goto out_err; @@ -1538,10 +1558,16 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, size = min_t(int, size, skb_tailroom(skb)); memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) - unix_attach_fds(siocb->scm, skb); + if (siocb->scm->fp) { + err = unix_attach_fds(siocb->scm, skb); + if (err) { + kfree_skb(skb); + goto out_err; + } + } - if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { + err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); + if (err) { kfree_skb(skb); goto out_err; } @@ -1567,8 +1593,8 @@ pipe_err_free: unix_state_unlock(other); kfree_skb(skb); pipe_err: - if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL)) - send_sig(SIGPIPE,current,0); + if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) + send_sig(SIGPIPE, current, 0); err = -EPIPE; out_err: scm_destroy(siocb->scm); @@ -1658,13 +1684,10 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, siocb->scm->creds = *UNIXCREDS(skb); unix_set_secdata(siocb->scm, skb); - if (!(flags & MSG_PEEK)) - { + if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) unix_detach_fds(siocb->scm, skb); - } - else - { + } else { /* It is questionable: on PEEK we could: - do not return fds - good, but too simple 8) - return fds, and do not return them on read (old strategy, @@ -1685,7 +1708,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, scm_recv(sock, msg, siocb->scm, flags); out_free: - skb_free_datagram(sk,skb); + skb_free_datagram(sk, skb); out_unlock: mutex_unlock(&u->readlock); out: @@ -1696,7 +1719,7 @@ out: * Sleep until data has arrive. But check for races.. 
*/ -static long unix_stream_data_wait(struct sock * sk, long timeo) +static long unix_stream_data_wait(struct sock *sk, long timeo) { DEFINE_WAIT(wait); @@ -1765,15 +1788,13 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, mutex_lock(&u->readlock); - do - { + do { int chunk; struct sk_buff *skb; unix_state_lock(sk); skb = skb_dequeue(&sk->sk_receive_queue); - if (skb == NULL) - { + if (skb == NULL) { if (copied >= target) goto unlock; @@ -1781,7 +1802,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, * POSIX 1003.1g mandates this order. */ - if ((err = sock_error(sk)) != 0) + err = sock_error(sk); + if (err) goto unlock; if (sk->sk_shutdown & RCV_SHUTDOWN) goto unlock; @@ -1808,7 +1830,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (check_creds) { /* Never glue messages from different writers */ - if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) { + if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, + sizeof(siocb->scm->creds)) != 0) { skb_queue_head(&sk->sk_receive_queue, skb); break; } @@ -1819,8 +1842,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } /* Copy address just once */ - if (sunaddr) - { + if (sunaddr) { unix_copy_addr(msg, skb->sk); sunaddr = NULL; } @@ -1836,16 +1858,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, size -= chunk; /* Mark read part of skb as used */ - if (!(flags & MSG_PEEK)) - { + if (!(flags & MSG_PEEK)) { skb_pull(skb, chunk); if (UNIXCB(skb).fp) unix_detach_fds(siocb->scm, skb); /* put the skb back if we didn't use it up.. */ - if (skb->len) - { + if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); break; } @@ -1854,9 +1874,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (siocb->scm->fp) break; - } - else - { + } else { /* It is questionable, see note in unix_dgram_recvmsg. 
*/ if (UNIXCB(skb).fp) @@ -1922,13 +1940,12 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) long amount = 0; int err; - switch(cmd) - { - case SIOCOUTQ: - amount = atomic_read(&sk->sk_wmem_alloc); - err = put_user(amount, (int __user *)arg); - break; - case SIOCINQ: + switch (cmd) { + case SIOCOUTQ: + amount = atomic_read(&sk->sk_wmem_alloc); + err = put_user(amount, (int __user *)arg); + break; + case SIOCINQ: { struct sk_buff *skb; @@ -1952,14 +1969,14 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } - default: - err = -ENOIOCTLCMD; - break; + default: + err = -ENOIOCTLCMD; + break; } return err; } -static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait) +static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; unsigned int mask; @@ -1981,7 +1998,8 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ - if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE) + if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && + sk->sk_state == TCP_CLOSE) mask |= POLLHUP; /* @@ -2212,7 +2230,7 @@ static int unix_net_init(struct net *net) #endif error = 0; out: - return 0; + return error; } static void unix_net_exit(struct net *net) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 00734e22ec1..5a0061d6b9b 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), */ struct sock *sk = unix_get_socket(*fp++); if (sk) { - hit = true; - func(unix_sk(sk)); + struct unix_sock *u = unix_sk(sk); + + /* + * Ignore non-candidates, they could + * have been added to the queues after + * starting the garbage collection + */ + if 
(u->gc_candidate) { + hit = true; + func(u); + } } } if (hit && hitlist != NULL) { @@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u) { atomic_long_inc(&u->inflight); /* - * If this is still a candidate, move it to the end of the - * list, so that it's checked even if it was already passed - * over + * If this still might be part of a cycle, move it to the end + * of the list, so that it's checked even if it was already + * passed over */ - if (u->gc_candidate) + if (u->gc_maybe_cycle) list_move_tail(&u->link, &gc_candidates); } @@ -267,6 +276,7 @@ void unix_gc(void) struct unix_sock *next; struct sk_buff_head hitlist; struct list_head cursor; + LIST_HEAD(not_cycle_list); spin_lock(&unix_gc_lock); @@ -282,10 +292,14 @@ void unix_gc(void) * * Holding unix_gc_lock will protect these candidates from * being detached, and hence from gaining an external - * reference. This also means, that since there are no - * possible receivers, the receive queues of these sockets are - * static during the GC, even though the dequeue is done - * before the detach without atomicity guarantees. + * reference. Since there are no possible receivers, all + * buffers currently on the candidates' queues stay there + * during the garbage collection. + * + * We also know that no new candidate can be added onto the + * receive queues. Other, non candidate sockets _can_ be + * added to queue, so we must make sure only to touch + * candidates. 
*/ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { long total_refs; @@ -299,6 +313,7 @@ void unix_gc(void) if (total_refs == inflight_refs) { list_move_tail(&u->link, &gc_candidates); u->gc_candidate = 1; + u->gc_maybe_cycle = 1; } } @@ -325,14 +340,24 @@ void unix_gc(void) list_move(&cursor, &u->link); if (atomic_long_read(&u->inflight) > 0) { - list_move_tail(&u->link, &gc_inflight_list); - u->gc_candidate = 0; + list_move_tail(&u->link, &not_cycle_list); + u->gc_maybe_cycle = 0; scan_children(&u->sk, inc_inflight_move_tail, NULL); } } list_del(&cursor); /* + * not_cycle_list contains those sockets which do not make up a + * cycle. Restore these to the inflight list. + */ + while (!list_empty(&not_cycle_list)) { + u = list_entry(not_cycle_list.next, struct unix_sock, link); + u->gc_candidate = 0; + list_move_tail(&u->link, &gc_inflight_list); + } + + /* * Now gc_candidates contains only garbage. Restore original * inflight counters for these as well, and remove the skbuffs * which are creating the cycle(s). diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 7f07152bc10..39701dec1db 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -60,6 +60,8 @@ #define KMEM_SAFETYZONE 8 +#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) + /* * Function Prototypes */ @@ -511,7 +513,7 @@ static int wanrouter_device_shutdown(struct wan_device *wandev) if (err) return err; /* The above function deallocates the current dev - * structure. Therefore, we cannot use dev->priv + * structure. 
Therefore, we cannot use netdev_priv(dev) * as the next element: wandev->dev points to the * next element */ dev = wandev->dev; @@ -589,10 +591,6 @@ static int wanrouter_device_new_if(struct wan_device *wandev, err = -EPROTONOSUPPORT; goto out; } else { - dev = kzalloc(sizeof(struct net_device), GFP_KERNEL); - err = -ENOBUFS; - if (dev == NULL) - goto out; err = wandev->new_if(wandev, dev, cnf); } @@ -622,10 +620,9 @@ static int wanrouter_device_new_if(struct wan_device *wandev, wandev->dev = dev; } else { for (slave=wandev->dev; - *((struct net_device **)slave->priv); - slave = *((struct net_device **)slave->priv)); - - *((struct net_device **)slave->priv) = dev; + DEV_TO_SLAVE(slave); + slave = DEV_TO_SLAVE(slave)) + DEV_TO_SLAVE(slave) = dev; } ++wandev->ndev; @@ -636,15 +633,9 @@ static int wanrouter_device_new_if(struct wan_device *wandev, } if (wandev->del_if) wandev->del_if(wandev, dev); + free_netdev(dev); } - /* This code has moved from del_if() function */ - kfree(dev->priv); - dev->priv = NULL; - - /* Sync PPP is disabled */ - if (cnf->config_id != WANCONFIG_MPPP) - kfree(dev); out: kfree(cnf); return err; @@ -734,7 +725,7 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name) dev = wandev->dev; prev = NULL; while (dev && strcmp(name, dev->name)) { - struct net_device **slave = dev->priv; + struct net_device **slave = netdev_priv(dev); prev = dev; dev = *slave; } @@ -751,12 +742,12 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name) lock_adapter_irq(&wandev->lock, &smp_flags); if (prev) { - struct net_device **prev_slave = prev->priv; - struct net_device **slave = dev->priv; + struct net_device **prev_slave = netdev_priv(prev); + struct net_device **slave = netdev_priv(dev); *prev_slave = *slave; } else { - struct net_device **slave = dev->priv; + struct net_device **slave = netdev_priv(dev); wandev->dev = *slave; } --wandev->ndev; @@ -764,11 +755,6 @@ static int wanrouter_delete_interface(struct 
wan_device *wandev, char *name) printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name); - /* Due to new interface linking method using dev->priv, - * this code has moved from del_if() function.*/ - kfree(dev->priv); - dev->priv=NULL; - unregister_netdev(dev); free_netdev(dev); diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index ae7f2262dfb..e28e2b8fa43 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -1,6 +1,15 @@ config CFG80211 tristate "Improved wireless configuration API" +config CFG80211_REG_DEBUG + bool "cfg80211 regulatory debugging" + depends on CFG80211 + default n + ---help--- + You can enable this if you want to debug regulatory changes. + + If unsure, say N. + config NL80211 bool "nl80211 new netlink interface support" depends on CFG80211 @@ -40,6 +49,8 @@ config WIRELESS_OLD_REGULATORY ieee80211_regdom module parameter. This is being phased out and you should stop using them ASAP. + Note: You will need CRDA if you want 802.11d support + Say Y unless you have installed a new userspace application. Also say Y if have one currently depending on the ieee80211_regdom module parameter and cannot port it to use the new userspace @@ -82,3 +93,12 @@ config LIB80211 Drivers should select this themselves if needed. Say Y if you want this built into your kernel. 
+ +config LIB80211_CRYPT_WEP + tristate + +config LIB80211_CRYPT_CCMP + tristate + +config LIB80211_CRYPT_TKIP + tristate diff --git a/net/wireless/Makefile b/net/wireless/Makefile index d2d848d445f..cc547edb111 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,6 +1,9 @@ obj-$(CONFIG_WIRELESS_EXT) += wext.o obj-$(CONFIG_CFG80211) += cfg80211.o obj-$(CONFIG_LIB80211) += lib80211.o +obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o +obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o +obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o cfg80211-$(CONFIG_NL80211) += nl80211.o diff --git a/net/wireless/core.c b/net/wireless/core.c index 72825afe2bf..b96fc0c3f1c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -19,7 +19,6 @@ #include "nl80211.h" #include "core.h" #include "sysfs.h" -#include "reg.h" /* name for sysfs, %d is appended */ #define PHY_NAME "phy" @@ -236,8 +235,7 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) mutex_unlock(&cfg80211_drv_mutex); /* give it a proper name */ - snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE, - PHY_NAME "%d", drv->idx); + dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->idx); mutex_init(&drv->mtx); mutex_init(&drv->devlist_mtx); @@ -349,6 +347,10 @@ void wiphy_unregister(struct wiphy *wiphy) /* unlock again before freeing */ mutex_unlock(&drv->mtx); + /* If this device got a regulatory hint tell core its + * free to listen now to a new shiny device regulatory hint */ + reg_device_remove(wiphy); + list_del(&drv->list); device_del(&drv->wiphy.dev); debugfs_remove(drv->wiphy.debugfsdir); diff --git a/net/wireless/core.h b/net/wireless/core.h index 771cc5cc765..f7fb9f41302 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -11,6 +11,7 @@ #include <net/genetlink.h> #include <net/wireless.h> #include <net/cfg80211.h> +#include "reg.h" struct cfg80211_registered_device { struct cfg80211_ops *ops; @@ 
-21,6 +22,18 @@ struct cfg80211_registered_device { * any call is in progress */ struct mutex mtx; + /* ISO / IEC 3166 alpha2 for which this device is receiving + * country IEs on, this can help disregard country IEs from APs + * on the same alpha2 quickly. The alpha2 may differ from + * cfg80211_regdomain's alpha2 when an intersection has occurred. + * If the AP is reconfigured this can also be used to tell us if + * the country on the country IE changed. */ + char country_ie_alpha2[2]; + + /* If a Country IE has been received this tells us the environment + * which its telling us its in. This defaults to ENVIRON_ANY */ + enum environment_cap env; + /* wiphy index, internal only */ int idx; diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c index e71f7d08562..97d411f7450 100644 --- a/net/wireless/lib80211.c +++ b/net/wireless/lib80211.c @@ -3,11 +3,23 @@ * * Copyright(c) 2008 John W. Linville <linville@tuxdriver.com> * + * Portions copied from old ieee80211 component, w/ original copyright + * notices below: + * + * Host AP crypto routines + * + * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi> + * Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com> + * */ #include <linux/module.h> #include <linux/ctype.h> #include <linux/ieee80211.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/string.h> #include <net/lib80211.h> @@ -19,6 +31,14 @@ MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_AUTHOR("John W. 
Linville <linville@tuxdriver.com>"); MODULE_LICENSE("GPL"); +struct lib80211_crypto_alg { + struct list_head list; + struct lib80211_crypto_ops *ops; +}; + +static LIST_HEAD(lib80211_crypto_algs); +static DEFINE_SPINLOCK(lib80211_crypto_lock); + const char *print_ssid(char *buf, const char *ssid, u8 ssid_len) { const char *s = ssid; @@ -51,15 +71,214 @@ const char *print_ssid(char *buf, const char *ssid, u8 ssid_len) } EXPORT_SYMBOL(print_ssid); -static int __init ieee80211_init(void) +int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, + spinlock_t *lock) { - printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n"); + memset(info, 0, sizeof(*info)); + + info->name = name; + info->lock = lock; + + INIT_LIST_HEAD(&info->crypt_deinit_list); + setup_timer(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler, + (unsigned long)info); + return 0; } +EXPORT_SYMBOL(lib80211_crypt_info_init); + +void lib80211_crypt_info_free(struct lib80211_crypt_info *info) +{ + int i; + + lib80211_crypt_quiescing(info); + del_timer_sync(&info->crypt_deinit_timer); + lib80211_crypt_deinit_entries(info, 1); + + for (i = 0; i < NUM_WEP_KEYS; i++) { + struct lib80211_crypt_data *crypt = info->crypt[i]; + if (crypt) { + if (crypt->ops) { + crypt->ops->deinit(crypt->priv); + module_put(crypt->ops->owner); + } + kfree(crypt); + info->crypt[i] = NULL; + } + } +} +EXPORT_SYMBOL(lib80211_crypt_info_free); + +void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, int force) +{ + struct lib80211_crypt_data *entry, *next; + unsigned long flags; + + spin_lock_irqsave(info->lock, flags); + list_for_each_entry_safe(entry, next, &info->crypt_deinit_list, list) { + if (atomic_read(&entry->refcnt) != 0 && !force) + continue; + + list_del(&entry->list); + + if (entry->ops) { + entry->ops->deinit(entry->priv); + module_put(entry->ops->owner); + } + kfree(entry); + } + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_deinit_entries); + +/* After 
this, crypt_deinit_list won't accept new members */ +void lib80211_crypt_quiescing(struct lib80211_crypt_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(info->lock, flags); + info->crypt_quiesced = 1; + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_quiescing); + +void lib80211_crypt_deinit_handler(unsigned long data) +{ + struct lib80211_crypt_info *info = (struct lib80211_crypt_info *)data; + unsigned long flags; + + lib80211_crypt_deinit_entries(info, 0); + + spin_lock_irqsave(info->lock, flags); + if (!list_empty(&info->crypt_deinit_list) && !info->crypt_quiesced) { + printk(KERN_DEBUG "%s: entries remaining in delayed crypt " + "deletion list\n", info->name); + info->crypt_deinit_timer.expires = jiffies + HZ; + add_timer(&info->crypt_deinit_timer); + } + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_deinit_handler); + +void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, + struct lib80211_crypt_data **crypt) +{ + struct lib80211_crypt_data *tmp; + unsigned long flags; + + if (*crypt == NULL) + return; + + tmp = *crypt; + *crypt = NULL; + + /* must not run ops->deinit() while there may be pending encrypt or + * decrypt operations. Use a list of delayed deinits to avoid needing + * locking. 
*/ + + spin_lock_irqsave(info->lock, flags); + if (!info->crypt_quiesced) { + list_add(&tmp->list, &info->crypt_deinit_list); + if (!timer_pending(&info->crypt_deinit_timer)) { + info->crypt_deinit_timer.expires = jiffies + HZ; + add_timer(&info->crypt_deinit_timer); + } + } + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_delayed_deinit); + +int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops) +{ + unsigned long flags; + struct lib80211_crypto_alg *alg; + + alg = kzalloc(sizeof(*alg), GFP_KERNEL); + if (alg == NULL) + return -ENOMEM; + + alg->ops = ops; + + spin_lock_irqsave(&lib80211_crypto_lock, flags); + list_add(&alg->list, &lib80211_crypto_algs); + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + + printk(KERN_DEBUG "lib80211_crypt: registered algorithm '%s'\n", + ops->name); + + return 0; +} +EXPORT_SYMBOL(lib80211_register_crypto_ops); + +int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops) +{ + struct lib80211_crypto_alg *alg; + unsigned long flags; + + spin_lock_irqsave(&lib80211_crypto_lock, flags); + list_for_each_entry(alg, &lib80211_crypto_algs, list) { + if (alg->ops == ops) + goto found; + } + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + return -EINVAL; + + found: + printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm " + "'%s'\n", ops->name); + list_del(&alg->list); + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + kfree(alg); + return 0; +} +EXPORT_SYMBOL(lib80211_unregister_crypto_ops); + +struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name) +{ + struct lib80211_crypto_alg *alg; + unsigned long flags; + + spin_lock_irqsave(&lib80211_crypto_lock, flags); + list_for_each_entry(alg, &lib80211_crypto_algs, list) { + if (strcmp(alg->ops->name, name) == 0) + goto found; + } + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + return NULL; + + found: + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + return alg->ops; +} 
+EXPORT_SYMBOL(lib80211_get_crypto_ops); + +static void *lib80211_crypt_null_init(int keyidx) +{ + return (void *)1; +} + +static void lib80211_crypt_null_deinit(void *priv) +{ +} + +static struct lib80211_crypto_ops lib80211_crypt_null = { + .name = "NULL", + .init = lib80211_crypt_null_init, + .deinit = lib80211_crypt_null_deinit, + .owner = THIS_MODULE, +}; + +static int __init lib80211_init(void) +{ + printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n"); + return lib80211_register_crypto_ops(&lib80211_crypt_null); +} -static void __exit ieee80211_exit(void) +static void __exit lib80211_exit(void) { + lib80211_unregister_crypto_ops(&lib80211_crypt_null); + BUG_ON(!list_empty(&lib80211_crypto_algs)); } -module_init(ieee80211_init); -module_exit(ieee80211_exit); +module_init(lib80211_init); +module_exit(lib80211_exit); diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index bea04af0b48..db428194c16 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -1,7 +1,8 @@ /* - * Host AP crypt: host-based CCMP encryption implementation for Host AP driver + * lib80211 crypt: host-based CCMP encryption implementation for lib80211 * * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi> + * Copyright (c) 2008, John W. 
Linville <linville@tuxdriver.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -22,10 +23,12 @@ #include <asm/string.h> #include <linux/wireless.h> -#include <net/ieee80211.h> +#include <linux/ieee80211.h> #include <linux/crypto.h> +#include <net/lib80211.h> + MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("Host AP crypt: CCMP"); MODULE_LICENSE("GPL"); @@ -36,7 +39,7 @@ MODULE_LICENSE("GPL"); #define CCMP_TK_LEN 16 #define CCMP_PN_LEN 6 -struct ieee80211_ccmp_data { +struct lib80211_ccmp_data { u8 key[CCMP_TK_LEN]; int key_set; @@ -57,15 +60,15 @@ struct ieee80211_ccmp_data { u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN]; }; -static inline void ieee80211_ccmp_aes_encrypt(struct crypto_cipher *tfm, +static inline void lib80211_ccmp_aes_encrypt(struct crypto_cipher *tfm, const u8 pt[16], u8 ct[16]) { crypto_cipher_encrypt_one(tfm, ct, pt); } -static void *ieee80211_ccmp_init(int key_idx) +static void *lib80211_ccmp_init(int key_idx) { - struct ieee80211_ccmp_data *priv; + struct lib80211_ccmp_data *priv; priv = kzalloc(sizeof(*priv), GFP_ATOMIC); if (priv == NULL) @@ -74,7 +77,7 @@ static void *ieee80211_ccmp_init(int key_idx) priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tfm)) { - printk(KERN_DEBUG "ieee80211_crypt_ccmp: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_ccmp: could not allocate " "crypto API aes\n"); priv->tfm = NULL; goto fail; @@ -92,9 +95,9 @@ static void *ieee80211_ccmp_init(int key_idx) return NULL; } -static void ieee80211_ccmp_deinit(void *priv) +static void lib80211_ccmp_deinit(void *priv) { - struct ieee80211_ccmp_data *_priv = priv; + struct lib80211_ccmp_data *_priv = priv; if (_priv && _priv->tfm) crypto_free_cipher(_priv->tfm); kfree(priv); @@ -108,20 +111,17 @@ static inline void xor_block(u8 * b, u8 * a, size_t len) } static void ccmp_init_blocks(struct crypto_cipher *tfm, - 
struct ieee80211_hdr_4addr *hdr, + struct ieee80211_hdr *hdr, u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0) { u8 *pos, qc = 0; size_t aad_len; - u16 fc; int a4_included, qc_included; u8 aad[2 * AES_BLOCK_LEN]; - fc = le16_to_cpu(hdr->frame_ctl); - a4_included = ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)); - qc_included = ((WLAN_FC_GET_TYPE(fc) == IEEE80211_FTYPE_DATA) && - (WLAN_FC_GET_STYPE(fc) & IEEE80211_STYPE_QOS_DATA)); + a4_included = ieee80211_has_a4(hdr->frame_control); + qc_included = ieee80211_is_data_qos(hdr->frame_control); + aad_len = 22; if (a4_included) aad_len += 6; @@ -158,7 +158,7 @@ static void ccmp_init_blocks(struct crypto_cipher *tfm, aad[2] = pos[0] & 0x8f; aad[3] = pos[1] & 0xc7; memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN); - pos = (u8 *) & hdr->seq_ctl; + pos = (u8 *) & hdr->seq_ctrl; aad[22] = pos[0] & 0x0f; aad[23] = 0; /* all bits masked */ memset(aad + 24, 0, 8); @@ -170,20 +170,20 @@ static void ccmp_init_blocks(struct crypto_cipher *tfm, } /* Start with the first block and AAD */ - ieee80211_ccmp_aes_encrypt(tfm, b0, auth); + lib80211_ccmp_aes_encrypt(tfm, b0, auth); xor_block(auth, aad, AES_BLOCK_LEN); - ieee80211_ccmp_aes_encrypt(tfm, auth, auth); + lib80211_ccmp_aes_encrypt(tfm, auth, auth); xor_block(auth, &aad[AES_BLOCK_LEN], AES_BLOCK_LEN); - ieee80211_ccmp_aes_encrypt(tfm, auth, auth); + lib80211_ccmp_aes_encrypt(tfm, auth, auth); b0[0] &= 0x07; b0[14] = b0[15] = 0; - ieee80211_ccmp_aes_encrypt(tfm, b0, s0); + lib80211_ccmp_aes_encrypt(tfm, b0, s0); } -static int ieee80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, +static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, u8 *aeskey, int keylen, void *priv) { - struct ieee80211_ccmp_data *key = priv; + struct lib80211_ccmp_data *key = priv; int i; u8 *pos; @@ -217,12 +217,12 @@ static int ieee80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, return CCMP_HDR_LEN; } -static int ieee80211_ccmp_encrypt(struct 
sk_buff *skb, int hdr_len, void *priv) +static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct ieee80211_ccmp_data *key = priv; + struct lib80211_ccmp_data *key = priv; int data_len, i, blocks, last, len; u8 *pos, *mic; - struct ieee80211_hdr_4addr *hdr; + struct ieee80211_hdr *hdr; u8 *b0 = key->tx_b0; u8 *b = key->tx_b; u8 *e = key->tx_e; @@ -232,13 +232,13 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) return -1; data_len = skb->len - hdr_len; - len = ieee80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv); + len = lib80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv); if (len < 0) return -1; pos = skb->data + hdr_len + CCMP_HDR_LEN; mic = skb_put(skb, CCMP_MIC_LEN); - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct ieee80211_hdr *)skb->data; ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); @@ -248,11 +248,11 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) len = (i == blocks && last) ? 
last : AES_BLOCK_LEN; /* Authentication */ xor_block(b, pos, len); - ieee80211_ccmp_aes_encrypt(key->tfm, b, b); + lib80211_ccmp_aes_encrypt(key->tfm, b, b); /* Encryption, with counter */ b0[14] = (i >> 8) & 0xff; b0[15] = i & 0xff; - ieee80211_ccmp_aes_encrypt(key->tfm, b0, e); + lib80211_ccmp_aes_encrypt(key->tfm, b0, e); xor_block(pos, e, len); pos += len; } @@ -284,11 +284,11 @@ static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o) return 0; } -static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct ieee80211_ccmp_data *key = priv; + struct lib80211_ccmp_data *key = priv; u8 keyidx, *pos; - struct ieee80211_hdr_4addr *hdr; + struct ieee80211_hdr *hdr; u8 *b0 = key->rx_b0; u8 *b = key->rx_b; u8 *a = key->rx_a; @@ -302,7 +302,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return -1; } - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct ieee80211_hdr *)skb->data; pos = skb->data + hdr_len; keyidx = pos[3]; if (!(keyidx & (1 << 5))) { @@ -337,8 +337,8 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) pos += 8; if (ccmp_replay_check(pn, key->rx_pn)) { - if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { - IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=%pM " + if (net_ratelimit()) { + printk(KERN_DEBUG "CCMP: replay detected: STA=%pM " "previous PN %02x%02x%02x%02x%02x%02x " "received PN %02x%02x%02x%02x%02x%02x\n", hdr->addr2, @@ -361,11 +361,11 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) /* Decrypt, with counter */ b0[14] = (i >> 8) & 0xff; b0[15] = i & 0xff; - ieee80211_ccmp_aes_encrypt(key->tfm, b0, b); + lib80211_ccmp_aes_encrypt(key->tfm, b0, b); xor_block(pos, b, len); /* Authentication */ xor_block(a, pos, len); - ieee80211_ccmp_aes_encrypt(key->tfm, a, a); + lib80211_ccmp_aes_encrypt(key->tfm, a, a); pos += len; } @@ 
-388,9 +388,9 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return keyidx; } -static int ieee80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) { - struct ieee80211_ccmp_data *data = priv; + struct lib80211_ccmp_data *data = priv; int keyidx; struct crypto_cipher *tfm = data->tfm; @@ -418,9 +418,9 @@ static int ieee80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) return 0; } -static int ieee80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) { - struct ieee80211_ccmp_data *data = priv; + struct lib80211_ccmp_data *data = priv; if (len < CCMP_TK_LEN) return -1; @@ -441,9 +441,9 @@ static int ieee80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) return CCMP_TK_LEN; } -static char *ieee80211_ccmp_print_stats(char *p, void *priv) +static char *lib80211_ccmp_print_stats(char *p, void *priv) { - struct ieee80211_ccmp_data *ccmp = priv; + struct lib80211_ccmp_data *ccmp = priv; p += sprintf(p, "key[%d] alg=CCMP key_set=%d " "tx_pn=%02x%02x%02x%02x%02x%02x " @@ -461,32 +461,32 @@ static char *ieee80211_ccmp_print_stats(char *p, void *priv) return p; } -static struct ieee80211_crypto_ops ieee80211_crypt_ccmp = { +static struct lib80211_crypto_ops lib80211_crypt_ccmp = { .name = "CCMP", - .init = ieee80211_ccmp_init, - .deinit = ieee80211_ccmp_deinit, - .build_iv = ieee80211_ccmp_hdr, - .encrypt_mpdu = ieee80211_ccmp_encrypt, - .decrypt_mpdu = ieee80211_ccmp_decrypt, + .init = lib80211_ccmp_init, + .deinit = lib80211_ccmp_deinit, + .build_iv = lib80211_ccmp_hdr, + .encrypt_mpdu = lib80211_ccmp_encrypt, + .decrypt_mpdu = lib80211_ccmp_decrypt, .encrypt_msdu = NULL, .decrypt_msdu = NULL, - .set_key = ieee80211_ccmp_set_key, - .get_key = ieee80211_ccmp_get_key, - .print_stats = ieee80211_ccmp_print_stats, + .set_key = lib80211_ccmp_set_key, + .get_key 
= lib80211_ccmp_get_key, + .print_stats = lib80211_ccmp_print_stats, .extra_mpdu_prefix_len = CCMP_HDR_LEN, .extra_mpdu_postfix_len = CCMP_MIC_LEN, .owner = THIS_MODULE, }; -static int __init ieee80211_crypto_ccmp_init(void) +static int __init lib80211_crypto_ccmp_init(void) { - return ieee80211_register_crypto_ops(&ieee80211_crypt_ccmp); + return lib80211_register_crypto_ops(&lib80211_crypt_ccmp); } -static void __exit ieee80211_crypto_ccmp_exit(void) +static void __exit lib80211_crypto_ccmp_exit(void) { - ieee80211_unregister_crypto_ops(&ieee80211_crypt_ccmp); + lib80211_unregister_crypto_ops(&lib80211_crypt_ccmp); } -module_init(ieee80211_crypto_ccmp_init); -module_exit(ieee80211_crypto_ccmp_exit); +module_init(lib80211_crypto_ccmp_init); +module_exit(lib80211_crypto_ccmp_exit); diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index d12da1da632..7e8e22bfed9 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -1,7 +1,8 @@ /* - * Host AP crypt: host-based TKIP encryption implementation for Host AP driver + * lib80211 crypt: host-based TKIP encryption implementation for lib80211 * * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi> + * Copyright (c) 2008, John W. 
Linville <linville@tuxdriver.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -22,16 +23,20 @@ #include <linux/if_arp.h> #include <asm/string.h> -#include <net/ieee80211.h> +#include <linux/wireless.h> +#include <linux/ieee80211.h> +#include <net/iw_handler.h> #include <linux/crypto.h> #include <linux/crc32.h> +#include <net/lib80211.h> + MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("Host AP crypt: TKIP"); +MODULE_DESCRIPTION("lib80211 crypt: TKIP"); MODULE_LICENSE("GPL"); -struct ieee80211_tkip_data { +struct lib80211_tkip_data { #define TKIP_KEY_LEN 32 u8 key[TKIP_KEY_LEN]; int key_set; @@ -65,23 +70,23 @@ struct ieee80211_tkip_data { unsigned long flags; }; -static unsigned long ieee80211_tkip_set_flags(unsigned long flags, void *priv) +static unsigned long lib80211_tkip_set_flags(unsigned long flags, void *priv) { - struct ieee80211_tkip_data *_priv = priv; + struct lib80211_tkip_data *_priv = priv; unsigned long old_flags = _priv->flags; _priv->flags = flags; return old_flags; } -static unsigned long ieee80211_tkip_get_flags(void *priv) +static unsigned long lib80211_tkip_get_flags(void *priv) { - struct ieee80211_tkip_data *_priv = priv; + struct lib80211_tkip_data *_priv = priv; return _priv->flags; } -static void *ieee80211_tkip_init(int key_idx) +static void *lib80211_tkip_init(int key_idx) { - struct ieee80211_tkip_data *priv; + struct lib80211_tkip_data *priv; priv = kzalloc(sizeof(*priv), GFP_ATOMIC); if (priv == NULL) @@ -92,7 +97,7 @@ static void *ieee80211_tkip_init(int key_idx) priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_arc4)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " "crypto API arc4\n"); priv->tx_tfm_arc4 = NULL; goto fail; @@ -101,7 +106,7 @@ static void *ieee80211_tkip_init(int key_idx) 
priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_michael)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " "crypto API michael_mic\n"); priv->tx_tfm_michael = NULL; goto fail; @@ -110,7 +115,7 @@ static void *ieee80211_tkip_init(int key_idx) priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_arc4)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " "crypto API arc4\n"); priv->rx_tfm_arc4 = NULL; goto fail; @@ -119,7 +124,7 @@ static void *ieee80211_tkip_init(int key_idx) priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_michael)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " "crypto API michael_mic\n"); priv->rx_tfm_michael = NULL; goto fail; @@ -143,9 +148,9 @@ static void *ieee80211_tkip_init(int key_idx) return NULL; } -static void ieee80211_tkip_deinit(void *priv) +static void lib80211_tkip_deinit(void *priv) { - struct ieee80211_tkip_data *_priv = priv; + struct lib80211_tkip_data *_priv = priv; if (_priv) { if (_priv->tx_tfm_michael) crypto_free_hash(_priv->tx_tfm_michael); @@ -305,15 +310,15 @@ static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK, #endif } -static int ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len, +static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len, u8 * rc4key, int keylen, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; int len; u8 *pos; - struct ieee80211_hdr_4addr *hdr; + struct ieee80211_hdr *hdr; - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct ieee80211_hdr *)skb->data; if (skb_headroom(skb) < 8 || skb->len < hdr_len) return -1; @@ -351,9 
+356,9 @@ static int ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len, return 8; } -static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; struct blkcipher_desc desc = { .tfm = tkey->tx_tfm_arc4 }; int len; u8 rc4key[16], *pos, *icv; @@ -362,8 +367,8 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { if (net_ratelimit()) { - struct ieee80211_hdr_4addr *hdr = - (struct ieee80211_hdr_4addr *)skb->data; + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *)skb->data; printk(KERN_DEBUG ": TKIP countermeasures: dropped " "TX packet to %pM\n", hdr->addr1); } @@ -376,7 +381,7 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) len = skb->len - hdr_len; pos = skb->data + hdr_len; - if ((ieee80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0) + if ((lib80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0) return -1; icv = skb_put(skb, 4); @@ -405,21 +410,21 @@ static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n, return 0; } -static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; struct blkcipher_desc desc = { .tfm = tkey->rx_tfm_arc4 }; u8 rc4key[16]; u8 keyidx, *pos; u32 iv32; u16 iv16; - struct ieee80211_hdr_4addr *hdr; + struct ieee80211_hdr *hdr; u8 icv[4]; u32 crc; struct scatterlist sg; int plen; - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct ieee80211_hdr *)skb->data; if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { if (net_ratelimit()) { @@ -460,8 +465,8 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void 
*priv) pos += 8; if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { - if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { - IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=%pM" + if (net_ratelimit()) { + printk(KERN_DEBUG "TKIP: replay detected: STA=%pM" " previous TSC %08x%04x received TSC " "%08x%04x\n", hdr->addr2, tkey->rx_iv32, tkey->rx_iv16, iv32, iv16); @@ -500,8 +505,8 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) * it needs to be recalculated for the next packet. */ tkey->rx_phase1_done = 0; } - if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { - IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA=" + if (net_ratelimit()) { + printk(KERN_DEBUG "TKIP: ICV error detected: STA=" "%pM\n", hdr->addr2); } tkey->dot11RSNAStatsTKIPICVErrors++; @@ -545,13 +550,11 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) { - struct ieee80211_hdr_4addr *hdr11; - u16 stype; + struct ieee80211_hdr *hdr11; - hdr11 = (struct ieee80211_hdr_4addr *)skb->data; - stype = WLAN_FC_GET_STYPE(le16_to_cpu(hdr11->frame_ctl)); + hdr11 = (struct ieee80211_hdr *)skb->data; - switch (le16_to_cpu(hdr11->frame_ctl) & + switch (le16_to_cpu(hdr11->frame_control) & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { case IEEE80211_FCTL_TODS: memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ @@ -571,20 +574,19 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) break; } - if (stype & IEEE80211_STYPE_QOS_DATA) { - const struct ieee80211_hdr_3addrqos *qoshdr = - (struct ieee80211_hdr_3addrqos *)skb->data; - hdr[12] = le16_to_cpu(qoshdr->qos_ctl) & IEEE80211_QCTL_TID; + if (ieee80211_is_data_qos(hdr11->frame_control)) { + hdr[12] = le16_to_cpu(*ieee80211_get_qos_ctl(hdr11)) + & IEEE80211_QOS_CTL_TID_MASK; } else hdr[12] = 0; /* priority */ hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ } -static int ieee80211_michael_mic_add(struct sk_buff *skb, int 
hdr_len, +static int lib80211_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; u8 *pos; if (skb_tailroom(skb) < 8 || skb->len < hdr_len) { @@ -603,8 +605,8 @@ static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len, return 0; } -static void ieee80211_michael_mic_failure(struct net_device *dev, - struct ieee80211_hdr_4addr *hdr, +static void lib80211_michael_mic_failure(struct net_device *dev, + struct ieee80211_hdr *hdr, int keyidx) { union iwreq_data wrqu; @@ -624,10 +626,10 @@ static void ieee80211_michael_mic_failure(struct net_device *dev, wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev); } -static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, +static int lib80211_michael_mic_verify(struct sk_buff *skb, int keyidx, int hdr_len, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; u8 mic[8]; if (!tkey->key_set) @@ -638,14 +640,14 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, skb->data + hdr_len, skb->len - 8 - hdr_len, mic)) return -1; if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { - struct ieee80211_hdr_4addr *hdr; - hdr = (struct ieee80211_hdr_4addr *)skb->data; + struct ieee80211_hdr *hdr; + hdr = (struct ieee80211_hdr *)skb->data; printk(KERN_DEBUG "%s: Michael MIC verification failed for " "MSDU from %pM keyidx=%d\n", skb->dev ? 
skb->dev->name : "N/A", hdr->addr2, keyidx); if (skb->dev) - ieee80211_michael_mic_failure(skb->dev, hdr, keyidx); + lib80211_michael_mic_failure(skb->dev, hdr, keyidx); tkey->dot11RSNAStatsTKIPLocalMICFailures++; return -1; } @@ -660,9 +662,9 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, return 0; } -static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; int keyidx; struct crypto_hash *tfm = tkey->tx_tfm_michael; struct crypto_blkcipher *tfm2 = tkey->tx_tfm_arc4; @@ -693,9 +695,9 @@ static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) return 0; } -static int ieee80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; if (len < TKIP_KEY_LEN) return -1; @@ -722,9 +724,9 @@ static int ieee80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) return TKIP_KEY_LEN; } -static char *ieee80211_tkip_print_stats(char *p, void *priv) +static char *lib80211_tkip_print_stats(char *p, void *priv) { - struct ieee80211_tkip_data *tkip = priv; + struct lib80211_tkip_data *tkip = priv; p += sprintf(p, "key[%d] alg=TKIP key_set=%d " "tx_pn=%02x%02x%02x%02x%02x%02x " "rx_pn=%02x%02x%02x%02x%02x%02x " @@ -748,35 +750,35 @@ static char *ieee80211_tkip_print_stats(char *p, void *priv) return p; } -static struct ieee80211_crypto_ops ieee80211_crypt_tkip = { +static struct lib80211_crypto_ops lib80211_crypt_tkip = { .name = "TKIP", - .init = ieee80211_tkip_init, - .deinit = ieee80211_tkip_deinit, - .build_iv = ieee80211_tkip_hdr, - .encrypt_mpdu = ieee80211_tkip_encrypt, - .decrypt_mpdu = ieee80211_tkip_decrypt, - .encrypt_msdu = ieee80211_michael_mic_add, - .decrypt_msdu = 
ieee80211_michael_mic_verify, - .set_key = ieee80211_tkip_set_key, - .get_key = ieee80211_tkip_get_key, - .print_stats = ieee80211_tkip_print_stats, + .init = lib80211_tkip_init, + .deinit = lib80211_tkip_deinit, + .build_iv = lib80211_tkip_hdr, + .encrypt_mpdu = lib80211_tkip_encrypt, + .decrypt_mpdu = lib80211_tkip_decrypt, + .encrypt_msdu = lib80211_michael_mic_add, + .decrypt_msdu = lib80211_michael_mic_verify, + .set_key = lib80211_tkip_set_key, + .get_key = lib80211_tkip_get_key, + .print_stats = lib80211_tkip_print_stats, .extra_mpdu_prefix_len = 4 + 4, /* IV + ExtIV */ .extra_mpdu_postfix_len = 4, /* ICV */ .extra_msdu_postfix_len = 8, /* MIC */ - .get_flags = ieee80211_tkip_get_flags, - .set_flags = ieee80211_tkip_set_flags, + .get_flags = lib80211_tkip_get_flags, + .set_flags = lib80211_tkip_set_flags, .owner = THIS_MODULE, }; -static int __init ieee80211_crypto_tkip_init(void) +static int __init lib80211_crypto_tkip_init(void) { - return ieee80211_register_crypto_ops(&ieee80211_crypt_tkip); + return lib80211_register_crypto_ops(&lib80211_crypt_tkip); } -static void __exit ieee80211_crypto_tkip_exit(void) +static void __exit lib80211_crypto_tkip_exit(void) { - ieee80211_unregister_crypto_ops(&ieee80211_crypt_tkip); + lib80211_unregister_crypto_ops(&lib80211_crypt_tkip); } -module_init(ieee80211_crypto_tkip_init); -module_exit(ieee80211_crypto_tkip_exit); +module_init(lib80211_crypto_tkip_init); +module_exit(lib80211_crypto_tkip_exit); diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c index 3fa30c40779..6d41e05ca33 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/wireless/lib80211_crypt_wep.c @@ -1,7 +1,8 @@ /* - * Host AP crypt: host-based WEP encryption implementation for Host AP driver + * lib80211 crypt: host-based WEP encryption implementation for lib80211 * * Copyright (c) 2002-2004, Jouni Malinen <j@w1.fi> + * Copyright (c) 2008, John W. 
Linville <linville@tuxdriver.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,16 +20,16 @@ #include <linux/mm.h> #include <asm/string.h> -#include <net/ieee80211.h> +#include <net/lib80211.h> #include <linux/crypto.h> #include <linux/crc32.h> MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("Host AP crypt: WEP"); +MODULE_DESCRIPTION("lib80211 crypt: WEP"); MODULE_LICENSE("GPL"); -struct prism2_wep_data { +struct lib80211_wep_data { u32 iv; #define WEP_KEY_LEN 13 u8 key[WEP_KEY_LEN + 1]; @@ -38,9 +39,9 @@ struct prism2_wep_data { struct crypto_blkcipher *rx_tfm; }; -static void *prism2_wep_init(int keyidx) +static void *lib80211_wep_init(int keyidx) { - struct prism2_wep_data *priv; + struct lib80211_wep_data *priv; priv = kzalloc(sizeof(*priv), GFP_ATOMIC); if (priv == NULL) @@ -49,7 +50,7 @@ static void *prism2_wep_init(int keyidx) priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm)) { - printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_wep: could not allocate " "crypto API arc4\n"); priv->tx_tfm = NULL; goto fail; @@ -57,7 +58,7 @@ static void *prism2_wep_init(int keyidx) priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm)) { - printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_wep: could not allocate " "crypto API arc4\n"); priv->rx_tfm = NULL; goto fail; @@ -78,9 +79,9 @@ static void *prism2_wep_init(int keyidx) return NULL; } -static void prism2_wep_deinit(void *priv) +static void lib80211_wep_deinit(void *priv) { - struct prism2_wep_data *_priv = priv; + struct lib80211_wep_data *_priv = priv; if (_priv) { if (_priv->tx_tfm) crypto_free_blkcipher(_priv->tx_tfm); @@ -91,10 +92,10 @@ static void prism2_wep_deinit(void *priv) } /* Add WEP IV/key info to a frame that has at 
least 4 bytes of headroom */ -static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, +static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len, u8 *key, int keylen, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; u32 klen, len; u8 *pos; @@ -134,21 +135,21 @@ static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, * * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data)) */ -static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; struct blkcipher_desc desc = { .tfm = wep->tx_tfm }; u32 crc, klen, len; u8 *pos, *icv; struct scatterlist sg; u8 key[WEP_KEY_LEN + 3]; - /* other checks are in prism2_wep_build_iv */ + /* other checks are in lib80211_wep_build_iv */ if (skb_tailroom(skb) < 4) return -1; /* add the IV to the frame */ - if (prism2_wep_build_iv(skb, hdr_len, NULL, 0, priv)) + if (lib80211_wep_build_iv(skb, hdr_len, NULL, 0, priv)) return -1; /* Copy the IV into the first 3 bytes of the key */ @@ -181,9 +182,9 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on * failure. If frame is OK, IV and ICV will be removed. 
*/ -static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; struct blkcipher_desc desc = { .tfm = wep->rx_tfm }; u32 crc, klen, plen; u8 key[WEP_KEY_LEN + 3]; @@ -232,9 +233,9 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return 0; } -static int prism2_wep_set_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_wep_set_key(void *key, int len, u8 * seq, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; if (len < 0 || len > WEP_KEY_LEN) return -1; @@ -245,9 +246,9 @@ static int prism2_wep_set_key(void *key, int len, u8 * seq, void *priv) return 0; } -static int prism2_wep_get_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; if (len < wep->key_len) return -1; @@ -257,39 +258,39 @@ static int prism2_wep_get_key(void *key, int len, u8 * seq, void *priv) return wep->key_len; } -static char *prism2_wep_print_stats(char *p, void *priv) +static char *lib80211_wep_print_stats(char *p, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; p += sprintf(p, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len); return p; } -static struct ieee80211_crypto_ops ieee80211_crypt_wep = { +static struct lib80211_crypto_ops lib80211_crypt_wep = { .name = "WEP", - .init = prism2_wep_init, - .deinit = prism2_wep_deinit, - .build_iv = prism2_wep_build_iv, - .encrypt_mpdu = prism2_wep_encrypt, - .decrypt_mpdu = prism2_wep_decrypt, + .init = lib80211_wep_init, + .deinit = lib80211_wep_deinit, + .build_iv = lib80211_wep_build_iv, + .encrypt_mpdu = lib80211_wep_encrypt, + .decrypt_mpdu = lib80211_wep_decrypt, .encrypt_msdu = NULL, 
.decrypt_msdu = NULL, - .set_key = prism2_wep_set_key, - .get_key = prism2_wep_get_key, - .print_stats = prism2_wep_print_stats, + .set_key = lib80211_wep_set_key, + .get_key = lib80211_wep_get_key, + .print_stats = lib80211_wep_print_stats, .extra_mpdu_prefix_len = 4, /* IV */ .extra_mpdu_postfix_len = 4, /* ICV */ .owner = THIS_MODULE, }; -static int __init ieee80211_crypto_wep_init(void) +static int __init lib80211_crypto_wep_init(void) { - return ieee80211_register_crypto_ops(&ieee80211_crypt_wep); + return lib80211_register_crypto_ops(&lib80211_crypt_wep); } -static void __exit ieee80211_crypto_wep_exit(void) +static void __exit lib80211_crypto_wep_exit(void) { - ieee80211_unregister_crypto_ops(&ieee80211_crypt_wep); + lib80211_unregister_crypto_ops(&lib80211_crypt_wep); } -module_init(ieee80211_crypto_wep_init); -module_exit(ieee80211_crypto_wep_exit); +module_init(lib80211_crypto_wep_init); +module_exit(lib80211_crypto_wep_exit); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5e1d658a8b5..c9141e3df9b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -58,6 +58,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = BUS_ID_SIZE-1 }, + [NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED }, [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, @@ -95,6 +96,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, + [NL80211_ATTR_BSS_BASIC_RATES] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, [NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED }, @@ -195,6 +198,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (chan->flags & 
IEEE80211_CHAN_RADAR) NLA_PUT_FLAG(msg, NL80211_FREQUENCY_ATTR_RADAR); + NLA_PUT_U32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, + DBM_TO_MBM(chan->max_power)); + nla_nest_end(msg, nl_freq); } @@ -284,20 +290,76 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) return -ENOBUFS; } +static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { + [NL80211_TXQ_ATTR_QUEUE] = { .type = NLA_U8 }, + [NL80211_TXQ_ATTR_TXOP] = { .type = NLA_U16 }, + [NL80211_TXQ_ATTR_CWMIN] = { .type = NLA_U16 }, + [NL80211_TXQ_ATTR_CWMAX] = { .type = NLA_U16 }, + [NL80211_TXQ_ATTR_AIFS] = { .type = NLA_U8 }, +}; + +static int parse_txq_params(struct nlattr *tb[], + struct ieee80211_txq_params *txq_params) +{ + if (!tb[NL80211_TXQ_ATTR_QUEUE] || !tb[NL80211_TXQ_ATTR_TXOP] || + !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || + !tb[NL80211_TXQ_ATTR_AIFS]) + return -EINVAL; + + txq_params->queue = nla_get_u8(tb[NL80211_TXQ_ATTR_QUEUE]); + txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); + txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); + txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); + txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); + + return 0; +} + static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; - int result; - - if (!info->attrs[NL80211_ATTR_WIPHY_NAME]) - return -EINVAL; + int result = 0, rem_txq_params = 0; + struct nlattr *nl_txq_params; rdev = cfg80211_get_dev_from_info(info); if (IS_ERR(rdev)) return PTR_ERR(rdev); - result = cfg80211_dev_rename(rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); + if (info->attrs[NL80211_ATTR_WIPHY_NAME]) { + result = cfg80211_dev_rename( + rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); + if (result) + goto bad_res; + } + + if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { + struct ieee80211_txq_params txq_params; + struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1]; + + if 
(!rdev->ops->set_txq_params) { + result = -EOPNOTSUPP; + goto bad_res; + } + nla_for_each_nested(nl_txq_params, + info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], + rem_txq_params) { + nla_parse(tb, NL80211_TXQ_ATTR_MAX, + nla_data(nl_txq_params), + nla_len(nl_txq_params), + txq_params_policy); + result = parse_txq_params(tb, &txq_params); + if (result) + goto bad_res; + + result = rdev->ops->set_txq_params(&rdev->wiphy, + &txq_params); + if (result) + goto bad_res; + } + } + +bad_res: cfg80211_put_dev(rdev); return result; } @@ -1613,6 +1675,12 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]) params.use_short_slot_time = nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]); + if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { + params.basic_rates = + nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + params.basic_rates_len = + nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + } err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) @@ -1695,7 +1763,7 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) return -EINVAL; #endif mutex_lock(&cfg80211_drv_mutex); - r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data); + r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, 0, ENVIRON_ANY); mutex_unlock(&cfg80211_drv_mutex); return r; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 9dff716d1b0..0990059f7e4 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -42,16 +42,39 @@ #include "core.h" #include "reg.h" -/* - * wiphy is set if this request's initiator is - * REGDOM_SET_BY_COUNTRY_IE or _DRIVER +/** + * struct regulatory_request - receipt of last regulatory request + * + * @wiphy: this is set if this request's initiator is + * %REGDOM_SET_BY_COUNTRY_IE or %REGDOM_SET_BY_DRIVER. 
This + * can be used by the wireless core to deal with conflicts + * and potentially inform users of which devices specifically + * caused the conflicts. + * @initiator: indicates who sent this request, could be any + * of those set in reg_set_by, %REGDOM_SET_BY_* + * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested + * regulatory domain. We have a few special codes: + * 00 - World regulatory domain + * 99 - built by driver but a specific alpha2 cannot be determined + * 98 - result of an intersection between two regulatory domains + * @intersect: indicates whether the wireless core should intersect + * the requested regulatory domain with the presently set regulatory + * domain. + * @country_ie_checksum: checksum of the last processed and accepted + * country IE + * @country_ie_env: lets us know if the AP is telling us we are outdoor, + * indoor, or if it doesn't matter */ struct regulatory_request { struct wiphy *wiphy; enum reg_set_by initiator; char alpha2[2]; + bool intersect; + u32 country_ie_checksum; + enum environment_cap country_ie_env; }; +/* Receipt of information from last regulatory request */ static struct regulatory_request *last_request; /* To trigger userspace events */ @@ -68,6 +91,11 @@ static u32 supported_bandwidths[] = { * information to give us an alpha2 */ static const struct ieee80211_regdomain *cfg80211_regdomain; +/* We use this as a place for the rd structure built from the + * last parsed country IE to rest until CRDA gets back to us with + * what it thinks should apply for the same country */ +static const struct ieee80211_regdomain *country_ie_regdomain; + /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { .n_reg_rules = 1, @@ -247,6 +275,18 @@ static bool is_unknown_alpha2(const char *alpha2) return false; } +static bool is_intersected_alpha2(const char *alpha2) +{ + if (!alpha2) + return false; + /* Special case where regulatory
domain is the + * result of an intersection between two regulatory domain + * structures */ + if (alpha2[0] == '9' && alpha2[1] == '8') + return true; + return false; +} + static bool is_an_alpha2(const char *alpha2) { if (!alpha2) @@ -275,6 +315,25 @@ static bool regdom_changed(const char *alpha2) return true; } +/** + * country_ie_integrity_changes - tells us if the country IE has changed + * @checksum: checksum of country IE of fields we are interested in + * + * If the country IE has not changed you can ignore it safely. This is + * useful to determine if two devices are seeing two different country IEs + * even on the same alpha2. Note that this will return false if no IE has + * been set on the wireless core yet. + */ +static bool country_ie_integrity_changes(u32 checksum) +{ + /* If no IE has been set then the checksum doesn't change */ + if (unlikely(!last_request->country_ie_checksum)) + return false; + if (unlikely(last_request->country_ie_checksum != checksum)) + return true; + return false; +} + /* This lets us keep regulatory code which is updated on a regulatory * basis in userspace. 
*/ static int call_crda(const char *alpha2) @@ -313,7 +372,7 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) const struct ieee80211_freq_range *freq_range = &rule->freq_range; u32 freq_diff; - if (freq_range->start_freq_khz == 0 || freq_range->end_freq_khz == 0) + if (freq_range->start_freq_khz <= 0 || freq_range->end_freq_khz <= 0) return false; if (freq_range->start_freq_khz > freq_range->end_freq_khz) @@ -321,7 +380,7 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; - if (freq_range->max_bandwidth_khz > freq_diff) + if (freq_diff <= 0 || freq_range->max_bandwidth_khz > freq_diff) return false; return true; @@ -335,6 +394,9 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) if (!rd->n_reg_rules) return false; + if (WARN_ON(rd->n_reg_rules > NL80211_MAX_SUPP_REG_RULES)) + return false; + for (i = 0; i < rd->n_reg_rules; i++) { reg_rule = &rd->reg_rules[i]; if (!is_valid_reg_rule(reg_rule)) @@ -359,6 +421,311 @@ static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range, return 0; } +/* Converts a country IE to a regulatory domain. A regulatory domain + * structure has a lot of information which the IE doesn't yet have, + * so for the other values we use upper max values as we will intersect + * with our userspace regulatory agent to get lower bounds. 
*/ +static struct ieee80211_regdomain *country_ie_2_rd( + u8 *country_ie, + u8 country_ie_len, + u32 *checksum) +{ + struct ieee80211_regdomain *rd = NULL; + unsigned int i = 0; + char alpha2[2]; + u32 flags = 0; + u32 num_rules = 0, size_of_regd = 0; + u8 *triplets_start = NULL; + u8 len_at_triplet = 0; + /* the last channel we have registered in a subband (triplet) */ + int last_sub_max_channel = 0; + + *checksum = 0xDEADBEEF; + + /* Country IE requirements */ + BUG_ON(country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN || + country_ie_len & 0x01); + + alpha2[0] = country_ie[0]; + alpha2[1] = country_ie[1]; + + /* + * Third octet can be: + * 'I' - Indoor + * 'O' - Outdoor + * + * anything else we assume is no restrictions + */ + if (country_ie[2] == 'I') + flags = NL80211_RRF_NO_OUTDOOR; + else if (country_ie[2] == 'O') + flags = NL80211_RRF_NO_INDOOR; + + country_ie += 3; + country_ie_len -= 3; + + triplets_start = country_ie; + len_at_triplet = country_ie_len; + + *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8); + + /* We need to build a reg rule for each triplet, but first we must + * calculate the number of reg rules we will need. We will need one + * for each channel subband */ + while (country_ie_len >= 3) { + struct ieee80211_country_ie_triplet *triplet = + (struct ieee80211_country_ie_triplet *) country_ie; + int cur_sub_max_channel = 0, cur_channel = 0; + + if (triplet->ext.reg_extension_id >= + IEEE80211_COUNTRY_EXTENSION_ID) { + country_ie += 3; + country_ie_len -= 3; + continue; + } + + cur_channel = triplet->chans.first_channel; + cur_sub_max_channel = ieee80211_channel_to_frequency( + cur_channel + triplet->chans.num_channels); + + /* Basic sanity check */ + if (cur_sub_max_channel < cur_channel) + return NULL; + + /* Do not allow overlapping channels. 
Also channels + * passed in each subband must be monotonically + * increasing */ + if (last_sub_max_channel) { + if (cur_channel <= last_sub_max_channel) + return NULL; + if (cur_sub_max_channel <= last_sub_max_channel) + return NULL; + } + + /* When dot11RegulatoryClassesRequired is supported + * we can throw ext triplets as part of this soup, + * for now we don't care when those change as we + * don't support them */ + *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) | + ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) | + ((triplet->chans.max_power ^ cur_sub_max_channel) << 24); + + last_sub_max_channel = cur_sub_max_channel; + + country_ie += 3; + country_ie_len -= 3; + num_rules++; + + /* Note: this is not an IEEE requirement but + * simply a memory requirement */ + if (num_rules > NL80211_MAX_SUPP_REG_RULES) + return NULL; + } + + country_ie = triplets_start; + country_ie_len = len_at_triplet; + + size_of_regd = sizeof(struct ieee80211_regdomain) + + (num_rules * sizeof(struct ieee80211_reg_rule)); + + rd = kzalloc(size_of_regd, GFP_KERNEL); + if (!rd) + return NULL; + + rd->n_reg_rules = num_rules; + rd->alpha2[0] = alpha2[0]; + rd->alpha2[1] = alpha2[1]; + + /* This time around we fill in the rd */ + while (country_ie_len >= 3) { + struct ieee80211_country_ie_triplet *triplet = + (struct ieee80211_country_ie_triplet *) country_ie; + struct ieee80211_reg_rule *reg_rule = NULL; + struct ieee80211_freq_range *freq_range = NULL; + struct ieee80211_power_rule *power_rule = NULL; + + /* Must parse if dot11RegulatoryClassesRequired is true, + * we don't support this yet */ + if (triplet->ext.reg_extension_id >= + IEEE80211_COUNTRY_EXTENSION_ID) { + country_ie += 3; + country_ie_len -= 3; + continue; + } + + reg_rule = &rd->reg_rules[i]; + freq_range = &reg_rule->freq_range; + power_rule = &reg_rule->power_rule; + + reg_rule->flags = flags; + + /* The +10 is since the regulatory domain expects + * the actual band edge, not the center of freq for + * its start
and end freqs, assuming 20 MHz bandwidth on + * the channels passed */ + freq_range->start_freq_khz = + MHZ_TO_KHZ(ieee80211_channel_to_frequency( + triplet->chans.first_channel) - 10); + freq_range->end_freq_khz = + MHZ_TO_KHZ(ieee80211_channel_to_frequency( + triplet->chans.first_channel + + triplet->chans.num_channels) + 10); + + /* Large arbitrary values, we intersect later */ + /* Increment this if we ever support >= 40 MHz channels + * in IEEE 802.11 */ + freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40); + power_rule->max_antenna_gain = DBI_TO_MBI(100); + power_rule->max_eirp = DBM_TO_MBM(100); + + country_ie += 3; + country_ie_len -= 3; + i++; + + BUG_ON(i > NL80211_MAX_SUPP_REG_RULES); + } + + return rd; +} + + +/* Helper for regdom_intersect(), this does the real + * mathematical intersection fun */ +static int reg_rules_intersect( + const struct ieee80211_reg_rule *rule1, + const struct ieee80211_reg_rule *rule2, + struct ieee80211_reg_rule *intersected_rule) +{ + const struct ieee80211_freq_range *freq_range1, *freq_range2; + struct ieee80211_freq_range *freq_range; + const struct ieee80211_power_rule *power_rule1, *power_rule2; + struct ieee80211_power_rule *power_rule; + u32 freq_diff; + + freq_range1 = &rule1->freq_range; + freq_range2 = &rule2->freq_range; + freq_range = &intersected_rule->freq_range; + + power_rule1 = &rule1->power_rule; + power_rule2 = &rule2->power_rule; + power_rule = &intersected_rule->power_rule; + + freq_range->start_freq_khz = max(freq_range1->start_freq_khz, + freq_range2->start_freq_khz); + freq_range->end_freq_khz = min(freq_range1->end_freq_khz, + freq_range2->end_freq_khz); + freq_range->max_bandwidth_khz = min(freq_range1->max_bandwidth_khz, + freq_range2->max_bandwidth_khz); + + freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; + if (freq_range->max_bandwidth_khz > freq_diff) + freq_range->max_bandwidth_khz = freq_diff; + + power_rule->max_eirp = min(power_rule1->max_eirp, + power_rule2->max_eirp); + 
power_rule->max_antenna_gain = min(power_rule1->max_antenna_gain, + power_rule2->max_antenna_gain); + + intersected_rule->flags = (rule1->flags | rule2->flags); + + if (!is_valid_reg_rule(intersected_rule)) + return -EINVAL; + + return 0; +} + +/** + * regdom_intersect - do the intersection between two regulatory domains + * @rd1: first regulatory domain + * @rd2: second regulatory domain + * + * Use this function to get the intersection between two regulatory domains. + * Once completed we will mark the alpha2 for the rd as intersected, "98", + * as no one single alpha2 can represent this regulatory domain. + * + * Returns a pointer to the regulatory domain structure which will hold the + * resulting intersection of rules between rd1 and rd2. We will + * kzalloc() this structure for you. + */ +static struct ieee80211_regdomain *regdom_intersect( + const struct ieee80211_regdomain *rd1, + const struct ieee80211_regdomain *rd2) +{ + int r, size_of_regd; + unsigned int x, y; + unsigned int num_rules = 0, rule_idx = 0; + const struct ieee80211_reg_rule *rule1, *rule2; + struct ieee80211_reg_rule *intersected_rule; + struct ieee80211_regdomain *rd; + /* This is just a dummy holder to help us count */ + struct ieee80211_reg_rule irule; + + /* Uses the stack temporarily for counter arithmetic */ + intersected_rule = &irule; + + memset(intersected_rule, 0, sizeof(struct ieee80211_reg_rule)); + + if (!rd1 || !rd2) + return NULL; + + /* First we get a count of the rules we'll need, then we actually + * build them. This is to so we can malloc() and free() a + * regdomain once. The reason we use reg_rules_intersect() here + * is it will return -EINVAL if the rule computed makes no sense. + * All rules that do check out OK are valid. 
*/ + + for (x = 0; x < rd1->n_reg_rules; x++) { + rule1 = &rd1->reg_rules[x]; + for (y = 0; y < rd2->n_reg_rules; y++) { + rule2 = &rd2->reg_rules[y]; + if (!reg_rules_intersect(rule1, rule2, + intersected_rule)) + num_rules++; + memset(intersected_rule, 0, + sizeof(struct ieee80211_reg_rule)); + } + } + + if (!num_rules) + return NULL; + + size_of_regd = sizeof(struct ieee80211_regdomain) + + ((num_rules + 1) * sizeof(struct ieee80211_reg_rule)); + + rd = kzalloc(size_of_regd, GFP_KERNEL); + if (!rd) + return NULL; + + for (x = 0; x < rd1->n_reg_rules; x++) { + rule1 = &rd1->reg_rules[x]; + for (y = 0; y < rd2->n_reg_rules; y++) { + rule2 = &rd2->reg_rules[y]; + /* This time around instead of using the stack lets + * write to the target rule directly saving ourselves + * a memcpy() */ + intersected_rule = &rd->reg_rules[rule_idx]; + r = reg_rules_intersect(rule1, rule2, + intersected_rule); + /* No need to memset here the intersected rule here as + * we're not using the stack anymore */ + if (r) + continue; + rule_idx++; + } + } + + if (rule_idx != num_rules) { + kfree(rd); + return NULL; + } + + rd->n_reg_rules = num_rules; + rd->alpha2[0] = '9'; + rd->alpha2[1] = '8'; + + return rd; +} + /* XXX: add support for the rest of enum nl80211_reg_rule_flags, we may * want to just have the channel structure use these */ static u32 map_regdom_flags(u32 rd_flags) @@ -449,12 +816,23 @@ static void handle_band(struct ieee80211_supported_band *sband) handle_channel(&sband->channels[i]); } +static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) +{ + if (!last_request) + return true; + if (setby == REGDOM_SET_BY_CORE && + wiphy->fw_handles_regulatory) + return true; + return false; +} + static void update_all_wiphy_regulatory(enum reg_set_by setby) { struct cfg80211_registered_device *drv; list_for_each_entry(drv, &cfg80211_drv_list, list) - wiphy_update_regulatory(&drv->wiphy, setby); + if (!ignore_reg_update(&drv->wiphy, setby)) + 
wiphy_update_regulatory(&drv->wiphy, setby); } void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) @@ -468,6 +846,10 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) } } +/* Return value which can be used by ignore_request() to indicate + * it has been determined we should intersect two regulatory domains */ +#define REG_INTERSECT 1 + /* This has the logic which determines when a new request * should be ignored. */ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, @@ -502,28 +884,25 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, return -EOPNOTSUPP; return -EALREADY; } - /* Two consecutive Country IE hints on the same wiphy */ - if (!alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + /* Two consecutive Country IE hints on the same wiphy. + * This should be picked up early by the driver/stack */ + if (WARN_ON(!alpha2_equal(cfg80211_regdomain->alpha2, + alpha2))) return 0; return -EALREADY; } - /* - * Ignore Country IE hints for now, need to think about - * what we need to do to support multi-domain operation. - */ - return -EOPNOTSUPP; + return REG_INTERSECT; case REGDOM_SET_BY_DRIVER: if (last_request->initiator == REGDOM_SET_BY_DRIVER) return -EALREADY; return 0; case REGDOM_SET_BY_USER: - /* - * If the user wants to override the AP's hint, we may - * need to follow both and use the intersection. For now, - * reject any such attempt (but we don't support country - * IEs right now anyway.) 
- */ if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + return REG_INTERSECT; + /* If the user knows better the user should set the regdom + * to their country before the IE is picked up */ + if (last_request->initiator == REGDOM_SET_BY_USER && + last_request->intersect) return -EOPNOTSUPP; return 0; } @@ -533,44 +912,47 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, /* Caller must hold &cfg80211_drv_mutex */ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2) + const char *alpha2, + u32 country_ie_checksum, + enum environment_cap env) { struct regulatory_request *request; + bool intersect = false; int r = 0; r = ignore_request(wiphy, set_by, alpha2); - if (r) + + if (r == REG_INTERSECT) + intersect = true; + else if (r) return r; - switch (set_by) { - case REGDOM_SET_BY_CORE: - case REGDOM_SET_BY_COUNTRY_IE: - case REGDOM_SET_BY_DRIVER: - case REGDOM_SET_BY_USER: - request = kzalloc(sizeof(struct regulatory_request), - GFP_KERNEL); - if (!request) - return -ENOMEM; - - request->alpha2[0] = alpha2[0]; - request->alpha2[1] = alpha2[1]; - request->initiator = set_by; - request->wiphy = wiphy; - - kfree(last_request); - last_request = request; - r = call_crda(alpha2); -#ifndef CONFIG_WIRELESS_OLD_REGULATORY - if (r) - printk(KERN_ERR "cfg80211: Failed calling CRDA\n"); -#endif - break; - default: - r = -ENOTSUPP; - break; - } + request = kzalloc(sizeof(struct regulatory_request), + GFP_KERNEL); + if (!request) + return -ENOMEM; - return r; + request->alpha2[0] = alpha2[0]; + request->alpha2[1] = alpha2[1]; + request->initiator = set_by; + request->wiphy = wiphy; + request->intersect = intersect; + request->country_ie_checksum = country_ie_checksum; + request->country_ie_env = env; + + kfree(last_request); + last_request = request; + /* + * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled + * AND if CRDA is NOT present nothing will happen, if someone + * wants to bother with 11d with OLD_REG 
you can add a timer. + * If after x amount of time nothing happens you can call: + * + * return set_regdom(country_ie_regdomain); + * + * to intersect with the static rd + */ + return call_crda(alpha2); } void regulatory_hint(struct wiphy *wiphy, const char *alpha2) @@ -578,11 +960,120 @@ void regulatory_hint(struct wiphy *wiphy, const char *alpha2) BUG_ON(!alpha2); mutex_lock(&cfg80211_drv_mutex); - __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2); + __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, 0, ENVIRON_ANY); mutex_unlock(&cfg80211_drv_mutex); } EXPORT_SYMBOL(regulatory_hint); +static bool reg_same_country_ie_hint(struct wiphy *wiphy, + u32 country_ie_checksum) +{ + if (!last_request->wiphy) + return false; + if (likely(last_request->wiphy != wiphy)) + return !country_ie_integrity_changes(country_ie_checksum); + /* We should not have let these through at this point, they + * should have been picked up earlier by the first alpha2 check + * on the device */ + if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum))) + return true; + return false; +} + +void regulatory_hint_11d(struct wiphy *wiphy, + u8 *country_ie, + u8 country_ie_len) +{ + struct ieee80211_regdomain *rd = NULL; + char alpha2[2]; + u32 checksum = 0; + enum environment_cap env = ENVIRON_ANY; + + mutex_lock(&cfg80211_drv_mutex); + + /* IE len must be evenly divisible by 2 */ + if (country_ie_len & 0x01) + goto out; + + if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) + goto out; + + /* Pending country IE processing, this can happen after we + * call CRDA and wait for a response if a beacon was received before + * we were able to process the last regulatory_hint_11d() call */ + if (country_ie_regdomain) + goto out; + + alpha2[0] = country_ie[0]; + alpha2[1] = country_ie[1]; + + if (country_ie[2] == 'I') + env = ENVIRON_INDOOR; + else if (country_ie[2] == 'O') + env = ENVIRON_OUTDOOR; + + /* We will run this for *every* beacon processed for the BSSID, so + * we optimize an 
early check to exit out early if we don't have to + * do anything */ + if (likely(last_request->wiphy)) { + struct cfg80211_registered_device *drv_last_ie; + + drv_last_ie = wiphy_to_dev(last_request->wiphy); + + /* Lets keep this simple -- we trust the first AP + * after we intersect with CRDA */ + if (likely(last_request->wiphy == wiphy)) { + /* Ignore IEs coming in on this wiphy with + * the same alpha2 and environment cap */ + if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, + alpha2) && + env == drv_last_ie->env)) { + goto out; + } + /* the wiphy moved on to another BSSID or the AP + * was reconfigured. XXX: We need to deal with the + * case where the user suspends and goes + * to another country, and then gets IEs from an + * AP with different settings */ + goto out; + } else { + /* Ignore IEs coming in on two separate wiphys with + * the same alpha2 and environment cap */ + if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, + alpha2) && + env == drv_last_ie->env)) { + goto out; + } + /* We could potentially intersect though */ + goto out; + } + } + + rd = country_ie_2_rd(country_ie, country_ie_len, &checksum); + if (!rd) + goto out; + + /* This will not happen right now but we leave it here for + * the future when we want to add suspend/resume support and having + * the user move to another country after doing so, or having the user + * move to another AP. Right now we just trust the first AP. This is why + * this is marked as likely().
If we hit this before we add this support + * we want to be informed of it as it would indicate a mistake in the + * current design */ + if (likely(WARN_ON(reg_same_country_ie_hint(wiphy, checksum)))) + goto out; + + /* We keep this around for when CRDA comes back with a response so + * we can intersect with that */ + country_ie_regdomain = rd; + + __regulatory_hint(wiphy, REGDOM_SET_BY_COUNTRY_IE, + country_ie_regdomain->alpha2, checksum, env); + +out: + mutex_unlock(&cfg80211_drv_mutex); +} +EXPORT_SYMBOL(regulatory_hint_11d); static void print_rd_rules(const struct ieee80211_regdomain *rd) { @@ -622,7 +1113,25 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) static void print_regdomain(const struct ieee80211_regdomain *rd) { - if (is_world_regdom(rd->alpha2)) + if (is_intersected_alpha2(rd->alpha2)) { + struct wiphy *wiphy = NULL; + struct cfg80211_registered_device *drv; + + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { + if (last_request->wiphy) { + wiphy = last_request->wiphy; + drv = wiphy_to_dev(wiphy); + printk(KERN_INFO "cfg80211: Current regulatory " + "domain updated by AP to: %c%c\n", + drv->country_ie_alpha2[0], + drv->country_ie_alpha2[1]); + } else + printk(KERN_INFO "cfg80211: Current regulatory " + "domain intersected: \n"); + } else + printk(KERN_INFO "cfg80211: Current regulatory " + "intersected: \n"); + } else if (is_world_regdom(rd->alpha2)) printk(KERN_INFO "cfg80211: World regulatory " "domain updated:\n"); else { @@ -638,16 +1147,46 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) print_rd_rules(rd); } -void print_regdomain_info(const struct ieee80211_regdomain *rd) +static void print_regdomain_info(const struct ieee80211_regdomain *rd) { printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]); print_rd_rules(rd); } +#ifdef CONFIG_CFG80211_REG_DEBUG +static void reg_country_ie_process_debug( + const struct ieee80211_regdomain *rd, + const struct 
ieee80211_regdomain *country_ie_regdomain, + const struct ieee80211_regdomain *intersected_rd) +{ + printk(KERN_DEBUG "cfg80211: Received country IE:\n"); + print_regdomain_info(country_ie_regdomain); + printk(KERN_DEBUG "cfg80211: CRDA thinks this should applied:\n"); + print_regdomain_info(rd); + if (intersected_rd) { + printk(KERN_DEBUG "cfg80211: We intersect both of these " + "and get:\n"); + print_regdomain_info(intersected_rd); + return; + } + printk(KERN_DEBUG "cfg80211: Intersection between both failed\n"); +} +#else +static inline void reg_country_ie_process_debug( + const struct ieee80211_regdomain *rd, + const struct ieee80211_regdomain *country_ie_regdomain, + const struct ieee80211_regdomain *intersected_rd) +{ +} +#endif + /* Takes ownership of rd only if it doesn't fail */ static int __set_regdom(const struct ieee80211_regdomain *rd) { + const struct ieee80211_regdomain *intersected_rd = NULL; + struct cfg80211_registered_device *drv = NULL; + struct wiphy *wiphy = NULL; /* Some basic sanity checks first */ if (is_world_regdom(rd->alpha2)) { @@ -664,10 +1203,18 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (!last_request) return -EINVAL; - /* allow overriding the static definitions if CRDA is present */ - if (!is_old_static_regdom(cfg80211_regdomain) && - !regdom_changed(rd->alpha2)) - return -EINVAL; + /* Lets only bother proceeding on the same alpha2 if the current + * rd is non static (it means CRDA was present and was used last) + * and the pending request came in from a country IE */ + if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { + /* If someone else asked us to change the rd lets only bother + * checking if the alpha2 changes if CRDA was already called */ + if (!is_old_static_regdom(cfg80211_regdomain) && + !regdom_changed(rd->alpha2)) + return -EINVAL; + } + + wiphy = last_request->wiphy; /* Now lets set the regulatory domain, update all driver channels * and finally inform them of what we have done, in case they want
@@ -677,28 +1224,78 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (WARN_ON(!reg_is_valid_request(rd->alpha2))) return -EINVAL; - reset_regdomains(); + if (!is_valid_rd(rd)) { + printk(KERN_ERR "cfg80211: Invalid " + "regulatory domain detected:\n"); + print_regdomain_info(rd); + return -EINVAL; + } - /* Country IE parsing coming soon */ - switch (last_request->initiator) { - case REGDOM_SET_BY_CORE: - case REGDOM_SET_BY_DRIVER: - case REGDOM_SET_BY_USER: - if (!is_valid_rd(rd)) { - printk(KERN_ERR "cfg80211: Invalid " - "regulatory domain detected:\n"); - print_regdomain_info(rd); + if (!last_request->intersect) { + reset_regdomains(); + cfg80211_regdomain = rd; + return 0; + } + + /* Intersection requires a bit more work */ + + if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { + + intersected_rd = regdom_intersect(rd, cfg80211_regdomain); + if (!intersected_rd) return -EINVAL; - } - break; - case REGDOM_SET_BY_COUNTRY_IE: /* Not yet */ - WARN_ON(1); - default: - return -EOPNOTSUPP; + + /* We can trash what CRDA provided now */ + kfree(rd); + rd = NULL; + + reset_regdomains(); + cfg80211_regdomain = intersected_rd; + + return 0; } - /* Tada! */ - cfg80211_regdomain = rd; + /* + * Country IE requests are handled a bit differently, we intersect + * the country IE rd with what CRDA believes that country should have + */ + + BUG_ON(!country_ie_regdomain); + + if (rd != country_ie_regdomain) { + /* Intersect what CRDA returned and our what we + * had built from the Country IE received */ + + intersected_rd = regdom_intersect(rd, country_ie_regdomain); + + reg_country_ie_process_debug(rd, country_ie_regdomain, + intersected_rd); + + kfree(country_ie_regdomain); + country_ie_regdomain = NULL; + } else { + /* This would happen when CRDA was not present and + * OLD_REGULATORY was enabled. 
We intersect our Country + * IE rd and what was set on cfg80211 originally */ + intersected_rd = regdom_intersect(rd, cfg80211_regdomain); + } + + if (!intersected_rd) + return -EINVAL; + + drv = wiphy_to_dev(wiphy); + + drv->country_ie_alpha2[0] = rd->alpha2[0]; + drv->country_ie_alpha2[1] = rd->alpha2[1]; + drv->env = last_request->country_ie_env; + + BUG_ON(intersected_rd == rd); + + kfree(rd); + rd = NULL; + + reset_regdomains(); + cfg80211_regdomain = intersected_rd; return 0; } @@ -719,16 +1316,28 @@ int set_regdom(const struct ieee80211_regdomain *rd) } /* This would make this whole thing pointless */ - BUG_ON(rd != cfg80211_regdomain); + if (!last_request->intersect) + BUG_ON(rd != cfg80211_regdomain); /* update all wiphys now with the new established regulatory domain */ update_all_wiphy_regulatory(last_request->initiator); - print_regdomain(rd); + print_regdomain(cfg80211_regdomain); return r; } +/* Caller must hold cfg80211_drv_mutex */ +void reg_device_remove(struct wiphy *wiphy) +{ + if (!last_request->wiphy) + return; + if (last_request->wiphy != wiphy) + return; + last_request->wiphy = NULL; + last_request->country_ie_env = ENVIRON_ANY; +} + int regulatory_init(void) { int err; @@ -748,11 +1357,11 @@ int regulatory_init(void) * that is not a valid ISO / IEC 3166 alpha2 */ if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U') err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, - ieee80211_regdom); + ieee80211_regdom, 0, ENVIRON_ANY); #else cfg80211_regdomain = cfg80211_world_regdom; - err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00"); + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", 0, ENVIRON_ANY); if (err) printk(KERN_ERR "cfg80211: calling CRDA failed - " "unable to update world regulatory domain, " @@ -768,6 +1377,9 @@ void regulatory_exit(void) reset_regdomains(); + kfree(country_ie_regdomain); + country_ie_regdomain = NULL; + kfree(last_request); platform_device_unregister(reg_pdev); diff --git a/net/wireless/reg.h 
b/net/wireless/reg.h index c9b6b6358bb..a76ea3ff7cd 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -4,28 +4,41 @@ bool is_world_regdom(const char *alpha2); bool reg_is_valid_request(const char *alpha2); +void reg_device_remove(struct wiphy *wiphy); + int regulatory_init(void); void regulatory_exit(void); int set_regdom(const struct ieee80211_regdomain *rd); +enum environment_cap { + ENVIRON_ANY, + ENVIRON_INDOOR, + ENVIRON_OUTDOOR, +}; + + /** * __regulatory_hint - hint to the wireless core a regulatory domain * @wiphy: if the hint comes from country information from an AP, this * is required to be set to the wiphy that received the information * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain * should be in. + * @country_ie_checksum: checksum of processed country IE, set this to 0 + * if the hint did not come from a country IE + * @country_ie_env: the environment the IE told us we are in, %ENVIRON_* * * The Wireless subsystem can use this function to hint to the wireless core - * what it believes should be the current regulatory domain by - * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory - * domain should be in. + * what it believes should be the current regulatory domain by giving it an + * ISO/IEC 3166 alpha2 country code it knows its regulatory domain should be + * in. * * Returns zero if all went fine, %-EALREADY if a regulatory domain had * already been set or other standard error codes. 
* */ extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2); + const char *alpha2, u32 country_ie_checksum, + enum environment_cap country_ie_env); #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 29f820e1825..79a38287764 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -23,25 +23,20 @@ static inline struct cfg80211_registered_device *dev_to_rdev( return container_of(dev, struct cfg80211_registered_device, wiphy.dev); } -static ssize_t _show_index(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", dev_to_rdev(dev)->idx); +#define SHOW_FMT(name, fmt, member) \ +static ssize_t name ## _show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \ } -static ssize_t _show_permaddr(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - unsigned char *addr = dev_to_rdev(dev)->wiphy.perm_addr; - - return sprintf(buf, "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n", - addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); -} +SHOW_FMT(index, "%d", idx); +SHOW_FMT(macaddress, "%pM", wiphy.perm_addr); static struct device_attribute ieee80211_dev_attrs[] = { - __ATTR(index, S_IRUGO, _show_index, NULL), - __ATTR(macaddress, S_IRUGO, _show_permaddr, NULL), + __ATTR_RO(index), + __ATTR_RO(macaddress), {} }; diff --git a/net/wireless/util.c b/net/wireless/util.c index f54424693a3..e76cc28b034 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -7,6 +7,25 @@ #include <asm/bitops.h> #include "core.h" +struct ieee80211_rate * +ieee80211_get_response_rate(struct ieee80211_supported_band *sband, + u64 basic_rates, int bitrate) +{ + struct ieee80211_rate *result = &sband->bitrates[0]; + int i; + + for (i = 0; i < sband->n_bitrates; i++) { + if (!(basic_rates & BIT(i))) + continue; + if (sband->bitrates[i].bitrate > bitrate) + continue; + result = 
&sband->bitrates[i]; + } + + return result; +} +EXPORT_SYMBOL(ieee80211_get_response_rate); + int ieee80211_channel_to_frequency(int chan) { if (chan < 14) diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 0f439a72cca..c631047e1b2 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -3,8 +3,8 @@ # obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ - xfrm_input.o xfrm_output.o xfrm_algo.o + xfrm_input.o xfrm_output.o xfrm_algo.o \ + xfrm_sysctl.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o - diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 75279402ccf..b4a13178fb4 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -104,6 +104,7 @@ EXPORT_SYMBOL(xfrm_prepare_input); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { + struct net *net = dev_net(skb->dev); int err; __be32 seq; struct xfrm_state *x; @@ -127,7 +128,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) sp = secpath_dup(skb->sp); if (!sp) { - XFRM_INC_STATS(LINUX_MIB_XFRMINERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } if (skb->sp) @@ -141,19 +142,19 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } do { if (skb->sp->len == XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } - x = xfrm_state_lookup(daddr, spi, nexthdr, family); + x = xfrm_state_lookup(net, daddr, spi, nexthdr, family); if (x == NULL) { - XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); goto drop; } @@ -162,22 +163,22 @@ int xfrm_input(struct sk_buff 
*skb, int nexthdr, __be32 spi, int encap_type) spin_lock(&x->lock); if (unlikely(x->km.state != XFRM_STATE_VALID)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEINVALID); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); goto drop_unlock; } if ((x->encap ? x->encap->encap_type : 0) != encap_type) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); goto drop_unlock; } if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATESEQERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } if (xfrm_state_check_expire(x)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEEXPIRED); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED); goto drop_unlock; } @@ -198,7 +199,7 @@ resume: x->type->proto); x->stats.integrity_failed++; } - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEPROTOERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); goto drop_unlock; } @@ -224,7 +225,7 @@ resume: } if (inner_mode->input(x, skb)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } @@ -242,7 +243,7 @@ resume: err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } } while (!err); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index dc50f1e71f7..c235597ba8d 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -41,6 +41,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; + struct net *net = xs_net(x); if (err <= 0) goto resume; @@ -48,33 +49,33 @@ static int xfrm_output_one(struct sk_buff *skb, int err) do { err = xfrm_state_check_space(x, skb); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); goto error_nolock; } err = x->outer_mode->output(x, skb); if 
(err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEMODEERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); goto error_nolock; } spin_lock_bh(&x->lock); err = xfrm_state_check_expire(x); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEEXPIRED); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); goto error; } if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq; if (unlikely(x->replay.oseq == 0)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATESEQERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); x->replay.oseq--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; goto error; } - if (xfrm_aevent_is_on()) + if (xfrm_aevent_is_on(net)) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } @@ -89,12 +90,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err) resume: if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEPROTOERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR); goto error_nolock; } if (!(skb->dst = dst_pop(dst))) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } @@ -178,6 +179,7 @@ static int xfrm_output_gso(struct sk_buff *skb) int xfrm_output(struct sk_buff *skb) { + struct net *net = dev_net(skb->dst->dev); int err; if (skb_is_gso(skb)) @@ -186,7 +188,7 @@ int xfrm_output(struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); kfree_skb(skb); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index fe596c6ef35..393cc65dbfa 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -34,28 +34,16 @@ #include "xfrm_hash.h" -int sysctl_xfrm_larval_drop __read_mostly = 1; - -#ifdef CONFIG_XFRM_STATISTICS -DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly; -EXPORT_SYMBOL(xfrm_statistics); -#endif - 
DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -static struct list_head xfrm_policy_all; -unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_count); - static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; static struct kmem_cache *xfrm_dst_cache __read_mostly; -static struct work_struct xfrm_policy_gc_work; static HLIST_HEAD(xfrm_policy_gc_list); static DEFINE_SPINLOCK(xfrm_policy_gc_lock); @@ -97,7 +85,7 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } -static inline struct dst_entry *__xfrm_dst_lookup(int tos, +static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, xfrm_address_t *saddr, xfrm_address_t *daddr, int family) @@ -109,7 +97,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(int tos, if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(tos, saddr, daddr); + dst = afinfo->dst_lookup(net, tos, saddr, daddr); xfrm_policy_put_afinfo(afinfo); @@ -121,6 +109,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, xfrm_address_t *prev_daddr, int family) { + struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; struct dst_entry *dst; @@ -134,7 +123,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(tos, saddr, daddr, family); + dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); if (!IS_ERR(dst)) { if (prev_saddr != saddr) @@ -229,13 +218,14 @@ expired: * SPD calls. 
*/ -struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) +struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) { struct xfrm_policy *policy; policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { + write_pnet(&policy->xp_net, net); INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); @@ -296,6 +286,7 @@ static void xfrm_policy_gc_task(struct work_struct *work) hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) xfrm_policy_gc_kill(policy); } +static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task); /* Rule must be locked. Release descentant resources, announce * entry dead. The rule must be unlinked from lists to the moment. @@ -315,45 +306,36 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) return; } - spin_lock(&xfrm_policy_gc_lock); + spin_lock_bh(&xfrm_policy_gc_lock); hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); - spin_unlock(&xfrm_policy_gc_lock); + spin_unlock_bh(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } -struct xfrm_policy_hash { - struct hlist_head *table; - unsigned int hmask; -}; - -static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; -static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; -static struct hlist_head *xfrm_policy_byidx __read_mostly; -static unsigned int xfrm_idx_hmask __read_mostly; static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; -static inline unsigned int idx_hash(u32 index) +static inline unsigned int idx_hash(struct net *net, u32 index) { - return __idx_hash(index, xfrm_idx_hmask); + return __idx_hash(index, net->xfrm.policy_idx_hmask); } -static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) +static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = 
net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __sel_hash(sel, family, hmask); return (hash == hmask + 1 ? - &xfrm_policy_inexact[dir] : - xfrm_policy_bydst[dir].table + hash); + &net->xfrm.policy_inexact[dir] : + net->xfrm.policy_bydst[dir].table + hash); } -static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __addr_hash(daddr, saddr, family, hmask); - return xfrm_policy_bydst[dir].table + hash; + return net->xfrm.policy_bydst[dir].table + hash; } static void xfrm_dst_hash_transfer(struct hlist_head *list, @@ -408,12 +390,12 @@ static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) return ((old_hmask + 1) << 1) - 1; } -static void xfrm_bydst_resize(int dir) +static void xfrm_bydst_resize(struct net *net, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *odst = xfrm_policy_bydst[dir].table; + struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; struct hlist_head *ndst = xfrm_hash_alloc(nsize); int i; @@ -425,20 +407,20 @@ static void xfrm_bydst_resize(int dir) for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); - xfrm_policy_bydst[dir].table = ndst; - xfrm_policy_bydst[dir].hmask = nhashmask; + net->xfrm.policy_bydst[dir].table = ndst; + net->xfrm.policy_bydst[dir].hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } -static void xfrm_byidx_resize(int total) +static void xfrm_byidx_resize(struct net 
*net, int total) { - unsigned int hmask = xfrm_idx_hmask; + unsigned int hmask = net->xfrm.policy_idx_hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *oidx = xfrm_policy_byidx; + struct hlist_head *oidx = net->xfrm.policy_byidx; struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; @@ -450,18 +432,18 @@ static void xfrm_byidx_resize(int total) for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); - xfrm_policy_byidx = nidx; - xfrm_idx_hmask = nhashmask; + net->xfrm.policy_byidx = nidx; + net->xfrm.policy_idx_hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } -static inline int xfrm_bydst_should_resize(int dir, int *total) +static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total) { - unsigned int cnt = xfrm_policy_count[dir]; - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int cnt = net->xfrm.policy_count[dir]; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; if (total) *total += cnt; @@ -473,9 +455,9 @@ static inline int xfrm_bydst_should_resize(int dir, int *total) return 0; } -static inline int xfrm_byidx_should_resize(int total) +static inline int xfrm_byidx_should_resize(struct net *net, int total) { - unsigned int hmask = xfrm_idx_hmask; + unsigned int hmask = net->xfrm.policy_idx_hmask; if ((hmask + 1) < xfrm_policy_hashmax && total > hmask) @@ -487,41 +469,40 @@ static inline int xfrm_byidx_should_resize(int total) void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) { read_lock_bh(&xfrm_policy_lock); - si->incnt = xfrm_policy_count[XFRM_POLICY_IN]; - si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT]; - si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD]; - si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; - si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; - si->fwdscnt = 
xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; - si->spdhcnt = xfrm_idx_hmask; + si->incnt = init_net.xfrm.policy_count[XFRM_POLICY_IN]; + si->outcnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD]; + si->inscnt = init_net.xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->spdhcnt = init_net.xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; read_unlock_bh(&xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_spd_getinfo); static DEFINE_MUTEX(hash_resize_mutex); -static void xfrm_hash_resize(struct work_struct *__unused) +static void xfrm_hash_resize(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.policy_hash_work); int dir, total; mutex_lock(&hash_resize_mutex); total = 0; for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { - if (xfrm_bydst_should_resize(dir, &total)) - xfrm_bydst_resize(dir); + if (xfrm_bydst_should_resize(net, dir, &total)) + xfrm_bydst_resize(net, dir); } - if (xfrm_byidx_should_resize(total)) - xfrm_byidx_resize(total); + if (xfrm_byidx_should_resize(net, total)) + xfrm_byidx_resize(net, total); mutex_unlock(&hash_resize_mutex); } -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); - /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. 
*/ -static u32 xfrm_gen_index(u8 type, int dir) +static u32 xfrm_gen_index(struct net *net, int dir) { static u32 idx_generator; @@ -536,7 +517,7 @@ static u32 xfrm_gen_index(u8 type, int dir) idx_generator += 8; if (idx == 0) idx = 8; - list = xfrm_policy_byidx + idx_hash(idx); + list = net->xfrm.policy_byidx + idx_hash(net, idx); found = 0; hlist_for_each_entry(p, entry, list, byidx) { if (p->index == idx) { @@ -566,6 +547,7 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { + struct net *net = xp_net(policy); struct xfrm_policy *pol; struct xfrm_policy *delpol; struct hlist_head *chain; @@ -573,7 +555,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(&policy->selector, policy->family, dir); + chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { @@ -600,27 +582,27 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - xfrm_policy_count[dir]++; + net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); if (delpol) { hlist_del(&delpol->bydst); hlist_del(&delpol->byidx); list_del(&delpol->walk.all); - xfrm_policy_count[dir]--; + net->xfrm.policy_count[dir]--; } - policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); - hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); + policy->index = delpol ? 
delpol->index : xfrm_gen_index(net, dir); + hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - list_add(&policy->walk.all, &xfrm_policy_all); + list_add(&policy->walk.all, &net->xfrm.policy_all); write_unlock_bh(&xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); - else if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); + else if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); read_lock_bh(&xfrm_policy_lock); gc_list = NULL; @@ -654,7 +636,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) @@ -665,7 +647,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, *err = 0; write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(sel, sel->family, dir); + chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == type && @@ -682,7 +664,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, hlist_del(&pol->bydst); hlist_del(&pol->byidx); list_del(&pol->walk.all); - xfrm_policy_count[dir]--; + net->xfrm.policy_count[dir]--; } ret = pol; break; @@ -698,8 +680,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, - int *err) +struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id, + int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; @@ -711,7 +693,7 @@ struct xfrm_policy 
*xfrm_policy_byid(u8 type, int dir, u32 id, int delete, *err = 0; write_lock_bh(&xfrm_policy_lock); - chain = xfrm_policy_byidx + idx_hash(id); + chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, entry, chain, byidx) { if (pol->type == type && pol->index == id) { @@ -726,7 +708,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, hlist_del(&pol->bydst); hlist_del(&pol->byidx); list_del(&pol->walk.all); - xfrm_policy_count[dir]--; + net->xfrm.policy_count[dir]--; } ret = pol; break; @@ -744,7 +726,7 @@ EXPORT_SYMBOL(xfrm_policy_byid); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) { int dir, err = 0; @@ -754,7 +736,7 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) int i; hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; err = security_xfrm_policy_delete(pol->security); @@ -766,9 +748,9 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) return err; } } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, - xfrm_policy_bydst[dir].table + i, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; @@ -788,19 +770,19 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) } #else static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) { return 0; } #endif -int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) +int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { int dir, err = 0; 
write_lock_bh(&xfrm_policy_lock); - err = xfrm_policy_flush_secctx_check(type, audit_info); + err = xfrm_policy_flush_secctx_check(net, type, audit_info); if (err) goto out; @@ -812,7 +794,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) killed = 0; again1: hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; hlist_del(&pol->bydst); @@ -830,10 +812,10 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) goto again1; } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { again2: hlist_for_each_entry(pol, entry, - xfrm_policy_bydst[dir].table + i, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; @@ -854,7 +836,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) } } - xfrm_policy_count[dir] -= killed; + net->xfrm.policy_count[dir] -= killed; } atomic_inc(&flow_cache_genid); out: @@ -863,7 +845,7 @@ out: } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(struct xfrm_policy_walk *walk, +int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { @@ -880,10 +862,10 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk, write_lock_bh(&xfrm_policy_lock); if (list_empty(&walk->walk.all)) - x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all); + x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); - list_for_each_entry_from(x, &xfrm_policy_all, all) { + list_for_each_entry_from(x, &net->xfrm.policy_all, all) { if (x->dead) continue; pol = container_of(x, struct xfrm_policy, walk); @@ -952,7 +934,8 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, return ret; } -static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 
type, struct flowi *fl, +static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, + struct flowi *fl, u16 family, u8 dir) { int err; @@ -968,7 +951,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, return NULL; read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(daddr, saddr, family, dir); + chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); @@ -985,7 +968,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, break; } } - chain = &xfrm_policy_inexact[dir]; + chain = &net->xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { @@ -1008,14 +991,14 @@ fail: return ret; } -static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, + u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; int err = 0; #ifdef CONFIG_XFRM_SUB_POLICY - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1023,7 +1006,7 @@ static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, if (pol || err) goto end; #endif - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1082,29 +1065,32 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - struct hlist_head *chain = policy_hash_bysel(&pol->selector, + struct net *net = xp_net(pol); + struct hlist_head 
*chain = policy_hash_bysel(net, &pol->selector, pol->family, dir); - list_add(&pol->walk.all, &xfrm_policy_all); + list_add(&pol->walk.all, &net->xfrm.policy_all); hlist_add_head(&pol->bydst, chain); - hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); - xfrm_policy_count[dir]++; + hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index)); + net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); - if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); + if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { + struct net *net = xp_net(pol); + if (hlist_unhashed(&pol->bydst)) return NULL; hlist_del(&pol->bydst); hlist_del(&pol->byidx); list_del(&pol->walk.all); - xfrm_policy_count[dir]--; + net->xfrm.policy_count[dir]--; return pol; } @@ -1126,6 +1112,7 @@ EXPORT_SYMBOL(xfrm_policy_delete); int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { + struct net *net = xp_net(pol); struct xfrm_policy *old_pol; #ifdef CONFIG_XFRM_SUB_POLICY @@ -1138,7 +1125,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = get_seconds(); - pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) @@ -1153,7 +1140,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) { - struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC); + struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); if (newp) { newp->selector = old->selector; @@ -1193,7 +1180,7 @@ int __xfrm_sk_clone_policy(struct sock *sk) } static int -xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, 
+xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family) { int err; @@ -1201,7 +1188,7 @@ xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(local, remote); + err = afinfo->get_saddr(net, local, remote); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1213,6 +1200,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { + struct net *net = xp_net(policy); int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); @@ -1231,7 +1219,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, local = &tmpl->saddr; family = tmpl->encap_family; if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(&tmp, remote, family); + error = xfrm_get_saddr(net, &tmp, remote, family); if (error) goto fail; local = &tmp; @@ -1545,7 +1533,7 @@ static int stale_bundle(struct dst_entry *dst); * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; @@ -1575,7 +1563,7 @@ restart: policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); err = PTR_ERR(policy); if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); goto dropdst; } } @@ -1583,14 +1571,14 @@ restart: if (!policy) { /* To accelerate a bit... 
*/ if ((dst_orig->flags & DST_NOXFRM) || - !xfrm_policy_count[XFRM_POLICY_OUT]) + !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - policy = flow_cache_lookup(fl, dst_orig->ops->family, + policy = flow_cache_lookup(net, fl, dst_orig->ops->family, dir, xfrm_policy_lookup); err = PTR_ERR(policy); if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); goto dropdst; } } @@ -1613,7 +1601,7 @@ restart: default: case XFRM_POLICY_BLOCK: /* Prohibit the flow */ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; @@ -1633,7 +1621,7 @@ restart: */ dst = xfrm_find_bundle(fl, policy, family); if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); err = PTR_ERR(dst); goto error; } @@ -1643,17 +1631,18 @@ restart: #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, + XFRM_POLICY_TYPE_MAIN, fl, family, XFRM_POLICY_OUT); if (pols[1]) { if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); err = PTR_ERR(pols[1]); goto error; } if (pols[1]->action == XFRM_POLICY_BLOCK) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; } @@ -1680,27 +1669,27 @@ restart: if (unlikely(nx<0)) { err = nx; - if (err == -EAGAIN && sysctl_xfrm_larval_drop) { + if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) { /* EREMOTE tells the caller to generate * a one-shot blackhole route. 
*/ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); xfrm_pol_put(policy); return -EREMOTE; } if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&km_waitq, &wait); + add_wait_queue(&net->xfrm.km_waitq, &wait); set_current_state(TASK_INTERRUPTIBLE); schedule(); set_current_state(TASK_RUNNING); - remove_wait_queue(&km_waitq, &wait); + remove_wait_queue(&net->xfrm.km_waitq, &wait); nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (nx == -EAGAIN && signal_pending(current)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); err = -ERESTART; goto error; } @@ -1712,7 +1701,7 @@ restart: err = nx; } if (err < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); goto error; } } @@ -1725,7 +1714,7 @@ restart: dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig); err = PTR_ERR(dst); if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); goto error; } @@ -1746,9 +1735,9 @@ restart: dst_free(dst); if (pol_dead) - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD); else - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); err = -EHOSTUNREACH; goto error; } @@ -1760,7 +1749,7 @@ restart: if (unlikely(err)) { write_unlock_bh(&policy->lock); dst_free(dst); - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); goto error; } @@ -1789,10 +1778,10 @@ nopol: } EXPORT_SYMBOL(__xfrm_lookup); -int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { - int err = __xfrm_lookup(dst_p, fl, sk, flags); + int err = __xfrm_lookup(net, dst_p, fl, sk, flags); if (err == 
-EREMOTE) { dst_release(*dst_p); @@ -1900,6 +1889,7 @@ static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct xfrm_policy *pol; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int npols = 0; @@ -1915,7 +1905,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, fl_dir = policy_to_flow_dir(dir); if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } @@ -1928,7 +1918,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (i=skb->sp->len-1; i>=0; i--) { struct xfrm_state *x = skb->sp->xvec[i]; if (!xfrm_selector_match(&x->sel, &fl, family)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); return 0; } } @@ -1938,24 +1928,24 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (sk && sk->sk_policy[dir]) { pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } } if (!pol) - pol = flow_cache_lookup(&fl, family, fl_dir, + pol = flow_cache_lookup(net, &fl, family, fl_dir, xfrm_policy_lookup); if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } if (!pol) { if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); - XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } return 1; @@ -1967,12 +1957,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, npols ++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = 
xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN); if (pols[1]) { if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } pols[1]->curlft.use_time = get_seconds(); @@ -1996,11 +1986,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (pi = 0; pi < npols; pi++) { if (pols[pi] != pol && pols[pi]->action != XFRM_POLICY_ALLOW) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); goto reject; } if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto reject_error; } for (i = 0; i < pols[pi]->xfrm_nr; i++) @@ -2024,20 +2014,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (k < -1) /* "-2 - errored_index" returned */ xerr_idx = -(2+k); - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } } if (secpath_has_nontransport(sp, k, &xerr_idx)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } xfrm_pols_put(pols, npols); return 1; } - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); reject: xfrm_secpath_reject(xerr_idx, skb, &fl); @@ -2049,15 +2039,16 @@ EXPORT_SYMBOL(__xfrm_policy_check); int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct flowi fl; if (xfrm_decode_session(skb, &fl, family) < 0) { /* XXX: we should have something like FWDHDRERROR here. 
*/ - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } - return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; + return xfrm_lookup(net, &skb->dst, &fl, NULL, 0) == 0; } EXPORT_SYMBOL(__xfrm_route_forward); @@ -2141,7 +2132,7 @@ static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_ent write_unlock(&pol->lock); } -static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) +static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *)) { struct dst_entry *gc_list = NULL; int dir; @@ -2154,11 +2145,11 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) int i; hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) + &net->xfrm.policy_inexact[dir], bydst) prune_one_bundle(pol, func, &gc_list); - table = xfrm_policy_bydst[dir].table; - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + table = net->xfrm.policy_bydst[dir].table; + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, table + i, bydst) prune_one_bundle(pol, func, &gc_list); } @@ -2177,14 +2168,14 @@ static int unused_bundle(struct dst_entry *dst) return !atomic_read(&dst->__refcnt); } -static void __xfrm_garbage_collect(void) +static void __xfrm_garbage_collect(struct net *net) { - xfrm_prune_bundles(unused_bundle); + xfrm_prune_bundles(net, unused_bundle); } -static int xfrm_flush_bundles(void) +static int xfrm_flush_bundles(struct net *net) { - xfrm_prune_bundles(stale_bundle); + xfrm_prune_bundles(net, stale_bundle); return 0; } @@ -2370,12 +2361,9 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void { struct net_device *dev = ptr; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - switch (event) { case NETDEV_DOWN: - xfrm_flush_bundles(); + xfrm_flush_bundles(dev_net(dev)); } return NOTIFY_DONE; } @@ -2385,21 +2373,42 @@ static struct notifier_block xfrm_dev_notifier = { }; #ifdef 
CONFIG_XFRM_STATISTICS -static int __init xfrm_statistics_init(void) +static int __net_init xfrm_statistics_init(struct net *net) { - if (snmp_mib_init((void **)xfrm_statistics, + int rv; + + if (snmp_mib_init((void **)net->mib.xfrm_statistics, sizeof(struct linux_xfrm_mib)) < 0) return -ENOMEM; + rv = xfrm_proc_init(net); + if (rv < 0) + snmp_mib_free((void **)net->mib.xfrm_statistics); + return rv; +} + +static void xfrm_statistics_fini(struct net *net) +{ + xfrm_proc_fini(net); + snmp_mib_free((void **)net->mib.xfrm_statistics); +} +#else +static int __net_init xfrm_statistics_init(struct net *net) +{ return 0; } + +static void xfrm_statistics_fini(struct net *net) +{ +} #endif -static void __init xfrm_policy_init(void) +static int __net_init xfrm_policy_init(struct net *net) { unsigned int hmask, sz; int dir; - xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", + if (net_eq(net, &init_net)) + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); @@ -2407,39 +2416,124 @@ static void __init xfrm_policy_init(void) hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); - xfrm_policy_byidx = xfrm_hash_alloc(sz); - xfrm_idx_hmask = hmask; - if (!xfrm_policy_byidx) - panic("XFRM: failed to allocate byidx hash\n"); + net->xfrm.policy_byidx = xfrm_hash_alloc(sz); + if (!net->xfrm.policy_byidx) + goto out_byidx; + net->xfrm.policy_idx_hmask = hmask; for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { struct xfrm_policy_hash *htab; - INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); + net->xfrm.policy_count[dir] = 0; + INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); - htab = &xfrm_policy_bydst[dir]; + htab = &net->xfrm.policy_bydst[dir]; htab->table = xfrm_hash_alloc(sz); - htab->hmask = hmask; if (!htab->table) - panic("XFRM: failed to allocate bydst hash\n"); + goto out_bydst; + htab->hmask = hmask; } - INIT_LIST_HEAD(&xfrm_policy_all); - INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); - 
register_netdevice_notifier(&xfrm_dev_notifier); + INIT_LIST_HEAD(&net->xfrm.policy_all); + INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); + if (net_eq(net, &init_net)) + register_netdevice_notifier(&xfrm_dev_notifier); + return 0; + +out_bydst: + for (dir--; dir >= 0; dir--) { + struct xfrm_policy_hash *htab; + + htab = &net->xfrm.policy_bydst[dir]; + xfrm_hash_free(htab->table, sz); + } + xfrm_hash_free(net->xfrm.policy_byidx, sz); +out_byidx: + return -ENOMEM; } -void __init xfrm_init(void) +static void xfrm_policy_fini(struct net *net) { -#ifdef CONFIG_XFRM_STATISTICS - xfrm_statistics_init(); + struct xfrm_audit audit_info; + unsigned int sz; + int dir; + + flush_work(&net->xfrm.policy_hash_work); +#ifdef CONFIG_XFRM_SUB_POLICY + audit_info.loginuid = -1; + audit_info.sessionid = -1; + audit_info.secid = 0; + xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); #endif - xfrm_state_init(); - xfrm_policy_init(); + audit_info.loginuid = -1; + audit_info.sessionid = -1; + audit_info.secid = 0; + xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); + flush_work(&xfrm_policy_gc_work); + + WARN_ON(!list_empty(&net->xfrm.policy_all)); + + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + + WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); + + htab = &net->xfrm.policy_bydst[dir]; + sz = (htab->hmask + 1); + WARN_ON(!hlist_empty(htab->table)); + xfrm_hash_free(htab->table, sz); + } + + sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); + xfrm_hash_free(net->xfrm.policy_byidx, sz); +} + +static int __net_init xfrm_net_init(struct net *net) +{ + int rv; + + rv = xfrm_statistics_init(net); + if (rv < 0) + goto out_statistics; + rv = xfrm_state_init(net); + if (rv < 0) + goto out_state; + rv = xfrm_policy_init(net); + if (rv < 0) + goto out_policy; + rv = xfrm_sysctl_init(net); + if (rv < 0) + goto out_sysctl; + return 0; + +out_sysctl: + 
xfrm_policy_fini(net); +out_policy: + xfrm_state_fini(net); +out_state: + xfrm_statistics_fini(net); +out_statistics: + return rv; +} + +static void __net_exit xfrm_net_exit(struct net *net) +{ + xfrm_sysctl_fini(net); + xfrm_policy_fini(net); + xfrm_state_fini(net); + xfrm_statistics_fini(net); +} + +static struct pernet_operations __net_initdata xfrm_net_ops = { + .init = xfrm_net_init, + .exit = xfrm_net_exit, +}; + +void __init xfrm_init(void) +{ + register_pernet_subsys(&xfrm_net_ops); xfrm_input_init(); -#ifdef CONFIG_XFRM_STATISTICS - xfrm_proc_init(); -#endif } #ifdef CONFIG_AUDITSYSCALL @@ -2539,7 +2633,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, u32 priority = ~0U; read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir); + chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, entry, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { @@ -2548,7 +2642,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, break; } } - chain = &xfrm_policy_inexact[dir]; + chain = &init_net.xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type && diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 2b0db13f0cd..284eaef1dbf 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -59,17 +59,18 @@ fold_field(void *mib[], int offt) static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq->private; int i; for (i=0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - fold_field((void **)xfrm_statistics, + fold_field((void **)net->mib.xfrm_statistics, xfrm_mib_list[i].entry)); return 0; } static int xfrm_statistics_seq_open(struct inode *inode, struct file *file) { - return 
single_open(file, xfrm_statistics_seq_show, NULL); + return single_open_net(inode, file, xfrm_statistics_seq_show); } static struct file_operations xfrm_statistics_seq_fops = { @@ -77,21 +78,18 @@ static struct file_operations xfrm_statistics_seq_fops = { .open = xfrm_statistics_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; -int __init xfrm_proc_init(void) +int __net_init xfrm_proc_init(struct net *net) { - int rc = 0; - - if (!proc_net_fops_create(&init_net, "xfrm_stat", S_IRUGO, + if (!proc_net_fops_create(net, "xfrm_stat", S_IRUGO, &xfrm_statistics_seq_fops)) - goto stat_fail; - - out: - return rc; + return -ENOMEM; + return 0; +} - stat_fail: - rc = -ENOMEM; - goto out; +void xfrm_proc_fini(struct net *net) +{ + proc_net_remove(net, "xfrm_stat"); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index cd9d9171ded..2fd57f8f77c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -24,17 +24,6 @@ #include "xfrm_hash.h" -struct sock *xfrm_nl; -EXPORT_SYMBOL(xfrm_nl); - -u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME; -EXPORT_SYMBOL(sysctl_xfrm_aevent_etime); - -u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE; -EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); - -u32 sysctl_xfrm_acq_expires __read_mostly = 30; - /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) @@ -44,19 +33,7 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30; static DEFINE_SPINLOCK(xfrm_state_lock); -/* Hash table to find appropriate SA towards given target (endpoint - * of tunnel or destination of transport mode) allowed by selector. - * - * Main use is finding SA after policy selected tunnel or transport mode. - * Also, it can be used by ah/esp icmp error handler to find offending SA. 
- */ -static LIST_HEAD(xfrm_state_all); -static struct hlist_head *xfrm_state_bydst __read_mostly; -static struct hlist_head *xfrm_state_bysrc __read_mostly; -static struct hlist_head *xfrm_state_byspi __read_mostly; -static unsigned int xfrm_state_hmask __read_mostly; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); @@ -69,25 +46,27 @@ static void xfrm_audit_state_replay(struct xfrm_state *x, #define xfrm_audit_state_replay(x, s, sq) do { ; } while (0) #endif /* CONFIG_AUDITSYSCALL */ -static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, +static inline unsigned int xfrm_dst_hash(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, u32 reqid, unsigned short family) { - return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); + return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask); } -static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr, +static inline unsigned int xfrm_src_hash(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family) { - return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask); + return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); } static inline unsigned int -xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +xfrm_spi_hash(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { - return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); + return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); } static void xfrm_hash_transfer(struct hlist_head *list, @@ -121,16 +100,16 @@ static void xfrm_hash_transfer(struct hlist_head *list, } } -static unsigned long xfrm_hash_new_size(void) +static unsigned long xfrm_hash_new_size(unsigned int state_hmask) { - return 
((xfrm_state_hmask + 1) << 1) * - sizeof(struct hlist_head); + return ((state_hmask + 1) << 1) * sizeof(struct hlist_head); } static DEFINE_MUTEX(hash_resize_mutex); -static void xfrm_hash_resize(struct work_struct *__unused) +static void xfrm_hash_resize(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.state_hash_work); struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; unsigned long nsize, osize; unsigned int nhashmask, ohashmask; @@ -138,7 +117,7 @@ static void xfrm_hash_resize(struct work_struct *__unused) mutex_lock(&hash_resize_mutex); - nsize = xfrm_hash_new_size(); + nsize = xfrm_hash_new_size(net->xfrm.state_hmask); ndst = xfrm_hash_alloc(nsize); if (!ndst) goto out_unlock; @@ -157,19 +136,19 @@ static void xfrm_hash_resize(struct work_struct *__unused) spin_lock_bh(&xfrm_state_lock); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; - for (i = xfrm_state_hmask; i >= 0; i--) - xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, + for (i = net->xfrm.state_hmask; i >= 0; i--) + xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi, nhashmask); - odst = xfrm_state_bydst; - osrc = xfrm_state_bysrc; - ospi = xfrm_state_byspi; - ohashmask = xfrm_state_hmask; + odst = net->xfrm.state_bydst; + osrc = net->xfrm.state_bysrc; + ospi = net->xfrm.state_byspi; + ohashmask = net->xfrm.state_hmask; - xfrm_state_bydst = ndst; - xfrm_state_bysrc = nsrc; - xfrm_state_byspi = nspi; - xfrm_state_hmask = nhashmask; + net->xfrm.state_bydst = ndst; + net->xfrm.state_bysrc = nsrc; + net->xfrm.state_byspi = nspi; + net->xfrm.state_hmask = nhashmask; spin_unlock_bh(&xfrm_state_lock); @@ -182,16 +161,9 @@ out_unlock: mutex_unlock(&hash_resize_mutex); } -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); - -DECLARE_WAIT_QUEUE_HEAD(km_waitq); -EXPORT_SYMBOL(km_waitq); - static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; -static struct work_struct xfrm_state_gc_work; 
-static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); @@ -401,20 +373,21 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x); } -static void xfrm_state_gc_task(struct work_struct *data) +static void xfrm_state_gc_task(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; struct hlist_node *entry, *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - hlist_move_list(&xfrm_state_gc_list, &gc_list); + hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); } static inline unsigned long make_jiffies(long secs) @@ -428,6 +401,7 @@ static inline unsigned long make_jiffies(long secs) static void xfrm_timer_handler(unsigned long data) { struct xfrm_state *x = (struct xfrm_state*)data; + struct net *net = xs_net(x); unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; @@ -485,7 +459,7 @@ resched: expired: if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) { x->km.state = XFRM_STATE_EXPIRED; - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); next = 2; goto resched; } @@ -504,13 +478,14 @@ out: static void xfrm_replay_timer_handler(unsigned long data); -struct xfrm_state *xfrm_state_alloc(void) +struct xfrm_state *xfrm_state_alloc(struct net *net) { struct xfrm_state *x; x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC); if (x) { + write_pnet(&x->xs_net, net); atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); INIT_LIST_HEAD(&x->km.all); @@ -537,17 +512,20 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { + struct net *net = xs_net(x); + WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->gclist, &xfrm_state_gc_list); + 
hlist_add_head(&x->gclist, &net->xfrm.state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&xfrm_state_gc_work); + schedule_work(&net->xfrm.state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); int __xfrm_state_delete(struct xfrm_state *x) { + struct net *net = xs_net(x); int err = -ESRCH; if (x->km.state != XFRM_STATE_DEAD) { @@ -558,7 +536,7 @@ int __xfrm_state_delete(struct xfrm_state *x) hlist_del(&x->bysrc); if (x->id.spi) hlist_del(&x->byspi); - xfrm_state_num--; + net->xfrm.state_num--; spin_unlock(&xfrm_state_lock); /* All xfrm_state objects are created by xfrm_state_alloc. @@ -587,15 +565,15 @@ EXPORT_SYMBOL(xfrm_state_delete); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) { int i, err = 0; - for (i = 0; i <= xfrm_state_hmask; i++) { + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, @@ -611,26 +589,26 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) } #else static inline int -xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) { return 0; } #endif -int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) +int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) { int i, err = 0; spin_lock_bh(&xfrm_state_lock); - err = xfrm_state_flush_secctx_check(proto, audit_info); + err = xfrm_state_flush_secctx_check(net, proto, audit_info); if (err) goto out; - for (i = 0; i <= xfrm_state_hmask; i++) { + for (i = 0; i <= 
net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; restart: - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -652,7 +630,7 @@ restart: out: spin_unlock_bh(&xfrm_state_lock); - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); return err; } EXPORT_SYMBOL(xfrm_state_flush); @@ -660,8 +638,8 @@ EXPORT_SYMBOL(xfrm_state_flush); void xfrm_sad_getinfo(struct xfrmk_sadinfo *si) { spin_lock_bh(&xfrm_state_lock); - si->sadcnt = xfrm_state_num; - si->sadhcnt = xfrm_state_hmask; + si->sadcnt = init_net.xfrm.state_num; + si->sadhcnt = init_net.xfrm.state_hmask; si->sadhmcnt = xfrm_state_hashmax; spin_unlock_bh(&xfrm_state_lock); } @@ -681,13 +659,13 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } -static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { - unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); + unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; struct hlist_node *entry; - hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) { + hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto) @@ -713,13 +691,13 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, return NULL; } -static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { - unsigned int h = xfrm_src_hash(daddr, saddr, 
family); + unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; struct hlist_node *entry; - hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto) continue; @@ -751,21 +729,23 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) { + struct net *net = xs_net(x); + if (use_spi) - return __xfrm_state_lookup(&x->id.daddr, x->id.spi, + return __xfrm_state_lookup(net, &x->id.daddr, x->id.spi, x->id.proto, family); else - return __xfrm_state_lookup_byaddr(&x->id.daddr, + return __xfrm_state_lookup_byaddr(net, &x->id.daddr, &x->props.saddr, x->id.proto, family); } -static void xfrm_hash_grow_check(int have_hash_collision) +static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) { if (have_hash_collision && - (xfrm_state_hmask + 1) < xfrm_state_hashmax && - xfrm_state_num > xfrm_state_hmask) - schedule_work(&xfrm_hash_work); + (net->xfrm.state_hmask + 1) < xfrm_state_hashmax && + net->xfrm.state_num > net->xfrm.state_hmask) + schedule_work(&net->xfrm.state_hash_work); } struct xfrm_state * @@ -774,6 +754,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { + struct net *net = xp_net(pol); unsigned int h; struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; @@ -784,8 +765,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, to_put = NULL; spin_lock_bh(&xfrm_state_lock); - h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && 
!(x->props.flags & XFRM_STATE_WILDRECV) && @@ -829,13 +810,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, + (x0 = __xfrm_state_lookup(net, daddr, tmpl->id.spi, tmpl->id.proto, family)) != NULL) { to_put = x0; error = -EEXIST; goto out; } - x = xfrm_state_alloc(); + x = xfrm_state_alloc(net); if (x == NULL) { error = -ENOMEM; goto out; @@ -854,19 +835,19 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add(&x->km.all, &xfrm_state_all); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(daddr, saddr, family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; + x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ; add_timer(&x->timer); - xfrm_state_num++; - xfrm_hash_grow_check(x->bydst.next != NULL); + net->xfrm.state_num++; + xfrm_hash_grow_check(net, x->bydst.next != NULL); } else { x->km.state = XFRM_STATE_DEAD; to_put = x; @@ -886,7 +867,8 @@ out: } struct xfrm_state * -xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_stateonly_find(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid) { 
unsigned int h; @@ -894,8 +876,8 @@ xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct hlist_node *entry; spin_lock(&xfrm_state_lock); - h = xfrm_dst_hash(daddr, saddr, reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + h = xfrm_dst_hash(net, daddr, saddr, reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -919,48 +901,50 @@ EXPORT_SYMBOL(xfrm_stateonly_find); static void __xfrm_state_insert(struct xfrm_state *x) { + struct net *net = xs_net(x); unsigned int h; x->genid = ++xfrm_state_genid; - list_add(&x->km.all, &xfrm_state_all); + list_add(&x->km.all, &net->xfrm.state_all); - h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); - h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } mod_timer(&x->timer, jiffies + HZ); if (x->replay_maxage) mod_timer(&x->rtimer, jiffies + x->replay_maxage); - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); - xfrm_state_num++; + net->xfrm.state_num++; - xfrm_hash_grow_check(x->bydst.next != NULL); + xfrm_hash_grow_check(net, x->bydst.next != NULL); } /* xfrm_state_lock is held */ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) { + struct net *net = xs_net(xnew); unsigned short family = 
xnew->props.family; u32 reqid = xnew->props.reqid; struct xfrm_state *x; struct hlist_node *entry; unsigned int h; - h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && @@ -979,13 +963,13 @@ void xfrm_state_insert(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { - unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); + unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1017,7 +1001,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re if (!create) return NULL; - x = xfrm_state_alloc(); + x = xfrm_state_alloc(net); if (likely(x)) { switch (family) { case AF_INET: @@ -1048,27 +1032,28 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->timer.expires = 
jiffies + net->xfrm.sysctl_acq_expires*HZ; add_timer(&x->timer); - list_add(&x->km.all, &xfrm_state_all); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(daddr, saddr, family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); - xfrm_state_num++; + net->xfrm.state_num++; - xfrm_hash_grow_check(x->bydst.next != NULL); + xfrm_hash_grow_check(net, x->bydst.next != NULL); } return x; } -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq); int xfrm_state_add(struct xfrm_state *x) { + struct net *net = xs_net(x); struct xfrm_state *x1, *to_put; int family; int err; @@ -1089,7 +1074,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && x->km.seq) { - x1 = __xfrm_find_acq_byseq(x->km.seq); + x1 = __xfrm_find_acq_byseq(net, x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; @@ -1098,7 +1083,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && !x1) - x1 = __find_acq_core(family, x->props.mode, x->props.reqid, + x1 = __find_acq_core(net, family, x->props.mode, x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); @@ -1124,8 +1109,9 @@ EXPORT_SYMBOL(xfrm_state_add); #ifdef CONFIG_XFRM_MIGRATE static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) { + struct net *net = xs_net(orig); int err = -ENOMEM; - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(net); if (!x) goto error; @@ -1206,9 +1192,9 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) struct hlist_node *entry; if (m->reqid) { - h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr, + h = xfrm_dst_hash(&init_net, &m->old_daddr, &m->old_saddr, 
m->reqid, m->old_family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1223,9 +1209,9 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) return x; } } else { - h = xfrm_src_hash(&m->old_daddr, &m->old_saddr, + h = xfrm_src_hash(&init_net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1369,40 +1355,41 @@ int xfrm_state_check_expire(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_check_expire); struct xfrm_state * -xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, +xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup(daddr, spi, proto, family); + x = __xfrm_state_lookup(net, daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * -xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_state_lookup_byaddr(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); + x = __xfrm_state_lookup_byaddr(net, daddr, saddr, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * -xfrm_find_acq(u8 mode, u32 reqid, u8 proto, +xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __find_acq_core(family, 
mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(net, family, mode, reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); return x; @@ -1449,15 +1436,15 @@ EXPORT_SYMBOL(xfrm_state_sort); /* Silly enough, but I'm lazy to build resolution list */ -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq) { int i; - for (i = 0; i <= xfrm_state_hmask; i++) { + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); @@ -1468,12 +1455,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) return NULL; } -struct xfrm_state *xfrm_find_acq_byseq(u32 seq) +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_find_acq_byseq(seq); + x = __xfrm_find_acq_byseq(net, seq); spin_unlock_bh(&xfrm_state_lock); return x; } @@ -1494,6 +1481,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq); int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) { + struct net *net = xs_net(x); unsigned int h; struct xfrm_state *x0; int err = -ENOENT; @@ -1511,7 +1499,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) err = -ENOENT; if (minspi == maxspi) { - x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, &x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { xfrm_state_put(x0); goto unlock; @@ -1521,7 +1509,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) u32 spi = 0; for (h=0; h<high-low+1; h++) { spi = low + net_random()%(high-low+1); - x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, &x->id.daddr, htonl(spi), x->id.proto, 
x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); break; @@ -1531,8 +1519,8 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) } if (x->id.spi) { spin_lock_bh(&xfrm_state_lock); - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); spin_unlock_bh(&xfrm_state_lock); err = 0; @@ -1545,7 +1533,7 @@ unlock: } EXPORT_SYMBOL(xfrm_alloc_spi); -int xfrm_state_walk(struct xfrm_state_walk *walk, +int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *data) { @@ -1558,10 +1546,10 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, spin_lock_bh(&xfrm_state_lock); if (list_empty(&walk->all)) - x = list_first_entry(&xfrm_state_all, struct xfrm_state_walk, all); + x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all); else x = list_entry(&walk->all, struct xfrm_state_walk, all); - list_for_each_entry_from(x, &xfrm_state_all, all) { + list_for_each_entry_from(x, &net->xfrm.state_all, all) { if (x->state == XFRM_STATE_DEAD) continue; state = container_of(x, struct xfrm_state, km); @@ -1660,7 +1648,7 @@ static void xfrm_replay_timer_handler(unsigned long data) spin_lock(&x->lock); if (x->km.state == XFRM_STATE_VALID) { - if (xfrm_aevent_is_on()) + if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT); else x->xflags |= XFRM_TIME_DEFER; @@ -1716,7 +1704,7 @@ void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) x->replay.bitmap |= (1U << diff); } - if (xfrm_aevent_is_on()) + if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } @@ -1749,6 +1737,7 @@ EXPORT_SYMBOL(km_state_notify); void km_state_expired(struct xfrm_state *x, int hard, u32 pid) { + struct net *net = xs_net(x); struct km_event c; c.data.hard = hard; @@ 
-1757,7 +1746,7 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid) km_state_notify(x, &c); if (hard) - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); } EXPORT_SYMBOL(km_state_expired); @@ -1800,6 +1789,7 @@ EXPORT_SYMBOL(km_new_mapping); void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) { + struct net *net = xp_net(pol); struct km_event c; c.data.hard = hard; @@ -1808,7 +1798,7 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) km_policy_notify(pol, dir, &c); if (hard) - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); } EXPORT_SYMBOL(km_policy_expired); @@ -1835,7 +1825,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, EXPORT_SYMBOL(km_migrate); #endif -int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) +int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { int err = -EINVAL; int ret; @@ -1844,7 +1834,7 @@ int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { if (km->report) { - ret = km->report(proto, sel, addr); + ret = km->report(net, proto, sel, addr); if (!ret) err = ret; } @@ -2080,20 +2070,61 @@ error: EXPORT_SYMBOL(xfrm_init_state); -void __init xfrm_state_init(void) +int __net_init xfrm_state_init(struct net *net) { unsigned int sz; + INIT_LIST_HEAD(&net->xfrm.state_all); + sz = sizeof(struct hlist_head) * 8; - xfrm_state_bydst = xfrm_hash_alloc(sz); - xfrm_state_bysrc = xfrm_hash_alloc(sz); - xfrm_state_byspi = xfrm_hash_alloc(sz); - if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) - panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); - xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); + net->xfrm.state_bydst = xfrm_hash_alloc(sz); + if (!net->xfrm.state_bydst) + goto out_bydst; + net->xfrm.state_bysrc = xfrm_hash_alloc(sz); + if (!net->xfrm.state_bysrc) + goto out_bysrc; + 
net->xfrm.state_byspi = xfrm_hash_alloc(sz); + if (!net->xfrm.state_byspi) + goto out_byspi; + net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1); + + net->xfrm.state_num = 0; + INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); + INIT_HLIST_HEAD(&net->xfrm.state_gc_list); + INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); + init_waitqueue_head(&net->xfrm.km_waitq); + return 0; + +out_byspi: + xfrm_hash_free(net->xfrm.state_bysrc, sz); +out_bysrc: + xfrm_hash_free(net->xfrm.state_bydst, sz); +out_bydst: + return -ENOMEM; +} + +void xfrm_state_fini(struct net *net) +{ + struct xfrm_audit audit_info; + unsigned int sz; - INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); + flush_work(&net->xfrm.state_hash_work); + audit_info.loginuid = -1; + audit_info.sessionid = -1; + audit_info.secid = 0; + xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info); + flush_work(&net->xfrm.state_gc_work); + + WARN_ON(!list_empty(&net->xfrm.state_all)); + + sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.state_byspi)); + xfrm_hash_free(net->xfrm.state_byspi, sz); + WARN_ON(!hlist_empty(net->xfrm.state_bysrc)); + xfrm_hash_free(net->xfrm.state_bysrc, sz); + WARN_ON(!hlist_empty(net->xfrm.state_bydst)); + xfrm_hash_free(net->xfrm.state_bydst, sz); } #ifdef CONFIG_AUDITSYSCALL diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c new file mode 100644 index 00000000000..2e6ffb66f06 --- /dev/null +++ b/net/xfrm/xfrm_sysctl.c @@ -0,0 +1,85 @@ +#include <linux/sysctl.h> +#include <net/net_namespace.h> +#include <net/xfrm.h> + +static void __xfrm_sysctl_init(struct net *net) +{ + net->xfrm.sysctl_aevent_etime = XFRM_AE_ETIME; + net->xfrm.sysctl_aevent_rseqth = XFRM_AE_SEQT_SIZE; + net->xfrm.sysctl_larval_drop = 1; + net->xfrm.sysctl_acq_expires = 30; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table xfrm_table[] = { + { + .ctl_name = NET_CORE_AEVENT_ETIME, + .procname = "xfrm_aevent_etime", + .maxlen = sizeof(u32), 
+ .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = NET_CORE_AEVENT_RSEQTH, + .procname = "xfrm_aevent_rseqth", + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm_larval_drop", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm_acq_expires", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + {} +}; + +int __net_init xfrm_sysctl_init(struct net *net) +{ + struct ctl_table *table; + + __xfrm_sysctl_init(net); + + table = kmemdup(xfrm_table, sizeof(xfrm_table), GFP_KERNEL); + if (!table) + goto out_kmemdup; + table[0].data = &net->xfrm.sysctl_aevent_etime; + table[1].data = &net->xfrm.sysctl_aevent_rseqth; + table[2].data = &net->xfrm.sysctl_larval_drop; + table[3].data = &net->xfrm.sysctl_acq_expires; + + net->xfrm.sysctl_hdr = register_net_sysctl_table(net, net_core_path, table); + if (!net->xfrm.sysctl_hdr) + goto out_register; + return 0; + +out_register: + kfree(table); +out_kmemdup: + return -ENOMEM; +} + +void xfrm_sysctl_fini(struct net *net) +{ + struct ctl_table *table; + + table = net->xfrm.sysctl_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->xfrm.sysctl_hdr); + kfree(table); +} +#else +int __net_init xfrm_sysctl_init(struct net *net) +{ + __xfrm_sysctl_init(net); + return 0; +} +#endif diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 76cf56d5d83..38ffaf33312 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -316,11 +316,12 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) x->replay_maxdiff = nla_get_u32(rt); } -static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, +static struct xfrm_state *xfrm_state_construct(struct net *net, + struct xfrm_usersa_info *p, struct nlattr **attrs, int *errp) { - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = 
xfrm_state_alloc(net); int err = -ENOMEM; if (!x) @@ -367,9 +368,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, goto error; x->km.seq = p->seq; - x->replay_maxdiff = sysctl_xfrm_aevent_rseqth; + x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth; /* sysctl_xfrm_aevent_etime is in 100ms units */ - x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; + x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; x->preplay.bitmap = 0; x->preplay.seq = x->replay.seq+x->replay_maxdiff; x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; @@ -391,6 +392,7 @@ error_no_put: static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_usersa_info *p = nlmsg_data(nlh); struct xfrm_state *x; int err; @@ -403,7 +405,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; - x = xfrm_state_construct(p, attrs, &err); + x = xfrm_state_construct(net, p, attrs, &err); if (!x) return err; @@ -431,7 +433,8 @@ out: return err; } -static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, +static struct xfrm_state *xfrm_user_state_lookup(struct net *net, + struct xfrm_usersa_id *p, struct nlattr **attrs, int *errp) { @@ -440,7 +443,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + x = xfrm_state_lookup(net, &p->daddr, p->spi, p->proto, p->family); } else { xfrm_address_t *saddr = NULL; @@ -451,8 +454,8 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, } err = -ESRCH; - x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, - p->family); + x = xfrm_state_lookup_byaddr(net, &p->daddr, saddr, + p->proto, p->family); } out: @@ -464,6 +467,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, 
static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; int err = -ESRCH; struct km_event c; @@ -472,7 +476,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, u32 sessionid = NETLINK_CB(skb).sessionid; u32 sid = NETLINK_CB(skb).sid; - x = xfrm_user_state_lookup(p, attrs, &err); + x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) return err; @@ -615,6 +619,7 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb) static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; struct xfrm_dump_info info; @@ -631,7 +636,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) xfrm_state_walk_init(walk, 0); } - (void) xfrm_state_walk(walk, dump_one_state, &info); + (void) xfrm_state_walk(net, walk, dump_one_state, &info); return skb->len; } @@ -703,6 +708,7 @@ nla_put_failure: static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; @@ -715,7 +721,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (build_spdinfo(r_skb, spid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(xfrm_nl, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); } static inline size_t xfrm_sadinfo_msgsize(void) @@ -756,6 +762,7 @@ nla_put_failure: static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; @@ -768,18 +775,19 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (build_sadinfo(r_skb, spid, seq, *flags) < 0) BUG(); - return 
nlmsg_unicast(xfrm_nl, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_usersa_id *p = nlmsg_data(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; int err = -ESRCH; - x = xfrm_user_state_lookup(p, attrs, &err); + x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) goto out_noput; @@ -787,7 +795,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); } xfrm_state_put(x); out_noput: @@ -820,6 +828,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p) static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct xfrm_userspi_info *p; struct sk_buff *resp_skb; @@ -837,7 +846,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, x = NULL; if (p->info.seq) { - x = xfrm_find_acq_byseq(p->info.seq); + x = xfrm_find_acq_byseq(net, p->info.seq); if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; @@ -845,7 +854,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } if (!x) - x = xfrm_find_acq(p->info.mode, p->info.reqid, + x = xfrm_find_acq(net, p->info.mode, p->info.reqid, p->info.id.proto, daddr, &p->info.saddr, 1, family); @@ -863,7 +872,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); out: xfrm_state_put(x); @@ -1078,9 +1087,9 @@ static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_i p->share = 
XFRM_SHARE_ANY; /* XXX xp->share */ } -static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) +static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) { - struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL); + struct xfrm_policy *xp = xfrm_policy_alloc(net, GFP_KERNEL); int err; if (!xp) { @@ -1110,6 +1119,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_userpolicy_info *p = nlmsg_data(nlh); struct xfrm_policy *xp; struct km_event c; @@ -1126,7 +1136,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; - xp = xfrm_policy_construct(p, attrs, &err); + xp = xfrm_policy_construct(net, p, attrs, &err); if (!xp) return err; @@ -1263,6 +1273,7 @@ static int xfrm_dump_policy_done(struct netlink_callback *cb) static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; struct xfrm_dump_info info; @@ -1279,7 +1290,7 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); } - (void) xfrm_policy_walk(walk, dump_one_policy, &info); + (void) xfrm_policy_walk(net, walk, dump_one_policy, &info); return skb->len; } @@ -1311,6 +1322,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; u8 type = XFRM_POLICY_TYPE_MAIN; @@ -1330,7 +1342,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return 
err; if (p->index) - xp = xfrm_policy_byid(type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1347,7 +1359,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, ctx, + xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } @@ -1361,7 +1373,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(xfrm_nl, resp_skb, + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); } } else { @@ -1390,6 +1402,7 @@ out: static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct km_event c; struct xfrm_usersa_flush *p = nlmsg_data(nlh); struct xfrm_audit audit_info; @@ -1398,13 +1411,14 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.loginuid = NETLINK_CB(skb).loginuid; audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; - err = xfrm_state_flush(p->proto, &audit_info); + err = xfrm_state_flush(net, p->proto, &audit_info); if (err) return err; c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; + c.net = net; km_state_notify(NULL, &c); return 0; @@ -1457,6 +1471,7 @@ nla_put_failure: static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct sk_buff *r_skb; int err; @@ -1468,7 +1483,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - x = xfrm_state_lookup(&id->daddr, id->spi, id->proto, id->family); + x = xfrm_state_lookup(net, &id->daddr, 
id->spi, id->proto, id->family); if (x == NULL) { kfree_skb(r_skb); return -ESRCH; @@ -1486,7 +1501,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = nlmsg_unicast(xfrm_nl, r_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -1495,6 +1510,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct km_event c; int err = - EINVAL; @@ -1509,7 +1525,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (!(nlh->nlmsg_flags&NLM_F_REPLACE)) return err; - x = xfrm_state_lookup(&p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); + x = xfrm_state_lookup(net, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); if (x == NULL) return -ESRCH; @@ -1534,6 +1550,7 @@ out: static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; @@ -1546,13 +1563,14 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.loginuid = NETLINK_CB(skb).loginuid; audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; - err = xfrm_policy_flush(type, &audit_info); + err = xfrm_policy_flush(net, type, &audit_info); if (err) return err; c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; + c.net = net; km_policy_notify(NULL, 0, &c); return 0; } @@ -1560,6 +1578,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct 
xfrm_policy *xp; struct xfrm_user_polexpire *up = nlmsg_data(nlh); struct xfrm_userpolicy_info *p = &up->pol; @@ -1571,7 +1590,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(net, type, p->dir, p->index, 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1588,7 +1607,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, ctx, 0, &err); + xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) @@ -1623,12 +1642,13 @@ out: static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; int err; struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; - x = xfrm_state_lookup(&p->id.daddr, p->id.spi, p->id.proto, p->family); + x = xfrm_state_lookup(net, &p->id.daddr, p->id.spi, p->id.proto, p->family); err = -ENOENT; if (x == NULL) @@ -1657,13 +1677,14 @@ out: static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_user_tmpl *ut; int i; struct nlattr *rt = attrs[XFRMA_TMPL]; struct xfrm_user_acquire *ua = nlmsg_data(nlh); - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(net); int err = -ENOMEM; if (!x) @@ -1677,7 +1698,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, } /* build an XP */ - xp = xfrm_policy_construct(&ua->policy, attrs, &err); + xp = xfrm_policy_construct(net, &ua->policy, attrs, &err); if (!xp) { kfree(x); return err; @@ -1816,7 +1837,7 @@ static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) 
uk.family = k->family; uk.reserved = k->reserved; memcpy(&uk.local, &k->local, sizeof(uk.local)); - memcpy(&uk.remote, &k->local, sizeof(uk.remote)); + memcpy(&uk.remote, &k->remote, sizeof(uk.remote)); return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); } @@ -1869,6 +1890,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k) { + struct net *net = &init_net; struct sk_buff *skb; skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC); @@ -1879,7 +1901,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, @@ -1968,6 +1990,7 @@ static struct xfrm_link { static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; struct xfrm_link *link; int type, err; @@ -1989,7 +2012,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; - return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, link->done); + return netlink_dump_start(net->xfrm.nlsk, skb, nlh, link->dump, link->done); } err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, @@ -2033,6 +2056,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) { + struct net *net = xs_net(x); struct sk_buff *skb; skb = nlmsg_new(xfrm_expire_msgsize(), GFP_ATOMIC); @@ -2042,11 +2066,12 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) if (build_expire(skb, x, c) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, 
XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) { + struct net *net = xs_net(x); struct sk_buff *skb; skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); @@ -2056,11 +2081,12 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) if (build_aevent(skb, x, c) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); } static int xfrm_notify_sa_flush(struct km_event *c) { + struct net *net = c->net; struct xfrm_usersa_flush *p; struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2081,7 +2107,7 @@ static int xfrm_notify_sa_flush(struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); } static inline size_t xfrm_sa_len(struct xfrm_state *x) @@ -2111,6 +2137,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) { + struct net *net = xs_net(x); struct xfrm_usersa_info *p; struct xfrm_usersa_id *id; struct nlmsghdr *nlh; @@ -2155,7 +2182,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); nla_put_failure: /* Somebody screwed up with xfrm_sa_len! 
*/ @@ -2235,6 +2262,7 @@ nlmsg_failure: static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp, int dir) { + struct net *net = xs_net(x); struct sk_buff *skb; skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); @@ -2244,7 +2272,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, if (build_acquire(skb, x, xt, xp, dir) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -2253,6 +2281,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { + struct net *net = sock_net(sk); struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1); struct xfrm_policy *xp; @@ -2291,7 +2320,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, if (p->dir > XFRM_POLICY_OUT) return NULL; - xp = xfrm_policy_alloc(GFP_KERNEL); + xp = xfrm_policy_alloc(net, GFP_KERNEL); if (xp == NULL) { *dir = -ENOBUFS; return NULL; @@ -2344,6 +2373,7 @@ nlmsg_failure: static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) { + struct net *net = xp_net(xp); struct sk_buff *skb; skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); @@ -2353,11 +2383,12 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve if (build_polexpire(skb, xp, dir, c) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) { + struct net *net = xp_net(xp); struct xfrm_userpolicy_info *p; struct 
xfrm_userpolicy_id *id; struct nlmsghdr *nlh; @@ -2408,7 +2439,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -2417,6 +2448,7 @@ nlmsg_failure: static int xfrm_notify_policy_flush(struct km_event *c) { + struct net *net = c->net; struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2432,7 +2464,7 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -2488,8 +2520,8 @@ nla_put_failure: return -EMSGSIZE; } -static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, - xfrm_address_t *addr) +static int xfrm_send_report(struct net *net, u8 proto, + struct xfrm_selector *sel, xfrm_address_t *addr) { struct sk_buff *skb; @@ -2500,7 +2532,7 @@ static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, if (build_report(skb, proto, sel, addr) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); } static inline size_t xfrm_mapping_msgsize(void) @@ -2536,6 +2568,7 @@ static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { + struct net *net = xs_net(x); struct sk_buff *skb; if (x->id.proto != IPPROTO_ESP) @@ -2551,7 +2584,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, if (build_mapping(skb, x, ipaddr, sport) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC); } 
static struct xfrm_mgr netlink_mgr = { @@ -2565,33 +2598,53 @@ static struct xfrm_mgr netlink_mgr = { .new_mapping = xfrm_send_mapping, }; -static int __init xfrm_user_init(void) +static int __net_init xfrm_user_net_init(struct net *net) { struct sock *nlsk; - printk(KERN_INFO "Initializing XFRM netlink socket\n"); - - nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX, + nlsk = netlink_kernel_create(net, NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, NULL, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; - rcu_assign_pointer(xfrm_nl, nlsk); - - xfrm_register_km(&netlink_mgr); - + rcu_assign_pointer(net->xfrm.nlsk, nlsk); return 0; } -static void __exit xfrm_user_exit(void) +static void __net_exit xfrm_user_net_exit(struct net *net) { - struct sock *nlsk = xfrm_nl; + struct sock *nlsk = net->xfrm.nlsk; - xfrm_unregister_km(&netlink_mgr); - rcu_assign_pointer(xfrm_nl, NULL); + rcu_assign_pointer(net->xfrm.nlsk, NULL); synchronize_rcu(); netlink_kernel_release(nlsk); } +static struct pernet_operations xfrm_user_net_ops = { + .init = xfrm_user_net_init, + .exit = xfrm_user_net_exit, +}; + +static int __init xfrm_user_init(void) +{ + int rv; + + printk(KERN_INFO "Initializing XFRM netlink socket\n"); + + rv = register_pernet_subsys(&xfrm_user_net_ops); + if (rv < 0) + return rv; + rv = xfrm_register_km(&netlink_mgr); + if (rv < 0) + unregister_pernet_subsys(&xfrm_user_net_ops); + return rv; +} + +static void __exit xfrm_user_exit(void) +{ + xfrm_unregister_km(&netlink_mgr); + unregister_pernet_subsys(&xfrm_user_net_ops); +} + module_init(xfrm_user_init); module_exit(xfrm_user_exit); MODULE_LICENSE("GPL"); |