diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/Kconfig | 7 | ||||
-rw-r--r-- | drivers/block/Makefile | 1 | ||||
-rw-r--r-- | drivers/block/sunvdc.c | 972 | ||||
-rw-r--r-- | drivers/net/Kconfig | 6 | ||||
-rw-r--r-- | drivers/net/Makefile | 1 | ||||
-rw-r--r-- | drivers/net/sunvnet.c | 1164 | ||||
-rw-r--r-- | drivers/net/sunvnet.h | 70 | ||||
-rw-r--r-- | drivers/serial/sunhv.c | 30 | ||||
-rw-r--r-- | drivers/serial/sunsab.c | 19 | ||||
-rw-r--r-- | drivers/serial/sunsu.c | 14 | ||||
-rw-r--r-- | drivers/serial/sunzilog.c | 17 |
11 files changed, 2289 insertions, 12 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index fd7a53bdcb6..e49162b1557 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -423,6 +423,13 @@ config ATA_OVER_ETH This driver provides Support for ATA over Ethernet block devices like the Coraid EtherDrive (R) Storage Blade. +config SUNVDC + tristate "Sun Virtual Disk Client support" + depends on SUN_LDOMS + help + Support for virtual disk devices as a client under Sun + Logical Domains. + source "drivers/s390/block/Kconfig" endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index e5f98acc5d5..43371c59623 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o +obj-$(CONFIG_SUNVDC) += sunvdc.o obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c new file mode 100644 index 00000000000..0f5e3caf85d --- /dev/null +++ b/drivers/block/sunvdc.c @@ -0,0 +1,972 @@ +/* sunvdc.c: Sun LDOM Virtual Disk Client. + * + * Copyright (C) 2007 David S. Miller <davem@davemloft.net> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/blkdev.h> +#include <linux/hdreg.h> +#include <linux/genhd.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/list.h> + +#include <asm/vio.h> +#include <asm/ldc.h> + +#define DRV_MODULE_NAME "sunvdc" +#define PFX DRV_MODULE_NAME ": " +#define DRV_MODULE_VERSION "1.0" +#define DRV_MODULE_RELDATE "June 25, 2007" + +static char version[] __devinitdata = + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; +MODULE_AUTHOR("David S. 
Miller (davem@davemloft.net)"); +MODULE_DESCRIPTION("Sun LDOM virtual disk client driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +/* Number of descriptors in the TX ring; also the length of vdc_port.rq_arr. */ +#define VDC_TX_RING_SIZE 256 + +/* Values for a completion's waiting_for field. vdc_finish() treats -1 + * (WAITING_FOR_ANY) as matching any pending waiter. + */ +#define WAITING_FOR_LINK_UP 0x01 +#define WAITING_FOR_TX_SPACE 0x02 +#define WAITING_FOR_GEN_CMD 0x04 +#define WAITING_FOR_ANY -1 + +/* Per-slot bookkeeping for the TX ring: the block request that owns the + * slot. vdc_end_one() takes a NULL req to mean a non-block command. + */ +struct vdc_req_entry { + struct request *req; +}; + +/* State for one virtual disk port. */ +struct vdc_port { + struct vio_driver_state vio; + + struct vdc *vp; + + struct gendisk *disk; + + struct vdc_completion *cmp; + + u64 req_id; + u64 seq; + /* Indexed by TX ring slot. */ + struct vdc_req_entry rq_arr[VDC_TX_RING_SIZE]; + + unsigned long ring_cookies; + + u64 max_xfer_size; + u32 vdisk_block_size; + + /* The server fills these in for us in the disk attribute + * ACK packet. + */ + u64 operations; + u32 vdisk_size; + u8 vdisk_type; + u8 dev_no; + + char disk_name[32]; + + struct vio_disk_geom geom; + struct vio_disk_vtoc label; + + struct list_head list; +}; + +/* Map an embedded vio_driver_state back to its containing vdc_port. */ +static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio) +{ + return container_of(vio, struct vdc_port, vio); +} + +struct vdc { + /* Protects port_list. 
*/ + spinlock_t lock; + + struct vio_dev *dev; + + struct list_head port_list; +}; + +/* Ordered from largest major to lowest */ +static struct vio_version vdc_versions[] = { + { .major = 1, .minor = 0 }, +}; + +#define VDCBLK_NAME "vdisk" +static int vdc_major; +#define PARTITION_SHIFT 3 + +static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr) +{ + return vio_dring_avail(dr, VDC_TX_RING_SIZE); +} + +static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct gendisk *disk = bdev->bd_disk; + struct vdc_port *port = disk->private_data; + + geo->heads = (u8) port->geom.num_hd; + geo->sectors = (u8) port->geom.num_sec; + geo->cylinders = port->geom.num_cyl; + + return 0; +} + +static struct block_device_operations vdc_fops = { + .owner = THIS_MODULE, + .getgeo = vdc_getgeo, +}; + +static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for) +{ + if (vio->cmp && + (waiting_for == -1 || + vio->cmp->waiting_for == waiting_for)) { + vio->cmp->err = err; + complete(&vio->cmp->com); + vio->cmp = NULL; + } +} + +static void vdc_handshake_complete(struct vio_driver_state *vio) +{ + vdc_finish(vio, 0, WAITING_FOR_LINK_UP); +} + +static int vdc_handle_unknown(struct vdc_port *port, void *arg) +{ + struct vio_msg_tag *pkt = arg; + + printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n", + pkt->type, pkt->stype, pkt->stype_env, pkt->sid); + printk(KERN_ERR PFX "Resetting connection.\n"); + + ldc_disconnect(port->vio.lp); + + return -ECONNRESET; +} + +static int vdc_send_attr(struct vio_driver_state *vio) +{ + struct vdc_port *port = to_vdc_port(vio); + struct vio_disk_attr_info pkt; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.tag.type = VIO_TYPE_CTRL; + pkt.tag.stype = VIO_SUBTYPE_INFO; + pkt.tag.stype_env = VIO_ATTR_INFO; + pkt.tag.sid = vio_send_sid(vio); + + pkt.xfer_mode = VIO_DRING_MODE; + pkt.vdisk_block_size = port->vdisk_block_size; + pkt.max_xfer_size = port->max_xfer_size; + + viodbg(HS, "SEND ATTR 
xfer_mode[0x%x] blksz[%u] max_xfer[%lu]\n", + pkt.xfer_mode, pkt.vdisk_block_size, pkt.max_xfer_size); + + return vio_ldc_send(&port->vio, &pkt, sizeof(pkt)); +} + +static int vdc_handle_attr(struct vio_driver_state *vio, void *arg) +{ + struct vdc_port *port = to_vdc_port(vio); + struct vio_disk_attr_info *pkt = arg; + + viodbg(HS, "GOT ATTR stype[0x%x] ops[%lx] disk_size[%lu] disk_type[%x] " + "xfer_mode[0x%x] blksz[%u] max_xfer[%lu]\n", + pkt->tag.stype, pkt->operations, + pkt->vdisk_size, pkt->vdisk_type, + pkt->xfer_mode, pkt->vdisk_block_size, + pkt->max_xfer_size); + + if (pkt->tag.stype == VIO_SUBTYPE_ACK) { + switch (pkt->vdisk_type) { + case VD_DISK_TYPE_DISK: + case VD_DISK_TYPE_SLICE: + break; + + default: + printk(KERN_ERR PFX "%s: Bogus vdisk_type 0x%x\n", + vio->name, pkt->vdisk_type); + return -ECONNRESET; + } + + if (pkt->vdisk_block_size > port->vdisk_block_size) { + printk(KERN_ERR PFX "%s: BLOCK size increased " + "%u --> %u\n", + vio->name, + port->vdisk_block_size, pkt->vdisk_block_size); + return -ECONNRESET; + } + + port->operations = pkt->operations; + port->vdisk_size = pkt->vdisk_size; + port->vdisk_type = pkt->vdisk_type; + if (pkt->max_xfer_size < port->max_xfer_size) + port->max_xfer_size = pkt->max_xfer_size; + port->vdisk_block_size = pkt->vdisk_block_size; + return 0; + } else { + printk(KERN_ERR PFX "%s: Attribute NACK\n", vio->name); + + return -ECONNRESET; + } +} + +static void vdc_end_special(struct vdc_port *port, struct vio_disk_desc *desc) +{ + int err = desc->status; + + vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD); +} + +static void vdc_end_request(struct request *req, int uptodate, int num_sectors) +{ + if (end_that_request_first(req, uptodate, num_sectors)) + return; + add_disk_randomness(req->rq_disk); + end_that_request_last(req, uptodate); +} + +static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr, + unsigned int index) +{ + struct vio_disk_desc *desc = vio_dring_entry(dr, index); + struct 
vdc_req_entry *rqe = &port->rq_arr[index]; + struct request *req; + + if (unlikely(desc->hdr.state != VIO_DESC_DONE)) + return; + + ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies); + desc->hdr.state = VIO_DESC_FREE; + dr->cons = (index + 1) & (VDC_TX_RING_SIZE - 1); + + req = rqe->req; + if (req == NULL) { + vdc_end_special(port, desc); + return; + } + + rqe->req = NULL; + + vdc_end_request(req, !desc->status, desc->size >> 9); + + if (blk_queue_stopped(port->disk->queue)) + blk_start_queue(port->disk->queue); +} + +static int vdc_ack(struct vdc_port *port, void *msgbuf) +{ + struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; + struct vio_dring_data *pkt = msgbuf; + + if (unlikely(pkt->dring_ident != dr->ident || + pkt->start_idx != pkt->end_idx || + pkt->start_idx >= VDC_TX_RING_SIZE)) + return 0; + + vdc_end_one(port, dr, pkt->start_idx); + + return 0; +} + +static int vdc_nack(struct vdc_port *port, void *msgbuf) +{ + /* XXX Implement me XXX */ + return 0; +} + +static void vdc_event(void *arg, int event) +{ + struct vdc_port *port = arg; + struct vio_driver_state *vio = &port->vio; + unsigned long flags; + int err; + + spin_lock_irqsave(&vio->lock, flags); + + if (unlikely(event == LDC_EVENT_RESET || + event == LDC_EVENT_UP)) { + vio_link_state_change(vio, event); + spin_unlock_irqrestore(&vio->lock, flags); + return; + } + + if (unlikely(event != LDC_EVENT_DATA_READY)) { + printk(KERN_WARNING PFX "Unexpected LDC event %d\n", event); + spin_unlock_irqrestore(&vio->lock, flags); + return; + } + + err = 0; + while (1) { + union { + struct vio_msg_tag tag; + u64 raw[8]; + } msgbuf; + + err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf)); + if (unlikely(err < 0)) { + if (err == -ECONNRESET) + vio_conn_reset(vio); + break; + } + if (err == 0) + break; + viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n", + msgbuf.tag.type, + msgbuf.tag.stype, + msgbuf.tag.stype_env, + msgbuf.tag.sid); + err = vio_validate_sid(vio, &msgbuf.tag); + if (err < 0) + break; + 
+ if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) { + if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) + err = vdc_ack(port, &msgbuf); + else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) + err = vdc_nack(port, &msgbuf); + else + err = vdc_handle_unknown(port, &msgbuf); + } else if (msgbuf.tag.type == VIO_TYPE_CTRL) { + err = vio_control_pkt_engine(vio, &msgbuf); + } else { + err = vdc_handle_unknown(port, &msgbuf); + } + if (err < 0) + break; + } + if (err < 0) + vdc_finish(&port->vio, err, WAITING_FOR_ANY); + spin_unlock_irqrestore(&vio->lock, flags); +} + +static int __vdc_tx_trigger(struct vdc_port *port) +{ + struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; + struct vio_dring_data hdr = { + .tag = { + .type = VIO_TYPE_DATA, + .stype = VIO_SUBTYPE_INFO, + .stype_env = VIO_DRING_DATA, + .sid = vio_send_sid(&port->vio), + }, + .dring_ident = dr->ident, + .start_idx = dr->prod, + .end_idx = dr->prod, + }; + int err, delay; + + hdr.seq = dr->snd_nxt; + delay = 1; + do { + err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr)); + if (err > 0) { + dr->snd_nxt++; + break; + } + udelay(delay); + if ((delay <<= 1) > 128) + delay = 128; + } while (err == -EAGAIN); + + return err; +} + +static int __send_request(struct request *req) +{ + struct vdc_port *port = req->rq_disk->private_data; + struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; + struct scatterlist sg[port->ring_cookies]; + struct vdc_req_entry *rqe; + struct vio_disk_desc *desc; + unsigned int map_perm; + int nsg, err, i; + u64 len; + u8 op; + + map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO; + + if (rq_data_dir(req) == READ) { + map_perm |= LDC_MAP_W; + op = VD_OP_BREAD; + } else { + map_perm |= LDC_MAP_R; + op = VD_OP_BWRITE; + } + + nsg = blk_rq_map_sg(req->q, req, sg); + + len = 0; + for (i = 0; i < nsg; i++) + len += sg[i].length; + + if (unlikely(vdc_tx_dring_avail(dr) < 1)) { + blk_stop_queue(port->disk->queue); + err = -ENOMEM; + goto out; + } + + desc = vio_dring_cur(dr); + 
+ err = ldc_map_sg(port->vio.lp, sg, nsg, + desc->cookies, port->ring_cookies, + map_perm); + if (err < 0) { + printk(KERN_ERR PFX "ldc_map_sg() failure, err=%d.\n", err); + return err; + } + + rqe = &port->rq_arr[dr->prod]; + rqe->req = req; + + desc->hdr.ack = VIO_ACK_ENABLE; + desc->req_id = port->req_id; + desc->operation = op; + if (port->vdisk_type == VD_DISK_TYPE_DISK) { + desc->slice = 2; + } else { + desc->slice = 0; + } + desc->status = ~0; + desc->offset = (req->sector << 9) / port->vdisk_block_size; + desc->size = len; + desc->ncookies = err; + + /* This has to be a non-SMP write barrier because we are writing + * to memory which is shared with the peer LDOM. + */ + wmb(); + desc->hdr.state = VIO_DESC_READY; + + err = __vdc_tx_trigger(port); + if (err < 0) { + printk(KERN_ERR PFX "vdc_tx_trigger() failure, err=%d\n", err); + } else { + port->req_id++; + dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1); + } +out: + + return err; +} + +/* Request function for the disk queue: feed queued requests to the TX ring. + * + * Ring space is checked before a request is dequeued. When the ring is + * full the queue is stopped and the request stays queued, instead of being + * dequeued and then failed; vdc_end_one() restarts a stopped queue when a + * block request completes. A request that __send_request() rejects for + * any other reason is still completed with an error. + */ +static void do_vdc_request(request_queue_t *q) +{ + while (1) { + struct request *req = elv_next_request(q); + struct vio_dring_state *dr; + struct vdc_port *port; + + if (!req) + break; + + port = req->rq_disk->private_data; + dr = &port->vio.drings[VIO_DRIVER_TX_RING]; + if (unlikely(vdc_tx_dring_avail(dr) < 1)) { + /* No free descriptor: wait for a completion. */ + blk_stop_queue(q); + break; + } + + blkdev_dequeue_request(req); + if (__send_request(req) < 0) + vdc_end_request(req, 0, req->hard_nr_sectors); + } +} + +/* Issue a non-block vdisk operation and wait until vdc_finish() completes it. + * + * buf/len is the caller's payload; it is bounced through a kzalloc'd buffer + * of the per-op length rounded up to 8 bytes. Returns -EOPNOTSUPP when op + * is not set in port->operations (or is an EFI op), -EINVAL for block or + * unknown ops, -ENOMEM, or the mapping/trigger/completion status. + * + * NOTE(review): op == 0 would make the shift count below wrap; the callers + * visible in this file only pass VD_OP_GET_VTOC and VD_OP_GET_DISKGEOM. + */ +static int generic_request(struct vdc_port *port, u8 op, void *buf, int len) +{ + struct vio_dring_state *dr; + struct vio_completion comp; + struct vio_disk_desc *desc; + unsigned int map_perm; + unsigned long flags; + int op_len, err; + void *req_buf; + + if (!(((u64)1 << ((u64)op - 1)) & port->operations)) + return -EOPNOTSUPP; + + switch (op) { + case VD_OP_BREAD: + case VD_OP_BWRITE: + default: + return -EINVAL; + + case VD_OP_FLUSH: + op_len = 0; + map_perm = 0; + break; + + case VD_OP_GET_WCE: + op_len = sizeof(u32); + map_perm = LDC_MAP_W; + break; + + case VD_OP_SET_WCE: + op_len = sizeof(u32); + map_perm = LDC_MAP_R; + break; + + case VD_OP_GET_VTOC: + op_len = sizeof(struct vio_disk_vtoc); + map_perm = LDC_MAP_W; + break; + + case VD_OP_SET_VTOC: + op_len = 
sizeof(struct vio_disk_vtoc); + map_perm = LDC_MAP_R; + break; + + case VD_OP_GET_DISKGEOM: + op_len = sizeof(struct vio_disk_geom); + map_perm = LDC_MAP_W; + break; + + case VD_OP_SET_DISKGEOM: + op_len = sizeof(struct vio_disk_geom); + map_perm = LDC_MAP_R; + break; + + case VD_OP_SCSICMD: + op_len = 16; + map_perm = LDC_MAP_RW; + break; + + case VD_OP_GET_DEVID: + op_len = sizeof(struct vio_disk_devid); + map_perm = LDC_MAP_W; + break; + + case VD_OP_GET_EFI: + case VD_OP_SET_EFI: + return -EOPNOTSUPP; + break; + }; + + map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO; + + /* Round the bounce buffer length up to a multiple of 8. */ + op_len = (op_len + 7) & ~7; + req_buf = kzalloc(op_len, GFP_KERNEL); + if (!req_buf) + return -ENOMEM; + + /* Never copy more than the bounce buffer holds. */ + if (len > op_len) + len = op_len; + + if (map_perm & LDC_MAP_R) + memcpy(req_buf, buf, len); + + spin_lock_irqsave(&port->vio.lock, flags); + + dr = &port->vio.drings[VIO_DRIVER_TX_RING]; + + /* XXX If we want to use this code generically we have to + * XXX handle TX ring exhaustion etc. + */ + desc = vio_dring_cur(dr); + + err = ldc_map_single(port->vio.lp, req_buf, op_len, + desc->cookies, port->ring_cookies, + map_perm); + if (err < 0) { + spin_unlock_irqrestore(&port->vio.lock, flags); + kfree(req_buf); + return err; + } + + /* Publish the on-stack completion before the descriptor goes live; + * vdc_finish() signals it and clears vio.cmp. + */ + init_completion(&comp.com); + comp.waiting_for = WAITING_FOR_GEN_CMD; + port->vio.cmp = &comp; + + desc->hdr.ack = VIO_ACK_ENABLE; + desc->req_id = port->req_id; + desc->operation = op; + desc->slice = 0; + desc->status = ~0; + desc->offset = 0; + desc->size = op_len; + desc->ncookies = err; + + /* This has to be a non-SMP write barrier because we are writing + * to memory which is shared with the peer LDOM. 
+ */ + wmb(); + desc->hdr.state = VIO_DESC_READY; + + err = __vdc_tx_trigger(port); + if (err >= 0) { + port->req_id++; + dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1); + spin_unlock_irqrestore(&port->vio.lock, flags); + + wait_for_completion(&comp.com); + err = comp.err; + } else { + port->vio.cmp = NULL; + spin_unlock_irqrestore(&port->vio.lock, flags); + } + + if (map_perm & LDC_MAP_W) + memcpy(buf, req_buf, len); + + kfree(req_buf); + + return err; +} + +/* Allocate and export the TX descriptor ring for this port. + * + * The ring has VDC_TX_RING_SIZE entries, each a struct vio_disk_desc + * followed by room for port->ring_cookies transfer cookies. Returns 0, + * or the error encoded in the pointer from ldc_alloc_exp_dring(). + */ +static int __devinit vdc_alloc_tx_ring(struct vdc_port *port) +{ + struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; + unsigned long len, entry_size; + int ncookies; + void *dring; + + entry_size = sizeof(struct vio_disk_desc) + + (sizeof(struct ldc_trans_cookie) * port->ring_cookies); + len = (VDC_TX_RING_SIZE * entry_size); + + ncookies = VIO_MAX_RING_COOKIES; + dring = ldc_alloc_exp_dring(port->vio.lp, len, + dr->cookies, &ncookies, + (LDC_MAP_SHADOW | + LDC_MAP_DIRECT | + LDC_MAP_RW)); + if (IS_ERR(dring)) + return PTR_ERR(dring); + + dr->base = dring; + dr->entry_size = entry_size; + dr->num_entries = VDC_TX_RING_SIZE; + dr->prod = dr->cons = 0; + dr->pending = VDC_TX_RING_SIZE; + dr->ncookies = ncookies; + + return 0; +} + +/* Release the exported TX ring, if one was allocated, and clear the ring + * bookkeeping. Does nothing when dr->base is NULL. + */ +static void vdc_free_tx_ring(struct vdc_port *port) +{ + struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; + + if (dr->base) { + ldc_free_exp_dring(port->vio.lp, dr->base, + (dr->entry_size * dr->num_entries), + dr->cookies, dr->ncookies); + dr->base = NULL; + dr->entry_size = 0; + dr->num_entries = 0; + dr->pending = 0; + dr->ncookies = 0; + } +} + +/* Bring the port up, wait for the handshake to finish, then read the VTOC + * and geometry and register the gendisk. Returns 0 or a negative errno. + */ +static int probe_disk(struct vdc_port *port) +{ + struct vio_completion comp; + struct request_queue *q; + struct gendisk *g; + int err; + + /* Completed by vdc_handshake_complete(), or with an error by + * vdc_event(), through vdc_finish(). + */ + init_completion(&comp.com); + comp.err = 0; + comp.waiting_for = WAITING_FOR_LINK_UP; + port->vio.cmp = &comp; + + vio_port_up(&port->vio); + + wait_for_completion(&comp.com); + if (comp.err) + return comp.err; + + err = generic_request(port, VD_OP_GET_VTOC, 
&port->label, sizeof(port->label)); + if (err < 0) { + printk(KERN_ERR PFX "VD_OP_GET_VTOC returns error %d\n", err); + return err; + } + + err = generic_request(port, VD_OP_GET_DISKGEOM, + &port->geom, sizeof(port->geom)); + if (err < 0) { + printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns " + "error %d\n", err); + return err; + } + + port->vdisk_size = ((u64)port->geom.num_cyl * + (u64)port->geom.num_hd * + (u64)port->geom.num_sec); + + q = blk_init_queue(do_vdc_request, &port->vio.lock); + if (!q) { + printk(KERN_ERR PFX "%s: Could not allocate queue.\n", + port->vio.name); + return -ENOMEM; + } + g = alloc_disk(1 << PARTITION_SHIFT); + if (!g) { + printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n", + port->vio.name); + blk_cleanup_queue(q); + return -ENOMEM; + } + + port->disk = g; + + blk_queue_max_hw_segments(q, port->ring_cookies); + blk_queue_max_phys_segments(q, port->ring_cookies); + blk_queue_max_sectors(q, port->max_xfer_size); + g->major = vdc_major; + g->first_minor = port->dev_no << PARTITION_SHIFT; + strcpy(g->disk_name, port->disk_name); + + g->fops = &vdc_fops; + g->queue = q; + g->private_data = port; + g->driverfs_dev = &port->vio.vdev->dev; + + set_capacity(g, port->vdisk_size); + + printk(KERN_INFO PFX "%s: %u sectors (%u MB)\n", + g->disk_name, + port->vdisk_size, (port->vdisk_size >> (20 - 9))); + + add_disk(g); + + return 0; +} + +static struct ldc_channel_config vdc_ldc_cfg = { + .event = vdc_event, + .mtu = 64, + .mode = LDC_MODE_UNRELIABLE, +}; + +static struct vio_driver_ops vdc_vio_ops = { + .send_attr = vdc_send_attr, + .handle_attr = vdc_handle_attr, + .handshake_complete = vdc_handshake_complete, +}; + +static int __devinit vdc_port_probe(struct vio_dev *vdev, + const struct vio_device_id *id) +{ + struct mdesc_handle *hp; + struct vdc_port *port; + unsigned long flags; + struct vdc *vp; + const u64 *port_id; + int err; + + vp = dev_get_drvdata(vdev->dev.parent); + if (!vp) { + printk(KERN_ERR PFX "Cannot find port parent 
vdc.\n"); + return -ENODEV; + } + + hp = mdesc_grab(); + + port_id = mdesc_get_property(hp, vdev->mp, "id", NULL); + err = -ENODEV; + if (!port_id) { + printk(KERN_ERR PFX "Port lacks id property.\n"); + goto err_out_release_mdesc; + } + if ((*port_id << PARTITION_SHIFT) & ~(u64)MINORMASK) { + printk(KERN_ERR PFX "Port id [%lu] too large.\n", *port_id); + goto err_out_release_mdesc; + } + + port = kzalloc(sizeof(*port), GFP_KERNEL); + err = -ENOMEM; + if (!port) { + printk(KERN_ERR PFX "Cannot allocate vdc_port.\n"); + goto err_out_release_mdesc; + } + + port->vp = vp; + port->dev_no = *port_id; + + if (port->dev_no >= 26) + snprintf(port->disk_name, sizeof(port->disk_name), + VDCBLK_NAME "%c%c", + 'a' + (port->dev_no / 26) - 1, + 'a' + (port->dev_no % 26)); + else + snprintf(port->disk_name, sizeof(port->disk_name), + VDCBLK_NAME "%c", 'a' + (port->dev_no % 26)); + + err = vio_driver_init(&port->vio, vdev, VDEV_DISK, + vdc_versions, ARRAY_SIZE(vdc_versions), + &vdc_vio_ops, port->disk_name); + if (err) + goto err_out_free_port; + + port->vdisk_block_size = 512; + port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size); + port->ring_cookies = ((port->max_xfer_size * + port->vdisk_block_size) / PAGE_SIZE) + 2; + + err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port); + if (err) + goto err_out_free_port; + + err = vdc_alloc_tx_ring(port); + if (err) + goto err_out_free_ldc; + + err = probe_disk(port); + if (err) + goto err_out_free_tx_ring; + + INIT_LIST_HEAD(&port->list); + + spin_lock_irqsave(&vp->lock, flags); + list_add(&port->list, &vp->port_list); + spin_unlock_irqrestore(&vp->lock, flags); + + dev_set_drvdata(&vdev->dev, port); + + mdesc_release(hp); + + return 0; + +err_out_free_tx_ring: + vdc_free_tx_ring(port); + +err_out_free_ldc: + vio_ldc_free(&port->vio); + +err_out_free_port: + kfree(port); + +err_out_release_mdesc: + mdesc_release(hp); + return err; +} + +static int vdc_port_remove(struct vio_dev *vdev) +{ + struct vdc_port *port = 
dev_get_drvdata(&vdev->dev); + + /* NOTE(review): the port is freed without being unlinked from + * vp->port_list (vdc_port_probe() adds it), and the gendisk and + * queue set up by probe_disk() are not released here -- confirm + * that teardown is complete. + */ + if (port) { + del_timer_sync(&port->vio.timer); + + vdc_free_tx_ring(port); + vio_ldc_free(&port->vio); + + dev_set_drvdata(&vdev->dev, NULL); + + kfree(port); + } + return 0; +} + +static struct vio_device_id vdc_port_match[] = { + { + .type = "vdc-port", + }, + {}, +}; +/* NOTE(review): this names vdc_match (defined further down and exported + * again there), not vdc_port_match. Looks like a copy/paste slip -- + * confirm which table was meant to be exported here. + */ +MODULE_DEVICE_TABLE(vio, vdc_match); + +static struct vio_driver vdc_port_driver = { + .id_table = vdc_port_match, + .probe = vdc_port_probe, + .remove = vdc_port_remove, + .driver = { + .name = "vdc_port", + .owner = THIS_MODULE, + } +}; + +/* Probe for the parent "block" node: print the version banner once and + * allocate the struct vdc that vdc_port_probe() looks up through the + * parent device's drvdata. + */ +static int __devinit vdc_probe(struct vio_dev *vdev, + const struct vio_device_id *id) +{ + static int vdc_version_printed; + struct vdc *vp; + + if (vdc_version_printed++ == 0) + printk(KERN_INFO "%s", version); + + vp = kzalloc(sizeof(struct vdc), GFP_KERNEL); + if (!vp) + return -ENOMEM; + + spin_lock_init(&vp->lock); + vp->dev = vdev; + INIT_LIST_HEAD(&vp->port_list); + + dev_set_drvdata(&vdev->dev, vp); + + return 0; +} + +/* Free the struct vdc allocated by vdc_probe() and clear the drvdata. */ +static int vdc_remove(struct vio_dev *vdev) +{ + + struct vdc *vp = dev_get_drvdata(&vdev->dev); + + if (vp) { + kfree(vp); + dev_set_drvdata(&vdev->dev, NULL); + } + return 0; +} + +static struct vio_device_id vdc_match[] = { + { + .type = "block", + }, + {}, +}; +MODULE_DEVICE_TABLE(vio, vdc_match); + +static struct vio_driver vdc_driver = { + .id_table = vdc_match, + .probe = vdc_probe, + .remove = vdc_remove, + .driver = { + .name = "vdc", + .owner = THIS_MODULE, + } +}; + +/* Module init: register a block major (0 is passed and the return value + * is stored as vdc_major), then the parent driver, then the port driver. + * Each failure unwinds what was registered before it. + */ +static int __init vdc_init(void) +{ + int err; + + err = register_blkdev(0, VDCBLK_NAME); + if (err < 0) + goto out_err; + + vdc_major = err; + err = vio_register_driver(&vdc_driver); + if (err) + goto out_unregister_blkdev; + + err = vio_register_driver(&vdc_port_driver); + if (err) + goto out_unregister_vdc; + + return 0; + +out_unregister_vdc: + vio_unregister_driver(&vdc_driver); + +out_unregister_blkdev: + unregister_blkdev(vdc_major, VDCBLK_NAME); + vdc_major = 0; + +out_err: + return err; +} + +static void __exit 
vdc_exit(void) +{ + vio_unregister_driver(&vdc_port_driver); + vio_unregister_driver(&vdc_driver); + unregister_blkdev(vdc_major, VDCBLK_NAME); +} + +module_init(vdc_init); +module_exit(vdc_exit); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index d17d64eb706..7903f9c7839 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -604,6 +604,12 @@ config CASSINI Support for the Sun Cassini chip, aka Sun GigaSwift Ethernet. See also <http://www.sun.com/products-n-solutions/hardware/docs/pdf/817-4341-10.pdf> +config SUNVNET + tristate "Sun Virtual Network support" + depends on SUN_LDOMS + help + Support for virtual network devices under Sun Logical Domains. + config NET_VENDOR_3COM bool "3COM cards" depends on ISA || EISA || MCA || PCI diff --git a/drivers/net/Makefile b/drivers/net/Makefile index c26b8674213..b95b1b237a2 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_SUNBMAC) += sunbmac.o obj-$(CONFIG_MYRI_SBUS) += myri_sbus.o obj-$(CONFIG_SUNGEM) += sungem.o sungem_phy.o obj-$(CONFIG_CASSINI) += cassini.o +obj-$(CONFIG_SUNVNET) += sunvnet.o obj-$(CONFIG_MACE) += mace.o obj-$(CONFIG_BMAC) += bmac.o diff --git a/drivers/net/sunvnet.c b/drivers/net/sunvnet.c new file mode 100644 index 00000000000..8a667c13fae --- /dev/null +++ b/drivers/net/sunvnet.c @@ -0,0 +1,1164 @@ +/* sunvnet.c: Sun LDOM Virtual Network Driver. + * + * Copyright (C) 2007 David S. 
Miller <davem@davemloft.net> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/netdevice.h> +#include <linux/ethtool.h> +#include <linux/etherdevice.h> + +#include <asm/vio.h> +#include <asm/ldc.h> + +#include "sunvnet.h" + +#define DRV_MODULE_NAME "sunvnet" +#define PFX DRV_MODULE_NAME ": " +#define DRV_MODULE_VERSION "1.0" +#define DRV_MODULE_RELDATE "June 25, 2007" + +static char version[] __devinitdata = + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; +MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); +MODULE_DESCRIPTION("Sun LDOM virtual network driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +/* Ordered from largest major to lowest */ +static struct vio_version vnet_versions[] = { + { .major = 1, .minor = 0 }, +}; + +static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr) +{ + return vio_dring_avail(dr, VNET_TX_RING_SIZE); +} + +static int vnet_handle_unknown(struct vnet_port *port, void *arg) +{ + struct vio_msg_tag *pkt = arg; + + printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n", + pkt->type, pkt->stype, pkt->stype_env, pkt->sid); + printk(KERN_ERR PFX "Resetting connection.\n"); + + ldc_disconnect(port->vio.lp); + + return -ECONNRESET; +} + +static int vnet_send_attr(struct vio_driver_state *vio) +{ + struct vnet_port *port = to_vnet_port(vio); + struct net_device *dev = port->vp->dev; + struct vio_net_attr_info pkt; + int i; + + memset(&pkt, 0, sizeof(pkt)); + pkt.tag.type = VIO_TYPE_CTRL; + pkt.tag.stype = VIO_SUBTYPE_INFO; + pkt.tag.stype_env = VIO_ATTR_INFO; + pkt.tag.sid = vio_send_sid(vio); + pkt.xfer_mode = VIO_DRING_MODE; + pkt.addr_type = VNET_ADDR_ETHERMAC; + pkt.ack_freq = 0; + for (i = 0; i < 6; i++) + pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8); + pkt.mtu = ETH_FRAME_LEN; + + viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] 
addr[%llx] " + "ackfreq[%u] mtu[%llu]\n", + pkt.xfer_mode, pkt.addr_type, + (unsigned long long) pkt.addr, + pkt.ack_freq, + (unsigned long long) pkt.mtu); + + return vio_ldc_send(vio, &pkt, sizeof(pkt)); +} + +static int handle_attr_info(struct vio_driver_state *vio, + struct vio_net_attr_info *pkt) +{ + viodbg(HS, "GOT NET ATTR INFO xmode[0x%x] atype[0x%x] addr[%llx] " + "ackfreq[%u] mtu[%llu]\n", + pkt->xfer_mode, pkt->addr_type, + (unsigned long long) pkt->addr, + pkt->ack_freq, + (unsigned long long) pkt->mtu); + + pkt->tag.sid = vio_send_sid(vio); + + if (pkt->xfer_mode != VIO_DRING_MODE || + pkt->addr_type != VNET_ADDR_ETHERMAC || + pkt->mtu != ETH_FRAME_LEN) { + viodbg(HS, "SEND NET ATTR NACK\n"); + + pkt->tag.stype = VIO_SUBTYPE_NACK; + + (void) vio_ldc_send(vio, pkt, sizeof(*pkt)); + + return -ECONNRESET; + } else { + viodbg(HS, "SEND NET ATTR ACK\n"); + + pkt->tag.stype = VIO_SUBTYPE_ACK; + + return vio_ldc_send(vio, pkt, sizeof(*pkt)); + } + +} + +static int handle_attr_ack(struct vio_driver_state *vio, + struct vio_net_attr_info *pkt) +{ + viodbg(HS, "GOT NET ATTR ACK\n"); + + return 0; +} + +static int handle_attr_nack(struct vio_driver_state *vio, + struct vio_net_attr_info *pkt) +{ + viodbg(HS, "GOT NET ATTR NACK\n"); + + return -ECONNRESET; +} + +static int vnet_handle_attr(struct vio_driver_state *vio, void *arg) +{ + struct vio_net_attr_info *pkt = arg; + + switch (pkt->tag.stype) { + case VIO_SUBTYPE_INFO: + return handle_attr_info(vio, pkt); + + case VIO_SUBTYPE_ACK: + return handle_attr_ack(vio, pkt); + + case VIO_SUBTYPE_NACK: + return handle_attr_nack(vio, pkt); + + default: + return -ECONNRESET; + } +} + +static void vnet_handshake_complete(struct vio_driver_state *vio) +{ + struct vio_dring_state *dr; + + dr = &vio->drings[VIO_DRIVER_RX_RING]; + dr->snd_nxt = dr->rcv_nxt = 1; + + dr = &vio->drings[VIO_DRIVER_TX_RING]; + dr->snd_nxt = dr->rcv_nxt = 1; +} + +/* The hypervisor interface that implements copying to/from imported + * memory from 
another domain requires that copies are done to 8-byte + * aligned buffers, and that the lengths of such copies are also 8-byte + * multiples. + * + * So we align skb->data to an 8-byte multiple and pad-out the data + * area so we can round the copy length up to the next multiple of + * 8 for the copy. + * + * The transmitter puts the actual start of the packet 6 bytes into + * the buffer it sends over, so that the IP headers after the ethernet + * header are aligned properly. These 6 bytes are not in the descriptor + * length, they are simply implied. This offset is represented using + * the VNET_PACKET_SKIP macro. + */ +static struct sk_buff *alloc_and_align_skb(struct net_device *dev, + unsigned int len) +{ + struct sk_buff *skb = netdev_alloc_skb(dev, len+VNET_PACKET_SKIP+8+8); + unsigned long addr, off; + + if (unlikely(!skb)) + return NULL; + + addr = (unsigned long) skb->data; + off = ((addr + 7UL) & ~7UL) - addr; + if (off) + skb_reserve(skb, off); + + return skb; +} + +static int vnet_rx_one(struct vnet_port *port, unsigned int len, + struct ldc_trans_cookie *cookies, int ncookies) +{ + struct net_device *dev = port->vp->dev; + unsigned int copy_len; + struct sk_buff *skb; + int err; + + err = -EMSGSIZE; + if (unlikely(len < ETH_ZLEN || len > ETH_FRAME_LEN)) { + dev->stats.rx_length_errors++; + goto out_dropped; + } + + skb = alloc_and_align_skb(dev, len); + err = -ENOMEM; + if (unlikely(!skb)) { + dev->stats.rx_missed_errors++; + goto out_dropped; + } + + copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U; + skb_put(skb, copy_len); + err = ldc_copy(port->vio.lp, LDC_COPY_IN, + skb->data, copy_len, 0, + cookies, ncookies); + if (unlikely(err < 0)) { + dev->stats.rx_frame_errors++; + goto out_free_skb; + } + + skb_pull(skb, VNET_PACKET_SKIP); + skb_trim(skb, len); + skb->protocol = eth_type_trans(skb, dev); + + dev->stats.rx_packets++; + dev->stats.rx_bytes += len; + + netif_rx(skb); + + return 0; + +out_free_skb: + kfree_skb(skb); + +out_dropped: + 
dev->stats.rx_dropped++; + return err; +} + +static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr, + u32 start, u32 end, u8 vio_dring_state) +{ + struct vio_dring_data hdr = { + .tag = { + .type = VIO_TYPE_DATA, + .stype = VIO_SUBTYPE_ACK, + .stype_env = VIO_DRING_DATA, + .sid = vio_send_sid(&port->vio), + }, + .dring_ident = dr->ident, + .start_idx = start, + .end_idx = end, + .state = vio_dring_state, + }; + int err, delay; + + hdr.seq = dr->snd_nxt; + delay = 1; + do { + err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr)); + if (err > 0) { + dr->snd_nxt++; + break; + } + udelay(delay); + if ((delay <<= 1) > 128) + delay = 128; + } while (err == -EAGAIN); + + return err; +} + +static u32 next_idx(u32 idx, struct vio_dring_state *dr) +{ + if (++idx == dr->num_entries) + idx = 0; + return idx; +} + +static u32 prev_idx(u32 idx, struct vio_dring_state *dr) +{ + if (idx == 0) + idx = dr->num_entries - 1; + else + idx--; + + return idx; +} + +static struct vio_net_desc *get_rx_desc(struct vnet_port *port, + struct vio_dring_state *dr, + u32 index) +{ + struct vio_net_desc *desc = port->vio.desc_buf; + int err; + + err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size, + (index * dr->entry_size), + dr->cookies, dr->ncookies); + if (err < 0) + return ERR_PTR(err); + + return desc; +} + +static int put_rx_desc(struct vnet_port *port, + struct vio_dring_state *dr, + struct vio_net_desc *desc, + u32 index) +{ + int err; + + err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size, + (index * dr->entry_size), + dr->cookies, dr->ncookies); + if (err < 0) + return err; + + return 0; +} + +static int vnet_walk_rx_one(struct vnet_port *port, + struct vio_dring_state *dr, + u32 index, int *needs_ack) +{ + struct vio_net_desc *desc = get_rx_desc(port, dr, index); + struct vio_driver_state *vio = &port->vio; + int err; + + if (IS_ERR(desc)) + return PTR_ERR(desc); + + viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%lx:%lx]\n", + 
desc->hdr.state, desc->hdr.ack,
+	       desc->size, desc->ncookies,
+	       desc->cookies[0].cookie_addr,
+	       desc->cookies[0].cookie_size);
+
+	if (desc->hdr.state != VIO_DESC_READY)
+		return 1;
+	err = vnet_rx_one(port, desc->size, desc->cookies, desc->ncookies);
+	if (err == -ECONNRESET)
+		return err;
+	desc->hdr.state = VIO_DESC_DONE;	/* marked DONE even if vnet_rx_one() dropped the frame */
+	err = put_rx_desc(port, dr, desc, index);
+	if (err < 0)
+		return err;
+	*needs_ack = desc->hdr.ack;
+	return 0;
+}
+/* Walk RX entries from start towards end ((u32)-1 = open-ended), then send a STOPPED ack. */
+static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
+			u32 start, u32 end)
+{
+	struct vio_driver_state *vio = &port->vio;
+	int ack_start = -1, ack_end = -1;
+
+	end = (end == (u32) -1) ? prev_idx(start, dr) : next_idx(end, dr);
+
+	viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);
+
+	while (start != end) {
+		int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);
+		if (err == -ECONNRESET)
+			return err;
+		if (err != 0)
+			break;
+		if (ack_start == -1)
+			ack_start = start;
+		ack_end = start;
+		start = next_idx(start, dr);
+		if (ack && start != end) {
+			err = vnet_send_ack(port, dr, ack_start, ack_end,
+					    VIO_DRING_ACTIVE);
+			if (err == -ECONNRESET)
+				return err;
+			ack_start = -1;	/* begin a new ack range */
+		}
+	}
+	if (unlikely(ack_start == -1))
+		ack_start = ack_end = prev_idx(start, dr);
+	return vnet_send_ack(port, dr, ack_start, ack_end, VIO_DRING_STOPPED);
+}
+/* Handle a DRING_DATA INFO message; out-of-sequence messages are logged and ignored. */
+static int vnet_rx(struct vnet_port *port, void *msgbuf)
+{
+	struct vio_dring_data *pkt = msgbuf;
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
+	struct vio_driver_state *vio = &port->vio;
+
+	viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016lx] rcv_nxt[%016lx]\n",
+	       pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);
+
+	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
+		return 0;
+	if (unlikely(pkt->seq != dr->rcv_nxt)) {
+		printk(KERN_ERR PFX "RX out of sequence seq[0x%lx] "
+		       "rcv_nxt[0x%lx]\n", pkt->seq, dr->rcv_nxt);
+		return 0;
+	}
+
+	dr->rcv_nxt++;
+
+	/* XXX Validate pkt->start_idx and pkt->end_idx XXX */
+
+	return
vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx);
+}
+/* Return 1 if 'end' is one of the indices from dr->cons up to (not including) dr->prod. */
+static int idx_is_pending(struct vio_dring_state *dr, u32 end)
+{
+	u32 idx = dr->cons;
+	int found = 0;
+
+	while (idx != dr->prod) {
+		if (idx == end) {
+			found = 1;
+			break;
+		}
+		idx = next_idx(idx, dr);
+	}
+	return found;
+}
+/* Handle a TX dring ACK: advance dr->cons; returns 1 when a stopped queue may be woken. */
+static int vnet_ack(struct vnet_port *port, void *msgbuf)
+{
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	struct vio_dring_data *pkt = msgbuf;
+	struct net_device *dev;
+	struct vnet *vp;
+	u32 end;
+
+	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
+		return 0;
+
+	end = pkt->end_idx;
+	if (unlikely(!idx_is_pending(dr, end)))
+		return 0;
+
+	dr->cons = next_idx(end, dr);
+
+	vp = port->vp;
+	dev = vp->dev;
+	if (unlikely(netif_queue_stopped(dev) &&
+		     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
+		return 1;
+
+	return 0;
+}
+/* NACK handler; currently a stub that ignores the message. */
+static int vnet_nack(struct vnet_port *port, void *msgbuf)
+{
+	/* XXX just reset or similar XXX */
+	return 0;
+}
+/* Wake the TX queue only if no port's TX ring is below its wakeup threshold. */
+static void maybe_tx_wakeup(struct vnet *vp)
+{
+	struct net_device *dev = vp->dev;
+
+	netif_tx_lock(dev);
+	if (likely(netif_queue_stopped(dev))) {
+		struct vnet_port *port;
+		int wake = 1;
+
+		list_for_each_entry(port, &vp->port_list, list) {
+			struct vio_dring_state *dr;
+
+			dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+			if (vnet_tx_dring_avail(dr) <
+			    VNET_TX_WAKEUP_THRESH(dr)) {
+				wake = 0;
+				break;
+			}
+		}
+		if (wake)
+			netif_wake_queue(dev);
+	}
+	netif_tx_unlock(dev);
+}
+/* LDC event callback (vnet_ldc_cfg.event): link state changes and incoming VIO messages. */
+static void vnet_event(void *arg, int event)
+{
+	struct vnet_port *port = arg;
+	struct vio_driver_state *vio = &port->vio;
+	unsigned long flags;
+	int tx_wakeup, err;
+
+	spin_lock_irqsave(&vio->lock, flags);
+
+	if (unlikely(event == LDC_EVENT_RESET ||
+		     event == LDC_EVENT_UP)) {
+		vio_link_state_change(vio, event);
+		spin_unlock_irqrestore(&vio->lock, flags);
+
+		return;
+	}
+
+	if (unlikely(event != LDC_EVENT_DATA_READY)) {
+		printk(KERN_WARNING PFX "Unexpected LDC event %d\n", event);
+
spin_unlock_irqrestore(&vio->lock, flags);
+		return;
+	}
+
+	tx_wakeup = err = 0;
+	while (1) {	/* drain messages until ldc_read() returns 0 or an error occurs */
+		union {
+			struct vio_msg_tag tag;
+			u64 raw[8];
+		} msgbuf;
+
+		err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
+		if (unlikely(err < 0)) {
+			if (err == -ECONNRESET)
+				vio_conn_reset(vio);
+			break;
+		}
+		if (err == 0)
+			break;
+		viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
+		       msgbuf.tag.type,
+		       msgbuf.tag.stype,
+		       msgbuf.tag.stype_env,
+		       msgbuf.tag.sid);
+		err = vio_validate_sid(vio, &msgbuf.tag);
+		if (err < 0)
+			break;
+
+		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
+			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
+				err = vnet_rx(port, &msgbuf);
+			} else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
+				err = vnet_ack(port, &msgbuf);
+				if (err > 0)
+					tx_wakeup |= err;	/* remember that a wakeup was requested */
+			} else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
+				err = vnet_nack(port, &msgbuf);
+			}
+		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
+			err = vio_control_pkt_engine(vio, &msgbuf);
+			if (err)
+				break;
+		} else {
+			err = vnet_handle_unknown(port, &msgbuf);
+		}
+		if (err == -ECONNRESET)
+			break;
+	}
+	spin_unlock(&vio->lock);	/* interrupts stay disabled until local_irq_restore() below */
+	if (unlikely(tx_wakeup && err != -ECONNRESET))
+		maybe_tx_wakeup(port->vp);
+	local_irq_restore(flags);
+}
+/* Send a DRING_DATA INFO starting at dr->prod; retries on -EAGAIN with capped backoff. */
+static int __vnet_tx_trigger(struct vnet_port *port)
+{
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	struct vio_dring_data hdr = {
+		.tag = {
+			.type		= VIO_TYPE_DATA,
+			.stype		= VIO_SUBTYPE_INFO,
+			.stype_env	= VIO_DRING_DATA,
+			.sid		= vio_send_sid(&port->vio),
+		},
+		.dring_ident	= dr->ident,
+		.start_idx	= dr->prod,
+		.end_idx	= (u32) -1,
+	};
+	int err, delay;
+
+	hdr.seq = dr->snd_nxt;
+	delay = 1;
+	do {
+		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
+		if (err > 0) {
+			dr->snd_nxt++;
+			break;
+		}
+		udelay(delay);
+		if ((delay <<= 1) > 128)
+			delay = 128;
+	} while (err == -EAGAIN);
+
+	return err;
+}
+/* Find the port whose raddr matches the MAC at skb->data, else the first port on port_list. */
+struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb)
+{
+	unsigned int hash = vnet_hashfn(skb->data);
+	struct
hlist_head *hp = &vp->port_hash[hash];
+	struct hlist_node *n;
+	struct vnet_port *port;
+
+	hlist_for_each_entry(port, n, hp, hash) {
+		if (!compare_ether_addr(port->raddr, skb->data))
+			return port;
+	}
+	port = NULL;
+	if (!list_empty(&vp->port_list))
+		port = list_entry(vp->port_list.next, struct vnet_port, list);
+
+	return port;
+}
+/* Locked wrapper: runs __tx_port_find() under vp->lock. NOTE(review): not static -- confirm intent. */
+struct vnet_port *tx_port_find(struct vnet *vp, struct sk_buff *skb)
+{
+	struct vnet_port *ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vp->lock, flags);
+	ret = __tx_port_find(vp, skb);
+	spin_unlock_irqrestore(&vp->lock, flags);
+
+	return ret;
+}
+/* hard_start_xmit hook: copy skb into the next TX buffer, publish its descriptor, kick the peer. */
+static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct vnet *vp = netdev_priv(dev);
+	struct vnet_port *port = tx_port_find(vp, skb);
+	struct vio_dring_state *dr;
+	struct vio_net_desc *d;
+	unsigned long flags;
+	unsigned int len;
+	void *tx_buf;
+	int i, err;
+
+	if (unlikely(!port))
+		goto out_dropped;
+
+	spin_lock_irqsave(&port->vio.lock, flags);
+
+	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
+		if (!netif_queue_stopped(dev)) {
+			netif_stop_queue(dev);
+
+			/* This is a hard error, log it. */
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
+			       "queue awake!\n", dev->name);
+			dev->stats.tx_errors++;
+		}
+		spin_unlock_irqrestore(&port->vio.lock, flags);
+		return NETDEV_TX_BUSY;
+	}
+
+	d = vio_dring_cur(dr);
+
+	tx_buf = port->tx_bufs[dr->prod].buf;
+	skb_copy_from_linear_data(skb, tx_buf + VNET_PACKET_SKIP, skb->len);
+
+	len = skb->len;
+	if (len < ETH_ZLEN) {	/* zero-pad short frames up to ETH_ZLEN */
+		len = ETH_ZLEN;
+		memset(tx_buf+VNET_PACKET_SKIP+skb->len, 0, len - skb->len);
+	}
+
+	d->hdr.ack = VIO_ACK_ENABLE;
+	d->size = len;
+	d->ncookies = port->tx_bufs[dr->prod].ncookies;
+	for (i = 0; i < d->ncookies; i++)
+		d->cookies[i] = port->tx_bufs[dr->prod].cookies[i];
+
+	/* This has to be a non-SMP write barrier because we are writing
+	 * to memory which is shared with the peer LDOM.
+	 */
+	wmb();
+
+	d->hdr.state = VIO_DESC_READY;
+
+	err = __vnet_tx_trigger(port);
+	if (unlikely(err < 0)) {
+		printk(KERN_INFO PFX "%s: TX trigger error %d\n",
+		       dev->name, err);
+		d->hdr.state = VIO_DESC_FREE;	/* take the descriptor back; the frame is dropped */
+		dev->stats.tx_carrier_errors++;
+		goto out_dropped_unlock;
+	}
+
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+
+	dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
+	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
+		netif_stop_queue(dev);
+		if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
+			netif_wake_queue(dev);
+	}
+
+	spin_unlock_irqrestore(&port->vio.lock, flags);
+
+	dev_kfree_skb(skb);	/* data was copied into tx_buf, so the skb can go now */
+
+	dev->trans_start = jiffies;
+	return NETDEV_TX_OK;
+
+out_dropped_unlock:
+	spin_unlock_irqrestore(&port->vio.lock, flags);
+
+out_dropped:
+	dev_kfree_skb(skb);
+	dev->stats.tx_dropped++;
+	return NETDEV_TX_OK;
+}
+/* TX watchdog hook; not implemented yet. */
+static void vnet_tx_timeout(struct net_device *dev)
+{
+	/* XXX Implement me XXX */
+}
+/* open hook: turn the carrier on and start the TX queue. */
+static int vnet_open(struct net_device *dev)
+{
+	netif_carrier_on(dev);
+	netif_start_queue(dev);
+
+	return 0;
+}
+/* stop hook: stop the TX queue and turn the carrier off. */
+static int vnet_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	netif_carrier_off(dev);
+
+	return 0;
+}
+/* set_multicast_list hook; not implemented yet. */
+static void vnet_set_rx_mode(struct net_device *dev)
+{
+	/* XXX Implement multicast support XXX */
+}
+/* Only ETH_DATA_LEN is accepted as MTU; anything else returns -EINVAL. */
+static int vnet_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu != ETH_DATA_LEN)
+		return -EINVAL;
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+/* Changing the MAC address is not supported; always returns -EINVAL. */
+static int vnet_set_mac_addr(struct net_device *dev, void *p)
+{
+	return -EINVAL;
+}
+/* ethtool: report the driver name and version strings. */
+static void vnet_get_drvinfo(struct net_device *dev,
+			     struct ethtool_drvinfo *info)
+{
+	strcpy(info->driver, DRV_MODULE_NAME);
+	strcpy(info->version, DRV_MODULE_VERSION);
+}
+/* ethtool: return vp->msg_enable. */
+static u32 vnet_get_msglevel(struct net_device *dev)
+{
+	struct vnet *vp = netdev_priv(dev);
+	return vp->msg_enable;
+}
+/* ethtool: store 'value' in vp->msg_enable. */
+static void vnet_set_msglevel(struct net_device *dev, u32 value)
+{
+	struct vnet *vp = netdev_priv(dev);
+	vp->msg_enable = value;
+}
+
+static const
struct ethtool_ops vnet_ethtool_ops = {
+	.get_drvinfo		= vnet_get_drvinfo,
+	.get_msglevel		= vnet_get_msglevel,
+	.set_msglevel		= vnet_set_msglevel,
+	.get_link		= ethtool_op_get_link,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
+};
+/* Free the exported TX dring (if any) and unmap and free every allocated TX buffer. */
+static void vnet_port_free_tx_bufs(struct vnet_port *port)
+{
+	struct vio_dring_state *dr;
+	int i;
+
+	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	if (dr->base) {
+		ldc_free_exp_dring(port->vio.lp, dr->base,
+				   (dr->entry_size * dr->num_entries),
+				   dr->cookies, dr->ncookies);
+		dr->base = NULL;
+		dr->entry_size = 0;
+		dr->num_entries = 0;
+		dr->pending = 0;
+		dr->ncookies = 0;
+	}
+
+	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
+		void *buf = port->tx_bufs[i].buf;
+
+		if (!buf)	/* slot was never populated */
+			continue;
+
+		ldc_unmap(port->vio.lp,
+			  port->tx_bufs[i].cookies,
+			  port->tx_bufs[i].ncookies);
+
+		kfree(buf);
+		port->tx_bufs[i].buf = NULL;
+	}
+}
+/* Allocate and LDC-map all TX buffers, then export the TX dring; returns 0 or -errno. */
+static int __devinit vnet_port_alloc_tx_bufs(struct vnet_port *port)
+{
+	struct vio_dring_state *dr;
+	unsigned long len;
+	int i, err, ncookies;
+	void *dring;
+
+	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
+		void *buf = kzalloc(ETH_FRAME_LEN + 8, GFP_KERNEL);
+		int map_len = (ETH_FRAME_LEN + 7) & ~7;
+
+		err = -ENOMEM;
+		if (!buf) {
+			printk(KERN_ERR "TX buffer allocation failure\n");
+			goto err_out;
+		}
+		err = -EFAULT;
+		if ((unsigned long)buf & (8UL - 1)) {	/* buffer must be 8-byte aligned */
+			printk(KERN_ERR "TX buffer misaligned\n");
+			kfree(buf);
+			goto err_out;
+		}
+
+		err = ldc_map_single(port->vio.lp, buf, map_len,
+				     port->tx_bufs[i].cookies, 2,
+				     (LDC_MAP_SHADOW |
+				      LDC_MAP_DIRECT |
+				      LDC_MAP_RW));
+		if (err < 0) {
+			kfree(buf);
+			goto err_out;
+		}
+		port->tx_bufs[i].buf = buf;
+		port->tx_bufs[i].ncookies = err;	/* non-negative return is stored as the cookie count */
+	}
+
+	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+
+	len = (VNET_TX_RING_SIZE *
+	       (sizeof(struct vio_net_desc) +
+		(sizeof(struct ldc_trans_cookie) * 2)));
+
+	ncookies = VIO_MAX_RING_COOKIES;
+	dring = ldc_alloc_exp_dring(port->vio.lp, len,
+				    dr->cookies, &ncookies,
+				    (LDC_MAP_SHADOW |
+				     LDC_MAP_DIRECT |
+
LDC_MAP_RW));
+	if (IS_ERR(dring)) {
+		err = PTR_ERR(dring);
+		goto err_out;
+	}
+
+	dr->base = dring;
+	dr->entry_size = (sizeof(struct vio_net_desc) +
+			  (sizeof(struct ldc_trans_cookie) * 2));
+	dr->num_entries = VNET_TX_RING_SIZE;
+	dr->prod = dr->cons = 0;
+	dr->pending = VNET_TX_RING_SIZE;
+	dr->ncookies = ncookies;
+
+	return 0;
+
+err_out:
+	vnet_port_free_tx_bufs(port);	/* frees whatever was set up before the failure */
+
+	return err;
+}
+/* LDC channel configuration used for every vnet port. */
+static struct ldc_channel_config vnet_ldc_cfg = {
+	.event		= vnet_event,
+	.mtu		= 64,
+	.mode		= LDC_MODE_UNRELIABLE,
+};
+/* Callbacks handed to vio_driver_init() for each port. */
+static struct vio_driver_ops vnet_vio_ops = {
+	.send_attr		= vnet_send_attr,
+	.handle_attr		= vnet_handle_attr,
+	.handshake_complete	= vnet_handshake_complete,
+};
+
+const char *remote_macaddr_prop = "remote-mac-address";
+/* Probe a "vnet-port": read its remote MAC, set up VIO/LDC and TX buffers, link it to its vnet. */
+static int __devinit vnet_port_probe(struct vio_dev *vdev,
+				     const struct vio_device_id *id)
+{
+	struct mdesc_handle *hp;
+	struct vnet_port *port;
+	unsigned long flags;
+	struct vnet *vp;
+	const u64 *rmac;
+	int len, i, err, switch_port;
+
+	vp = dev_get_drvdata(vdev->dev.parent);
+	if (!vp) {
+		printk(KERN_ERR PFX "Cannot find port parent vnet.\n");
+		return -ENODEV;
+	}
+
+	hp = mdesc_grab();	/* NOTE(review): result is used unchecked -- confirm it cannot fail */
+
+	rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
+	err = -ENODEV;
+	if (!rmac) {
+		printk(KERN_ERR PFX "Port lacks %s property.\n",
+		       remote_macaddr_prop);
+		goto err_out_put_mdesc;
+	}
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	err = -ENOMEM;
+	if (!port) {
+		printk(KERN_ERR PFX "Cannot allocate vnet_port.\n");
+		goto err_out_put_mdesc;
+	}
+
+	for (i = 0; i < ETH_ALEN; i++)	/* MAC is the low 48 bits of *rmac, most significant byte first */
+		port->raddr[i] = (*rmac >> (5 - i) * 8) & 0xff;
+
+	port->vp = vp;
+
+	err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK,
+			      vnet_versions, ARRAY_SIZE(vnet_versions),
+			      &vnet_vio_ops, vp->dev->name);
+	if (err)
+		goto err_out_free_port;
+
+	err = vio_ldc_alloc(&port->vio, &vnet_ldc_cfg, port);
+	if (err)
+		goto err_out_free_port;
+
+	err = vnet_port_alloc_tx_bufs(port);
+	if (err)
+		goto err_out_free_ldc;
+
+
INIT_HLIST_NODE(&port->hash);
+	INIT_LIST_HEAD(&port->list);
+
+	switch_port = 0;
+	if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL)
+		switch_port = 1;
+
+	spin_lock_irqsave(&vp->lock, flags);
+	if (switch_port)	/* a switch port goes first: __tx_port_find() falls back to the list head */
+		list_add(&port->list, &vp->port_list);
+	else
+		list_add_tail(&port->list, &vp->port_list);
+	hlist_add_head(&port->hash, &vp->port_hash[vnet_hashfn(port->raddr)]);
+	spin_unlock_irqrestore(&vp->lock, flags);
+
+	dev_set_drvdata(&vdev->dev, port);
+
+	printk(KERN_INFO "%s: PORT ( remote-mac ", vp->dev->name);
+	for (i = 0; i < 6; i++)
+		printk("%2.2x%c", port->raddr[i], i == 5 ? ' ' : ':');
+	if (switch_port)
+		printk("switch-port ");
+	printk(")\n");
+
+	vio_port_up(&port->vio);
+
+	mdesc_release(hp);
+
+	return 0;
+
+err_out_free_ldc:
+	vio_ldc_free(&port->vio);
+
+err_out_free_port:
+	kfree(port);
+
+err_out_put_mdesc:
+	mdesc_release(hp);
+	return err;
+}
+/* Remove a port: unlink it from its vnet, then free TX buffers, LDC channel and the port. */
+static int vnet_port_remove(struct vio_dev *vdev)
+{
+	struct vnet_port *port = dev_get_drvdata(&vdev->dev);
+
+	if (port) {
+		struct vnet *vp = port->vp;
+		unsigned long flags;
+
+		del_timer_sync(&port->vio.timer);
+
+		spin_lock_irqsave(&vp->lock, flags);
+		list_del(&port->list);
+		hlist_del(&port->hash);
+		spin_unlock_irqrestore(&vp->lock, flags);
+
+		vnet_port_free_tx_bufs(port);
+		vio_ldc_free(&port->vio);
+
+		dev_set_drvdata(&vdev->dev, NULL);
+
+		kfree(port);
+	}
+	return 0;
+}
+/* Device IDs handled by vnet_port_driver. */
+static struct vio_device_id vnet_port_match[] = {
+	{
+		.type = "vnet-port",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(vio, vnet_port_match);	/* export this table, not vnet_match */
+
+static struct vio_driver vnet_port_driver = {
+	.id_table	= vnet_port_match,
+	.probe		= vnet_port_probe,
+	.remove		= vnet_port_remove,
+	.driver		= {
+		.name	= "vnet_port",
+		.owner	= THIS_MODULE,
+	}
+};
+
+const char *local_mac_prop = "local-mac-address";
+/* Probe a "network" vio device: allocate and register its net_device. */
+static int __devinit vnet_probe(struct vio_dev *vdev,
+				const struct vio_device_id *id)
+{
+	static int vnet_version_printed;
+	struct mdesc_handle *hp;
+	struct net_device *dev;
+	struct vnet *vp;
+	const u64
*mac;
+	int err, i, len;
+
+	if (vnet_version_printed++ == 0)	/* print the version banner once */
+		printk(KERN_INFO "%s", version);
+
+	hp = mdesc_grab();	/* NOTE(review): result is used unchecked -- confirm it cannot fail */
+
+	mac = mdesc_get_property(hp, vdev->mp, local_mac_prop, &len);
+	if (!mac) {
+		printk(KERN_ERR PFX "vnet lacks %s property.\n",
+		       local_mac_prop);
+		err = -ENODEV;
+		goto err_out;
+	}
+
+	dev = alloc_etherdev(sizeof(*vp));
+	if (!dev) {
+		printk(KERN_ERR PFX "Etherdev alloc failed, aborting.\n");
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	for (i = 0; i < ETH_ALEN; i++)	/* MAC is the low 48 bits of *mac, most significant byte first */
+		dev->dev_addr[i] = (*mac >> (5 - i) * 8) & 0xff;
+
+	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
+
+	SET_NETDEV_DEV(dev, &vdev->dev);
+
+	vp = netdev_priv(dev);
+
+	spin_lock_init(&vp->lock);
+	vp->dev = dev;
+	vp->vdev = vdev;
+
+	INIT_LIST_HEAD(&vp->port_list);
+	for (i = 0; i < VNET_PORT_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&vp->port_hash[i]);
+
+	dev->open = vnet_open;
+	dev->stop = vnet_close;
+	dev->set_multicast_list = vnet_set_rx_mode;
+	dev->set_mac_address = vnet_set_mac_addr;
+	dev->tx_timeout = vnet_tx_timeout;
+	dev->ethtool_ops = &vnet_ethtool_ops;
+	dev->watchdog_timeo = VNET_TX_TIMEOUT;
+	dev->change_mtu = vnet_change_mtu;
+	dev->hard_start_xmit = vnet_start_xmit;
+
+	err = register_netdev(dev);
+	if (err) {
+		printk(KERN_ERR PFX "Cannot register net device, "
+		       "aborting.\n");
+		goto err_out_free_dev;
+	}
+
+	printk(KERN_INFO "%s: Sun LDOM vnet ", dev->name);
+
+	for (i = 0; i < 6; i++)
+		printk("%2.2x%c", dev->dev_addr[i], i == 5 ?
'\n' : ':');
+
+	dev_set_drvdata(&vdev->dev, vp);
+
+	mdesc_release(hp);
+
+	return 0;
+
+err_out_free_dev:
+	free_netdev(dev);
+
+err_out:
+	mdesc_release(hp);
+	return err;
+}
+/* Remove a "network" vio device by unregistering its net_device. */
+static int vnet_remove(struct vio_dev *vdev)
+{
+	/* NOTE: the net_device is not freed here; ports keep port->vp pointing into it. */
+	struct vnet *vp = dev_get_drvdata(&vdev->dev);
+
+	if (vp) {
+		/* XXX unregister port, or at least check XXX */
+		unregister_netdev(vp->dev);	/* takes RTNL itself; unregister_netdevice() needs it held */
+		dev_set_drvdata(&vdev->dev, NULL);
+	}
+	return 0;
+}
+/* Device IDs handled by vnet_driver. */
+static struct vio_device_id vnet_match[] = {
+	{
+		.type = "network",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(vio, vnet_match);
+
+static struct vio_driver vnet_driver = {
+	.id_table	= vnet_match,
+	.probe		= vnet_probe,
+	.remove		= vnet_remove,
+	.driver		= {
+		.name	= "vnet",
+		.owner	= THIS_MODULE,
+	}
+};
+/* Register both vio drivers; vnet_driver is rolled back if the port driver fails. */
+static int __init vnet_init(void)
+{
+	int err = vio_register_driver(&vnet_driver);
+
+	if (!err) {
+		err = vio_register_driver(&vnet_port_driver);
+		if (err)
+			vio_unregister_driver(&vnet_driver);
+	}
+
+	return err;
+}
+/* Unregister the drivers in reverse order of registration. */
+static void __exit vnet_exit(void)
+{
+	vio_unregister_driver(&vnet_port_driver);
+	vio_unregister_driver(&vnet_driver);
+}
+
+module_init(vnet_init);
+module_exit(vnet_exit);
diff --git a/drivers/net/sunvnet.h b/drivers/net/sunvnet.h
new file mode 100644
index 00000000000..1c887302d46
--- /dev/null
+++ b/drivers/net/sunvnet.h
@@ -0,0 +1,70 @@
+#ifndef _SUNVNET_H
+#define _SUNVNET_H
+
+#define DESC_NCOOKIES(entry_size)	\
+	((entry_size) - sizeof(struct vio_net_desc)) /* NOTE(review): yields bytes, not a cookie count -- confirm */
+
+/* length of time before we decide the hardware is borked,
+ * and dev->tx_timeout() should be called to fix the problem
+ */
+#define VNET_TX_TIMEOUT			(5 * HZ)
+
+#define VNET_TX_RING_SIZE		512
+#define VNET_TX_WAKEUP_THRESH(dr)	((dr)->pending / 4)
+
+/* VNET packets are sent in buffers with the first 6 bytes skipped
+ * so that after the ethernet header the IPv4/IPv6 headers are aligned
+ * properly.
+ */
+#define VNET_PACKET_SKIP		6
+/* One TX slot: a kernel buffer plus the LDC cookies it is mapped to. */
+struct vnet_tx_entry {
+	void			*buf;
+	unsigned int		ncookies;
+	struct ldc_trans_cookie	cookies[2];
+};
+
+struct vnet;
+struct vnet_port {
+	struct vio_driver_state	vio;
+
+	struct hlist_node	hash;	/* entry in vnet->port_hash, keyed by raddr */
+	u8			raddr[ETH_ALEN];
+
+	struct vnet		*vp;	/* owning vnet */
+
+	struct vnet_tx_entry	tx_bufs[VNET_TX_RING_SIZE];
+
+	struct list_head	list;	/* entry in vnet->port_list */
+};
+/* Map an embedded vio_driver_state back to its containing vnet_port. */
+static inline struct vnet_port *to_vnet_port(struct vio_driver_state *vio)
+{
+	return container_of(vio, struct vnet_port, vio);
+}
+
+#define VNET_PORT_HASH_SIZE	16
+#define VNET_PORT_HASH_MASK	(VNET_PORT_HASH_SIZE - 1)
+/* Hash a MAC address to a port_hash bucket using its last two bytes. */
+static inline unsigned int vnet_hashfn(u8 *mac)
+{
+	unsigned int val = mac[4] ^ mac[5];
+
+	return val & (VNET_PORT_HASH_MASK);
+}
+
+struct vnet {
+	/* Protects port_list and port_hash. */
+	spinlock_t		lock;
+
+	struct net_device	*dev;
+
+	u32			msg_enable;
+	struct vio_dev		*vdev;
+
+	struct list_head	port_list;
+
+	struct hlist_head	port_hash[VNET_PORT_HASH_SIZE];
+};
+
+#endif /* _SUNVNET_H */
diff --git a/drivers/serial/sunhv.c b/drivers/serial/sunhv.c
index 96557e6dba6..17bcca53d6a 100644
--- a/drivers/serial/sunhv.c
+++ b/drivers/serial/sunhv.c
@@ -440,8 +440,16 @@ static void sunhv_console_write_paged(struct console *con, const char *s, unsign
 {
 	struct uart_port *port = sunhv_port;
 	unsigned long flags;
+	int locked = 1;	/* cleared when port->lock is not held */
+
+	local_irq_save(flags);
+	if (port->sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&port->lock);
+	} else
+		spin_lock(&port->lock);

-	spin_lock_irqsave(&port->lock, flags);
 	while (n > 0) {
 		unsigned long ra = __pa(con_write_page);
 		unsigned long page_bytes;
@@ -469,7 +477,10 @@ static void sunhv_console_write_paged(struct console *con, const char *s, unsign
 			ra += written;
 		}
 	}
-	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (locked)
+		spin_unlock(&port->lock);
+	local_irq_restore(flags);
 }

 static inline void sunhv_console_putchar(struct uart_port *port, char c)
@@ -488,7 +499,15 @@ static void sunhv_console_write_bychar(struct
console *con, const char *s, unsig
 {
 	struct uart_port *port = sunhv_port;
 	unsigned long flags;
-	int i;
+	int i, locked = 1;
+
+	/* Skip the lock in sysrq context and only trylock during an oops. */
+	local_irq_save(flags);
+	if (port->sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&port->lock);
+	} else
+		spin_lock(&port->lock);

-	spin_lock_irqsave(&port->lock, flags);
 	for (i = 0; i < n; i++) {
@@ -496,7 +515,10 @@ static void sunhv_console_write_bychar(struct console *con, const char *s, unsig
 			sunhv_console_putchar(port, '\r');
 		sunhv_console_putchar(port, *s++);
 	}
-	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (locked)
+		spin_unlock(&port->lock);
+	local_irq_restore(flags);
 }

 static struct console sunhv_console = {
diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c
index deb9ab4b5a0..8a0f9e4408d 100644
--- a/drivers/serial/sunsab.c
+++ b/drivers/serial/sunsab.c
@@ -860,22 +860,31 @@ static int num_channels;
 static void sunsab_console_putchar(struct uart_port *port, int c)
 {
 	struct uart_sunsab_port *up = (struct uart_sunsab_port *)port;
-	unsigned long flags;
-
-	spin_lock_irqsave(&up->port.lock, flags);

 	sunsab_tec_wait(up);
 	writeb(c, &up->regs->w.tic);
-
-	spin_unlock_irqrestore(&up->port.lock, flags);
 }

 static void sunsab_console_write(struct console *con, const char *s, unsigned n)
 {
 	struct uart_sunsab_port *up = &sunsab_ports[con->index];
+	unsigned long flags;
+	int locked = 1;
+
+	local_irq_save(flags);
+	if (up->port.sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&up->port.lock);
+	} else
+		spin_lock(&up->port.lock);

 	uart_console_write(&up->port, s, n, sunsab_console_putchar);
 	sunsab_tec_wait(up);
+
+	if (locked)
+		spin_unlock(&up->port.lock);
+	local_irq_restore(flags);
 }

 static int sunsab_console_setup(struct console *con, char *options)
diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c
index 2a63cdba320..26d720baf88 100644
--- a/drivers/serial/sunsu.c
+++ b/drivers/serial/sunsu.c
@@ -1288,7 +1288,17 @@ static void
sunsu_console_write(struct console *co, const char *s,
 				unsigned int count)
 {
 	struct uart_sunsu_port *up = &sunsu_ports[co->index];
+	unsigned long flags;
 	unsigned int ier;
+	int locked = 1;	/* cleared when up->port.lock is not held */
+
+	local_irq_save(flags);
+	if (up->port.sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&up->port.lock);
+	} else
+		spin_lock(&up->port.lock);

 	/*
 	 *	First save the UER then disable the interrupts
@@ -1304,6 +1314,10 @@ static void sunsu_console_write(struct console *co, const char *s,
 	 */
 	wait_for_xmitr(up);
 	serial_out(up, UART_IER, ier);
+
+	if (locked)
+		spin_unlock(&up->port.lock);
+	local_irq_restore(flags);
 }

 /*
diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c
index 15b6e1cb040..0a3e10a4a35 100644
--- a/drivers/serial/sunzilog.c
+++ b/drivers/serial/sunzilog.c
@@ -9,7 +9,7 @@
  * C. Dost, Pete Zaitcev, Ted Ts'o and Alex Buell for their
  * work there.
  *
- * Copyright (C) 2002, 2006 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2002, 2006, 2007 David S. Miller (davem@davemloft.net)
  */

 #include <linux/module.h>
@@ -1151,11 +1151,22 @@ sunzilog_console_write(struct console *con, const char *s, unsigned int count)
 {
 	struct uart_sunzilog_port *up = &sunzilog_port_table[con->index];
 	unsigned long flags;
+	int locked = 1;	/* cleared when up->port.lock is not held */
+
+	local_irq_save(flags);
+	if (up->port.sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&up->port.lock);
+	} else
+		spin_lock(&up->port.lock);

-	spin_lock_irqsave(&up->port.lock, flags);
 	uart_console_write(&up->port, s, count, sunzilog_putchar);
 	udelay(2);
-	spin_unlock_irqrestore(&up->port.lock, flags);
+
+	if (locked)
+		spin_unlock(&up->port.lock);
+	local_irq_restore(flags);
 }

 static int __init sunzilog_console_setup(struct console *con, char *options) |