/* * linux/ipc/util.c * Copyright (C) 1992 Krishna Balasubramanian * * Sep 1997 - Call suser() last after "normal" permission checks so we * get BSD style process accounting right. * Occurs in several places in the IPC code. * Chris Evans, * Nov 1999 - ipc helper functions, unified SMP locking * Manfred Spraul * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary(). * Mingming Cao * Mar 2006 - support for audit of ipc object properties * Dustin Kirkland * Jun 2006 - namespaces ssupport * OpenVZ, SWsoft Inc. * Pavel Emelianov */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util.h" struct ipc_proc_iface { const char *path; const char *header; int ids; int (*show)(struct seq_file *, void *); }; struct ipc_namespace init_ipc_ns = { .kref = { .refcount = ATOMIC_INIT(2), }, }; atomic_t nr_ipc_ns = ATOMIC_INIT(1); #ifdef CONFIG_MEMORY_HOTPLUG static void ipc_memory_notifier(struct work_struct *work) { ipcns_notify(IPCNS_MEMCHANGED); } static DECLARE_WORK(ipc_memory_wq, ipc_memory_notifier); static int ipc_memory_callback(struct notifier_block *self, unsigned long action, void *arg) { switch (action) { case MEM_ONLINE: /* memory successfully brought online */ case MEM_OFFLINE: /* or offline: it's time to recompute msgmni */ /* * This is done by invoking the ipcns notifier chain with the * IPC_MEMCHANGED event. * In order not to keep the lock on the hotplug memory chain * for too long, queue a work item that will, when waken up, * activate the ipcns notification chain. * No need to keep several ipc work items on the queue. */ if (!work_pending(&ipc_memory_wq)) schedule_work(&ipc_memory_wq); break; case MEM_GOING_ONLINE: case MEM_GOING_OFFLINE: case MEM_CANCEL_ONLINE: case MEM_CANCEL_OFFLINE: default: break; } return NOTIFY_OK; } #endif /* CONFIG_MEMORY_HOTPLUG */ /** * ipc_init - initialise IPC subsystem * * The various system5 IPC resources (semaphores, messages and shared * memory) are initialised * A callback routine is registered into the memory hotplug notifier * chain: since msgmni scales to lowmem this callback routine will be * called upon successful memory add / remove to recompute msmgni. */ static int __init ipc_init(void) { sem_init(); msg_init(); shm_init(); hotplug_memory_notifier(ipc_memory_callback, IPC_CALLBACK_PRI); register_ipcns_notifier(&init_ipc_ns); return 0; } __initcall(ipc_init); /** * ipc_init_ids - initialise IPC identifiers * @ids: Identifier set * * Set up the sequence range to use for the ipc identifier range (limited * below IPCMNI) then initialise the ids idr. */ void ipc_init_ids(struct ipc_ids *ids) { init_rwsem(&ids->rw_mutex); ids->in_use = 0; ids->seq = 0; { int seq_limit = INT_MAX/SEQ_MULTIPLIER; if (seq_limit > USHORT_MAX) ids->seq_max = USHORT_MAX; else ids->seq_max = seq_limit; } idr_init(&ids->ipcs_idr); } #ifdef CONFIG_PROC_FS static const struct file_operations sysvipc_proc_fops; /** * ipc_init_proc_interface - Create a proc interface for sysipc types using a seq_file interface. * @path: Path in procfs * @header: Banner to be printed at the beginning of the file. * @ids: ipc id table to iterate. * @show: show routine. */ void __init ipc_init_proc_interface(const char *path, const char *header, int ids, int (*show)(struct seq_file *, void *)) { struct proc_dir_entry *pde; struct ipc_proc_iface *iface; iface = kmalloc(sizeof(*iface), GFP_KERNEL); if (!iface) return; iface->path = path; iface->header = header; iface->ids = ids; iface->show = show; pde = proc_create_data(path, S_IRUGO, /* world readable */ NULL, /* parent dir */ &sysvipc_proc_fops, iface); if (!pde) { kfree(iface); } } #endif /** * ipc_findkey - find a key in an ipc identifier set * @ids: Identifier set * @key: The key to find * * Requires ipc_ids.rw_mutex locked. * Returns the LOCKED pointer to the ipc structure if found or NULL * if not. * If key is found ipc points to the owning ipc structure */ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) { struct kern_ipc_perm *ipc; int next_id; int total; for (total = 0, next_id = 0; total < ids->in_use; next_id++) { ipc = idr_find(&ids->ipcs_idr, next_id); if (ipc == NULL) continue; if (ipc->key != key) { total++; continue; } ipc_lock_by_ptr(ipc); return ipc; } return NULL; } /** * ipc_get_maxid - get the last assigned id * @ids: IPC identifier set * * Called with ipc_ids.rw_mutex held. */ int ipc_get_maxid(struct ipc_ids *ids) { struct kern_ipc_perm *ipc; int max_id = -1; int total, id; if (ids->in_use == 0) return -1; if (ids->in_use == IPCMNI) return IPCMNI - 1; /* Look for the last assigned id */ total = 0; for (id = 0; id < IPCMNI && total < ids->in_use; id++) { ipc = idr_find(&ids->ipcs_idr, id); if (ipc != NULL) { max_id = id; total++; } } return max_id; } /** * ipc_addid - add an IPC identifier * @ids: IPC identifier set * @new: new IPC permission set * @size: limit for the number of used ids * * Add an entry 'new' to the IPC ids idr. The permissions object is * initialised and the first free entry is set up and the id assigned * is returned. The 'new' entry is returned in a locked state on success. * On failure the entry is not locked and a negative err-code is returned. * * Called with ipc_ids.rw_mutex held as a writer. */ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) { int id, err; if (size > IPCMNI) size = IPCMNI; if (ids->in_use >= size) return -ENOSPC; spin_lock_init(&new->lock); new->deleted = 0; rcu_read_lock(); spin_lock(&new->lock); err = idr_get_new(&ids->ipcs_idr, new, &id); if (err) { spin_unlock(&new->lock); rcu_read_unlock(); return err; } ids->in_use++; new->cuid = new->uid = current->euid; new->gid = new->cgid = current->egid; new->seq = ids->seq++; if(ids->seq > ids->seq_max) ids->seq = 0; new->id = ipc_buildid(id, new->seq); return id; } /** * ipcget_new - create a new ipc object * @ns: namespace * @ids: IPC identifer set * @ops: the actual creation routine to call * @params: its parameters * * This routine is called by sys_msgget, sys_semget() and sys_shmget() * when the key is IPC_PRIVATE. */ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, struct ipc_ops *ops, struct ipc_params *params) { int err; retry: err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL); if (!err) return -ENOMEM; down_write(&ids->rw_mutex); err = ops->getnew(ns, params); up_write(&ids->rw_mutex); if (err == -EAGAIN) goto retry; return err; } /** * ipc_check_perms - check security and permissions for an IPC * @ipcp: ipc permission set * @ops: the actual security routine to call * @params: its parameters * * This routine is called by sys_msgget(), sys_semget() and sys_shmget() * when the key is not IPC_PRIVATE and that key already exists in the * ids IDR. * * On success, the IPC id is returned. * * It is called with ipc_ids.rw_mutex and ipcp->lock held. */ static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops, struct ipc_params *params) { int err; if (ipcperms(ipcp, params->flg)) err = -EACCES; else { err = ops->associate(ipcp, params->flg); if (!err) err = ipcp->id; } return err; } /** * ipcget_public - get an ipc object or create a new one * @ns: namespace * @ids: IPC identifer set * @ops: the actual creation routine to call * @params: its parameters * * This routine is called by sys_msgget, sys_semget() and sys_shmget() * when the key is not IPC_PRIVATE. * It adds a new entry if the key is not found and does some permission * / security checkings if the key is found. * * On success, the ipc id is returned. */ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, struct ipc_ops *ops, struct ipc_params *params) { struct kern_ipc_perm *ipcp; int flg = params->flg; int err; retry: err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL); /* * Take the lock as a writer since we are potentially going to add * a new entry + read locks are not "upgradable" */ down_write(&ids->rw_mutex); ipcp = ipc_findkey(ids, params->key); if (ipcp == NULL) { /* key not used */ if (!(flg & IPC_CREAT)) err = -ENOENT; else if (!err) err = -ENOMEM; else err = ops->getnew(ns, params); } else { /* ipc object has been locked by ipc_findkey() */ if (flg & IPC_CREAT && flg & IPC_EXCL) err = -EEXIST; else { err = 0; if (ops->more_checks) err = ops->more_checks(ipcp, params); if (!err) /* * ipc_check_perms returns the IPC id on * success */ err = ipc_check_perms(ipcp, ops, params); } ipc_unlock(ipcp); } up_write(&ids->rw_mutex); if (err == -EAGAIN) goto retry; return err; } /** * ipc_rmid - remove an IPC identifier * @ids: IPC identifier set * @ipcp: ipc perm structure containing the identifier to remove * * ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held * before this function is called, and remain locked on the exit. */ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) { int lid = ipcid_to_idx(ipcp->id); idr_remove(&ids->ipcs_idr, lid); ids->in_use--; ipcp->deleted = 1; return; } /** * ipc_alloc - allocate ipc space * @size: size desired * * Allocate memory from the appropriate pools and return a pointer to it. * NULL is returned if the allocation fails */ void* ipc_alloc(int size) { void* out; if(size > PAGE_SIZE) out = vmalloc(size); else out = kmalloc(size, GFP_KERNEL); return out; } /** * ipc_free - free ipc space * @ptr: pointer returned by ipc_alloc * @size: size of block * * Free a block created with ipc_alloc(). The caller must know the size * used in the allocation call. */ void ipc_free(void* ptr, int size) { if(size > PAGE_SIZE) vfree(ptr); else kfree(ptr); } /* * rcu allocations: * There are three headers that are prepended to the actual allocation: * - during use: ipc_rcu_hdr. * - during the rcu grace period: ipc_rcu_grace. * - [only if vmalloc]: ipc_rcu_sched. * Their lifetime doesn't overlap, thus the headers share the same memory. * Unlike a normal union, they are right-aligned, thus some container_of * forward/backward casting is necessary: */ struct ipc_rcu_hdr { int refcount; int is_vmalloc; void *data[0]; }; struct ipc_rcu_grace { struct rcu_head rcu; /* "void *" makes sure alignment of following data is sane. */ void *data[0]; }; struct ipc_rcu_sched { struct work_struct work; /* "void *" makes sure alignment of following data is sane. */ void *data[0]; }; #define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \ sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr)) #define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \ sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC) static inline int rcu_use_vmalloc(int size) { /* Too big for a single page? */ if (HDRLEN_KMALLOC + size > PAGE_SIZE) return 1; return 0; } /** * ipc_rcu_alloc - allocate ipc and rcu space * @size: size desired * * Allocate memory for the rcu header structure + the object. * Returns the pointer to the object. * NULL is returned if the allocation fails. */ void* ipc_rcu_alloc(int size) { void* out; /* * We prepend the allocation with the rcu struct, and * workqueue if necessary (for vmalloc). */ if (rcu_use_vmalloc(size)) { out = vmalloc(HDRLEN_VMALLOC + size); if (out) { out += HDRLEN_VMALLOC; container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1; container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; } } else { out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL); if (out) { out += HDRLEN_KMALLOC; container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0; container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; } } return out; } void ipc_rcu_getref(void *ptr) { container_of(ptr, struct ipc_rcu_hdr, data)->refcount++; } static void ipc_do_vfree(struct work_struct *work) { vfree(container_of(work, struct ipc_rcu_sched, work)); } /** * ipc_schedule_free - free ipc + rcu space * @head: RCU callback structure for queued work * * Since RCU callback function is called in bh, * we need to defer the vfree to schedule_work(). */ static void ipc_schedule_free(struct rcu_head *head) { struct ipc_rcu_grace *grace; struct ipc_rcu_sched *sched; grace = container_of(head, struct ipc_rcu_grace, rcu); sched = container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]); INIT_WORK(&sched->work, ipc_do_vfree); schedule_work(&sched->work); } /** * ipc_immediate_free - free ipc + rcu space * @head: RCU callback structure that contains pointer to be freed * * Free from the RCU callback context. */ static void ipc_immediate_free(struct rcu_head *head) { struct ipc_rcu_grace *free = container_of(head, struct ipc_rcu_grace, rcu); kfree(free); } void ipc_rcu_putref(void *ptr) { if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0) return; if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) { call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, ipc_schedule_free); } else { call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, ipc_immediate_free); } } /** * ipcperms - check IPC permissions * @ipcp: IPC permission set * @flag: desired permission set. * * Check user, group, other permissions for access * to ipc resources. return 0 if allowed */ int ipcperms (struct kern_ipc_perm *ipcp, short flag) { /* flag will most probably be 0 or S_...UGO from */ int requested_mode, granted_mode, err; if (unlikely((err = audit_ipc_obj(ipcp)))) return err; requested_mode = (flag >> 6) | (flag >> 3) | flag; granted_mode = ipcp->mode; if (current->euid == ipcp->cuid || current->euid == ipcp->uid) granted_mode >>= 6; else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid)) granted_mode >>= 3; /* is there some bit set in requested_mode but not in granted_mode? */ if ((requested_mode & ~granted_mode & 0007) && !capable(CAP_IPC_OWNER)) return -1; return security_ipc_permission(ipcp, flag); } /* * Functions to convert between the kern_ipc_perm structure and the * old/new ipc_perm structures */ /** * kernel_to_ipc64_perm - convert kernel ipc permissions to user * @in: kernel permissions * @out: new style IPC permissions * * Turn the kernel object @in into a set of permissions descriptions * for returning to userspace (@out). */ void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out) { out->key = in->key; out->uid = in->uid; out->gid = in->gid; out->cuid = in->cuid; out->cgid = in->cgid; out->mode = in->mode; out->seq = in->seq; } /** * ipc64_perm_to_ipc_perm - convert new ipc permissions to old * @in: new style IPC permissions * @out: old style IPC permissions * * Turn the new style permissions object @in into a compatibility * object and store it into the @out pointer. */ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out) { out->key = in->key; SET_UID(out->uid, in->uid); SET_GID(out->gid, in->gid); SET_UID(out->cuid, in->cuid); SET_GID(out->cgid, in->cgid); out->mode = in->mode; out->seq = in->seq; } /** * ipc_lock - Lock an ipc structure without rw_mutex held * @ids: IPC identifier set * @id: ipc id to look for * * Look for an id in the ipc ids idr and lock the associated ipc object. * * The ipc object is locked on exit. */ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) { struct kern_ipc_perm *out; int lid = ipcid_to_idx(id); rcu_read_lock(); out = idr_find(&ids->ipcs_idr, lid); if (out == NULL) { rcu_read_unlock(); return ERR_PTR(-EINVAL); } spin_lock(&out->lock); /* ipc_rmid() may have already freed the ID while ipc_lock * was spinning: here verify that the structure is still valid */ if (out->deleted) { spin_unlock(&out->lock); rcu_read_unlock(); return ERR_PTR(-EINVAL); } return out; } struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id) { struct kern_ipc_perm *out; out = ipc_lock(ids, id); if (IS_ERR(out)) return out; if (ipc_checkid(out, id)) { ipc_unlock(out); return ERR_PTR(-EIDRM); } return out; } /** * ipcget - Common sys_*get() code * @ns : namsepace * @ids : IPC identifier set * @ops : operations to be called on ipc object creation, permission checks * and further checks * @params : the parameters needed by the previous operations. * * Common routine called by sys_msgget(), sys_semget() and sys_shmget(). */ int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, struct ipc_ops *ops, struct ipc_params *params) { if (params->key == IPC_PRIVATE) return ipcget_new(ns, ids, ops, params); else return ipcget_public(ns, ids, ops, params); } /** * ipc_update_perm - update the permissions of an IPC. * @in: the permission given as input. * @out: the permission of the ipc to set. */ void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) { out->uid = in->uid; out->gid = in->gid; out->mode = (out->mode & ~S_IRWXUGO) | (in->mode & S_IRWXUGO); } /** * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd * @ids: the table of ids where to look for the ipc * @id: the id of the ipc to retrieve * @cmd: the cmd to check * @perm: the permission to set * @extra_perm: one extra permission parameter used by msq * * This function does some common audit and permissions check for some IPC_XXX * cmd and is called from semctl_down, shmctl_down and msgctl_down. * It must be called without any lock held and * - retrieves the ipc with the given id in the given table. * - performs some audit and permission check, depending on the given cmd * - returns the ipc with both ipc and rw_mutex locks held in case of success * or an err-code without any lock held otherwise. */ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd, struct ipc64_perm *perm, int extra_perm) { struct kern_ipc_perm *ipcp; int err; down_write(&ids->rw_mutex); ipcp = ipc_lock_check(ids, id); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); goto out_up; } err = audit_ipc_obj(ipcp); if (err) goto out_unlock; if (cmd == IPC_SET) { err = audit_ipc_set_perm(extra_perm, perm->uid, perm->gid, perm->mode); if (err) goto out_unlock; } if (current->euid == ipcp->cuid || current->euid == ipcp->uid || capable(CAP_SYS_ADMIN)) return ipcp; err = -EPERM; out_unlock: ipc_unlock(ipcp); out_up: up_write(&ids->rw_mutex); return ERR_PTR(err); } #ifdef __ARCH_WANT_IPC_PARSE_VERSION /** * ipc_parse_version - IPC call version * @cmd: pointer to command * * Return IPC_64 for new style IPC and IPC_OLD for old style IPC. * The @cmd value is turned from an encoding command and version into * just the command code. */ int ipc_parse_version (int *cmd) { if (*cmd & IPC_64) { *cmd ^= IPC_64; return IPC_64; } else { return IPC_OLD; } } #endif /* __ARCH_WANT_IPC_PARSE_VERSION */ #ifdef CONFIG_PROC_FS struct ipc_proc_iter { struct ipc_namespace *ns; struct ipc_proc_iface *iface; }; /* * This routine locks the ipc structure found at least at position pos. */ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, loff_t *new_pos) { struct kern_ipc_perm *ipc; int total, id; total = 0; for (id = 0; id < pos && total < ids->in_use; id++) { ipc = idr_find(&ids->ipcs_idr, id); if (ipc != NULL) total++; } if (total >= ids->in_use) return NULL; for ( ; pos < IPCMNI; pos++) { ipc = idr_find(&ids->ipcs_idr, pos); if (ipc != NULL) { *new_pos = pos + 1; ipc_lock_by_ptr(ipc); return ipc; } } /* Out of range - return NULL to terminate iteration */ return NULL; } static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos) { struct ipc_proc_iter *iter = s->private; struct ipc_proc_iface *iface = iter->iface; struct kern_ipc_perm *ipc = it; /* If we had an ipc id locked before, unlock it */ if (ipc && ipc != SEQ_START_TOKEN) ipc_unlock(ipc); return sysvipc_find_ipc(&iter->ns->ids[iface->ids], *pos, pos); } /* * File positions: pos 0 -> header, pos n -> ipc id = n - 1. * SeqFile iterator: iterator value locked ipc pointer or SEQ_TOKEN_START. */ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) { struct ipc_proc_iter *iter = s->private; struct ipc_proc_iface *iface = iter->iface; struct ipc_ids *ids; ids = &iter->ns->ids[iface->ids]; /* * Take the lock - this will be released by the corresponding * call to stop(). */ down_read(&ids->rw_mutex); /* pos < 0 is invalid */ if (*pos < 0) return NULL; /* pos == 0 means header */ if (*pos == 0) return SEQ_START_TOKEN; /* Find the (pos-1)th ipc */ return sysvipc_find_ipc(ids, *pos - 1, pos); } static void sysvipc_proc_stop(struct seq_file *s, void *it) { struct kern_ipc_perm *ipc = it; struct ipc_proc_iter *iter = s->private; struct ipc_proc_iface *iface = iter->iface; struct ipc_ids *ids; /* If we had a locked structure, release it */ if (ipc && ipc != SEQ_START_TOKEN) ipc_unlock(ipc); ids = &iter->ns->ids[iface->ids]; /* Release the lock we took in start() */ up_read(&ids->rw_mutex); } static int sysvipc_proc_show(struct seq_file *s, void *it) { struct ipc_proc_iter *iter = s->private; struct ipc_proc_iface *iface = iter->iface; if (it == SEQ_START_TOKEN) return seq_puts(s, iface->header); return iface->show(s, it); } static struct seq_operations sysvipc_proc_seqops = { .start = sysvipc_proc_start, .stop = sysvipc_proc_stop, .next = sysvipc_proc_next, .show = sysvipc_proc_show, }; static int sysvipc_proc_open(struct inode *inode, struct file *file) { int ret; struct seq_file *seq; struct ipc_proc_iter *iter; ret = -ENOMEM; iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) goto out; ret = seq_open(file, &sysvipc_proc_seqops); if (ret) goto out_kfree; seq = file->private_data; seq->private = iter; iter->iface = PDE(inode)->data; iter->ns = get_ipc_ns(current->nsproxy->ipc_ns); out: return ret; out_kfree: kfree(iter); goto out; } static int sysvipc_proc_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct ipc_proc_iter *iter = seq->private; put_ipc_ns(iter->ns); return seq_release_private(inode, file); } static const struct file_operations sysvipc_proc_fops = { .open = sysvipc_proc_open, .read = seq_read, .llseek = seq_lseek, .release = sysvipc_proc_release, }; #endif /* CONFIG_PROC_FS */ >577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
/* znet.c: An Zenith Z-Note ethernet driver for linux. */

/*
	Written by Donald Becker.

	The author may be reached as becker@scyld.com.
	This driver is based on the Linux skeleton driver.  The copyright of the
	skeleton driver is held by the United States Government, as represented
	by DIRNSA, and it is released under the GPL.

	Thanks to Mike Hollick for alpha testing and suggestions.

  References:
	   The Crynwr packet driver.

	  "82593 CSMA/CD Core LAN Controller" Intel datasheet, 1992
	  Intel Microcommunications Databook, Vol. 1, 1990.
    As usual with Intel, the documentation is incomplete and inaccurate.
	I had to read the Crynwr packet driver to figure out how to actually
	use the i82593, and guess at what register bits matched the loosely
	related i82586.

					Theory of Operation

	The i82593 used in the Zenith Z-Note series operates using two(!) slave
	DMA	channels, one interrupt, and one 8-bit I/O port.

	While there	several ways to configure '593 DMA system, I chose the one
	that seemed commensurate with the highest system performance in the face
	of moderate interrupt latency: Both DMA channels are configured as
	recirculating ring buffers, with one channel (#0) dedicated to Rx and
	the other channel (#1) to Tx and configuration.  (Note that this is
	different than the Crynwr driver, where the Tx DMA channel is initialized
	before each operation.  That approach simplifies operation and Tx error
	recovery, but requires additional I/O in normal operation and precludes
	transmit buffer	chaining.)

	Both rings are set to 8192 bytes using {TX,RX}_RING_SIZE.  This provides
	a reasonable ring size for Rx, while simplifying DMA buffer allocation --
	DMA buffers must not cross a 128K boundary.  (In truth the size selection
	was influenced by my lack of '593 documentation.  I thus was constrained
	to use the Crynwr '593 initialization table, which sets the Rx ring size
	to 8K.)

	Despite my usual low opinion about Intel-designed parts, I must admit
	that the bulk data handling of the i82593 is a good design for
	an integrated system, like a laptop, where using two slave DMA channels
	doesn't pose a problem.  I still take issue with using only a single I/O
	port.  In the same controlled environment there are essentially no
	limitations on I/O space, and using multiple locations would eliminate
	the	need for multiple operations when looking at status registers,
	setting the Rx ring boundary, or switching to promiscuous mode.

	I also question Zenith's selection of the '593: one of the advertised
	advantages of earlier Intel parts was that if you figured out the magic
	initialization incantation you could use the same part on many different
	network types.  Zenith's use of the "FriendlyNet" (sic) connector rather
	than an	on-board transceiver leads me to believe that they were planning
	to take advantage of this.  But, uhmmm, the '593 omits all but ethernet
	functionality from the serial subsystem.
 */

/* 10/2002

   o Resurected for Linux 2.5+ by Marc Zyngier <maz@wild-wind.fr.eu.org> :

   - Removed strange DMA snooping in znet_sent_packet, which lead to
     TX buffer corruption on my laptop.
   - Use init_etherdev stuff.
   - Use kmalloc-ed DMA buffers.
   - Use as few global variables as possible.
   - Use proper resources management.
   - Use wireless/i82593.h as much as possible (structure, constants)
   - Compiles as module or build-in.
   - Now survives unplugging/replugging cable.

   Some code was taken from wavelan_cs.
   
   Tested on a vintage Zenith Z-Note 433Lnp+. Probably broken on
   anything else. Testers (and detailed bug reports) are welcome :-).

   o TODO :

   - Properly handle multicast
   - Understand why some traffic patterns add a 1s latency...
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/if_arp.h>
#include <linux/bitops.h>

#include <asm/system.h>
#include <asm/io.h>
#include <asm/dma.h>

/* This include could be elsewhere, since it is not wireless specific */
#include "wireless/i82593.h"

static char version[] __initdata = "znet.c:v1.02 9/23/94 becker@scyld.com\n";

#ifndef ZNET_DEBUG
#define ZNET_DEBUG 1
#endif
static unsigned int znet_debug = ZNET_DEBUG;
module_param (znet_debug, int, 0);
MODULE_PARM_DESC (znet_debug, "ZNet debug level");
MODULE_LICENSE("GPL");

/* The DMA modes we need aren't in <dma.h>. */
#define DMA_RX_MODE		0x14	/* Auto init, I/O to mem, ++, demand. */
#define DMA_TX_MODE		0x18	/* Auto init, Mem to I/O, ++, demand. */
#define dma_page_eq(ptr1, ptr2) ((long)(ptr1)>>17 == (long)(ptr2)>>17)
#define RX_BUF_SIZE 8192
#define TX_BUF_SIZE 8192
#define DMA_BUF_SIZE (RX_BUF_SIZE + 16)	/* 8k + 16 bytes for trailers */

#define TX_TIMEOUT	10

struct znet_private {
	int rx_dma, tx_dma;
	struct net_device_stats stats;
	spinlock_t lock;
	short sia_base, sia_size, io_size;
	struct i82593_conf_block i593_init;
	/* The starting, current, and end pointers for the packet buffers. */
	ushort *rx_start, *rx_cur, *rx_end;
	ushort *tx_start, *tx_cur, *tx_end;
	ushort tx_buf_len;			/* Tx buffer length, in words. */
};

/* Only one can be built-in;-> */
static struct net_device *znet_dev;

struct netidblk {
	char magic[8];		/* The magic number (string) "NETIDBLK" */
	unsigned char netid[8]; /* The physical station address */
	char nettype, globalopt;
	char vendor[8];		/* The machine vendor and product name. */
	char product[8];
	char irq1, irq2;		/* Interrupts, only one is currently used.	*/
	char dma1, dma2;
	short dma_mem_misc[8];		/* DMA buffer locations (unused in Linux). */
	short iobase1, iosize1;
	short iobase2, iosize2;		/* Second iobase unused. */
	char driver_options;			/* Misc. bits */
	char pad;
};

static int	znet_open(struct net_device *dev);
static int	znet_send_packet(struct sk_buff *skb, struct net_device *dev);
static irqreturn_t znet_interrupt(int irq, void *dev_id, struct pt_regs *regs);
static void	znet_rx(struct net_device *dev);
static int	znet_close(struct net_device *dev);
static struct net_device_stats *net_get_stats(struct net_device *dev);
static void hardware_init(struct net_device *dev);
static void update_stop_hit(short ioaddr, unsigned short rx_stop_offset);
static void znet_tx_timeout (struct net_device *dev);

/* Request needed resources */
static int znet_request_resources (struct net_device *dev)
{
	struct znet_private *znet = dev->priv;
	unsigned long flags;
		
	if (request_irq (dev->irq, &znet_interrupt, 0, "ZNet", dev))
		goto failed;
	if (request_dma (znet->rx_dma, "ZNet rx"))
		goto free_irq;
	if (request_dma (znet->tx_dma, "ZNet tx"))
		goto free_rx_dma;
	if (!request_region (znet->sia_base, znet->sia_size, "ZNet SIA"))
		goto free_tx_dma;
	if (!request_region (dev->base_addr, znet->io_size, "ZNet I/O"))
		goto free_sia;

	return 0;				/* Happy ! */

 free_sia:
	release_region (znet->sia_base, znet->sia_size);
 free_tx_dma:
	flags = claim_dma_lock();
	free_dma (znet->tx_dma);
	release_dma_lock (flags);
 free_rx_dma:
	flags = claim_dma_lock();
	free_dma (znet->rx_dma);
	release_dma_lock (flags);
 free_irq:
	free_irq (dev->irq, dev);
 failed:
	return -1;
}

static void znet_release_resources (struct net_device *dev)
{
	struct znet_private *znet = dev->priv;
	unsigned long flags;
		
	release_region (znet->sia_base, znet->sia_size);
	release_region (dev->base_addr, znet->io_size);
	flags = claim_dma_lock();
	free_dma (znet->tx_dma);
	free_dma (znet->rx_dma);
	release_dma_lock (flags);
	free_irq (dev->irq, dev);
}

/* Keep the magical SIA stuff in a single function... */
static void znet_transceiver_power (struct net_device *dev, int on)
{
	struct znet_private *znet = dev->priv;
	unsigned char v;

	/* Turn on/off the 82501 SIA, using zenith-specific magic. */
	/* Select LAN control register */
	outb(0x10, znet->sia_base);

	if (on)
		v = inb(znet->sia_base + 1) | 0x84;
	else
		v = inb(znet->sia_base + 1) & ~0x84;
		
	outb(v, znet->sia_base+1); /* Turn on/off LAN power (bit 2). */
}

/* Init the i82593, with current promisc/mcast configuration.
   Also used from hardware_init. */
static void znet_set_multicast_list (struct net_device *dev)
{
	struct znet_private *znet = dev->priv;
	short ioaddr = dev->base_addr;
	struct i82593_conf_block *cfblk = &znet->i593_init;

	memset(cfblk, 0x00, sizeof(struct i82593_conf_block));
	
        /* The configuration block.  What an undocumented nightmare.
	   The first set of values are those suggested (without explanation)
	   for ethernet in the Intel 82586 databook.  The rest appear to be
	   completely undocumented, except for cryptic notes in the Crynwr
	   packet driver.  This driver uses the Crynwr values verbatim. */

	/* maz : Rewritten to take advantage of the wanvelan includes.
	   At least we have names, not just blind values */
	
	/* Byte 0 */
	cfblk->fifo_limit = 10;	/* = 16 B rx and 80 B tx fifo thresholds */
	cfblk->forgnesi = 0;	/* 0=82C501, 1=AMD7992B compatibility */
	cfblk->fifo_32 = 1;
	cfblk->d6mod = 0;  	/* Run in i82593 advanced mode */
	cfblk->throttle_enb = 1;

	/* Byte 1 */
	cfblk->throttle = 8;	/* Continuous w/interrupts, 128-clock DMA. */
	cfblk->cntrxint = 0;	/* enable continuous mode receive interrupts */
	cfblk->contin = 1;	/* enable continuous mode */

	/* Byte 2 */
	cfblk->addr_len = ETH_ALEN;
	cfblk->acloc = 1;	/* Disable source addr insertion by i82593 */
	cfblk->preamb_len = 2;	/* 8 bytes preamble */
	cfblk->loopback = 0;	/* Loopback off */
  
	/* Byte 3 */
	cfblk->lin_prio = 0;	/* Default priorities & backoff methods. */
	cfblk->tbofstop = 0;
	cfblk->exp_prio = 0;
	cfblk->bof_met = 0;
  
	/* Byte 4 */
	cfblk->ifrm_spc = 6;	/* 96 bit times interframe spacing */
	
	/* Byte 5 */
	cfblk->slottim_low = 0; /* 512 bit times slot time (low) */
	
	/* Byte 6 */
	cfblk->slottim_hi = 2;	/* 512 bit times slot time (high) */
	cfblk->max_retr = 15;	/* 15 collisions retries */
	
	/* Byte 7 */
	cfblk->prmisc = ((dev->flags & IFF_PROMISC) ? 1 : 0); /* Promiscuous mode */
	cfblk->bc_dis = 0;	/* Enable broadcast reception */
	cfblk->crs_1 = 0;	/* Don't transmit without carrier sense */
	cfblk->nocrc_ins = 0;	/* i82593 generates CRC */
	cfblk->crc_1632 = 0;	/* 32-bit Autodin-II CRC */
	cfblk->crs_cdt = 0;	/* CD not to be interpreted as CS */
	
	/* Byte 8 */
	cfblk->cs_filter = 0;  	/* CS is recognized immediately */
	cfblk->crs_src = 0;	/* External carrier sense */
	cfblk->cd_filter = 0;  	/* CD is recognized immediately */
	
	/* Byte 9 */
	cfblk->min_fr_len = ETH_ZLEN >> 2; /* Minimum frame length */
	
	/* Byte A */
	cfblk->lng_typ = 1;	/* Type/length checks OFF */
	cfblk->lng_fld = 1; 	/* Disable 802.3 length field check */
	cfblk->rxcrc_xf = 1;	/* Don't transfer CRC to memory */
	cfblk->artx = 1;	/* Disable automatic retransmission */
	cfblk->sarec = 1;	/* Disable source addr trig of CD */
	cfblk->tx_jabber = 0;	/* Disable jabber jam sequence */
	cfblk->hash_1 = 1; 	/* Use bits 0-5 in mc address hash */
	cfblk->lbpkpol = 0; 	/* Loopback pin active high */
	
	/* Byte B */
	cfblk->fdx = 0;		/* Disable full duplex operation */
	
	/* Byte C */
	cfblk->dummy_6 = 0x3f; 	/* all ones, Default multicast addresses & backoff. */
	cfblk->mult_ia = 0;	/* No multiple individual addresses */
	cfblk->dis_bof = 0;	/* Disable the backoff algorithm ?! */
	
	/* Byte D */
	cfblk->dummy_1 = 1; 	/* set to 1 */
	cfblk->tx_ifs_retrig = 3; /* Hmm... Disabled */
	cfblk->mc_all = (dev->mc_list || (dev->flags&IFF_ALLMULTI));/* multicast all mode */
	cfblk->rcv_mon = 0;	/* Monitor mode disabled */
	cfblk->frag_acpt = 0;	/* Do not accept fragments */
	cfblk->tstrttrs = 0;	/* No start transmission threshold */
	
	/* Byte E */
	cfblk->fretx = 1;	/* FIFO automatic retransmission */
	cfblk->runt_eop = 0;	/* drop "runt" packets */
	cfblk->hw_sw_pin = 0;	/* ?? */
	cfblk->big_endn = 0;	/* Big Endian ? no... */
	cfblk->syncrqs = 1;	/* Synchronous DRQ deassertion... */
	cfblk->sttlen = 1;  	/* 6 byte status registers */
	cfblk->rx_eop = 0;  	/* Signal EOP on packet reception */
	cfblk->tx_eop = 0;  	/* Signal EOP on packet transmission */

	/* Byte F */
	cfblk->rbuf_size = RX_BUF_SIZE >> 12; /* Set receive buffer size */
	cfblk->rcvstop = 1; 	/* Enable Receive Stop Register */

	if (znet_debug > 2) {
		int i;
		unsigned char *c;

		for (i = 0, c = (char *) cfblk; i < sizeof (*cfblk); i++)
			printk ("%02X ", c[i]);
		printk ("\n");
	}
	
	*znet->tx_cur++ = sizeof(struct i82593_conf_block);
	memcpy(znet->tx_cur, cfblk, sizeof(struct i82593_conf_block));
	znet->tx_cur += sizeof(struct i82593_conf_block)/2;
	outb(OP0_CONFIGURE | CR0_CHNL, ioaddr);

	/* XXX FIXME maz : Add multicast addresses here, so having a
	 * multicast address configured isn't equal to IFF_ALLMULTI */
}

/* The Z-Note probe is pretty easy.  The NETIDBLK exists in the safe-to-probe
   BIOS area.  We just scan for the signature, and pull the vital parameters
   out of the structure. */

static int __init znet_probe (void)
{
	int i;
	struct netidblk *netinfo;
	struct znet_private *znet;
	struct net_device *dev;
	char *p;
	int err = -ENOMEM;

	/* This code scans the region 0xf0000 to 0xfffff for a "NETIDBLK". */
	for(p = (char *)phys_to_virt(0xf0000); p < (char *)phys_to_virt(0x100000); p++)
		if (*p == 'N'  &&  strncmp(p, "NETIDBLK", 8) == 0)
			break;

	if (p >= (char *)phys_to_virt(0x100000)) {
		if (znet_debug > 1)
			printk(KERN_INFO "No Z-Note ethernet adaptor found.\n");
		return -ENODEV;
	}

	dev = alloc_etherdev(sizeof(struct znet_private));
	if (!dev)
		return -ENOMEM;

	SET_MODULE_OWNER (dev);

	znet = dev->priv;

	netinfo = (struct netidblk *)p;
	dev->base_addr = netinfo->iobase1;
	dev->irq = netinfo->irq1;

	printk(KERN_INFO "%s: ZNET at %#3lx,", dev->name, dev->base_addr);

	/* The station address is in the "netidblk" at 0x0f0000. */
	for (i = 0; i < 6; i++)
		printk(" %2.2x", dev->dev_addr[i] = netinfo->netid[i]);

	printk(", using IRQ %d DMA %d and %d.\n", dev->irq, netinfo->dma1,
	       netinfo->dma2);

	if (znet_debug > 1) {
		printk(KERN_INFO "%s: vendor '%16.16s' IRQ1 %d IRQ2 %d DMA1 %d DMA2 %d.\n",
		       dev->name, netinfo->vendor,
		       netinfo->irq1, netinfo->irq2,
		       netinfo->dma1, netinfo->dma2);
		printk(KERN_INFO "%s: iobase1 %#x size %d iobase2 %#x size %d net type %2.2x.\n",
		       dev->name, netinfo->iobase1, netinfo->iosize1,
		       netinfo->iobase2, netinfo->iosize2, netinfo->nettype);
	}

	if (znet_debug > 0)
		printk(KERN_INFO "%s", version);

	znet->rx_dma = netinfo->dma1;
	znet->tx_dma = netinfo->dma2;
	spin_lock_init(&znet->lock);
	znet->sia_base = 0xe6;	/* Magic address for the 82501 SIA */
	znet->sia_size = 2;
	/* maz: Despite the '593 being advertised above as using a
	 * single 8bits I/O port, this driver does many 16bits
	 * access. So set io_size accordingly */
	znet->io_size  = 2;

	if (!(znet->rx_start = kmalloc (DMA_BUF_SIZE, GFP_KERNEL | GFP_DMA)))
		goto free_dev;
	if (!(znet->tx_start = kmalloc (DMA_BUF_SIZE, GFP_KERNEL | GFP_DMA)))
		goto free_rx;

	if (!dma_page_eq (znet->rx_start, znet->rx_start + (RX_BUF_SIZE/2-1)) ||
	    !dma_page_eq (znet->tx_start, znet->tx_start + (TX_BUF_SIZE/2-1))) {
		printk (KERN_WARNING "tx/rx crossing DMA frontiers, giving up\n");
		goto free_tx;
	}
	
	znet->rx_end = znet->rx_start + RX_BUF_SIZE/2;
	znet->tx_buf_len = TX_BUF_SIZE/2;
	znet->tx_end = znet->tx_start + znet->tx_buf_len;

	/* The ZNET-specific entries in the device structure. */
	dev->open = &znet_open;
	dev->hard_start_xmit = &znet_send_packet;
	dev->stop = &znet_close;
	dev->get_stats	= net_get_stats;
	dev->set_multicast_list = &znet_set_multicast_list;
	dev->tx_timeout = znet_tx_timeout;
	dev->watchdog_timeo = TX_TIMEOUT;
	err = register_netdev(dev);
	if (err)
		goto free_tx;
	znet_dev = dev;
	return 0;

 free_tx:
	kfree(znet->tx_start);
 free_rx:
	kfree(znet->rx_start);
 free_dev:
	free_netdev(dev);
	return err;
}


static int znet_open(struct net_device *dev)
{
	int ioaddr = dev->base_addr;

	if (znet_debug > 2)
		printk(KERN_DEBUG "%s: znet_open() called.\n", dev->name);

	/* These should never fail.  You can't add devices to a sealed box! */
	if (znet_request_resources (dev)) {
		printk(KERN_WARNING "%s: Not opened -- resource busy?!?\n", dev->name);
		return -EBUSY;
	}

	znet_transceiver_power (dev, 1);
	
	/* According to the Crynwr driver we should wait 50 msec. for the
	   LAN clock to stabilize.  My experiments indicates that the '593 can
	   be initialized immediately.  The delay is probably needed for the
	   DC-to-DC converter to come up to full voltage, and for the oscillator
	   to be spot-on at 20Mhz before transmitting.
	   Until this proves to be a problem we rely on the higher layers for the
	   delay and save allocating a timer entry. */

	/* maz : Well, I'm getting every time the following message
	 * without the delay on a 486@33. This machine is much too
	 * fast... :-) So maybe the Crynwr driver wasn't wrong after
	 * all, even if the message is completly harmless on my
	 * setup. */
	mdelay (50);
	
	/* This follows the packet driver's lead, and checks for success. */
	if (inb(ioaddr) != 0x10 && inb(ioaddr) != 0x00)
		printk(KERN_WARNING "%s: Problem turning on the transceiver power.\n",
		       dev->name);

	hardware_init(dev);
	netif_start_queue (dev);

	return 0;
}


static void znet_tx_timeout (struct net_device *dev)
{
	int ioaddr = dev->base_addr;
	ushort event, tx_status, rx_offset, state;

	outb (CR0_STATUS_0, ioaddr);
	event = inb (ioaddr);
	outb (CR0_STATUS_1, ioaddr);
	tx_status = inw (ioaddr);
	outb (CR0_STATUS_2, ioaddr);
	rx_offset = inw (ioaddr);
	outb (CR0_STATUS_3, ioaddr);
	state = inb (ioaddr);
	printk (KERN_WARNING "%s: transmit timed out, status %02x %04x %04x %02x,"
	 " resetting.\n", dev->name, event, tx_status, rx_offset, state);
	if (tx_status == TX_LOST_CRS)
		printk (KERN_WARNING "%s: Tx carrier error, check transceiver cable.\n",
			dev->name);
	outb (OP0_RESET, ioaddr);
	hardware_init (dev);
	netif_wake_queue (dev);
}

static int znet_send_packet(struct sk_buff *skb, struct net_device *dev)
{
	int ioaddr = dev->base_addr;
	struct znet_private *znet = dev->priv;
	unsigned long flags;
	short length = skb->len;

	if (znet_debug > 4)
		printk(KERN_DEBUG "%s: ZNet_send_packet.\n", dev->name);

	if (length < ETH_ZLEN) {
		if (skb_padto(skb, ETH_ZLEN))
			return 0;
		length = ETH_ZLEN;
	}
	
	netif_stop_queue (dev);
	
	/* Check that the part hasn't reset itself, probably from suspend. */
	outb(CR0_STATUS_0, ioaddr);
	if (inw(ioaddr) == 0x0010 &&
	    inw(ioaddr) == 0x0000 &&
	    inw(ioaddr) == 0x0010) {
		if (znet_debug > 1)
			printk (KERN_WARNING "%s : waking up\n", dev->name);
		hardware_init(dev);
		znet_transceiver_power (dev, 1);
	}

	if (1) {
		unsigned char *buf = (void *)skb->data;
		ushort *tx_link = znet->tx_cur - 1;
		ushort rnd_len = (length + 1)>>1;
		
		znet->stats.tx_bytes+=length;

		if (znet->tx_cur >= znet->tx_end)
		  znet->tx_cur = znet->tx_start;
		*znet->tx_cur++ = length;
		if (znet->tx_cur + rnd_len + 1 > znet->tx_end) {
			int semi_cnt = (znet->tx_end - znet->tx_cur)<<1; /* Cvrt to byte cnt. */
			memcpy(znet->tx_cur, buf, semi_cnt);
			rnd_len -= semi_cnt>>1;
			memcpy(znet->tx_start, buf + semi_cnt, length - semi_cnt);
			znet->tx_cur = znet->tx_start + rnd_len;
		} else {
			memcpy(znet->tx_cur, buf, skb->len);
			znet->tx_cur += rnd_len;
		}
		*znet->tx_cur++ = 0;

		spin_lock_irqsave(&znet->lock, flags);
		{
			*tx_link = OP0_TRANSMIT | CR0_CHNL;
			/* Is this always safe to do? */
			outb(OP0_TRANSMIT | CR0_CHNL, ioaddr);
		}
		spin_unlock_irqrestore (&znet->lock, flags);

		dev->trans_start = jiffies;
		netif_start_queue (dev);

		if (znet_debug > 4)
		  printk(KERN_DEBUG "%s: Transmitter queued, length %d.\n", dev->name, length);
	}
	dev_kfree_skb(skb); 
	return 0;
}

/* The ZNET interrupt handler. */
static irqreturn_t znet_interrupt(int irq, void *dev_id, struct pt_regs * regs)
{
	struct net_device *dev = dev_id;
	struct znet_private *znet = dev->priv;
	int ioaddr;
	int boguscnt = 20;
	int handled = 0;

	if (dev == NULL) {
		printk(KERN_WARNING "znet_interrupt(): IRQ %d for unknown device.\n", irq);
		return IRQ_NONE;
	}

	spin_lock (&znet->lock);
	
	ioaddr = dev->base_addr;

	outb(CR0_STATUS_0, ioaddr);
	do {
		ushort status = inb(ioaddr);
		if (znet_debug > 5) {
			ushort result, rx_ptr, running;
			outb(CR0_STATUS_1, ioaddr);
			result = inw(ioaddr);
			outb(CR0_STATUS_2, ioaddr);
			rx_ptr = inw(ioaddr);
			outb(CR0_STATUS_3, ioaddr);
			running = inb(ioaddr);
			printk(KERN_DEBUG "%s: interrupt, status %02x, %04x %04x %02x serial %d.\n",
				 dev->name, status, result, rx_ptr, running, boguscnt);
		}
		if ((status & SR0_INTERRUPT) == 0)
			break;

		handled = 1;

		if ((status & SR0_EVENT_MASK) == SR0_TRANSMIT_DONE ||
		    (status & SR0_EVENT_MASK) == SR0_RETRANSMIT_DONE ||
		    (status & SR0_EVENT_MASK) == SR0_TRANSMIT_NO_CRC_DONE) {
			int tx_status;
			outb(CR0_STATUS_1, ioaddr);
			tx_status = inw(ioaddr);
			/* It's undocumented, but tx_status seems to match the i82586. */
			if (tx_status & TX_OK) {
				znet->stats.tx_packets++;
				znet->stats.collisions += tx_status & TX_NCOL_MASK;
			} else {
				if (tx_status & (TX_LOST_CTS | TX_LOST_CRS))
					znet->stats.tx_carrier_errors++;
				if (tx_status & TX_UND_RUN)
					znet->stats.tx_fifo_errors++;
				if (!(tx_status & TX_HRT_BEAT))
					znet->stats.tx_heartbeat_errors++;
				if (tx_status & TX_MAX_COL)
					znet->stats.tx_aborted_errors++;
				/* ...and the catch-all. */
				if ((tx_status | (TX_LOST_CRS | TX_LOST_CTS | TX_UND_RUN | TX_HRT_BEAT | TX_MAX_COL)) != (TX_LOST_CRS | TX_LOST_CTS | TX_UND_RUN | TX_HRT_BEAT | TX_MAX_COL))
					znet->stats.tx_errors++;

				/* Transceiver may be stuck if cable
				 * was removed while emiting a
				 * packet. Flip it off, then on to
				 * reset it. This is very empirical,
				 * but it seems to work. */
				
				znet_transceiver_power (dev, 0);
				znet_transceiver_power (dev, 1);
			}
			netif_wake_queue (dev);
		}

		if ((status & SR0_RECEPTION) ||
		    (status & SR0_EVENT_MASK) == SR0_STOP_REG_HIT) {
			znet_rx(dev);
		}
		/* Clear the interrupts we've handled. */
		outb(CR0_INT_ACK, ioaddr);
	} while (boguscnt--);

	spin_unlock (&znet->lock);
	
	return IRQ_RETVAL(handled);
}

static void znet_rx(struct net_device *dev)
{
	struct znet_private *znet = dev->priv;
	int ioaddr = dev->base_addr;
	int boguscount = 1;
	short next_frame_end_offset = 0; 		/* Offset of next frame start. */
	short *cur_frame_end;
	short cur_frame_end_offset;

	outb(CR0_STATUS_2, ioaddr);
	cur_frame_end_offset = inw(ioaddr);

	if (cur_frame_end_offset == znet->rx_cur - znet->rx_start) {
		printk(KERN_WARNING "%s: Interrupted, but nothing to receive, offset %03x.\n",
			   dev->name, cur_frame_end_offset);
		return;
	}

	/* Use same method as the Crynwr driver: construct a forward list in
	   the same area of the backwards links we now have.  This allows us to
	   pass packets to the upper layers in the order they were received --
	   important for fast-path sequential operations. */
	 while (znet->rx_start + cur_frame_end_offset != znet->rx_cur
			&& ++boguscount < 5) {
		unsigned short hi_cnt, lo_cnt, hi_status, lo_status;
		int count, status;

		if (cur_frame_end_offset < 4) {
			/* Oh no, we have a special case: the frame trailer wraps around
			   the end of the ring buffer.  We've saved space at the end of
			   the ring buffer for just this problem. */
			memcpy(znet->rx_end, znet->rx_start, 8);
			cur_frame_end_offset += (RX_BUF_SIZE/2);
		}
		cur_frame_end = znet->rx_start + cur_frame_end_offset - 4;

		lo_status = *cur_frame_end++;
		hi_status = *cur_frame_end++;
		status = ((hi_status & 0xff) << 8) + (lo_status & 0xff);
		lo_cnt = *cur_frame_end++;
		hi_cnt = *cur_frame_end++;
		count = ((hi_cnt & 0xff) << 8) + (lo_cnt & 0xff);

		if (znet_debug > 5)
		  printk(KERN_DEBUG "Constructing trailer at location %03x, %04x %04x %04x %04x"
				 " count %#x status %04x.\n",
				 cur_frame_end_offset<<1, lo_status, hi_status, lo_cnt, hi_cnt,
				 count, status);
		cur_frame_end[-4] = status;
		cur_frame_end[-3] = next_frame_end_offset;
		cur_frame_end[-2] = count;
		next_frame_end_offset = cur_frame_end_offset;
		cur_frame_end_offset -= ((count + 1)>>1) + 3;
		if (cur_frame_end_offset < 0)
		  cur_frame_end_offset += RX_BUF_SIZE/2;
	};

	/* Now step  forward through the list. */
	do {
		ushort *this_rfp_ptr = znet->rx_start + next_frame_end_offset;
		int status = this_rfp_ptr[-4];
		int pkt_len = this_rfp_ptr[-2];
	  
		if (znet_debug > 5)
		  printk(KERN_DEBUG "Looking at trailer ending at %04x status %04x length %03x"
				 " next %04x.\n", next_frame_end_offset<<1, status, pkt_len,
				 this_rfp_ptr[-3]<<1);
		/* Once again we must assume that the i82586 docs apply. */
		if ( ! (status & RX_RCV_OK)) { /* There was an error. */
			znet->stats.rx_errors++;
			if (status & RX_CRC_ERR) znet->stats.rx_crc_errors++;
			if (status & RX_ALG_ERR) znet->stats.rx_frame_errors++;
#if 0
			if (status & 0x0200) znet->stats.rx_over_errors++; /* Wrong. */
			if (status & 0x0100) znet->stats.rx_fifo_errors++;
#else
			/* maz : Wild guess... */
			if (status & RX_OVRRUN) znet->stats.rx_over_errors++;
#endif
			if (status & RX_SRT_FRM) znet->stats.rx_length_errors++;
		} else if (pkt_len > 1536) {
			znet->stats.rx_length_errors++;
		} else {
			/* Malloc up new buffer. */
			struct sk_buff *skb;

			skb = dev_alloc_skb(pkt_len);
			if (skb == NULL) {
				if (znet_debug)
				  printk(KERN_WARNING "%s: Memory squeeze, dropping packet.\n", dev->name);
				znet->stats.rx_dropped++;
				break;
			}
			skb->dev = dev;

			if (&znet->rx_cur[(pkt_len+1)>>1] > znet->rx_end) {
				int semi_cnt = (znet->rx_end - znet->rx_cur)<<1;
				memcpy(skb_put(skb,semi_cnt), znet->rx_cur, semi_cnt);
				memcpy(skb_put(skb,pkt_len-semi_cnt), znet->rx_start,
					   pkt_len - semi_cnt);
			} else {
				memcpy(skb_put(skb,pkt_len), znet->rx_cur, pkt_len);
				if (znet_debug > 6) {
					unsigned int *packet = (unsigned int *) skb->data;
					printk(KERN_DEBUG "Packet data is %08x %08x %08x %08x.\n", packet[0],
						   packet[1], packet[2], packet[3]);
				}
		  }
		  skb->protocol=eth_type_trans(skb,dev);
		  netif_rx(skb);
		  dev->last_rx = jiffies;
		  znet->stats.rx_packets++;
		  znet->stats.rx_bytes += pkt_len;
		}
		znet->rx_cur = this_rfp_ptr;
		if (znet->rx_cur >= znet->rx_end)
			znet->rx_cur -= RX_BUF_SIZE/2;
		update_stop_hit(ioaddr, (znet->rx_cur - znet->rx_start)<<1);
		next_frame_end_offset = this_rfp_ptr[-3];
		if (next_frame_end_offset == 0)		/* Read all the frames? */
			break;			/* Done for now */
		this_rfp_ptr = znet->rx_start + next_frame_end_offset;
	} while (--boguscount);

	/* If any worth-while packets have been received, dev_rint()
	   has done a mark_bh(INET_BH) for us and will work on them
	   when we get to the bottom-half routine. */
	return;
}

/* The inverse routine to znet_open(). */
static int znet_close(struct net_device *dev)
{
	int ioaddr = dev->base_addr;

	netif_stop_queue (dev);

	outb(OP0_RESET, ioaddr);			/* CMD0_RESET */

	if (znet_debug > 1)
		printk(KERN_DEBUG "%s: Shutting down ethercard.\n", dev->name);
	/* Turn off transceiver power. */
	znet_transceiver_power (dev, 0);
	
	znet_release_resources (dev);
	
	return 0;
}

/* Get the current statistics.	This may be called with the card open or
   closed. */
static struct net_device_stats *net_get_stats(struct net_device *dev)
{
	struct znet_private *znet = dev->priv;

	return &znet->stats;
}

static void show_dma(struct net_device *dev)
{
	short ioaddr = dev->base_addr;
	unsigned char stat = inb (ioaddr);
	struct znet_private *znet = dev->priv;
	unsigned long flags;
	short dma_port = ((znet->tx_dma&3)<<2) + IO_DMA2_BASE;
	unsigned addr = inb(dma_port);
	short residue;

	addr |= inb(dma_port) << 8;
	residue = get_dma_residue(znet->tx_dma);
		
	if (znet_debug > 1) {
		flags=claim_dma_lock();
		printk(KERN_DEBUG "Stat:%02x Addr: %04x cnt:%3x\n",
		       stat, addr<<1, residue);
		release_dma_lock(flags);
	}
}

/* Initialize the hardware.  We have to do this when the board is open()ed
   or when we come out of suspend mode. */
static void hardware_init(struct net_device *dev)
{
	unsigned long flags;
	short ioaddr = dev->base_addr;
	struct znet_private *znet = dev->priv;

	znet->rx_cur = znet->rx_start;
	znet->tx_cur = znet->tx_start;

	/* Reset the chip, and start it up. */
	outb(OP0_RESET, ioaddr);

	flags=claim_dma_lock();
	disable_dma(znet->rx_dma); 		/* reset by an interrupting task. */
	clear_dma_ff(znet->rx_dma);
	set_dma_mode(znet->rx_dma, DMA_RX_MODE);
	set_dma_addr(znet->rx_dma, (unsigned int) znet->rx_start);
	set_dma_count(znet->rx_dma, RX_BUF_SIZE);
	enable_dma(znet->rx_dma);
	/* Now set up the Tx channel. */
	disable_dma(znet->tx_dma);
	clear_dma_ff(znet->tx_dma);
	set_dma_mode(znet->tx_dma, DMA_TX_MODE);
	set_dma_addr(znet->tx_dma, (unsigned int) znet->tx_start);
	set_dma_count(znet->tx_dma, znet->tx_buf_len<<1);
	enable_dma(znet->tx_dma);
	release_dma_lock(flags);
	
	if (znet_debug > 1)
	  printk(KERN_DEBUG "%s: Initializing the i82593, rx buf %p tx buf %p\n",
			 dev->name, znet->rx_start,znet->tx_start);
	/* Do an empty configure command, just like the Crynwr driver.  This
	   resets to chip to its default values. */
	*znet->tx_cur++ = 0;
	*znet->tx_cur++ = 0;
	show_dma(dev);
	outb(OP0_CONFIGURE | CR0_CHNL, ioaddr);

	znet_set_multicast_list (dev);

	*znet->tx_cur++ = 6;
	memcpy(znet->tx_cur, dev->dev_addr, 6);
	znet->tx_cur += 3;
	show_dma(dev);
	outb(OP0_IA_SETUP | CR0_CHNL, ioaddr);
	show_dma(dev);

	update_stop_hit(ioaddr, 8192);
	if (znet_debug > 1)  printk(KERN_DEBUG "enabling Rx.\n");
	outb(OP0_RCV_ENABLE, ioaddr);
	netif_start_queue (dev);
}

static void update_stop_hit(short ioaddr, unsigned short rx_stop_offset)
{
	outb(OP0_SWIT_TO_PORT_1 | CR0_CHNL, ioaddr);
	if (znet_debug > 5)
	  printk(KERN_DEBUG "Updating stop hit with value %02x.\n",
			 (rx_stop_offset >> 6) | CR1_STOP_REG_UPDATE);
	outb((rx_stop_offset >> 6) | CR1_STOP_REG_UPDATE, ioaddr);
	outb(OP1_SWIT_TO_PORT_0, ioaddr);
}

static __exit void znet_cleanup (void)
{
	if (znet_dev) {
		struct znet_private *znet = znet_dev->priv;

		unregister_netdev (znet_dev);
		kfree (znet->rx_start);
		kfree (znet->tx_start);
		free_netdev (znet_dev);
	}
}

module_init (znet_probe);
module_exit (znet_cleanup);