summary refs log tree commit diff stats
path: root/arch/um/kernel
diff options
context:
space:
mode:
author Anton Arapov <anton@redhat.com> 2012-04-16 10:05:28 +0200
committer Anton Arapov <anton@redhat.com> 2012-04-16 10:05:28 +0200
commit b4b6116a13633898cf868f2f103c96a90c4c20f8 (patch)
tree 93d1b7e2cfcdf473d8d4ff3ad141fa864f8491f6 /arch/um/kernel
parent edd4be777c953e5faafc80d091d3084b4343f5d3 (diff)
download kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.gz
kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.xz
kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.zip
fedora kernel: d9aad82f3319f3cfd1aebc01234254ef0c37ad84v3.3.2-1
Signed-off-by: Anton Arapov <anton@redhat.com>
Diffstat (limited to 'arch/um/kernel')
-rw-r--r-- arch/um/kernel/Makefile | 48
-rw-r--r-- arch/um/kernel/asm-offsets.c | 1
-rw-r--r-- arch/um/kernel/config.c.in | 26
-rw-r--r-- arch/um/kernel/dyn.lds.S | 166
-rw-r--r-- arch/um/kernel/early_printk.c | 33
-rw-r--r-- arch/um/kernel/exec.c | 88
-rw-r--r-- arch/um/kernel/exitcode.c | 78
-rw-r--r-- arch/um/kernel/gmon_syms.c | 9
-rw-r--r-- arch/um/kernel/gprof_syms.c | 9
-rw-r--r-- arch/um/kernel/init_task.c | 38
-rw-r--r-- arch/um/kernel/initrd.c | 85
-rw-r--r-- arch/um/kernel/internal.h | 1
-rw-r--r-- arch/um/kernel/irq.c | 464
-rw-r--r-- arch/um/kernel/ksyms.c | 44
-rw-r--r-- arch/um/kernel/mem.c | 322
-rw-r--r-- arch/um/kernel/physmem.c | 215
-rw-r--r-- arch/um/kernel/process.c | 468
-rw-r--r-- arch/um/kernel/ptrace.c | 209
-rw-r--r-- arch/um/kernel/reboot.c | 57
-rw-r--r-- arch/um/kernel/sigio.c | 50
-rw-r--r-- arch/um/kernel/signal.c | 181
-rw-r--r-- arch/um/kernel/skas/Makefile | 15
-rw-r--r-- arch/um/kernel/skas/clone.c | 56
-rw-r--r-- arch/um/kernel/skas/mmu.c | 192
-rw-r--r-- arch/um/kernel/skas/process.c | 81
-rw-r--r-- arch/um/kernel/skas/syscall.c | 40
-rw-r--r-- arch/um/kernel/skas/uaccess.c | 259
-rw-r--r-- arch/um/kernel/smp.c | 239
-rw-r--r-- arch/um/kernel/syscall.c | 68
-rw-r--r-- arch/um/kernel/sysrq.c | 78
-rw-r--r-- arch/um/kernel/time.c | 115
-rw-r--r-- arch/um/kernel/tlb.c | 536
-rw-r--r-- arch/um/kernel/trap.c | 276
-rw-r--r-- arch/um/kernel/um_arch.c | 404
-rw-r--r-- arch/um/kernel/umid.c | 38
-rw-r--r-- arch/um/kernel/uml.lds.S | 108
-rw-r--r-- arch/um/kernel/vmlinux.lds.S | 8
37 files changed, 5105 insertions, 0 deletions
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
new file mode 100644
index 00000000000..bc494741b1f
--- /dev/null
+++ b/arch/um/kernel/Makefile
@@ -0,0 +1,48 @@
+#
+# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux,intel}.com)
+# Licensed under the GPL
+#
+
+# Preprocessor flags for generating vmlinux.lds: undefine the $(SUBARCH) name
+# as a macro and pass in the start address and the ELF arch/format names.
+CPPFLAGS_vmlinux.lds := -U$(SUBARCH) -DSTART=$(LDS_START) \
+ -DELF_ARCH=$(LDS_ELF_ARCH) \
+ -DELF_FORMAT=$(LDS_ELF_FORMAT)
+extra-y := vmlinux.lds
+clean-files :=
+
+# Objects always built from this directory, plus the skas/ subdirectory.
+obj-y = config.o exec.o exitcode.o init_task.o irq.o ksyms.o mem.o \
+ physmem.o process.o ptrace.o reboot.o sigio.o \
+ signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o \
+ um_arch.o umid.o skas/
+
+# Optional objects, each selected by the named configuration symbol.
+obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
+obj-$(CONFIG_GPROF) += gprof_syms.o
+obj-$(CONFIG_GCOV) += gmon_syms.o
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+
+# NOTE(review): presumably tells Makefile.rules to build config.o as host
+# userspace code (config.c.in includes <stdio.h>) - confirm in Makefile.rules.
+USER_OBJS := config.o
+
+include arch/um/scripts/Makefile.rules
+
+# Generated files produced by the if_changed rules below.
+targets := config.c config.tmp
+
+# Be careful with the below Sed code - sed is pitfall-rich!
+# We use sed to lower build requirements, for "embedded" builders for instance.
+
+# Turn each line of .config into one C string-literal array element:
+# escape embedded double quotes, prefix the line with '"' and suffix it
+# with '\n",'.
+$(obj)/config.tmp: $(objtree)/.config FORCE
+ $(call if_changed,quote1)
+
+quiet_cmd_quote1 = QUOTE $@
+ cmd_quote1 = sed -e 's/"/\\"/g' -e 's/^/"/' -e 's/$$/\\n",/' \
+ $< > $@
+
+# Generate config.c from the template: on every template line that contains
+# the placeholder word, delete the quoted placeholder token, read in
+# config.tmp and append an empty string literal ("").  The address matches
+# any line containing the word, so the template must not mention it anywhere
+# else (comments included).
+$(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE
+ $(call if_changed,quote2)
+
+quiet_cmd_quote2 = QUOTE $@
+ cmd_quote2 = sed -e '/CONFIG/{' \
+ -e 's/"CONFIG"//' \
+ -e 'r $(obj)/config.tmp' \
+ -e 'a \' \
+ -e '""' \
+ -e '}' \
+ $< > $@
diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
new file mode 100644
index 00000000000..91ea538e161
--- /dev/null
+++ b/arch/um/kernel/asm-offsets.c
@@ -0,0 +1 @@
+#include "sysdep/kernel-offsets.h"
diff --git a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in
new file mode 100644
index 00000000000..b7a43feafde
--- /dev/null
+++ b/arch/um/kernel/config.c.in
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "init.h"
+
+static __initdata const char *config[] = {
+"CONFIG"
+};
+
+/*
+ * Handler for the "--showconfig" switch: write every string of the embedded
+ * configuration table to stdout, then terminate the process with status 0.
+ * Neither argument is used.  Does not return.
+ */
+static int __init print_config(char *line, int *add)
+{
+	const char **entry = config;
+	const char **const end = config + sizeof(config) / sizeof(config[0]);
+
+	while (entry != end)
+		printf("%s", *entry++);
+	exit(0);
+}
+
+__uml_setup("--showconfig", print_config,
+"--showconfig\n"
+" Prints the config file that this UML binary was generated from.\n\n"
+);
+
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
new file mode 100644
index 00000000000..a3cab6d3ae0
--- /dev/null
+++ b/arch/um/kernel/dyn.lds.S
@@ -0,0 +1,166 @@
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/page.h>
+
+OUTPUT_FORMAT(ELF_FORMAT)
+OUTPUT_ARCH(ELF_ARCH)
+ENTRY(_start)
+jiffies = jiffies_64;
+
+SECTIONS
+{
+ PROVIDE (__executable_start = START);
+ . = START + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ __binary_start = .;
+ . = ALIGN(4096); /* Init code and data */
+ _text = .;
+ _stext = .;
+ __init_begin = .;
+ INIT_TEXT_SECTION(PAGE_SIZE)
+
+ . = ALIGN(PAGE_SIZE);
+
+ /* Read-only sections, merged into text segment: */
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.init : { *(.rel.init) }
+ .rela.init : { *(.rela.init) }
+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rel.fini : { *(.rel.fini) }
+ .rela.fini : { *(.rela.fini) }
+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rel.ctors : { *(.rel.ctors) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rel.dtors : { *(.rel.dtors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rel.got : { *(.rel.got) }
+ .rela.got : { *(.rela.got) }
+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rel.plt : {
+ *(.rel.plt)
+ PROVIDE_HIDDEN(__rel_iplt_start = .);
+ *(.rel.iplt)
+ PROVIDE_HIDDEN(__rel_iplt_end = .);
+ }
+ .rela.plt : {
+ *(.rela.plt)
+ PROVIDE_HIDDEN(__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN(__rela_iplt_end = .);
+ }
+ .init : {
+ KEEP (*(.init))
+ } =0x90909090
+ .plt : { *(.plt) }
+ .text : {
+ TEXT_TEXT
+ SCHED_TEXT
+ LOCK_TEXT
+ *(.fixup)
+ *(.stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+
+ . = ALIGN(PAGE_SIZE);
+ } =0x90909090
+ . = ALIGN(PAGE_SIZE);
+ .syscall_stub : {
+ __syscall_stub_start = .;
+ *(.__syscall_stub*)
+ __syscall_stub_end = .;
+ }
+ .fini : {
+ KEEP (*(.fini))
+ } =0x90909090
+
+ .kstrtab : { *(.kstrtab) }
+
+ #include "asm/common.lds.S"
+
+ init.data : { INIT_DATA }
+
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ .preinit_array : { *(.preinit_array) }
+ .init_array : { *(.init_array) }
+ .fini_array : { *(.fini_array) }
+ .data : {
+ INIT_TASK_DATA(KERNEL_STACK_SIZE)
+ . = ALIGN(KERNEL_STACK_SIZE);
+ *(.data..init_irqstack)
+ DATA_DATA
+ *(.data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ .eh_frame : { KEEP (*(.eh_frame)) }
+ .gcc_except_table : { *(.gcc_except_table) }
+ .dynamic : { *(.dynamic) }
+ .ctors : {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ /* We don't want to include the .ctors section
+ from the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors : {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .got : { *(.got.plt) *(.got) }
+ _edata = .;
+ PROVIDE (edata = .);
+ .bss : {
+ __bss_start = .;
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ /* NOTE(review): both ALIGN statements below sit inside the section
+ body, while the text above suggests the second was meant to follow
+ the closing brace. As written the second one is a no-op - confirm. */
+ . = ALIGN(32 / 8);
+ . = ALIGN(32 / 8);
+ }
+ _end = .;
+ PROVIDE (end = .);
+
+ STABS_DEBUG
+
+ DWARF_DEBUG
+
+ DISCARDS
+}
diff --git a/arch/um/kernel/early_printk.c b/arch/um/kernel/early_printk.c
new file mode 100644
index 00000000000..ec649bf72f6
--- /dev/null
+++ b/arch/um/kernel/early_printk.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/console.h>
+#include <linux/init.h>
+#include "os.h"
+
+/*
+ * Console write hook: pass the n bytes starting at s to um_early_printk().
+ * The console pointer is not used.
+ */
+static void early_console_write(struct console *con, const char *s, unsigned int n)
+{
+	um_early_printk(s, n);
+}
+
+/* Boot console ("earlycon") whose output goes through early_console_write(). */
+static struct console early_console = {
+	.name	= "earlycon",
+	.index	= -1,
+	.flags	= CON_BOOT,
+	.write	= early_console_write,
+};
+
+/*
+ * "earlyprintk" boot parameter handler: register the early console.
+ * The parameter value in buf is ignored.  Always returns 0.
+ */
+static int __init setup_early_printk(char *buf)
+{
+	register_console(&early_console);
+	return 0;
+}
+
+early_param("earlyprintk", setup_early_printk);
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
new file mode 100644
index 00000000000..6cade936636
--- /dev/null
+++ b/arch/um/kernel/exec.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/stddef.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include "as-layout.h"
+#include "mem_user.h"
+#include "skas.h"
+#include "os.h"
+#include "internal.h"
+
+/*
+ * Clear the current task's address space: run the arch-specific flush, unmap
+ * the range below STUB_START and the range from STUB_END up to
+ * host_task_size, then switch to the task's mm context.
+ *
+ * If either unmap fails the error is logged and the task is sent SIGKILL.
+ */
+void flush_thread(void)
+{
+	void *data = NULL;
+	int ret;
+
+	arch_flush_thread(&current->thread.arch);
+
+	ret = unmap(&current->mm->context.id, 0, STUB_START, 0, &data);
+	/*
+	 * The second unmap is only attempted when the first one succeeded.
+	 * Keep its real return value instead of collapsing it to 0/1 with a
+	 * logical OR, so that the message below reports the actual error.
+	 */
+	if (ret == 0)
+		ret = unmap(&current->mm->context.id, STUB_END,
+			    host_task_size - STUB_END, 1, &data);
+	if (ret) {
+		printk(KERN_ERR "flush_thread - clearing address space failed, "
+		       "err = %d\n", ret);
+		force_sig(SIGKILL, current);
+	}
+
+	__switch_mm(&current->mm->context.id);
+}
+
+/*
+ * Set the stack pointer and instruction pointer stored in regs to esp and
+ * eip respectively.
+ */
+void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
+{
+	PT_REGS_SP(regs) = esp;
+	PT_REGS_IP(regs) = eip;
+}
+EXPORT_SYMBOL(start_thread);
+
+/*
+ * Common execve path: run do_execve() on the current task's registers.
+ * On success, clear PT_DTRACE (and run the optional SUBARCH_EXECVE1 hook)
+ * under the task lock.  Returns do_execve()'s result.
+ */
+static long execve1(const char *file,
+		    const char __user *const __user *argv,
+		    const char __user *const __user *env)
+{
+	long rc = do_execve(file, argv, env, &current->thread.regs);
+
+	if (rc != 0)
+		return rc;
+
+	task_lock(current);
+	current->ptrace &= ~PT_DTRACE;
+#ifdef SUBARCH_EXECVE1
+	SUBARCH_EXECVE1(&current->thread.regs.regs);
+#endif
+	task_unlock(current);
+	return 0;
+}
+
+/*
+ * execve for callers that already hold a kernel-space file name.  On
+ * success control is transferred through UML_LONGJMP on the task's exec_buf;
+ * on failure the error from execve1() is returned.
+ */
+long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env)
+{
+	long err = execve1(file, argv, env);
+
+	if (err)
+		return err;
+
+	UML_LONGJMP(current->thread.exec_buf, 1);
+	return err;
+}
+
+/*
+ * execve system call: copy the file name in with getname(), run execve1()
+ * on it and release the name again.  Returns the getname() error if the
+ * copy fails, otherwise the result of execve1().
+ */
+long sys_execve(const char __user *file, const char __user *const __user *argv,
+		const char __user *const __user *env)
+{
+	char *filename = getname(file);
+	long error;
+
+	if (IS_ERR(filename))
+		return PTR_ERR(filename);
+
+	error = execve1(filename, argv, env);
+	putname(filename);
+	return error;
+}
diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c
new file mode 100644
index 00000000000..829df49dee9
--- /dev/null
+++ b/arch/um/kernel/exitcode.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/types.h>
+#include <asm/uaccess.h>
+
+/*
+ * If read and write race, the read will still atomically read a valid
+ * value.
+ */
+int uml_exitcode = 0;
+
+/*
+ * seq_file show callback: print the current exit code as a decimal number
+ * followed by a newline.  Always returns 0.
+ */
+static int exitcode_proc_show(struct seq_file *m, void *v)
+{
+	/*
+	 * Read uml_exitcode once into a local so the formatting code never
+	 * has to access the shared variable itself.
+	 */
+	int snapshot = uml_exitcode;
+
+	seq_printf(m, "%d\n", snapshot);
+	return 0;
+}
+
+/* open callback: bind the file to exitcode_proc_show() through single_open(). */
+static int exitcode_proc_open(struct inode *inode, struct file *file)
+{
+	int rc = single_open(file, exitcode_proc_show, NULL);
+
+	return rc;
+}
+
+/*
+ * write callback for /proc/exitcode: parse an integer from user space and
+ * store it in uml_exitcode.
+ *
+ * At most sizeof(buf) - 1 bytes are copied from the user buffer, and the
+ * copy is always NUL-terminated before parsing.  The previous code copied
+ * "count" bytes unchecked, so a long write overran the on-stack buffer.
+ *
+ * Returns count on success, -EFAULT if the user buffer cannot be read, or
+ * -EINVAL if the number is followed by anything other than whitespace or
+ * the end of the string.
+ */
+static ssize_t exitcode_proc_write(struct file *file,
+		const char __user *buffer, size_t count, loff_t *pos)
+{
+	char *end, buf[sizeof("nnnnn\0")];
+	size_t size;
+	int tmp;
+
+	/* Leave room for the terminator added below. */
+	size = min(count, sizeof(buf) - 1);
+	if (copy_from_user(buf, buffer, size))
+		return -EFAULT;
+	buf[size] = '\0';
+
+	tmp = simple_strtol(buf, &end, 0);
+	if ((*end != '\0') && !isspace(*end))
+		return -EINVAL;
+
+	uml_exitcode = tmp;
+	return count;
+}
+
+/* File operations for /proc/exitcode: seq_file-backed reads, custom write. */
+static const struct file_operations exitcode_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= exitcode_proc_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.write		= exitcode_proc_write,
+};
+
+/*
+ * Initcall: create /proc/exitcode with mode 0600.  A failure to create the
+ * entry is only logged; the function returns 0 either way.
+ */
+static int make_proc_exitcode(void)
+{
+	if (proc_create("exitcode", 0600, NULL, &exitcode_proc_fops) == NULL)
+		printk(KERN_WARNING "make_proc_exitcode : Failed to register "
+		       "/proc/exitcode\n");
+
+	return 0;
+}
+
+__initcall(make_proc_exitcode);
diff --git a/arch/um/kernel/gmon_syms.c b/arch/um/kernel/gmon_syms.c
new file mode 100644
index 00000000000..e9bcf247bce
--- /dev/null
+++ b/arch/um/kernel/gmon_syms.c
@@ -0,0 +1,9 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/module.h"
+
+/*
+ * Export __bb_init_func to modules.  The declaration is weak, so the
+ * symbol does not have to be defined for this file to link.
+ */
+extern void __bb_init_func(void *) __attribute__((weak));
+EXPORT_SYMBOL(__bb_init_func);
diff --git a/arch/um/kernel/gprof_syms.c b/arch/um/kernel/gprof_syms.c
new file mode 100644
index 00000000000..e2f043d0de6
--- /dev/null
+++ b/arch/um/kernel/gprof_syms.c
@@ -0,0 +1,9 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/module.h"
+
+/*
+ * Export mcount, which is defined outside this file, to modules.
+ * NOTE(review): presumably the compiler-inserted gprof profiling hook - confirm.
+ */
+extern void mcount(void);
+EXPORT_SYMBOL(mcount);
diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
new file mode 100644
index 00000000000..ddc9698b66e
--- /dev/null
+++ b/arch/um/kernel/init_task.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,intel.linux}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "linux/init_task.h"
+#include "linux/fs.h"
+#include "linux/module.h"
+#include "linux/mqueue.h"
+#include "asm/uaccess.h"
+
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Second thread_union, placed in the .data..init_irqstack section (dyn.lds.S
+ * aligns that section to KERNEL_STACK_SIZE).  Its thread_info is initialised
+ * from init_task, like init_thread_union.
+ * NOTE(review): by its name this is the IRQ stack for CPU 0 - confirm.
+ */
+union thread_union cpu0_irqstack
+ __attribute__((__section__(".data..init_irqstack"))) =
+ { INIT_THREAD_INFO(init_task) };
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
new file mode 100644
index 00000000000..10cc18f729f
--- /dev/null
+++ b/arch/um/kernel/initrd.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/init.h"
+#include "linux/bootmem.h"
+#include "linux/initrd.h"
+#include "asm/types.h"
+#include "init.h"
+#include "os.h"
+
+/* Changed by uml_initrd_setup, which is a setup */
+static char *initrd __initdata = NULL;
+static int load_initrd(char *filename, void *buf, int size);
+
+/*
+ * Post-setup hook: if an initrd file was named on the command line, load it
+ * into bootmem and publish its location in initrd_start/initrd_end.
+ * Every failure path simply leaves the initrd unset; the function always
+ * returns 0.
+ */
+static int __init read_initrd(void)
+{
+	long long size;
+	void *area;
+
+	if (initrd == NULL)
+		return 0;
+
+	if (os_file_size(initrd, &size))
+		return 0;
+
+	/*
+	 * alloc_bootmem cannot cope with a request for no memory, so an
+	 * empty file is rejected up front.
+	 */
+	if (size == 0) {
+		printk(KERN_ERR "\"%s\" is a zero-size initrd\n", initrd);
+		return 0;
+	}
+
+	area = alloc_bootmem(size);
+	if (area == NULL)
+		return 0;
+
+	if (load_initrd(initrd, area, size) != -1) {
+		initrd_start = (unsigned long) area;
+		initrd_end = initrd_start + size;
+	}
+	return 0;
+}
+
+__uml_postsetup(read_initrd);
+
+/*
+ * "initrd=" switch handler: remember the file name given in line for
+ * read_initrd().  The add argument is not used.  Always returns 0.
+ */
+static int __init uml_initrd_setup(char *line, int *add)
+{
+	initrd = line;
+
+	return 0;
+}
+
+__uml_setup("initrd=", uml_initrd_setup,
+"initrd=<initrd image>\n"
+" This is used to boot UML from an initrd image. The argument is the\n"
+" name of the file containing the image.\n\n"
+);
+
+/*
+ * Read exactly size bytes of the host file filename into buf.
+ *
+ * Returns 0 on success and -1 if the file cannot be opened or the read does
+ * not deliver size bytes.  The descriptor is closed on every path after a
+ * successful open; previously it was leaked when the read failed.
+ */
+static int load_initrd(char *filename, void *buf, int size)
+{
+	int fd, n, ret = 0;
+
+	fd = os_open_file(filename, of_read(OPENFLAGS()), 0);
+	if (fd < 0) {
+		printk(KERN_ERR "Opening '%s' failed - err = %d\n", filename,
+		       -fd);
+		return -1;
+	}
+
+	n = os_read_file(fd, buf, size);
+	if (n != size) {
+		printk(KERN_ERR "Read of %d bytes from '%s' failed, "
+		       "err = %d\n", size,
+		       filename, -n);
+		ret = -1;
+	}
+
+	os_close_file(fd);
+	return ret;
+}
diff --git a/arch/um/kernel/internal.h b/arch/um/kernel/internal.h
new file mode 100644
index 00000000000..5bf97db24a0
--- /dev/null
+++ b/arch/um/kernel/internal.h
@@ -0,0 +1 @@
+/* execve entry point taking a kernel-space file name; defined in exec.c. */
+extern long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
new file mode 100644
index 00000000000..71b8c947e5e
--- /dev/null
+++ b/arch/um/kernel/irq.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ */
+
+#include "linux/cpumask.h"
+#include "linux/hardirq.h"
+#include "linux/interrupt.h"
+#include "linux/kernel_stat.h"
+#include "linux/module.h"
+#include "linux/sched.h"
+#include "linux/seq_file.h"
+#include "linux/slab.h"
+#include "as-layout.h"
+#include "kern_util.h"
+#include "os.h"
+
+/*
+ * This list is accessed under irq_lock, except in sigio_handler,
+ * where it is safe from being modified. IRQ handlers won't change it -
+ * if an IRQ source has vanished, it will be freed by free_irqs just
+ * before returning from sigio_handler. That will process a separate
+ * list of irqs to free, with its own locking, coming back here to
+ * remove list elements, taking the irq_lock to do so.
+ */
+static struct irq_fd *active_fds = NULL;
+static struct irq_fd **last_irq_ptr = &active_fds;
+
+extern void free_irqs(void);
+
+/*
+ * SIGIO handler: unless smp_sigio_handler() claims the signal, keep polling
+ * the active descriptors and run do_IRQ() for each one that has events
+ * pending, clearing its current_events first.  A poll result of -EINTR is
+ * retried; any other result <= 0 ends the loop.  free_irqs() is called on
+ * the way out.
+ */
+void sigio_handler(int sig, struct uml_pt_regs *regs)
+{
+	struct irq_fd *cur;
+	int n;
+
+	if (smp_sigio_handler())
+		return;
+
+	for (;;) {
+		n = os_waiting_for_events(active_fds);
+		if (n == -EINTR)
+			continue;
+		if (n <= 0)
+			break;
+
+		for (cur = active_fds; cur != NULL; cur = cur->next) {
+			if (cur->current_events == 0)
+				continue;
+			cur->current_events = 0;
+			do_IRQ(cur->irq, regs);
+		}
+	}
+
+	free_irqs();
+}
+
+static DEFINE_SPINLOCK(irq_lock);
+
+/*
+ * Register fd as an event source for irq.
+ *
+ * The descriptor is put into async mode, a new irq_fd entry is allocated,
+ * a pollfd slot is created for it and the entry is appended to active_fds.
+ *
+ * @irq:    IRQ number delivered when the descriptor has events
+ * @fd:     host file descriptor
+ * @type:   IRQ_READ or IRQ_WRITE; selects the poll events
+ * @dev_id: cookie stored in the entry
+ *
+ * Returns 0 on success, the os_set_fd_async() error, -ENOMEM if an
+ * allocation fails, or -EBUSY if the same fd/type pair is already
+ * registered.
+ */
+static int activate_fd(int irq, int fd, int type, void *dev_id)
+{
+	struct pollfd *tmp_pfd;
+	struct irq_fd *new_fd, *irq_fd;
+	unsigned long flags;
+	int events, err, n;
+
+	err = os_set_fd_async(fd);
+	if (err < 0)
+		goto out;
+
+	err = -ENOMEM;
+	new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL);
+	if (new_fd == NULL)
+		goto out;
+
+	if (type == IRQ_READ)
+		events = UM_POLLIN | UM_POLLPRI;
+	else events = UM_POLLOUT;
+	*new_fd = ((struct irq_fd) { .next		= NULL,
+				     .id		= dev_id,
+				     .fd		= fd,
+				     .type		= type,
+				     .irq		= irq,
+				     .events		= events,
+				     .current_events	= 0 } );
+
+	err = -EBUSY;
+	spin_lock_irqsave(&irq_lock, flags);
+	for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
+		if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
+			printk(KERN_ERR "Registering fd %d twice\n", fd);
+			printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
+			printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
+			       dev_id);
+			goto out_unlock;
+		}
+	}
+
+	if (type == IRQ_WRITE)
+		fd = -1;
+
+	tmp_pfd = NULL;
+	n = 0;
+
+	while (1) {
+		n = os_create_pollfd(fd, events, tmp_pfd, n);
+		if (n == 0)
+			break;
+
+		/*
+		 * n > 0
+		 * It means we couldn't put new pollfd to current pollfds
+		 * and tmp_pfd is NULL or too small for new pollfds array.
+		 * Needed size is equal to n as minimum.
+		 *
+		 * Here we have to drop the lock in order to call
+		 * kmalloc, which might sleep.
+		 * If something else came in and changed the pollfds array
+		 * so we will not be able to put new pollfd struct to pollfds
+		 * then we free the buffer tmp_pfd and try again.
+		 */
+		spin_unlock_irqrestore(&irq_lock, flags);
+		kfree(tmp_pfd);
+
+		tmp_pfd = kmalloc(n, GFP_KERNEL);
+		if (tmp_pfd == NULL) {
+			/* err still holds -EBUSY from the duplicate check. */
+			err = -ENOMEM;
+			goto out_kfree;
+		}
+
+		spin_lock_irqsave(&irq_lock, flags);
+	}
+
+	*last_irq_ptr = new_fd;
+	last_irq_ptr = &new_fd->next;
+
+	spin_unlock_irqrestore(&irq_lock, flags);
+
+	/*
+	 * This calls activate_fd, so it has to be outside the critical
+	 * section.
+	 */
+	maybe_sigio_broken(fd, (type == IRQ_READ));
+
+	return 0;
+
+ out_unlock:
+	spin_unlock_irqrestore(&irq_lock, flags);
+ out_kfree:
+	kfree(new_fd);
+ out:
+	return err;
+}
+
+/*
+ * Hand the active list to os_free_irq_by_cb() together with the predicate
+ * test and its argument arg, holding irq_lock with interrupts saved for the
+ * duration of the call.
+ */
+static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
+{
+	unsigned long saved;
+
+	spin_lock_irqsave(&irq_lock, saved);
+	os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
+	spin_unlock_irqrestore(&irq_lock, saved);
+}
+
+/* Search key for same_irq_and_dev(): an IRQ number plus a device cookie. */
+struct irq_and_dev {
+	int irq;
+	void *dev;
+};
+
+/*
+ * Predicate for free_irq_by_cb(): non-zero when the entry's irq and id both
+ * match the struct irq_and_dev pointed to by d.
+ */
+static int same_irq_and_dev(struct irq_fd *irq, void *d)
+{
+	const struct irq_and_dev *key = d;
+
+	if (irq->irq != key->irq)
+		return 0;
+	return irq->id == key->dev;
+}
+
+/* Free the active entries whose irq and device cookie match the arguments. */
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
+{
+	struct irq_and_dev key;
+
+	key.irq = irq;
+	key.dev = dev;
+	free_irq_by_cb(same_irq_and_dev, &key);
+}
+
+/* Predicate for free_irq_by_cb(): non-zero when the entry's fd equals *fd. */
+static int same_fd(struct irq_fd *irq, void *fd)
+{
+	const int *wanted = fd;
+
+	return irq->fd == *wanted;
+}
+
+/* Free the active entries registered for file descriptor fd. */
+void free_irq_by_fd(int fd)
+{
+	free_irq_by_cb(same_fd, &fd);
+}
+
+/* Must be called with irq_lock held */
+/*
+ * Look up the active entry registered for (fd, irqnum).
+ *
+ * On success the entry is returned and its position in the list is stored
+ * in *index_out.  NULL is returned, with a message logged, when no entry
+ * matches or when the pollfd slot at that position holds a descriptor other
+ * than -1 or fd; *index_out is left untouched in that case.
+ */
+static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
+{
+	struct irq_fd *irq = active_fds;
+	int idx = 0;
+	int fdi;
+
+	while (irq != NULL && !((irq->fd == fd) && (irq->irq == irqnum))) {
+		irq = irq->next;
+		idx++;
+	}
+	if (irq == NULL) {
+		printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
+		       fd);
+		return NULL;
+	}
+
+	fdi = os_get_pollfd(idx);
+	if ((fdi != -1) && (fdi != fd)) {
+		printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
+		       "and pollfds, fd %d vs %d, need %d\n", irq->fd,
+		       fdi, fd);
+		return NULL;
+	}
+
+	*index_out = idx;
+	return irq;
+}
+
+/*
+ * Re-arm polling for (fd, irqnum): store the entry's descriptor back into
+ * its pollfd slot under irq_lock, then call add_sigio_fd() on it.  Does
+ * nothing if no matching entry is found.
+ */
+void reactivate_fd(int fd, int irqnum)
+{
+	struct irq_fd *entry;
+	unsigned long flags;
+	int slot;
+
+	spin_lock_irqsave(&irq_lock, flags);
+	entry = find_irq_by_fd(fd, irqnum, &slot);
+	if (entry != NULL)
+		os_set_pollfd(slot, entry->fd);
+	spin_unlock_irqrestore(&irq_lock, flags);
+
+	if (entry != NULL)
+		add_sigio_fd(fd);
+}
+
+/*
+ * Stop polling for (fd, irqnum): set the entry's pollfd slot to -1 under
+ * irq_lock, then call ignore_sigio_fd() on the descriptor.  Does nothing if
+ * no matching entry is found.
+ */
+void deactivate_fd(int fd, int irqnum)
+{
+	struct irq_fd *entry;
+	unsigned long flags;
+	int slot;
+
+	spin_lock_irqsave(&irq_lock, flags);
+	entry = find_irq_by_fd(fd, irqnum, &slot);
+	if (entry != NULL)
+		os_set_pollfd(slot, -1);
+	spin_unlock_irqrestore(&irq_lock, flags);
+
+	if (entry != NULL)
+		ignore_sigio_fd(fd);
+}
+EXPORT_SYMBOL(deactivate_fd);
+
+/*
+ * Called just before shutdown in order to provide a clean exec
+ * environment in case the system is rebooting. No locking because
+ * that would cause a pointless shutdown hang if something hadn't
+ * released the lock.
+ */
+/*
+ * Clear async mode on every active descriptor, then call os_set_ioignore().
+ * Returns the first os_clear_fd_async() error (the remaining descriptors and
+ * os_set_ioignore() are then skipped), or 0 on success.
+ */
+int deactivate_all_fds(void)
+{
+	struct irq_fd *irq = active_fds;
+
+	while (irq != NULL) {
+		int err = os_clear_fd_async(irq->fd);
+
+		if (err)
+			return err;
+		irq = irq->next;
+	}
+
+	/* If there is a signal already queued, after unblocking ignore it */
+	os_set_ioignore();
+	return 0;
+}
+
+/*
+ * do_IRQ handles all normal device IRQs (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+/*
+ * Run the generic handler for irq between irq_enter() and irq_exit(), with
+ * regs installed as the current IRQ registers for the duration and the
+ * previous registers restored afterwards.  Always returns 1.
+ */
+unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
+{
+	struct pt_regs *saved_regs;
+
+	saved_regs = set_irq_regs((struct pt_regs *)regs);
+
+	irq_enter();
+	generic_handle_irq(irq);
+	irq_exit();
+
+	set_irq_regs(saved_regs);
+	return 1;
+}
+
+/*
+ * request_irq() wrapper that first registers fd (unless it is -1) as the
+ * event source for irq through activate_fd().
+ *
+ * Returns the activate_fd() error if registration fails, otherwise the
+ * result of request_irq().
+ * NOTE(review): the fd registration is not undone here when request_irq()
+ * fails - confirm whether callers clean up.
+ */
+int um_request_irq(unsigned int irq, int fd, int type,
+		   irq_handler_t handler,
+		   unsigned long irqflags, const char * devname,
+		   void *dev_id)
+{
+	int err = 0;
+
+	if (fd != -1)
+		err = activate_fd(irq, fd, type, dev_id);
+	if (err)
+		return err;
+
+	return request_irq(irq, handler, irqflags, devname, dev_id);
+}
+
+EXPORT_SYMBOL(um_request_irq);
+EXPORT_SYMBOL(reactivate_fd);
+
+/*
+ * irq_chip must define at least enable/disable and ack when
+ * the edge handler is used.
+ */
+/* No-op used for the enable, disable and ack callbacks of both chips. */
+static void dummy(struct irq_data *d)
+{
+}
+
+/* This is used for everything else than the timer. */
+static struct irq_chip normal_irq_type = {
+	.name		= "SIGIO",
+	.irq_enable	= dummy,
+	.irq_disable	= dummy,
+	.irq_ack	= dummy,
+	.release	= free_irq_by_irq_and_dev,
+};
+
+/* Chip installed for TIMER_IRQ by init_IRQ(). */
+static struct irq_chip SIGVTALRM_irq_type = {
+	.name		= "SIGVTALRM",
+	.irq_enable	= dummy,
+	.irq_disable	= dummy,
+	.irq_ack	= dummy,
+	.release	= free_irq_by_irq_and_dev,
+};
+
+/*
+ * Install the edge handler on every IRQ: TIMER_IRQ gets the SIGVTALRM chip,
+ * IRQs 1 .. NR_IRQS - 1 get the SIGIO chip.
+ */
+void __init init_IRQ(void)
+{
+	int irq = 1;
+
+	irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
+
+	while (irq < NR_IRQS) {
+		irq_set_chip_and_handler(irq, &normal_irq_type, handle_edge_irq);
+		irq++;
+	}
+}
+
+/*
+ * IRQ stack entry and exit:
+ *
+ * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
+ * and switch over to the IRQ stack after some preparation. We use
+ * sigaltstack to receive signals on a separate stack from the start.
+ * These two functions make sure the rest of the kernel won't be too
+ * upset by being on a different stack. The IRQ stack has a
+ * thread_info structure at the bottom so that current et al continue
+ * to work.
+ *
+ * to_irq_stack copies the current task's thread_info to the IRQ stack
+ * thread_info and sets the tasks's stack to point to the IRQ stack.
+ *
+ * from_irq_stack copies the thread_info struct back (flags may have
+ * been modified) and resets the task's stack pointer.
+ *
+ * Tricky bits -
+ *
+ * What happens when two signals race each other? UML doesn't block
+ * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
+ * could arrive while a previous one is still setting up the
+ * thread_info.
+ *
+ * There are three cases -
+ * The first interrupt on the stack - sets up the thread_info and
+ * handles the interrupt
+ * A nested interrupt interrupting the copying of the thread_info -
+ * can't handle the interrupt, as the stack is in an unknown state
+ * A nested interrupt not interrupting the copying of the
+ * thread_info - doesn't do any setup, just handles the interrupt
+ *
+ * The first job is to figure out whether we interrupted stack setup.
+ * This is done by xchging the signal mask with thread_info->pending.
+ * If the value that comes back is zero, then there is no setup in
+ * progress, and the interrupt can be handled. If the value is
+ * non-zero, then there is stack setup in progress. In order to have
+ * the interrupt handled, we leave our signal in the mask, and it will
+ * be handled by the upper handler after it has set up the stack.
+ *
+ * Next is to figure out whether we are the outer handler or a nested
+ * one. As part of setting up the stack, thread_info->real_thread is
+ * set to non-NULL (and is reset to NULL on exit). This is the
+ * nesting indicator. If it is non-NULL, then the stack is already
+ * set up and the handler can run.
+ */
+
+static unsigned long pending_mask;
+
+/*
+ * Enter the IRQ stack for the signal(s) in *mask_out.
+ *
+ * Returns 1 if pending_mask was already non-zero; the caller's bits are then
+ * merged into pending_mask and left there.  Returns 0 otherwise, after
+ * setting up the IRQ-stack thread_info if this is not a nested entry; in
+ * that case *mask_out additionally receives any bits that accumulated in
+ * pending_mask, with bit 0 set when the entry was nested.
+ */
+unsigned long to_irq_stack(unsigned long *mask_out)
+{
+ struct thread_info *ti;
+ unsigned long mask, old;
+ int nested;
+
+ /* Publish our bits; a non-zero old value means someone else got here first. */
+ mask = xchg(&pending_mask, *mask_out);
+ if (mask != 0) {
+ /*
+ * If any interrupts come in at this point, we want to
+ * make sure that their bits aren't lost by our
+ * putting our bit in. So, this loop accumulates bits
+ * until xchg returns the same value that we put in.
+ * When that happens, there were no new interrupts,
+ * and pending_mask contains a bit for each interrupt
+ * that came in.
+ */
+ old = *mask_out;
+ do {
+ old |= mask;
+ mask = xchg(&pending_mask, old);
+ } while (mask != old);
+ return 1;
+ }
+
+ ti = current_thread_info();
+ /* A non-NULL real_thread means the IRQ stack is already set up. */
+ nested = (ti->real_thread != NULL);
+ if (!nested) {
+ struct task_struct *task;
+ struct thread_info *tti;
+
+ task = cpu_tasks[ti->cpu].task;
+ tti = task_thread_info(task);
+
+ /* Copy the task's thread_info here and point the task's stack at it. */
+ *ti = *tti;
+ ti->real_thread = tti;
+ task->stack = ti;
+ }
+
+ /* Collect whatever was queued during setup and clear pending_mask. */
+ mask = xchg(&pending_mask, 0);
+ *mask_out |= mask | nested;
+ return 0;
+}
+
+/*
+ * Leave the IRQ stack: copy the thread_info back to the structure saved in
+ * real_thread, point the current task's stack at it again and clear
+ * real_thread.
+ *
+ * pending_mask is set to 1 for the duration of the copy, so a to_irq_stack()
+ * call made meanwhile sees a non-zero value and takes its accumulate path.
+ * Returns the bits found in pending_mask afterwards, with bit 0 masked off.
+ * The nested argument is not used by this function.
+ */
+unsigned long from_irq_stack(int nested)
+{
+ struct thread_info *ti, *to;
+ unsigned long mask;
+
+ ti = current_thread_info();
+
+ pending_mask = 1;
+
+ to = ti->real_thread;
+ current->stack = to;
+ ti->real_thread = NULL;
+ *to = *ti;
+
+ mask = xchg(&pending_mask, 0);
+ return mask & ~1;
+}
+
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
new file mode 100644
index 00000000000..e17bea0b22e
--- /dev/null
+++ b/arch/um/kernel/ksyms.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/module.h>
+#include "os.h"
+
+EXPORT_SYMBOL(set_signals);
+EXPORT_SYMBOL(get_signals);
+
+EXPORT_SYMBOL(os_stat_fd);
+EXPORT_SYMBOL(os_stat_file);
+EXPORT_SYMBOL(os_access);
+EXPORT_SYMBOL(os_set_exec_close);
+EXPORT_SYMBOL(os_getpid);
+EXPORT_SYMBOL(os_open_file);
+EXPORT_SYMBOL(os_read_file);
+EXPORT_SYMBOL(os_write_file);
+EXPORT_SYMBOL(os_seek_file);
+EXPORT_SYMBOL(os_lock_file);
+EXPORT_SYMBOL(os_ioctl_generic);
+EXPORT_SYMBOL(os_pipe);
+EXPORT_SYMBOL(os_file_type);
+EXPORT_SYMBOL(os_file_mode);
+EXPORT_SYMBOL(os_file_size);
+EXPORT_SYMBOL(os_flush_stdout);
+EXPORT_SYMBOL(os_close_file);
+EXPORT_SYMBOL(os_set_fd_async);
+EXPORT_SYMBOL(os_set_fd_block);
+EXPORT_SYMBOL(helper_wait);
+EXPORT_SYMBOL(os_shutdown_socket);
+EXPORT_SYMBOL(os_create_unix_socket);
+EXPORT_SYMBOL(os_connect_socket);
+EXPORT_SYMBOL(os_accept_connection);
+EXPORT_SYMBOL(os_rcv_fd);
+EXPORT_SYMBOL(run_helper);
+EXPORT_SYMBOL(os_major);
+EXPORT_SYMBOL(os_minor);
+EXPORT_SYMBOL(os_makedev);
+
+EXPORT_SYMBOL(add_sigio_fd);
+EXPORT_SYMBOL(ignore_sigio_fd);
+EXPORT_SYMBOL(sigio_broken);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
new file mode 100644
index 00000000000..ebb86b21844
--- /dev/null
+++ b/arch/um/kernel/mem.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/stddef.h>
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
+#include <asm/fixmap.h>
+#include <asm/page.h>
+#include "as-layout.h"
+#include "init.h"
+#include "kern.h"
+#include "kern_util.h"
+#include "mem_user.h"
+#include "os.h"
+
+/* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
+unsigned long *empty_zero_page = NULL;
+EXPORT_SYMBOL(empty_zero_page);
+/* allocated in paging_init and unchanged thereafter */
+static unsigned long *empty_bad_page = NULL;
+
+/*
+ * Initialized during boot, and readonly for initializing page tables
+ * afterwards
+ */
+pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+/* Initialized at boot time, and readonly after that */
+unsigned long long highmem;
+int kmalloc_ok = 0;
+
+/* Used during early boot */
+static unsigned long brk_end;
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * Release the highmem pages to the page allocator.
+ *
+ * highmem_start is the virtual address of the first highmem page and
+ * highmem_len the length of the region in bytes.  For each page the
+ * reserved flag is cleared, the page count is initialised, and the page
+ * is freed.
+ */
+static void setup_highmem(unsigned long highmem_start,
+			  unsigned long highmem_len)
+{
+	unsigned long first_pfn = __pa(highmem_start) >> PAGE_SHIFT;
+	int n;
+
+	for (n = 0; n < highmem_len >> PAGE_SHIFT; n++) {
+		struct page *pg = &mem_map[first_pfn + n];
+
+		ClearPageReserved(pg);
+		init_page_count(pg);
+		__free_page(pg);
+	}
+}
+#endif
+
+/*
+ * Finish memory setup: zero empty_zero_page, map and release the area
+ * between the current brk and uml_reserved, release all bootmem, update
+ * the page counters, and set kmalloc_ok.  With CONFIG_HIGHMEM the highmem
+ * pages are released at the end via setup_highmem().
+ */
+void __init mem_init(void)
+{
+ /* clear the zero-page */
+ memset(empty_zero_page, 0, PAGE_SIZE);
+
+ /* Map in the area just after the brk now that kmalloc is about
+ * to be turned on.
+ */
+ brk_end = (unsigned long) UML_ROUND_UP(sbrk(0));
+ map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
+ free_bootmem(__pa(brk_end), uml_reserved - brk_end);
+ /* the reserved area now ends at the rounded-up brk */
+ uml_reserved = brk_end;
+
+ /* this will put all low memory onto the freelists */
+ totalram_pages = free_all_bootmem();
+ max_low_pfn = totalram_pages;
+#ifdef CONFIG_HIGHMEM
+ totalhigh_pages = highmem >> PAGE_SHIFT;
+ totalram_pages += totalhigh_pages;
+#endif
+ num_physpages = totalram_pages;
+ max_pfn = totalram_pages;
+ /* nr_free_pages() is in pages; the shift converts it to kilobytes */
+ printk(KERN_INFO "Memory: %luk available\n",
+ nr_free_pages() << (PAGE_SHIFT-10));
+ kmalloc_ok = 1;
+
+#ifdef CONFIG_HIGHMEM
+ setup_highmem(end_iomem, highmem);
+#endif
+}
+
+/*
+ * If the given middle-directory entry is empty, allocate one page from
+ * bootmem to act as a page table and install it in that entry.  An entry
+ * that is already populated is left untouched.
+ */
+static void __init one_page_table_init(pmd_t *pmd)
+{
+	pte_t *table;
+
+	if (!pmd_none(*pmd))
+		return;
+
+	table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+	set_pmd(pmd, __pmd(_KERNPG_TABLE + (unsigned long) __pa(table)));
+
+	/* sanity check: the entry must now resolve to the new table */
+	if (table != pte_offset_kernel(pmd, 0))
+		BUG();
+}
+
+/*
+ * With CONFIG_3_LEVEL_PGTABLES, allocate one page from bootmem as a pmd
+ * table and install it in the given pud entry; BUG() if the entry does not
+ * then resolve to that page.  Without the option the body is empty.
+ */
+static void __init one_md_table_init(pud_t *pud)
+{
+#ifdef CONFIG_3_LEVEL_PGTABLES
+ pmd_t *pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table)));
+ if (pmd_table != pmd_offset(pud, 0))
+ BUG();
+#endif
+}
+
+/*
+ * Make sure page-table pages exist for the virtual range [start, end)
+ * under pgd_base.  The range is walked in PMD_SIZE steps: empty pud
+ * entries get one_md_table_init() and every pmd entry visited gets
+ * one_page_table_init().  No ptes are written here.
+ */
+static void __init fixrange_init(unsigned long start, unsigned long end,
+ pgd_t *pgd_base)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ int i, j;
+ unsigned long vaddr;
+
+ vaddr = start;
+ i = pgd_index(vaddr);
+ j = pmd_index(vaddr);
+ pgd = pgd_base + i;
+
+ for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
+ pud = pud_offset(pgd, vaddr);
+ if (pud_none(*pud))
+ one_md_table_init(pud);
+ pmd = pmd_offset(pud, vaddr);
+ for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) {
+ one_page_table_init(pmd);
+ vaddr += PMD_SIZE;
+ }
+ /* only the first pgd entry starts part-way into its pmd table */
+ j = 0;
+ }
+}
+
+#ifdef CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+#define kmap_get_fixmap_pte(vaddr) \
+ pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\
+ (vaddr)), (vaddr))
+
+/*
+ * Initialise kmap_pte with the pte for the FIX_KMAP_BEGIN fixmap address
+ * and set kmap_prot to PAGE_KERNEL.
+ */
+static void __init kmap_init(void)
+{
+ unsigned long kmap_vstart;
+
+ /* cache the first kmap pte */
+ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+ kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+
+ kmap_prot = PAGE_KERNEL;
+}
+
+/*
+ * Set up the permanent kmap area: create the page tables covering
+ * LAST_PKMAP pages starting at PKMAP_BASE, record the pte for PKMAP_BASE
+ * in pkmap_page_table, then run kmap_init().
+ */
+static void __init init_highmem(void)
+{
+	unsigned long base = PKMAP_BASE;
+	pgd_t *pgd_entry;
+	pud_t *pud_entry;
+	pmd_t *pmd_entry;
+
+	/* Permanent kmaps */
+	fixrange_init(base, base + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir);
+
+	pgd_entry = swapper_pg_dir + pgd_index(base);
+	pud_entry = pud_offset(pgd_entry, base);
+	pmd_entry = pmd_offset(pud_entry, base);
+	pkmap_page_table = pte_offset_kernel(pmd_entry, base);
+
+	kmap_init();
+}
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * With CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA: copy the contents of
+ * [FIXADDR_USER_START, FIXADDR_USER_END) into freshly allocated bootmem
+ * pages and point the kernel ptes for that range at the copy with
+ * PAGE_READONLY.  Returns early if the range is empty.  Without the
+ * option the body is empty.
+ */
+static void __init fixaddr_user_init( void)
+{
+#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
+ long size = FIXADDR_USER_END - FIXADDR_USER_START;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ phys_t p;
+ unsigned long v, vaddr = FIXADDR_USER_START;
+
+ if (!size)
+ return;
+
+ /* page tables first, so the pte lookups in the loop below succeed */
+ fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir);
+ v = (unsigned long) alloc_bootmem_low_pages(size);
+ memcpy((void *) v , (void *) FIXADDR_USER_START, size);
+ p = __pa(v);
+ /* one pte per page: vaddr and p advance together */
+ for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
+ p += PAGE_SIZE) {
+ pgd = swapper_pg_dir + pgd_index(vaddr);
+ pud = pud_offset(pgd, vaddr);
+ pmd = pmd_offset(pud, vaddr);
+ pte = pte_offset_kernel(pmd, vaddr);
+ pte_set_val(*pte, p, PAGE_READONLY);
+ }
+#endif
+}
+
+/*
+ * Allocate empty_zero_page and empty_bad_page, compute the zone sizes and
+ * pass them to free_area_init(), then build the page-table structure for
+ * the fixmap range, the user fixaddr area and (with CONFIG_HIGHMEM) the
+ * highmem kmap area.
+ */
+void __init paging_init(void)
+{
+ unsigned long zones_size[MAX_NR_ZONES], vaddr;
+ int i;
+
+ empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
+ empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
+ for (i = 0; i < ARRAY_SIZE(zones_size); i++)
+ zones_size[i] = 0;
+
+ /* ZONE_NORMAL spans uml_physmem up to end_iomem, counted in pages */
+ zones_size[ZONE_NORMAL] = (end_iomem >> PAGE_SHIFT) -
+ (uml_physmem >> PAGE_SHIFT);
+#ifdef CONFIG_HIGHMEM
+ zones_size[ZONE_HIGHMEM] = highmem >> PAGE_SHIFT;
+#endif
+ free_area_init(zones_size);
+
+ /*
+ * Fixed mappings, only the page table structure has to be
+ * created - mappings will be set by set_fixmap():
+ */
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+ fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir);
+
+ fixaddr_user_init();
+
+#ifdef CONFIG_HIGHMEM
+ init_highmem();
+#endif
+}
+
+/*
+ * Intentionally empty: this can't do anything because nothing in the
+ * kernel image can be freed, since it's not in kernel physical memory.
+ */
+
+void free_initmem(void)
+{
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+/*
+ * Return the initrd pages in [start, end) to the page allocator, one
+ * PAGE_SIZE step at a time, and count each one in totalram_pages.
+ * Logs the amount freed; does nothing when the range is empty.
+ */
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+
+	if (start >= end)
+		return;
+
+	printk(KERN_INFO "Freeing initrd memory: %ldk freed\n",
+	       (end - start) >> 10);
+
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		free_page(addr);
+		totalram_pages++;
+	}
+}
+#endif
+
+/* Allocate and free page tables. */
+
+/*
+ * Allocate a page directory for mm.  The first USER_PTRS_PER_PGD entries
+ * are zeroed and the remaining entries are copied from swapper_pg_dir.
+ * Returns the new directory, or NULL if no page could be allocated.
+ */
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *new_pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+
+	if (!new_pgd)
+		return new_pgd;
+
+	/* user part starts out empty */
+	memset(new_pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+	/* kernel part mirrors swapper_pg_dir */
+	memcpy(new_pgd + USER_PTRS_PER_PGD,
+	       swapper_pg_dir + USER_PTRS_PER_PGD,
+	       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+
+	return new_pgd;
+}
+
+/* Free the single page holding pgd.  The mm argument is not used. */
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ free_page((unsigned long) pgd);
+}
+
+/*
+ * Allocate one zeroed page for use as a kernel page table.
+ * Neither mm nor address is consulted.  Returns NULL on failure.
+ */
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+}
+
+/*
+ * Allocate one zeroed page for use as a page table and run
+ * pgtable_page_ctor() on it.  Neither mm nor address is consulted.
+ * Returns the page, or NULL if the allocation failed.
+ */
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *table_page;
+
+	table_page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	if (!table_page)
+		return table_page;
+
+	pgtable_page_ctor(table_page);
+	return table_page;
+}
+
+#ifdef CONFIG_3_LEVEL_PGTABLES
+/*
+ * Allocate one page for use as a pmd table and clear it.
+ * Neither mm nor address is consulted.  Returns NULL on failure.
+ */
+pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	pmd_t *table = (pmd_t *) __get_free_page(GFP_KERNEL);
+
+	if (!table)
+		return table;
+
+	memset(table, 0, PAGE_SIZE);
+	return table;
+}
+#endif
+
+/* Plain wrapper: passes size and flags straight through to kmalloc(). */
+void *uml_kmalloc(int size, int flags)
+{
+ return kmalloc(size, flags);
+}
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
new file mode 100644
index 00000000000..f116db15d40
--- /dev/null
+++ b/arch/um/kernel/physmem.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/pfn.h>
+#include <asm/page.h>
+#include <as-layout.h>
+#include <init.h>
+#include <kern.h>
+#include <mem_user.h>
+#include <os.h>
+
+static int physmem_fd = -1;
+
+/* Changed during early boot */
+unsigned long high_physmem;
+EXPORT_SYMBOL(high_physmem);
+
+extern unsigned long long physmem_size;
+
+/*
+ * Allocate one struct page per page of physmem, iomem and highmem (each
+ * given in bytes) from bootmem, and initialise every entry as zeroed,
+ * reserved, and with an empty lru list.  max_mapnr is set to the total
+ * page count.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation returned NULL.
+ *
+ * NOTE(review): the array pointer is only held in a local here.
+ */
+int __init init_maps(unsigned long physmem, unsigned long iomem,
+		     unsigned long highmem)
+{
+	unsigned long total_pages, total_len;
+	struct page *map, *pg;
+	int i;
+
+	total_pages = (physmem >> PAGE_SHIFT) + (iomem >> PAGE_SHIFT) +
+		      (highmem >> PAGE_SHIFT);
+	total_len = total_pages * sizeof(struct page);
+
+	map = alloc_bootmem_low_pages(total_len);
+	if (map == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < total_pages; i++) {
+		pg = &map[i];
+		memset(pg, 0, sizeof(struct page));
+		SetPageReserved(pg);
+		INIT_LIST_HEAD(&pg->lru);
+	}
+
+	max_mapnr = total_pages;
+	return 0;
+}
+
+/*
+ * Map len bytes of "physical" memory at phys to the virtual address virt
+ * with the given r/w/x permissions.  The backing descriptor and offset
+ * come from phys_mapping().  Any mapping error is fatal: the kernel
+ * panics, after printing a hint when the error is -ENOMEM.
+ */
+void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
+		int r, int w, int x)
+{
+	__u64 offset;
+	int fd = phys_mapping(phys, &offset);
+	int err = os_map_memory((void *) virt, fd, offset, len, r, w, x);
+
+	if (!err)
+		return;
+
+	if (err == -ENOMEM)
+		printk(KERN_ERR "try increasing the host's "
+		       "/proc/sys/vm/max_map_count to <physical "
+		       "memory size>/4096\n");
+	panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, "
+	      "err = %d\n", virt, fd, offset, len, r, w, x, err);
+}
+
+extern int __syscall_stub_start;
+
+/*
+ * Create the file backing physical memory (len + highmem bytes), map the
+ * part of it from uml_reserved onward read/write/execute, copy the page at
+ * __syscall_stub_start into the file, and initialise bootmem for the pages
+ * following reserve_end.  Exits the process if the mapping fails.
+ */
+void __init setup_physmem(unsigned long start, unsigned long reserve_end,
+ unsigned long len, unsigned long long highmem)
+{
+ unsigned long reserve = reserve_end - start;
+ int pfn = PFN_UP(__pa(reserve_end));
+ int delta = (len - reserve) >> PAGE_SHIFT;
+ int err, offset, bootmap_size;
+
+ physmem_fd = create_mem_file(len + highmem);
+
+ /* file offset of uml_reserved relative to the start of physmem */
+ offset = uml_reserved - uml_physmem;
+ err = os_map_memory((void *) uml_reserved, physmem_fd, offset,
+ len - offset, 1, 1, 1);
+ if (err < 0) {
+ printf("setup_physmem - mapping %ld bytes of memory at 0x%p "
+ "failed - errno = %d\n", len - offset,
+ (void *) uml_reserved, err);
+ exit(1);
+ }
+
+ /*
+ * Special kludge - This page will be mapped in to userspace processes
+ * from physmem_fd, so it needs to be written out there.
+ */
+ /* NOTE(review): the results of the seek and the write are not checked */
+ os_seek_file(physmem_fd, __pa(&__syscall_stub_start));
+ os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE);
+
+ /* everything after the reserved area and the bootmap itself is free */
+ bootmap_size = init_bootmem(pfn, pfn + delta);
+ free_bootmem(__pa(reserve_end) + bootmap_size,
+ len - bootmap_size - reserve);
+}
+
+/*
+ * Translate a "physical" address into the host file descriptor backing it
+ * and the offset within that descriptor.
+ *
+ * Three ranges are recognised, in order: ordinary physical memory (below
+ * physmem_size), the iomem regions (below __pa(end_iomem)), and highmem
+ * (the following highmem bytes).
+ *
+ * Returns the descriptor and stores the offset through offset_out.  When
+ * no range or region matches, -1 is returned and *offset_out is left
+ * untouched.
+ */
+int phys_mapping(unsigned long phys, unsigned long long *offset_out)
+{
+	struct iomem_region *r;
+
+	if (phys < physmem_size) {
+		*offset_out = phys;
+		return physmem_fd;
+	}
+
+	if (phys < __pa(end_iomem)) {
+		for (r = iomem_regions; r != NULL; r = r->next) {
+			if ((phys < r->phys) || (phys >= r->phys + r->size))
+				continue;
+			*offset_out = phys - r->phys;
+			return r->fd;
+		}
+		return -1;
+	}
+
+	if (phys < __pa(end_iomem) + highmem) {
+		*offset_out = phys - iomem_size;
+		return physmem_fd;
+	}
+
+	return -1;
+}
+
+/*
+ * Handler for the "mem=" option: parse the size in line with memparse()
+ * and store it in physmem_size.  Always returns 0; the add argument and
+ * the end pointer produced by memparse() are not used.
+ */
+static int __init uml_mem_setup(char *line, int *add)
+{
+ char *retptr;
+ physmem_size = memparse(line,&retptr);
+ return 0;
+}
+__uml_setup("mem=", uml_mem_setup,
+"mem=<Amount of desired ram>\n"
+" This controls how much \"physical\" memory the kernel allocates\n"
+" for the system. The size is specified as a number followed by\n"
+" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n"
+" This is not related to the amount of memory in the host. It can\n"
+" be more, and the excess, if it's ever used, will just be swapped out.\n"
+" Example: mem=64M\n\n"
+);
+
+extern int __init parse_iomem(char *str, int *add);
+
+__uml_setup("iomem=", parse_iomem,
+"iomem=<name>,<file>\n"
+" Configure <file> as an IO memory region named <name>.\n\n"
+);
+
+/*
+ * This list is constructed in parse_iomem and addresses filled in in
+ * setup_iomem, both of which run during early boot. Afterwards, it's
+ * unchanged.
+ */
+struct iomem_region *iomem_regions;
+
+/* Initialized in parse_iomem and unchanged thereafter */
+int iomem_size;
+
+/*
+ * Look up the iomem region whose driver name equals driver.
+ *
+ * On a match the region size is stored through len_out and the region's
+ * virtual address is returned.  If no region matches, 0 is returned and
+ * *len_out is not written.
+ */
+unsigned long find_iomem(char *driver, unsigned long *len_out)
+{
+	struct iomem_region *r;
+
+	for (r = iomem_regions; r != NULL; r = r->next) {
+		if (strcmp(r->driver, driver))
+			continue;
+
+		*len_out = r->size;
+		return r->virt;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(find_iomem);
+
+/*
+ * Map every region on the iomem_regions list, starting one page above
+ * high_physmem and leaving one page between consecutive regions.
+ *
+ * A region that maps successfully gets its virt and phys fields filled
+ * in; a failure is logged and the region's fields are left alone.  The
+ * address advances past the region in both cases.  Always returns 0.
+ */
+static int setup_iomem(void)
+{
+	unsigned long next_virt = high_physmem + PAGE_SIZE;
+	struct iomem_region *r;
+	int err;
+
+	for (r = iomem_regions; r != NULL; r = r->next) {
+		err = os_map_memory((void *) next_virt, r->fd, 0,
+				    r->size, 1, 1, 0);
+		if (!err) {
+			r->virt = next_virt;
+			r->phys = __pa(r->virt);
+		} else {
+			printk(KERN_ERR "Mapping iomem region for driver '%s' "
+			       "failed, errno = %d\n", r->driver, -err);
+		}
+
+		next_virt += r->size + PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+__initcall(setup_iomem);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
new file mode 100644
index 00000000000..69f24905abd
--- /dev/null
+++ b/arch/um/kernel/process.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright 2003 PathScale, Inc.
+ * Licensed under the GPL
+ */
+
+#include <linux/stddef.h>
+#include <linux/err.h>
+#include <linux/hardirq.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/personality.h>
+#include <linux/proc_fs.h>
+#include <linux/ptrace.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/tick.h>
+#include <linux/threads.h>
+#include <asm/current.h>
+#include <asm/pgtable.h>
+#include <asm/mmu_context.h>
+#include <asm/uaccess.h>
+#include "as-layout.h"
+#include "kern_util.h"
+#include "os.h"
+#include "skas.h"
+
+/*
+ * This is a per-cpu array. A processor only modifies its entry and it only
+ * cares about its entry, so it's OK if another processor is modifying its
+ * entry.
+ */
+struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } };
+
+/* Return userspace_pid[0]; the entry is not selected per CPU yet. */
+static inline int external_pid(void)
+{
+ /* FIXME: Need to look up userspace_pid by cpu */
+ return userspace_pid[0];
+}
+
+/*
+ * Return the index of the first cpu_tasks entry (among the first ncpus)
+ * whose pid equals pid, or -1 if there is none.
+ */
+int pid_to_processor_id(int pid)
+{
+	int idx = 0;
+
+	while (idx < ncpus) {
+		if (cpu_tasks[idx].pid == pid)
+			return idx;
+		idx++;
+	}
+
+	return -1;
+}
+
+/* Free the 2^order pages starting at stack via free_pages(). */
+void free_stack(unsigned long stack, int order)
+{
+ free_pages(stack, order);
+}
+
+/*
+ * Allocate 2^order pages.  A non-zero atomic selects GFP_ATOMIC,
+ * otherwise GFP_KERNEL is used.  Returns the address from
+ * __get_free_pages().
+ */
+unsigned long alloc_stack(int order, int atomic)
+{
+	gfp_t gfp = atomic ? GFP_ATOMIC : GFP_KERNEL;
+
+	return __get_free_pages(gfp, order);
+}
+
+/*
+ * Start a kernel thread that will run fn(arg).
+ *
+ * The function and its argument are stashed in the current task's thread
+ * request, then do_fork() is called with CLONE_VM | CLONE_UNTRACED added
+ * to flags.  Returns whatever do_fork() returns.
+ */
+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+	unsigned long clone_flags = CLONE_VM | CLONE_UNTRACED | flags;
+
+	current->thread.request.u.thread.proc = fn;
+	current->thread.request.u.thread.arg = arg;
+
+	return do_fork(clone_flags, 0, &current->thread.regs, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/*
+ * Record task, together with external_pid(), in the cpu_tasks slot
+ * selected by the cpu field of the task's thread_info.
+ */
+static inline void set_current(struct task_struct *task)
+{
+ cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task)
+ { external_pid(), task });
+}
+
+extern void arch_switch_to(struct task_struct *to);
+
+/*
+ * Switch from task prev to task next (both passed as void pointers to
+ * struct task_struct).  The outgoing task is recorded in the incoming
+ * task's thread.prev_sched and cpu_tasks is updated before the switch.
+ *
+ * After each switch_threads() the loop runs arch_switch_to(current) and
+ * repeats for as long as current->thread.saved_task is set.
+ *
+ * Returns current->thread.prev_sched.  The last argument is not used.
+ */
+void *_switch_to(void *prev, void *next, void *last)
+{
+ struct task_struct *from = prev;
+ struct task_struct *to = next;
+
+ to->thread.prev_sched = from;
+ set_current(to);
+
+ do {
+ current->thread.saved_task = NULL;
+
+ switch_threads(&from->thread.switch_buf,
+ &to->thread.switch_buf);
+
+ arch_switch_to(current);
+
+ /* a saved_task set here triggers a register dump and another pass */
+ if (current->thread.saved_task)
+ show_regs(&(current->thread.regs));
+ to = current->thread.saved_task;
+ from = current;
+ } while (current->thread.saved_task);
+
+ return current->thread.prev_sched;
+
+}
+
+/*
+ * Call schedule() if a reschedule is needed, then do_signal() if the
+ * current task has TIF_SIGPENDING set.
+ */
+void interrupt_end(void)
+{
+ if (need_resched())
+ schedule();
+ if (test_tsk_thread_flag(current, TIF_SIGPENDING))
+ do_signal();
+}
+
+/* Intentionally empty. */
+void exit_thread(void)
+{
+}
+
+/* Return current as an untyped pointer. */
+void *get_current(void)
+{
+ return current;
+}
+
+/*
+ * This is called magically, by its address being stuffed in a jmp_buf
+ * and being longjmp-d to.
+ *
+ * It runs the function and argument stored in the current task's thread
+ * request.  If run_kernel_thread() returns 1 the task continues in
+ * userspace(); otherwise it calls do_exit(0).
+ */
+void new_thread_handler(void)
+{
+ int (*fn)(void *), n;
+ void *arg;
+
+ /* run schedule_tail() for the previously running task, if recorded */
+ if (current->thread.prev_sched != NULL)
+ schedule_tail(current->thread.prev_sched);
+ current->thread.prev_sched = NULL;
+
+ fn = current->thread.request.u.thread.proc;
+ arg = current->thread.request.u.thread.arg;
+
+ /*
+ * The return value is 1 if the kernel thread execs a process,
+ * 0 if it just exits
+ */
+ n = run_kernel_thread(fn, arg, &current->thread.exec_buf);
+ if (n == 1) {
+ /* Handle any immediate reschedules or signals */
+ interrupt_end();
+ userspace(&current->thread.regs.regs);
+ }
+ else do_exit(0);
+}
+
+/*
+ * Called magically, see new_thread_handler above.
+ *
+ * copy_thread() installs this as the handler for a forking task.  It
+ * flushes via force_flush_all(), runs schedule_tail() for the previously
+ * running task, and continues in userspace() with the thread's saved
+ * registers.
+ */
+void fork_handler(void)
+{
+ force_flush_all();
+
+ schedule_tail(current->thread.prev_sched);
+
+ /*
+ * XXX: if interrupt_end() calls schedule, this call to
+ * arch_switch_to isn't needed. We could want to apply this to
+ * improve performance. -bb
+ */
+ arch_switch_to(current);
+
+ current->thread.prev_sched = NULL;
+
+ /* Handle any immediate reschedules or signals */
+ interrupt_end();
+
+ userspace(&current->thread.regs.regs);
+}
+
+/*
+ * Set up the thread state of the new task p.
+ *
+ * If the current task is forking, p gets a copy of regs with the syscall
+ * return value set to 0 (and the stack pointer set to sp when sp is
+ * non-zero), and will start in fork_handler().  Otherwise p gets safe
+ * registers plus the current task's thread request and will start in
+ * new_thread_handler().
+ *
+ * Returns 0, or the result of arch_copy_tls() when forking with
+ * CLONE_SETTLS.  The stack_top argument is not used.
+ */
+int copy_thread(unsigned long clone_flags, unsigned long sp,
+ unsigned long stack_top, struct task_struct * p,
+ struct pt_regs *regs)
+{
+ void (*handler)(void);
+ int ret = 0;
+
+ p->thread = (struct thread_struct) INIT_THREAD;
+
+ if (current->thread.forking) {
+ memcpy(&p->thread.regs.regs, &regs->regs,
+ sizeof(p->thread.regs.regs));
+ REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.gp, 0);
+ if (sp != 0)
+ REGS_SP(p->thread.regs.regs.gp) = sp;
+
+ handler = fork_handler;
+
+ arch_copy_thread(&current->thread.arch, &p->thread.arch);
+ }
+ else {
+ get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
+ p->thread.request.u.thread = current->thread.request.u.thread;
+ handler = new_thread_handler;
+ }
+
+ /* arrange for p to start in handler on its own stack page */
+ new_thread(task_stack_page(p), &p->thread.switch_buf, handler);
+
+ if (current->thread.forking) {
+ clear_flushed_tls(p);
+
+ /*
+ * Set a new TLS for the child thread?
+ */
+ if (clone_flags & CLONE_SETTLS)
+ ret = arch_copy_tls(p);
+ }
+
+ return ret;
+}
+
+/*
+ * Run initial_thread_cb_skas(proc, arg) with kmalloc_ok forced to 0,
+ * restoring the previous kmalloc_ok value afterwards.
+ */
+void initial_thread_cb(void (*proc)(void *), void *arg)
+{
+ int save_kmalloc_ok = kmalloc_ok;
+
+ kmalloc_ok = 0;
+ initial_thread_cb_skas(proc, arg);
+ kmalloc_ok = save_kmalloc_ok;
+}
+
+/*
+ * The idle loop; never returns.  Each pass reschedules if needed, then
+ * sleeps in idle_sleep() for the time returned by disable_timer(),
+ * bracketed by the nohz and RCU idle enter/exit calls.
+ */
+void default_idle(void)
+{
+ unsigned long long nsecs;
+
+ while (1) {
+ /* endless idle loop with no priority at all */
+
+ /*
+ * although we are an idle CPU, we do not want to
+ * get into the scheduler unnecessarily.
+ */
+ if (need_resched())
+ schedule();
+
+ tick_nohz_idle_enter();
+ rcu_idle_enter();
+ nsecs = disable_timer();
+ idle_sleep(nsecs);
+ rcu_idle_exit();
+ tick_nohz_idle_exit();
+ }
+}
+
+/*
+ * Store os_getpid() as the pid of this CPU's cpu_tasks entry, then enter
+ * default_idle().
+ */
+void cpu_idle(void)
+{
+ cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
+ default_idle();
+}
+
+/*
+ * Non-zero when in atomic context, with interrupts disabled, or in
+ * interrupt context.
+ */
+int __cant_sleep(void) {
+ return in_atomic() || irqs_disabled() || in_interrupt();
+ /* Is in_interrupt() really needed? */
+}
+
+/*
+ * Round sp down to the start of a kernel-stack-sized, aligned area and
+ * compare that with the address of the current thread_info.  Returns
+ * non-zero when they differ, 0 when they are the same.
+ */
+int user_context(unsigned long sp)
+{
+	unsigned long stack_base = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER);
+	unsigned long ti_addr = (unsigned long) current_thread_info();
+
+	return stack_base != ti_addr;
+}
+
+extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end;
+
+/*
+ * Invoke every exit call stored between __uml_exitcall_begin and
+ * __uml_exitcall_end, walking from the last entry down to the first.
+ */
+void do_uml_exitcalls(void)
+{
+	exitcall_t *fn;
+
+	for (fn = &__uml_exitcall_end - 1; fn >= &__uml_exitcall_begin; fn--)
+		(*fn)();
+}
+
+/* Duplicate string with kstrdup() using GFP_KERNEL. */
+char *uml_strdup(const char *string)
+{
+ return kstrdup(string, GFP_KERNEL);
+}
+EXPORT_SYMBOL(uml_strdup);
+
+/* Function wrapper: forwards to copy_to_user() and returns its result. */
+int copy_to_user_proc(void __user *to, void *from, int size)
+{
+ return copy_to_user(to, from, size);
+}
+
+/* Function wrapper: forwards to copy_from_user() and returns its result. */
+int copy_from_user_proc(void *to, void __user *from, int size)
+{
+ return copy_from_user(to, from, size);
+}
+
+/* Function wrapper: forwards to clear_user() and returns its result. */
+int clear_user_proc(void __user *buf, int size)
+{
+ return clear_user(buf, size);
+}
+
+/* Function wrapper: forwards to strlen_user() and returns its result. */
+int strlen_user_proc(char __user *str)
+{
+ return strlen_user(str);
+}
+
+/*
+ * With CONFIG_SMP, run IPI_handler() for the current CPU and return 1 if
+ * that CPU is not CPU 0.  Returns 0 in every other case.
+ */
+int smp_sigio_handler(void)
+{
+#ifdef CONFIG_SMP
+ int cpu = current_thread_info()->cpu;
+ IPI_handler(cpu);
+ if (cpu != 0)
+ return 1;
+#endif
+ return 0;
+}
+
+/* Return the cpu field of the current thread_info. */
+int cpu(void)
+{
+ return current_thread_info()->cpu;
+}
+
+static atomic_t using_sysemu = ATOMIC_INIT(0);
+int sysemu_supported;
+
+/*
+ * Set the sysemu mode to value, unless value is greater than
+ * sysemu_supported, in which case the request is silently ignored.
+ */
+void set_using_sysemu(int value)
+{
+	if (value <= sysemu_supported)
+		atomic_set(&using_sysemu, value);
+}
+
+/* Return the current value of using_sysemu. */
+int get_using_sysemu(void)
+{
+ return atomic_read(&using_sysemu);
+}
+
+/* seq_file show callback: print the sysemu mode as a decimal line. */
+static int sysemu_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%d\n", get_using_sysemu());
+ return 0;
+}
+
+/* open callback: set up a single-record seq_file using sysemu_proc_show. */
+static int sysemu_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, sysemu_proc_show, NULL);
+}
+
+/*
+ * write callback for the sysemu proc file.
+ *
+ * Only the first byte written is examined: '0', '1' or '2' selects the
+ * corresponding sysemu mode via set_using_sysemu(); any other byte is
+ * ignored.  The whole write is reported as consumed.
+ *
+ * Returns count on success, 0 for an empty write, or -EFAULT if the first
+ * byte cannot be read from user space.
+ */
+static ssize_t sysemu_proc_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *pos)
+{
+	char c;
+
+	/*
+	 * An empty write supplies no byte to parse; reading buf[0] anyway
+	 * would touch memory the caller never handed over.
+	 */
+	if (count == 0)
+		return 0;
+
+	if (copy_from_user(&c, buf, 1))
+		return -EFAULT;
+
+	if (c >= '0' && c <= '2')
+		set_using_sysemu(c - '0');
+	/* We use the first char, but pretend to write everything */
+	return count;
+}
+
+static const struct file_operations sysemu_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = sysemu_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = sysemu_proc_write,
+};
+
+/*
+ * Create the "sysemu" proc entry (mode 0600) when sysemu is supported.
+ * A failure to create it is only logged.  Always returns 0.
+ */
+int __init make_proc_sysemu(void)
+{
+	struct proc_dir_entry *entry;
+
+	if (!sysemu_supported)
+		return 0;
+
+	entry = proc_create("sysemu", 0600, NULL, &sysemu_proc_fops);
+	if (entry == NULL)
+		printk(KERN_WARNING "Failed to register /proc/sysemu\n");
+
+	return 0;
+}
+
+late_initcall(make_proc_sysemu);
+
+/*
+ * Report the single-step state of a task.  t is a struct task_struct
+ * pointer passed as void *; NULL means the current task.
+ *
+ * Returns 0 if PT_DTRACE is not set for the task, 1 if it is set and
+ * thread.singlestep_syscall is non-zero, and 2 otherwise.
+ */
+int singlestepping(void * t)
+{
+	struct task_struct *task = t;
+
+	if (!task)
+		task = current;
+
+	if (!(task->ptrace & PT_DTRACE))
+		return 0;
+
+	return task->thread.singlestep_syscall ? 1 : 2;
+}
+
+/*
+ * Only x86 and x86_64 have an arch_align_stack().
+ * All other arches have "#define arch_align_stack(x) (x)"
+ * in their asm/system.h
+ * As this is included in UML from asm-um/system-generic.h,
+ * we can use it to behave as the subarch does.
+ */
+#ifndef arch_align_stack
+/*
+ * Return sp rounded down to a 16-byte boundary.  Unless the current
+ * personality has ADDR_NO_RANDOMIZE or randomize_va_space is 0, sp is
+ * first lowered by get_random_int() % 8192 bytes.
+ */
+unsigned long arch_align_stack(unsigned long sp)
+{
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ sp -= get_random_int() % 8192;
+ return sp & ~0xf;
+}
+#endif
+
+/*
+ * Scan the kernel stack of p, starting at the stack pointer saved in its
+ * switch_buf, and return the first word that is a kernel text address
+ * found after a word that lies in the scheduler functions.
+ *
+ * Returns 0 if p is NULL, is the current task, is TASK_RUNNING, has no
+ * stack page, has a saved stack pointer below its stack page, or if no
+ * such word is found.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long base, addr, word;
+	bool past_sched = 0;
+
+	if ((p == NULL) || (p == current) || (p->state == TASK_RUNNING))
+		return 0;
+
+	/* Bail if the process has no kernel stack for some reason */
+	base = (unsigned long) task_stack_page(p);
+	if (base == 0)
+		return 0;
+
+	/*
+	 * Bail if the stack pointer is below the bottom of the kernel
+	 * stack for some reason
+	 */
+	addr = p->thread.switch_buf->JB_SP;
+	if (addr < base)
+		return 0;
+
+	for (; addr < base + THREAD_SIZE; addr += sizeof(unsigned long)) {
+		word = *((unsigned long *) addr);
+		if (in_sched_functions(word)) {
+			/* Ignore everything until we're above the scheduler */
+			past_sched = 1;
+			continue;
+		}
+		if (kernel_text_address(word) && past_sched)
+			return word;
+	}
+
+	return 0;
+}
+
+/*
+ * Save floating point registers into fpu using save_fp_registers() and
+ * the userspace_pid entry of the current CPU; returns its result.
+ *
+ * NOTE(review): the t argument is not used; the pid is taken from the
+ * current CPU's entry rather than from t.
+ */
+int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu)
+{
+ int cpu = current_thread_info()->cpu;
+
+ return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu);
+}
+
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
new file mode 100644
index 00000000000..06b19039050
--- /dev/null
+++ b/arch/um/kernel/ptrace.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/audit.h"
+#include "linux/ptrace.h"
+#include "linux/sched.h"
+#include "asm/uaccess.h"
+#include "skas_ptrace.h"
+
+
+
+/*
+ * Turn single-stepping on for child: set PT_DTRACE, clear
+ * thread.singlestep_syscall, and invoke SUBARCH_SET_SINGLESTEPPING(child, 1)
+ * when the subarch defines it.
+ */
+void user_enable_single_step(struct task_struct *child)
+{
+ child->ptrace |= PT_DTRACE;
+ child->thread.singlestep_syscall = 0;
+
+#ifdef SUBARCH_SET_SINGLESTEPPING
+ SUBARCH_SET_SINGLESTEPPING(child, 1);
+#endif
+}
+
+/*
+ * Turn single-stepping off for child: clear PT_DTRACE and
+ * thread.singlestep_syscall, and invoke SUBARCH_SET_SINGLESTEPPING(child, 0)
+ * when the subarch defines it.
+ */
+void user_disable_single_step(struct task_struct *child)
+{
+ child->ptrace &= ~PT_DTRACE;
+ child->thread.singlestep_syscall = 0;
+
+#ifdef SUBARCH_SET_SINGLESTEPPING
+ SUBARCH_SET_SINGLESTEPPING(child, 0);
+#endif
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching.
+ * Turns single-stepping off for child.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+ user_disable_single_step(child);
+}
+
+extern int peek_user(struct task_struct * child, long addr, long data);
+extern int poke_user(struct task_struct * child, long addr, long data);
+
+/*
+ * Handle the ptrace requests implemented here for child.  Any request not
+ * listed is passed to ptrace_request() and, if that returns -EIO, to
+ * subarch_ptrace().
+ *
+ * addr and data are interpreted per request; requests that transfer a
+ * buffer treat data as a user-space pointer.
+ *
+ * Returns the result of the request.  Failed user-space accesses are
+ * reported as -EIO, including faults that occur part-way through
+ * PTRACE_GETREGS or PTRACE_SETREGS.
+ */
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	int i, ret;
+	unsigned long __user *p = (void __user *)data;
+	void __user *vp = p;
+
+	switch (request) {
+	/* read the word at location addr in the USER area. */
+	case PTRACE_PEEKUSR:
+		ret = peek_user(child, addr, data);
+		break;
+
+	/* write the word at location addr in the USER area */
+	case PTRACE_POKEUSR:
+		ret = poke_user(child, addr, data);
+		break;
+
+	case PTRACE_SYSEMU:
+	case PTRACE_SYSEMU_SINGLESTEP:
+		ret = -EIO;
+		break;
+
+#ifdef PTRACE_GETREGS
+	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
+		if (!access_ok(VERIFY_WRITE, p, MAX_REG_OFFSET)) {
+			ret = -EIO;
+			break;
+		}
+		ret = 0;
+		for ( i = 0; i < MAX_REG_OFFSET; i += sizeof(long) ) {
+			/* a store can still fault after access_ok() */
+			if (__put_user(getreg(child, i), p)) {
+				ret = -EIO;
+				break;
+			}
+			p++;
+		}
+		break;
+	}
+#endif
+#ifdef PTRACE_SETREGS
+	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
+		unsigned long tmp = 0;
+		if (!access_ok(VERIFY_READ, p, MAX_REG_OFFSET)) {
+			ret = -EIO;
+			break;
+		}
+		ret = 0;
+		for ( i = 0; i < MAX_REG_OFFSET; i += sizeof(long) ) {
+			/* never load a register from a read that failed */
+			if (__get_user(tmp, p)) {
+				ret = -EIO;
+				break;
+			}
+			putreg(child, i, tmp);
+			p++;
+		}
+		break;
+	}
+#endif
+	case PTRACE_GET_THREAD_AREA:
+		ret = ptrace_get_thread_area(child, addr, vp);
+		break;
+
+	case PTRACE_SET_THREAD_AREA:
+		ret = ptrace_set_thread_area(child, addr, vp);
+		break;
+
+	case PTRACE_FAULTINFO: {
+		/*
+		 * Take the info from thread->arch->faultinfo,
+		 * but transfer max. sizeof(struct ptrace_faultinfo).
+		 * On i386, ptrace_faultinfo is smaller!
+		 */
+		ret = copy_to_user(p, &child->thread.arch.faultinfo,
+				   sizeof(struct ptrace_faultinfo)) ?
+			-EIO : 0;
+		break;
+	}
+
+#ifdef PTRACE_LDT
+	case PTRACE_LDT: {
+		struct ptrace_ldt ldt;
+
+		if (copy_from_user(&ldt, p, sizeof(ldt))) {
+			ret = -EIO;
+			break;
+		}
+
+		/*
+		 * This one is confusing, so just punt and return -EIO for
+		 * now
+		 */
+		ret = -EIO;
+		break;
+	}
+#endif
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		if (ret == -EIO)
+			ret = subarch_ptrace(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Force a SIGTRAP with si_code TRAP_BRKPT on tsk.  si_addr is the
+ * instruction pointer from regs when regs describe user mode, NULL
+ * otherwise.  The error_code argument is not used.
+ */
+static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
+ int error_code)
+{
+ struct siginfo info;
+
+ memset(&info, 0, sizeof(info));
+ info.si_signo = SIGTRAP;
+ info.si_code = TRAP_BRKPT;
+
+ /* User-mode eip? */
+ info.si_addr = UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL;
+
+ /* Send us the fake SIGTRAP */
+ force_sig_info(SIGTRAP, &info, tsk);
+}
+
+/*
+ * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and
+ * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check
+ *
+ * Syscall tracing hook.  entryexit is 0 on syscall entry and non-zero on
+ * exit.  It feeds the audit subsystem, fakes a debug trap on exit when
+ * single-stepping, and notifies the tracer when the task has
+ * TIF_SYSCALL_TRACE set and is ptraced.
+ */
+void syscall_trace(struct uml_pt_regs *regs, int entryexit)
+{
+ int is_singlestep = (current->ptrace & PT_DTRACE) && entryexit;
+ int tracesysgood;
+
+ if (!entryexit)
+ audit_syscall_entry(HOST_AUDIT_ARCH,
+ UPT_SYSCALL_NR(regs),
+ UPT_SYSCALL_ARG1(regs),
+ UPT_SYSCALL_ARG2(regs),
+ UPT_SYSCALL_ARG3(regs),
+ UPT_SYSCALL_ARG4(regs));
+ else
+ audit_syscall_exit(regs);
+
+ /* Fake a debug trap */
+ if (is_singlestep)
+ send_sigtrap(current, regs, 0);
+
+ if (!test_thread_flag(TIF_SYSCALL_TRACE))
+ return;
+
+ if (!(current->ptrace & PT_PTRACED))
+ return;
+
+ /*
+ * the 0x80 provides a way for the tracing parent to distinguish
+ * between a syscall stop and SIGTRAP delivery
+ */
+ tracesysgood = (current->ptrace & PT_TRACESYSGOOD);
+ ptrace_notify(SIGTRAP | (tracesysgood ? 0x80 : 0));
+
+ if (entryexit) /* force do_signal() --> is_syscall() */
+ set_thread_flag(TIF_SIGPENDING);
+
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+ * for normal use. strace only continues with a signal if the
+ * stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+}
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
new file mode 100644
index 00000000000..4d93dff6b37
--- /dev/null
+++ b/arch/um/kernel/reboot.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "linux/slab.h"
+#include "kern_util.h"
+#include "os.h"
+#include "skas.h"
+
+void (*pm_power_off)(void);
+
+/*
+ * Kill the host processes used to run userspace.  With proc_mm set only
+ * userspace_pid[0] is killed; otherwise the host pid stored in the mm
+ * context of every process that has an mm is killed.
+ */
+static void kill_off_processes(void)
+{
+ if (proc_mm)
+ /*
+ * FIXME: need to loop over userspace_pids
+ */
+ os_kill_ptraced_process(userspace_pid[0], 1);
+ else {
+ struct task_struct *p;
+ int pid;
+
+ for_each_process(p) {
+ /* tasks without an mm have no host pid recorded here */
+ if (p->mm == NULL)
+ continue;
+
+ pid = p->mm->context.id.u.pid;
+ os_kill_ptraced_process(pid, 1);
+ }
+ }
+}
+
+/*
+ * Shutdown path shared by restart and power-off: clear kmalloc_ok, run
+ * the UML exit calls, then kill the userspace host processes.
+ */
+void uml_cleanup(void)
+{
+ kmalloc_ok = 0;
+ do_uml_exitcalls();
+ kill_off_processes();
+}
+
+/* Run uml_cleanup() and then reboot_skas().  The argument is ignored. */
+void machine_restart(char * __unused)
+{
+ uml_cleanup();
+ reboot_skas();
+}
+
+/* Run uml_cleanup() and then halt_skas(). */
+void machine_power_off(void)
+{
+ uml_cleanup();
+ halt_skas();
+}
+
+/* Halting is handled exactly like powering off. */
+void machine_halt(void)
+{
+ machine_power_off();
+}
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
new file mode 100644
index 00000000000..2b272b63b51
--- /dev/null
+++ b/arch/um/kernel/sigio.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/interrupt.h>
+#include "irq_kern.h"
+#include "os.h"
+#include "sigio.h"
+
+/* Protected by sigio_lock() called from write_sigio_workaround */
+static int sigio_irq_fd = -1;
+
+/*
+ * IRQ handler for SIGIO_WRITE_IRQ: read one byte from sigio_irq_fd,
+ * reactivate the descriptor, and report the interrupt as handled.
+ * The result of the read is not checked.
+ */
+static irqreturn_t sigio_interrupt(int irq, void *data)
+{
+ char c;
+
+ os_read_file(sigio_irq_fd, &c, sizeof(c));
+ reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ);
+ return IRQ_HANDLED;
+}
+
+/*
+ * Register sigio_interrupt() as the read-IRQ handler for fd on
+ * SIGIO_WRITE_IRQ and remember fd in sigio_irq_fd.
+ *
+ * Returns 0 on success.  If the IRQ request fails the error is logged,
+ * sigio_irq_fd is left unchanged, and -1 is returned.
+ */
+int write_sigio_irq(int fd)
+{
+	int err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ,
+				 sigio_interrupt,
+				 IRQF_DISABLED|IRQF_SAMPLE_RANDOM,
+				 "write sigio", NULL);
+
+	if (!err) {
+		sigio_irq_fd = fd;
+		return 0;
+	}
+
+	printk(KERN_ERR "write_sigio_irq : um_request_irq failed, "
+	       "err = %d\n", err);
+	return -1;
+}
+
+/*
+ * These are called from os-Linux/sigio.c to protect its pollfds arrays.
+ * Both operate on the single file-local spinlock below.
+ */
+static DEFINE_SPINLOCK(sigio_spinlock);
+
+/* Acquire sigio_spinlock. */
+void sigio_lock(void)
+{
+ spin_lock(&sigio_spinlock);
+}
+
+/* Release sigio_spinlock. */
+void sigio_unlock(void)
+{
+ spin_unlock(&sigio_spinlock);
+}
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
new file mode 100644
index 00000000000..e8b889d3bce
--- /dev/null
+++ b/arch/um/kernel/signal.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <asm/siginfo.h>
+#include <asm/signal.h>
+#include <asm/unistd.h>
+#include "frame_kern.h"
+#include "kern_util.h"
+
+EXPORT_SYMBOL(block_signals);
+EXPORT_SYMBOL(unblock_signals);
+
+#define _S(nr) (1<<((nr)-1))
+
+#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))
+
+/*
+ * OK, we're invoking a handler.
+ *
+ * Prepares delivery of signal @signr to the handler described by @ka:
+ * fixes up the return value / restart state of an interrupted system call,
+ * chooses the stack for the signal frame, builds the frame and then updates
+ * current->blocked.
+ *
+ * Returns the result of setup_signal_stack_sc()/setup_signal_stack_si().
+ * A non-zero result means the frame was not set up; in that case @oldset is
+ * put back as the blocked mask and force_sigsegv() is called.
+ */
+static int handle_signal(struct pt_regs *regs, unsigned long signr,
+ struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *oldset)
+{
+ unsigned long sp;
+ int err;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+ /* Did we come from a system call? */
+ if (PT_REGS_SYSCALL_NR(regs) >= 0) {
+ /* If so, check system call restarting.. */
+ switch (PT_REGS_SYSCALL_RET(regs)) {
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ PT_REGS_SYSCALL_RET(regs) = -EINTR;
+ break;
+
+ case -ERESTARTSYS:
+ /* Without SA_RESTART the call fails with -EINTR instead. */
+ if (!(ka->sa.sa_flags & SA_RESTART)) {
+ PT_REGS_SYSCALL_RET(regs) = -EINTR;
+ break;
+ }
+ /* fallthrough */
+ case -ERESTARTNOINTR:
+ PT_REGS_RESTART_SYSCALL(regs);
+ PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs);
+ break;
+ }
+ }
+
+ /*
+ * Use the top of the alternate signal stack when the handler asked for
+ * it (SA_ONSTACK) and sas_ss_flags(sp) reports 0 for the current sp.
+ */
+ sp = PT_REGS_SP(regs);
+ if ((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0))
+ sp = current->sas_ss_sp + current->sas_ss_size;
+
+#ifdef CONFIG_ARCH_HAS_SC_SIGNALS
+ if (!(ka->sa.sa_flags & SA_SIGINFO))
+ err = setup_signal_stack_sc(sp, signr, ka, regs, oldset);
+ else
+#endif
+ err = setup_signal_stack_si(sp, signr, ka, regs, info, oldset);
+
+ if (err) {
+ /* Frame setup failed: restore the old mask and force a SIGSEGV. */
+ spin_lock_irq(&current->sighand->siglock);
+ current->blocked = *oldset;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+ force_sigsegv(signr, current);
+ } else {
+ /*
+ * Block the handler's sa_mask, plus the signal itself unless
+ * SA_NODEFER is set.
+ */
+ spin_lock_irq(&current->sighand->siglock);
+ sigorsets(&current->blocked, &current->blocked,
+ &ka->sa.sa_mask);
+ if (!(ka->sa.sa_flags & SA_NODEFER))
+ sigaddset(&current->blocked, signr);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+ }
+
+ return err;
+}
+
+/*
+ * Fetch pending signals with get_signal_to_deliver() and hand them to
+ * handle_signal() until one is set up successfully or none remain, then fix
+ * up system call restart state and the saved signal mask.
+ *
+ * Returns 1 if get_signal_to_deliver() returned a signal at least once,
+ * 0 otherwise.
+ */
+static int kern_do_signal(struct pt_regs *regs)
+{
+ struct k_sigaction ka_copy;
+ siginfo_t info;
+ sigset_t *oldset;
+ int sig, handled_sig = 0;
+
+ /* The mask to restore is the saved one if TIF_RESTORE_SIGMASK is set. */
+ if (test_thread_flag(TIF_RESTORE_SIGMASK))
+ oldset = &current->saved_sigmask;
+ else
+ oldset = &current->blocked;
+
+ while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) {
+ handled_sig = 1;
+ /* Whee! Actually deliver the signal. */
+ if (!handle_signal(regs, sig, &ka_copy, &info, oldset)) {
+ /*
+ * a signal was successfully delivered; the saved
+ * sigmask will have been stored in the signal frame,
+ * and will be restored by sigreturn, so we can simply
+ * clear the TIF_RESTORE_SIGMASK flag
+ */
+ if (test_thread_flag(TIF_RESTORE_SIGMASK))
+ clear_thread_flag(TIF_RESTORE_SIGMASK);
+ break;
+ }
+ }
+
+ /* Did we come from a system call? */
+ if (!handled_sig && (PT_REGS_SYSCALL_NR(regs) >= 0)) {
+ /* Restart the system call - no handlers present */
+ switch (PT_REGS_SYSCALL_RET(regs)) {
+ case -ERESTARTNOHAND:
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs);
+ PT_REGS_RESTART_SYSCALL(regs);
+ break;
+ case -ERESTART_RESTARTBLOCK:
+ /* Restart through __NR_restart_syscall rather than the original call. */
+ PT_REGS_ORIG_SYSCALL(regs) = __NR_restart_syscall;
+ PT_REGS_RESTART_SYSCALL(regs);
+ break;
+ }
+ }
+
+ /*
+ * This closes a way to execute a system call on the host. If
+ * you set a breakpoint on a system call instruction and singlestep
+ * from it, the tracing thread used to PTRACE_SINGLESTEP the process
+ * rather than PTRACE_SYSCALL it, allowing the system call to execute
+ * on the host. The tracing thread will check this flag and
+ * PTRACE_SYSCALL if necessary.
+ */
+ if (current->ptrace & PT_DTRACE)
+ current->thread.singlestep_syscall =
+ is_syscall(PT_REGS_IP(&current->thread.regs));
+
+ /*
+ * if there's no signal to deliver, we just put the saved sigmask
+ * back
+ */
+ if (!handled_sig && test_thread_flag(TIF_RESTORE_SIGMASK)) {
+ clear_thread_flag(TIF_RESTORE_SIGMASK);
+ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+ }
+ return handled_sig;
+}
+
+/* Run kern_do_signal() on the current task's saved registers. */
+int do_signal(void)
+{
+ return kern_do_signal(&current->thread.regs);
+}
+
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ *
+ * @history0 and @history1 are not used. The previous mask is kept in
+ * current->saved_sigmask and TIF_RESTORE_SIGMASK is set after schedule()
+ * returns. Always returns -ERESTARTNOHAND.
+ */
+long sys_sigsuspend(int history0, int history1, old_sigset_t mask)
+{
+ /* SIGKILL and SIGSTOP are stripped from the requested mask. */
+ mask &= _BLOCKABLE;
+ spin_lock_irq(&current->sighand->siglock);
+ current->saved_sigmask = current->blocked;
+ siginitset(&current->blocked, mask);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ set_thread_flag(TIF_RESTORE_SIGMASK);
+ return -ERESTARTNOHAND;
+}
+
+/*
+ * sigaltstack system call: forwards @uss and @uoss to do_sigaltstack()
+ * together with the current task's saved stack pointer.
+ */
+long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
+{
+ return do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs));
+}
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
new file mode 100644
index 00000000000..0b76d8869c9
--- /dev/null
+++ b/arch/um/kernel/skas/Makefile
@@ -0,0 +1,15 @@
+#
+# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+# Licensed under the GPL
+#
+
+obj-y := clone.o mmu.o process.o syscall.o uaccess.o
+
+# clone.o is in the stub, so it can't be built with profiling.
+# Hardened GCC also auto-enables -fpic, but we need %ebx, so -fpic can't work
+# here -> build clone.o with hardening disabled.
+
+CFLAGS_clone.o := $(CFLAGS_NO_HARDENING)
+UNPROFILE_OBJS := clone.o
+
+include arch/um/scripts/Makefile.rules
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
new file mode 100644
index 00000000000..e1fd066a352
--- /dev/null
+++ b/arch/um/kernel/skas/clone.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <signal.h>
+#include <sched.h>
+#include <asm/unistd.h>
+#include <sys/time.h>
+#include "as-layout.h"
+#include "ptrace_user.h"
+#include "stub-data.h"
+#include "sysdep/stub.h"
+
+/*
+ * This is in a separate file because it needs to be compiled with any
+ * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled
+ *
+ * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize
+ * on some systems.
+ */
+
+/*
+ * Placed in the .__syscall_stub section. Issues clone(); the side that sees
+ * a zero return goes on to do PTRACE_TRACEME, set ITIMER_VIRTUAL from
+ * data->timer and call remap_stack(). Any non-zero result on the way is
+ * stored in data->err. Every path ends in trap_myself().
+ */
+void __attribute__ ((__section__ (".__syscall_stub")))
+stub_clone_handler(void)
+{
+ struct stub_data *data = (struct stub_data *) STUB_DATA;
+ long err;
+
+ /* Second argument is the stack pointer passed to clone(). */
+ err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
+ STUB_DATA + UM_KERN_PAGE_SIZE / 2 - sizeof(void *));
+ /* Non-zero: either the parent (err is the pid) or a failed clone. */
+ if (err != 0)
+ goto out;
+
+ err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
+ if (err)
+ goto out;
+
+ err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL,
+ (long) &data->timer, 0);
+ if (err)
+ goto out;
+
+ remap_stack(data->fd, data->offset);
+ goto done;
+
+ out:
+ /*
+ * save current result.
+ * Parent: pid;
+ * child: retcode of mmap already saved and it jumps around this
+ * assignment
+ */
+ data->err = err;
+ done:
+ trap_myself();
+}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
new file mode 100644
index 00000000000..1aee587e9c5
--- /dev/null
+++ b/arch/um/kernel/skas/mmu.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/mm.h"
+#include "linux/sched.h"
+#include "linux/slab.h"
+#include "asm/pgalloc.h"
+#include "asm/pgtable.h"
+#include "as-layout.h"
+#include "os.h"
+#include "skas.h"
+
+extern int __syscall_stub_start;
+
+/*
+ * Point the page-table entry for user address @proc in @mm at the page
+ * backing kernel address @kernel, marked present and readable. The pud, pmd
+ * and pte levels are obtained with pud_alloc()/pmd_alloc()/pte_alloc_map().
+ *
+ * Returns 0 on success, -ENOMEM if any of those allocations fails.
+ */
+static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
+			 unsigned long kernel)
+{
+	pgd_t *pgd = pgd_offset(mm, proc);
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pud = pud_alloc(mm, pgd, proc);
+	if (pud == NULL)
+		return -ENOMEM;
+
+	pmd = pmd_alloc(mm, pud, proc);
+	if (pmd == NULL) {
+		pud_free(mm, pud);
+		return -ENOMEM;
+	}
+
+	pte = pte_alloc_map(mm, NULL, pmd, proc);
+	if (pte == NULL) {
+		/* Unwind in reverse order of allocation. */
+		pmd_free(mm, pmd);
+		pud_free(mm, pud);
+		return -ENOMEM;
+	}
+
+	*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
+	*pte = pte_mkread(*pte);
+	return 0;
+}
+
+/*
+ * Initialise the architecture context of the new address space @mm.
+ *
+ * Allocates a zeroed stub stack page when skas_needs_stub is set, then either
+ * opens a host mm via new_mm() (proc_mm) or creates a host process with
+ * copy_context_skas0()/start_userspace(), and finally calls init_new_ldt().
+ * @task is not used.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+int init_new_context(struct task_struct *task, struct mm_struct *mm)
+{
+ struct mm_context *from_mm = NULL;
+ struct mm_context *to_mm = &mm->context;
+ unsigned long stack = 0;
+ int ret = -ENOMEM;
+
+ if (skas_needs_stub) {
+ stack = get_zeroed_page(GFP_KERNEL);
+ if (stack == 0)
+ goto out;
+ }
+
+ to_mm->id.stack = stack;
+ /* from_mm stays NULL when current has no mm or is running on init_mm. */
+ if (current->mm != NULL && current->mm != &init_mm)
+ from_mm = &current->mm->context;
+
+ if (proc_mm) {
+ ret = new_mm(stack);
+ if (ret < 0) {
+ printk(KERN_ERR "init_new_context_skas - "
+ "new_mm failed, errno = %d\n", ret);
+ goto out_free;
+ }
+ to_mm->id.u.mm_fd = ret;
+ }
+ else {
+ /* Copy the parent's host process if there is one, else start a fresh one. */
+ if (from_mm)
+ to_mm->id.u.pid = copy_context_skas0(stack,
+ from_mm->id.u.pid);
+ else to_mm->id.u.pid = start_userspace(stack);
+
+ if (to_mm->id.u.pid < 0) {
+ ret = to_mm->id.u.pid;
+ goto out_free;
+ }
+ }
+
+ /*
+ * NOTE(review): if init_new_ldt() fails, the out_free path below only
+ * frees the stub stack page; the mm_fd or host pid obtained above is
+ * not released in this function - confirm whether a caller handles it.
+ */
+ ret = init_new_ldt(to_mm, from_mm);
+ if (ret < 0) {
+ printk(KERN_ERR "init_new_context_skas - init_ldt"
+ " failed, errno = %d\n", ret);
+ goto out_free;
+ }
+
+ to_mm->stub_pages = NULL;
+
+ return 0;
+
+ out_free:
+ if (to_mm->id.stack != 0)
+ free_page(to_mm->id.stack);
+ out:
+ return ret;
+}
+
+/*
+ * Install the syscall stub into the address space @mm.
+ *
+ * Does nothing unless skas_needs_stub is set. Otherwise maps the stub code
+ * page at STUB_CODE and the mm's stub stack page at STUB_DATA, and registers
+ * both pages as a special mapping covering STUB_START..STUB_END. @oldmm is
+ * not used.
+ *
+ * On any failure the current task gets force_sigsegv(SIGSEGV).
+ */
+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+	struct page **pages;
+	int err, ret;
+
+	if (!skas_needs_stub)
+		return;
+
+	ret = init_stub_pte(mm, STUB_CODE,
+			    (unsigned long) &__syscall_stub_start);
+	if (ret)
+		goto out;
+
+	ret = init_stub_pte(mm, STUB_DATA, mm->context.id.stack);
+	if (ret)
+		goto out;
+
+	pages = kmalloc(2 * sizeof(struct page *), GFP_KERNEL);
+	if (pages == NULL) {
+		printk(KERN_ERR "arch_dup_mmap failed to allocate 2 page "
+		       "pointers\n");
+		goto out;
+	}
+
+	pages[0] = virt_to_page(&__syscall_stub_start);
+	pages[1] = virt_to_page(mm->context.id.stack);
+	mm->context.stub_pages = pages;
+
+	/* dup_mmap already holds mmap_sem */
+	err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START,
+				      VM_READ | VM_MAYREAD | VM_EXEC |
+				      VM_MAYEXEC | VM_DONTCOPY, pages);
+	if (err) {
+		printk(KERN_ERR "install_special_mapping returned %d\n", err);
+		goto out_free;
+	}
+	return;
+
+out_free:
+	/*
+	 * Clear the context's pointer before freeing the array:
+	 * arch_exit_mmap() kfree()s stub_pages whenever it is non-NULL, so a
+	 * stale pointer here would be freed a second time.
+	 */
+	mm->context.stub_pages = NULL;
+	kfree(pages);
+out:
+	force_sigsegv(SIGSEGV, current);
+}
+
+/*
+ * Tear down the stub for @mm: free the stub_pages array and clear the page
+ * table entries for STUB_CODE and STUB_DATA where they exist.
+ */
+void arch_exit_mmap(struct mm_struct *mm)
+{
+	pte_t *stub_pte;
+
+	/* kfree(NULL) is a no-op, so no explicit NULL test is needed. */
+	kfree(mm->context.stub_pages);
+
+	stub_pte = virt_to_pte(mm, STUB_CODE);
+	if (stub_pte != NULL)
+		pte_clear(mm, STUB_CODE, stub_pte);
+
+	stub_pte = virt_to_pte(mm, STUB_DATA);
+	if (stub_pte != NULL)
+		pte_clear(mm, STUB_DATA, stub_pte);
+}
+
+/*
+ * Release the host-side resources of @mm's context: close the mm file
+ * descriptor (proc_mm) or kill the host process, free the stub stack page
+ * when skas_needs_stub is set, and free the LDT.
+ */
+void destroy_context(struct mm_struct *mm)
+{
+ struct mm_context *mmu = &mm->context;
+
+ if (proc_mm)
+ os_close_file(mmu->id.u.mm_fd);
+ else {
+ /*
+ * If init_new_context wasn't called, this will be
+ * zero, resulting in a kill(0), which will result in the
+ * whole UML suddenly dying. Also, cover negative and
+ * 1 cases, since they shouldn't happen either.
+ */
+ if (mmu->id.u.pid < 2) {
+ printk(KERN_ERR "corrupt mm_context - pid = %d\n",
+ mmu->id.u.pid);
+ /* Note: this early return also skips the stack and LDT frees below. */
+ return;
+ }
+ os_kill_ptraced_process(mmu->id.u.pid, 1);
+ }
+
+ if (skas_needs_stub)
+ free_page(mmu->id.stack);
+
+ free_ldt(mmu);
+}
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
new file mode 100644
index 00000000000..2e9852c0d48
--- /dev/null
+++ b/arch/um/kernel/skas/process.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/init.h"
+#include "linux/sched.h"
+#include "as-layout.h"
+#include "kern.h"
+#include "os.h"
+#include "skas.h"
+
+/*
+ * Open a new host address space through /proc/mm and, when skas_needs_stub
+ * is set, map the stub pages into it using @stack as the stub stack.
+ *
+ * Returns the open file descriptor on success. On failure returns the
+ * negative value from os_open_file() or the non-zero value from
+ * map_stub_pages() (the descriptor is closed first in that case).
+ */
+int new_mm(unsigned long stack)
+{
+	int mm_fd = os_open_file("/proc/mm",
+				 of_cloexec(of_write(OPENFLAGS())), 0);
+	int ret;
+
+	if (mm_fd < 0)
+		return mm_fd;
+
+	if (!skas_needs_stub)
+		return mm_fd;
+
+	ret = map_stub_pages(mm_fd, STUB_CODE, STUB_DATA, stack);
+	if (ret == 0)
+		return mm_fd;
+
+	os_close_file(mm_fd);
+	return ret;
+}
+
+extern void start_kernel(void);
+
+/*
+ * Thread body installed by start_uml(): blocks signals, records the host pid
+ * and current task in cpu_tasks[0], then enters start_kernel().
+ * @unused is ignored. Returns 0 if start_kernel() ever returns.
+ */
+static int __init start_kernel_proc(void *unused)
+{
+	block_signals();
+
+	cpu_tasks[0].pid = os_getpid();
+	cpu_tasks[0].task = current;
+#ifdef CONFIG_SMP
+	cpu_online_map = cpumask_of_cpu(0);
+#endif
+	start_kernel();
+	return 0;
+}
+
+extern int userspace_pid[];
+
+extern char cpu0_irqstack[];
+
+/*
+ * Boot entry: sets up the cpu0 IRQ stack as the signal stack, starts the
+ * userspace process when proc_mm is set, initialises signal handling, and
+ * starts the idle thread with start_kernel_proc() as init_task's thread
+ * procedure.
+ *
+ * Returns whatever start_idle_thread() returns; calls exit(1) if
+ * start_userspace() fails.
+ */
+int __init start_uml(void)
+{
+ stack_protections((unsigned long) &cpu0_irqstack);
+ set_sigstack(cpu0_irqstack, THREAD_SIZE);
+ if (proc_mm) {
+ userspace_pid[0] = start_userspace(0);
+ if (userspace_pid[0] < 0) {
+ printf("start_uml - start_userspace returned %d\n",
+ userspace_pid[0]);
+ exit(1);
+ }
+ }
+
+ init_new_thread_signals();
+
+ /* Request that init_task's thread run start_kernel_proc(NULL). */
+ init_task.thread.request.u.thread.proc = start_kernel_proc;
+ init_task.thread.request.u.thread.arg = NULL;
+ return start_idle_thread(task_stack_page(&init_task),
+ &init_task.thread.switch_buf);
+}
+
+/*
+ * Return the stub stack address stored in the current task's mm context,
+ * or 0 if the current task has no mm.
+ */
+unsigned long current_stub_stack(void)
+{
+	struct mm_struct *mm = current->mm;
+
+	return mm ? mm->context.id.stack : 0;
+}
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
new file mode 100644
index 00000000000..f5173e1ec3a
--- /dev/null
+++ b/arch/um/kernel/skas/syscall.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/kernel.h"
+#include "linux/ptrace.h"
+#include "kern_util.h"
+#include "sysdep/ptrace.h"
+#include "sysdep/syscalls.h"
+
+extern int syscall_table_size;
+#define NR_SYSCALLS (syscall_table_size / sizeof(void *))
+
+/*
+ * Execute the system call described by @r.
+ *
+ * Calls syscall_trace() before (0) and after (1) the call. A system call
+ * number that is negative or not below NR_SYSCALLS yields -ENOSYS; otherwise
+ * EXECUTE_SYSCALL() runs it. The result is stored with
+ * REGS_SET_SYSCALL_RETURN().
+ */
+void handle_syscall(struct uml_pt_regs *r)
+{
+ struct pt_regs *regs = container_of(r, struct pt_regs, regs);
+ long result;
+ int syscall;
+
+ syscall_trace(r, 0);
+
+ /*
+ * This should go in the declaration of syscall, but when I do that,
+ * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing
+ * children at all, sometimes hanging when bash doesn't see the first
+ * ls exit.
+ * The assembly looks functionally the same to me. This is
+ * gcc version 4.0.1 20050727 (Red Hat 4.0.1-5)
+ * in case it's a compiler bug.
+ */
+ syscall = UPT_SYSCALL_NR(r);
+ if ((syscall >= NR_SYSCALLS) || (syscall < 0))
+ result = -ENOSYS;
+ else result = EXECUTE_SYSCALL(syscall, regs);
+
+ REGS_SET_SYSCALL_RETURN(r->gp, result);
+
+ syscall_trace(r, 1);
+}
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
new file mode 100644
index 00000000000..9fefd924fb4
--- /dev/null
+++ b/arch/um/kernel/skas/uaccess.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <asm/current.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include "kern_util.h"
+#include "os.h"
+
+/*
+ * Walk @mm's page tables for @addr and return a pointer to its pte.
+ *
+ * Returns NULL if @mm is NULL or if the pgd, pud or pmd entry on the way is
+ * not present. The returned pte itself is not checked for presence.
+ */
+pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (!mm)
+		return NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_present(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (pud_present(*pud)) {
+			pmd = pmd_offset(pud, addr);
+			if (pmd_present(*pmd))
+				return pte_offset_kernel(pmd, addr);
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the present pte for user address @virt in the current mm, faulting
+ * the page in through handle_page_fault() first if it is missing, not
+ * present, or (for @is_write) not writable.
+ *
+ * Returns NULL if the fault handler fails or if no present pte exists
+ * afterwards.
+ */
+static pte_t *maybe_map(unsigned long virt, int is_write)
+{
+	pte_t *pte = virt_to_pte(current->mm, virt);
+	int err, dummy_code;
+
+	if ((pte == NULL) || !pte_present(*pte) ||
+	    (is_write && !pte_write(*pte))) {
+		err = handle_page_fault(virt, 0, is_write, 1, &dummy_code);
+		if (err)
+			return NULL;
+		pte = virt_to_pte(current->mm, virt);
+	}
+
+	/*
+	 * virt_to_pte() can return NULL on the second lookup as well, so test
+	 * the pointer before dereferencing it.
+	 */
+	if ((pte == NULL) || !pte_present(*pte))
+		return NULL;
+
+	return pte;
+}
+
+/*
+ * Run @op on @len bytes at user address @addr, which must lie within one
+ * page. The page is looked up with maybe_map() and mapped with kmap_atomic();
+ * @op is called with the kernel-side address.
+ *
+ * Returns @op's result, or -1 if the page could not be mapped or
+ * UML_SETJMP() returned non-zero.
+ */
+static int do_op_one_page(unsigned long addr, int len, int is_write,
+ int (*op)(unsigned long addr, int len, void *arg), void *arg)
+{
+ jmp_buf buf;
+ struct page *page;
+ pte_t *pte;
+ int n, faulted;
+
+ pte = maybe_map(addr, is_write);
+ if (pte == NULL)
+ return -1;
+
+ page = pte_page(*pte);
+ /* Kernel mapping of the page plus the offset of addr within the page. */
+ addr = (unsigned long) kmap_atomic(page, KM_UML_USERCOPY) +
+ (addr & ~PAGE_MASK);
+
+ /*
+ * NOTE(review): presumably the fault path jumps back to buf via
+ * fault_catcher when an access inside op faults - confirm.
+ */
+ current->thread.fault_catcher = &buf;
+
+ faulted = UML_SETJMP(&buf);
+ if (faulted == 0)
+ n = (*op)(addr, len, arg);
+ else
+ n = -1;
+
+ current->thread.fault_catcher = NULL;
+
+ kunmap_atomic((void *)addr, KM_UML_USERCOPY);
+
+ return n;
+}
+
+/*
+ * Apply @op to the user range starting at @addr for @len bytes, one page at
+ * a time: the part up to the next page boundary, then whole pages, then the
+ * remainder.
+ *
+ * A positive return from @op stops the walk early and counts as success.
+ * Returns 0 on success, or the number of bytes not yet processed (including
+ * the failing chunk) when do_op_one_page() returns a negative value.
+ */
+static int buffer_op(unsigned long addr, int len, int is_write,
+ int (*op)(unsigned long, int, void *), void *arg)
+{
+ int size, remain, n;
+
+ /* Bytes up to the next page boundary; 0 if addr is already page aligned. */
+ size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len);
+ remain = len;
+
+ n = do_op_one_page(addr, size, is_write, op, arg);
+ if (n != 0) {
+ remain = (n < 0 ? remain : 0);
+ goto out;
+ }
+
+ addr += size;
+ remain -= size;
+ if (remain == 0)
+ goto out;
+
+ /* Whole pages while at least one full page remains. */
+ while (addr < ((addr + remain) & PAGE_MASK)) {
+ n = do_op_one_page(addr, PAGE_SIZE, is_write, op, arg);
+ if (n != 0) {
+ remain = (n < 0 ? remain : 0);
+ goto out;
+ }
+
+ addr += PAGE_SIZE;
+ remain -= PAGE_SIZE;
+ }
+ if (remain == 0)
+ goto out;
+
+ /* Trailing partial page. */
+ n = do_op_one_page(addr, remain, is_write, op, arg);
+ if (n != 0) {
+ remain = (n < 0 ? remain : 0);
+ goto out;
+ }
+
+ return 0;
+ out:
+ return remain;
+}
+
+/*
+ * buffer_op() callback: copy @len bytes from @from to the destination
+ * address stored in *@arg, then advance that stored address by @len.
+ * Always returns 0.
+ */
+static int copy_chunk_from_user(unsigned long from, int len, void *arg)
+{
+	unsigned long *dst_cursor = arg;
+
+	memcpy((void *) *dst_cursor, (void *) from, len);
+	*dst_cursor += len;
+	return 0;
+}
+
+/*
+ * Copy @n bytes from user address @from to kernel buffer @to.
+ *
+ * Under KERNEL_DS this is a plain memcpy() and returns 0. Otherwise returns
+ * @n if access_ok() rejects the range, else the result of buffer_op().
+ */
+int copy_from_user(void *to, const void __user *from, int n)
+{
+	if (segment_eq(get_fs(), KERNEL_DS)) {
+		memcpy(to, (__force void*)from, n);
+		return 0;
+	}
+
+	if (!access_ok(VERIFY_READ, from, n))
+		return n;
+
+	return buffer_op((unsigned long) from, n, 0, copy_chunk_from_user,
+			 &to);
+}
+EXPORT_SYMBOL(copy_from_user);
+
+/*
+ * buffer_op() callback: copy @len bytes to @to from the source address
+ * stored in *@arg, then advance that stored address by @len.
+ * Always returns 0.
+ */
+static int copy_chunk_to_user(unsigned long to, int len, void *arg)
+{
+	unsigned long *src_cursor = arg;
+
+	memcpy((void *) to, (void *) *src_cursor, len);
+	*src_cursor += len;
+	return 0;
+}
+
+/*
+ * Copy @n bytes from kernel buffer @from to user address @to.
+ *
+ * Under KERNEL_DS this is a plain memcpy() and returns 0. Otherwise returns
+ * @n if access_ok() rejects the range, else the result of buffer_op().
+ */
+int copy_to_user(void __user *to, const void *from, int n)
+{
+	if (segment_eq(get_fs(), KERNEL_DS)) {
+		memcpy((__force void *) to, from, n);
+		return 0;
+	}
+
+	if (!access_ok(VERIFY_WRITE, to, n))
+		return n;
+
+	return buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from);
+}
+EXPORT_SYMBOL(copy_to_user);
+
+/*
+ * buffer_op() callback: strncpy() up to @len bytes from @from into the
+ * destination stored in *@arg and advance that destination by the number of
+ * non-NUL characters copied.
+ *
+ * Returns 1 if the string ended inside this chunk, 0 otherwise.
+ */
+static int strncpy_chunk_from_user(unsigned long from, int len, void *arg)
+{
+	char **dst_cursor = arg;
+	char *dst = *dst_cursor;
+	int copied;
+
+	strncpy(dst, (void *) from, len);
+	copied = strnlen(dst, len);
+	*dst_cursor += copied;
+
+	return copied < len;
+}
+
+/*
+ * Copy a string of at most @count bytes from user address @src to @dst.
+ *
+ * Returns strnlen(dst, count) on success, or -EFAULT if access_ok() rejects
+ * the first byte of @src or buffer_op() reports a failure.
+ */
+int strncpy_from_user(char *dst, const char __user *src, int count)
+{
+	char *cursor = dst;
+
+	if (segment_eq(get_fs(), KERNEL_DS)) {
+		strncpy(dst, (__force void *) src, count);
+		return strnlen(dst, count);
+	}
+
+	if (!access_ok(VERIFY_READ, src, 1))
+		return -EFAULT;
+
+	if (buffer_op((unsigned long) src, count, 0,
+		      strncpy_chunk_from_user, &cursor) != 0)
+		return -EFAULT;
+
+	return strnlen(dst, count);
+}
+EXPORT_SYMBOL(strncpy_from_user);
+
+/* buffer_op() callback: zero @len bytes at @addr. Always returns 0. */
+static int clear_chunk(unsigned long addr, int len, void *unused)
+{
+ memset((void *) addr, 0, len);
+ return 0;
+}
+
+/*
+ * Zero @len bytes at user address @mem without the KERNEL_DS and access_ok()
+ * checks that clear_user() performs. Returns the result of buffer_op().
+ */
+int __clear_user(void __user *mem, int len)
+{
+ return buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL);
+}
+
+/*
+ * Zero @len bytes at user address @mem.
+ *
+ * Under KERNEL_DS this is a plain memset() and returns 0. Otherwise returns
+ * @len if access_ok() rejects the range, else the result of buffer_op().
+ */
+int clear_user(void __user *mem, int len)
+{
+	if (segment_eq(get_fs(), KERNEL_DS)) {
+		memset((__force void*)mem, 0, len);
+		return 0;
+	}
+
+	if (!access_ok(VERIFY_WRITE, mem, len))
+		return len;
+
+	return buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL);
+}
+EXPORT_SYMBOL(clear_user);
+
+/*
+ * buffer_op() callback: add the strnlen() of the @len bytes at @str to the
+ * int counter that @arg points to.
+ *
+ * Returns 1 if a NUL was found inside this chunk, 0 otherwise.
+ */
+static int strnlen_chunk(unsigned long str, int len, void *arg)
+{
+	int *total = arg;
+	int chunk_len = strnlen((void *) str, len);
+
+	*total += chunk_len;
+
+	return chunk_len < len;
+}
+
+/*
+ * Length of the user string at @str, looking at no more than @len bytes.
+ *
+ * Returns the counted length plus one, or -EFAULT if buffer_op() reports a
+ * failure.
+ */
+int strnlen_user(const void __user *str, int len)
+{
+	int total = 0;
+
+	if (segment_eq(get_fs(), KERNEL_DS))
+		return strnlen((__force char*)str, len) + 1;
+
+	if (buffer_op((unsigned long) str, len, 0, strnlen_chunk, &total) != 0)
+		return -EFAULT;
+
+	return total + 1;
+}
+EXPORT_SYMBOL(strnlen_user);
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
new file mode 100644
index 00000000000..155206a6690
--- /dev/null
+++ b/arch/um/kernel/smp.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/percpu.h"
+#include "asm/pgalloc.h"
+#include "asm/tlb.h"
+
+#ifdef CONFIG_SMP
+
+#include "linux/sched.h"
+#include "linux/module.h"
+#include "linux/threads.h"
+#include "linux/interrupt.h"
+#include "linux/err.h"
+#include "linux/hardirq.h"
+#include "asm/smp.h"
+#include "asm/processor.h"
+#include "asm/spinlock.h"
+#include "kern.h"
+#include "irq_user.h"
+#include "os.h"
+
+/* Per CPU bogomips and other parameters
+ * The only piece used here is the ipi pipe, which is set before SMP is
+ * started and never changed.
+ */
+struct cpuinfo_um cpu_data[NR_CPUS];
+
+/* A statistic, can be a little off */
+int num_reschedules_sent = 0;
+
+/* Not changed after boot */
+struct task_struct *idle_threads[NR_CPUS];
+
+/*
+ * Send a reschedule IPI to @cpu by writing "R" to its IPI pipe, and bump the
+ * num_reschedules_sent statistic. The write's result is not checked.
+ */
+void smp_send_reschedule(int cpu)
+{
+ os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1);
+ num_reschedules_sent++;
+}
+
+/*
+ * Ask every other online CPU to stop by writing "S" to its IPI pipe.
+ *
+ * Iterates the online mask itself rather than counting from 0 to
+ * num_online_cpus() - 1, so the right CPUs are signalled even if the online
+ * CPU ids are not contiguous.
+ */
+void smp_send_stop(void)
+{
+	int i;
+
+	printk(KERN_INFO "Stopping all CPUs...");
+	for_each_online_cpu(i) {
+		/* Never stop the CPU we are running on. */
+		if (i == current_thread->cpu)
+			continue;
+		os_write_file(cpu_data[i].ipi_pipe[1], "S", 1);
+	}
+	printk(KERN_CONT "done\n");
+}
+
+static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
+static cpumask_t cpu_callin_map = CPU_MASK_NONE;
+
+/*
+ * Thread body for a secondary CPU's idle task. @cpup carries the CPU number
+ * cast to a pointer.
+ *
+ * Creates the CPU's IPI pipe, marks the CPU in cpu_callin_map, spins until
+ * the CPU appears in smp_commenced_mask, marks it online and enters
+ * default_idle().
+ */
+static int idle_proc(void *cpup)
+{
+ int cpu = (int) cpup, err;
+
+ err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1);
+ if (err < 0)
+ panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err);
+
+ os_set_fd_async(cpu_data[cpu].ipi_pipe[0]);
+
+ wmb();
+ if (cpu_test_and_set(cpu, cpu_callin_map)) {
+ printk(KERN_ERR "huh, CPU#%d already present??\n", cpu);
+ BUG();
+ }
+
+ /* Wait for __cpu_up() to set this CPU in smp_commenced_mask. */
+ while (!cpu_isset(cpu, smp_commenced_mask))
+ cpu_relax();
+
+ notify_cpu_starting(cpu);
+ cpu_set(cpu, cpu_online_map);
+ default_idle();
+ return 0;
+}
+
+/*
+ * Fork the idle task for @cpu with idle_proc() as its thread procedure and
+ * record it in cpu_tasks[] and idle_threads[].
+ *
+ * Note that the function then panics unconditionally ("skas mode doesn't
+ * support SMP"), so the final return statement is not expected to be reached.
+ */
+static struct task_struct *idle_thread(int cpu)
+{
+ struct task_struct *new_task;
+
+ current->thread.request.u.thread.proc = idle_proc;
+ current->thread.request.u.thread.arg = (void *) cpu;
+ new_task = fork_idle(cpu);
+ if (IS_ERR(new_task))
+ panic("copy_process failed in idle_thread, error = %ld",
+ PTR_ERR(new_task));
+
+ cpu_tasks[cpu] = ((struct cpu_task)
+ { .pid = new_task->thread.mode.tt.extern_pid,
+ .task = new_task } );
+ idle_threads[cpu] = new_task;
+ panic("skas mode doesn't support SMP");
+ return new_task;
+}
+
+/*
+ * Prepare secondary CPUs for boot.
+ *
+ * Marks CPUs 0..ncpus-1 as possible, marks the boot CPU online and called
+ * in, creates the boot CPU's IPI pipe, then for every other CPU forks its
+ * idle thread and waits (bounded busy-wait) for it to appear in
+ * cpu_callin_map. @maxcpus is not used.
+ */
+void smp_prepare_cpus(unsigned int maxcpus)
+{
+	struct task_struct *idle;
+	unsigned long waittime;
+	int err, cpu, me = smp_processor_id();
+	int i;
+
+	for (i = 0; i < ncpus; ++i)
+		set_cpu_possible(i, true);
+
+	cpu_clear(me, cpu_online_map);
+	cpu_set(me, cpu_online_map);
+	cpu_set(me, cpu_callin_map);
+
+	err = os_pipe(cpu_data[me].ipi_pipe, 1, 1);
+	if (err < 0)
+		panic("CPU#0 failed to create IPI pipe, errno = %d", -err);
+
+	os_set_fd_async(cpu_data[me].ipi_pipe[0]);
+
+	for (cpu = 1; cpu < ncpus; cpu++) {
+		printk(KERN_INFO "Booting processor %d...\n", cpu);
+
+		idle = idle_thread(cpu);
+
+		init_idle(idle, cpu);
+
+		waittime = 200000000;
+		while (waittime-- && !cpu_isset(cpu, cpu_callin_map))
+			cpu_relax();
+
+		/*
+		 * Report against cpu_callin_map, the mask polled above; the
+		 * previous "cpu_calling_map" is not defined anywhere in this
+		 * file.
+		 */
+		printk(KERN_INFO "%s\n",
+		       cpu_isset(cpu, cpu_callin_map) ? "done" : "failed");
+	}
+}
+
+/* Mark the CPU this runs on as online. */
+void smp_prepare_boot_cpu(void)
+{
+ cpu_set(smp_processor_id(), cpu_online_map);
+}
+
+/*
+ * Release @cpu from its wait in idle_proc() by setting it in
+ * smp_commenced_mask, then spin (with no timeout) until it shows up in
+ * cpu_online_map. Always returns 0.
+ */
+int __cpu_up(unsigned int cpu)
+{
+ cpu_set(cpu, smp_commenced_mask);
+ while (!cpu_isset(cpu, cpu_online_map))
+ mb();
+ return 0;
+}
+
+/* Stub: only logs its own name. @multiplier is ignored; always returns 0. */
+int setup_profiling_timer(unsigned int multiplier)
+{
+ printk(KERN_INFO "setup_profiling_timer\n");
+ return 0;
+}
+
+void smp_call_function_slave(int cpu);
+
+/*
+ * Drain @cpu's IPI pipe, acting on each command byte:
+ *   'C' - run the pending cross-CPU call (smp_call_function_slave())
+ *   'R' - scheduler_ipi()
+ *   'S' - log and then loop in pause() forever
+ * Any other byte is logged as an unknown IPI.
+ */
+void IPI_handler(int cpu)
+{
+ unsigned char c;
+ int fd;
+
+ fd = cpu_data[cpu].ipi_pipe[0];
+ /* Keep going for as long as a full byte can be read. */
+ while (os_read_file(fd, &c, 1) == 1) {
+ switch (c) {
+ case 'C':
+ smp_call_function_slave(cpu);
+ break;
+
+ case 'R':
+ scheduler_ipi();
+ break;
+
+ case 'S':
+ printk(KERN_INFO "CPU#%d stopping\n", cpu);
+ while (1)
+ pause();
+ break;
+
+ default:
+ printk(KERN_ERR "CPU#%d received unknown IPI [%c]!\n",
+ cpu, c);
+ break;
+ }
+ }
+}
+
+/* Map the host pid of the calling thread to a processor id. */
+int hard_smp_processor_id(void)
+{
+ return pid_to_processor_id(os_getpid());
+}
+
+static DEFINE_SPINLOCK(call_lock);
+static atomic_t scf_started;
+static atomic_t scf_finished;
+static void (*func)(void *info);
+static void *info;
+
+/*
+ * Receiver side of smp_call_function(): runs the published func(info),
+ * incrementing scf_started before and scf_finished after the call.
+ * @cpu is not used.
+ */
+void smp_call_function_slave(int cpu)
+{
+ atomic_inc(&scf_started);
+ (*func)(info);
+ atomic_inc(&scf_finished);
+}
+
+/*
+ * Ask the other online CPUs to run @_func(@_info).
+ *
+ * Publishes the function and argument under call_lock, writes "C" to the IPI
+ * pipes, then busy-waits until scf_started matches the number of other CPUs
+ * and, if @wait is non-zero, until scf_finished does too.
+ *
+ * Returns 0 always (immediately if there are no other online CPUs).
+ */
+int smp_call_function(void (*_func)(void *info), void *_info, int wait)
+{
+ int cpus = num_online_cpus() - 1;
+ int i;
+
+ if (!cpus)
+ return 0;
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ spin_lock_bh(&call_lock);
+ atomic_set(&scf_started, 0);
+ atomic_set(&scf_finished, 0);
+ func = _func;
+ info = _info;
+
+ /*
+ * NOTE(review): this loop also writes to the calling CPU's own pipe,
+ * while the waits below expect exactly cpus (others only) responses -
+ * confirm that is intended.
+ */
+ for_each_online_cpu(i)
+ os_write_file(cpu_data[i].ipi_pipe[1], "C", 1);
+
+ while (atomic_read(&scf_started) != cpus)
+ barrier();
+
+ if (wait)
+ while (atomic_read(&scf_finished) != cpus)
+ barrier();
+
+ spin_unlock_bh(&call_lock);
+ return 0;
+}
+
+#endif
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
new file mode 100644
index 00000000000..f958cb876ee
--- /dev/null
+++ b/arch/um/kernel/syscall.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/file.h"
+#include "linux/fs.h"
+#include "linux/mm.h"
+#include "linux/sched.h"
+#include "linux/utsname.h"
+#include "linux/syscalls.h"
+#include "asm/current.h"
+#include "asm/mman.h"
+#include "asm/uaccess.h"
+#include "asm/unistd.h"
+#include "internal.h"
+
+/*
+ * fork system call: calls do_fork() with SIGCHLD and the current user stack
+ * pointer. thread.forking is set for the duration of the do_fork() call.
+ * Returns do_fork()'s result.
+ */
+long sys_fork(void)
+{
+ long ret;
+
+ current->thread.forking = 1;
+ ret = do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
+ &current->thread.regs, 0, NULL, NULL);
+ current->thread.forking = 0;
+ return ret;
+}
+
+/*
+ * vfork system call: like sys_fork() but passes CLONE_VFORK | CLONE_VM in
+ * addition to SIGCHLD. thread.forking is set for the duration of the
+ * do_fork() call. Returns do_fork()'s result.
+ */
+long sys_vfork(void)
+{
+ long ret;
+
+ current->thread.forking = 1;
+ ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
+ UPT_SP(&current->thread.regs.regs),
+ &current->thread.regs, 0, NULL, NULL);
+ current->thread.forking = 0;
+ return ret;
+}
+
+/*
+ * mmap variant taking a byte @offset. The offset must be page aligned;
+ * otherwise -EINVAL is returned. The call is forwarded to sys_mmap_pgoff()
+ * with the offset converted to pages.
+ */
+long old_mmap(unsigned long addr, unsigned long len,
+	      unsigned long prot, unsigned long flags,
+	      unsigned long fd, unsigned long offset)
+{
+	if (offset & ~PAGE_MASK)
+		return -EINVAL;
+
+	return sys_mmap_pgoff(addr, len, prot, flags, fd,
+			      offset >> PAGE_SHIFT);
+}
+
+/*
+ * execve for in-kernel callers: switches the address limit to KERNEL_DS
+ * around um_execve() and restores the previous limit afterwards.
+ * Returns um_execve()'s result.
+ */
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
+{
+	mm_segment_t saved_fs = get_fs();
+	int result;
+
+	set_fs(KERNEL_DS);
+	result = um_execve(filename,
+			   (const char __user *const __user *)argv,
+			   (const char __user *const __user *) envp);
+	set_fs(saved_fs);
+
+	return result;
+}
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
new file mode 100644
index 00000000000..0960de54495
--- /dev/null
+++ b/arch/um/kernel/sysrq.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include "sysrq.h"
+
+/* Catch non-i386 SUBARCH's. */
+#if !defined(CONFIG_UML_X86) || defined(CONFIG_64BIT)
+/*
+ * Print a call trace by scanning the stack upwards from @stack until the
+ * address reaches a THREAD_SIZE boundary, printing every word that
+ * __kernel_text_address() accepts. @task is not used.
+ *
+ * If @stack is NULL the scan starts at the address of the local parameter
+ * and a warning is raised.
+ */
+void show_trace(struct task_struct *task, unsigned long * stack)
+{
+	unsigned long word;
+
+	if (stack == NULL) {
+		stack = (unsigned long*) &stack;
+		WARN_ON(1);
+	}
+
+	printk(KERN_INFO "Call Trace: \n");
+	for (; ((long) stack & (THREAD_SIZE-1)) != 0; stack++) {
+		word = *stack;
+		if (!__kernel_text_address(word))
+			continue;
+
+		printk(KERN_INFO "%08lx: [<%08lx>]",
+		       (unsigned long) stack, word);
+		print_symbol(KERN_CONT " %s", word);
+		printk(KERN_CONT "\n");
+	}
+	printk(KERN_INFO "\n");
+}
+#endif
+
+/*
+ * stack dumps generator - this is used by arch-independent code.
+ * And this is identical to i386 currently.
+ */
+/* Trace the current task, starting from the address of a local variable. */
+void dump_stack(void)
+{
+ unsigned long stack;
+
+ show_trace(current, &stack);
+}
+EXPORT_SYMBOL(dump_stack);
+
+/*
+ * Stolen from arch/i386/kernel/traps.c.
+ * Upper bound on the number of stack words show_stack() prints.
+ */
+static const int kstack_depth_to_print = 24;
+
+/*
+ * This recently started being used in arch-independent code too, as in
+ * kernel/sched.c.
+ */
+/*
+ * Dump up to kstack_depth_to_print words of stack starting at @esp, then
+ * print a call trace from the same address.
+ *
+ * If @esp is NULL it defaults to KSTK_ESP(@task) for a task other than
+ * current, and to the address of the local parameter otherwise.
+ */
+void show_stack(struct task_struct *task, unsigned long *esp)
+{
+	unsigned long *stack;
+	int i;
+
+	if (esp == NULL) {
+		if (task == current || task == NULL)
+			esp = (unsigned long *) &esp;
+		else
+			esp = (unsigned long *) KSTK_ESP(task);
+	}
+
+	stack = esp;
+	for (i = 0; i < kstack_depth_to_print && !kstack_end(stack); i++) {
+		if (i && ((i % 8) == 0))
+			printk(KERN_INFO " ");
+		printk(KERN_CONT "%08lx ", *stack++);
+	}
+
+	show_trace(task, esp);
+}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
new file mode 100644
index 00000000000..82a6e22f1f3
--- /dev/null
+++ b/arch/um/kernel/time.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/clockchips.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/threads.h>
+#include <asm/irq.h>
+#include <asm/param.h>
+#include "kern_util.h"
+#include "os.h"
+
+/*
+ * Timer signal entry point: runs do_IRQ(TIMER_IRQ) with local interrupts
+ * disabled, restoring the previous interrupt state afterwards.
+ * @sig is not used.
+ */
+void timer_handler(int sig, struct uml_pt_regs *regs)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ do_IRQ(TIMER_IRQ, regs);
+ local_irq_restore(flags);
+}
+
+/*
+ * clock_event_device set_mode callback for the itimer device.
+ * Periodic mode calls set_interval(); shutdown, unused and one-shot modes
+ * call disable_timer(); resume does nothing. @evt is not used.
+ */
+static void itimer_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ set_interval();
+ break;
+
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_ONESHOT:
+ disable_timer();
+ break;
+
+ case CLOCK_EVT_MODE_RESUME:
+ break;
+ }
+}
+
+/*
+ * clock_event_device set_next_event callback: arms a one-shot timer for
+ * @delta + 1 and returns timer_one_shot()'s result. @evt is not used.
+ */
+static int itimer_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ return timer_one_shot(delta + 1);
+}
+
+/*
+ * Clock event device backed by the itimer callbacks above. The mult and
+ * min/max delta fields are filled in later by setup_itimer().
+ */
+static struct clock_event_device itimer_clockevent = {
+ .name = "itimer",
+ .rating = 250,
+ .cpumask = cpu_all_mask,
+ .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+ .set_mode = itimer_set_mode,
+ .set_next_event = itimer_next_event,
+ .shift = 32,
+ .irq = 0,
+};
+
+/*
+ * TIMER_IRQ handler: invokes the event handler installed on
+ * itimer_clockevent. Both arguments are unused.
+ */
+static irqreturn_t um_timer(int irq, void *dev)
+{
+ (*itimer_clockevent.event_handler)(&itimer_clockevent);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Clocksource read callback: os_nsecs() divided by 1000, i.e. microseconds
+ * (the clocksource is registered at USEC_PER_SEC Hz). @cs is not used.
+ */
+static cycle_t itimer_read(struct clocksource *cs)
+{
+ return os_nsecs() / 1000;
+}
+
+/* Clocksource read through itimer_read(); registered by setup_itimer(). */
+static struct clocksource itimer_clocksource = {
+ .name = "itimer",
+ .rating = 300,
+ .read = itimer_read,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+/*
+ * Late time initialisation: requests TIMER_IRQ for um_timer(), fills in the
+ * clock event device's mult and delta limits, registers the clocksource at
+ * USEC_PER_SEC Hz and then registers the clock event device.
+ */
+static void __init setup_itimer(void)
+{
+ int err;
+
+ err = request_irq(TIMER_IRQ, um_timer, IRQF_DISABLED, "timer", NULL);
+ /* A failed request_irq() is only logged; setup continues regardless. */
+ if (err != 0)
+ printk(KERN_ERR "register_timer : request_irq failed - "
+ "errno = %d\n", -err);
+
+ itimer_clockevent.mult = div_sc(HZ, NSEC_PER_SEC, 32);
+ itimer_clockevent.max_delta_ns =
+ clockevent_delta2ns(60 * HZ, &itimer_clockevent);
+ itimer_clockevent.min_delta_ns =
+ clockevent_delta2ns(1, &itimer_clockevent);
+ err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC);
+ if (err) {
+ /* Without a clocksource the clock event device is not registered. */
+ printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
+ return;
+ }
+ clockevents_register_device(&itimer_clockevent);
+}
+
+/* Fill @ts from os_nsecs(), split into whole seconds and nanoseconds. */
+void read_persistent_clock(struct timespec *ts)
+{
+ long long nsecs = os_nsecs();
+
+ set_normalized_timespec(ts, nsecs / NSEC_PER_SEC,
+ nsecs % NSEC_PER_SEC);
+}
+
+/* Calls timer_init() and installs setup_itimer() as the late_time_init hook. */
+void __init time_init(void)
+{
+ timer_init();
+ late_time_init = setup_itimer;
+}
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
new file mode 100644
index 00000000000..7f3d4d86431
--- /dev/null
+++ b/arch/um/kernel/tlb.c
@@ -0,0 +1,536 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include "as-layout.h"
+#include "mem_user.h"
+#include "os.h"
+#include "skas.h"
+
+/*
+ * A queue of pending host address-space operations for one mm.
+ *
+ * ops   - the queued operations; the add_*() helpers flush the queue
+ *         through do_ops() once ARRAY_SIZE(ops) entries are in use
+ * index - number of entries of ops currently in use
+ * id    - mm_id handed to map()/unmap()/protect()
+ * data  - passed by address to map()/unmap()/protect(); starts out NULL
+ * force - when non-zero the range walkers issue operations even for
+ *         entries that are not flagged as new
+ */
+struct host_vm_change {
+ struct host_vm_op {
+ enum { NONE, MMAP, MUNMAP, MPROTECT } type;
+ union {
+ struct {
+ unsigned long addr;
+ unsigned long len;
+ unsigned int prot;
+ int fd;
+ __u64 offset;
+ } mmap;
+ struct {
+ unsigned long addr;
+ unsigned long len;
+ } munmap;
+ struct {
+ unsigned long addr;
+ unsigned long len;
+ unsigned int prot;
+ } mprotect;
+ } u;
+ } ops[1];
+ int index;
+ struct mm_id *id;
+ void *data;
+ int force;
+};
+
+/*
+ * Build a struct host_vm_change for an mm with an empty operation queue.
+ *
+ * The parameters carry a trailing underscore so they cannot collide with
+ * the designated-initializer field names: a parameter spelled "force"
+ * would also be substituted into ".force", so the macro only compiled
+ * when the caller's argument happened to be an identifier named force.
+ * Both arguments are parenthesized so arbitrary expressions may be passed.
+ */
+#define INIT_HVC(mm_, force_) \
+	((struct host_vm_change) \
+	 { .ops = { { .type = NONE } }, \
+	   .id = &(mm_)->context.id, \
+	   .data = NULL, \
+	   .index = 0, \
+	   .force = (force_) })
+
+/*
+ * Execute the first 'end' queued operations of hvc in order, stopping at
+ * the first one that fails.  'finished' is passed through unchanged to
+ * map()/unmap()/protect().  An entry of unknown type is logged and
+ * skipped.  Returns 0 on success or the failing call's return value.
+ */
+static int do_ops(struct host_vm_change *hvc, int end,
+		  int finished)
+{
+	int n;
+
+	for (n = 0; n < end; n++) {
+		struct host_vm_op *op = &hvc->ops[n];
+		int err = 0;
+
+		if (op->type == MMAP)
+			err = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
+				  op->u.mmap.prot, op->u.mmap.fd,
+				  op->u.mmap.offset, finished, &hvc->data);
+		else if (op->type == MUNMAP)
+			err = unmap(hvc->id, op->u.munmap.addr,
+				    op->u.munmap.len, finished, &hvc->data);
+		else if (op->type == MPROTECT)
+			err = protect(hvc->id, op->u.mprotect.addr,
+				      op->u.mprotect.len, op->u.mprotect.prot,
+				      finished, &hvc->data);
+		else
+			printk(KERN_ERR "Unknown op type %d in do_ops\n",
+			       op->type);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Queue a host mmap of 'len' bytes at 'virt', backed by the fd/offset
+ * that phys_mapping() reports for 'phys', with protection 'prot'.
+ *
+ * Returns 0, or the result of do_ops() when the queue had to be flushed
+ * to make room.  The new operation is queued even if that flush failed.
+ */
+static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
+ unsigned int prot, struct host_vm_change *hvc)
+{
+ __u64 offset;
+ struct host_vm_op *last;
+ int fd, ret = 0;
+
+ fd = phys_mapping(phys, &offset);
+ if (hvc->index != 0) {
+ last = &hvc->ops[hvc->index - 1];
+ /*
+ * Extend the previous op instead of queueing a new one when it
+ * is an mmap that ends exactly where this one starts, both in
+ * virtual address and in file offset, with the same prot and fd.
+ */
+ if ((last->type == MMAP) &&
+ (last->u.mmap.addr + last->u.mmap.len == virt) &&
+ (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
+ (last->u.mmap.offset + last->u.mmap.len == offset)) {
+ last->u.mmap.len += len;
+ return 0;
+ }
+ }
+
+ /* Queue full: run what is pending, then start again from slot 0 */
+ if (hvc->index == ARRAY_SIZE(hvc->ops)) {
+ ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
+ hvc->index = 0;
+ }
+
+ hvc->ops[hvc->index++] = ((struct host_vm_op)
+ { .type = MMAP,
+ .u = { .mmap = { .addr = virt,
+ .len = len,
+ .prot = prot,
+ .fd = fd,
+ .offset = offset }
+ } });
+ return ret;
+}
+
+/*
+ * Queue a host munmap of 'len' bytes at 'addr'.
+ *
+ * Returns 0, or the result of do_ops() when the queue had to be flushed
+ * to make room.  The new operation is queued even if that flush failed.
+ */
+static int add_munmap(unsigned long addr, unsigned long len,
+		      struct host_vm_change *hvc)
+{
+	struct host_vm_op *last;
+	int ret = 0;
+
+	if (hvc->index != 0) {
+		last = &hvc->ops[hvc->index - 1];
+		/*
+		 * Extend the previous op when it is a munmap ending exactly
+		 * at addr.  The length is read through the munmap member of
+		 * the union; the old code read u.mmap.len here, which only
+		 * worked because both members start with addr and len.
+		 */
+		if ((last->type == MUNMAP) &&
+		    (last->u.munmap.addr + last->u.munmap.len == addr)) {
+			last->u.munmap.len += len;
+			return 0;
+		}
+	}
+
+	/* Queue full: run what is pending, then start again from slot 0 */
+	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
+		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
+		hvc->index = 0;
+	}
+
+	hvc->ops[hvc->index++] = ((struct host_vm_op)
+				  { .type = MUNMAP,
+				    .u = { .munmap = { .addr = addr,
+						       .len = len } } });
+	return ret;
+}
+
+/*
+ * Queue a host mprotect of 'len' bytes at 'addr' to protection 'prot'.
+ *
+ * Returns 0, or the result of do_ops() when the queue had to be flushed
+ * to make room.  The new operation is queued even if that flush failed.
+ */
+static int add_mprotect(unsigned long addr, unsigned long len,
+			unsigned int prot, struct host_vm_change *hvc)
+{
+	int err = 0;
+
+	if (hvc->index != 0) {
+		struct host_vm_op *prev = &hvc->ops[hvc->index - 1];
+
+		/* Grow an adjacent mprotect with the same prot in place */
+		if (prev->type == MPROTECT &&
+		    prev->u.mprotect.addr + prev->u.mprotect.len == addr &&
+		    prev->u.mprotect.prot == prot) {
+			prev->u.mprotect.len += len;
+			return 0;
+		}
+	}
+
+	/* No free slot left: flush the queue and reuse it from the start */
+	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
+		err = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
+		hvc->index = 0;
+	}
+
+	hvc->ops[hvc->index++] = ((struct host_vm_op)
+				  { .type = MPROTECT,
+				    .u = { .mprotect = { .addr = addr,
+							 .len = len,
+							 .prot = prot } } });
+	return err;
+}
+
+/*
+ * Add inc to n, then round down to a multiple of inc (inc must be a power
+ * of two for the mask to work).  The result is always strictly greater
+ * than n: an n that is already aligned moves on by a full inc.
+ */
+#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
+
+/*
+ * Walk the PTEs covering [addr, end) under pmd and queue the host
+ * operation each one needs in hvc.  Stops at the first queueing error and
+ * returns it; returns 0 otherwise.
+ */
+static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end,
+ struct host_vm_change *hvc)
+{
+ pte_t *pte;
+ int r, w, x, prot, ret = 0;
+
+ pte = pte_offset_kernel(pmd, addr);
+ do {
+ /*
+ * Addresses inside the stub area are left alone.  'continue' in
+ * a do/while jumps to the loop condition, so pte and addr are
+ * still advanced.
+ */
+ if ((addr >= STUB_START) && (addr < STUB_END))
+ continue;
+
+ r = pte_read(*pte);
+ w = pte_write(*pte);
+ x = pte_exec(*pte);
+ /*
+ * A page that is not young gets neither read nor write access
+ * on the host, and a page that is not dirty gets no write access.
+ */
+ if (!pte_young(*pte)) {
+ r = 0;
+ w = 0;
+ } else if (!pte_dirty(*pte))
+ w = 0;
+
+ prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
+ (x ? UM_PROT_EXEC : 0));
+ /* New (or forced) pages are mapped or unmapped; otherwise only a
+ * protection change is queued when the PTE asks for one. */
+ if (hvc->force || pte_newpage(*pte)) {
+ if (pte_present(*pte))
+ ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
+ PAGE_SIZE, prot, hvc);
+ else
+ ret = add_munmap(addr, PAGE_SIZE, hvc);
+ } else if (pte_newprot(*pte))
+ ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
+ *pte = pte_mkuptodate(*pte);
+ } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
+ return ret;
+}
+
+/*
+ * Walk the PMD entries covering [addr, end) under pud.  A present entry
+ * is descended into; an absent one that is new (or when hvc->force is
+ * set) has its whole span queued for unmapping.  Stops at the first
+ * error and returns it.
+ */
+static inline int update_pmd_range(pud_t *pud, unsigned long addr,
+				   unsigned long end,
+				   struct host_vm_change *hvc)
+{
+	pmd_t *pmd = pmd_offset(pud, addr);
+	unsigned long next;
+	int ret = 0;
+
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_present(*pmd)) {
+			ret = update_pte_range(pmd, addr, next, hvc);
+		} else if (hvc->force || pmd_newpage(*pmd)) {
+			ret = add_munmap(addr, next - addr, hvc);
+			pmd_mkuptodate(*pmd);
+		}
+		pmd++;
+		addr = next;
+	} while ((addr < end) && !ret);
+
+	return ret;
+}
+
+/*
+ * Walk the PUD entries covering [addr, end) under pgd.  A present entry
+ * is descended into; an absent one that is new (or when hvc->force is
+ * set) has its whole span queued for unmapping.  Stops at the first
+ * error and returns it.
+ */
+static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
+				   unsigned long end,
+				   struct host_vm_change *hvc)
+{
+	pud_t *pud = pud_offset(pgd, addr);
+	unsigned long next;
+	int ret = 0;
+
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_present(*pud)) {
+			ret = update_pmd_range(pud, addr, next, hvc);
+		} else if (hvc->force || pud_newpage(*pud)) {
+			ret = add_munmap(addr, next - addr, hvc);
+			pud_mkuptodate(*pud);
+		}
+		pud++;
+		addr = next;
+	} while ((addr < end) && !ret);
+
+	return ret;
+}
+
+/*
+ * Bring the host mappings for [start_addr, end_addr) of mm in line with
+ * mm's page tables.  With force set, operations are issued even for
+ * entries that are not flagged as new.  If queueing or executing any
+ * operation fails, the current process is sent SIGKILL.
+ */
+void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
+ unsigned long end_addr, int force)
+{
+ pgd_t *pgd;
+ struct host_vm_change hvc;
+ unsigned long addr = start_addr, next;
+ int ret = 0;
+
+ hvc = INIT_HVC(mm, force);
+ pgd = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end_addr);
+ if (!pgd_present(*pgd)) {
+ if (force || pgd_newpage(*pgd)) {
+ ret = add_munmap(addr, next - addr, &hvc);
+ pgd_mkuptodate(*pgd);
+ }
+ }
+ else ret = update_pud_range(pgd, addr, next, &hvc);
+ } while (pgd++, addr = next, ((addr < end_addr) && !ret));
+
+ /* Run whatever is still queued, this time with finished = 1 */
+ if (!ret)
+ ret = do_ops(&hvc, hvc.index, 1);
+
+ /* This is not an else because ret is modified above */
+ if (ret) {
+ printk(KERN_ERR "fix_range_common: failed, killing current "
+ "process\n");
+ force_sig(SIGKILL, current);
+ }
+}
+
+/*
+ * Bring the host mappings for the kernel range [start, end) in line with
+ * init_mm's page tables, acting directly through os_unmap_memory(),
+ * map_memory() and os_protect_memory().
+ *
+ * Returns 1 if any host operation was issued, 0 otherwise.  A failing
+ * os_unmap_memory() panics.
+ */
+static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
+{
+ struct mm_struct *mm;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long addr, last;
+ int updated = 0, err;
+
+ mm = &init_mm;
+ for (addr = start; addr < end;) {
+ /*
+ * At each of the three upper levels: if the entry is absent,
+ * skip to the next boundary of that level (clamped to end),
+ * unmapping the skipped span first when the entry is new.
+ */
+ pgd = pgd_offset(mm, addr);
+ if (!pgd_present(*pgd)) {
+ last = ADD_ROUND(addr, PGDIR_SIZE);
+ if (last > end)
+ last = end;
+ if (pgd_newpage(*pgd)) {
+ updated = 1;
+ err = os_unmap_memory((void *) addr,
+ last - addr);
+ if (err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ }
+ addr = last;
+ continue;
+ }
+
+ pud = pud_offset(pgd, addr);
+ if (!pud_present(*pud)) {
+ last = ADD_ROUND(addr, PUD_SIZE);
+ if (last > end)
+ last = end;
+ if (pud_newpage(*pud)) {
+ updated = 1;
+ err = os_unmap_memory((void *) addr,
+ last - addr);
+ if (err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ }
+ addr = last;
+ continue;
+ }
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd)) {
+ last = ADD_ROUND(addr, PMD_SIZE);
+ if (last > end)
+ last = end;
+ if (pmd_newpage(*pmd)) {
+ updated = 1;
+ err = os_unmap_memory((void *) addr,
+ last - addr);
+ if (err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ }
+ addr = last;
+ continue;
+ }
+
+ pte = pte_offset_kernel(pmd, addr);
+ /*
+ * An absent or new page is unmapped first and, when present,
+ * mapped again afterwards.
+ */
+ if (!pte_present(*pte) || pte_newpage(*pte)) {
+ updated = 1;
+ err = os_unmap_memory((void *) addr,
+ PAGE_SIZE);
+ if (err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ if (pte_present(*pte))
+ map_memory(addr,
+ pte_val(*pte) & PAGE_MASK,
+ PAGE_SIZE, 1, 1, 1);
+ }
+ else if (pte_newprot(*pte)) {
+ updated = 1;
+ /* NOTE(review): unlike os_unmap_memory() above, the result
+ * of os_protect_memory() is not checked - confirm intended */
+ os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
+ }
+ addr += PAGE_SIZE;
+ }
+ return updated;
+}
+
+/*
+ * Bring the host mapping of the single page containing 'address' in
+ * vma's mm in line with its PTE: map or unmap the page if the PTE is
+ * flagged new, change its protection if only that is flagged.
+ *
+ * If any page-table level above the PTE is absent, or the host operation
+ * fails, an error is logged and the current process is sent SIGKILL.
+ */
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ struct mm_struct *mm = vma->vm_mm;
+ void *flush = NULL;
+ int r, w, x, prot, err = 0;
+ struct mm_id *mm_id;
+
+ address &= PAGE_MASK;
+ pgd = pgd_offset(mm, address);
+ if (!pgd_present(*pgd))
+ goto kill;
+
+ pud = pud_offset(pgd, address);
+ if (!pud_present(*pud))
+ goto kill;
+
+ pmd = pmd_offset(pud, address);
+ if (!pmd_present(*pmd))
+ goto kill;
+
+ pte = pte_offset_kernel(pmd, address);
+
+ r = pte_read(*pte);
+ w = pte_write(*pte);
+ x = pte_exec(*pte);
+ /*
+ * A page that is not young gets neither read nor write access on the
+ * host, and a page that is not dirty gets no write access.
+ */
+ if (!pte_young(*pte)) {
+ r = 0;
+ w = 0;
+ } else if (!pte_dirty(*pte)) {
+ w = 0;
+ }
+
+ mm_id = &mm->context.id;
+ prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
+ (x ? UM_PROT_EXEC : 0));
+ if (pte_newpage(*pte)) {
+ if (pte_present(*pte)) {
+ unsigned long long offset;
+ int fd;
+
+ fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
+ err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
+ 1, &flush);
+ }
+ else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
+ }
+ else if (pte_newprot(*pte))
+ err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
+
+ if (err)
+ goto kill;
+
+ /* Only marked up to date once the host operation has succeeded */
+ *pte = pte_mkuptodate(*pte);
+
+ return;
+
+kill:
+ printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
+ force_sig(SIGKILL, current);
+}
+
+/* Function wrapper around pgd_offset(). */
+pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
+{
+ return pgd_offset(mm, address);
+}
+
+/* Function wrapper around pud_offset(). */
+pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
+{
+ return pud_offset(pgd, address);
+}
+
+/* Function wrapper around pmd_offset(). */
+pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
+{
+ return pmd_offset(pud, address);
+}
+
+/* Function wrapper around pte_offset_kernel(). */
+pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
+{
+ return pte_offset_kernel(pmd, address);
+}
+
+/*
+ * Return the PTE for addr in task's mm by walking pgd -> pud -> pmd.
+ * No level is checked for presence and task->mm is dereferenced
+ * unconditionally, so the caller must pass a task with an mm whose page
+ * tables cover addr.
+ */
+pte_t *addr_pte(struct task_struct *task, unsigned long addr)
+{
+ pgd_t *pgd = pgd_offset(task->mm, addr);
+ pud_t *pud = pud_offset(pgd, addr);
+ pmd_t *pmd = pmd_offset(pud, addr);
+
+ return pte_offset_map(pmd, addr);
+}
+
+/*
+ * Flush the current task's address space via flush_tlb_mm().
+ * NOTE(review): current->mm is passed on unchecked and flush_tlb_mm()
+ * dereferences it - confirm this is never reached with a NULL mm.
+ */
+void flush_tlb_all(void)
+{
+ flush_tlb_mm(current->mm);
+}
+
+/* Flush the kernel range [start, end); the "updated" result is dropped. */
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ flush_tlb_kernel_range_common(start, end);
+}
+
+/* Flush the whole kernel range [start_vm, end_vm). */
+void flush_tlb_kernel_vm(void)
+{
+ flush_tlb_kernel_range_common(start_vm, end_vm);
+}
+
+/* Flush the PAGE_SIZE bytes of kernel address space starting at addr. */
+void __flush_tlb_one(unsigned long addr)
+{
+ flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
+}
+
+/* File-local alias that forwards all arguments to fix_range_common(). */
+static void fix_range(struct mm_struct *mm, unsigned long start_addr,
+ unsigned long end_addr, int force)
+{
+ fix_range_common(mm, start_addr, end_addr, force);
+}
+
+/*
+ * Flush [start, end) for vma.  A vma without an mm is treated as a kernel
+ * range; otherwise the range is fixed up in the vma's mm without forcing.
+ */
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (mm != NULL) {
+		fix_range(mm, start, end, 0);
+		return;
+	}
+
+	flush_tlb_kernel_range_common(start, end);
+}
+EXPORT_SYMBOL(flush_tlb_range);
+
+/*
+ * Fix up the host mappings for [start, end) of mm without forcing.
+ * Does nothing when mm has no users left.
+ */
+void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
+{
+ /*
+ * Don't bother flushing if this address space is about to be
+ * destroyed.
+ */
+ if (atomic_read(&mm->mm_users) == 0)
+ return;
+
+ fix_range(mm, start, end, 0);
+}
+
+/* Fix up, without forcing, the range of every VMA on mm's mmap list. */
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	struct vm_area_struct *area;
+
+	for (area = mm->mmap; area != NULL; area = area->vm_next)
+		fix_range(mm, area->vm_start, area->vm_end, 0);
+}
+
+/*
+ * Fix up the range of every VMA of the current task's mm with force set,
+ * so host operations are issued even for entries not flagged as new.
+ */
+void force_flush_all(void)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *area;
+
+	for (area = mm->mmap; area != NULL; area = area->vm_next)
+		fix_range(mm, area->vm_start, area->vm_end, 1);
+}
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
new file mode 100644
index 00000000000..dafc9471595
--- /dev/null
+++ b/arch/um/kernel/trap.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h>
+#include <linux/module.h>
+#include <asm/current.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include "arch.h"
+#include "as-layout.h"
+#include "kern_util.h"
+#include "os.h"
+#include "skas.h"
+
+/*
+ * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
+ * segv().
+ */
+/*
+ * Resolve a page fault at 'address' for the current task.
+ *
+ * is_write selects a write fault; is_user is only consulted for the
+ * stack-growth check; ip is not used here.  *code_out receives the
+ * si_code for a resulting SIGSEGV: SEGV_MAPERR until a VMA covering the
+ * address has been found, SEGV_ACCERR afterwards.
+ *
+ * Returns 0 when the fault was handled (and also after an out-of-memory
+ * condition was passed to pagefault_out_of_memory()), -EACCES when
+ * handle_mm_fault() reported VM_FAULT_SIGBUS, and -EFAULT otherwise.
+ */
+int handle_page_fault(unsigned long address, unsigned long ip,
+ int is_write, int is_user, int *code_out)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ int err = -EFAULT;
+
+ *code_out = SEGV_MAPERR;
+
+ /*
+ * If the fault was during atomic operation, don't take the fault, just
+ * fail.
+ */
+ if (in_atomic())
+ goto out_nosemaphore;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, address);
+ /*
+ * The address is acceptable if it lies inside the VMA found, or just
+ * below a VM_GROWSDOWN VMA that can be expanded to cover it.
+ */
+ if (!vma)
+ goto out;
+ else if (vma->vm_start <= address)
+ goto good_area;
+ else if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto out;
+ else if (is_user && !ARCH_IS_STACKGROW(address))
+ goto out;
+ else if (expand_stack(vma, address))
+ goto out;
+
+good_area:
+ *code_out = SEGV_ACCERR;
+ if (is_write && !(vma->vm_flags & VM_WRITE))
+ goto out;
+
+ /* Don't require VM_READ|VM_EXEC for write faults! */
+ if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
+ goto out;
+
+ /* Repeat handle_mm_fault() until the PTE for address is present */
+ do {
+ int fault;
+
+ fault = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0);
+ if (unlikely(fault & VM_FAULT_ERROR)) {
+ if (fault & VM_FAULT_OOM) {
+ goto out_of_memory;
+ } else if (fault & VM_FAULT_SIGBUS) {
+ err = -EACCES;
+ goto out;
+ }
+ /* Any other VM_FAULT_ERROR bit is not expected here */
+ BUG();
+ }
+ if (fault & VM_FAULT_MAJOR)
+ current->maj_flt++;
+ else
+ current->min_flt++;
+
+ pgd = pgd_offset(mm, address);
+ pud = pud_offset(pgd, address);
+ pmd = pmd_offset(pud, address);
+ pte = pte_offset_kernel(pmd, address);
+ } while (!pte_present(*pte));
+ err = 0;
+ /*
+ * The below warning was added in place of
+ * pte_mkyoung(); if (is_write) pte_mkdirty();
+ * If it's triggered, we'd see normally a hang here (a clean pte is
+ * marked read-only to emulate the dirty bit).
+ * However, the generic code can mark a PTE writable but clean on a
+ * concurrent read fault, triggering this harmlessly. So comment it out.
+ */
+#if 0
+ WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
+#endif
+ flush_tlb_page(vma, address);
+out:
+ up_read(&mm->mmap_sem);
+out_nosemaphore:
+ return err;
+
+out_of_memory:
+ /*
+ * We ran out of memory, call the OOM killer, and return to userspace
+ * (which will retry the fault, or kill us if we got oom-killed).
+ */
+ up_read(&mm->mmap_sem);
+ pagefault_out_of_memory();
+ return 0;
+}
+EXPORT_SYMBOL(handle_page_fault);
+
+/*
+ * Log a one-line segfault report (task name and pid, fault address, ip,
+ * sp, error code and the VMA containing ip) for the current task.
+ * Nothing is printed when unhandled_signal() returns false for SIGSEGV
+ * or when printk_ratelimit() says to hold back.
+ */
+static void show_segv_info(struct uml_pt_regs *regs)
+{
+ struct task_struct *tsk = current;
+ struct faultinfo *fi = UPT_FAULTINFO(regs);
+
+ if (!unhandled_signal(tsk, SIGSEGV))
+ return;
+
+ if (!printk_ratelimit())
+ return;
+
+ /* The log level is passed as the first %s: KERN_EMERG for pid 1 */
+ printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+ task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+ tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
+ (void *)UPT_IP(regs), (void *)UPT_SP(regs),
+ fi->error_code);
+
+ print_vma_addr(KERN_CONT " in ", UPT_IP(regs));
+ printk(KERN_CONT "\n");
+}
+
+/*
+ * Deliver SIGSEGV (si_code SEGV_ACCERR) to the current task for the
+ * fault described by fi, recording fi in the thread's arch state first.
+ * ip is not used.
+ */
+static void bad_segv(struct faultinfo fi, unsigned long ip)
+{
+	struct siginfo si;
+
+	si.si_signo = SIGSEGV;
+	/* si lives on the stack: do not hand out an indeterminate errno */
+	si.si_errno = 0;
+	si.si_code = SEGV_ACCERR;
+	si.si_addr = (void __user *) FAULT_ADDRESS(fi);
+	current->thread.arch.faultinfo = fi;
+	force_sig_info(SIGSEGV, &si, current);
+}
+
+/*
+ * Force a SIGSEGV on the current task and process it immediately with
+ * do_signal(); os_dump_core() follows for the case that do_signal()
+ * returns.
+ */
+void fatal_sigsegv(void)
+{
+ force_sigsegv(SIGSEGV, current);
+ do_signal();
+ /*
+ * This is to tell gcc that we're not returning - do_signal
+ * can, in general, return, but in this case, it's not, since
+ * we just got a fatal SIGSEGV queued.
+ */
+ os_dump_core();
+}
+
+/*
+ * Entry point for a SIGSEGV described by regs.  A userspace fault that
+ * SEGV_IS_FIXABLE() rejects is reported and turned straight into a
+ * SIGSEGV via bad_segv(); everything else goes to segv().  sig is unused.
+ */
+void segv_handler(int sig, struct uml_pt_regs *regs)
+{
+ struct faultinfo * fi = UPT_FAULTINFO(regs);
+
+ if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) {
+ show_segv_info(regs);
+ bad_segv(*fi, UPT_IP(regs));
+ return;
+ }
+ segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
+}
+
+/*
+ * Handle the fault described by fi, taken at ip.
+ *
+ * We give a *copy* of the faultinfo in the regs to segv.
+ * This must be done, since nesting SEGVs could overwrite
+ * the info in the regs. A pointer to the info then would
+ * give us bad data!
+ *
+ * Kernel faults inside [start_vm, end_vm) are answered by flushing the
+ * kernel range.  Otherwise the fault goes to handle_page_fault() when it
+ * is (maybe) fixable.  A fault that could not be resolved is passed to
+ * the thread's fault catcher, then to arch_fixup() for kernel faults,
+ * and finally turned into a panic (kernel) or a SIGBUS/SIGSEGV (user).
+ *
+ * Always returns 0 when it returns at all.
+ */
+unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
+		   struct uml_pt_regs *regs)
+{
+	struct siginfo si;
+	jmp_buf *catcher;
+	int err;
+	int is_write = FAULT_WRITE(fi);
+	unsigned long address = FAULT_ADDRESS(fi);
+
+	/*
+	 * si lives on the stack and not every path below fills in these two
+	 * fields: si_code used to stay indeterminate when the fault was not
+	 * fixable (handle_page_fault() is then never called), and si_errno
+	 * was never set on the SIGSEGV path.  SEGV_MAPERR is the value
+	 * handle_page_fault() itself starts from.
+	 */
+	si.si_errno = 0;
+	si.si_code = SEGV_MAPERR;
+
+	if (!is_user && (address >= start_vm) && (address < end_vm)) {
+		flush_tlb_kernel_vm();
+		return 0;
+	}
+	else if (current->mm == NULL) {
+		show_regs(container_of(regs, struct pt_regs, regs));
+		panic("Segfault with no mm");
+	}
+
+	if (SEGV_IS_FIXABLE(&fi) || SEGV_MAYBE_FIXABLE(&fi))
+		err = handle_page_fault(address, ip, is_write, is_user,
+					&si.si_code);
+	else {
+		err = -EFAULT;
+		/*
+		 * A thread accessed NULL, we get a fault, but CR2 is invalid.
+		 * This code is used in __do_copy_from_user() of TT mode.
+		 * XXX tt mode is gone, so maybe this isn't needed any more
+		 */
+		address = 0;
+	}
+
+	catcher = current->thread.fault_catcher;
+	if (!err)
+		return 0;
+	else if (catcher != NULL) {
+		/* Hand the faulting address to whoever installed the catcher */
+		current->thread.fault_addr = (void *) address;
+		UML_LONGJMP(catcher, 1);
+	}
+	else if (current->thread.fault_addr != NULL)
+		panic("fault_addr set but no fault catcher");
+	else if (!is_user && arch_fixup(ip, regs))
+		return 0;
+
+	if (!is_user) {
+		show_regs(container_of(regs, struct pt_regs, regs));
+		panic("Kernel mode fault at addr 0x%lx, ip 0x%lx",
+		      address, ip);
+	}
+
+	show_segv_info(regs);
+
+	if (err == -EACCES) {
+		si.si_signo = SIGBUS;
+		si.si_errno = 0;
+		si.si_code = BUS_ADRERR;
+		si.si_addr = (void __user *)address;
+		current->thread.arch.faultinfo = fi;
+		force_sig_info(SIGBUS, &si, current);
+	} else {
+		BUG_ON(err != -EFAULT);
+		si.si_signo = SIGSEGV;
+		si.si_addr = (void __user *) address;
+		current->thread.arch.faultinfo = fi;
+		force_sig_info(SIGSEGV, &si, current);
+	}
+	return 0;
+}
+
+/*
+ * Pass a signal that arrived while in user mode on to the current task,
+ * after arch_examine_signal() and after saving the fault info from regs.
+ * A signal that arrives in kernel mode is fatal and panics.
+ */
+void relay_signal(int sig, struct uml_pt_regs *regs)
+{
+ if (!UPT_IS_USER(regs)) {
+ if (sig == SIGBUS)
+ printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
+ "mount likely just ran out of space\n");
+ panic("Kernel mode signal %d", sig);
+ }
+
+ arch_examine_signal(sig, regs);
+
+ current->thread.arch.faultinfo = *UPT_FAULTINFO(regs);
+ force_sig(sig, current);
+}
+
+/*
+ * SIGBUS entry point: jump to the thread's fault catcher when one is
+ * installed, otherwise relay the signal.
+ */
+void bus_handler(int sig, struct uml_pt_regs *regs)
+{
+	if (current->thread.fault_catcher == NULL) {
+		relay_signal(sig, regs);
+		return;
+	}
+
+	UML_LONGJMP(current->thread.fault_catcher, 1);
+}
+
+/* Signal entry point that raises WINCH_IRQ through do_IRQ(); sig is unused. */
+void winch(int sig, struct uml_pt_regs *regs)
+{
+ do_IRQ(WINCH_IRQ, regs);
+}
+
+/* Intentionally empty: nothing is set up here. */
+void trap_init(void)
+{
+}
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
new file mode 100644
index 00000000000..ba00eae45aa
--- /dev/null
+++ b/arch/um/kernel/um_arch.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/utsname.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include "as-layout.h"
+#include "arch.h"
+#include "init.h"
+#include "kern.h"
+#include "kern_util.h"
+#include "mem_user.h"
+#include "os.h"
+
+#define DEFAULT_COMMAND_LINE "root=98:0"
+
+/* Changed in add_arg and setup_arch, which run before SMP is started */
+static char __initdata command_line[COMMAND_LINE_SIZE] = { 0 };
+
+/*
+ * Append arg to command_line, separated from any earlier argument by a
+ * single space.  Prints a message and exits if the result, including the
+ * separator and the terminating NUL, would not fit in COMMAND_LINE_SIZE
+ * bytes.
+ */
+static void __init add_arg(char *arg)
+{
+	size_t used = strlen(command_line);
+	/* A separator is only written when something is already stored */
+	size_t sep = (used > 0) ? 1 : 0;
+
+	/*
+	 * + 1 for the terminating NUL.  The separator has to be counted as
+	 * well: leaving it out allowed strcat() to write one byte past the
+	 * end of command_line.
+	 */
+	if (used + sep + strlen(arg) + 1 > COMMAND_LINE_SIZE) {
+		printf("add_arg: Too many command line arguments!\n");
+		exit(1);
+	}
+	if (sep)
+		strcat(command_line, " ");
+	strcat(command_line, arg);
+}
+
+/*
+ * These fields are initialized at boot time and not changed.
+ * XXX This structure is used only in the non-SMP case. Maybe this
+ * should be moved to smp.c.
+ */
+struct cpuinfo_um boot_cpu_data = {
+ .loops_per_jiffy = 0,
+ .ipi_pipe = { -1, -1 }
+};
+
+/*
+ * Return os_process_pc() for userspace_pid[0].  The task argument is
+ * ignored, so the same process is queried whatever task is passed.
+ */
+unsigned long thread_saved_pc(struct task_struct *task)
+{
+ /* FIXME: Need to look up userspace_pid by cpu */
+ return os_process_pc(userspace_pid[0]);
+}
+
+/* Changed in setup_arch, which is called in early boot */
+static char host_info[(__NEW_UTS_LEN + 1) * 5];
+
+/*
+ * seq_file show hook for cpuinfo_op: print one processor entry.  On SMP
+ * builds the index is derived from v's position in cpu_data and offline
+ * CPUs are skipped; otherwise the index is always 0.  Always returns 0.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+ int index = 0;
+
+#ifdef CONFIG_SMP
+ index = (struct cpuinfo_um *) v - cpu_data;
+ if (!cpu_online(index))
+ return 0;
+#endif
+
+ seq_printf(m, "processor\t: %d\n", index);
+ seq_printf(m, "vendor_id\t: User Mode Linux\n");
+ seq_printf(m, "model name\t: UML\n");
+ seq_printf(m, "mode\t\t: skas\n");
+ seq_printf(m, "host\t\t: %s\n", host_info);
+ /* Integer part and two decimals, both derived from loops_per_jiffy */
+ seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
+ loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ)) % 100);
+
+ return 0;
+}
+
+/*
+ * seq_file start hook: the cpu_data entry for position *pos, or NULL once
+ * *pos has reached NR_CPUS.
+ */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	if (*pos < NR_CPUS)
+		return cpu_data + *pos;
+
+	return NULL;
+}
+
+/* seq_file next hook: advance *pos by one and look it up via c_start(). */
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return c_start(m, pos);
+}
+
+/* seq_file stop hook: nothing to release. */
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
+};
+
+/* Set in linux_main */
+unsigned long uml_physmem;
+EXPORT_SYMBOL(uml_physmem);
+
+unsigned long uml_reserved; /* Also modified in mem_init */
+unsigned long start_vm;
+unsigned long end_vm;
+
+/* Set in uml_ncpus_setup */
+int ncpus = 1;
+
+/* Set in early boot */
+static int have_root __initdata = 0;
+
+/* Set in uml_mem_setup and modified in linux_main */
+long long physmem_size = 32 * 1024 * 1024;
+
+static const char *usage_string =
+"User Mode Linux v%s\n"
+" available at http://user-mode-linux.sourceforge.net/\n\n";
+
+/*
+ * "--version" handler: print the kernel release string and exit with
+ * status 0.  The return statement is never reached.
+ */
+static int __init uml_version_setup(char *line, int *add)
+{
+ printf("%s\n", init_utsname()->release);
+ exit(0);
+
+ return 0;
+}
+
+__uml_setup("--version", uml_version_setup,
+"--version\n"
+" Prints the version number of the kernel.\n\n"
+);
+
+/*
+ * "root=" handler: only records that a root argument was given, so that
+ * linux_main() does not append DEFAULT_COMMAND_LINE.  *add is left alone.
+ */
+static int __init uml_root_setup(char *line, int *add)
+{
+ have_root = 1;
+ return 0;
+}
+
+__uml_setup("root=", uml_root_setup,
+"root=<file containing the root fs>\n"
+" This is actually used by the generic kernel in exactly the same\n"
+" way as in any other kernel. If you configure a number of block\n"
+" devices and want to boot off something other than ubd0, you \n"
+" would use something like:\n"
+" root=/dev/ubd5\n\n"
+);
+
+/* "debug" handler: print a hint that the flag is unnecessary; no other effect. */
+static int __init no_skas_debug_setup(char *line, int *add)
+{
+ printf("'debug' is not necessary to gdb UML in skas mode - run \n");
+ printf("'gdb linux'\n");
+
+ return 0;
+}
+
+__uml_setup("debug", no_skas_debug_setup,
+"debug\n"
+" this flag is not needed to run gdb on UML in skas mode\n\n"
+);
+
+#ifdef CONFIG_SMP
+/*
+ * "ncpus=" handler: parse the decimal value after the prefix into ncpus.
+ * Returns -1 when sscanf() converts nothing, 0 otherwise.
+ * NOTE(review): the parsed value is not range-checked here - confirm that
+ * zero, negative or oversized counts are rejected elsewhere.
+ */
+static int __init uml_ncpus_setup(char *line, int *add)
+{
+ if (!sscanf(line, "%d", &ncpus)) {
+ printf("Couldn't parse [%s]\n", line);
+ return -1;
+ }
+
+ return 0;
+}
+
+__uml_setup("ncpus=", uml_ncpus_setup,
+"ncpus=<# of desired CPUs>\n"
+" This tells an SMP kernel how many virtual processors to start.\n\n"
+);
+#endif
+
+/*
+ * "--help" handler: print the banner followed by every help string stored
+ * between __uml_help_start and __uml_help_end, then exit with status 0.
+ * The return statement is never reached.
+ */
+static int __init Usage(char *line, int *add)
+{
+ const char **p;
+
+ printf(usage_string, init_utsname()->release);
+ p = &__uml_help_start;
+ while (p < &__uml_help_end) {
+ printf("%s", *p);
+ p++;
+ }
+ exit(0);
+ return 0;
+}
+
+__uml_setup("--help", Usage,
+"--help\n"
+" Prints this message.\n\n"
+);
+
+/*
+ * Offer one command line argument to the registered setup handlers.
+ * Each entry between __uml_setup_start and __uml_setup_end whose prefix
+ * matches the start of 'line' has its setup_func called with the text
+ * after the prefix.  Scanning stops at the first handler that returns
+ * non-zero; after a zero return the remaining entries are still tried.
+ */
+static void __init uml_checksetup(char *line, int *add)
+{
+ struct uml_param *p;
+
+ p = &__uml_setup_start;
+ while (p < &__uml_setup_end) {
+ size_t n;
+
+ n = strlen(p->str);
+ if (!strncmp(line, p->str, n) && p->setup_func(line + n, add))
+ return;
+ p++;
+ }
+}
+
+/*
+ * Call every initcall stored between __uml_postsetup_start and
+ * __uml_postsetup_end, in order.  Their return values are ignored.
+ */
+static void __init uml_postsetup(void)
+{
+	initcall_t *call;
+
+	for (call = &__uml_postsetup_start; call < &__uml_postsetup_end; call++)
+		(*call)();
+}
+
+/*
+ * Panic notifier callback: dump the current thread's registers, set
+ * uml_exitcode to 1 and call os_dump_core().  The arguments are unused.
+ */
+static int panic_exit(struct notifier_block *self, unsigned long unused1,
+ void *unused2)
+{
+ bust_spinlocks(1);
+ show_regs(&(current->thread.regs));
+ bust_spinlocks(0);
+ uml_exitcode = 1;
+ os_dump_core();
+ return 0;
+}
+
+static struct notifier_block panic_exit_notifier = {
+ .notifier_call = panic_exit,
+ .next = NULL,
+ .priority = 0
+};
+
+/* Set during early boot */
+unsigned long task_size;
+EXPORT_SYMBOL(task_size);
+
+unsigned long host_task_size;
+
+unsigned long brk_start;
+unsigned long end_iomem;
+EXPORT_SYMBOL(end_iomem);
+
+#define MIN_VMALLOC (32 * 1024 * 1024)
+
+extern char __binary_start;
+
+/*
+ * UML start-up: process the host command line, size and lay out physical
+ * and virtual memory, register the panic notifier, run the post-setup
+ * hooks and finally start the kernel via start_uml(), whose result is
+ * returned.
+ */
+int __init linux_main(int argc, char **argv)
+{
+ unsigned long avail, diff;
+ unsigned long virtmem_size, max_physmem;
+ unsigned long stack;
+ unsigned int i;
+ int add;
+ char * mode;
+
+ /*
+ * Offer every argument to the setup handlers; whatever a handler does
+ * not claim (by clearing add) is appended to the kernel command line.
+ * A first argument that starts with a space is skipped.
+ * NOTE(review): i is unsigned while argc is int, so the comparison is
+ * done in unsigned arithmetic.
+ */
+ for (i = 1; i < argc; i++) {
+ if ((i == 1) && (argv[i][0] == ' '))
+ continue;
+ add = 1;
+ uml_checksetup(argv[i], &add);
+ if (add)
+ add_arg(argv[i]);
+ }
+ if (have_root == 0)
+ add_arg(DEFAULT_COMMAND_LINE);
+
+ host_task_size = os_get_top_address();
+ /*
+ * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps
+ * out
+ */
+ task_size = host_task_size & PGDIR_MASK;
+
+ /* OS sanity checks that need to happen before the kernel runs */
+ os_early_checks();
+
+ can_do_skas();
+
+ if (proc_mm && ptrace_faultinfo)
+ mode = "SKAS3";
+ else
+ mode = "SKAS0";
+
+ printf("UML running in %s mode\n", mode);
+
+ brk_start = (unsigned long) sbrk(0);
+
+ /*
+ * Increase physical memory size for exec-shield users
+ * so they actually get what they asked for. This should
+ * add zero for non-exec shield users
+ */
+
+ diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
+ /* NOTE(review): diff is unsigned long but is printed with %ld */
+ if (diff > 1024 * 1024) {
+ printf("Adding %ld bytes to physical memory to account for "
+ "exec-shield gap\n", diff);
+ physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
+ }
+
+ uml_physmem = (unsigned long) &__binary_start & PAGE_MASK;
+
+ /* Reserve up to 4M after the current brk */
+ uml_reserved = ROUND_4M(brk_start) + (1 << 22);
+
+ setup_machinename(init_utsname()->machine);
+
+ highmem = 0;
+ /* Round iomem_size up to a whole number of pages */
+ iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
+ max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;
+
+ /*
+ * Zones have to begin on a 1 << MAX_ORDER page boundary,
+ * so this makes sure that's true for highmem
+ */
+ max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1);
+ /*
+ * Whatever does not fit below max_physmem becomes highmem, or is
+ * dropped altogether when CONFIG_HIGHMEM is off.
+ * NOTE(review): physmem_size is a signed long long printed with %Lu.
+ */
+ if (physmem_size + iomem_size > max_physmem) {
+ highmem = physmem_size + iomem_size - max_physmem;
+ physmem_size -= highmem;
+#ifndef CONFIG_HIGHMEM
+ highmem = 0;
+ printf("CONFIG_HIGHMEM not enabled - physical memory shrunk "
+ "to %Lu bytes\n", physmem_size);
+#endif
+ }
+
+ high_physmem = uml_physmem + physmem_size;
+ end_iomem = high_physmem + iomem_size;
+ high_memory = (void *) end_iomem;
+
+ start_vm = VMALLOC_START;
+
+ setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
+ if (init_maps(physmem_size, iomem_size, highmem)) {
+ printf("Failed to allocate mem_map for %Lu bytes of physical "
+ "memory and %Lu bytes of highmem\n", physmem_size,
+ highmem);
+ exit(1);
+ }
+
+ /*
+ * Kernel virtual memory gets the size of physical memory, capped by
+ * the space between start_vm and the 1M-aligned address below argv.
+ */
+ virtmem_size = physmem_size;
+ stack = (unsigned long) argv;
+ stack &= ~(1024 * 1024 - 1);
+ avail = stack - start_vm;
+ if (physmem_size > avail)
+ virtmem_size = avail;
+ end_vm = start_vm + virtmem_size;
+
+ if (virtmem_size < physmem_size)
+ printf("Kernel virtual memory size shrunk to %lu bytes\n",
+ virtmem_size);
+
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &panic_exit_notifier);
+
+ uml_postsetup();
+
+ stack_protections((unsigned long) &init_thread_info);
+ os_flush_stdout();
+
+ return start_uml();
+}
+
+/*
+ * Architecture setup hook: initialise paging, copy the command line
+ * assembled by add_arg() into boot_command_line, hand it back through
+ * *cmdline_p and fill in host_info.
+ */
+void __init setup_arch(char **cmdline_p)
+{
+ paging_init();
+ strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
+ *cmdline_p = command_line;
+ setup_hostinfo(host_info, sizeof host_info);
+}
+
+/* Run the architecture checks followed by the OS-level checks. */
+void __init check_bugs(void)
+{
+ arch_check_bugs();
+ os_check_bugs();
+}
+
+/* Intentionally empty: no alternatives are applied here. */
+void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+{
+}
+
+#ifdef CONFIG_SMP
+void alternatives_smp_module_add(struct module *mod, char *name,
+ void *locks, void *locks_end,
+ void *text, void *text_end)
+{
+}
+
+void alternatives_smp_module_del(struct module *mod)
+{
+}
+#endif
diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c
new file mode 100644
index 00000000000..81e07e2be3a
--- /dev/null
+++ b/arch/um/kernel/umid.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <asm/errno.h>
+#include "init.h"
+#include "kern.h"
+#include "os.h"
+
+/* Changed by set_umid_arg */
+static int umid_inited = 0;
+
+/*
+ * "umid=" handler: set the UML id to 'name' unless one was already set
+ * through this handler.  On the first attempt *add is cleared, so the
+ * argument is not added to the kernel command line.  Always returns 0,
+ * whatever set_umid() reports.
+ */
+static int __init set_umid_arg(char *name, int *add)
+{
+ int err;
+
+ /* Later umid= arguments are ignored; note that *add is left alone here */
+ if (umid_inited) {
+ printf("umid already set\n");
+ return 0;
+ }
+
+ *add = 0;
+ err = set_umid(name);
+ /* Errors other than -EEXIST are not reported here */
+ if (err == -EEXIST)
+ printf("umid '%s' already in use\n", name);
+ else if (!err)
+ umid_inited = 1;
+
+ return 0;
+}
+
+__uml_setup("umid=", set_umid_arg,
+"umid=<name>\n"
+" This is used to assign a unique identity to this UML machine and\n"
+" is used for naming the pid file and management console socket.\n\n"
+);
+
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
new file mode 100644
index 00000000000..fbd99402d4d
--- /dev/null
+++ b/arch/um/kernel/uml.lds.S
@@ -0,0 +1,108 @@
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/page.h>
+
+OUTPUT_FORMAT(ELF_FORMAT)
+OUTPUT_ARCH(ELF_ARCH)
+ENTRY(_start)
+jiffies = jiffies_64;
+
+SECTIONS
+{
+ /* This must contain the right address - not quite the default ELF one.*/
+ PROVIDE (__executable_start = START);
+ /* Static binaries stick stuff here, like the sigreturn trampoline,
+ * invisibly to objdump. So, just make __binary_start equal to the very
+ * beginning of the executable, and if there are unmapped pages after this,
+ * they are forever unusable.
+ */
+ __binary_start = START;
+
+ . = START + SIZEOF_HEADERS;
+
+ _text = .;
+ _stext = .;
+ __init_begin = .;
+ INIT_TEXT_SECTION(0)
+ . = ALIGN(PAGE_SIZE);
+
+ .text :
+ {
+ TEXT_TEXT
+ SCHED_TEXT
+ LOCK_TEXT
+ *(.fixup)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ *(.gnu.linkonce.t*)
+ }
+
+ . = ALIGN(PAGE_SIZE);
+ .syscall_stub : {
+ __syscall_stub_start = .;
+ *(.__syscall_stub*)
+ __syscall_stub_end = .;
+ }
+
+ /*
+ * These are needed even in a static link, even if they wind up being empty.
+ * Newer glibc needs these __rel{,a}_iplt_{start,end} symbols.
+ */
+ .rel.plt : {
+ *(.rel.plt)
+ PROVIDE_HIDDEN(__rel_iplt_start = .);
+ *(.rel.iplt)
+ PROVIDE_HIDDEN(__rel_iplt_end = .);
+ }
+ .rela.plt : {
+ *(.rela.plt)
+ PROVIDE_HIDDEN(__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN(__rela_iplt_end = .);
+ }
+
+ #include "asm/common.lds.S"
+
+ init.data : { INIT_DATA }
+ .data :
+ {
+ INIT_TASK_DATA(KERNEL_STACK_SIZE)
+ . = ALIGN(KERNEL_STACK_SIZE);
+ *(.data..init_irqstack)
+ DATA_DATA
+ *(.gnu.linkonce.d*)
+ CONSTRUCTORS
+ }
+ .data1 : { *(.data1) }
+ .ctors :
+ {
+ *(.ctors)
+ }
+ .dtors :
+ {
+ *(.dtors)
+ }
+
+ .got : { *(.got.plt) *(.got) }
+ .dynamic : { *(.dynamic) }
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* We want the small data sections together, so single-instruction offsets
+ can access them all, and initialized data all before uninitialized, so
+ we can shorten the on-disk segment size. */
+ .sdata : { *(.sdata) }
+ _edata = .;
+ PROVIDE (edata = .);
+ . = ALIGN(PAGE_SIZE);
+ __bss_start = .;
+ PROVIDE(_bss_start = .);
+ SBSS(0)
+ BSS(0)
+ _end = .;
+ PROVIDE (end = .);
+
+ STABS_DEBUG
+
+ DWARF_DEBUG
+
+ DISCARDS
+}
diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S
new file mode 100644
index 00000000000..16e49bfa2b4
--- /dev/null
+++ b/arch/um/kernel/vmlinux.lds.S
@@ -0,0 +1,8 @@
+
+KERNEL_STACK_SIZE = 4096 * (1 << CONFIG_KERNEL_STACK_ORDER);
+
+#ifdef CONFIG_LD_SCRIPT_STATIC
+#include "uml.lds.S"
+#else
+#include "dyn.lds.S"
+#endif