summaryrefslogtreecommitdiffstats
path: root/arch/x86/platform
diff options
context:
space:
mode:
authorAnton Arapov <anton@redhat.com>2012-04-16 10:05:28 +0200
committerAnton Arapov <anton@redhat.com>2012-04-16 10:05:28 +0200
commitb4b6116a13633898cf868f2f103c96a90c4c20f8 (patch)
tree93d1b7e2cfcdf473d8d4ff3ad141fa864f8491f6 /arch/x86/platform
parentedd4be777c953e5faafc80d091d3084b4343f5d3 (diff)
downloadkernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.gz
kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.xz
kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.zip
fedora kernel: d9aad82f3319f3cfd1aebc01234254ef0c37ad84v3.3.2-1
Signed-off-by: Anton Arapov <anton@redhat.com>
Diffstat (limited to 'arch/x86/platform')
-rw-r--r--arch/x86/platform/Makefile11
-rw-r--r--arch/x86/platform/ce4100/Makefile1
-rw-r--r--arch/x86/platform/ce4100/ce4100.c146
-rw-r--r--arch/x86/platform/ce4100/falconfalls.dts430
-rw-r--r--arch/x86/platform/efi/Makefile1
-rw-r--r--arch/x86/platform/efi/efi.c786
-rw-r--r--arch/x86/platform/efi/efi_32.c69
-rw-r--r--arch/x86/platform/efi/efi_64.c99
-rw-r--r--arch/x86/platform/efi/efi_stub_32.S123
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S116
-rw-r--r--arch/x86/platform/geode/Makefile1
-rw-r--r--arch/x86/platform/geode/alix.c142
-rw-r--r--arch/x86/platform/iris/Makefile1
-rw-r--r--arch/x86/platform/iris/iris.c91
-rw-r--r--arch/x86/platform/mrst/Makefile4
-rw-r--r--arch/x86/platform/mrst/early_printk_mrst.c327
-rw-r--r--arch/x86/platform/mrst/mrst.c1047
-rw-r--r--arch/x86/platform/mrst/pmu.c817
-rw-r--r--arch/x86/platform/mrst/pmu.h234
-rw-r--r--arch/x86/platform/mrst/vrtc.c169
-rw-r--r--arch/x86/platform/olpc/Makefile5
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-pm.c216
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-rtc.c81
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-sci.c614
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c168
-rw-r--r--arch/x86/platform/olpc/olpc.c379
-rw-r--r--arch/x86/platform/olpc/olpc_dt.c304
-rw-r--r--arch/x86/platform/olpc/olpc_ofw.c117
-rw-r--r--arch/x86/platform/olpc/xo1-wakeup.S124
-rw-r--r--arch/x86/platform/scx200/Makefile2
-rw-r--r--arch/x86/platform/scx200/scx200_32.c131
-rw-r--r--arch/x86/platform/sfi/Makefile1
-rw-r--r--arch/x86/platform/sfi/sfi.c109
-rw-r--r--arch/x86/platform/uv/Makefile1
-rw-r--r--arch/x86/platform/uv/bios_uv.c216
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2123
-rw-r--r--arch/x86/platform/uv/uv_irq.c285
-rw-r--r--arch/x86/platform/uv/uv_sysfs.c76
-rw-r--r--arch/x86/platform/uv/uv_time.c429
-rw-r--r--arch/x86/platform/visws/Makefile1
-rw-r--r--arch/x86/platform/visws/visws_quirks.c608
41 files changed, 10605 insertions, 0 deletions
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
new file mode 100644
index 00000000000..8d874396cb2
--- /dev/null
+++ b/arch/x86/platform/Makefile
@@ -0,0 +1,11 @@
+# Platform specific code goes here
+obj-y += ce4100/
+obj-y += efi/
+obj-y += geode/
+obj-y += iris/
+obj-y += mrst/
+obj-y += olpc/
+obj-y += scx200/
+obj-y += sfi/
+obj-y += visws/
+obj-y += uv/
diff --git a/arch/x86/platform/ce4100/Makefile b/arch/x86/platform/ce4100/Makefile
new file mode 100644
index 00000000000..91fc92971d9
--- /dev/null
+++ b/arch/x86/platform/ce4100/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
new file mode 100644
index 00000000000..4c61b52191e
--- /dev/null
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -0,0 +1,146 @@
+/*
+ * Intel CE4100 platform specific setup code
+ *
+ * (C) Copyright 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/serial_reg.h>
+#include <linux/serial_8250.h>
+
+#include <asm/ce4100.h>
+#include <asm/prom.h>
+#include <asm/setup.h>
+#include <asm/i8259.h>
+#include <asm/io.h>
+#include <asm/io_apic.h>
+
+static int ce4100_i8042_detect(void)
+{
+ return 0;
+}
+
+#ifdef CONFIG_SERIAL_8250
+
+static unsigned int mem_serial_in(struct uart_port *p, int offset)
+{
+ offset = offset << p->regshift;
+ return readl(p->membase + offset);
+}
+
+/*
+ * The UART Tx interrupts are not set under some conditions and therefore serial
+ * transmission hangs. This is a silicon issue and has not been root caused. The
+ * workaround for this silicon issue checks UART_LSR_THRE bit and UART_LSR_TEMT
+ * bit of LSR register in interrupt handler to see whether at least one of these
+ * two bits is set, if so then process the transmit request. If this workaround
+ * is not applied, then the serial transmission may hang. This workaround is for
+ * errata number 9 in Errata - B step.
+*/
+
+static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset)
+{
+ unsigned int ret, ier, lsr;
+
+ if (offset == UART_IIR) {
+ offset = offset << p->regshift;
+ ret = readl(p->membase + offset);
+ if (ret & UART_IIR_NO_INT) {
+ /* see if the TX interrupt should have really set */
+ ier = mem_serial_in(p, UART_IER);
+ /* see if the UART's XMIT interrupt is enabled */
+ if (ier & UART_IER_THRI) {
+ lsr = mem_serial_in(p, UART_LSR);
+ /* now check to see if the UART should be
+ generating an interrupt (but isn't) */
+ if (lsr & (UART_LSR_THRE | UART_LSR_TEMT))
+ ret &= ~UART_IIR_NO_INT;
+ }
+ }
+ } else
+ ret = mem_serial_in(p, offset);
+ return ret;
+}
+
+static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value)
+{
+ offset = offset << p->regshift;
+ writel(value, p->membase + offset);
+}
+
+static void ce4100_serial_fixup(int port, struct uart_port *up,
+ unsigned short *capabilites)
+{
+#ifdef CONFIG_EARLY_PRINTK
+ /*
+ * Over ride the legacy port configuration that comes from
+ * asm/serial.h. Using the ioport driver then switching to the
+ * PCI memmaped driver hangs the IOAPIC
+ */
+ if (up->iotype != UPIO_MEM32) {
+ up->uartclk = 14745600;
+ up->mapbase = 0xdffe0200;
+ set_fixmap_nocache(FIX_EARLYCON_MEM_BASE,
+ up->mapbase & PAGE_MASK);
+ up->membase =
+ (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
+ up->membase += up->mapbase & ~PAGE_MASK;
+ up->iotype = UPIO_MEM32;
+ up->regshift = 2;
+ }
+#endif
+ up->iobase = 0;
+ up->serial_in = ce4100_mem_serial_in;
+ up->serial_out = ce4100_mem_serial_out;
+
+ *capabilites |= (1 << 12);
+}
+
+static __init void sdv_serial_fixup(void)
+{
+ serial8250_set_isa_configurator(ce4100_serial_fixup);
+}
+
+#else
+static inline void sdv_serial_fixup(void) {};
+#endif
+
+static void __init sdv_arch_setup(void)
+{
+ sdv_serial_fixup();
+}
+
+#ifdef CONFIG_X86_IO_APIC
+static void __cpuinit sdv_pci_init(void)
+{
+ x86_of_pci_init();
+ /* We can't set this earlier, because we need to calibrate the timer */
+ legacy_pic = &null_legacy_pic;
+}
+#endif
+
+/*
+ * CE4100 specific x86_init function overrides and early setup
+ * calls.
+ */
+void __init x86_ce4100_early_setup(void)
+{
+ x86_init.oem.arch_setup = sdv_arch_setup;
+ x86_platform.i8042_detect = ce4100_i8042_detect;
+ x86_init.resources.probe_roms = x86_init_noop;
+ x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.pci.init = ce4100_pci_init;
+
+#ifdef CONFIG_X86_IO_APIC
+ x86_init.pci.init_irq = sdv_pci_init;
+ x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
+#endif
+}
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
new file mode 100644
index 00000000000..e70be38ce03
--- /dev/null
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -0,0 +1,430 @@
+/*
+ * CE4100 on Falcon Falls
+ *
+ * (c) Copyright 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ */
+/dts-v1/;
+/ {
+ model = "intel,falconfalls";
+ compatible = "intel,falconfalls";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "intel,ce4100";
+ reg = <0>;
+ lapic = <&lapic0>;
+ };
+ };
+
+ soc@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "intel,ce4100-cp";
+ ranges;
+
+ ioapic1: interrupt-controller@fec00000 {
+ #interrupt-cells = <2>;
+ compatible = "intel,ce4100-ioapic";
+ interrupt-controller;
+ reg = <0xfec00000 0x1000>;
+ };
+
+ timer@fed00000 {
+ compatible = "intel,ce4100-hpet";
+ reg = <0xfed00000 0x200>;
+ };
+
+ lapic0: interrupt-controller@fee00000 {
+ compatible = "intel,ce4100-lapic";
+ reg = <0xfee00000 0x1000>;
+ };
+
+ pci@3fc {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ compatible = "intel,ce4100-pci", "pci";
+ device_type = "pci";
+ bus-range = <0 0>;
+ ranges = <0x2000000 0 0xbffff000 0xbffff000 0 0x1000
+ 0x2000000 0 0xdffe0000 0xdffe0000 0 0x1000
+ 0x0000000 0 0x0 0x0 0 0x100>;
+
+ /* Secondary IO-APIC */
+ ioapic2: interrupt-controller@0,1 {
+ #interrupt-cells = <2>;
+ compatible = "intel,ce4100-ioapic";
+ interrupt-controller;
+ reg = <0x100 0x0 0x0 0x0 0x0>;
+ assigned-addresses = <0x02000000 0x0 0xbffff000 0x0 0x1000>;
+ };
+
+ pci@1,0 {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ compatible = "intel,ce4100-pci", "pci";
+ device_type = "pci";
+ bus-range = <1 1>;
+ reg = <0x0800 0x0 0x0 0x0 0x0>;
+ ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>;
+
+ interrupt-parent = <&ioapic2>;
+
+ display@2,0 {
+ compatible = "pci8086,2e5b.2",
+ "pci8086,2e5b",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x11000 0x0 0x0 0x0 0x0>;
+ interrupts = <0 1>;
+ };
+
+ multimedia@3,0 {
+ compatible = "pci8086,2e5c.2",
+ "pci8086,2e5c",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x11800 0x0 0x0 0x0 0x0>;
+ interrupts = <2 1>;
+ };
+
+ multimedia@4,0 {
+ compatible = "pci8086,2e5d.2",
+ "pci8086,2e5d",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x12000 0x0 0x0 0x0 0x0>;
+ interrupts = <4 1>;
+ };
+
+ multimedia@4,1 {
+ compatible = "pci8086,2e5e.2",
+ "pci8086,2e5e",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x12100 0x0 0x0 0x0 0x0>;
+ interrupts = <5 1>;
+ };
+
+ sound@6,0 {
+ compatible = "pci8086,2e5f.2",
+ "pci8086,2e5f",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13000 0x0 0x0 0x0 0x0>;
+ interrupts = <6 1>;
+ };
+
+ sound@6,1 {
+ compatible = "pci8086,2e5f.2",
+ "pci8086,2e5f",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13100 0x0 0x0 0x0 0x0>;
+ interrupts = <7 1>;
+ };
+
+ sound@6,2 {
+ compatible = "pci8086,2e60.2",
+ "pci8086,2e60",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13200 0x0 0x0 0x0 0x0>;
+ interrupts = <8 1>;
+ };
+
+ display@8,0 {
+ compatible = "pci8086,2e61.2",
+ "pci8086,2e61",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x14000 0x0 0x0 0x0 0x0>;
+ interrupts = <9 1>;
+ };
+
+ display@8,1 {
+ compatible = "pci8086,2e62.2",
+ "pci8086,2e62",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x14100 0x0 0x0 0x0 0x0>;
+ interrupts = <10 1>;
+ };
+
+ multimedia@8,2 {
+ compatible = "pci8086,2e63.2",
+ "pci8086,2e63",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x14200 0x0 0x0 0x0 0x0>;
+ interrupts = <11 1>;
+ };
+
+ entertainment-encryption@9,0 {
+ compatible = "pci8086,2e64.2",
+ "pci8086,2e64",
+ "pciclass101000",
+ "pciclass1010";
+
+ reg = <0x14800 0x0 0x0 0x0 0x0>;
+ interrupts = <12 1>;
+ };
+
+ localbus@a,0 {
+ compatible = "pci8086,2e65.2",
+ "pci8086,2e65",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x15000 0x0 0x0 0x0 0x0>;
+ };
+
+ serial@b,0 {
+ compatible = "pci8086,2e66.2",
+ "pci8086,2e66",
+ "pciclass070003",
+ "pciclass0700";
+
+ reg = <0x15800 0x0 0x0 0x0 0x0>;
+ interrupts = <14 1>;
+ };
+
+ gpio@b,1 {
+ compatible = "pci8086,2e67.2",
+ "pci8086,2e67",
+ "pciclassff0000",
+ "pciclassff00";
+
+ #gpio-cells = <2>;
+ reg = <0x15900 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+ gpio-controller;
+ };
+
+ i2c-controller@b,2 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "pci8086,2e68.2",
+ "pci8086,2e68",
+ "pciclass,ff0000",
+ "pciclass,ff00";
+
+ reg = <0x15a00 0x0 0x0 0x0 0x0>;
+ interrupts = <16 1>;
+ ranges = <0 0 0x02000000 0 0xdffe0500 0x100
+ 1 0 0x02000000 0 0xdffe0600 0x100
+ 2 0 0x02000000 0 0xdffe0700 0x100>;
+
+ i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <0 0 0x100>;
+ };
+
+ i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <1 0 0x100>;
+
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <2 0 0x100>;
+
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+ };
+
+ smard-card@b,3 {
+ compatible = "pci8086,2e69.2",
+ "pci8086,2e69",
+ "pciclass070500",
+ "pciclass0705";
+
+ reg = <0x15b00 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+ };
+
+ spi-controller@b,4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible =
+ "pci8086,2e6a.2",
+ "pci8086,2e6a",
+ "pciclass,ff0000",
+ "pciclass,ff00";
+
+ reg = <0x15c00 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+
+ dac@0 {
+ compatible = "ti,pcm1755";
+ reg = <0>;
+ spi-max-frequency = <115200>;
+ };
+
+ dac@1 {
+ compatible = "ti,pcm1609a";
+ reg = <1>;
+ spi-max-frequency = <115200>;
+ };
+
+ eeprom@2 {
+ compatible = "atmel,at93c46";
+ reg = <2>;
+ spi-max-frequency = <115200>;
+ };
+ };
+
+ multimedia@b,7 {
+ compatible = "pci8086,2e6d.2",
+ "pci8086,2e6d",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x15f00 0x0 0x0 0x0 0x0>;
+ };
+
+ ethernet@c,0 {
+ compatible = "pci8086,2e6e.2",
+ "pci8086,2e6e",
+ "pciclass020000",
+ "pciclass0200";
+
+ reg = <0x16000 0x0 0x0 0x0 0x0>;
+ interrupts = <21 1>;
+ };
+
+ clock@c,1 {
+ compatible = "pci8086,2e6f.2",
+ "pci8086,2e6f",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x16100 0x0 0x0 0x0 0x0>;
+ interrupts = <3 1>;
+ };
+
+ usb@d,0 {
+ compatible = "pci8086,2e70.2",
+ "pci8086,2e70",
+ "pciclass0c0320",
+ "pciclass0c03";
+
+ reg = <0x16800 0x0 0x0 0x0 0x0>;
+ interrupts = <22 1>;
+ };
+
+ usb@d,1 {
+ compatible = "pci8086,2e70.2",
+ "pci8086,2e70",
+ "pciclass0c0320",
+ "pciclass0c03";
+
+ reg = <0x16900 0x0 0x0 0x0 0x0>;
+ interrupts = <22 1>;
+ };
+
+ sata@e,0 {
+ compatible = "pci8086,2e71.0",
+ "pci8086,2e71",
+ "pciclass010601",
+ "pciclass0106";
+
+ reg = <0x17000 0x0 0x0 0x0 0x0>;
+ interrupts = <23 1>;
+ };
+
+ flash@f,0 {
+ compatible = "pci8086,701.1",
+ "pci8086,701",
+ "pciclass050100",
+ "pciclass0501";
+
+ reg = <0x17800 0x0 0x0 0x0 0x0>;
+ interrupts = <13 1>;
+ };
+
+ entertainment-encryption@10,0 {
+ compatible = "pci8086,702.1",
+ "pci8086,702",
+ "pciclass101000",
+ "pciclass1010";
+
+ reg = <0x18000 0x0 0x0 0x0 0x0>;
+ };
+
+ co-processor@11,0 {
+ compatible = "pci8086,703.1",
+ "pci8086,703",
+ "pciclass0b4000",
+ "pciclass0b40";
+
+ reg = <0x18800 0x0 0x0 0x0 0x0>;
+ interrupts = <1 1>;
+ };
+
+ multimedia@12,0 {
+ compatible = "pci8086,704.0",
+ "pci8086,704",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x19000 0x0 0x0 0x0 0x0>;
+ };
+ };
+
+ isa@1f,0 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "isa";
+ reg = <0xf800 0x0 0x0 0x0 0x0>;
+ ranges = <1 0 0 0 0 0x100>;
+
+ rtc@70 {
+ compatible = "intel,ce4100-rtc", "motorola,mc146818";
+ interrupts = <8 3>;
+ interrupt-parent = <&ioapic1>;
+ ctrl-reg = <2>;
+ freq-reg = <0x26>;
+ reg = <1 0x70 2>;
+ };
+ };
+ };
+ };
+};
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
new file mode 100644
index 00000000000..73b8be0f367
--- /dev/null
+++ b/arch/x86/platform/efi/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
new file mode 100644
index 00000000000..7ed4e0f6f3c
--- /dev/null
+++ b/arch/x86/platform/efi/efi.c
@@ -0,0 +1,786 @@
+/*
+ * Common EFI (Extensible Firmware Interface) support functions
+ * Based on Extensible Firmware Interface Specification version 1.0
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999-2002 Hewlett-Packard Co.
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2005-2008 Intel Co.
+ * Fenghua Yu <fenghua.yu@intel.com>
+ * Bibo Mao <bibo.mao@intel.com>
+ * Chandramouli Narayanan <mouli@linux.intel.com>
+ * Huang Ying <ying.huang@intel.com>
+ *
+ * Copied from efi_32.c to eliminate the duplicated code between EFI
+ * 32/64 support code. --ying 2007-10-26
+ *
+ * All EFI Runtime Services are not implemented yet as EFI only
+ * supports physical mode addressing on SoftSDV. This is to be fixed
+ * in a future version. --drummond 1999-07-20
+ *
+ * Implemented EFI runtime services and virtual mode calls. --davidm
+ *
+ * Goutham Rao: <goutham.rao@intel.com>
+ * Skip non-WB memory and ignore empty memory ranges.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/efi.h>
+#include <linux/export.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <linux/reboot.h>
+#include <linux/bcd.h>
+
+#include <asm/setup.h>
+#include <asm/efi.h>
+#include <asm/time.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/x86_init.h>
+
+#define EFI_DEBUG 1
+#define PFX "EFI: "
+
+int efi_enabled;
+EXPORT_SYMBOL(efi_enabled);
+
+struct efi __read_mostly efi = {
+ .mps = EFI_INVALID_TABLE_ADDR,
+ .acpi = EFI_INVALID_TABLE_ADDR,
+ .acpi20 = EFI_INVALID_TABLE_ADDR,
+ .smbios = EFI_INVALID_TABLE_ADDR,
+ .sal_systab = EFI_INVALID_TABLE_ADDR,
+ .boot_info = EFI_INVALID_TABLE_ADDR,
+ .hcdp = EFI_INVALID_TABLE_ADDR,
+ .uga = EFI_INVALID_TABLE_ADDR,
+ .uv_systab = EFI_INVALID_TABLE_ADDR,
+};
+EXPORT_SYMBOL(efi);
+
+struct efi_memory_map memmap;
+
+static struct efi efi_phys __initdata;
+static efi_system_table_t efi_systab __initdata;
+
+static int __init setup_noefi(char *arg)
+{
+ efi_enabled = 0;
+ return 0;
+}
+early_param("noefi", setup_noefi);
+
+int add_efi_memmap;
+EXPORT_SYMBOL(add_efi_memmap);
+
+static int __init setup_add_efi_memmap(char *arg)
+{
+ add_efi_memmap = 1;
+ return 0;
+}
+early_param("add_efi_memmap", setup_add_efi_memmap);
+
+
+static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
+{
+ unsigned long flags;
+ efi_status_t status;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ status = efi_call_virt2(get_time, tm, tc);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return status;
+}
+
+static efi_status_t virt_efi_set_time(efi_time_t *tm)
+{
+ unsigned long flags;
+ efi_status_t status;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ status = efi_call_virt1(set_time, tm);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return status;
+}
+
+static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
+ efi_bool_t *pending,
+ efi_time_t *tm)
+{
+ unsigned long flags;
+ efi_status_t status;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ status = efi_call_virt3(get_wakeup_time,
+ enabled, pending, tm);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return status;
+}
+
+static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
+{
+ unsigned long flags;
+ efi_status_t status;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ status = efi_call_virt2(set_wakeup_time,
+ enabled, tm);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return status;
+}
+
+static efi_status_t virt_efi_get_variable(efi_char16_t *name,
+ efi_guid_t *vendor,
+ u32 *attr,
+ unsigned long *data_size,
+ void *data)
+{
+ return efi_call_virt5(get_variable,
+ name, vendor, attr,
+ data_size, data);
+}
+
+static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
+ efi_char16_t *name,
+ efi_guid_t *vendor)
+{
+ return efi_call_virt3(get_next_variable,
+ name_size, name, vendor);
+}
+
+static efi_status_t virt_efi_set_variable(efi_char16_t *name,
+ efi_guid_t *vendor,
+ u32 attr,
+ unsigned long data_size,
+ void *data)
+{
+ return efi_call_virt5(set_variable,
+ name, vendor, attr,
+ data_size, data);
+}
+
+static efi_status_t virt_efi_query_variable_info(u32 attr,
+ u64 *storage_space,
+ u64 *remaining_space,
+ u64 *max_variable_size)
+{
+ if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
+ return EFI_UNSUPPORTED;
+
+ return efi_call_virt4(query_variable_info, attr, storage_space,
+ remaining_space, max_variable_size);
+}
+
+static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
+{
+ return efi_call_virt1(get_next_high_mono_count, count);
+}
+
+static void virt_efi_reset_system(int reset_type,
+ efi_status_t status,
+ unsigned long data_size,
+ efi_char16_t *data)
+{
+ efi_call_virt4(reset_system, reset_type, status,
+ data_size, data);
+}
+
+static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
+ unsigned long count,
+ unsigned long sg_list)
+{
+ if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
+ return EFI_UNSUPPORTED;
+
+ return efi_call_virt3(update_capsule, capsules, count, sg_list);
+}
+
+static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
+ unsigned long count,
+ u64 *max_size,
+ int *reset_type)
+{
+ if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
+ return EFI_UNSUPPORTED;
+
+ return efi_call_virt4(query_capsule_caps, capsules, count, max_size,
+ reset_type);
+}
+
+static efi_status_t __init phys_efi_set_virtual_address_map(
+ unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map)
+{
+ efi_status_t status;
+
+ efi_call_phys_prelog();
+ status = efi_call_phys4(efi_phys.set_virtual_address_map,
+ memory_map_size, descriptor_size,
+ descriptor_version, virtual_map);
+ efi_call_phys_epilog();
+ return status;
+}
+
+static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
+ efi_time_cap_t *tc)
+{
+ unsigned long flags;
+ efi_status_t status;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ efi_call_phys_prelog();
+ status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
+ virt_to_phys(tc));
+ efi_call_phys_epilog();
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return status;
+}
+
+int efi_set_rtc_mmss(unsigned long nowtime)
+{
+ int real_seconds, real_minutes;
+ efi_status_t status;
+ efi_time_t eft;
+ efi_time_cap_t cap;
+
+ status = efi.get_time(&eft, &cap);
+ if (status != EFI_SUCCESS) {
+ printk(KERN_ERR "Oops: efitime: can't read time!\n");
+ return -1;
+ }
+
+ real_seconds = nowtime % 60;
+ real_minutes = nowtime / 60;
+ if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
+ real_minutes += 30;
+ real_minutes %= 60;
+ eft.minute = real_minutes;
+ eft.second = real_seconds;
+
+ status = efi.set_time(&eft);
+ if (status != EFI_SUCCESS) {
+ printk(KERN_ERR "Oops: efitime: can't write time!\n");
+ return -1;
+ }
+ return 0;
+}
+
+unsigned long efi_get_time(void)
+{
+ efi_status_t status;
+ efi_time_t eft;
+ efi_time_cap_t cap;
+
+ status = efi.get_time(&eft, &cap);
+ if (status != EFI_SUCCESS)
+ printk(KERN_ERR "Oops: efitime: can't read time!\n");
+
+ return mktime(eft.year, eft.month, eft.day, eft.hour,
+ eft.minute, eft.second);
+}
+
+/*
+ * Tell the kernel about the EFI memory map. This might include
+ * more than the max 128 entries that can fit in the e820 legacy
+ * (zeropage) memory map.
+ */
+
+static void __init do_add_efi_memmap(void)
+{
+ void *p;
+
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ efi_memory_desc_t *md = p;
+ unsigned long long start = md->phys_addr;
+ unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+ int e820_type;
+
+ switch (md->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+ if (md->attribute & EFI_MEMORY_WB)
+ e820_type = E820_RAM;
+ else
+ e820_type = E820_RESERVED;
+ break;
+ case EFI_ACPI_RECLAIM_MEMORY:
+ e820_type = E820_ACPI;
+ break;
+ case EFI_ACPI_MEMORY_NVS:
+ e820_type = E820_NVS;
+ break;
+ case EFI_UNUSABLE_MEMORY:
+ e820_type = E820_UNUSABLE;
+ break;
+ default:
+ /*
+ * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
+ * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
+ * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
+ */
+ e820_type = E820_RESERVED;
+ break;
+ }
+ e820_add_region(start, size, e820_type);
+ }
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+
+void __init efi_memblock_x86_reserve_range(void)
+{
+ unsigned long pmap;
+
+#ifdef CONFIG_X86_32
+ pmap = boot_params.efi_info.efi_memmap;
+#else
+ pmap = (boot_params.efi_info.efi_memmap |
+ ((__u64)boot_params.efi_info.efi_memmap_hi<<32));
+#endif
+ memmap.phys_map = (void *)pmap;
+ memmap.nr_map = boot_params.efi_info.efi_memmap_size /
+ boot_params.efi_info.efi_memdesc_size;
+ memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
+ memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
+ memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+}
+
+#if EFI_DEBUG
+static void __init print_efi_memmap(void)
+{
+ efi_memory_desc_t *md;
+ void *p;
+ int i;
+
+ for (p = memmap.map, i = 0;
+ p < memmap.map_end;
+ p += memmap.desc_size, i++) {
+ md = p;
+ printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, "
+ "range=[0x%016llx-0x%016llx) (%lluMB)\n",
+ i, md->type, md->attribute, md->phys_addr,
+ md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+ (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
+ }
+}
+#endif /* EFI_DEBUG */
+
+void __init efi_reserve_boot_services(void)
+{
+ void *p;
+
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ efi_memory_desc_t *md = p;
+ u64 start = md->phys_addr;
+ u64 size = md->num_pages << EFI_PAGE_SHIFT;
+
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA)
+ continue;
+ /* Only reserve where possible:
+ * - Not within any already allocated areas
+ * - Not over any memory area (really needed, if above?)
+ * - Not within any part of the kernel
+ * - Not the bios reserved area
+ */
+ if ((start+size >= virt_to_phys(_text)
+ && start <= virt_to_phys(_end)) ||
+ !e820_all_mapped(start, start+size, E820_RAM) ||
+ memblock_is_region_reserved(start, size)) {
+ /* Could not reserve, skip it */
+ md->num_pages = 0;
+ memblock_dbg(PFX "Could not reserve boot range "
+ "[0x%010llx-0x%010llx]\n",
+ start, start+size-1);
+ } else
+ memblock_reserve(start, size);
+ }
+}
+
+static void __init efi_free_boot_services(void)
+{
+ void *p;
+
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ efi_memory_desc_t *md = p;
+ unsigned long long start = md->phys_addr;
+ unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA)
+ continue;
+
+ /* Could not reserve boot area */
+ if (!size)
+ continue;
+
+ free_bootmem_late(start, size);
+ }
+}
+
+void __init efi_init(void)
+{
+ efi_config_table_t *config_tables;
+ efi_runtime_services_t *runtime;
+ efi_char16_t *c16;
+ char vendor[100] = "unknown";
+ int i = 0;
+ void *tmp;
+
+#ifdef CONFIG_X86_32
+ efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
+#else
+ efi_phys.systab = (efi_system_table_t *)
+ (boot_params.efi_info.efi_systab |
+ ((__u64)boot_params.efi_info.efi_systab_hi<<32));
+#endif
+
+ efi.systab = early_ioremap((unsigned long)efi_phys.systab,
+ sizeof(efi_system_table_t));
+ if (efi.systab == NULL)
+ printk(KERN_ERR "Couldn't map the EFI system table!\n");
+ memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
+ early_iounmap(efi.systab, sizeof(efi_system_table_t));
+ efi.systab = &efi_systab;
+
+ /*
+ * Verify the EFI Table
+ */
+ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ printk(KERN_ERR "EFI system table signature incorrect!\n");
+ if ((efi.systab->hdr.revision >> 16) == 0)
+ printk(KERN_ERR "Warning: EFI system table version "
+ "%d.%02d, expected 1.00 or greater!\n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff);
+
+ /*
+ * Show what we know for posterity
+ */
+ c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
+ if (c16) {
+ for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
+ vendor[i] = *c16++;
+ vendor[i] = '\0';
+ } else
+ printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
+ early_iounmap(tmp, 2);
+
+ printk(KERN_INFO "EFI v%u.%.02u by %s\n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff, vendor);
+
+ /*
+ * Let's see what config tables the firmware passed to us.
+ */
+ config_tables = early_ioremap(
+ efi.systab->tables,
+ efi.systab->nr_tables * sizeof(efi_config_table_t));
+ if (config_tables == NULL)
+ printk(KERN_ERR "Could not map EFI Configuration Table!\n");
+
+ printk(KERN_INFO);
+ for (i = 0; i < efi.systab->nr_tables; i++) {
+ if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) {
+ efi.mps = config_tables[i].table;
+ printk(" MPS=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ ACPI_20_TABLE_GUID)) {
+ efi.acpi20 = config_tables[i].table;
+ printk(" ACPI 2.0=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ ACPI_TABLE_GUID)) {
+ efi.acpi = config_tables[i].table;
+ printk(" ACPI=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ SMBIOS_TABLE_GUID)) {
+ efi.smbios = config_tables[i].table;
+ printk(" SMBIOS=0x%lx ", config_tables[i].table);
+#ifdef CONFIG_X86_UV
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ UV_SYSTEM_TABLE_GUID)) {
+ efi.uv_systab = config_tables[i].table;
+ printk(" UVsystab=0x%lx ", config_tables[i].table);
+#endif
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ HCDP_TABLE_GUID)) {
+ efi.hcdp = config_tables[i].table;
+ printk(" HCDP=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ UGA_IO_PROTOCOL_GUID)) {
+ efi.uga = config_tables[i].table;
+ printk(" UGA=0x%lx ", config_tables[i].table);
+ }
+ }
+ printk("\n");
+ early_iounmap(config_tables,
+ efi.systab->nr_tables * sizeof(efi_config_table_t));
+
+ /*
+ * Check out the runtime services table. We need to map
+ * the runtime services table so that we can grab the physical
+ * address of several of the EFI runtime functions, needed to
+ * set the firmware into virtual mode.
+ */
+ runtime = early_ioremap((unsigned long)efi.systab->runtime,
+ sizeof(efi_runtime_services_t));
+ if (runtime != NULL) {
+ /*
+ * We will only need *early* access to the following
+ * two EFI runtime services before set_virtual_address_map
+ * is invoked.
+ */
+ efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
+ efi_phys.set_virtual_address_map =
+ (efi_set_virtual_address_map_t *)
+ runtime->set_virtual_address_map;
+ /*
+ * Make efi_get_time can be called before entering
+ * virtual mode.
+ */
+ efi.get_time = phys_efi_get_time;
+ } else
+ printk(KERN_ERR "Could not map the EFI runtime service "
+ "table!\n");
+ early_iounmap(runtime, sizeof(efi_runtime_services_t));
+
+ /* Map the EFI memory map */
+ memmap.map = early_ioremap((unsigned long)memmap.phys_map,
+ memmap.nr_map * memmap.desc_size);
+ if (memmap.map == NULL)
+ printk(KERN_ERR "Could not map the EFI memory map!\n");
+ memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
+ if (memmap.desc_size != sizeof(efi_memory_desc_t))
+ printk(KERN_WARNING
+ "Kernel-defined memdesc doesn't match the one from EFI!\n");
+
+ if (add_efi_memmap)
+ do_add_efi_memmap();
+
+#ifdef CONFIG_X86_32
+ x86_platform.get_wallclock = efi_get_time;
+ x86_platform.set_wallclock = efi_set_rtc_mmss;
+#endif
+
+#if EFI_DEBUG
+ print_efi_memmap();
+#endif
+}
+
+void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
+{
+ u64 addr, npages;
+
+ addr = md->virt_addr;
+ npages = md->num_pages;
+
+ memrange_efi_to_native(&addr, &npages);
+
+ if (executable)
+ set_memory_x(addr, npages);
+ else
+ set_memory_nx(addr, npages);
+}
+
+static void __init runtime_code_page_mkexec(void)
+{
+ efi_memory_desc_t *md;
+ void *p;
+
+ /* Make EFI runtime service code area executable */
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+
+ if (md->type != EFI_RUNTIME_SERVICES_CODE)
+ continue;
+
+ efi_set_executable(md, true);
+ }
+}
+
+/*
+ * This function will switch the EFI runtime services to virtual mode.
+ * Essentially, look through the EFI memmap and map every region that
+ * has the runtime attribute bit set in its memory descriptor and update
+ * that memory descriptor with the virtual address obtained from ioremap().
+ * This enables the runtime services to be called without having to
+ * thunk back into physical mode for every invocation.
+ */
+void __init efi_enter_virtual_mode(void)
+{
+ efi_memory_desc_t *md, *prev_md = NULL;
+ efi_status_t status;
+ unsigned long size;
+ u64 end, systab, addr, npages, end_pfn;
+ void *p, *va, *new_memmap = NULL;
+ int count = 0;
+
+ efi.systab = NULL;
+
+ /* Merge contiguous regions of the same type and attribute */
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ u64 prev_size;
+ md = p;
+
+ if (!prev_md) {
+ prev_md = md;
+ continue;
+ }
+
+ if (prev_md->type != md->type ||
+ prev_md->attribute != md->attribute) {
+ prev_md = md;
+ continue;
+ }
+
+ prev_size = prev_md->num_pages << EFI_PAGE_SHIFT;
+
+ if (md->phys_addr == (prev_md->phys_addr + prev_size)) {
+ prev_md->num_pages += md->num_pages;
+ md->type = EFI_RESERVED_TYPE;
+ md->attribute = 0;
+ continue;
+ }
+ prev_md = md;
+ }
+
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+ if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
+#ifdef CONFIG_X86_64
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA)
+#endif
+ continue;
+ }
+
+ size = md->num_pages << EFI_PAGE_SHIFT;
+ end = md->phys_addr + size;
+
+ end_pfn = PFN_UP(end);
+ if (end_pfn <= max_low_pfn_mapped
+ || (end_pfn > (1UL << (32 - PAGE_SHIFT))
+ && end_pfn <= max_pfn_mapped))
+ va = __va(md->phys_addr);
+ else
+ va = efi_ioremap(md->phys_addr, size, md->type);
+
+ md->virt_addr = (u64) (unsigned long) va;
+
+ if (!va) {
+ printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
+ (unsigned long long)md->phys_addr);
+ continue;
+ }
+
+ if (!(md->attribute & EFI_MEMORY_WB)) {
+ addr = md->virt_addr;
+ npages = md->num_pages;
+ memrange_efi_to_native(&addr, &npages);
+ set_memory_uc(addr, npages);
+ }
+
+ systab = (u64) (unsigned long) efi_phys.systab;
+ if (md->phys_addr <= systab && systab < end) {
+ systab += md->virt_addr - md->phys_addr;
+ efi.systab = (efi_system_table_t *) (unsigned long) systab;
+ }
+ new_memmap = krealloc(new_memmap,
+ (count + 1) * memmap.desc_size,
+ GFP_KERNEL);
+ memcpy(new_memmap + (count * memmap.desc_size), md,
+ memmap.desc_size);
+ count++;
+ }
+
+ BUG_ON(!efi.systab);
+
+ status = phys_efi_set_virtual_address_map(
+ memmap.desc_size * count,
+ memmap.desc_size,
+ memmap.desc_version,
+ (efi_memory_desc_t *)__pa(new_memmap));
+
+ if (status != EFI_SUCCESS) {
+ printk(KERN_ALERT "Unable to switch EFI into virtual mode "
+ "(status=%lx)!\n", status);
+ panic("EFI call to SetVirtualAddressMap() failed!");
+ }
+
+ /*
+ * Thankfully, it does seem that no runtime services other than
+ * SetVirtualAddressMap() will touch boot services code, so we can
+ * get rid of it all at this point
+ */
+ efi_free_boot_services();
+
+ /*
+ * Now that EFI is in virtual mode, update the function
+ * pointers in the runtime service table to the new virtual addresses.
+ *
+ * Call EFI services through wrapper functions.
+ */
+ efi.get_time = virt_efi_get_time;
+ efi.set_time = virt_efi_set_time;
+ efi.get_wakeup_time = virt_efi_get_wakeup_time;
+ efi.set_wakeup_time = virt_efi_set_wakeup_time;
+ efi.get_variable = virt_efi_get_variable;
+ efi.get_next_variable = virt_efi_get_next_variable;
+ efi.set_variable = virt_efi_set_variable;
+ efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
+ efi.reset_system = virt_efi_reset_system;
+ efi.set_virtual_address_map = NULL;
+ efi.query_variable_info = virt_efi_query_variable_info;
+ efi.update_capsule = virt_efi_update_capsule;
+ efi.query_capsule_caps = virt_efi_query_capsule_caps;
+ if (__supported_pte_mask & _PAGE_NX)
+ runtime_code_page_mkexec();
+ early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
+ memmap.map = NULL;
+ kfree(new_memmap);
+}
+
+/*
+ * Convenience functions to obtain memory types and attributes
+ */
+u32 efi_mem_type(unsigned long phys_addr)
+{
+ efi_memory_desc_t *md;
+ void *p;
+
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+ if ((md->phys_addr <= phys_addr) &&
+ (phys_addr < (md->phys_addr +
+ (md->num_pages << EFI_PAGE_SHIFT))))
+ return md->type;
+ }
+ return 0;
+}
+
+u64 efi_mem_attributes(unsigned long phys_addr)
+{
+ efi_memory_desc_t *md;
+ void *p;
+
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+ if ((md->phys_addr <= phys_addr) &&
+ (phys_addr < (md->phys_addr +
+ (md->num_pages << EFI_PAGE_SHIFT))))
+ return md->attribute;
+ }
+ return 0;
+}
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
new file mode 100644
index 00000000000..40e446941dd
--- /dev/null
+++ b/arch/x86/platform/efi/efi_32.c
@@ -0,0 +1,69 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 1.0
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999-2002 Hewlett-Packard Co.
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * All EFI Runtime Services are not implemented yet as EFI only
+ * supports physical mode addressing on SoftSDV. This is to be fixed
+ * in a future version. --drummond 1999-07-20
+ *
+ * Implemented EFI runtime services and virtual mode calls. --davidm
+ *
+ * Goutham Rao: <goutham.rao@intel.com>
+ * Skip non-WB memory and ignore empty memory ranges.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/efi.h>
+
+#include <asm/io.h>
+#include <asm/desc.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/efi.h>
+
+/*
+ * To make EFI call EFI runtime service in physical addressing mode we need
+ * prelog/epilog before/after the invocation to disable interrupt, to
+ * claim EFI runtime service handler exclusively and to duplicate a memory in
+ * low memory space say 0 - 3G.
+ */
+
+static unsigned long efi_rt_eflags;
+
+void efi_call_phys_prelog(void)
+{
+ struct desc_ptr gdt_descr;
+
+ local_irq_save(efi_rt_eflags);
+
+ load_cr3(initial_page_table);
+ __flush_tlb_all();
+
+ gdt_descr.address = __pa(get_cpu_gdt_table(0));
+ gdt_descr.size = GDT_SIZE - 1;
+ load_gdt(&gdt_descr);
+}
+
+void efi_call_phys_epilog(void)
+{
+ struct desc_ptr gdt_descr;
+
+ gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+ gdt_descr.size = GDT_SIZE - 1;
+ load_gdt(&gdt_descr);
+
+ load_cr3(swapper_pg_dir);
+ __flush_tlb_all();
+
+ local_irq_restore(efi_rt_eflags);
+}
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
new file mode 100644
index 00000000000..ac3aa54e265
--- /dev/null
+++ b/arch/x86/platform/efi/efi_64.c
@@ -0,0 +1,99 @@
+/*
+ * x86_64 specific EFI support functions
+ * Based on Extensible Firmware Interface Specification version 1.0
+ *
+ * Copyright (C) 2005-2008 Intel Co.
+ * Fenghua Yu <fenghua.yu@intel.com>
+ * Bibo Mao <bibo.mao@intel.com>
+ * Chandramouli Narayanan <mouli@linux.intel.com>
+ * Huang Ying <ying.huang@intel.com>
+ *
+ * Code to convert EFI to E820 map has been implemented in elilo bootloader
+ * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table
+ * is setup appropriately for EFI runtime code.
+ * - mouli 06/14/2007.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/reboot.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/e820.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/proto.h>
+#include <asm/efi.h>
+#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
+
+static pgd_t save_pgd __initdata;
+static unsigned long efi_flags __initdata;
+
+static void __init early_code_mapping_set_exec(int executable)
+{
+ efi_memory_desc_t *md;
+ void *p;
+
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return;
+
+ /* Make EFI service code area executable */
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+ if (md->type == EFI_RUNTIME_SERVICES_CODE ||
+ md->type == EFI_BOOT_SERVICES_CODE)
+ efi_set_executable(md, executable);
+ }
+}
+
+void __init efi_call_phys_prelog(void)
+{
+ unsigned long vaddress;
+
+ early_code_mapping_set_exec(1);
+ local_irq_save(efi_flags);
+ vaddress = (unsigned long)__va(0x0UL);
+ save_pgd = *pgd_offset_k(0x0UL);
+ set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
+ __flush_tlb_all();
+}
+
+void __init efi_call_phys_epilog(void)
+{
+ /*
+ * After the lock is released, the original page table is restored.
+ */
+ set_pgd(pgd_offset_k(0x0UL), save_pgd);
+ __flush_tlb_all();
+ local_irq_restore(efi_flags);
+ early_code_mapping_set_exec(0);
+}
+
+void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
+ u32 type)
+{
+ unsigned long last_map_pfn;
+
+ if (type == EFI_MEMORY_MAPPED_IO)
+ return ioremap(phys_addr, size);
+
+ last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
+ if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
+ unsigned long top = last_map_pfn << PAGE_SHIFT;
+ efi_ioremap(top, size - (top - phys_addr), type);
+ }
+
+ return (void __iomem *)__va(phys_addr);
+}
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
new file mode 100644
index 00000000000..fbe66e626c0
--- /dev/null
+++ b/arch/x86/platform/efi/efi_stub_32.S
@@ -0,0 +1,123 @@
+/*
+ * EFI call stub for IA32.
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off.
+ */
+
+#include <linux/linkage.h>
+#include <asm/page_types.h>
+
+/*
+ * efi_call_phys(void *, ...) is a function with variable parameters.
+ * All the callers of this function assure that all the parameters are 4-bytes.
+ */
+
+/*
+ * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
+ * So we'd better save all of them at the beginning of this function and restore
+ * at the end no matter how many we use, because we can not assure EFI runtime
+ * service functions will comply with gcc calling convention, too.
+ */
+
+.text
+ENTRY(efi_call_phys)
+ /*
+ * 0. The function can only be called in Linux kernel. So CS has been
+ * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
+ * the values of these registers are the same. And, the corresponding
+ * GDT entries are identical. So I will do nothing about segment reg
+ * and GDT, but change GDT base register in prelog and epilog.
+ */
+
+ /*
+ * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
+ * But to make it smoothly switch from virtual mode to flat mode.
+ * The mapping of lower virtual memory has been created in prelog and
+ * epilog.
+ */
+ movl $1f, %edx
+ subl $__PAGE_OFFSET, %edx
+ jmp *%edx
+1:
+
+ /*
+ * 2. Now on the top of stack is the return
+ * address in the caller of efi_call_phys(), then parameter 1,
+ * parameter 2, ..., param n. To make things easy, we save the return
+ * address of efi_call_phys in a global variable.
+ */
+ popl %edx
+ movl %edx, saved_return_addr
+ /* get the function pointer into ECX*/
+ popl %ecx
+ movl %ecx, efi_rt_function_ptr
+ movl $2f, %edx
+ subl $__PAGE_OFFSET, %edx
+ pushl %edx
+
+ /*
+ * 3. Clear PG bit in %CR0.
+ */
+ movl %cr0, %edx
+ andl $0x7fffffff, %edx
+ movl %edx, %cr0
+ jmp 1f
+1:
+
+ /*
+ * 4. Adjust stack pointer.
+ */
+ subl $__PAGE_OFFSET, %esp
+
+ /*
+ * 5. Call the physical function.
+ */
+ jmp *%ecx
+
+2:
+ /*
+ * 6. After EFI runtime service returns, control will return to
+ * following instruction. We'd better readjust stack pointer first.
+ */
+ addl $__PAGE_OFFSET, %esp
+
+ /*
+ * 7. Restore PG bit
+ */
+ movl %cr0, %edx
+ orl $0x80000000, %edx
+ movl %edx, %cr0
+ jmp 1f
+1:
+ /*
+ * 8. Now restore the virtual mode from flat mode by
+ * adding EIP with PAGE_OFFSET.
+ */
+ movl $1f, %edx
+ jmp *%edx
+1:
+
+ /*
+ * 9. Balance the stack. And because EAX contain the return value,
+ * we'd better not clobber it.
+ */
+ leal efi_rt_function_ptr, %edx
+ movl (%edx), %ecx
+ pushl %ecx
+
+ /*
+ * 10. Push the saved return address onto the stack and return.
+ */
+ leal saved_return_addr, %edx
+ movl (%edx), %ecx
+ pushl %ecx
+ ret
+ENDPROC(efi_call_phys)
+.previous
+
+.data
+saved_return_addr:
+ .long 0
+efi_rt_function_ptr:
+ .long 0
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
new file mode 100644
index 00000000000..4c07ccab814
--- /dev/null
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -0,0 +1,116 @@
+/*
+ * Function calling ABI conversion from Linux to EFI for x86_64
+ *
+ * Copyright (C) 2007 Intel Corp
+ * Bibo Mao <bibo.mao@intel.com>
+ * Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/linkage.h>
+
+#define SAVE_XMM \
+ mov %rsp, %rax; \
+ subq $0x70, %rsp; \
+ and $~0xf, %rsp; \
+ mov %rax, (%rsp); \
+ mov %cr0, %rax; \
+ clts; \
+ mov %rax, 0x8(%rsp); \
+ movaps %xmm0, 0x60(%rsp); \
+ movaps %xmm1, 0x50(%rsp); \
+ movaps %xmm2, 0x40(%rsp); \
+ movaps %xmm3, 0x30(%rsp); \
+ movaps %xmm4, 0x20(%rsp); \
+ movaps %xmm5, 0x10(%rsp)
+
+#define RESTORE_XMM \
+ movaps 0x60(%rsp), %xmm0; \
+ movaps 0x50(%rsp), %xmm1; \
+ movaps 0x40(%rsp), %xmm2; \
+ movaps 0x30(%rsp), %xmm3; \
+ movaps 0x20(%rsp), %xmm4; \
+ movaps 0x10(%rsp), %xmm5; \
+ mov 0x8(%rsp), %rsi; \
+ mov %rsi, %cr0; \
+ mov (%rsp), %rsp
+
+ENTRY(efi_call0)
+ SAVE_XMM
+ subq $32, %rsp
+ call *%rdi
+ addq $32, %rsp
+ RESTORE_XMM
+ ret
+ENDPROC(efi_call0)
+
+ENTRY(efi_call1)
+ SAVE_XMM
+ subq $32, %rsp
+ mov %rsi, %rcx
+ call *%rdi
+ addq $32, %rsp
+ RESTORE_XMM
+ ret
+ENDPROC(efi_call1)
+
+ENTRY(efi_call2)
+ SAVE_XMM
+ subq $32, %rsp
+ mov %rsi, %rcx
+ call *%rdi
+ addq $32, %rsp
+ RESTORE_XMM
+ ret
+ENDPROC(efi_call2)
+
+ENTRY(efi_call3)
+ SAVE_XMM
+ subq $32, %rsp
+ mov %rcx, %r8
+ mov %rsi, %rcx
+ call *%rdi
+ addq $32, %rsp
+ RESTORE_XMM
+ ret
+ENDPROC(efi_call3)
+
+ENTRY(efi_call4)
+ SAVE_XMM
+ subq $32, %rsp
+ mov %r8, %r9
+ mov %rcx, %r8
+ mov %rsi, %rcx
+ call *%rdi
+ addq $32, %rsp
+ RESTORE_XMM
+ ret
+ENDPROC(efi_call4)
+
+ENTRY(efi_call5)
+ SAVE_XMM
+ subq $48, %rsp
+ mov %r9, 32(%rsp)
+ mov %r8, %r9
+ mov %rcx, %r8
+ mov %rsi, %rcx
+ call *%rdi
+ addq $48, %rsp
+ RESTORE_XMM
+ ret
+ENDPROC(efi_call5)
+
+ENTRY(efi_call6)
+ SAVE_XMM
+ mov (%rsp), %rax
+ mov 8(%rax), %rax
+ subq $48, %rsp
+ mov %r9, 32(%rsp)
+ mov %rax, 40(%rsp)
+ mov %r8, %r9
+ mov %rcx, %r8
+ mov %rsi, %rcx
+ call *%rdi
+ addq $48, %rsp
+ RESTORE_XMM
+ ret
+ENDPROC(efi_call6)
diff --git a/arch/x86/platform/geode/Makefile b/arch/x86/platform/geode/Makefile
new file mode 100644
index 00000000000..07c9cd05021
--- /dev/null
+++ b/arch/x86/platform/geode/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_ALIX) += alix.o
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
new file mode 100644
index 00000000000..dc5f1d32ace
--- /dev/null
+++ b/arch/x86/platform/geode/alix.c
@@ -0,0 +1,142 @@
+/*
+ * System Specific setup for PCEngines ALIX.
+ * At the moment this means setup of GPIO control of LEDs
+ * on Alix.2/3/6 boards.
+ *
+ *
+ * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
+ * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
+ *
+ * TODO: There are large similarities with leds-net5501.c
+ * by Alessandro Zummo <a.zummo@towertech.it>
+ * In the future leds-net5501.c should be migrated over to platform
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/leds.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+
+#include <asm/geode.h>
+
+static bool force = 0;
+module_param(force, bool, 0444);
+/* FIXME: Award bios is not automatically detected as Alix platform */
+MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform");
+
+static struct gpio_led alix_leds[] = {
+ {
+ .name = "alix:1",
+ .gpio = 6,
+ .default_trigger = "default-on",
+ .active_low = 1,
+ },
+ {
+ .name = "alix:2",
+ .gpio = 25,
+ .default_trigger = "default-off",
+ .active_low = 1,
+ },
+ {
+ .name = "alix:3",
+ .gpio = 27,
+ .default_trigger = "default-off",
+ .active_low = 1,
+ },
+};
+
+static struct gpio_led_platform_data alix_leds_data = {
+ .num_leds = ARRAY_SIZE(alix_leds),
+ .leds = alix_leds,
+};
+
+static struct platform_device alix_leds_dev = {
+ .name = "leds-gpio",
+ .id = -1,
+ .dev.platform_data = &alix_leds_data,
+};
+
+static void __init register_alix(void)
+{
+ /* Setup LED control through leds-gpio driver */
+ platform_device_register(&alix_leds_dev);
+}
+
+static int __init alix_present(unsigned long bios_phys,
+ const char *alix_sig,
+ size_t alix_sig_len)
+{
+ const size_t bios_len = 0x00010000;
+ const char *bios_virt;
+ const char *scan_end;
+ const char *p;
+ char name[64];
+
+ if (force) {
+ printk(KERN_NOTICE "%s: forced to skip BIOS test, "
+ "assume system is ALIX.2/ALIX.3\n",
+ KBUILD_MODNAME);
+ return 1;
+ }
+
+ bios_virt = phys_to_virt(bios_phys);
+ scan_end = bios_virt + bios_len - (alix_sig_len + 2);
+ for (p = bios_virt; p < scan_end; p++) {
+ const char *tail;
+ char *a;
+
+ if (memcmp(p, alix_sig, alix_sig_len) != 0)
+ continue;
+
+ memcpy(name, p, sizeof(name));
+
+ /* remove the first \0 character from string */
+ a = strchr(name, '\0');
+ if (a)
+ *a = ' ';
+
+ /* cut the string at a newline */
+ a = strchr(name, '\r');
+ if (a)
+ *a = '\0';
+
+ tail = p + alix_sig_len;
+ if ((tail[0] == '2' || tail[0] == '3')) {
+ printk(KERN_INFO
+ "%s: system is recognized as \"%s\"\n",
+ KBUILD_MODNAME, name);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int __init alix_init(void)
+{
+ const char tinybios_sig[] = "PC Engines ALIX.";
+ const char coreboot_sig[] = "PC Engines\0ALIX.";
+
+ if (!is_geode())
+ return 0;
+
+ if (alix_present(0xf0000, tinybios_sig, sizeof(tinybios_sig) - 1) ||
+ alix_present(0x500, coreboot_sig, sizeof(coreboot_sig) - 1))
+ register_alix();
+
+ return 0;
+}
+
+module_init(alix_init);
+
+MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>");
+MODULE_DESCRIPTION("PCEngines ALIX System Setup");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/platform/iris/Makefile b/arch/x86/platform/iris/Makefile
new file mode 100644
index 00000000000..db921983a10
--- /dev/null
+++ b/arch/x86/platform/iris/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_32_IRIS) += iris.o
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
new file mode 100644
index 00000000000..5917eb56b31
--- /dev/null
+++ b/arch/x86/platform/iris/iris.c
@@ -0,0 +1,91 @@
+/*
+ * Eurobraille/Iris power off support.
+ *
+ * Eurobraille's Iris machine is a PC with no APM or ACPI support.
+ * It is shutdown by a special I/O sequence which this module provides.
+ *
+ * Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/moduleparam.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <asm/io.h>
+
+#define IRIS_GIO_BASE 0x340
+#define IRIS_GIO_INPUT IRIS_GIO_BASE
+#define IRIS_GIO_OUTPUT (IRIS_GIO_BASE + 1)
+#define IRIS_GIO_PULSE 0x80 /* First byte to send */
+#define IRIS_GIO_REST 0x00 /* Second byte to send */
+#define IRIS_GIO_NODEV 0xff /* Likely not an Iris */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
+MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
+MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
+
+static bool force;
+
+module_param(force, bool, 0);
+MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation.");
+
+static void (*old_pm_power_off)(void);
+
+static void iris_power_off(void)
+{
+ outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT);
+ msleep(850);
+ outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT);
+}
+
+/*
+ * Before installing the power_off handler, try to make sure the OS is
+ * running on an Iris. Since Iris does not support DMI, this is done
+ * by reading its input port and seeing whether the read value is
+ * meaningful.
+ */
+static int iris_init(void)
+{
+ unsigned char status;
+ if (force != 1) {
+ printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n");
+ return -ENODEV;
+ }
+ status = inb(IRIS_GIO_INPUT);
+ if (status == IRIS_GIO_NODEV) {
+ printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n");
+ return -ENODEV;
+ }
+ old_pm_power_off = pm_power_off;
+ pm_power_off = &iris_power_off;
+ printk(KERN_INFO "Iris power_off handler installed.\n");
+
+ return 0;
+}
+
+static void iris_exit(void)
+{
+ pm_power_off = old_pm_power_off;
+ printk(KERN_INFO "Iris power_off handler uninstalled.\n");
+}
+
+module_init(iris_init);
+module_exit(iris_exit);
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
new file mode 100644
index 00000000000..7baed5135e0
--- /dev/null
+++ b/arch/x86/platform/mrst/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_X86_INTEL_MID) += mrst.o
+obj-$(CONFIG_X86_INTEL_MID) += vrtc.o
+obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o
+obj-$(CONFIG_X86_MRST) += pmu.o
diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c
new file mode 100644
index 00000000000..3c6e328483c
--- /dev/null
+++ b/arch/x86/platform/mrst/early_printk_mrst.c
@@ -0,0 +1,327 @@
+/*
+ * early_printk_mrst.c - early consoles for Intel MID platforms
+ *
+ * Copyright (c) 2008-2010, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+/*
+ * This file implements two early consoles named mrst and hsu.
+ * mrst is based on Maxim3110 spi-uart device, it exists in both
+ * Moorestown and Medfield platforms, while hsu is based on a High
+ * Speed UART device which only exists in the Medfield platform
+ */
+
+#include <linux/serial_reg.h>
+#include <linux/serial_mfd.h>
+#include <linux/kmsg_dump.h>
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <asm/mrst.h>
+
+#define MRST_SPI_TIMEOUT 0x200000
+#define MRST_REGBASE_SPI0 0xff128000
+#define MRST_REGBASE_SPI1 0xff128400
+#define MRST_CLK_SPI0_REG 0xff11d86c
+
+/* Bit fields in CTRLR0 */
+#define SPI_DFS_OFFSET 0
+
+#define SPI_FRF_OFFSET 4
+#define SPI_FRF_SPI 0x0
+#define SPI_FRF_SSP 0x1
+#define SPI_FRF_MICROWIRE 0x2
+#define SPI_FRF_RESV 0x3
+
+#define SPI_MODE_OFFSET 6
+#define SPI_SCPH_OFFSET 6
+#define SPI_SCOL_OFFSET 7
+#define SPI_TMOD_OFFSET 8
+#define SPI_TMOD_TR 0x0 /* xmit & recv */
+#define SPI_TMOD_TO 0x1 /* xmit only */
+#define SPI_TMOD_RO 0x2 /* recv only */
+#define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */
+
+#define SPI_SLVOE_OFFSET 10
+#define SPI_SRL_OFFSET 11
+#define SPI_CFS_OFFSET 12
+
+/* Bit fields in SR, 7 bits */
+#define SR_MASK 0x7f /* cover 7 bits */
+#define SR_BUSY (1 << 0)
+#define SR_TF_NOT_FULL (1 << 1)
+#define SR_TF_EMPT (1 << 2)
+#define SR_RF_NOT_EMPT (1 << 3)
+#define SR_RF_FULL (1 << 4)
+#define SR_TX_ERR (1 << 5)
+#define SR_DCOL (1 << 6)
+
+struct dw_spi_reg {
+ u32 ctrl0;
+ u32 ctrl1;
+ u32 ssienr;
+ u32 mwcr;
+ u32 ser;
+ u32 baudr;
+ u32 txfltr;
+ u32 rxfltr;
+ u32 txflr;
+ u32 rxflr;
+ u32 sr;
+ u32 imr;
+ u32 isr;
+ u32 risr;
+ u32 txoicr;
+ u32 rxoicr;
+ u32 rxuicr;
+ u32 msticr;
+ u32 icr;
+ u32 dmacr;
+ u32 dmatdlr;
+ u32 dmardlr;
+ u32 idr;
+ u32 version;
+
+ /* Currently operates as 32 bits, though only the low 16 bits matter */
+ u32 dr;
+} __packed;
+
+#define dw_readl(dw, name) __raw_readl(&(dw)->name)
+#define dw_writel(dw, name, val) __raw_writel((val), &(dw)->name)
+
+/* Default use SPI0 register for mrst, we will detect Penwell and use SPI1 */
+static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0;
+
+static u32 *pclk_spi0;
+/* Always contains an accessible address, start with 0 */
+static struct dw_spi_reg *pspi;
+
+static struct kmsg_dumper dw_dumper;
+static int dumper_registered;
+
+static void dw_kmsg_dump(struct kmsg_dumper *dumper,
+ enum kmsg_dump_reason reason,
+ const char *s1, unsigned long l1,
+ const char *s2, unsigned long l2)
+{
+ int i;
+
+ /* When run to this, we'd better re-init the HW */
+ mrst_early_console_init();
+
+ for (i = 0; i < l1; i++)
+ early_mrst_console.write(&early_mrst_console, s1 + i, 1);
+ for (i = 0; i < l2; i++)
+ early_mrst_console.write(&early_mrst_console, s2 + i, 1);
+}
+
+/* Set the ratio rate to 115200, 8n1, IRQ disabled */
+static void max3110_write_config(void)
+{
+ u16 config;
+
+ config = 0xc001;
+ dw_writel(pspi, dr, config);
+}
+
+/* Translate char to a eligible word and send to max3110 */
+static void max3110_write_data(char c)
+{
+ u16 data;
+
+ data = 0x8000 | c;
+ dw_writel(pspi, dr, data);
+}
+
+void mrst_early_console_init(void)
+{
+ u32 ctrlr0 = 0;
+ u32 spi0_cdiv;
+ u32 freq; /* Freqency info only need be searched once */
+
+ /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */
+ pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
+ MRST_CLK_SPI0_REG);
+ spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9;
+ freq = 100000000 / (spi0_cdiv + 1);
+
+ if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL)
+ mrst_spi_paddr = MRST_REGBASE_SPI1;
+
+ pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
+ mrst_spi_paddr);
+
+ /* Disable SPI controller */
+ dw_writel(pspi, ssienr, 0);
+
+ /* Set control param, 8 bits, transmit only mode */
+ ctrlr0 = dw_readl(pspi, ctrl0);
+
+ ctrlr0 &= 0xfcc0;
+ ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET)
+ | (SPI_TMOD_TO << SPI_TMOD_OFFSET);
+ dw_writel(pspi, ctrl0, ctrlr0);
+
+ /*
+ * Change the spi0 clk to comply with 115200 bps, use 100000 to
+ * calculate the clk dividor to make the clock a little slower
+ * than real baud rate.
+ */
+ dw_writel(pspi, baudr, freq/100000);
+
+ /* Disable all INT for early phase */
+ dw_writel(pspi, imr, 0x0);
+
+ /* Set the cs to spi-uart */
+ dw_writel(pspi, ser, 0x2);
+
+ /* Enable the HW, the last step for HW init */
+ dw_writel(pspi, ssienr, 0x1);
+
+ /* Set the default configuration */
+ max3110_write_config();
+
+ /* Register the kmsg dumper */
+ if (!dumper_registered) {
+ dw_dumper.dump = dw_kmsg_dump;
+ kmsg_dump_register(&dw_dumper);
+ dumper_registered = 1;
+ }
+}
+
+/* Slave select should be called in the read/write function */
+static void early_mrst_spi_putc(char c)
+{
+ unsigned int timeout;
+ u32 sr;
+
+ timeout = MRST_SPI_TIMEOUT;
+ /* Early putc needs to make sure the TX FIFO is not full */
+ while (--timeout) {
+ sr = dw_readl(pspi, sr);
+ if (!(sr & SR_TF_NOT_FULL))
+ cpu_relax();
+ else
+ break;
+ }
+
+ if (!timeout)
+ pr_warning("MRST earlycon: timed out\n");
+ else
+ max3110_write_data(c);
+}
+
+/* Early SPI only uses polling mode */
+static void early_mrst_spi_write(struct console *con, const char *str, unsigned n)
+{
+ int i;
+
+ for (i = 0; i < n && *str; i++) {
+ if (*str == '\n')
+ early_mrst_spi_putc('\r');
+ early_mrst_spi_putc(*str);
+ str++;
+ }
+}
+
+struct console early_mrst_console = {
+ .name = "earlymrst",
+ .write = early_mrst_spi_write,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+};
+
+/*
+ * Following is the early console based on Medfield HSU (High
+ * Speed UART) device.
+ */
+#define HSU_PORT_BASE 0xffa28080
+
+static void __iomem *phsu;
+
+void hsu_early_console_init(const char *s)
+{
+ unsigned long paddr, port = 0;
+ u8 lcr;
+
+ /*
+ * Select the early HSU console port if specified by user in the
+ * kernel command line.
+ */
+ if (*s && !kstrtoul(s, 10, &port))
+ port = clamp_val(port, 0, 2);
+
+ paddr = HSU_PORT_BASE + port * 0x80;
+ phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr);
+
+ /* Disable FIFO */
+ writeb(0x0, phsu + UART_FCR);
+
+ /* Set to default 115200 bps, 8n1 */
+ lcr = readb(phsu + UART_LCR);
+ writeb((0x80 | lcr), phsu + UART_LCR);
+ writeb(0x18, phsu + UART_DLL);
+ writeb(lcr, phsu + UART_LCR);
+ writel(0x3600, phsu + UART_MUL*4);
+
+ writeb(0x8, phsu + UART_MCR);
+ writeb(0x7, phsu + UART_FCR);
+ writeb(0x3, phsu + UART_LCR);
+
+ /* Clear IRQ status */
+ readb(phsu + UART_LSR);
+ readb(phsu + UART_RX);
+ readb(phsu + UART_IIR);
+ readb(phsu + UART_MSR);
+
+ /* Enable FIFO */
+ writeb(0x7, phsu + UART_FCR);
+}
+
+#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
+
+static void early_hsu_putc(char ch)
+{
+ unsigned int timeout = 10000; /* 10ms */
+ u8 status;
+
+ while (--timeout) {
+ status = readb(phsu + UART_LSR);
+ if (status & BOTH_EMPTY)
+ break;
+ udelay(1);
+ }
+
+ /* Only write the char when there was no timeout */
+ if (timeout)
+ writeb(ch, phsu + UART_TX);
+}
+
+static void early_hsu_write(struct console *con, const char *str, unsigned n)
+{
+ int i;
+
+ for (i = 0; i < n && *str; i++) {
+ if (*str == '\n')
+ early_hsu_putc('\r');
+ early_hsu_putc(*str);
+ str++;
+ }
+}
+
+struct console early_hsu_console = {
+ .name = "earlyhsu",
+ .write = early_hsu_write,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+};
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
new file mode 100644
index 00000000000..475e2cd0f3c
--- /dev/null
+++ b/arch/x86/platform/mrst/mrst.c
@@ -0,0 +1,1047 @@
+/*
+ * mrst.c: Intel Moorestown platform specific setup code
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Jacob Pan (jacob.jun.pan@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#define pr_fmt(fmt) "mrst: " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/sfi.h>
+#include <linux/intel_pmic_gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/i2c.h>
+#include <linux/i2c/pca953x.h>
+#include <linux/gpio_keys.h>
+#include <linux/input.h>
+#include <linux/platform_device.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/mfd/intel_msic.h>
+
+#include <asm/setup.h>
+#include <asm/mpspec_def.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
+#include <asm/io.h>
+#include <asm/i8259.h>
+#include <asm/intel_scu_ipc.h>
+#include <asm/apb_timer.h>
+#include <asm/reboot.h>
+
+/*
+ * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
+ * cmdline option x86_mrst_timer can be used to override the configuration
+ * to prefer one or the other.
+ * at runtime, there are basically three timer configurations:
+ * 1. per cpu apbt clock only
+ * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only
+ * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast.
+ *
+ * by default (without cmdline option), platform code first detects cpu type
+ * to see if we are on lincroft or penwell, then set up both lapic or apbt
+ * clocks accordingly.
+ * i.e. by default, medfield uses configuration #2, moorestown uses #1.
+ * config #3 is supported but not recommended on medfield.
+ *
+ * rating and feature summary:
+ * lapic (with C3STOP) --------- 100
+ * apbt (always-on) ------------ 110
+ * lapic (always-on,ARAT) ------ 150
+ */
+
+__cpuinitdata enum mrst_timer_options mrst_timer_options;
+
+static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
+static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
+enum mrst_cpu_type __mrst_cpu_chip;
+EXPORT_SYMBOL_GPL(__mrst_cpu_chip);
+
+int sfi_mtimer_num;
+
+struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
+EXPORT_SYMBOL_GPL(sfi_mrtc_array);
+int sfi_mrtc_num;
+
+static void mrst_power_off(void)
+{
+ if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT)
+ intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 1);
+}
+
+static void mrst_reboot(void)
+{
+ if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT)
+ intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
+ else
+ intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+}
+
+/* parse all the mtimer info to a static mtimer array */
+static int __init sfi_parse_mtmr(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_timer_table_entry *pentry;
+ struct mpc_intsrc mp_irq;
+ int totallen;
+
+ sb = (struct sfi_table_simple *)table;
+ if (!sfi_mtimer_num) {
+ sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
+ struct sfi_timer_table_entry);
+ pentry = (struct sfi_timer_table_entry *) sb->pentry;
+ totallen = sfi_mtimer_num * sizeof(*pentry);
+ memcpy(sfi_mtimer_array, pentry, totallen);
+ }
+
+ pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
+ pentry = sfi_mtimer_array;
+ for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
+ pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz,"
+ " irq = %d\n", totallen, (u32)pentry->phys_addr,
+ pentry->freq_hz, pentry->irq);
+ if (!pentry->irq)
+ continue;
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
+ mp_irq.irqflag = 5;
+ mp_irq.srcbus = MP_BUS_ISA;
+ mp_irq.srcbusirq = pentry->irq; /* IRQ */
+ mp_irq.dstapic = MP_APIC_ALL;
+ mp_irq.dstirq = pentry->irq;
+ mp_save_irq(&mp_irq);
+ }
+
+ return 0;
+}
+
+struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
+{
+ int i;
+ if (hint < sfi_mtimer_num) {
+ if (!sfi_mtimer_usage[hint]) {
+ pr_debug("hint taken for timer %d irq %d\n",\
+ hint, sfi_mtimer_array[hint].irq);
+ sfi_mtimer_usage[hint] = 1;
+ return &sfi_mtimer_array[hint];
+ }
+ }
+ /* take the first timer available */
+ for (i = 0; i < sfi_mtimer_num;) {
+ if (!sfi_mtimer_usage[i]) {
+ sfi_mtimer_usage[i] = 1;
+ return &sfi_mtimer_array[i];
+ }
+ i++;
+ }
+ return NULL;
+}
+
+void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
+{
+ int i;
+ for (i = 0; i < sfi_mtimer_num;) {
+ if (mtmr->irq == sfi_mtimer_array[i].irq) {
+ sfi_mtimer_usage[i] = 0;
+ return;
+ }
+ i++;
+ }
+}
+
+/* parse all the mrtc info to a global mrtc array */
+int __init sfi_parse_mrtc(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_rtc_table_entry *pentry;
+ struct mpc_intsrc mp_irq;
+
+ int totallen;
+
+ sb = (struct sfi_table_simple *)table;
+ if (!sfi_mrtc_num) {
+ sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
+ struct sfi_rtc_table_entry);
+ pentry = (struct sfi_rtc_table_entry *)sb->pentry;
+ totallen = sfi_mrtc_num * sizeof(*pentry);
+ memcpy(sfi_mrtc_array, pentry, totallen);
+ }
+
+ pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
+ pentry = sfi_mrtc_array;
+ for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
+ pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
+ totallen, (u32)pentry->phys_addr, pentry->irq);
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = 0xf; /* level trigger and active low */
+ mp_irq.srcbus = MP_BUS_ISA;
+ mp_irq.srcbusirq = pentry->irq; /* IRQ */
+ mp_irq.dstapic = MP_APIC_ALL;
+ mp_irq.dstirq = pentry->irq;
+ mp_save_irq(&mp_irq);
+ }
+ return 0;
+}
+
+static unsigned long __init mrst_calibrate_tsc(void)
+{
+ unsigned long flags, fast_calibrate;
+ if (__mrst_cpu_chip == MRST_CPU_CHIP_PENWELL) {
+ u32 lo, hi, ratio, fsb;
+
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
+ ratio = (hi >> 8) & 0x1f;
+ pr_debug("ratio is %d\n", ratio);
+ if (!ratio) {
+ pr_err("read a zero ratio, should be incorrect!\n");
+ pr_err("force tsc ratio to 16 ...\n");
+ ratio = 16;
+ }
+ rdmsr(MSR_FSB_FREQ, lo, hi);
+ if ((lo & 0x7) == 0x7)
+ fsb = PENWELL_FSB_FREQ_83SKU;
+ else
+ fsb = PENWELL_FSB_FREQ_100SKU;
+ fast_calibrate = ratio * fsb;
+ pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
+ lapic_timer_frequency = fsb * 1000 / HZ;
+ /* mark tsc clocksource as reliable */
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
+ } else {
+ local_irq_save(flags);
+ fast_calibrate = apbt_quick_calibrate();
+ local_irq_restore(flags);
+ }
+
+ if (fast_calibrate)
+ return fast_calibrate;
+
+ return 0;
+}
+
+static void __init mrst_time_init(void)
+{
+ sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
+ switch (mrst_timer_options) {
+ case MRST_TIMER_APBT_ONLY:
+ break;
+ case MRST_TIMER_LAPIC_APBT:
+ x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
+ x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
+ break;
+ default:
+ if (!boot_cpu_has(X86_FEATURE_ARAT))
+ break;
+ x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
+ x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
+ return;
+ }
+ /* we need at least one APB timer */
+ pre_init_apic_IRQ0();
+ apbt_time_init();
+}
+
+static void __cpuinit mrst_arch_setup(void)
+{
+ if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
+ __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
+ else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26)
+ __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
+ else {
+ pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
+ __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
+ }
+ pr_debug("Moorestown CPU %s identified\n",
+ (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ?
+ "Lincroft" : "Penwell");
+}
+
+/* MID systems don't have i8042 controller */
+static int mrst_i8042_detect(void)
+{
+ return 0;
+}
+
+/*
+ * Moorestown does not have external NMI source nor port 0x61 to report
+ * NMI status. The possible NMI sources are from pmu as a result of NMI
+ * watchdog or lock debug. Reading io port 0x61 results in 0xff which
+ * misled NMI handler.
+ */
+static unsigned char mrst_get_nmi_reason(void)
+{
+ return 0;
+}
+
+/*
+ * Moorestown specific x86_init function overrides and early setup
+ * calls.
+ */
+void __init x86_mrst_early_setup(void)
+{
+ x86_init.resources.probe_roms = x86_init_noop;
+ x86_init.resources.reserve_resources = x86_init_noop;
+
+ x86_init.timers.timer_init = mrst_time_init;
+ x86_init.timers.setup_percpu_clockev = x86_init_noop;
+
+ x86_init.irqs.pre_vector_init = x86_init_noop;
+
+ x86_init.oem.arch_setup = mrst_arch_setup;
+
+ x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
+
+ x86_platform.calibrate_tsc = mrst_calibrate_tsc;
+ x86_platform.i8042_detect = mrst_i8042_detect;
+ x86_init.timers.wallclock_init = mrst_rtc_init;
+ x86_platform.get_nmi_reason = mrst_get_nmi_reason;
+
+ x86_init.pci.init = pci_mrst_init;
+ x86_init.pci.fixup_irqs = x86_init_noop;
+
+ legacy_pic = &null_legacy_pic;
+
+ /* Moorestown specific power_off/restart method */
+ pm_power_off = mrst_power_off;
+ machine_ops.emergency_restart = mrst_reboot;
+
+ /* Avoid searching for BIOS MP tables */
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+ set_bit(MP_BUS_ISA, mp_bus_not_pci);
+}
+
+/*
+ * if user does not want to use per CPU apb timer, just give it a lower rating
+ * than local apic timer and skip the late per cpu timer init.
+ */
+static inline int __init setup_x86_mrst_timer(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp("apbt_only", arg) == 0)
+ mrst_timer_options = MRST_TIMER_APBT_ONLY;
+ else if (strcmp("lapic_and_apbt", arg) == 0)
+ mrst_timer_options = MRST_TIMER_LAPIC_APBT;
+ else {
+ pr_warning("X86 MRST timer option %s not recognised"
+ " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
+ arg);
+ return -EINVAL;
+ }
+ return 0;
+}
+__setup("x86_mrst_timer=", setup_x86_mrst_timer);
+
+/*
+ * Parsing GPIO table first, since the DEVS table will need this table
+ * to map the pin name to the actual pin.
+ */
+static struct sfi_gpio_table_entry *gpio_table;
+static int gpio_num_entry;
+
+static int __init sfi_parse_gpio(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_gpio_table_entry *pentry;
+ int num, i;
+
+ if (gpio_table)
+ return 0;
+ sb = (struct sfi_table_simple *)table;
+ num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
+ pentry = (struct sfi_gpio_table_entry *)sb->pentry;
+
+ gpio_table = (struct sfi_gpio_table_entry *)
+ kmalloc(num * sizeof(*pentry), GFP_KERNEL);
+ if (!gpio_table)
+ return -1;
+ memcpy(gpio_table, pentry, num * sizeof(*pentry));
+ gpio_num_entry = num;
+
+ pr_debug("GPIO pin info:\n");
+ for (i = 0; i < num; i++, pentry++)
+ pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
+ " pin = %d\n", i,
+ pentry->controller_name,
+ pentry->pin_name,
+ pentry->pin_no);
+ return 0;
+}
+
+static int get_gpio_by_name(const char *name)
+{
+ struct sfi_gpio_table_entry *pentry = gpio_table;
+ int i;
+
+ if (!pentry)
+ return -1;
+ for (i = 0; i < gpio_num_entry; i++, pentry++) {
+ if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
+ return pentry->pin_no;
+ }
+ return -1;
+}
+
+/*
+ * Here defines the array of devices platform data that IAFW would export
+ * through SFI "DEVS" table, we use name and type to match the device and
+ * its platform data.
+ */
+struct devs_id {
+ char name[SFI_NAME_LEN + 1];
+ u8 type;
+ u8 delay;
+ void *(*get_platform_data)(void *info);
+};
+
+/* the offset for the mapping of global gpio pin to irq */
+#define MRST_IRQ_OFFSET 0x100
+
+static void __init *pmic_gpio_platform_data(void *info)
+{
+ static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
+ int gpio_base = get_gpio_by_name("pmic_gpio_base");
+
+ if (gpio_base == -1)
+ gpio_base = 64;
+ pmic_gpio_pdata.gpio_base = gpio_base;
+ pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET;
+ pmic_gpio_pdata.gpiointr = 0xffffeff8;
+
+ return &pmic_gpio_pdata;
+}
+
+static void __init *max3111_platform_data(void *info)
+{
+ struct spi_board_info *spi_info = info;
+ int intr = get_gpio_by_name("max3111_int");
+
+ spi_info->mode = SPI_MODE_0;
+ if (intr == -1)
+ return NULL;
+ spi_info->irq = intr + MRST_IRQ_OFFSET;
+ return NULL;
+}
+
+/* we have multiple max7315 on the board ... */
+#define MAX7315_NUM 2
+static void __init *max7315_platform_data(void *info)
+{
+ static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
+ static int nr;
+ struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
+ struct i2c_board_info *i2c_info = info;
+ int gpio_base, intr;
+ char base_pin_name[SFI_NAME_LEN + 1];
+ char intr_pin_name[SFI_NAME_LEN + 1];
+
+ if (nr == MAX7315_NUM) {
+ pr_err("too many max7315s, we only support %d\n",
+ MAX7315_NUM);
+ return NULL;
+ }
+ /* we have several max7315 on the board, we only need load several
+ * instances of the same pca953x driver to cover them
+ */
+ strcpy(i2c_info->type, "max7315");
+ if (nr++) {
+ sprintf(base_pin_name, "max7315_%d_base", nr);
+ sprintf(intr_pin_name, "max7315_%d_int", nr);
+ } else {
+ strcpy(base_pin_name, "max7315_base");
+ strcpy(intr_pin_name, "max7315_int");
+ }
+
+ gpio_base = get_gpio_by_name(base_pin_name);
+ intr = get_gpio_by_name(intr_pin_name);
+
+ if (gpio_base == -1)
+ return NULL;
+ max7315->gpio_base = gpio_base;
+ if (intr != -1) {
+ i2c_info->irq = intr + MRST_IRQ_OFFSET;
+ max7315->irq_base = gpio_base + MRST_IRQ_OFFSET;
+ } else {
+ i2c_info->irq = -1;
+ max7315->irq_base = -1;
+ }
+ return max7315;
+}
+
+static void *tca6416_platform_data(void *info)
+{
+ static struct pca953x_platform_data tca6416;
+ struct i2c_board_info *i2c_info = info;
+ int gpio_base, intr;
+ char base_pin_name[SFI_NAME_LEN + 1];
+ char intr_pin_name[SFI_NAME_LEN + 1];
+
+ strcpy(i2c_info->type, "tca6416");
+ strcpy(base_pin_name, "tca6416_base");
+ strcpy(intr_pin_name, "tca6416_int");
+
+ gpio_base = get_gpio_by_name(base_pin_name);
+ intr = get_gpio_by_name(intr_pin_name);
+
+ if (gpio_base == -1)
+ return NULL;
+ tca6416.gpio_base = gpio_base;
+ if (intr != -1) {
+ i2c_info->irq = intr + MRST_IRQ_OFFSET;
+ tca6416.irq_base = gpio_base + MRST_IRQ_OFFSET;
+ } else {
+ i2c_info->irq = -1;
+ tca6416.irq_base = -1;
+ }
+ return &tca6416;
+}
+
+static void *mpu3050_platform_data(void *info)
+{
+ struct i2c_board_info *i2c_info = info;
+ int intr = get_gpio_by_name("mpu3050_int");
+
+ if (intr == -1)
+ return NULL;
+
+ i2c_info->irq = intr + MRST_IRQ_OFFSET;
+ return NULL;
+}
+
+static void __init *emc1403_platform_data(void *info)
+{
+ static short intr2nd_pdata;
+ struct i2c_board_info *i2c_info = info;
+ int intr = get_gpio_by_name("thermal_int");
+ int intr2nd = get_gpio_by_name("thermal_alert");
+
+ if (intr == -1 || intr2nd == -1)
+ return NULL;
+
+ i2c_info->irq = intr + MRST_IRQ_OFFSET;
+ intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
+
+ return &intr2nd_pdata;
+}
+
+static void __init *lis331dl_platform_data(void *info)
+{
+ static short intr2nd_pdata;
+ struct i2c_board_info *i2c_info = info;
+ int intr = get_gpio_by_name("accel_int");
+ int intr2nd = get_gpio_by_name("accel_2");
+
+ if (intr == -1 || intr2nd == -1)
+ return NULL;
+
+ i2c_info->irq = intr + MRST_IRQ_OFFSET;
+ intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
+
+ return &intr2nd_pdata;
+}
+
+static void __init *no_platform_data(void *info)
+{
+ return NULL;
+}
+
+static struct resource msic_resources[] = {
+ {
+ .start = INTEL_MSIC_IRQ_PHYS_BASE,
+ .end = INTEL_MSIC_IRQ_PHYS_BASE + 64 - 1,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct intel_msic_platform_data msic_pdata;
+
+static struct platform_device msic_device = {
+ .name = "intel_msic",
+ .id = -1,
+ .dev = {
+ .platform_data = &msic_pdata,
+ },
+ .num_resources = ARRAY_SIZE(msic_resources),
+ .resource = msic_resources,
+};
+
+static inline bool mrst_has_msic(void)
+{
+ return mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL;
+}
+
+static int msic_scu_status_change(struct notifier_block *nb,
+ unsigned long code, void *data)
+{
+ if (code == SCU_DOWN) {
+ platform_device_unregister(&msic_device);
+ return 0;
+ }
+
+ return platform_device_register(&msic_device);
+}
+
+static int __init msic_init(void)
+{
+ static struct notifier_block msic_scu_notifier = {
+ .notifier_call = msic_scu_status_change,
+ };
+
+ /*
+ * We need to be sure that the SCU IPC is ready before MSIC device
+ * can be registered.
+ */
+ if (mrst_has_msic())
+ intel_scu_notifier_add(&msic_scu_notifier);
+
+ return 0;
+}
+arch_initcall(msic_init);
+
+/*
+ * msic_generic_platform_data - sets generic platform data for the block
+ * @info: pointer to the SFI device table entry for this block
+ * @block: MSIC block
+ *
+ * Function sets IRQ number from the SFI table entry for given device to
+ * the MSIC platform data.
+ */
+static void *msic_generic_platform_data(void *info, enum intel_msic_block block)
+{
+ struct sfi_device_table_entry *entry = info;
+
+ BUG_ON(block < 0 || block >= INTEL_MSIC_BLOCK_LAST);
+ msic_pdata.irq[block] = entry->irq;
+
+ return no_platform_data(info);
+}
+
+static void *msic_battery_platform_data(void *info)
+{
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_BATTERY);
+}
+
+static void *msic_gpio_platform_data(void *info)
+{
+ static struct intel_msic_gpio_pdata pdata;
+ int gpio = get_gpio_by_name("msic_gpio_base");
+
+ if (gpio < 0)
+ return NULL;
+
+ pdata.gpio_base = gpio;
+ msic_pdata.gpio = &pdata;
+
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_GPIO);
+}
+
+static void *msic_audio_platform_data(void *info)
+{
+ struct platform_device *pdev;
+
+ pdev = platform_device_register_simple("sst-platform", -1, NULL, 0);
+ if (IS_ERR(pdev)) {
+ pr_err("failed to create audio platform device\n");
+ return NULL;
+ }
+
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_AUDIO);
+}
+
+static void *msic_power_btn_platform_data(void *info)
+{
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_POWER_BTN);
+}
+
+static void *msic_ocd_platform_data(void *info)
+{
+ static struct intel_msic_ocd_pdata pdata;
+ int gpio = get_gpio_by_name("ocd_gpio");
+
+ if (gpio < 0)
+ return NULL;
+
+ pdata.gpio = gpio;
+ msic_pdata.ocd = &pdata;
+
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
+}
+
+static const struct devs_id __initconst device_ids[] = {
+ {"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data},
+ {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
+ {"pmic_gpio", SFI_DEV_TYPE_IPC, 1, &pmic_gpio_platform_data},
+ {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
+ {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
+ {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
+ {"tca6416", SFI_DEV_TYPE_I2C, 1, &tca6416_platform_data},
+ {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
+ {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
+ {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
+ {"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data},
+
+ /* MSIC subdevices */
+ {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data},
+ {"msic_gpio", SFI_DEV_TYPE_IPC, 1, &msic_gpio_platform_data},
+ {"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data},
+ {"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data},
+ {"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data},
+
+ {},
+};
+
+#define MAX_IPCDEVS 24
+static struct platform_device *ipc_devs[MAX_IPCDEVS];
+static int ipc_next_dev;
+
+#define MAX_SCU_SPI 24
+static struct spi_board_info *spi_devs[MAX_SCU_SPI];
+static int spi_next_dev;
+
+#define MAX_SCU_I2C 24
+static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
+static int i2c_bus[MAX_SCU_I2C];
+static int i2c_next_dev;
+
+static void __init intel_scu_device_register(struct platform_device *pdev)
+{
+ if(ipc_next_dev == MAX_IPCDEVS)
+ pr_err("too many SCU IPC devices");
+ else
+ ipc_devs[ipc_next_dev++] = pdev;
+}
+
+static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
+{
+ struct spi_board_info *new_dev;
+
+ if (spi_next_dev == MAX_SCU_SPI) {
+ pr_err("too many SCU SPI devices");
+ return;
+ }
+
+ new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+ if (!new_dev) {
+ pr_err("failed to alloc mem for delayed spi dev %s\n",
+ sdev->modalias);
+ return;
+ }
+ memcpy(new_dev, sdev, sizeof(*sdev));
+
+ spi_devs[spi_next_dev++] = new_dev;
+}
+
+static void __init intel_scu_i2c_device_register(int bus,
+ struct i2c_board_info *idev)
+{
+ struct i2c_board_info *new_dev;
+
+ if (i2c_next_dev == MAX_SCU_I2C) {
+ pr_err("too many SCU I2C devices");
+ return;
+ }
+
+ new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
+ if (!new_dev) {
+ pr_err("failed to alloc mem for delayed i2c dev %s\n",
+ idev->type);
+ return;
+ }
+ memcpy(new_dev, idev, sizeof(*idev));
+
+ i2c_bus[i2c_next_dev] = bus;
+ i2c_devs[i2c_next_dev++] = new_dev;
+}
+
+BLOCKING_NOTIFIER_HEAD(intel_scu_notifier);
+EXPORT_SYMBOL_GPL(intel_scu_notifier);
+
+/* Called by IPC driver */
+void intel_scu_devices_create(void)
+{
+ int i;
+
+ for (i = 0; i < ipc_next_dev; i++)
+ platform_device_add(ipc_devs[i]);
+
+ for (i = 0; i < spi_next_dev; i++)
+ spi_register_board_info(spi_devs[i], 1);
+
+ for (i = 0; i < i2c_next_dev; i++) {
+ struct i2c_adapter *adapter;
+ struct i2c_client *client;
+
+ adapter = i2c_get_adapter(i2c_bus[i]);
+ if (adapter) {
+ client = i2c_new_device(adapter, i2c_devs[i]);
+ if (!client)
+ pr_err("can't create i2c device %s\n",
+ i2c_devs[i]->type);
+ } else
+ i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
+ }
+ intel_scu_notifier_post(SCU_AVAILABLE, 0L);
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_create);
+
+/* Called by IPC driver */
+void intel_scu_devices_destroy(void)
+{
+ int i;
+
+ intel_scu_notifier_post(SCU_DOWN, 0L);
+
+ for (i = 0; i < ipc_next_dev; i++)
+ platform_device_del(ipc_devs[i]);
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
+
+static void __init install_irq_resource(struct platform_device *pdev, int irq)
+{
+ /* Single threaded */
+ static struct resource __initdata res = {
+ .name = "IRQ",
+ .flags = IORESOURCE_IRQ,
+ };
+ res.start = irq;
+ platform_device_add_resources(pdev, &res, 1);
+}
+
+static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *entry)
+{
+ const struct devs_id *dev = device_ids;
+ struct platform_device *pdev;
+ void *pdata = NULL;
+
+ while (dev->name[0]) {
+ if (dev->type == SFI_DEV_TYPE_IPC &&
+ !strncmp(dev->name, entry->name, SFI_NAME_LEN)) {
+ pdata = dev->get_platform_data(entry);
+ break;
+ }
+ dev++;
+ }
+
+ /*
+ * On Medfield the platform device creation is handled by the MSIC
+ * MFD driver so we don't need to do it here.
+ */
+ if (mrst_has_msic())
+ return;
+
+ pdev = platform_device_alloc(entry->name, 0);
+ if (pdev == NULL) {
+ pr_err("out of memory for SFI platform device '%s'.\n",
+ entry->name);
+ return;
+ }
+ install_irq_resource(pdev, entry->irq);
+
+ pdev->dev.platform_data = pdata;
+ intel_scu_device_register(pdev);
+}
+
+static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info)
+{
+ const struct devs_id *dev = device_ids;
+ void *pdata = NULL;
+
+ while (dev->name[0]) {
+ if (dev->type == SFI_DEV_TYPE_SPI &&
+ !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) {
+ pdata = dev->get_platform_data(spi_info);
+ break;
+ }
+ dev++;
+ }
+ spi_info->platform_data = pdata;
+ if (dev->delay)
+ intel_scu_spi_device_register(spi_info);
+ else
+ spi_register_board_info(spi_info, 1);
+}
+
+static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info)
+{
+ const struct devs_id *dev = device_ids;
+ void *pdata = NULL;
+
+ while (dev->name[0]) {
+ if (dev->type == SFI_DEV_TYPE_I2C &&
+ !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) {
+ pdata = dev->get_platform_data(i2c_info);
+ break;
+ }
+ dev++;
+ }
+ i2c_info->platform_data = pdata;
+
+ if (dev->delay)
+ intel_scu_i2c_device_register(bus, i2c_info);
+ else
+ i2c_register_board_info(bus, i2c_info, 1);
+ }
+
+
+static int __init sfi_parse_devs(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_device_table_entry *pentry;
+ struct spi_board_info spi_info;
+ struct i2c_board_info i2c_info;
+ int num, i, bus;
+ int ioapic;
+ struct io_apic_irq_attr irq_attr;
+
+ sb = (struct sfi_table_simple *)table;
+ num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
+ pentry = (struct sfi_device_table_entry *)sb->pentry;
+
+ for (i = 0; i < num; i++, pentry++) {
+ int irq = pentry->irq;
+
+ if (irq != (u8)0xff) { /* native RTE case */
+ /* these SPI2 devices are not exposed to system as PCI
+ * devices, but they have separate RTE entry in IOAPIC
+ * so we have to enable them one by one here
+ */
+ ioapic = mp_find_ioapic(irq);
+ irq_attr.ioapic = ioapic;
+ irq_attr.ioapic_pin = irq;
+ irq_attr.trigger = 1;
+ irq_attr.polarity = 1;
+ io_apic_set_pci_routing(NULL, irq, &irq_attr);
+ } else
+ irq = 0; /* No irq */
+
+ switch (pentry->type) {
+ case SFI_DEV_TYPE_IPC:
+ pr_debug("info[%2d]: IPC bus, name = %16.16s, "
+ "irq = 0x%2x\n", i, pentry->name, pentry->irq);
+ sfi_handle_ipc_dev(pentry);
+ break;
+ case SFI_DEV_TYPE_SPI:
+ memset(&spi_info, 0, sizeof(spi_info));
+ strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
+ spi_info.irq = irq;
+ spi_info.bus_num = pentry->host_num;
+ spi_info.chip_select = pentry->addr;
+ spi_info.max_speed_hz = pentry->max_freq;
+ pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, "
+ "irq = 0x%2x, max_freq = %d, cs = %d\n", i,
+ spi_info.bus_num,
+ spi_info.modalias,
+ spi_info.irq,
+ spi_info.max_speed_hz,
+ spi_info.chip_select);
+ sfi_handle_spi_dev(&spi_info);
+ break;
+ case SFI_DEV_TYPE_I2C:
+ memset(&i2c_info, 0, sizeof(i2c_info));
+ bus = pentry->host_num;
+ strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
+ i2c_info.irq = irq;
+ i2c_info.addr = pentry->addr;
+ pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
+ "irq = 0x%2x, addr = 0x%x\n", i, bus,
+ i2c_info.type,
+ i2c_info.irq,
+ i2c_info.addr);
+ sfi_handle_i2c_dev(bus, &i2c_info);
+ break;
+ case SFI_DEV_TYPE_UART:
+ case SFI_DEV_TYPE_HSI:
+ default:
+ ;
+ }
+ }
+ return 0;
+}
+
+static int __init mrst_platform_init(void)
+{
+ sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
+ sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
+ return 0;
+}
+arch_initcall(mrst_platform_init);
+
+/*
+ * we will search these buttons in SFI GPIO table (by name)
+ * and register them dynamically. Please add all possible
+ * buttons here, we will shrink them if no GPIO found.
+ */
+static struct gpio_keys_button gpio_button[] = {
+ {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000},
+ {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20},
+ {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20},
+ {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20},
+ {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20},
+ {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20},
+ {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20},
+ {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20},
+ {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20},
+ {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20},
+};
+
+static struct gpio_keys_platform_data mrst_gpio_keys = {
+ .buttons = gpio_button,
+ .rep = 1,
+ .nbuttons = -1, /* will fill it after search */
+};
+
+static struct platform_device pb_device = {
+ .name = "gpio-keys",
+ .id = -1,
+ .dev = {
+ .platform_data = &mrst_gpio_keys,
+ },
+};
+
+/*
+ * Shrink the non-existent buttons, register the gpio button
+ * device if there is some
+ */
+static int __init pb_keys_init(void)
+{
+ struct gpio_keys_button *gb = gpio_button;
+ int i, num, good = 0;
+
+ num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
+ for (i = 0; i < num; i++) {
+ gb[i].gpio = get_gpio_by_name(gb[i].desc);
+ pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, gb[i].gpio);
+ if (gb[i].gpio == -1)
+ continue;
+
+ if (i != good)
+ gb[good] = gb[i];
+ good++;
+ }
+
+ if (good) {
+ mrst_gpio_keys.nbuttons = good;
+ return platform_device_register(&pb_device);
+ }
+ return 0;
+}
+late_initcall(pb_keys_init);
diff --git a/arch/x86/platform/mrst/pmu.c b/arch/x86/platform/mrst/pmu.c
new file mode 100644
index 00000000000..c0ac06da57a
--- /dev/null
+++ b/arch/x86/platform/mrst/pmu.c
@@ -0,0 +1,817 @@
+/*
+ * mrst/pmu.c - driver for MRST Power Management Unit
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/cpuidle.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/seq_file.h>
+#include <linux/sfi.h>
+#include <asm/intel_scu_ipc.h>
+#include "pmu.h"
+
+#define IPCMSG_FW_REVISION 0xF4
+
+struct mrst_device {
+ u16 pci_dev_num; /* DEBUG only */
+ u16 lss;
+ u16 latest_request;
+ unsigned int pci_state_counts[PCI_D3cold + 1]; /* DEBUG only */
+};
+
+/*
+ * comlete list of MRST PCI devices
+ */
+static struct mrst_device mrst_devs[] = {
+/* 0 */ { 0x0800, LSS_SPI0 }, /* Moorestown SPI Ctrl 0 */
+/* 1 */ { 0x0801, LSS_SPI1 }, /* Moorestown SPI Ctrl 1 */
+/* 2 */ { 0x0802, LSS_I2C0 }, /* Moorestown I2C 0 */
+/* 3 */ { 0x0803, LSS_I2C1 }, /* Moorestown I2C 1 */
+/* 4 */ { 0x0804, LSS_I2C2 }, /* Moorestown I2C 2 */
+/* 5 */ { 0x0805, LSS_KBD }, /* Moorestown Keyboard Ctrl */
+/* 6 */ { 0x0806, LSS_USB_HC }, /* Moorestown USB Ctrl */
+/* 7 */ { 0x0807, LSS_SD_HC0 }, /* Moorestown SD Host Ctrl 0 */
+/* 8 */ { 0x0808, LSS_SD_HC1 }, /* Moorestown SD Host Ctrl 1 */
+/* 9 */ { 0x0809, LSS_NAND }, /* Moorestown NAND Ctrl */
+/* 10 */ { 0x080a, LSS_AUDIO }, /* Moorestown Audio Ctrl */
+/* 11 */ { 0x080b, LSS_IMAGING }, /* Moorestown ISP */
+/* 12 */ { 0x080c, LSS_SECURITY }, /* Moorestown Security Controller */
+/* 13 */ { 0x080d, LSS_DISPLAY }, /* Moorestown External Displays */
+/* 14 */ { 0x080e, 0 }, /* Moorestown SCU IPC */
+/* 15 */ { 0x080f, LSS_GPIO }, /* Moorestown GPIO Controller */
+/* 16 */ { 0x0810, 0 }, /* Moorestown Power Management Unit */
+/* 17 */ { 0x0811, LSS_USB_OTG }, /* Moorestown OTG Ctrl */
+/* 18 */ { 0x0812, LSS_SPI2 }, /* Moorestown SPI Ctrl 2 */
+/* 19 */ { 0x0813, 0 }, /* Moorestown SC DMA */
+/* 20 */ { 0x0814, LSS_AUDIO_LPE }, /* Moorestown LPE DMA */
+/* 21 */ { 0x0815, LSS_AUDIO_SSP }, /* Moorestown SSP0 */
+
+/* 22 */ { 0x084F, LSS_SD_HC2 }, /* Moorestown SD Host Ctrl 2 */
+
+/* 23 */ { 0x4102, 0 }, /* Lincroft */
+/* 24 */ { 0x4110, 0 }, /* Lincroft */
+};
+
+/* n.b. We ignore PCI-id 0x815 in LSS9 b/c Linux has no driver for it */
+static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0};
+static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803,
+ 0x0804, 0x0805, 0x080f, 0};
+
+/* handle concurrent SMP invokations of pmu_pci_set_power_state() */
+static spinlock_t mrst_pmu_power_state_lock;
+
+static unsigned int wake_counters[MRST_NUM_LSS]; /* DEBUG only */
+static unsigned int pmu_irq_stats[INT_INVALID + 1]; /* DEBUG only */
+
+static int graphics_is_off;
+static int lss_s0i3_enabled;
+static bool mrst_pmu_s0i3_enable;
+
+/* debug counters */
+static u32 pmu_wait_ready_calls;
+static u32 pmu_wait_ready_udelays;
+static u32 pmu_wait_ready_udelays_max;
+static u32 pmu_wait_done_calls;
+static u32 pmu_wait_done_udelays;
+static u32 pmu_wait_done_udelays_max;
+static u32 pmu_set_power_state_entry;
+static u32 pmu_set_power_state_send_cmd;
+
+static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num)
+{
+ int index = 0;
+
+ if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815))
+ index = pci_dev_num - 0x800;
+ else if (pci_dev_num == 0x084F)
+ index = 22;
+ else if (pci_dev_num == 0x4102)
+ index = 23;
+ else if (pci_dev_num == 0x4110)
+ index = 24;
+
+ if (pci_dev_num != mrst_devs[index].pci_dev_num) {
+ WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num);
+ return 0;
+ }
+
+ return &mrst_devs[index];
+}
+
+/**
+ * mrst_pmu_validate_cstates
+ * @dev: cpuidle_device
+ *
+ * Certain states are not appropriate for governor to pick in some cases.
+ * This function will be called as cpuidle_device's prepare callback and
+ * thus tells governor to ignore such states when selecting the next state
+ * to enter.
+ */
+
+#define IDLE_STATE4_IS_C6 4
+#define IDLE_STATE5_IS_S0I3 5
+
+int mrst_pmu_invalid_cstates(void)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * Demote to C4 if the PMU is busy.
+ * Since LSS changes leave the busy bit clear...
+ * busy means either the PMU is waiting for an ACK-C6 that
+ * isn't coming due to an MWAIT that returned immediately;
+ * or we returned from S0i3 successfully, and the PMU
+ * is not done sending us interrupts.
+ */
+ if (pmu_read_busy_status())
+ return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3;
+
+ /*
+ * Disallow S0i3 if: PMU is not initialized, or CPU1 is active,
+ * or if device LSS is insufficient, or the GPU is active,
+ * or if it has been explicitly disabled.
+ */
+ if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
+ !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable)
+ return 1 << IDLE_STATE5_IS_S0I3;
+ else
+ return 0;
+}
+
+/*
+ * pmu_update_wake_counters(): read PM_WKS, update wake_counters[]
+ * DEBUG only.
+ */
+static void pmu_update_wake_counters(void)
+{
+ int lss;
+ u32 wake_status;
+
+ wake_status = pmu_read_wks();
+
+ for (lss = 0; lss < MRST_NUM_LSS; ++lss) {
+ if (wake_status & (1 << lss))
+ wake_counters[lss]++;
+ }
+}
+
+int mrst_pmu_s0i3_entry(void)
+{
+ int status;
+
+ /* Clear any possible error conditions */
+ pmu_write_ics(0x300);
+
+ /* set wake control to current D-states */
+ pmu_write_wssc(S0I3_SSS_TARGET);
+
+ status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd);
+ pmu_update_wake_counters();
+ return status;
+}
+
+/* poll for maximum of 5ms for busy bit to clear */
+static int pmu_wait_ready(void)
+{
+ int udelays;
+
+ pmu_wait_ready_calls++;
+
+ for (udelays = 0; udelays < 500; ++udelays) {
+ if (udelays > pmu_wait_ready_udelays_max)
+ pmu_wait_ready_udelays_max = udelays;
+
+ if (pmu_read_busy_status() == 0)
+ return 0;
+
+ udelay(10);
+ pmu_wait_ready_udelays++;
+ }
+
+ /*
+ * if this fires, observe
+ * /sys/kernel/debug/mrst_pmu_wait_ready_calls
+ * /sys/kernel/debug/mrst_pmu_wait_ready_udelays
+ */
+ WARN_ONCE(1, "SCU not ready for 5ms");
+ return -EBUSY;
+}
+/* poll for maximum of 50ms us for busy bit to clear */
+static int pmu_wait_done(void)
+{
+ int udelays;
+
+ pmu_wait_done_calls++;
+
+ for (udelays = 0; udelays < 500; ++udelays) {
+ if (udelays > pmu_wait_done_udelays_max)
+ pmu_wait_done_udelays_max = udelays;
+
+ if (pmu_read_busy_status() == 0)
+ return 0;
+
+ udelay(100);
+ pmu_wait_done_udelays++;
+ }
+
+ /*
+ * if this fires, observe
+ * /sys/kernel/debug/mrst_pmu_wait_done_calls
+ * /sys/kernel/debug/mrst_pmu_wait_done_udelays
+ */
+ WARN_ONCE(1, "SCU not done for 50ms");
+ return -EBUSY;
+}
+
+u32 mrst_pmu_msi_is_disabled(void)
+{
+ return pmu_msi_is_disabled();
+}
+
+void mrst_pmu_enable_msi(void)
+{
+ pmu_msi_enable();
+}
+
+/**
+ * pmu_irq - pmu driver interrupt handler
+ * Context: interrupt context
+ */
+static irqreturn_t pmu_irq(int irq, void *dummy)
+{
+ union pmu_pm_ics pmu_ics;
+
+ pmu_ics.value = pmu_read_ics();
+
+ if (!pmu_ics.bits.pending)
+ return IRQ_NONE;
+
+ switch (pmu_ics.bits.cause) {
+ case INT_SPURIOUS:
+ case INT_CMD_DONE:
+ case INT_CMD_ERR:
+ case INT_WAKE_RX:
+ case INT_SS_ERROR:
+ case INT_S0IX_MISS:
+ case INT_NO_ACKC6:
+ pmu_irq_stats[pmu_ics.bits.cause]++;
+ break;
+ default:
+ pmu_irq_stats[INT_INVALID]++;
+ }
+
+ pmu_write_ics(pmu_ics.value); /* Clear pending interrupt */
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Translate PCI power management to MRST LSS D-states
+ */
+static int pci_2_mrst_state(int lss, pci_power_t pci_state)
+{
+ switch (pci_state) {
+ case PCI_D0:
+ if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET)
+ return D0i1;
+ else
+ return D0;
+ case PCI_D1:
+ return D0i1;
+ case PCI_D2:
+ return D0i2;
+ case PCI_D3hot:
+ case PCI_D3cold:
+ return D0i3;
+ default:
+ WARN(1, "pci_state %d\n", pci_state);
+ return 0;
+ }
+}
+
+static int pmu_issue_command(u32 pm_ssc)
+{
+ union pmu_pm_set_cfg_cmd_t command;
+
+ if (pmu_read_busy_status()) {
+ pr_debug("pmu is busy, Operation not permitted\n");
+ return -1;
+ }
+
+ /*
+ * enable interrupts in PMU so that interrupts are
+ * propagated when ioc bit for a particular set
+ * command is set
+ */
+
+ pmu_irq_enable();
+
+ /* Configure the sub systems for pmu2 */
+
+ pmu_write_ssc(pm_ssc);
+
+ /*
+ * Send the set config command for pmu its configured
+ * for mode CM_IMMEDIATE & hence with No Trigger
+ */
+
+ command.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE;
+ command.pmu2_params.d_param.cfg_delay = 0;
+ command.pmu2_params.d_param.rsvd = 0;
+
+ /* construct the command to send SET_CFG to particular PMU */
+ command.pmu2_params.d_param.cmd = SET_CFG_CMD;
+ command.pmu2_params.d_param.ioc = 0;
+ command.pmu2_params.d_param.mode_id = 0;
+ command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0;
+
+ /* write the value of PM_CMD into particular PMU */
+ pr_debug("pmu command being written %x\n",
+ command.pmu_pm_set_cfg_cmd_value);
+
+ pmu_write_cmd(command.pmu_pm_set_cfg_cmd_value);
+
+ return 0;
+}
+
+static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state)
+{
+ u16 existing_request;
+ int i;
+
+ for (i = 0; ids[i]; ++i) {
+ struct mrst_device *mrst_dev;
+
+ mrst_dev = pci_id_2_mrst_dev(ids[i]);
+ if (unlikely(!mrst_dev))
+ continue;
+
+ existing_request = mrst_dev->latest_request;
+ if (existing_request < pci_state)
+ pci_state = existing_request;
+ }
+ return pci_state;
+}
+
+/**
+ * pmu_pci_set_power_state - Callback function is used by all the PCI devices
+ * for a platform specific device power on/shutdown.
+ */
+
+int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
+{
+ u32 old_sss, new_sss;
+ int status = 0;
+ struct mrst_device *mrst_dev;
+
+ pmu_set_power_state_entry++;
+
+ BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL);
+ BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold);
+
+ mrst_dev = pci_id_2_mrst_dev(pdev->device);
+ if (unlikely(!mrst_dev))
+ return -ENODEV;
+
+ mrst_dev->pci_state_counts[pci_state]++; /* count invocations */
+
+ /* PMU driver calls self as part of PCI initialization, ignore */
+ if (pdev->device == PCI_DEV_ID_MRST_PMU)
+ return 0;
+
+ BUG_ON(!pmu_reg); /* SW bug if called before initialized */
+
+ spin_lock(&mrst_pmu_power_state_lock);
+
+ if (pdev->d3_delay) {
+ dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n",
+ pdev->d3_delay);
+ pdev->d3_delay = 0;
+ }
+ /*
+ * If Lincroft graphics, simply remember state
+ */
+ if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY
+ && !((pdev->class & PCI_SUB_CLASS_MASK) >> 8)) {
+ if (pci_state == PCI_D0)
+ graphics_is_off = 0;
+ else
+ graphics_is_off = 1;
+ goto ret;
+ }
+
+ if (!mrst_dev->lss)
+ goto ret; /* device with no LSS */
+
+ if (mrst_dev->latest_request == pci_state)
+ goto ret; /* no change */
+
+ mrst_dev->latest_request = pci_state; /* record latest request */
+
+ /*
+ * LSS9 and LSS10 contain multiple PCI devices.
+ * Use the lowest numbered (highest power) state in the LSS
+ */
+ if (mrst_dev->lss == 9)
+ pci_state = pmu_min_lss_pci_req(mrst_lss9_pci_ids, pci_state);
+ else if (mrst_dev->lss == 10)
+ pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state);
+
+ status = pmu_wait_ready();
+ if (status)
+ goto ret;
+
+ old_sss = pmu_read_sss();
+ new_sss = old_sss & ~SSMSK(3, mrst_dev->lss);
+ new_sss |= SSMSK(pci_2_mrst_state(mrst_dev->lss, pci_state),
+ mrst_dev->lss);
+
+ if (new_sss == old_sss)
+ goto ret; /* nothing to do */
+
+ pmu_set_power_state_send_cmd++;
+
+ status = pmu_issue_command(new_sss);
+
+ if (unlikely(status != 0)) {
+ dev_err(&pdev->dev, "Failed to Issue a PM command\n");
+ goto ret;
+ }
+
+ if (pmu_wait_done())
+ goto ret;
+
+ lss_s0i3_enabled =
+ ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET);
+ret:
+ spin_unlock(&mrst_pmu_power_state_lock);
+ return status;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static char *d0ix_names[] = {"D0", "D0i1", "D0i2", "D0i3"};
+
+static inline const char *d0ix_name(int state)
+{
+ return d0ix_names[(int) state];
+}
+
+static int debug_mrst_pmu_show(struct seq_file *s, void *unused)
+{
+ struct pci_dev *pdev = NULL;
+ u32 cur_pmsss;
+ int lss;
+
+ seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET);
+
+ cur_pmsss = pmu_read_sss();
+
+ seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET);
+
+ seq_printf(s, "0x%08X Current SSS ", cur_pmsss);
+ seq_printf(s, lss_s0i3_enabled ? "\n" : "[BLOCKS s0i3]\n");
+
+ if (cpumask_equal(cpu_online_mask, cpumask_of(0)))
+ seq_printf(s, "cpu0 is only cpu online\n");
+ else
+ seq_printf(s, "cpu0 is NOT only cpu online [BLOCKS S0i3]\n");
+
+ seq_printf(s, "GFX: %s\n", graphics_is_off ? "" : "[BLOCKS s0i3]");
+
+
+ for_each_pci_dev(pdev) {
+ int pos;
+ u16 pmcsr;
+ struct mrst_device *mrst_dev;
+ int i;
+
+ mrst_dev = pci_id_2_mrst_dev(pdev->device);
+
+ seq_printf(s, "%s %04x/%04X %-16.16s ",
+ dev_name(&pdev->dev),
+ pdev->vendor, pdev->device,
+ dev_driver_string(&pdev->dev));
+
+ if (unlikely (!mrst_dev)) {
+ seq_printf(s, " UNKNOWN\n");
+ continue;
+ }
+
+ if (mrst_dev->lss)
+ seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss,
+ d0ix_name(((cur_pmsss >>
+ (mrst_dev->lss * 2)) & 0x3)));
+ else
+ seq_printf(s, " ");
+
+ /* PCI PM config space setting */
+ pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
+ if (pos != 0) {
+ pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
+ seq_printf(s, "PCI-%-4s",
+ pci_power_name(pmcsr & PCI_PM_CTRL_STATE_MASK));
+ } else {
+ seq_printf(s, " ");
+ }
+
+ seq_printf(s, " %s ", pci_power_name(mrst_dev->latest_request));
+ for (i = 0; i <= PCI_D3cold; ++i)
+ seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]);
+
+ if (mrst_dev->lss) {
+ unsigned int lssmask;
+
+ lssmask = SSMSK(D0i3, mrst_dev->lss);
+
+ if ((lssmask & S0I3_SSS_TARGET) &&
+ ((lssmask & cur_pmsss) !=
+ (lssmask & S0I3_SSS_TARGET)))
+ seq_printf(s , "[BLOCKS s0i3]");
+ }
+
+ seq_printf(s, "\n");
+ }
+ seq_printf(s, "Wake Counters:\n");
+ for (lss = 0; lss < MRST_NUM_LSS; ++lss)
+ seq_printf(s, "LSS%d %d\n", lss, wake_counters[lss]);
+
+ seq_printf(s, "Interrupt Counters:\n");
+ seq_printf(s,
+ "INT_SPURIOUS \t%8u\n" "INT_CMD_DONE \t%8u\n"
+ "INT_CMD_ERR \t%8u\n" "INT_WAKE_RX \t%8u\n"
+ "INT_SS_ERROR \t%8u\n" "INT_S0IX_MISS\t%8u\n"
+ "INT_NO_ACKC6 \t%8u\n" "INT_INVALID \t%8u\n",
+ pmu_irq_stats[INT_SPURIOUS], pmu_irq_stats[INT_CMD_DONE],
+ pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX],
+ pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS],
+ pmu_irq_stats[INT_NO_ACKC6], pmu_irq_stats[INT_INVALID]);
+
+ seq_printf(s, "mrst_pmu_wait_ready_calls %8d\n",
+ pmu_wait_ready_calls);
+ seq_printf(s, "mrst_pmu_wait_ready_udelays %8d\n",
+ pmu_wait_ready_udelays);
+ seq_printf(s, "mrst_pmu_wait_ready_udelays_max %8d\n",
+ pmu_wait_ready_udelays_max);
+ seq_printf(s, "mrst_pmu_wait_done_calls %8d\n",
+ pmu_wait_done_calls);
+ seq_printf(s, "mrst_pmu_wait_done_udelays %8d\n",
+ pmu_wait_done_udelays);
+ seq_printf(s, "mrst_pmu_wait_done_udelays_max %8d\n",
+ pmu_wait_done_udelays_max);
+ seq_printf(s, "mrst_pmu_set_power_state_entry %8d\n",
+ pmu_set_power_state_entry);
+ seq_printf(s, "mrst_pmu_set_power_state_send_cmd %8d\n",
+ pmu_set_power_state_send_cmd);
+ seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status());
+
+ return 0;
+}
+
+static int debug_mrst_pmu_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, debug_mrst_pmu_show, NULL);
+}
+
+static const struct file_operations devices_state_operations = {
+ .open = debug_mrst_pmu_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif /* DEBUG_FS */
+
+/*
+ * Validate SCU PCI shim PCI vendor capability byte
+ * against LSS hard-coded in mrst_devs[] above.
+ * DEBUG only.
+ */
+static void pmu_scu_firmware_debug(void)
+{
+ struct pci_dev *pdev = NULL;
+
+ for_each_pci_dev(pdev) {
+ struct mrst_device *mrst_dev;
+ u8 pci_config_lss;
+ int pos;
+
+ mrst_dev = pci_id_2_mrst_dev(pdev->device);
+ if (unlikely(!mrst_dev)) {
+ printk(KERN_ERR FW_BUG "pmu: Unknown "
+ "PCI device 0x%04X\n", pdev->device);
+ continue;
+ }
+
+ if (mrst_dev->lss == 0)
+ continue; /* no LSS in our table */
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
+ if (!pos != 0) {
+ printk(KERN_ERR FW_BUG "pmu: 0x%04X "
+ "missing PCI Vendor Capability\n",
+ pdev->device);
+ continue;
+ }
+ pci_read_config_byte(pdev, pos + 4, &pci_config_lss);
+ if (!(pci_config_lss & PCI_VENDOR_CAP_LOG_SS_MASK)) {
+ printk(KERN_ERR FW_BUG "pmu: 0x%04X "
+ "invalid PCI Vendor Capability 0x%x "
+ " expected LSS 0x%X\n",
+ pdev->device, pci_config_lss, mrst_dev->lss);
+ continue;
+ }
+ pci_config_lss &= PCI_VENDOR_CAP_LOG_ID_MASK;
+
+ if (mrst_dev->lss == pci_config_lss)
+ continue;
+
+ printk(KERN_ERR FW_BUG "pmu: 0x%04X LSS = %d, expected %d\n",
+ pdev->device, pci_config_lss, mrst_dev->lss);
+ }
+}
+
+/**
+ * pmu_probe
+ */
+static int __devinit pmu_probe(struct pci_dev *pdev,
+ const struct pci_device_id *pci_id)
+{
+ int ret;
+ struct mrst_pmu_reg *pmu;
+
+ /* Init the device */
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Unable to Enable PCI device\n");
+ return ret;
+ }
+
+ ret = pci_request_regions(pdev, MRST_PMU_DRV_NAME);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n");
+ goto out_err1;
+ }
+
+ /* Map the memory of PMU reg base */
+ pmu = pci_iomap(pdev, 0, 0);
+ if (!pmu) {
+ dev_err(&pdev->dev, "Unable to map the PMU address space\n");
+ ret = -ENOMEM;
+ goto out_err2;
+ }
+
+#ifdef CONFIG_DEBUG_FS
+ /* /sys/kernel/debug/mrst_pmu */
+ (void) debugfs_create_file("mrst_pmu", S_IFREG | S_IRUGO,
+ NULL, NULL, &devices_state_operations);
+#endif
+ pmu_reg = pmu; /* success */
+
+ if (request_irq(pdev->irq, pmu_irq, 0, MRST_PMU_DRV_NAME, NULL)) {
+ dev_err(&pdev->dev, "Registering isr has failed\n");
+ ret = -1;
+ goto out_err3;
+ }
+
+ pmu_scu_firmware_debug();
+
+ pmu_write_wkc(S0I3_WAKE_SOURCES); /* Enable S0i3 wakeup sources */
+
+ pmu_wait_ready();
+
+ pmu_write_ssc(D0I1_ACG_SSS_TARGET); /* Enable Auto-Clock_Gating */
+ pmu_write_cmd(0x201);
+
+ spin_lock_init(&mrst_pmu_power_state_lock);
+
+ /* Enable the hardware interrupt */
+ pmu_irq_enable();
+ return 0;
+
+out_err3:
+ free_irq(pdev->irq, NULL);
+ pci_iounmap(pdev, pmu_reg);
+ pmu_reg = NULL;
+out_err2:
+ pci_release_region(pdev, 0);
+out_err1:
+ pci_disable_device(pdev);
+ return ret;
+}
+
+static void __devexit pmu_remove(struct pci_dev *pdev)
+{
+ dev_err(&pdev->dev, "Mid PM pmu_remove called\n");
+
+ /* Freeing up the irq */
+ free_irq(pdev->irq, NULL);
+
+ pci_iounmap(pdev, pmu_reg);
+ pmu_reg = NULL;
+
+ /* disable the current PCI device */
+ pci_release_region(pdev, 0);
+ pci_disable_device(pdev);
+}
+
+static DEFINE_PCI_DEVICE_TABLE(pmu_pci_ids) = {
+ { PCI_VDEVICE(INTEL, PCI_DEV_ID_MRST_PMU), 0 },
+ { }
+};
+
+MODULE_DEVICE_TABLE(pci, pmu_pci_ids);
+
+static struct pci_driver driver = {
+ .name = MRST_PMU_DRV_NAME,
+ .id_table = pmu_pci_ids,
+ .probe = pmu_probe,
+ .remove = __devexit_p(pmu_remove),
+};
+
+/**
+ * pmu_pci_register - register the PMU driver as PCI device
+ */
+static int __init pmu_pci_register(void)
+{
+ return pci_register_driver(&driver);
+}
+
+/* Register and probe via fs_initcall() to preceed device_initcall() */
+fs_initcall(pmu_pci_register);
+
+static void __exit mid_pci_cleanup(void)
+{
+ pci_unregister_driver(&driver);
+}
+
+static int ia_major;
+static int ia_minor;
+
+static int pmu_sfi_parse_oem(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+
+ sb = (struct sfi_table_simple *)table;
+ ia_major = (sb->pentry[1] >> 0) & 0xFFFF;
+ ia_minor = (sb->pentry[1] >> 16) & 0xFFFF;
+ printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n",
+ ia_major, ia_minor);
+
+ return 0;
+}
+
+static int __init scu_fw_check(void)
+{
+ int ret;
+ u32 fw_version;
+
+ if (!pmu_reg)
+ return 0; /* this driver didn't probe-out */
+
+ sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem);
+
+ if (ia_major < 0x6005 || ia_minor < 0x1525) {
+ WARN(1, "mrst_pmu: IA FW version too old\n");
+ return -1;
+ }
+
+ ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0,
+ &fw_version, 1);
+
+ if (ret) {
+ WARN(1, "mrst_pmu: IPC FW version? %d\n", ret);
+ } else {
+ int scu_major = (fw_version >> 8) & 0xFF;
+ int scu_minor = (fw_version >> 0) & 0xFF;
+
+ printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version);
+
+ if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) {
+ printk(KERN_INFO "mrst_pmu: enabling S0i3\n");
+ mrst_pmu_s0i3_enable = true;
+ } else {
+ WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X",
+ scu_major, scu_minor);
+ }
+ }
+ return 0;
+}
+late_initcall(scu_fw_check);
+module_exit(mid_pci_cleanup);
diff --git a/arch/x86/platform/mrst/pmu.h b/arch/x86/platform/mrst/pmu.h
new file mode 100644
index 00000000000..bfbfe64b167
--- /dev/null
+++ b/arch/x86/platform/mrst/pmu.h
@@ -0,0 +1,234 @@
+/*
+ * mrst/pmu.h - private definitions for MRST Power Management Unit mrst/pmu.c
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _MRST_PMU_H_
+#define _MRST_PMU_H_
+
+#define PCI_DEV_ID_MRST_PMU 0x0810
+#define MRST_PMU_DRV_NAME "mrst_pmu"
+#define PCI_SUB_CLASS_MASK 0xFF00
+
+#define PCI_VENDOR_CAP_LOG_ID_MASK 0x7F
+#define PCI_VENDOR_CAP_LOG_SS_MASK 0x80
+
+#define SUB_SYS_ALL_D0I1 0x01155555
+#define S0I3_WAKE_SOURCES 0x00001FFF
+
+#define PM_S0I3_COMMAND \
+ ((0 << 31) | /* Reserved */ \
+ (0 << 30) | /* Core must be idle */ \
+ (0xc2 << 22) | /* ACK C6 trigger */ \
+ (3 << 19) | /* Trigger on DMI message */ \
+ (3 << 16) | /* Enter S0i3 */ \
+ (0 << 13) | /* Numeric mode ID (sw) */ \
+ (3 << 9) | /* Trigger mode */ \
+ (0 << 8) | /* Do not interrupt */ \
+ (1 << 0)) /* Set configuration */
+
+#define LSS_DMI 0
+#define LSS_SD_HC0 1
+#define LSS_SD_HC1 2
+#define LSS_NAND 3
+#define LSS_IMAGING 4
+#define LSS_SECURITY 5
+#define LSS_DISPLAY 6
+#define LSS_USB_HC 7
+#define LSS_USB_OTG 8
+#define LSS_AUDIO 9
+#define LSS_AUDIO_LPE 9
+#define LSS_AUDIO_SSP 9
+#define LSS_I2C0 10
+#define LSS_I2C1 10
+#define LSS_I2C2 10
+#define LSS_KBD 10
+#define LSS_SPI0 10
+#define LSS_SPI1 10
+#define LSS_SPI2 10
+#define LSS_GPIO 10
+#define LSS_SRAM 11 /* used by SCU, do not touch */
+#define LSS_SD_HC2 12
+/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */
+#define MRST_NUM_LSS 13
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+#define SSMSK(mask, lss) ((mask) << ((lss) * 2))
+#define D0 0
+#define D0i1 1
+#define D0i2 2
+#define D0i3 3
+
+#define S0I3_SSS_TARGET ( \
+ SSMSK(D0i1, LSS_DMI) | \
+ SSMSK(D0i3, LSS_SD_HC0) | \
+ SSMSK(D0i3, LSS_SD_HC1) | \
+ SSMSK(D0i3, LSS_NAND) | \
+ SSMSK(D0i3, LSS_SD_HC2) | \
+ SSMSK(D0i3, LSS_IMAGING) | \
+ SSMSK(D0i3, LSS_SECURITY) | \
+ SSMSK(D0i3, LSS_DISPLAY) | \
+ SSMSK(D0i3, LSS_USB_HC) | \
+ SSMSK(D0i3, LSS_USB_OTG) | \
+ SSMSK(D0i3, LSS_AUDIO) | \
+ SSMSK(D0i1, LSS_I2C0))
+
+/*
+ * D0i1 on Langwell is Autonomous Clock Gating (ACG).
+ * Enable ACG on every LSS except camera and audio
+ */
+#define D0I1_ACG_SSS_TARGET \
+ (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO))
+
+enum cm_mode {
+ CM_NOP, /* ignore the config mode value */
+ CM_IMMEDIATE,
+ CM_DELAY,
+ CM_TRIGGER,
+ CM_INVALID
+};
+
+enum sys_state {
+ SYS_STATE_S0I0,
+ SYS_STATE_S0I1,
+ SYS_STATE_S0I2,
+ SYS_STATE_S0I3,
+ SYS_STATE_S3,
+ SYS_STATE_S5
+};
+
+#define SET_CFG_CMD 1
+
+enum int_status {
+ INT_SPURIOUS = 0,
+ INT_CMD_DONE = 1,
+ INT_CMD_ERR = 2,
+ INT_WAKE_RX = 3,
+ INT_SS_ERROR = 4,
+ INT_S0IX_MISS = 5,
+ INT_NO_ACKC6 = 6,
+ INT_INVALID = 7,
+};
+
+/* PMU register interface */
+static struct mrst_pmu_reg {
+ u32 pm_sts; /* 0x00 */
+ u32 pm_cmd; /* 0x04 */
+ u32 pm_ics; /* 0x08 */
+ u32 _resv1; /* 0x0C */
+ u32 pm_wkc[2]; /* 0x10 */
+ u32 pm_wks[2]; /* 0x18 */
+ u32 pm_ssc[4]; /* 0x20 */
+ u32 pm_sss[4]; /* 0x30 */
+ u32 pm_wssc[4]; /* 0x40 */
+ u32 pm_c3c4; /* 0x50 */
+ u32 pm_c5c6; /* 0x54 */
+ u32 pm_msi_disable; /* 0x58 */
+} *pmu_reg;
+
+static inline u32 pmu_read_sts(void) { return readl(&pmu_reg->pm_sts); }
+static inline u32 pmu_read_ics(void) { return readl(&pmu_reg->pm_ics); }
+static inline u32 pmu_read_wks(void) { return readl(&pmu_reg->pm_wks[0]); }
+static inline u32 pmu_read_sss(void) { return readl(&pmu_reg->pm_sss[0]); }
+
+static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); }
+static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); }
+static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); }
+static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); }
+static inline void pmu_write_wssc(u32 arg)
+ { writel(arg, &pmu_reg->pm_wssc[0]); }
+
+static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); }
+static inline u32 pmu_msi_is_disabled(void)
+ { return readl(&pmu_reg->pm_msi_disable); }
+
+union pmu_pm_ics {
+ struct {
+ u32 cause:8;
+ u32 enable:1;
+ u32 pending:1;
+ u32 reserved:22;
+ } bits;
+ u32 value;
+};
+
+static inline void pmu_irq_enable(void)
+{
+ union pmu_pm_ics pmu_ics;
+
+ pmu_ics.value = pmu_read_ics();
+ pmu_ics.bits.enable = 1;
+ pmu_write_ics(pmu_ics.value);
+}
+
+union pmu_pm_status {
+ struct {
+ u32 pmu_rev:8;
+ u32 pmu_busy:1;
+ u32 mode_id:4;
+ u32 Reserved:19;
+ } pmu_status_parts;
+ u32 pmu_status_value;
+};
+
+static inline int pmu_read_busy_status(void)
+{
+ union pmu_pm_status result;
+
+ result.pmu_status_value = pmu_read_sts();
+
+ return result.pmu_status_parts.pmu_busy;
+}
+
+/* pmu set config parameters */
+struct cfg_delay_param_t {
+ u32 cmd:8;
+ u32 ioc:1;
+ u32 cfg_mode:4;
+ u32 mode_id:3;
+ u32 sys_state:3;
+ u32 cfg_delay:8;
+ u32 rsvd:5;
+};
+
+struct cfg_trig_param_t {
+ u32 cmd:8;
+ u32 ioc:1;
+ u32 cfg_mode:4;
+ u32 mode_id:3;
+ u32 sys_state:3;
+ u32 cfg_trig_type:3;
+ u32 cfg_trig_val:8;
+ u32 cmbi:1;
+ u32 rsvd1:1;
+};
+
+union pmu_pm_set_cfg_cmd_t {
+ union {
+ struct cfg_delay_param_t d_param;
+ struct cfg_trig_param_t t_param;
+ } pmu2_params;
+ u32 pmu_pm_set_cfg_cmd_value;
+};
+
+#ifdef FUTURE_PATCH
+extern int mrst_s0i3_entry(u32 regval, u32 *regaddr);
+#else
+static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; }
+#endif
+#endif
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
new file mode 100644
index 00000000000..225bd0f0f67
--- /dev/null
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -0,0 +1,169 @@
+/*
+ * vrtc.c: Driver for virtual RTC device on Intel MID platform
+ *
+ * (C) Copyright 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Note:
+ * VRTC is emulated by system controller firmware, the real HW
+ * RTC is located in the PMIC device. SCU FW shadows PMIC RTC
+ * in a memory mapped IO space that is visible to the host IA
+ * processor.
+ *
+ * This driver is based on RTC CMOS driver.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/platform_device.h>
+
+#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
+#include <asm/time.h>
+#include <asm/fixmap.h>
+
+static unsigned char __iomem *vrtc_virt_base;
+
+unsigned char vrtc_cmos_read(unsigned char reg)
+{
+ unsigned char retval;
+
+ /* vRTC's registers range from 0x0 to 0xD */
+ if (reg > 0xd || !vrtc_virt_base)
+ return 0xff;
+
+ lock_cmos_prefix(reg);
+ retval = __raw_readb(vrtc_virt_base + (reg << 2));
+ lock_cmos_suffix(reg);
+ return retval;
+}
+EXPORT_SYMBOL_GPL(vrtc_cmos_read);
+
+void vrtc_cmos_write(unsigned char val, unsigned char reg)
+{
+ if (reg > 0xd || !vrtc_virt_base)
+ return;
+
+ lock_cmos_prefix(reg);
+ __raw_writeb(val, vrtc_virt_base + (reg << 2));
+ lock_cmos_suffix(reg);
+}
+EXPORT_SYMBOL_GPL(vrtc_cmos_write);
+
+unsigned long vrtc_get_time(void)
+{
+ u8 sec, min, hour, mday, mon;
+ unsigned long flags;
+ u32 year;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+
+ while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP))
+ cpu_relax();
+
+ sec = vrtc_cmos_read(RTC_SECONDS);
+ min = vrtc_cmos_read(RTC_MINUTES);
+ hour = vrtc_cmos_read(RTC_HOURS);
+ mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
+ mon = vrtc_cmos_read(RTC_MONTH);
+ year = vrtc_cmos_read(RTC_YEAR);
+
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+ /* vRTC YEAR reg contains the offset to 1972 */
+ year += 1972;
+
+ printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d "
+ "mon: %d year: %d\n", sec, min, hour, mday, mon, year);
+
+ return mktime(year, mon, mday, hour, min, sec);
+}
+
+/* Only care about the minutes and seconds */
+int vrtc_set_mmss(unsigned long nowtime)
+{
+ int real_sec, real_min;
+ unsigned long flags;
+ int vrtc_min;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ vrtc_min = vrtc_cmos_read(RTC_MINUTES);
+
+ real_sec = nowtime % 60;
+ real_min = nowtime / 60;
+ if (((abs(real_min - vrtc_min) + 15)/30) & 1)
+ real_min += 30;
+ real_min %= 60;
+
+ vrtc_cmos_write(real_sec, RTC_SECONDS);
+ vrtc_cmos_write(real_min, RTC_MINUTES);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+ return 0;
+}
+
+void __init mrst_rtc_init(void)
+{
+ unsigned long vrtc_paddr;
+
+ sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
+
+ vrtc_paddr = sfi_mrtc_array[0].phys_addr;
+ if (!sfi_mrtc_num || !vrtc_paddr)
+ return;
+
+ vrtc_virt_base = (void __iomem *)set_fixmap_offset_nocache(FIX_LNW_VRTC,
+ vrtc_paddr);
+ x86_platform.get_wallclock = vrtc_get_time;
+ x86_platform.set_wallclock = vrtc_set_mmss;
+}
+
+/*
+ * The Moorestown platform has a memory mapped virtual RTC device that emulates
+ * the programming interface of the RTC.
+ */
+
+static struct resource vrtc_resources[] = {
+ [0] = {
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .flags = IORESOURCE_IRQ,
+ }
+};
+
+static struct platform_device vrtc_device = {
+ .name = "rtc_mrst",
+ .id = -1,
+ .resource = vrtc_resources,
+ .num_resources = ARRAY_SIZE(vrtc_resources),
+};
+
+/* Register the RTC device if appropriate */
+static int __init mrst_device_create(void)
+{
+ /* No Moorestown, no device */
+ if (!mrst_identify_cpu())
+ return -ENODEV;
+ /* No timer, no device */
+ if (!sfi_mrtc_num)
+ return -ENODEV;
+
+ /* iomem resource */
+ vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr;
+ vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr +
+ MRST_VRTC_MAP_SZ;
+ /* irq resource */
+ vrtc_resources[1].start = sfi_mrtc_array[0].irq;
+ vrtc_resources[1].end = sfi_mrtc_array[0].irq;
+
+ return platform_device_register(&vrtc_device);
+}
+
+module_init(mrst_device_create);
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile
new file mode 100644
index 00000000000..fd332c53394
--- /dev/null
+++ b/arch/x86/platform/olpc/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_OLPC) += olpc.o olpc_ofw.o olpc_dt.o
+obj-$(CONFIG_OLPC_XO1_PM) += olpc-xo1-pm.o xo1-wakeup.o
+obj-$(CONFIG_OLPC_XO1_RTC) += olpc-xo1-rtc.o
+obj-$(CONFIG_OLPC_XO1_SCI) += olpc-xo1-sci.o
+obj-$(CONFIG_OLPC_XO15_SCI) += olpc-xo15-sci.o
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
new file mode 100644
index 00000000000..0ce8616c88a
--- /dev/null
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -0,0 +1,216 @@
+/*
+ * Support for power management features of the OLPC XO-1 laptop
+ *
+ * Copyright (C) 2010 Andres Salomon <dilinger@queued.net>
+ * Copyright (C) 2010 One Laptop per Child
+ * Copyright (C) 2006 Red Hat, Inc.
+ * Copyright (C) 2006 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/cs5535.h>
+#include <linux/platform_device.h>
+#include <linux/export.h>
+#include <linux/pm.h>
+#include <linux/mfd/core.h>
+#include <linux/suspend.h>
+
+#include <asm/io.h>
+#include <asm/olpc.h>
+
+#define DRV_NAME "olpc-xo1-pm"
+
+static unsigned long acpi_base;
+static unsigned long pms_base;
+
+static u16 wakeup_mask = CS5536_PM_PWRBTN;
+
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} ofw_bios_entry = { 0xF0000 + PAGE_OFFSET, __KERNEL_CS };
+
+/* Set bits in the wakeup mask */
+void olpc_xo1_pm_wakeup_set(u16 value)
+{
+ wakeup_mask |= value;
+}
+EXPORT_SYMBOL_GPL(olpc_xo1_pm_wakeup_set);
+
+/* Clear bits in the wakeup mask */
+void olpc_xo1_pm_wakeup_clear(u16 value)
+{
+ wakeup_mask &= ~value;
+}
+EXPORT_SYMBOL_GPL(olpc_xo1_pm_wakeup_clear);
+
+static int xo1_power_state_enter(suspend_state_t pm_state)
+{
+ unsigned long saved_sci_mask;
+ int r;
+
+ /* Only STR is supported */
+ if (pm_state != PM_SUSPEND_MEM)
+ return -EINVAL;
+
+ r = olpc_ec_cmd(EC_SET_SCI_INHIBIT, NULL, 0, NULL, 0);
+ if (r)
+ return r;
+
+ /*
+ * Save SCI mask (this gets lost since PM1_EN is used as a mask for
+ * wakeup events, which is not necessarily the same event set)
+ */
+ saved_sci_mask = inl(acpi_base + CS5536_PM1_STS);
+ saved_sci_mask &= 0xffff0000;
+
+ /* Save CPU state */
+ do_olpc_suspend_lowlevel();
+
+ /* Resume path starts here */
+
+ /* Restore SCI mask (using dword access to CS5536_PM1_EN) */
+ outl(saved_sci_mask, acpi_base + CS5536_PM1_STS);
+
+ /* Tell the EC to stop inhibiting SCIs */
+ olpc_ec_cmd(EC_SET_SCI_INHIBIT_RELEASE, NULL, 0, NULL, 0);
+
+ /*
+ * Tell the wireless module to restart USB communication.
+ * Must be done twice.
+ */
+ olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0);
+ olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0);
+
+ return 0;
+}
+
+asmlinkage int xo1_do_sleep(u8 sleep_state)
+{
+ void *pgd_addr = __va(read_cr3());
+
+ /* Program wakeup mask (using dword access to CS5536_PM1_EN) */
+ outl(wakeup_mask << 16, acpi_base + CS5536_PM1_STS);
+
+ __asm__("movl %0,%%eax" : : "r" (pgd_addr));
+ __asm__("call *(%%edi); cld"
+ : : "D" (&ofw_bios_entry));
+ __asm__("movb $0x34, %al\n\t"
+ "outb %al, $0x70\n\t"
+ "movb $0x30, %al\n\t"
+ "outb %al, $0x71\n\t");
+ return 0;
+}
+
+static void xo1_power_off(void)
+{
+ printk(KERN_INFO "OLPC XO-1 power off sequence...\n");
+
+ /* Enable all of these controls with 0 delay */
+ outl(0x40000000, pms_base + CS5536_PM_SCLK);
+ outl(0x40000000, pms_base + CS5536_PM_IN_SLPCTL);
+ outl(0x40000000, pms_base + CS5536_PM_WKXD);
+ outl(0x40000000, pms_base + CS5536_PM_WKD);
+
+ /* Clear status bits (possibly unnecessary) */
+ outl(0x0002ffff, pms_base + CS5536_PM_SSC);
+ outl(0xffffffff, acpi_base + CS5536_PM_GPE0_STS);
+
+ /* Write SLP_EN bit to start the machinery */
+ outl(0x00002000, acpi_base + CS5536_PM1_CNT);
+}
+
+static int xo1_power_state_valid(suspend_state_t pm_state)
+{
+ /* suspend-to-RAM only */
+ return pm_state == PM_SUSPEND_MEM;
+}
+
+static const struct platform_suspend_ops xo1_suspend_ops = {
+ .valid = xo1_power_state_valid,
+ .enter = xo1_power_state_enter,
+};
+
+static int __devinit xo1_pm_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ int err;
+
+ /* don't run on non-XOs */
+ if (!machine_is_olpc())
+ return -ENODEV;
+
+ err = mfd_cell_enable(pdev);
+ if (err)
+ return err;
+
+ res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+ if (!res) {
+ dev_err(&pdev->dev, "can't fetch device resource info\n");
+ return -EIO;
+ }
+ if (strcmp(pdev->name, "cs5535-pms") == 0)
+ pms_base = res->start;
+ else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0)
+ acpi_base = res->start;
+
+ /* If we have both addresses, we can override the poweroff hook */
+ if (pms_base && acpi_base) {
+ suspend_set_ops(&xo1_suspend_ops);
+ pm_power_off = xo1_power_off;
+ printk(KERN_INFO "OLPC XO-1 support registered\n");
+ }
+
+ return 0;
+}
+
+static int __devexit xo1_pm_remove(struct platform_device *pdev)
+{
+ mfd_cell_disable(pdev);
+
+ if (strcmp(pdev->name, "cs5535-pms") == 0)
+ pms_base = 0;
+ else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0)
+ acpi_base = 0;
+
+ pm_power_off = NULL;
+ return 0;
+}
+
+static struct platform_driver cs5535_pms_driver = {
+ .driver = {
+ .name = "cs5535-pms",
+ .owner = THIS_MODULE,
+ },
+ .probe = xo1_pm_probe,
+ .remove = __devexit_p(xo1_pm_remove),
+};
+
+static struct platform_driver cs5535_acpi_driver = {
+ .driver = {
+ .name = "olpc-xo1-pm-acpi",
+ .owner = THIS_MODULE,
+ },
+ .probe = xo1_pm_probe,
+ .remove = __devexit_p(xo1_pm_remove),
+};
+
+static int __init xo1_pm_init(void)
+{
+ int r;
+
+ r = platform_driver_register(&cs5535_pms_driver);
+ if (r)
+ return r;
+
+ r = platform_driver_register(&cs5535_acpi_driver);
+ if (r)
+ platform_driver_unregister(&cs5535_pms_driver);
+
+ return r;
+}
+arch_initcall(xo1_pm_init);
diff --git a/arch/x86/platform/olpc/olpc-xo1-rtc.c b/arch/x86/platform/olpc/olpc-xo1-rtc.c
new file mode 100644
index 00000000000..a2b4efddd61
--- /dev/null
+++ b/arch/x86/platform/olpc/olpc-xo1-rtc.c
@@ -0,0 +1,81 @@
+/*
+ * Support for OLPC XO-1 Real Time Clock (RTC)
+ *
+ * Copyright (C) 2011 One Laptop per Child
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/mc146818rtc.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+#include <linux/of.h>
+
+#include <asm/msr.h>
+#include <asm/olpc.h>
+
+static void rtc_wake_on(struct device *dev)
+{
+ olpc_xo1_pm_wakeup_set(CS5536_PM_RTC);
+}
+
+static void rtc_wake_off(struct device *dev)
+{
+ olpc_xo1_pm_wakeup_clear(CS5536_PM_RTC);
+}
+
+static struct resource rtc_platform_resource[] = {
+ [0] = {
+ .start = RTC_PORT(0),
+ .end = RTC_PORT(1),
+ .flags = IORESOURCE_IO,
+ },
+ [1] = {
+ .start = RTC_IRQ,
+ .end = RTC_IRQ,
+ .flags = IORESOURCE_IRQ,
+ }
+};
+
+static struct cmos_rtc_board_info rtc_info = {
+ .rtc_day_alarm = 0,
+ .rtc_mon_alarm = 0,
+ .rtc_century = 0,
+ .wake_on = rtc_wake_on,
+ .wake_off = rtc_wake_off,
+};
+
+static struct platform_device xo1_rtc_device = {
+ .name = "rtc_cmos",
+ .id = -1,
+ .num_resources = ARRAY_SIZE(rtc_platform_resource),
+ .dev.platform_data = &rtc_info,
+ .resource = rtc_platform_resource,
+};
+
+static int __init xo1_rtc_init(void)
+{
+ int r;
+ struct device_node *node;
+
+ node = of_find_compatible_node(NULL, NULL, "olpc,xo1-rtc");
+ if (!node)
+ return 0;
+ of_node_put(node);
+
+ pr_info("olpc-xo1-rtc: Initializing OLPC XO-1 RTC\n");
+ rdmsrl(MSR_RTC_DOMA_OFFSET, rtc_info.rtc_day_alarm);
+ rdmsrl(MSR_RTC_MONA_OFFSET, rtc_info.rtc_mon_alarm);
+ rdmsrl(MSR_RTC_CEN_OFFSET, rtc_info.rtc_century);
+
+ r = platform_device_register(&xo1_rtc_device);
+ if (r)
+ return r;
+
+ device_init_wakeup(&xo1_rtc_device.dev, 1);
+ return 0;
+}
+arch_initcall(xo1_rtc_init);
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
new file mode 100644
index 00000000000..1d4c783d732
--- /dev/null
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -0,0 +1,614 @@
+/*
+ * Support for OLPC XO-1 System Control Interrupts (SCI)
+ *
+ * Copyright (C) 2010 One Laptop per Child
+ * Copyright (C) 2006 Red Hat, Inc.
+ * Copyright (C) 2006 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/cs5535.h>
+#include <linux/device.h>
+#include <linux/gpio.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/mfd/core.h>
+#include <linux/power_supply.h>
+#include <linux/suspend.h>
+#include <linux/workqueue.h>
+
+#include <asm/io.h>
+#include <asm/msr.h>
+#include <asm/olpc.h>
+
+#define DRV_NAME "olpc-xo1-sci"
+#define PFX DRV_NAME ": "
+
+static unsigned long acpi_base;
+static struct input_dev *power_button_idev;
+static struct input_dev *ebook_switch_idev;
+static struct input_dev *lid_switch_idev;
+
+static int sci_irq;
+
+static bool lid_open;
+static bool lid_inverted;
+static int lid_wake_mode;
+
+enum lid_wake_modes {
+ LID_WAKE_ALWAYS,
+ LID_WAKE_OPEN,
+ LID_WAKE_CLOSE,
+};
+
+static const char * const lid_wake_mode_names[] = {
+ [LID_WAKE_ALWAYS] = "always",
+ [LID_WAKE_OPEN] = "open",
+ [LID_WAKE_CLOSE] = "close",
+};
+
+static void battery_status_changed(void)
+{
+ struct power_supply *psy = power_supply_get_by_name("olpc-battery");
+
+ if (psy) {
+ power_supply_changed(psy);
+ put_device(psy->dev);
+ }
+}
+
+static void ac_status_changed(void)
+{
+ struct power_supply *psy = power_supply_get_by_name("olpc-ac");
+
+ if (psy) {
+ power_supply_changed(psy);
+ put_device(psy->dev);
+ }
+}
+
+/* Report current ebook switch state through input layer */
+static void send_ebook_state(void)
+{
+ unsigned char state;
+
+ if (olpc_ec_cmd(EC_READ_EB_MODE, NULL, 0, &state, 1)) {
+ pr_err(PFX "failed to get ebook state\n");
+ return;
+ }
+
+ input_report_switch(ebook_switch_idev, SW_TABLET_MODE, state);
+ input_sync(ebook_switch_idev);
+}
+
+static void flip_lid_inverter(void)
+{
+ /* gpio is high; invert so we'll get l->h event interrupt */
+ if (lid_inverted)
+ cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_INPUT_INVERT);
+ else
+ cs5535_gpio_set(OLPC_GPIO_LID, GPIO_INPUT_INVERT);
+ lid_inverted = !lid_inverted;
+}
+
+static void detect_lid_state(void)
+{
+ /*
+ * the edge detector hookup on the gpio inputs on the geode is
+ * odd, to say the least. See http://dev.laptop.org/ticket/5703
+ * for details, but in a nutshell: we don't use the edge
+ * detectors. instead, we make use of an anomoly: with the both
+ * edge detectors turned off, we still get an edge event on a
+ * positive edge transition. to take advantage of this, we use the
+ * front-end inverter to ensure that that's the edge we're always
+ * going to see next.
+ */
+
+ int state;
+
+ state = cs5535_gpio_isset(OLPC_GPIO_LID, GPIO_READ_BACK);
+ lid_open = !state ^ !lid_inverted; /* x ^^ y */
+ if (!state)
+ return;
+
+ flip_lid_inverter();
+}
+
+/* Report current lid switch state through input layer */
+static void send_lid_state(void)
+{
+ input_report_switch(lid_switch_idev, SW_LID, !lid_open);
+ input_sync(lid_switch_idev);
+}
+
+static ssize_t lid_wake_mode_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ const char *mode = lid_wake_mode_names[lid_wake_mode];
+ return sprintf(buf, "%s\n", mode);
+}
+static ssize_t lid_wake_mode_set(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(lid_wake_mode_names); i++) {
+ const char *mode = lid_wake_mode_names[i];
+ if (strlen(mode) != count || strncasecmp(mode, buf, count))
+ continue;
+
+ lid_wake_mode = i;
+ return count;
+ }
+ return -EINVAL;
+}
+static DEVICE_ATTR(lid_wake_mode, S_IWUSR | S_IRUGO, lid_wake_mode_show,
+ lid_wake_mode_set);
+
+/*
+ * Process all items in the EC's SCI queue.
+ *
+ * This is handled in a workqueue because olpc_ec_cmd can be slow (and
+ * can even timeout).
+ *
+ * If propagate_events is false, the queue is drained without events being
+ * generated for the interrupts.
+ */
+static void process_sci_queue(bool propagate_events)
+{
+ int r;
+ u16 data;
+
+ do {
+ r = olpc_ec_sci_query(&data);
+ if (r || !data)
+ break;
+
+ pr_debug(PFX "SCI 0x%x received\n", data);
+
+ switch (data) {
+ case EC_SCI_SRC_BATERR:
+ case EC_SCI_SRC_BATSOC:
+ case EC_SCI_SRC_BATTERY:
+ case EC_SCI_SRC_BATCRIT:
+ battery_status_changed();
+ break;
+ case EC_SCI_SRC_ACPWR:
+ ac_status_changed();
+ break;
+ }
+
+ if (data == EC_SCI_SRC_EBOOK && propagate_events)
+ send_ebook_state();
+ } while (data);
+
+ if (r)
+ pr_err(PFX "Failed to clear SCI queue");
+}
+
+static void process_sci_queue_work(struct work_struct *work)
+{
+ process_sci_queue(true);
+}
+
+static DECLARE_WORK(sci_work, process_sci_queue_work);
+
+static irqreturn_t xo1_sci_intr(int irq, void *dev_id)
+{
+ struct platform_device *pdev = dev_id;
+ u32 sts;
+ u32 gpe;
+
+ sts = inl(acpi_base + CS5536_PM1_STS);
+ outl(sts | 0xffff, acpi_base + CS5536_PM1_STS);
+
+ gpe = inl(acpi_base + CS5536_PM_GPE0_STS);
+ outl(0xffffffff, acpi_base + CS5536_PM_GPE0_STS);
+
+ dev_dbg(&pdev->dev, "sts %x gpe %x\n", sts, gpe);
+
+ if (sts & CS5536_PWRBTN_FLAG && !(sts & CS5536_WAK_FLAG)) {
+ input_report_key(power_button_idev, KEY_POWER, 1);
+ input_sync(power_button_idev);
+ input_report_key(power_button_idev, KEY_POWER, 0);
+ input_sync(power_button_idev);
+ }
+
+ if (gpe & CS5536_GPIOM7_PME_FLAG) { /* EC GPIO */
+ cs5535_gpio_set(OLPC_GPIO_ECSCI, GPIO_NEGATIVE_EDGE_STS);
+ schedule_work(&sci_work);
+ }
+
+ cs5535_gpio_set(OLPC_GPIO_LID, GPIO_NEGATIVE_EDGE_STS);
+ cs5535_gpio_set(OLPC_GPIO_LID, GPIO_POSITIVE_EDGE_STS);
+ detect_lid_state();
+ send_lid_state();
+
+ return IRQ_HANDLED;
+}
+
+static int xo1_sci_suspend(struct platform_device *pdev, pm_message_t state)
+{
+ if (device_may_wakeup(&power_button_idev->dev))
+ olpc_xo1_pm_wakeup_set(CS5536_PM_PWRBTN);
+ else
+ olpc_xo1_pm_wakeup_clear(CS5536_PM_PWRBTN);
+
+ if (device_may_wakeup(&ebook_switch_idev->dev))
+ olpc_ec_wakeup_set(EC_SCI_SRC_EBOOK);
+ else
+ olpc_ec_wakeup_clear(EC_SCI_SRC_EBOOK);
+
+ if (!device_may_wakeup(&lid_switch_idev->dev)) {
+ cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_EVENTS_ENABLE);
+ } else if ((lid_open && lid_wake_mode == LID_WAKE_OPEN) ||
+ (!lid_open && lid_wake_mode == LID_WAKE_CLOSE)) {
+ flip_lid_inverter();
+
+ /* we may have just caused an event */
+ cs5535_gpio_set(OLPC_GPIO_LID, GPIO_NEGATIVE_EDGE_STS);
+ cs5535_gpio_set(OLPC_GPIO_LID, GPIO_POSITIVE_EDGE_STS);
+
+ cs5535_gpio_set(OLPC_GPIO_LID, GPIO_EVENTS_ENABLE);
+ }
+
+ return 0;
+}
+
+static int xo1_sci_resume(struct platform_device *pdev)
+{
+ /*
+ * We don't know what may have happened while we were asleep.
+ * Reestablish our lid setup so we're sure to catch all transitions.
+ */
+ detect_lid_state();
+ send_lid_state();
+ cs5535_gpio_set(OLPC_GPIO_LID, GPIO_EVENTS_ENABLE);
+
+ /* Enable all EC events */
+ olpc_ec_mask_write(EC_SCI_SRC_ALL);
+
+ /* Power/battery status might have changed too */
+ battery_status_changed();
+ ac_status_changed();
+ return 0;
+}
+
+static int __devinit setup_sci_interrupt(struct platform_device *pdev)
+{
+ u32 lo, hi;
+ u32 sts;
+ int r;
+
+ rdmsr(0x51400020, lo, hi);
+ sci_irq = (lo >> 20) & 15;
+
+ if (sci_irq) {
+ dev_info(&pdev->dev, "SCI is mapped to IRQ %d\n", sci_irq);
+ } else {
+ /* Zero means masked */
+ dev_info(&pdev->dev, "SCI unmapped. Mapping to IRQ 3\n");
+ sci_irq = 3;
+ lo |= 0x00300000;
+ wrmsrl(0x51400020, lo);
+ }
+
+ /* Select level triggered in PIC */
+ if (sci_irq < 8) {
+ lo = inb(CS5536_PIC_INT_SEL1);
+ lo |= 1 << sci_irq;
+ outb(lo, CS5536_PIC_INT_SEL1);
+ } else {
+ lo = inb(CS5536_PIC_INT_SEL2);
+ lo |= 1 << (sci_irq - 8);
+ outb(lo, CS5536_PIC_INT_SEL2);
+ }
+
+ /* Enable SCI from power button, and clear pending interrupts */
+ sts = inl(acpi_base + CS5536_PM1_STS);
+ outl((CS5536_PM_PWRBTN << 16) | 0xffff, acpi_base + CS5536_PM1_STS);
+
+ r = request_irq(sci_irq, xo1_sci_intr, 0, DRV_NAME, pdev);
+ if (r)
+ dev_err(&pdev->dev, "can't request interrupt\n");
+
+ return r;
+}
+
+static int __devinit setup_ec_sci(void)
+{
+ int r;
+
+ r = gpio_request(OLPC_GPIO_ECSCI, "OLPC-ECSCI");
+ if (r)
+ return r;
+
+ gpio_direction_input(OLPC_GPIO_ECSCI);
+
+ /* Clear pending EC SCI events */
+ cs5535_gpio_set(OLPC_GPIO_ECSCI, GPIO_NEGATIVE_EDGE_STS);
+ cs5535_gpio_set(OLPC_GPIO_ECSCI, GPIO_POSITIVE_EDGE_STS);
+
+ /*
+ * Enable EC SCI events, and map them to both a PME and the SCI
+ * interrupt.
+ *
+ * Ordinarily, in addition to functioning as GPIOs, Geode GPIOs can
+ * be mapped to regular interrupts *or* Geode-specific Power
+ * Management Events (PMEs) - events that bring the system out of
+ * suspend. In this case, we want both of those things - the system
+ * wakeup, *and* the ability to get an interrupt when an event occurs.
+ *
+ * To achieve this, we map the GPIO to a PME, and then we use one
+ * of the many generic knobs on the CS5535 PIC to additionally map the
+ * PME to the regular SCI interrupt line.
+ */
+ cs5535_gpio_set(OLPC_GPIO_ECSCI, GPIO_EVENTS_ENABLE);
+
+ /* Set the SCI to cause a PME event on group 7 */
+ cs5535_gpio_setup_event(OLPC_GPIO_ECSCI, 7, 1);
+
+ /* And have group 7 also fire the SCI interrupt */
+ cs5535_pic_unreqz_select_high(7, sci_irq);
+
+ return 0;
+}
+
+static void free_ec_sci(void)
+{
+ gpio_free(OLPC_GPIO_ECSCI);
+}
+
+static int __devinit setup_lid_events(void)
+{
+ int r;
+
+ r = gpio_request(OLPC_GPIO_LID, "OLPC-LID");
+ if (r)
+ return r;
+
+ gpio_direction_input(OLPC_GPIO_LID);
+
+ cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_INPUT_INVERT);
+ lid_inverted = 0;
+
+ /* Clear edge detection and event enable for now */
+ cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_EVENTS_ENABLE);
+ cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_NEGATIVE_EDGE_EN);
+ cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_POSITIVE_EDGE_EN);
+ cs5535_gpio_set(OLPC_GPIO_LID, GPIO_NEGATIVE_EDGE_STS);
+ cs5535_gpio_set(OLPC_GPIO_LID, GPIO_POSITIVE_EDGE_STS);
+
+ /* Set the LID to cause an PME event on group 6 */
+ cs5535_gpio_setup_event(OLPC_GPIO_LID, 6, 1);
+
+ /* Set PME group 6 to fire the SCI interrupt */
+ cs5535_gpio_set_irq(6, sci_irq);
+
+ /* Enable the event */
+ cs5535_gpio_set(OLPC_GPIO_LID, GPIO_EVENTS_ENABLE);
+
+ return 0;
+}
+
+static void free_lid_events(void)
+{
+ gpio_free(OLPC_GPIO_LID);
+}
+
+static int __devinit setup_power_button(struct platform_device *pdev)
+{
+ int r;
+
+ power_button_idev = input_allocate_device();
+ if (!power_button_idev)
+ return -ENOMEM;
+
+ power_button_idev->name = "Power Button";
+ power_button_idev->phys = DRV_NAME "/input0";
+ set_bit(EV_KEY, power_button_idev->evbit);
+ set_bit(KEY_POWER, power_button_idev->keybit);
+
+ power_button_idev->dev.parent = &pdev->dev;
+ device_init_wakeup(&power_button_idev->dev, 1);
+
+ r = input_register_device(power_button_idev);
+ if (r) {
+ dev_err(&pdev->dev, "failed to register power button: %d\n", r);
+ input_free_device(power_button_idev);
+ }
+
+ return r;
+}
+
+static void free_power_button(void)
+{
+ input_unregister_device(power_button_idev);
+ input_free_device(power_button_idev);
+}
+
+static int __devinit setup_ebook_switch(struct platform_device *pdev)
+{
+ int r;
+
+ ebook_switch_idev = input_allocate_device();
+ if (!ebook_switch_idev)
+ return -ENOMEM;
+
+ ebook_switch_idev->name = "EBook Switch";
+ ebook_switch_idev->phys = DRV_NAME "/input1";
+ set_bit(EV_SW, ebook_switch_idev->evbit);
+ set_bit(SW_TABLET_MODE, ebook_switch_idev->swbit);
+
+ ebook_switch_idev->dev.parent = &pdev->dev;
+ device_set_wakeup_capable(&ebook_switch_idev->dev, true);
+
+ r = input_register_device(ebook_switch_idev);
+ if (r) {
+ dev_err(&pdev->dev, "failed to register ebook switch: %d\n", r);
+ input_free_device(ebook_switch_idev);
+ }
+
+ return r;
+}
+
+static void free_ebook_switch(void)
+{
+ input_unregister_device(ebook_switch_idev);
+ input_free_device(ebook_switch_idev);
+}
+
+static int __devinit setup_lid_switch(struct platform_device *pdev)
+{
+ int r;
+
+ lid_switch_idev = input_allocate_device();
+ if (!lid_switch_idev)
+ return -ENOMEM;
+
+ lid_switch_idev->name = "Lid Switch";
+ lid_switch_idev->phys = DRV_NAME "/input2";
+ set_bit(EV_SW, lid_switch_idev->evbit);
+ set_bit(SW_LID, lid_switch_idev->swbit);
+
+ lid_switch_idev->dev.parent = &pdev->dev;
+ device_set_wakeup_capable(&lid_switch_idev->dev, true);
+
+ r = input_register_device(lid_switch_idev);
+ if (r) {
+ dev_err(&pdev->dev, "failed to register lid switch: %d\n", r);
+ goto err_register;
+ }
+
+ r = device_create_file(&lid_switch_idev->dev, &dev_attr_lid_wake_mode);
+ if (r) {
+ dev_err(&pdev->dev, "failed to create wake mode attr: %d\n", r);
+ goto err_create_attr;
+ }
+
+ return 0;
+
+err_create_attr:
+ input_unregister_device(lid_switch_idev);
+err_register:
+ input_free_device(lid_switch_idev);
+ return r;
+}
+
+static void free_lid_switch(void)
+{
+ device_remove_file(&lid_switch_idev->dev, &dev_attr_lid_wake_mode);
+ input_unregister_device(lid_switch_idev);
+ input_free_device(lid_switch_idev);
+}
+
+static int __devinit xo1_sci_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ int r;
+
+ /* don't run on non-XOs */
+ if (!machine_is_olpc())
+ return -ENODEV;
+
+ r = mfd_cell_enable(pdev);
+ if (r)
+ return r;
+
+ res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+ if (!res) {
+ dev_err(&pdev->dev, "can't fetch device resource info\n");
+ return -EIO;
+ }
+ acpi_base = res->start;
+
+ r = setup_power_button(pdev);
+ if (r)
+ return r;
+
+ r = setup_ebook_switch(pdev);
+ if (r)
+ goto err_ebook;
+
+ r = setup_lid_switch(pdev);
+ if (r)
+ goto err_lid;
+
+ r = setup_lid_events();
+ if (r)
+ goto err_lidevt;
+
+ r = setup_ec_sci();
+ if (r)
+ goto err_ecsci;
+
+ /* Enable PME generation for EC-generated events */
+ outl(CS5536_GPIOM6_PME_EN | CS5536_GPIOM7_PME_EN,
+ acpi_base + CS5536_PM_GPE0_EN);
+
+ /* Clear pending events */
+ outl(0xffffffff, acpi_base + CS5536_PM_GPE0_STS);
+ process_sci_queue(false);
+
+ /* Initial sync */
+ send_ebook_state();
+ detect_lid_state();
+ send_lid_state();
+
+ r = setup_sci_interrupt(pdev);
+ if (r)
+ goto err_sci;
+
+ /* Enable all EC events */
+ olpc_ec_mask_write(EC_SCI_SRC_ALL);
+
+ return r;
+
+err_sci:
+ free_ec_sci();
+err_ecsci:
+ free_lid_events();
+err_lidevt:
+ free_lid_switch();
+err_lid:
+ free_ebook_switch();
+err_ebook:
+ free_power_button();
+ return r;
+}
+
+static int __devexit xo1_sci_remove(struct platform_device *pdev)
+{
+ mfd_cell_disable(pdev);
+ free_irq(sci_irq, pdev);
+ cancel_work_sync(&sci_work);
+ free_ec_sci();
+ free_lid_events();
+ free_lid_switch();
+ free_ebook_switch();
+ free_power_button();
+ acpi_base = 0;
+ return 0;
+}
+
+static struct platform_driver xo1_sci_driver = {
+ .driver = {
+ .name = "olpc-xo1-sci-acpi",
+ },
+ .probe = xo1_sci_probe,
+ .remove = __devexit_p(xo1_sci_remove),
+ .suspend = xo1_sci_suspend,
+ .resume = xo1_sci_resume,
+};
+
+static int __init xo1_sci_init(void)
+{
+ return platform_driver_register(&xo1_sci_driver);
+}
+arch_initcall(xo1_sci_init);
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
new file mode 100644
index 00000000000..2b235b77d9a
--- /dev/null
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -0,0 +1,168 @@
+/*
+ * Support for OLPC XO-1.5 System Control Interrupts (SCI)
+ *
+ * Copyright (C) 2009-2010 One Laptop per Child
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/power_supply.h>
+
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <asm/olpc.h>
+
+#define DRV_NAME "olpc-xo15-sci"
+#define PFX DRV_NAME ": "
+#define XO15_SCI_CLASS DRV_NAME
+#define XO15_SCI_DEVICE_NAME "OLPC XO-1.5 SCI"
+
+static unsigned long xo15_sci_gpe;
+
+static void battery_status_changed(void)
+{
+ struct power_supply *psy = power_supply_get_by_name("olpc-battery");
+
+ if (psy) {
+ power_supply_changed(psy);
+ put_device(psy->dev);
+ }
+}
+
+static void ac_status_changed(void)
+{
+ struct power_supply *psy = power_supply_get_by_name("olpc-ac");
+
+ if (psy) {
+ power_supply_changed(psy);
+ put_device(psy->dev);
+ }
+}
+
+static void process_sci_queue(void)
+{
+ u16 data;
+ int r;
+
+ do {
+ r = olpc_ec_sci_query(&data);
+ if (r || !data)
+ break;
+
+ pr_debug(PFX "SCI 0x%x received\n", data);
+
+ switch (data) {
+ case EC_SCI_SRC_BATERR:
+ case EC_SCI_SRC_BATSOC:
+ case EC_SCI_SRC_BATTERY:
+ case EC_SCI_SRC_BATCRIT:
+ battery_status_changed();
+ break;
+ case EC_SCI_SRC_ACPWR:
+ ac_status_changed();
+ break;
+ }
+ } while (data);
+
+ if (r)
+ pr_err(PFX "Failed to clear SCI queue");
+}
+
+static void process_sci_queue_work(struct work_struct *work)
+{
+ process_sci_queue();
+}
+
+static DECLARE_WORK(sci_work, process_sci_queue_work);
+
+static u32 xo15_sci_gpe_handler(acpi_handle gpe_device, u32 gpe, void *context)
+{
+ schedule_work(&sci_work);
+ return ACPI_INTERRUPT_HANDLED | ACPI_REENABLE_GPE;
+}
+
+static int xo15_sci_add(struct acpi_device *device)
+{
+ unsigned long long tmp;
+ acpi_status status;
+
+ if (!device)
+ return -EINVAL;
+
+ strcpy(acpi_device_name(device), XO15_SCI_DEVICE_NAME);
+ strcpy(acpi_device_class(device), XO15_SCI_CLASS);
+
+ /* Get GPE bit assignment (EC events). */
+ status = acpi_evaluate_integer(device->handle, "_GPE", NULL, &tmp);
+ if (ACPI_FAILURE(status))
+ return -EINVAL;
+
+ xo15_sci_gpe = tmp;
+ status = acpi_install_gpe_handler(NULL, xo15_sci_gpe,
+ ACPI_GPE_EDGE_TRIGGERED,
+ xo15_sci_gpe_handler, device);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ dev_info(&device->dev, "Initialized, GPE = 0x%lx\n", xo15_sci_gpe);
+
+ /* Flush queue, and enable all SCI events */
+ process_sci_queue();
+ olpc_ec_mask_write(EC_SCI_SRC_ALL);
+
+ acpi_enable_gpe(NULL, xo15_sci_gpe);
+
+ /* Enable wake-on-EC */
+ if (device->wakeup.flags.valid)
+ device_init_wakeup(&device->dev, true);
+
+ return 0;
+}
+
+static int xo15_sci_remove(struct acpi_device *device, int type)
+{
+ acpi_disable_gpe(NULL, xo15_sci_gpe);
+ acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
+ cancel_work_sync(&sci_work);
+ return 0;
+}
+
+static int xo15_sci_resume(struct acpi_device *device)
+{
+ /* Enable all EC events */
+ olpc_ec_mask_write(EC_SCI_SRC_ALL);
+
+ /* Power/battery status might have changed */
+ battery_status_changed();
+ ac_status_changed();
+
+ return 0;
+}
+
+static const struct acpi_device_id xo15_sci_device_ids[] = {
+ {"XO15EC", 0},
+ {"", 0},
+};
+
+static struct acpi_driver xo15_sci_drv = {
+ .name = DRV_NAME,
+ .class = XO15_SCI_CLASS,
+ .ids = xo15_sci_device_ids,
+ .ops = {
+ .add = xo15_sci_add,
+ .remove = xo15_sci_remove,
+ .resume = xo15_sci_resume,
+ },
+};
+
+static int __init xo15_sci_init(void)
+{
+ return acpi_bus_register_driver(&xo15_sci_drv);
+}
+device_initcall(xo15_sci_init);
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
new file mode 100644
index 00000000000..7cce722667b
--- /dev/null
+++ b/arch/x86/platform/olpc/olpc.c
@@ -0,0 +1,379 @@
+/*
+ * Support for the OLPC DCON and OLPC EC access
+ *
+ * Copyright © 2006 Advanced Micro Devices, Inc.
+ * Copyright © 2007-2008 Andres Salomon <dilinger@debian.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/geode.h>
+#include <asm/setup.h>
+#include <asm/olpc.h>
+#include <asm/olpc_ofw.h>
+
+struct olpc_platform_t olpc_platform_info;
+EXPORT_SYMBOL_GPL(olpc_platform_info);
+
+static DEFINE_SPINLOCK(ec_lock);
+
+/* EC event mask to be applied during suspend (defining wakeup sources). */
+static u16 ec_wakeup_mask;
+
+/* what the timeout *should* be (in ms) */
+#define EC_BASE_TIMEOUT 20
+
+/* the timeout that bugs in the EC might force us to actually use */
+static int ec_timeout = EC_BASE_TIMEOUT;
+
+static int __init olpc_ec_timeout_set(char *str)
+{
+ if (get_option(&str, &ec_timeout) != 1) {
+ ec_timeout = EC_BASE_TIMEOUT;
+ printk(KERN_ERR "olpc-ec: invalid argument to "
+ "'olpc_ec_timeout=', ignoring!\n");
+ }
+ printk(KERN_DEBUG "olpc-ec: using %d ms delay for EC commands.\n",
+ ec_timeout);
+ return 1;
+}
+__setup("olpc_ec_timeout=", olpc_ec_timeout_set);
+
+/*
+ * These {i,o}bf_status functions return whether the buffers are full or not.
+ */
+
+static inline unsigned int ibf_status(unsigned int port)
+{
+ return !!(inb(port) & 0x02);
+}
+
+static inline unsigned int obf_status(unsigned int port)
+{
+ return inb(port) & 0x01;
+}
+
+#define wait_on_ibf(p, d) __wait_on_ibf(__LINE__, (p), (d))
+static int __wait_on_ibf(unsigned int line, unsigned int port, int desired)
+{
+ unsigned int timeo;
+ int state = ibf_status(port);
+
+ for (timeo = ec_timeout; state != desired && timeo; timeo--) {
+ mdelay(1);
+ state = ibf_status(port);
+ }
+
+ if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) &&
+ timeo < (ec_timeout - EC_BASE_TIMEOUT)) {
+ printk(KERN_WARNING "olpc-ec: %d: waited %u ms for IBF!\n",
+ line, ec_timeout - timeo);
+ }
+
+ return !(state == desired);
+}
+
+#define wait_on_obf(p, d) __wait_on_obf(__LINE__, (p), (d))
+static int __wait_on_obf(unsigned int line, unsigned int port, int desired)
+{
+ unsigned int timeo;
+ int state = obf_status(port);
+
+ for (timeo = ec_timeout; state != desired && timeo; timeo--) {
+ mdelay(1);
+ state = obf_status(port);
+ }
+
+ if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) &&
+ timeo < (ec_timeout - EC_BASE_TIMEOUT)) {
+ printk(KERN_WARNING "olpc-ec: %d: waited %u ms for OBF!\n",
+ line, ec_timeout - timeo);
+ }
+
+ return !(state == desired);
+}
+
+/*
+ * This allows the kernel to run Embedded Controller commands. The EC is
+ * documented at <http://wiki.laptop.org/go/Embedded_controller>, and the
+ * available EC commands are here:
+ * <http://wiki.laptop.org/go/Ec_specification>. Unfortunately, while
+ * OpenFirmware's source is available, the EC's is not.
+ */
+int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
+ unsigned char *outbuf, size_t outlen)
+{
+ unsigned long flags;
+ int ret = -EIO;
+ int i;
+ int restarts = 0;
+
+ spin_lock_irqsave(&ec_lock, flags);
+
+ /* Clear OBF */
+ for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++)
+ inb(0x68);
+ if (i == 10) {
+ printk(KERN_ERR "olpc-ec: timeout while attempting to "
+ "clear OBF flag!\n");
+ goto err;
+ }
+
+ if (wait_on_ibf(0x6c, 0)) {
+ printk(KERN_ERR "olpc-ec: timeout waiting for EC to "
+ "quiesce!\n");
+ goto err;
+ }
+
+restart:
+ /*
+ * Note that if we time out during any IBF checks, that's a failure;
+ * we have to return. There's no way for the kernel to clear that.
+ *
+ * If we time out during an OBF check, we can restart the command;
+ * reissuing it will clear the OBF flag, and we should be alright.
+ * The OBF flag will sometimes misbehave due to what we believe
+ * is a hardware quirk..
+ */
+ pr_devel("olpc-ec: running cmd 0x%x\n", cmd);
+ outb(cmd, 0x6c);
+
+ if (wait_on_ibf(0x6c, 0)) {
+ printk(KERN_ERR "olpc-ec: timeout waiting for EC to read "
+ "command!\n");
+ goto err;
+ }
+
+ if (inbuf && inlen) {
+ /* write data to EC */
+ for (i = 0; i < inlen; i++) {
+ pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
+ outb(inbuf[i], 0x68);
+ if (wait_on_ibf(0x6c, 0)) {
+ printk(KERN_ERR "olpc-ec: timeout waiting for"
+ " EC accept data!\n");
+ goto err;
+ }
+ }
+ }
+ if (outbuf && outlen) {
+ /* read data from EC */
+ for (i = 0; i < outlen; i++) {
+ if (wait_on_obf(0x6c, 1)) {
+ printk(KERN_ERR "olpc-ec: timeout waiting for"
+ " EC to provide data!\n");
+ if (restarts++ < 10)
+ goto restart;
+ goto err;
+ }
+ outbuf[i] = inb(0x68);
+ pr_devel("olpc-ec: received 0x%x\n", outbuf[i]);
+ }
+ }
+
+ ret = 0;
+err:
+ spin_unlock_irqrestore(&ec_lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(olpc_ec_cmd);
+
+void olpc_ec_wakeup_set(u16 value)
+{
+ ec_wakeup_mask |= value;
+}
+EXPORT_SYMBOL_GPL(olpc_ec_wakeup_set);
+
+void olpc_ec_wakeup_clear(u16 value)
+{
+ ec_wakeup_mask &= ~value;
+}
+EXPORT_SYMBOL_GPL(olpc_ec_wakeup_clear);
+
+/*
+ * Returns true if the compile and runtime configurations allow for EC events
+ * to wake the system.
+ */
+bool olpc_ec_wakeup_available(void)
+{
+ if (!machine_is_olpc())
+ return false;
+
+ /*
+ * XO-1 EC wakeups are available when olpc-xo1-sci driver is
+ * compiled in
+ */
+#ifdef CONFIG_OLPC_XO1_SCI
+ if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) /* XO-1 */
+ return true;
+#endif
+
+ /*
+ * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is
+ * compiled in
+ */
+#ifdef CONFIG_OLPC_XO15_SCI
+ if (olpc_platform_info.boardrev >= olpc_board_pre(0xd0)) /* XO-1.5 */
+ return true;
+#endif
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(olpc_ec_wakeup_available);
+
+int olpc_ec_mask_write(u16 bits)
+{
+ if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) {
+ __be16 ec_word = cpu_to_be16(bits);
+ return olpc_ec_cmd(EC_WRITE_EXT_SCI_MASK, (void *) &ec_word, 2,
+ NULL, 0);
+ } else {
+ unsigned char ec_byte = bits & 0xff;
+ return olpc_ec_cmd(EC_WRITE_SCI_MASK, &ec_byte, 1, NULL, 0);
+ }
+}
+EXPORT_SYMBOL_GPL(olpc_ec_mask_write);
+
+int olpc_ec_sci_query(u16 *sci_value)
+{
+ int ret;
+
+ if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) {
+ __be16 ec_word;
+ ret = olpc_ec_cmd(EC_EXT_SCI_QUERY,
+ NULL, 0, (void *) &ec_word, 2);
+ if (ret == 0)
+ *sci_value = be16_to_cpu(ec_word);
+ } else {
+ unsigned char ec_byte;
+ ret = olpc_ec_cmd(EC_SCI_QUERY, NULL, 0, &ec_byte, 1);
+ if (ret == 0)
+ *sci_value = ec_byte;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(olpc_ec_sci_query);
+
+static int olpc_ec_suspend(void)
+{
+ return olpc_ec_mask_write(ec_wakeup_mask);
+}
+
+static bool __init check_ofw_architecture(struct device_node *root)
+{
+ const char *olpc_arch;
+ int propsize;
+
+ olpc_arch = of_get_property(root, "architecture", &propsize);
+ return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0;
+}
+
+static u32 __init get_board_revision(struct device_node *root)
+{
+ int propsize;
+ const __be32 *rev;
+
+ rev = of_get_property(root, "board-revision-int", &propsize);
+ if (propsize != 4)
+ return 0;
+
+ return be32_to_cpu(*rev);
+}
+
+static bool __init platform_detect(void)
+{
+ struct device_node *root = of_find_node_by_path("/");
+ bool success;
+
+ if (!root)
+ return false;
+
+ success = check_ofw_architecture(root);
+ if (success) {
+ olpc_platform_info.boardrev = get_board_revision(root);
+ olpc_platform_info.flags |= OLPC_F_PRESENT;
+ }
+
+ of_node_put(root);
+ return success;
+}
+
+static int __init add_xo1_platform_devices(void)
+{
+ struct platform_device *pdev;
+
+ pdev = platform_device_register_simple("xo1-rfkill", -1, NULL, 0);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
+
+ pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
+
+ return 0;
+}
+
+static struct syscore_ops olpc_syscore_ops = {
+ .suspend = olpc_ec_suspend,
+};
+
+static int __init olpc_init(void)
+{
+ int r = 0;
+
+ if (!olpc_ofw_present() || !platform_detect())
+ return 0;
+
+ spin_lock_init(&ec_lock);
+
+ /* assume B1 and above models always have a DCON */
+ if (olpc_board_at_least(olpc_board(0xb1)))
+ olpc_platform_info.flags |= OLPC_F_DCON;
+
+ /* get the EC revision */
+ olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
+ (unsigned char *) &olpc_platform_info.ecver, 1);
+
+#ifdef CONFIG_PCI_OLPC
+ /* If the VSA exists let it emulate PCI, if not emulate in kernel.
+ * XO-1 only. */
+ if (olpc_platform_info.boardrev < olpc_board_pre(0xd0) &&
+ !cs5535_has_vsa2())
+ x86_init.pci.arch_init = pci_olpc_init;
+#endif
+ /* EC version 0x5f adds support for wide SCI mask */
+ if (olpc_platform_info.ecver >= 0x5f)
+ olpc_platform_info.flags |= OLPC_F_EC_WIDE_SCI;
+
+ printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
+ ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
+ olpc_platform_info.boardrev >> 4,
+ olpc_platform_info.ecver);
+
+ if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */
+ r = add_xo1_platform_devices();
+ if (r)
+ return r;
+ }
+
+ register_syscore_ops(&olpc_syscore_ops);
+
+ return 0;
+}
+
+postcore_initcall(olpc_init);
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
new file mode 100644
index 00000000000..d6ee9298692
--- /dev/null
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -0,0 +1,304 @@
+/*
+ * OLPC-specific OFW device tree support code.
+ *
+ * Paul Mackerras August 1996.
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
+ * {engebret|bergner}@us.ibm.com
+ *
+ * Adapted for sparc by David S. Miller davem@davemloft.net
+ * Adapted for x86/OLPC by Andres Salomon <dilinger@queued.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bootmem.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_pdt.h>
+#include <asm/olpc.h>
+#include <asm/olpc_ofw.h>
+
+static phandle __init olpc_dt_getsibling(phandle node)
+{
+ const void *args[] = { (void *)node };
+ void *res[] = { &node };
+
+ if ((s32)node == -1)
+ return 0;
+
+ if (olpc_ofw("peer", args, res) || (s32)node == -1)
+ return 0;
+
+ return node;
+}
+
+static phandle __init olpc_dt_getchild(phandle node)
+{
+ const void *args[] = { (void *)node };
+ void *res[] = { &node };
+
+ if ((s32)node == -1)
+ return 0;
+
+ if (olpc_ofw("child", args, res) || (s32)node == -1) {
+ pr_err("PROM: %s: fetching child failed!\n", __func__);
+ return 0;
+ }
+
+ return node;
+}
+
+static int __init olpc_dt_getproplen(phandle node, const char *prop)
+{
+ const void *args[] = { (void *)node, prop };
+ int len;
+ void *res[] = { &len };
+
+ if ((s32)node == -1)
+ return -1;
+
+ if (olpc_ofw("getproplen", args, res)) {
+ pr_err("PROM: %s: getproplen failed!\n", __func__);
+ return -1;
+ }
+
+ return len;
+}
+
+static int __init olpc_dt_getproperty(phandle node, const char *prop,
+ char *buf, int bufsize)
+{
+ int plen;
+
+ plen = olpc_dt_getproplen(node, prop);
+ if (plen > bufsize || plen < 1) {
+ return -1;
+ } else {
+ const void *args[] = { (void *)node, prop, buf, (void *)plen };
+ void *res[] = { &plen };
+
+ if (olpc_ofw("getprop", args, res)) {
+ pr_err("PROM: %s: getprop failed!\n", __func__);
+ return -1;
+ }
+ }
+
+ return plen;
+}
+
+static int __init olpc_dt_nextprop(phandle node, char *prev, char *buf)
+{
+ const void *args[] = { (void *)node, prev, buf };
+ int success;
+ void *res[] = { &success };
+
+ buf[0] = '\0';
+
+ if ((s32)node == -1)
+ return -1;
+
+ if (olpc_ofw("nextprop", args, res) || success != 1)
+ return -1;
+
+ return 0;
+}
+
+static int __init olpc_dt_pkg2path(phandle node, char *buf,
+ const int buflen, int *len)
+{
+ const void *args[] = { (void *)node, buf, (void *)buflen };
+ void *res[] = { len };
+
+ if ((s32)node == -1)
+ return -1;
+
+ if (olpc_ofw("package-to-path", args, res) || *len < 1)
+ return -1;
+
+ return 0;
+}
+
+static unsigned int prom_early_allocated __initdata;
+
+void * __init prom_early_alloc(unsigned long size)
+{
+ static u8 *mem;
+ static size_t free_mem;
+ void *res;
+
+ if (free_mem < size) {
+ const size_t chunk_size = max(PAGE_SIZE, size);
+
+ /*
+ * To mimimize the number of allocations, grab at least
+ * PAGE_SIZE of memory (that's an arbitrary choice that's
+ * fast enough on the platforms we care about while minimizing
+ * wasted bootmem) and hand off chunks of it to callers.
+ */
+ res = alloc_bootmem(chunk_size);
+ BUG_ON(!res);
+ prom_early_allocated += chunk_size;
+ memset(res, 0, chunk_size);
+ free_mem = chunk_size;
+ mem = res;
+ }
+
+ /* allocate from the local cache */
+ free_mem -= size;
+ res = mem;
+ mem += size;
+ return res;
+}
+
+static struct of_pdt_ops prom_olpc_ops __initdata = {
+ .nextprop = olpc_dt_nextprop,
+ .getproplen = olpc_dt_getproplen,
+ .getproperty = olpc_dt_getproperty,
+ .getchild = olpc_dt_getchild,
+ .getsibling = olpc_dt_getsibling,
+ .pkg2path = olpc_dt_pkg2path,
+};
+
+static phandle __init olpc_dt_finddevice(const char *path)
+{
+ phandle node;
+ const void *args[] = { path };
+ void *res[] = { &node };
+
+ if (olpc_ofw("finddevice", args, res)) {
+ pr_err("olpc_dt: finddevice failed!\n");
+ return 0;
+ }
+
+ if ((s32) node == -1)
+ return 0;
+
+ return node;
+}
+
+static int __init olpc_dt_interpret(const char *words)
+{
+ int result;
+ const void *args[] = { words };
+ void *res[] = { &result };
+
+ if (olpc_ofw("interpret", args, res)) {
+ pr_err("olpc_dt: interpret failed!\n");
+ return -1;
+ }
+
+ return result;
+}
+
+/*
+ * Extract board revision directly from OFW device tree.
+ * We can't use olpc_platform_info because that hasn't been set up yet.
+ */
+static u32 __init olpc_dt_get_board_revision(void)
+{
+ phandle node;
+ __be32 rev;
+ int r;
+
+ node = olpc_dt_finddevice("/");
+ if (!node)
+ return 0;
+
+ r = olpc_dt_getproperty(node, "board-revision-int",
+ (char *) &rev, sizeof(rev));
+ if (r < 0)
+ return 0;
+
+ return be32_to_cpu(rev);
+}
+
+void __init olpc_dt_fixup(void)
+{
+ int r;
+ char buf[64];
+ phandle node;
+ u32 board_rev;
+
+ node = olpc_dt_finddevice("/battery@0");
+ if (!node)
+ return;
+
+ /*
+ * If the battery node has a compatible property, we are running a new
+ * enough firmware and don't have fixups to make.
+ */
+ r = olpc_dt_getproperty(node, "compatible", buf, sizeof(buf));
+ if (r > 0)
+ return;
+
+ pr_info("PROM DT: Old firmware detected, applying fixes\n");
+
+ /* Add olpc,xo1-battery compatible marker to battery node */
+ olpc_dt_interpret("\" /battery@0\" find-device"
+ " \" olpc,xo1-battery\" +compatible"
+ " device-end");
+
+ board_rev = olpc_dt_get_board_revision();
+ if (!board_rev)
+ return;
+
+ if (board_rev >= olpc_board_pre(0xd0)) {
+ /* XO-1.5: add dcon device */
+ olpc_dt_interpret("\" /pci/display@1\" find-device"
+ " new-device"
+ " \" dcon\" device-name \" olpc,xo1-dcon\" +compatible"
+ " finish-device device-end");
+ } else {
+ /* XO-1: add dcon device, mark RTC as olpc,xo1-rtc */
+ olpc_dt_interpret("\" /pci/display@1,1\" find-device"
+ " new-device"
+ " \" dcon\" device-name \" olpc,xo1-dcon\" +compatible"
+ " finish-device device-end"
+ " \" /rtc\" find-device"
+ " \" olpc,xo1-rtc\" +compatible"
+ " device-end");
+ }
+}
+
+void __init olpc_dt_build_devicetree(void)
+{
+ phandle root;
+
+ if (!olpc_ofw_is_installed())
+ return;
+
+ olpc_dt_fixup();
+
+ root = olpc_dt_getsibling(0);
+ if (!root) {
+ pr_err("PROM: unable to get root node from OFW!\n");
+ return;
+ }
+ of_pdt_build_devicetree(root, &prom_olpc_ops);
+
+ pr_info("PROM DT: Built device tree with %u bytes of memory.\n",
+ prom_early_allocated);
+}
+
+/* A list of DT node/bus matches that we want to expose as platform devices */
+static struct of_device_id __initdata of_ids[] = {
+ { .compatible = "olpc,xo1-battery" },
+ { .compatible = "olpc,xo1-dcon" },
+ { .compatible = "olpc,xo1-rtc" },
+ {},
+};
+
+static int __init olpc_create_platform_devices(void)
+{
+ if (machine_is_olpc())
+ return of_platform_bus_probe(NULL, of_ids, NULL);
+ else
+ return 0;
+}
+device_initcall(olpc_create_platform_devices);
diff --git a/arch/x86/platform/olpc/olpc_ofw.c b/arch/x86/platform/olpc/olpc_ofw.c
new file mode 100644
index 00000000000..e7604f62870
--- /dev/null
+++ b/arch/x86/platform/olpc/olpc_ofw.c
@@ -0,0 +1,117 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/olpc_ofw.h>
+
+/* address of OFW callback interface; will be NULL if OFW isn't found */
+static int (*olpc_ofw_cif)(int *);
+
+/* page dir entry containing OFW's pgdir table; filled in by head_32.S */
+u32 olpc_ofw_pgd __initdata;
+
+static DEFINE_SPINLOCK(ofw_lock);
+
+#define MAXARGS 10
+
+void __init setup_olpc_ofw_pgd(void)
+{
+ pgd_t *base, *ofw_pde;
+
+ if (!olpc_ofw_cif)
+ return;
+
+ /* fetch OFW's PDE */
+ base = early_ioremap(olpc_ofw_pgd, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD);
+ if (!base) {
+ printk(KERN_ERR "failed to remap OFW's pgd - disabling OFW!\n");
+ olpc_ofw_cif = NULL;
+ return;
+ }
+ ofw_pde = &base[OLPC_OFW_PDE_NR];
+
+ /* install OFW's PDE permanently into the kernel's pgtable */
+ set_pgd(&swapper_pg_dir[OLPC_OFW_PDE_NR], *ofw_pde);
+ /* implicit optimization barrier here due to uninline function return */
+
+ early_iounmap(base, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD);
+}
+
+int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res,
+ void **res)
+{
+ int ofw_args[MAXARGS + 3];
+ unsigned long flags;
+ int ret, i, *p;
+
+ BUG_ON(nr_args + nr_res > MAXARGS);
+
+ if (!olpc_ofw_cif)
+ return -EIO;
+
+ ofw_args[0] = (int)name;
+ ofw_args[1] = nr_args;
+ ofw_args[2] = nr_res;
+
+ p = &ofw_args[3];
+ for (i = 0; i < nr_args; i++, p++)
+ *p = (int)args[i];
+
+ /* call into ofw */
+ spin_lock_irqsave(&ofw_lock, flags);
+ ret = olpc_ofw_cif(ofw_args);
+ spin_unlock_irqrestore(&ofw_lock, flags);
+
+ if (!ret) {
+ for (i = 0; i < nr_res; i++, p++)
+ *((int *)res[i]) = *p;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__olpc_ofw);
+
+bool olpc_ofw_present(void)
+{
+ return olpc_ofw_cif != NULL;
+}
+EXPORT_SYMBOL_GPL(olpc_ofw_present);
+
+/* OFW cif _should_ be above this address */
+#define OFW_MIN 0xff000000
+
+/* OFW starts on a 1MB boundary */
+#define OFW_BOUND (1<<20)
+
+void __init olpc_ofw_detect(void)
+{
+ struct olpc_ofw_header *hdr = &boot_params.olpc_ofw_header;
+ unsigned long start;
+
+ /* ensure OFW booted us by checking for "OFW " string */
+ if (hdr->ofw_magic != OLPC_OFW_SIG)
+ return;
+
+ olpc_ofw_cif = (int (*)(int *))hdr->cif_handler;
+
+ if ((unsigned long)olpc_ofw_cif < OFW_MIN) {
+ printk(KERN_ERR "OFW detected, but cif has invalid address 0x%lx - disabling.\n",
+ (unsigned long)olpc_ofw_cif);
+ olpc_ofw_cif = NULL;
+ return;
+ }
+
+ /* determine where OFW starts in memory */
+ start = round_down((unsigned long)olpc_ofw_cif, OFW_BOUND);
+ printk(KERN_INFO "OFW detected in memory, cif @ 0x%lx (reserving top %ldMB)\n",
+ (unsigned long)olpc_ofw_cif, (-start) >> 20);
+ reserve_top_address(-start);
+}
+
+bool __init olpc_ofw_is_installed(void)
+{
+ return olpc_ofw_cif != NULL;
+}
diff --git a/arch/x86/platform/olpc/xo1-wakeup.S b/arch/x86/platform/olpc/xo1-wakeup.S
new file mode 100644
index 00000000000..948deb28975
--- /dev/null
+++ b/arch/x86/platform/olpc/xo1-wakeup.S
@@ -0,0 +1,124 @@
+.text
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/pgtable_32.h>
+
+ .macro writepost,value
+ movb $0x34, %al
+ outb %al, $0x70
+ movb $\value, %al
+ outb %al, $0x71
+ .endm
+
+wakeup_start:
+ # OFW lands us here, running in protected mode, with a
+ # kernel-compatible GDT already setup.
+
+ # Clear any dangerous flags
+ pushl $0
+ popfl
+
+ writepost 0x31
+
+ # Set up %cr3
+ movl $initial_page_table - __PAGE_OFFSET, %eax
+ movl %eax, %cr3
+
+ movl saved_cr4, %eax
+ movl %eax, %cr4
+
+ movl saved_cr0, %eax
+ movl %eax, %cr0
+
+ # Control registers were modified, pipeline resync is needed
+ jmp 1f
+1:
+
+ movw $__KERNEL_DS, %ax
+ movw %ax, %ss
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %fs
+ movw %ax, %gs
+
+ lgdt saved_gdt
+ lidt saved_idt
+ lldt saved_ldt
+ ljmp $(__KERNEL_CS),$1f
+1:
+ movl %cr3, %eax
+ movl %eax, %cr3
+ wbinvd
+
+ # Go back to the return point
+ jmp ret_point
+
+save_registers:
+ sgdt saved_gdt
+ sidt saved_idt
+ sldt saved_ldt
+
+ pushl %edx
+ movl %cr4, %edx
+ movl %edx, saved_cr4
+
+ movl %cr0, %edx
+ movl %edx, saved_cr0
+
+ popl %edx
+
+ movl %ebx, saved_context_ebx
+ movl %ebp, saved_context_ebp
+ movl %esi, saved_context_esi
+ movl %edi, saved_context_edi
+
+ pushfl
+ popl saved_context_eflags
+
+ ret
+
+restore_registers:
+ movl saved_context_ebp, %ebp
+ movl saved_context_ebx, %ebx
+ movl saved_context_esi, %esi
+ movl saved_context_edi, %edi
+
+ pushl saved_context_eflags
+ popfl
+
+ ret
+
+ENTRY(do_olpc_suspend_lowlevel)
+ call save_processor_state
+ call save_registers
+
+ # This is the stack context we want to remember
+ movl %esp, saved_context_esp
+
+ pushl $3
+ call xo1_do_sleep
+
+ jmp wakeup_start
+ .p2align 4,,7
+ret_point:
+ movl saved_context_esp, %esp
+
+ writepost 0x32
+
+ call restore_registers
+ call restore_processor_state
+ ret
+
+.data
+saved_gdt: .long 0,0
+saved_idt: .long 0,0
+saved_ldt: .long 0
+saved_cr4: .long 0
+saved_cr0: .long 0
+saved_context_esp: .long 0
+saved_context_edi: .long 0
+saved_context_esi: .long 0
+saved_context_ebx: .long 0
+saved_context_ebp: .long 0
+saved_context_eflags: .long 0
diff --git a/arch/x86/platform/scx200/Makefile b/arch/x86/platform/scx200/Makefile
new file mode 100644
index 00000000000..762b4c7f431
--- /dev/null
+++ b/arch/x86/platform/scx200/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_SCx200) += scx200.o
+scx200-y += scx200_32.o
diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c
new file mode 100644
index 00000000000..7e004acbe52
--- /dev/null
+++ b/arch/x86/platform/scx200/scx200_32.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com>
+ *
+ * National Semiconductor SCx200 support.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+
+#include <linux/scx200.h>
+#include <linux/scx200_gpio.h>
+
+/* Verify that the configuration block really is there */
+#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
+
+#define NAME "scx200"
+
+MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>");
+MODULE_DESCRIPTION("NatSemi SCx200 Driver");
+MODULE_LICENSE("GPL");
+
+unsigned scx200_gpio_base = 0;
+unsigned long scx200_gpio_shadow[2];
+
+unsigned scx200_cb_base = 0;
+
+static struct pci_device_id scx200_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
+ { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
+ { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
+ { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
+ { },
+};
+MODULE_DEVICE_TABLE(pci,scx200_tbl);
+
+static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *);
+
+static struct pci_driver scx200_pci_driver = {
+ .name = "scx200",
+ .id_table = scx200_tbl,
+ .probe = scx200_probe,
+};
+
+static DEFINE_MUTEX(scx200_gpio_config_lock);
+
+static void __devinit scx200_init_shadow(void)
+{
+ int bank;
+
+ /* read the current values driven on the GPIO signals */
+ for (bank = 0; bank < 2; ++bank)
+ scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
+}
+
+static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ unsigned base;
+
+ if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
+ pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) {
+ base = pci_resource_start(pdev, 0);
+ printk(KERN_INFO NAME ": GPIO base 0x%x\n", base);
+
+ if (!request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO")) {
+ printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n");
+ return -EBUSY;
+ }
+
+ scx200_gpio_base = base;
+ scx200_init_shadow();
+
+ } else {
+ /* find the base of the Configuration Block */
+ if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) {
+ scx200_cb_base = SCx200_CB_BASE_FIXED;
+ } else {
+ pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base);
+ if (scx200_cb_probe(base)) {
+ scx200_cb_base = base;
+ } else {
+ printk(KERN_WARNING NAME ": Configuration Block not found\n");
+ return -ENODEV;
+ }
+ }
+ printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base);
+ }
+
+ return 0;
+}
+
+u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits)
+{
+ u32 config, new_config;
+
+ mutex_lock(&scx200_gpio_config_lock);
+
+ outl(index, scx200_gpio_base + 0x20);
+ config = inl(scx200_gpio_base + 0x24);
+
+ new_config = (config & mask) | bits;
+ outl(new_config, scx200_gpio_base + 0x24);
+
+ mutex_unlock(&scx200_gpio_config_lock);
+
+ return config;
+}
+
+static int __init scx200_init(void)
+{
+ printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n");
+
+ return pci_register_driver(&scx200_pci_driver);
+}
+
+static void __exit scx200_cleanup(void)
+{
+ pci_unregister_driver(&scx200_pci_driver);
+ release_region(scx200_gpio_base, SCx200_GPIO_SIZE);
+}
+
+module_init(scx200_init);
+module_exit(scx200_cleanup);
+
+EXPORT_SYMBOL(scx200_gpio_base);
+EXPORT_SYMBOL(scx200_gpio_shadow);
+EXPORT_SYMBOL(scx200_gpio_configure);
+EXPORT_SYMBOL(scx200_cb_base);
diff --git a/arch/x86/platform/sfi/Makefile b/arch/x86/platform/sfi/Makefile
new file mode 100644
index 00000000000..cc5db1168a5
--- /dev/null
+++ b/arch/x86/platform/sfi/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SFI) += sfi.o
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
new file mode 100644
index 00000000000..7785b72ecc3
--- /dev/null
+++ b/arch/x86/platform/sfi/sfi.c
@@ -0,0 +1,109 @@
+/*
+ * sfi.c - x86 architecture SFI support.
+ *
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#define KMSG_COMPONENT "SFI"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/io.h>
+
+#include <asm/io_apic.h>
+#include <asm/mpspec.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+
+/* All CPUs enumerated by SFI must be present and enabled */
+static void __cpuinit mp_sfi_register_lapic(u8 id)
+{
+ if (MAX_LOCAL_APIC - id <= 0) {
+ pr_warning("Processor #%d invalid (max %d)\n",
+ id, MAX_LOCAL_APIC);
+ return;
+ }
+
+ pr_info("registering lapic[%d]\n", id);
+
+ generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
+}
+
+static int __init sfi_parse_cpus(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_cpu_table_entry *pentry;
+ int i;
+ int cpu_num;
+
+ sb = (struct sfi_table_simple *)table;
+ cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
+ pentry = (struct sfi_cpu_table_entry *)sb->pentry;
+
+ for (i = 0; i < cpu_num; i++) {
+ mp_sfi_register_lapic(pentry->apic_id);
+ pentry++;
+ }
+
+ smp_found_config = 1;
+ return 0;
+}
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_X86_IO_APIC
+
+static int __init sfi_parse_ioapic(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_apic_table_entry *pentry;
+ int i, num;
+
+ sb = (struct sfi_table_simple *)table;
+ num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
+ pentry = (struct sfi_apic_table_entry *)sb->pentry;
+
+ for (i = 0; i < num; i++) {
+ mp_register_ioapic(i, pentry->phys_addr, gsi_top);
+ pentry++;
+ }
+
+ WARN(pic_mode, KERN_WARNING
+ "SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n");
+ pic_mode = 0;
+ return 0;
+}
+#endif /* CONFIG_X86_IO_APIC */
+
+/*
+ * sfi_platform_init(): register lapics & io-apics
+ */
+int __init sfi_platform_init(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ register_lapic_address(sfi_lapic_addr);
+ sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic);
+#endif
+ return 0;
+}
diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile
new file mode 100644
index 00000000000..6c40995fefb
--- /dev/null
+++ b/arch/x86/platform/uv/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
new file mode 100644
index 00000000000..766612137a6
--- /dev/null
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -0,0 +1,216 @@
+/*
+ * BIOS run time interface routines.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) Russ Anderson <rja@sgi.com>
+ */
+
+#include <linux/efi.h>
+#include <linux/export.h>
+#include <asm/efi.h>
+#include <linux/io.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv_hub.h>
+
+static struct uv_systab uv_systab;
+
+s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
+{
+ struct uv_systab *tab = &uv_systab;
+ s64 ret;
+
+ if (!tab->function)
+ /*
+ * BIOS does not support UV systab
+ */
+ return BIOS_STATUS_UNIMPLEMENTED;
+
+ ret = efi_call6((void *)__va(tab->function), (u64)which,
+ a1, a2, a3, a4, a5);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(uv_bios_call);
+
+s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+ u64 a4, u64 a5)
+{
+ unsigned long bios_flags;
+ s64 ret;
+
+ local_irq_save(bios_flags);
+ ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+ local_irq_restore(bios_flags);
+
+ return ret;
+}
+
+s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+ u64 a4, u64 a5)
+{
+ s64 ret;
+
+ preempt_disable();
+ ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+ preempt_enable();
+
+ return ret;
+}
+
+
+long sn_partition_id;
+EXPORT_SYMBOL_GPL(sn_partition_id);
+long sn_coherency_id;
+EXPORT_SYMBOL_GPL(sn_coherency_id);
+long sn_region_size;
+EXPORT_SYMBOL_GPL(sn_region_size);
+long system_serial_number;
+EXPORT_SYMBOL_GPL(system_serial_number);
+int uv_type;
+EXPORT_SYMBOL_GPL(uv_type);
+
+
+s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
+ long *region, long *ssn)
+{
+ s64 ret;
+ u64 v0, v1;
+ union partition_info_u part;
+
+ ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
+ (u64)(&v0), (u64)(&v1), 0, 0);
+ if (ret != BIOS_STATUS_SUCCESS)
+ return ret;
+
+ part.val = v0;
+ if (uvtype)
+ *uvtype = part.hub_version;
+ if (partid)
+ *partid = part.partition_id;
+ if (coher)
+ *coher = part.coherence_id;
+ if (region)
+ *region = part.region_size;
+ if (ssn)
+ *ssn = v1;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(uv_bios_get_sn_info);
+
+int
+uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
+ unsigned long *intr_mmr_offset)
+{
+ u64 watchlist;
+ s64 ret;
+
+ /*
+ * bios returns watchlist number or negative error number.
+ */
+ ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
+ mq_size, (u64)intr_mmr_offset,
+ (u64)&watchlist, 0);
+ if (ret < BIOS_STATUS_SUCCESS)
+ return ret;
+
+ return watchlist;
+}
+EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc);
+
+int
+uv_bios_mq_watchlist_free(int blade, int watchlist_num)
+{
+ return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE,
+ blade, watchlist_num, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free);
+
+s64
+uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms)
+{
+ return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len,
+ perms, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_change_memprotect);
+
+s64
+uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
+{
+ s64 ret;
+
+ ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
+ (u64)addr, buf, (u64)len, 0);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
+
+s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
+{
+ return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
+ (u64)ticks_per_second, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_freq_base);
+
+/*
+ * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target
+ * @decode: true to enable target, false to disable target
+ * @domain: PCI domain number
+ * @bus: PCI bus number
+ *
+ * Returns:
+ * 0: Success
+ * -EINVAL: Invalid domain or bus number
+ * -ENOSYS: Capability not available
+ * -EBUSY: Legacy VGA I/O cannot be retargeted at this time
+ */
+int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
+{
+ return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET,
+ (u64)decode, (u64)domain, (u64)bus, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
+
+
+#ifdef CONFIG_EFI
+void uv_bios_init(void)
+{
+ struct uv_systab *tab;
+
+ if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+ (efi.uv_systab == (unsigned long)NULL)) {
+ printk(KERN_CRIT "No EFI UV System Table.\n");
+ uv_systab.function = (unsigned long)NULL;
+ return;
+ }
+
+ tab = (struct uv_systab *)ioremap(efi.uv_systab,
+ sizeof(struct uv_systab));
+ if (strncmp(tab->signature, "UVST", 4) != 0)
+ printk(KERN_ERR "bad signature in UV system table!");
+
+ /*
+ * Copy table to permanent spot for later use.
+ */
+ memcpy(&uv_systab, tab, sizeof(struct uv_systab));
+ iounmap(tab);
+
+ printk(KERN_INFO "EFI UV System Table Revision %d\n",
+ uv_systab.revision);
+}
+#else /* !CONFIG_EFI */
+
+void uv_bios_init(void) { }
+#endif
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
new file mode 100644
index 00000000000..3ae0e61abd2
--- /dev/null
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -0,0 +1,2123 @@
+/*
+ * SGI UltraViolet TLB flush routines.
+ *
+ * (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI.
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ */
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+
+#include <asm/mmu_context.h>
+#include <asm/uv/uv.h>
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_bau.h>
+#include <asm/apic.h>
+#include <asm/idle.h>
+#include <asm/tsc.h>
+#include <asm/irq_vectors.h>
+#include <asm/timer.h>
+
+/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
+static int timeout_base_ns[] = {
+ 20,
+ 160,
+ 1280,
+ 10240,
+ 81920,
+ 655360,
+ 5242880,
+ 167772160
+};
+
+static int timeout_us;
+static int nobau;
+static int baudisabled;
+static spinlock_t disable_lock;
+static cycles_t congested_cycles;
+
+/* tunables: */
+static int max_concurr = MAX_BAU_CONCURRENT;
+static int max_concurr_const = MAX_BAU_CONCURRENT;
+static int plugged_delay = PLUGGED_DELAY;
+static int plugsb4reset = PLUGSB4RESET;
+static int timeoutsb4reset = TIMEOUTSB4RESET;
+static int ipi_reset_limit = IPI_RESET_LIMIT;
+static int complete_threshold = COMPLETE_THRESHOLD;
+static int congested_respns_us = CONGESTED_RESPONSE_US;
+static int congested_reps = CONGESTED_REPS;
+static int congested_period = CONGESTED_PERIOD;
+
+static struct tunables tunables[] = {
+ {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
+ {&plugged_delay, PLUGGED_DELAY},
+ {&plugsb4reset, PLUGSB4RESET},
+ {&timeoutsb4reset, TIMEOUTSB4RESET},
+ {&ipi_reset_limit, IPI_RESET_LIMIT},
+ {&complete_threshold, COMPLETE_THRESHOLD},
+ {&congested_respns_us, CONGESTED_RESPONSE_US},
+ {&congested_reps, CONGESTED_REPS},
+ {&congested_period, CONGESTED_PERIOD}
+};
+
+static struct dentry *tunables_dir;
+static struct dentry *tunables_file;
+
+/* these correspond to the statistics printed by ptc_seq_show() */
+static char *stat_description[] = {
+ "sent: number of shootdown messages sent",
+ "stime: time spent sending messages",
+ "numuvhubs: number of hubs targeted with shootdown",
+ "numuvhubs16: number times 16 or more hubs targeted",
+ "numuvhubs8: number times 8 or more hubs targeted",
+ "numuvhubs4: number times 4 or more hubs targeted",
+ "numuvhubs2: number times 2 or more hubs targeted",
+ "numuvhubs1: number times 1 hub targeted",
+ "numcpus: number of cpus targeted with shootdown",
+ "dto: number of destination timeouts",
+ "retries: destination timeout retries sent",
+ "rok: : destination timeouts successfully retried",
+ "resetp: ipi-style resource resets for plugs",
+ "resett: ipi-style resource resets for timeouts",
+ "giveup: fall-backs to ipi-style shootdowns",
+ "sto: number of source timeouts",
+ "bz: number of stay-busy's",
+ "throt: number times spun in throttle",
+ "swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE",
+ "recv: shootdown messages received",
+ "rtime: time spent processing messages",
+ "all: shootdown all-tlb messages",
+ "one: shootdown one-tlb messages",
+ "mult: interrupts that found multiple messages",
+ "none: interrupts that found no messages",
+ "retry: number of retry messages processed",
+ "canc: number messages canceled by retries",
+ "nocan: number retries that found nothing to cancel",
+ "reset: number of ipi-style reset requests processed",
+ "rcan: number messages canceled by reset requests",
+ "disable: number times use of the BAU was disabled",
+ "enable: number times use of the BAU was re-enabled"
+};
+
+static int __init
+setup_nobau(char *arg)
+{
+ nobau = 1;
+ return 0;
+}
+early_param("nobau", setup_nobau);
+
+/* base pnode in this partition */
+static int uv_base_pnode __read_mostly;
+
+static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
+static DEFINE_PER_CPU(struct bau_control, bau_control);
+static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
+
+/*
+ * Determine the first node on a uvhub. 'Nodes' are used for kernel
+ * memory allocation.
+ */
+static int __init uvhub_to_first_node(int uvhub)
+{
+ int node, b;
+
+ for_each_online_node(node) {
+ b = uv_node_to_blade_id(node);
+ if (uvhub == b)
+ return node;
+ }
+ return -1;
+}
+
+/*
+ * Determine the apicid of the first cpu on a uvhub.
+ */
+static int __init uvhub_to_first_apicid(int uvhub)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu)
+ if (uvhub == uv_cpu_to_blade_id(cpu))
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return -1;
+}
+
+/*
+ * Free a software acknowledge hardware resource by clearing its Pending
+ * bit. This will return a reply to the sender.
+ * If the message has timed out, a reply has already been sent by the
+ * hardware but the resource has not been released. In that case our
+ * clear of the Timeout bit (as well) will free the resource. No reply will
+ * be sent (the hardware will only do one reply per message).
+ */
+static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
+ int do_acknowledge)
+{
+ unsigned long dw;
+ struct bau_pq_entry *msg;
+
+ msg = mdp->msg;
+ if (!msg->canceled && do_acknowledge) {
+ dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
+ write_mmr_sw_ack(dw);
+ }
+ msg->replied_to = 1;
+ msg->swack_vec = 0;
+}
+
+/*
+ * Process the receipt of a RETRY message
+ */
+static void bau_process_retry_msg(struct msg_desc *mdp,
+ struct bau_control *bcp)
+{
+ int i;
+ int cancel_count = 0;
+ unsigned long msg_res;
+ unsigned long mmr = 0;
+ struct bau_pq_entry *msg = mdp->msg;
+ struct bau_pq_entry *msg2;
+ struct ptc_stats *stat = bcp->statp;
+
+ stat->d_retries++;
+ /*
+ * cancel any message from msg+1 to the retry itself
+ */
+ for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
+ if (msg2 > mdp->queue_last)
+ msg2 = mdp->queue_first;
+ if (msg2 == msg)
+ break;
+
+ /* same conditions for cancellation as do_reset */
+ if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
+ (msg2->swack_vec) && ((msg2->swack_vec &
+ msg->swack_vec) == 0) &&
+ (msg2->sending_cpu == msg->sending_cpu) &&
+ (msg2->msg_type != MSG_NOOP)) {
+ mmr = read_mmr_sw_ack();
+ msg_res = msg2->swack_vec;
+ /*
+ * This is a message retry; clear the resources held
+ * by the previous message only if they timed out.
+ * If it has not timed out we have an unexpected
+ * situation to report.
+ */
+ if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
+ unsigned long mr;
+ /*
+ * Is the resource timed out?
+ * Make everyone ignore the cancelled message.
+ */
+ msg2->canceled = 1;
+ stat->d_canceled++;
+ cancel_count++;
+ mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
+ write_mmr_sw_ack(mr);
+ }
+ }
+ }
+ if (!cancel_count)
+ stat->d_nocanceled++;
+}
+
+/*
+ * Do all the things a cpu should do for a TLB shootdown message.
+ * Other cpu's may come here at the same time for this message.
+ */
+static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
+ int do_acknowledge)
+{
+ short socket_ack_count = 0;
+ short *sp;
+ struct atomic_short *asp;
+ struct ptc_stats *stat = bcp->statp;
+ struct bau_pq_entry *msg = mdp->msg;
+ struct bau_control *smaster = bcp->socket_master;
+
+ /*
+ * This must be a normal message, or retry of a normal message
+ */
+ if (msg->address == TLB_FLUSH_ALL) {
+ local_flush_tlb();
+ stat->d_alltlb++;
+ } else {
+ __flush_tlb_one(msg->address);
+ stat->d_onetlb++;
+ }
+ stat->d_requestee++;
+
+ /*
+ * One cpu on each uvhub has the additional job on a RETRY
+ * of releasing the resource held by the message that is
+ * being retried. That message is identified by sending
+ * cpu number.
+ */
+ if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
+ bau_process_retry_msg(mdp, bcp);
+
+ /*
+ * This is a swack message, so we have to reply to it.
+ * Count each responding cpu on the socket. This avoids
+ * pinging the count's cache line back and forth between
+ * the sockets.
+ */
+ sp = &smaster->socket_acknowledge_count[mdp->msg_slot];
+ asp = (struct atomic_short *)sp;
+ socket_ack_count = atom_asr(1, asp);
+ if (socket_ack_count == bcp->cpus_in_socket) {
+ int msg_ack_count;
+ /*
+ * Both sockets dump their completed count total into
+ * the message's count.
+ */
+ smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
+ asp = (struct atomic_short *)&msg->acknowledge_count;
+ msg_ack_count = atom_asr(socket_ack_count, asp);
+
+ if (msg_ack_count == bcp->cpus_in_uvhub) {
+ /*
+ * All cpus in uvhub saw it; reply
+ * (unless we are in the UV2 workaround)
+ */
+ reply_to_message(mdp, bcp, do_acknowledge);
+ }
+ }
+
+ return;
+}
+
+/*
+ * Determine the first cpu on a pnode.
+ */
+static int pnode_to_first_cpu(int pnode, struct bau_control *smaster)
+{
+ int cpu;
+ struct hub_and_pnode *hpp;
+
+ for_each_present_cpu(cpu) {
+ hpp = &smaster->thp[cpu];
+ if (pnode == hpp->pnode)
+ return cpu;
+ }
+ return -1;
+}
+
+/*
+ * Last resort when we get a large number of destination timeouts is
+ * to clear resources held by a given cpu.
+ * Do this with IPI so that all messages in the BAU message queue
+ * can be identified by their nonzero swack_vec field.
+ *
+ * This is entered for a single cpu on the uvhub.
+ * The sender want's this uvhub to free a specific message's
+ * swack resources.
+ */
+static void do_reset(void *ptr)
+{
+ int i;
+ struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id());
+ struct reset_args *rap = (struct reset_args *)ptr;
+ struct bau_pq_entry *msg;
+ struct ptc_stats *stat = bcp->statp;
+
+ stat->d_resets++;
+ /*
+ * We're looking for the given sender, and
+ * will free its swack resource.
+ * If all cpu's finally responded after the timeout, its
+ * message 'replied_to' was set.
+ */
+ for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
+ unsigned long msg_res;
+ /* do_reset: same conditions for cancellation as
+ bau_process_retry_msg() */
+ if ((msg->replied_to == 0) &&
+ (msg->canceled == 0) &&
+ (msg->sending_cpu == rap->sender) &&
+ (msg->swack_vec) &&
+ (msg->msg_type != MSG_NOOP)) {
+ unsigned long mmr;
+ unsigned long mr;
+ /*
+ * make everyone else ignore this message
+ */
+ msg->canceled = 1;
+ /*
+ * only reset the resource if it is still pending
+ */
+ mmr = read_mmr_sw_ack();
+ msg_res = msg->swack_vec;
+ mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
+ if (mmr & msg_res) {
+ stat->d_rcanceled++;
+ write_mmr_sw_ack(mr);
+ }
+ }
+ }
+ return;
+}
+
+/*
+ * Use IPI to get all target uvhubs to release resources held by
+ * a given sending cpu number.
+ */
+static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
+{
+ int pnode;
+ int apnode;
+ int maskbits;
+ int sender = bcp->cpu;
+ cpumask_t *mask = bcp->uvhub_master->cpumask;
+ struct bau_control *smaster = bcp->socket_master;
+ struct reset_args reset_args;
+
+ reset_args.sender = sender;
+ cpus_clear(*mask);
+ /* find a single cpu for each uvhub in this distribution mask */
+ maskbits = sizeof(struct pnmask) * BITSPERBYTE;
+ /* each bit is a pnode relative to the partition base pnode */
+ for (pnode = 0; pnode < maskbits; pnode++) {
+ int cpu;
+ if (!bau_uvhub_isset(pnode, distribution))
+ continue;
+ apnode = pnode + bcp->partition_base_pnode;
+ cpu = pnode_to_first_cpu(apnode, smaster);
+ cpu_set(cpu, *mask);
+ }
+
+ /* IPI all cpus; preemption is already disabled */
+ smp_call_function_many(mask, do_reset, (void *)&reset_args, 1);
+ return;
+}
+
+static inline unsigned long cycles_2_us(unsigned long long cyc)
+{
+ unsigned long long ns;
+ unsigned long us;
+ int cpu = smp_processor_id();
+
+ ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR;
+ us = ns / 1000;
+ return us;
+}
+
+/*
+ * wait for all cpus on this hub to finish their sends and go quiet
+ * leaves uvhub_quiesce set so that no new broadcasts are started by
+ * bau_flush_send_and_wait()
+ */
+static inline void quiesce_local_uvhub(struct bau_control *hmaster)
+{
+ atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce);
+}
+
+/*
+ * mark this quiet-requestor as done
+ */
+static inline void end_uvhub_quiesce(struct bau_control *hmaster)
+{
+ atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce);
+}
+
+static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift)
+{
+ unsigned long descriptor_status;
+
+ descriptor_status = uv_read_local_mmr(mmr_offset);
+ descriptor_status >>= right_shift;
+ descriptor_status &= UV_ACT_STATUS_MASK;
+ return descriptor_status;
+}
+
+/*
+ * Wait for completion of a broadcast software ack message
+ * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP
+ */
+static int uv1_wait_completion(struct bau_desc *bau_desc,
+ unsigned long mmr_offset, int right_shift,
+ struct bau_control *bcp, long try)
+{
+ unsigned long descriptor_status;
+ cycles_t ttm;
+ struct ptc_stats *stat = bcp->statp;
+
+ descriptor_status = uv1_read_status(mmr_offset, right_shift);
+ /* spin on the status MMR, waiting for it to go idle */
+ while ((descriptor_status != DS_IDLE)) {
+ /*
+ * Our software ack messages may be blocked because
+ * there are no swack resources available. As long
+ * as none of them has timed out hardware will NACK
+ * our message and its state will stay IDLE.
+ */
+ if (descriptor_status == DS_SOURCE_TIMEOUT) {
+ stat->s_stimeout++;
+ return FLUSH_GIVEUP;
+ } else if (descriptor_status == DS_DESTINATION_TIMEOUT) {
+ stat->s_dtimeout++;
+ ttm = get_cycles();
+
+ /*
+ * Our retries may be blocked by all destination
+ * swack resources being consumed, and a timeout
+ * pending. In that case hardware returns the
+ * ERROR that looks like a destination timeout.
+ */
+ if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
+ bcp->conseccompletes = 0;
+ return FLUSH_RETRY_PLUGGED;
+ }
+
+ bcp->conseccompletes = 0;
+ return FLUSH_RETRY_TIMEOUT;
+ } else {
+ /*
+ * descriptor_status is still BUSY
+ */
+ cpu_relax();
+ }
+ descriptor_status = uv1_read_status(mmr_offset, right_shift);
+ }
+ bcp->conseccompletes++;
+ return FLUSH_COMPLETE;
+}
+
+/*
+ * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
+ */
+static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
+{
+ unsigned long descriptor_status;
+ unsigned long descriptor_status2;
+
+ descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
+ descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL;
+ descriptor_status = (descriptor_status << 1) | descriptor_status2;
+ return descriptor_status;
+}
+
+/*
+ * Return whether the status of the descriptor that is normally used for this
+ * cpu (the one indexed by its hub-relative cpu number) is busy.
+ * The status of the original 32 descriptors is always reflected in the 64
+ * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0.
+ * The bit provided by the activation_status_2 register is irrelevant to
+ * the status if it is only being tested for busy or not busy.
+ */
+int normal_busy(struct bau_control *bcp)
+{
+ int cpu = bcp->uvhub_cpu;
+ int mmr_offset;
+ int right_shift;
+
+ mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
+ right_shift = cpu * UV_ACT_STATUS_SIZE;
+ return (((((read_lmmr(mmr_offset) >> right_shift) &
+ UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY);
+}
+
+/*
+ * Entered when a bau descriptor has gone into a permanent busy wait because
+ * of a hardware bug.
+ * Workaround the bug.
+ */
+int handle_uv2_busy(struct bau_control *bcp)
+{
+ int busy_one = bcp->using_desc;
+ int normal = bcp->uvhub_cpu;
+ int selected = -1;
+ int i;
+ unsigned long descriptor_status;
+ unsigned long status;
+ int mmr_offset;
+ struct bau_desc *bau_desc_old;
+ struct bau_desc *bau_desc_new;
+ struct bau_control *hmaster = bcp->uvhub_master;
+ struct ptc_stats *stat = bcp->statp;
+ cycles_t ttm;
+
+ stat->s_uv2_wars++;
+ spin_lock(&hmaster->uvhub_lock);
+ /* try for the original first */
+ if (busy_one != normal) {
+ if (!normal_busy(bcp))
+ selected = normal;
+ }
+ if (selected < 0) {
+ /* can't use the normal, select an alternate */
+ mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
+ descriptor_status = read_lmmr(mmr_offset);
+
+ /* scan available descriptors 32-63 */
+ for (i = 0; i < UV_CPUS_PER_AS; i++) {
+ if ((hmaster->inuse_map & (1 << i)) == 0) {
+ status = ((descriptor_status >>
+ (i * UV_ACT_STATUS_SIZE)) &
+ UV_ACT_STATUS_MASK) << 1;
+ if (status != UV2H_DESC_BUSY) {
+ selected = i + UV_CPUS_PER_AS;
+ break;
+ }
+ }
+ }
+ }
+
+ if (busy_one != normal)
+ /* mark the busy alternate as not in-use */
+ hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS));
+
+ if (selected >= 0) {
+ /* switch to the selected descriptor */
+ if (selected != normal) {
+ /* set the selected alternate as in-use */
+ hmaster->inuse_map |=
+ (1 << (selected - UV_CPUS_PER_AS));
+ if (selected > stat->s_uv2_wars_hw)
+ stat->s_uv2_wars_hw = selected;
+ }
+ bau_desc_old = bcp->descriptor_base;
+ bau_desc_old += (ITEMS_PER_DESC * busy_one);
+ bcp->using_desc = selected;
+ bau_desc_new = bcp->descriptor_base;
+ bau_desc_new += (ITEMS_PER_DESC * selected);
+ *bau_desc_new = *bau_desc_old;
+ } else {
+ /*
+ * All are busy. Wait for the normal one for this cpu to
+ * free up.
+ */
+ stat->s_uv2_war_waits++;
+ spin_unlock(&hmaster->uvhub_lock);
+ ttm = get_cycles();
+ do {
+ cpu_relax();
+ } while (normal_busy(bcp));
+ spin_lock(&hmaster->uvhub_lock);
+ /* switch to the original descriptor */
+ bcp->using_desc = normal;
+ bau_desc_old = bcp->descriptor_base;
+ bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc);
+ bcp->using_desc = (ITEMS_PER_DESC * normal);
+ bau_desc_new = bcp->descriptor_base;
+ bau_desc_new += (ITEMS_PER_DESC * normal);
+ *bau_desc_new = *bau_desc_old; /* copy the entire descriptor */
+ }
+ spin_unlock(&hmaster->uvhub_lock);
+ return FLUSH_RETRY_BUSYBUG;
+}
+
+static int uv2_wait_completion(struct bau_desc *bau_desc,
+ unsigned long mmr_offset, int right_shift,
+ struct bau_control *bcp, long try)
+{
+ unsigned long descriptor_stat;
+ cycles_t ttm;
+ int desc = bcp->using_desc;
+ long busy_reps = 0;
+ struct ptc_stats *stat = bcp->statp;
+
+ descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);
+
+ /* spin on the status MMR, waiting for it to go idle */
+ while (descriptor_stat != UV2H_DESC_IDLE) {
+ /*
+ * Our software ack messages may be blocked because
+ * there are no swack resources available. As long
+ * as none of them has timed out hardware will NACK
+ * our message and its state will stay IDLE.
+ */
+ if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
+ (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
+ stat->s_stimeout++;
+ return FLUSH_GIVEUP;
+ } else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) {
+ stat->s_strongnacks++;
+ bcp->conseccompletes = 0;
+ return FLUSH_GIVEUP;
+ } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
+ stat->s_dtimeout++;
+ bcp->conseccompletes = 0;
+ return FLUSH_RETRY_TIMEOUT;
+ } else {
+ busy_reps++;
+ if (busy_reps > 1000000) {
+ /* not to hammer on the clock */
+ busy_reps = 0;
+ ttm = get_cycles();
+ if ((ttm - bcp->send_message) >
+ (bcp->clocks_per_100_usec)) {
+ return handle_uv2_busy(bcp);
+ }
+ }
+ /*
+ * descriptor_stat is still BUSY
+ */
+ cpu_relax();
+ }
+ descriptor_stat = uv2_read_status(mmr_offset, right_shift,
+ desc);
+ }
+ bcp->conseccompletes++;
+ return FLUSH_COMPLETE;
+}
+
+/*
+ * There are 2 status registers; each and array[32] of 2 bits. Set up for
+ * which register to read and position in that register based on cpu in
+ * current hub.
+ */
+static int wait_completion(struct bau_desc *bau_desc,
+ struct bau_control *bcp, long try)
+{
+ int right_shift;
+ unsigned long mmr_offset;
+ int desc = bcp->using_desc;
+
+ if (desc < UV_CPUS_PER_AS) {
+ mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
+ right_shift = desc * UV_ACT_STATUS_SIZE;
+ } else {
+ mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
+ right_shift = ((desc - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
+ }
+
+ if (bcp->uvhub_version == 1)
+ return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
+ bcp, try);
+ else
+ return uv2_wait_completion(bau_desc, mmr_offset, right_shift,
+ bcp, try);
+}
+
+static inline cycles_t sec_2_cycles(unsigned long sec)
+{
+ unsigned long ns;
+ cycles_t cyc;
+
+ ns = sec * 1000000000;
+ cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
+ return cyc;
+}
+
+/*
+ * Our retries are blocked by all destination sw ack resources being
+ * in use, and a timeout is pending. In that case hardware immediately
+ * returns the ERROR that looks like a destination timeout.
+ */
+static void destination_plugged(struct bau_desc *bau_desc,
+ struct bau_control *bcp,
+ struct bau_control *hmaster, struct ptc_stats *stat)
+{
+ udelay(bcp->plugged_delay);
+ bcp->plugged_tries++;
+
+ if (bcp->plugged_tries >= bcp->plugsb4reset) {
+ bcp->plugged_tries = 0;
+
+ quiesce_local_uvhub(hmaster);
+
+ spin_lock(&hmaster->queue_lock);
+ reset_with_ipi(&bau_desc->distribution, bcp);
+ spin_unlock(&hmaster->queue_lock);
+
+ end_uvhub_quiesce(hmaster);
+
+ bcp->ipi_attempts++;
+ stat->s_resets_plug++;
+ }
+}
+
+static void destination_timeout(struct bau_desc *bau_desc,
+ struct bau_control *bcp, struct bau_control *hmaster,
+ struct ptc_stats *stat)
+{
+ hmaster->max_concurr = 1;
+ bcp->timeout_tries++;
+ if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
+ bcp->timeout_tries = 0;
+
+ quiesce_local_uvhub(hmaster);
+
+ spin_lock(&hmaster->queue_lock);
+ reset_with_ipi(&bau_desc->distribution, bcp);
+ spin_unlock(&hmaster->queue_lock);
+
+ end_uvhub_quiesce(hmaster);
+
+ bcp->ipi_attempts++;
+ stat->s_resets_timeout++;
+ }
+}
+
+/*
+ * Completions are taking a very long time due to a congested numalink
+ * network.
+ */
+static void disable_for_congestion(struct bau_control *bcp,
+ struct ptc_stats *stat)
+{
+ /* let only one cpu do this disabling */
+ spin_lock(&disable_lock);
+
+ if (!baudisabled && bcp->period_requests &&
+ ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
+ int tcpu;
+ struct bau_control *tbcp;
+ /* it becomes this cpu's job to turn on the use of the
+ BAU again */
+ baudisabled = 1;
+ bcp->set_bau_off = 1;
+ bcp->set_bau_on_time = get_cycles();
+ bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period);
+ stat->s_bau_disabled++;
+ for_each_present_cpu(tcpu) {
+ tbcp = &per_cpu(bau_control, tcpu);
+ tbcp->baudisabled = 1;
+ }
+ }
+
+ spin_unlock(&disable_lock);
+}
+
+static void count_max_concurr(int stat, struct bau_control *bcp,
+ struct bau_control *hmaster)
+{
+ bcp->plugged_tries = 0;
+ bcp->timeout_tries = 0;
+ if (stat != FLUSH_COMPLETE)
+ return;
+ if (bcp->conseccompletes <= bcp->complete_threshold)
+ return;
+ if (hmaster->max_concurr >= hmaster->max_concurr_const)
+ return;
+ hmaster->max_concurr++;
+}
+
+static void record_send_stats(cycles_t time1, cycles_t time2,
+ struct bau_control *bcp, struct ptc_stats *stat,
+ int completion_status, int try)
+{
+ cycles_t elapsed;
+
+ if (time2 > time1) {
+ elapsed = time2 - time1;
+ stat->s_time += elapsed;
+
+ if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
+ bcp->period_requests++;
+ bcp->period_time += elapsed;
+ if ((elapsed > congested_cycles) &&
+ (bcp->period_requests > bcp->cong_reps))
+ disable_for_congestion(bcp, stat);
+ }
+ } else
+ stat->s_requestor--;
+
+ if (completion_status == FLUSH_COMPLETE && try > 1)
+ stat->s_retriesok++;
+ else if (completion_status == FLUSH_GIVEUP)
+ stat->s_giveup++;
+}
+
+/*
+ * Because of a uv1 hardware bug only a limited number of concurrent
+ * requests can be made.
+ */
+static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
+{
+ spinlock_t *lock = &hmaster->uvhub_lock;
+ atomic_t *v;
+
+ v = &hmaster->active_descriptor_count;
+ if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) {
+ stat->s_throttles++;
+ do {
+ cpu_relax();
+ } while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr));
+ }
+}
+
+/*
+ * Handle the completion status of a message send.
+ */
+static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
+ struct bau_control *bcp, struct bau_control *hmaster,
+ struct ptc_stats *stat)
+{
+ if (completion_status == FLUSH_RETRY_PLUGGED)
+ destination_plugged(bau_desc, bcp, hmaster, stat);
+ else if (completion_status == FLUSH_RETRY_TIMEOUT)
+ destination_timeout(bau_desc, bcp, hmaster, stat);
+}
+
+/*
+ * Send a broadcast and wait for it to complete.
+ *
+ * The flush_mask contains the cpus the broadcast is to be sent to including
+ * cpus that are on the local uvhub.
+ *
+ * Returns 0 if all flushing represented in the mask was done.
+ * Returns 1 if it gives up entirely and the original cpu mask is to be
+ * returned to the kernel.
+ */
+int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
+{
+ int seq_number = 0;
+ int completion_stat = 0;
+ int uv1 = 0;
+ long try = 0;
+ unsigned long index;
+ cycles_t time1;
+ cycles_t time2;
+ struct ptc_stats *stat = bcp->statp;
+ struct bau_control *hmaster = bcp->uvhub_master;
+ struct uv1_bau_msg_header *uv1_hdr = NULL;
+ struct uv2_bau_msg_header *uv2_hdr = NULL;
+ struct bau_desc *bau_desc;
+
+ if (bcp->uvhub_version == 1)
+ uv1_throttle(hmaster, stat);
+
+ while (hmaster->uvhub_quiesce)
+ cpu_relax();
+
+ time1 = get_cycles();
+ do {
+ bau_desc = bcp->descriptor_base;
+ bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
+ if (bcp->uvhub_version == 1) {
+ uv1 = 1;
+ uv1_hdr = &bau_desc->header.uv1_hdr;
+ } else
+ uv2_hdr = &bau_desc->header.uv2_hdr;
+ if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) {
+ if (uv1)
+ uv1_hdr->msg_type = MSG_REGULAR;
+ else
+ uv2_hdr->msg_type = MSG_REGULAR;
+ seq_number = bcp->message_number++;
+ } else {
+ if (uv1)
+ uv1_hdr->msg_type = MSG_RETRY;
+ else
+ uv2_hdr->msg_type = MSG_RETRY;
+ stat->s_retry_messages++;
+ }
+
+ if (uv1)
+ uv1_hdr->sequence = seq_number;
+ else
+ uv2_hdr->sequence = seq_number;
+ index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc;
+ bcp->send_message = get_cycles();
+
+ write_mmr_activation(index);
+
+ try++;
+ completion_stat = wait_completion(bau_desc, bcp, try);
+ /* UV2: wait_completion() may change the bcp->using_desc */
+
+ handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
+
+ if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
+ bcp->ipi_attempts = 0;
+ completion_stat = FLUSH_GIVEUP;
+ break;
+ }
+ cpu_relax();
+ } while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
+ (completion_stat == FLUSH_RETRY_BUSYBUG) ||
+ (completion_stat == FLUSH_RETRY_TIMEOUT));
+
+ time2 = get_cycles();
+
+ count_max_concurr(completion_stat, bcp, hmaster);
+
+ while (hmaster->uvhub_quiesce)
+ cpu_relax();
+
+ atomic_dec(&hmaster->active_descriptor_count);
+
+ record_send_stats(time1, time2, bcp, stat, completion_stat, try);
+
+ if (completion_stat == FLUSH_GIVEUP)
+ /* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */
+ return 1;
+ return 0;
+}
+
+/*
+ * The BAU is disabled. When the disabled time period has expired, the cpu
+ * that disabled it must re-enable it.
+ * Return 0 if it is re-enabled for all cpus.
+ */
+static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
+{
+ int tcpu;
+ struct bau_control *tbcp;
+
+ if (bcp->set_bau_off) {
+ if (get_cycles() >= bcp->set_bau_on_time) {
+ stat->s_bau_reenabled++;
+ baudisabled = 0;
+ for_each_present_cpu(tcpu) {
+ tbcp = &per_cpu(bau_control, tcpu);
+ tbcp->baudisabled = 0;
+ tbcp->period_requests = 0;
+ tbcp->period_time = 0;
+ }
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs,
+ int remotes, struct bau_desc *bau_desc)
+{
+ stat->s_requestor++;
+ stat->s_ntargcpu += remotes + locals;
+ stat->s_ntargremotes += remotes;
+ stat->s_ntarglocals += locals;
+
+ /* uvhub statistics */
+ hubs = bau_uvhub_weight(&bau_desc->distribution);
+ if (locals) {
+ stat->s_ntarglocaluvhub++;
+ stat->s_ntargremoteuvhub += (hubs - 1);
+ } else
+ stat->s_ntargremoteuvhub += hubs;
+
+ stat->s_ntarguvhub += hubs;
+
+ if (hubs >= 16)
+ stat->s_ntarguvhub16++;
+ else if (hubs >= 8)
+ stat->s_ntarguvhub8++;
+ else if (hubs >= 4)
+ stat->s_ntarguvhub4++;
+ else if (hubs >= 2)
+ stat->s_ntarguvhub2++;
+ else
+ stat->s_ntarguvhub1++;
+}
+
+/*
+ * Translate a cpu mask to the uvhub distribution mask in the BAU
+ * activation descriptor.
+ */
+static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
+ struct bau_desc *bau_desc, int *localsp, int *remotesp)
+{
+ int cpu;
+ int pnode;
+ int cnt = 0;
+ struct hub_and_pnode *hpp;
+
+ for_each_cpu(cpu, flush_mask) {
+ /*
+ * The distribution vector is a bit map of pnodes, relative
+ * to the partition base pnode (and the partition base nasid
+ * in the header).
+ * Translate cpu to pnode and hub using a local memory array.
+ */
+ hpp = &bcp->socket_master->thp[cpu];
+ pnode = hpp->pnode - bcp->partition_base_pnode;
+ bau_uvhub_set(pnode, &bau_desc->distribution);
+ cnt++;
+ if (hpp->uvhub == bcp->uvhub)
+ (*localsp)++;
+ else
+ (*remotesp)++;
+ }
+ if (!cnt)
+ return 1;
+ return 0;
+}
+
+/*
+ * globally purge translation cache of a virtual address or all TLB's
+ * @cpumask: mask of all cpu's in which the address is to be removed
+ * @mm: mm_struct containing virtual address range
+ * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
+ * @cpu: the current cpu
+ *
+ * This is the entry point for initiating any UV global TLB shootdown.
+ *
+ * Purges the translation caches of all specified processors of the given
+ * virtual address, or purges all TLB's on specified processors.
+ *
+ * The caller has derived the cpumask from the mm_struct. This function
+ * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
+ *
+ * The cpumask is converted into a uvhubmask of the uvhubs containing
+ * those cpus.
+ *
+ * Note that this function should be called with preemption disabled.
+ *
+ * Returns NULL if all remote flushing was done.
+ * Returns pointer to cpumask if some remote flushing remains to be
+ * done. The returned pointer is valid till preemption is re-enabled.
+ */
+const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm, unsigned long va,
+ unsigned int cpu)
+{
+ int locals = 0;
+ int remotes = 0;
+ int hubs = 0;
+ struct bau_desc *bau_desc;
+ struct cpumask *flush_mask;
+ struct ptc_stats *stat;
+ struct bau_control *bcp;
+
+ /* kernel was booted 'nobau' */
+ if (nobau)
+ return cpumask;
+
+ bcp = &per_cpu(bau_control, cpu);
+ stat = bcp->statp;
+
+ /* bau was disabled due to slow response */
+ if (bcp->baudisabled) {
+ if (check_enable(bcp, stat))
+ return cpumask;
+ }
+
+ /*
+ * Each sending cpu has a per-cpu mask which it fills from the caller's
+ * cpu mask. All cpus are converted to uvhubs and copied to the
+ * activation descriptor.
+ */
+ flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
+ /* don't actually do a shootdown of the local cpu */
+ cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
+
+ if (cpu_isset(cpu, *cpumask))
+ stat->s_ntargself++;
+
+ bau_desc = bcp->descriptor_base;
+ bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
+ bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
+ if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
+ return NULL;
+
+ record_send_statistics(stat, locals, hubs, remotes, bau_desc);
+
+ bau_desc->payload.address = va;
+ bau_desc->payload.sending_cpu = cpu;
+ /*
+ * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
+ * or 1 if it gave up and the original cpumask should be returned.
+ */
+ if (!uv_flush_send_and_wait(flush_mask, bcp))
+ return NULL;
+ else
+ return cpumask;
+}
+
+/*
+ * Search the message queue for any 'other' message with the same software
+ * acknowledge resource bit vector.
+ */
+struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
+ struct bau_control *bcp, unsigned char swack_vec)
+{
+ struct bau_pq_entry *msg_next = msg + 1;
+
+ if (msg_next > bcp->queue_last)
+ msg_next = bcp->queue_first;
+ while ((msg_next->swack_vec != 0) && (msg_next != msg)) {
+ if (msg_next->swack_vec == swack_vec)
+ return msg_next;
+ msg_next++;
+ if (msg_next > bcp->queue_last)
+ msg_next = bcp->queue_first;
+ }
+ return NULL;
+}
+
+/*
+ * UV2 needs to work around a bug in which an arriving message has not
+ * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register.
+ * Such a message must be ignored.
+ */
+void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
+{
+ unsigned long mmr_image;
+ unsigned char swack_vec;
+ struct bau_pq_entry *msg = mdp->msg;
+ struct bau_pq_entry *other_msg;
+
+ mmr_image = read_mmr_sw_ack();
+ swack_vec = msg->swack_vec;
+
+ if ((swack_vec & mmr_image) == 0) {
+ /*
+ * This message was assigned a swack resource, but no
+ * reserved acknowlegment is pending.
+ * The bug has prevented this message from setting the MMR.
+ * And no other message has used the same sw_ack resource.
+ * Do the requested shootdown but do not reply to the msg.
+ * (the 0 means make no acknowledge)
+ */
+ bau_process_message(mdp, bcp, 0);
+ return;
+ }
+
+ /*
+ * Some message has set the MMR 'pending' bit; it might have been
+ * another message. Look for that message.
+ */
+ other_msg = find_another_by_swack(msg, bcp, msg->swack_vec);
+ if (other_msg) {
+ /* There is another. Do not ack the current one. */
+ bau_process_message(mdp, bcp, 0);
+ /*
+ * Let the natural processing of that message acknowledge
+ * it. Don't get the processing of sw_ack's out of order.
+ */
+ return;
+ }
+
+ /*
+ * There is no other message using this sw_ack, so it is safe to
+ * acknowledge it.
+ */
+ bau_process_message(mdp, bcp, 1);
+
+ return;
+}
+
+/*
+ * The BAU message interrupt comes here. (registered by set_intr_gate)
+ * See entry_64.S
+ *
+ * We received a broadcast assist message.
+ *
+ * Interrupts are disabled; this interrupt could represent
+ * the receipt of several messages.
+ *
+ * All cores/threads on this hub get this interrupt.
+ * The last one to see it does the software ack.
+ * (the resource will not be freed until noninterruptable cpus see this
+ * interrupt; hardware may timeout the s/w ack and reply ERROR)
+ */
+void uv_bau_message_interrupt(struct pt_regs *regs)
+{
+ int count = 0;
+ cycles_t time_start;
+ struct bau_pq_entry *msg;
+ struct bau_control *bcp;
+ struct ptc_stats *stat;
+ struct msg_desc msgdesc;
+
+ ack_APIC_irq();
+ time_start = get_cycles();
+
+ bcp = &per_cpu(bau_control, smp_processor_id());
+ stat = bcp->statp;
+
+ msgdesc.queue_first = bcp->queue_first;
+ msgdesc.queue_last = bcp->queue_last;
+
+ msg = bcp->bau_msg_head;
+ while (msg->swack_vec) {
+ count++;
+
+ msgdesc.msg_slot = msg - msgdesc.queue_first;
+ msgdesc.msg = msg;
+ if (bcp->uvhub_version == 2)
+ process_uv2_message(&msgdesc, bcp);
+ else
+ bau_process_message(&msgdesc, bcp, 1);
+
+ msg++;
+ if (msg > msgdesc.queue_last)
+ msg = msgdesc.queue_first;
+ bcp->bau_msg_head = msg;
+ }
+ stat->d_time += (get_cycles() - time_start);
+ if (!count)
+ stat->d_nomsg++;
+ else if (count > 1)
+ stat->d_multmsg++;
+}
+
+/*
+ * Each target uvhub (i.e. a uvhub that has cpu's) needs to have
+ * shootdown message timeouts enabled. The timeout does not cause
+ * an interrupt, but causes an error message to be returned to
+ * the sender.
+ */
+static void __init enable_timeouts(void)
+{
+ int uvhub;
+ int nuvhubs;
+ int pnode;
+ unsigned long mmr_image;
+
+ nuvhubs = uv_num_possible_blades();
+
+ for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
+ if (!uv_blade_nr_possible_cpus(uvhub))
+ continue;
+
+ pnode = uv_blade_to_pnode(uvhub);
+ mmr_image = read_mmr_misc_control(pnode);
+ /*
+ * Set the timeout period and then lock it in, in three
+ * steps; captures and locks in the period.
+ *
+ * To program the period, the SOFT_ACK_MODE must be off.
+ */
+ mmr_image &= ~(1L << SOFTACK_MSHIFT);
+ write_mmr_misc_control(pnode, mmr_image);
+ /*
+ * Set the 4-bit period.
+ */
+ mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT);
+ mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT);
+ write_mmr_misc_control(pnode, mmr_image);
+ /*
+ * UV1:
+ * Subsequent reversals of the timebase bit (3) cause an
+ * immediate timeout of one or all INTD resources as
+ * indicated in bits 2:0 (7 causes all of them to timeout).
+ */
+ mmr_image |= (1L << SOFTACK_MSHIFT);
+ if (is_uv2_hub()) {
+ mmr_image &= ~(1L << UV2_LEG_SHFT);
+ mmr_image |= (1L << UV2_EXT_SHFT);
+ }
+ write_mmr_misc_control(pnode, mmr_image);
+ }
+}
+
+static void *ptc_seq_start(struct seq_file *file, loff_t *offset)
+{
+ if (*offset < num_possible_cpus())
+ return offset;
+ return NULL;
+}
+
+static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
+{
+ (*offset)++;
+ if (*offset < num_possible_cpus())
+ return offset;
+ return NULL;
+}
+
+static void ptc_seq_stop(struct seq_file *file, void *data)
+{
+}
+
+static inline unsigned long long usec_2_cycles(unsigned long microsec)
+{
+ unsigned long ns;
+ unsigned long long cyc;
+
+ ns = microsec * 1000;
+ cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
+ return cyc;
+}
+
+/*
+ * Display the statistics thru /proc/sgi_uv/ptc_statistics
+ * 'data' points to the cpu number
+ * Note: see the descriptions in stat_description[].
+ */
+static int ptc_seq_show(struct seq_file *file, void *data)
+{
+ struct ptc_stats *stat;
+ int cpu;
+
+ cpu = *(loff_t *)data;
+ if (!cpu) {
+ seq_printf(file,
+ "# cpu sent stime self locals remotes ncpus localhub ");
+ seq_printf(file,
+ "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
+ seq_printf(file,
+ "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok ");
+ seq_printf(file,
+ "resetp resett giveup sto bz throt swack recv rtime ");
+ seq_printf(file,
+ "all one mult none retry canc nocan reset rcan ");
+ seq_printf(file,
+ "disable enable wars warshw warwaits\n");
+ }
+ if (cpu < num_possible_cpus() && cpu_online(cpu)) {
+ stat = &per_cpu(ptcstats, cpu);
+ /* source side statistics */
+ seq_printf(file,
+ "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+ cpu, stat->s_requestor, cycles_2_us(stat->s_time),
+ stat->s_ntargself, stat->s_ntarglocals,
+ stat->s_ntargremotes, stat->s_ntargcpu,
+ stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
+ stat->s_ntarguvhub, stat->s_ntarguvhub16);
+ seq_printf(file, "%ld %ld %ld %ld %ld %ld ",
+ stat->s_ntarguvhub8, stat->s_ntarguvhub4,
+ stat->s_ntarguvhub2, stat->s_ntarguvhub1,
+ stat->s_dtimeout, stat->s_strongnacks);
+ seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
+ stat->s_retry_messages, stat->s_retriesok,
+ stat->s_resets_plug, stat->s_resets_timeout,
+ stat->s_giveup, stat->s_stimeout,
+ stat->s_busy, stat->s_throttles);
+
+ /* destination side statistics */
+ seq_printf(file,
+ "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+ read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
+ stat->d_requestee, cycles_2_us(stat->d_time),
+ stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
+ stat->d_nomsg, stat->d_retries, stat->d_canceled,
+ stat->d_nocanceled, stat->d_resets,
+ stat->d_rcanceled);
+ seq_printf(file, "%ld %ld %ld %ld %ld\n",
+ stat->s_bau_disabled, stat->s_bau_reenabled,
+ stat->s_uv2_wars, stat->s_uv2_wars_hw,
+ stat->s_uv2_war_waits);
+ }
+ return 0;
+}
+
+/*
+ * Display the tunables thru debugfs
+ */
+static ssize_t tunables_read(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ char *buf;
+ int ret;
+
+ buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
+ "max_concur plugged_delay plugsb4reset",
+ "timeoutsb4reset ipi_reset_limit complete_threshold",
+ "congested_response_us congested_reps congested_period",
+ max_concurr, plugged_delay, plugsb4reset,
+ timeoutsb4reset, ipi_reset_limit, complete_threshold,
+ congested_respns_us, congested_reps, congested_period);
+
+ if (!buf)
+ return -ENOMEM;
+
+ ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
+ kfree(buf);
+ return ret;
+}
+
+/*
+ * handle a write to /proc/sgi_uv/ptc_statistics
+ * -1: reset the statistics
+ * 0: display meaning of the statistics
+ */
+static ssize_t ptc_proc_write(struct file *file, const char __user *user,
+ size_t count, loff_t *data)
+{
+ int cpu;
+ int i;
+ int elements;
+ long input_arg;
+ char optstr[64];
+ struct ptc_stats *stat;
+
+ if (count == 0 || count > sizeof(optstr))
+ return -EINVAL;
+ if (copy_from_user(optstr, user, count))
+ return -EFAULT;
+ optstr[count - 1] = '\0';
+
+ if (strict_strtol(optstr, 10, &input_arg) < 0) {
+ printk(KERN_DEBUG "%s is invalid\n", optstr);
+ return -EINVAL;
+ }
+
+ if (input_arg == 0) {
+ elements = sizeof(stat_description)/sizeof(*stat_description);
+ printk(KERN_DEBUG "# cpu: cpu number\n");
+ printk(KERN_DEBUG "Sender statistics:\n");
+ for (i = 0; i < elements; i++)
+ printk(KERN_DEBUG "%s\n", stat_description[i]);
+ } else if (input_arg == -1) {
+ for_each_present_cpu(cpu) {
+ stat = &per_cpu(ptcstats, cpu);
+ memset(stat, 0, sizeof(struct ptc_stats));
+ }
+ }
+
+ return count;
+}
+
+static int local_atoi(const char *name)
+{
+ int val = 0;
+
+ for (;; name++) {
+ switch (*name) {
+ case '0' ... '9':
+ val = 10*val+(*name-'0');
+ break;
+ default:
+ return val;
+ }
+ }
+}
+
+/*
+ * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables.
+ * Zero values reset them to defaults.
+ */
+static int parse_tunables_write(struct bau_control *bcp, char *instr,
+ int count)
+{
+ char *p;
+ char *q;
+ int cnt = 0;
+ int val;
+ int e = sizeof(tunables) / sizeof(*tunables);
+
+ p = instr + strspn(instr, WHITESPACE);
+ q = p;
+ for (; *p; p = q + strspn(q, WHITESPACE)) {
+ q = p + strcspn(p, WHITESPACE);
+ cnt++;
+ if (q == p)
+ break;
+ }
+ if (cnt != e) {
+ printk(KERN_INFO "bau tunable error: should be %d values\n", e);
+ return -EINVAL;
+ }
+
+ p = instr + strspn(instr, WHITESPACE);
+ q = p;
+ for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) {
+ q = p + strcspn(p, WHITESPACE);
+ val = local_atoi(p);
+ switch (cnt) {
+ case 0:
+ if (val == 0) {
+ max_concurr = MAX_BAU_CONCURRENT;
+ max_concurr_const = MAX_BAU_CONCURRENT;
+ continue;
+ }
+ if (val < 1 || val > bcp->cpus_in_uvhub) {
+ printk(KERN_DEBUG
+ "Error: BAU max concurrent %d is invalid\n",
+ val);
+ return -EINVAL;
+ }
+ max_concurr = val;
+ max_concurr_const = val;
+ continue;
+ default:
+ if (val == 0)
+ *tunables[cnt].tunp = tunables[cnt].deflt;
+ else
+ *tunables[cnt].tunp = val;
+ continue;
+ }
+ if (q == p)
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Handle a write to debugfs. (/sys/kernel/debug/sgi_uv/bau_tunables)
+ */
+static ssize_t tunables_write(struct file *file, const char __user *user,
+ size_t count, loff_t *data)
+{
+ int cpu;
+ int ret;
+ char instr[100];
+ struct bau_control *bcp;
+
+ if (count == 0 || count > sizeof(instr)-1)
+ return -EINVAL;
+ if (copy_from_user(instr, user, count))
+ return -EFAULT;
+
+ instr[count] = '\0';
+
+ cpu = get_cpu();
+ bcp = &per_cpu(bau_control, cpu);
+ ret = parse_tunables_write(bcp, instr, count);
+ put_cpu();
+ if (ret)
+ return ret;
+
+ for_each_present_cpu(cpu) {
+ bcp = &per_cpu(bau_control, cpu);
+ bcp->max_concurr = max_concurr;
+ bcp->max_concurr_const = max_concurr;
+ bcp->plugged_delay = plugged_delay;
+ bcp->plugsb4reset = plugsb4reset;
+ bcp->timeoutsb4reset = timeoutsb4reset;
+ bcp->ipi_reset_limit = ipi_reset_limit;
+ bcp->complete_threshold = complete_threshold;
+ bcp->cong_response_us = congested_respns_us;
+ bcp->cong_reps = congested_reps;
+ bcp->cong_period = congested_period;
+ }
+ return count;
+}
+
+static const struct seq_operations uv_ptc_seq_ops = {
+ .start = ptc_seq_start,
+ .next = ptc_seq_next,
+ .stop = ptc_seq_stop,
+ .show = ptc_seq_show
+};
+
+static int ptc_proc_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &uv_ptc_seq_ops);
+}
+
+static int tunables_open(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+
+static const struct file_operations proc_uv_ptc_operations = {
+ .open = ptc_proc_open,
+ .read = seq_read,
+ .write = ptc_proc_write,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static const struct file_operations tunables_fops = {
+ .open = tunables_open,
+ .read = tunables_read,
+ .write = tunables_write,
+ .llseek = default_llseek,
+};
+
+static int __init uv_ptc_init(void)
+{
+ struct proc_dir_entry *proc_uv_ptc;
+
+ if (!is_uv_system())
+ return 0;
+
+ proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
+ &proc_uv_ptc_operations);
+ if (!proc_uv_ptc) {
+ printk(KERN_ERR "unable to create %s proc entry\n",
+ UV_PTC_BASENAME);
+ return -EINVAL;
+ }
+
+ tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
+ if (!tunables_dir) {
+ printk(KERN_ERR "unable to create debugfs directory %s\n",
+ UV_BAU_TUNABLES_DIR);
+ return -EINVAL;
+ }
+ tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
+ tunables_dir, NULL, &tunables_fops);
+ if (!tunables_file) {
+ printk(KERN_ERR "unable to create debugfs file %s\n",
+ UV_BAU_TUNABLES_FILE);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * Initialize the sending side's sending buffers.
+ */
+static void activation_descriptor_init(int node, int pnode, int base_pnode)
+{
+ int i;
+ int cpu;
+ int uv1 = 0;
+ unsigned long gpa;
+ unsigned long m;
+ unsigned long n;
+ size_t dsize;
+ struct bau_desc *bau_desc;
+ struct bau_desc *bd2;
+ struct uv1_bau_msg_header *uv1_hdr;
+ struct uv2_bau_msg_header *uv2_hdr;
+ struct bau_control *bcp;
+
+ /*
+ * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC)
+ * per cpu; and one per cpu on the uvhub (ADP_SZ)
+ */
+ dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC;
+ bau_desc = kmalloc_node(dsize, GFP_KERNEL, node);
+ BUG_ON(!bau_desc);
+
+ gpa = uv_gpa(bau_desc);
+ n = uv_gpa_to_gnode(gpa);
+ m = uv_gpa_to_offset(gpa);
+ if (is_uv1_hub())
+ uv1 = 1;
+
+ /* the 14-bit pnode */
+ write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
+ /*
+ * Initializing all 8 (ITEMS_PER_DESC) descriptors for each
+ * cpu even though we only use the first one; one descriptor can
+ * describe a broadcast to 256 uv hubs.
+ */
+ for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
+ memset(bd2, 0, sizeof(struct bau_desc));
+ if (uv1) {
+ uv1_hdr = &bd2->header.uv1_hdr;
+ uv1_hdr->swack_flag = 1;
+ /*
+ * The base_dest_nasid set in the message header
+ * is the nasid of the first uvhub in the partition.
+ * The bit map will indicate destination pnode numbers
+ * relative to that base. They may not be consecutive
+ * if nasid striding is being used.
+ */
+ uv1_hdr->base_dest_nasid =
+ UV_PNODE_TO_NASID(base_pnode);
+ uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID;
+ uv1_hdr->command = UV_NET_ENDPOINT_INTD;
+ uv1_hdr->int_both = 1;
+ /*
+ * all others need to be set to zero:
+ * fairness chaining multilevel count replied_to
+ */
+ } else {
+ uv2_hdr = &bd2->header.uv2_hdr;
+ uv2_hdr->swack_flag = 1;
+ uv2_hdr->base_dest_nasid =
+ UV_PNODE_TO_NASID(base_pnode);
+ uv2_hdr->dest_subnodeid = UV_LB_SUBNODEID;
+ uv2_hdr->command = UV_NET_ENDPOINT_INTD;
+ }
+ }
+ for_each_present_cpu(cpu) {
+ if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
+ continue;
+ bcp = &per_cpu(bau_control, cpu);
+ bcp->descriptor_base = bau_desc;
+ }
+}
+
+/*
+ * initialize the destination side's receiving buffers
+ * entered for each uvhub in the partition
+ * - node is first node (kernel memory notion) on the uvhub
+ * - pnode is the uvhub's physical identifier
+ */
+static void pq_init(int node, int pnode)
+{
+ int cpu;
+ size_t plsize;
+ char *cp;
+ void *vp;
+ unsigned long pn;
+ unsigned long first;
+ unsigned long pn_first;
+ unsigned long last;
+ struct bau_pq_entry *pqp;
+ struct bau_control *bcp;
+
+ plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry);
+ vp = kmalloc_node(plsize, GFP_KERNEL, node);
+ pqp = (struct bau_pq_entry *)vp;
+ BUG_ON(!pqp);
+
+ cp = (char *)pqp + 31;
+ pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5);
+
+ for_each_present_cpu(cpu) {
+ if (pnode != uv_cpu_to_pnode(cpu))
+ continue;
+ /* for every cpu on this pnode: */
+ bcp = &per_cpu(bau_control, cpu);
+ bcp->queue_first = pqp;
+ bcp->bau_msg_head = pqp;
+ bcp->queue_last = pqp + (DEST_Q_SIZE - 1);
+ }
+ /*
+ * need the gnode of where the memory was really allocated
+ */
+ pn = uv_gpa_to_gnode(uv_gpa(pqp));
+ first = uv_physnodeaddr(pqp);
+ pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first;
+ last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1));
+ write_mmr_payload_first(pnode, pn_first);
+ write_mmr_payload_tail(pnode, first);
+ write_mmr_payload_last(pnode, last);
+ write_gmmr_sw_ack(pnode, 0xffffUL);
+
+ /* in effect, all msg_type's are set to MSG_NOOP */
+ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
+}
+
+/*
+ * Initialization of each UV hub's structures
+ */
+static void __init init_uvhub(int uvhub, int vector, int base_pnode)
+{
+ int node;
+ int pnode;
+ unsigned long apicid;
+
+ node = uvhub_to_first_node(uvhub);
+ pnode = uv_blade_to_pnode(uvhub);
+
+ activation_descriptor_init(node, pnode, base_pnode);
+
+ pq_init(node, pnode);
+ /*
+ * The below initialization can't be in firmware because the
+ * messaging IRQ will be determined by the OS.
+ */
+ apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
+ write_mmr_data_config(pnode, ((apicid << 32) | vector));
+}
+
+/*
+ * We will set BAU_MISC_CONTROL with a timeout period.
+ * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT.
+ * So the destination timeout period has to be calculated from them.
+ */
+static int calculate_destination_timeout(void)
+{
+ unsigned long mmr_image;
+ int mult1;
+ int mult2;
+ int index;
+ int base;
+ int ret;
+ unsigned long ts_ns;
+
+ if (is_uv1_hub()) {
+ mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
+ mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
+ index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
+ mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
+ mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
+ base = timeout_base_ns[index];
+ ts_ns = base * mult1 * mult2;
+ ret = ts_ns / 1000;
+ } else {
+ /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */
+ mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);
+ mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
+ if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
+ base = 80;
+ else
+ base = 10;
+ mult1 = mmr_image & UV2_ACK_MASK;
+ ret = mult1 * base;
+ }
+ return ret;
+}
+
+static void __init init_per_cpu_tunables(void)
+{
+ int cpu;
+ struct bau_control *bcp;
+
+ for_each_present_cpu(cpu) {
+ bcp = &per_cpu(bau_control, cpu);
+ bcp->baudisabled = 0;
+ bcp->statp = &per_cpu(ptcstats, cpu);
+ /* time interval to catch a hardware stay-busy bug */
+ bcp->timeout_interval = usec_2_cycles(2*timeout_us);
+ bcp->max_concurr = max_concurr;
+ bcp->max_concurr_const = max_concurr;
+ bcp->plugged_delay = plugged_delay;
+ bcp->plugsb4reset = plugsb4reset;
+ bcp->timeoutsb4reset = timeoutsb4reset;
+ bcp->ipi_reset_limit = ipi_reset_limit;
+ bcp->complete_threshold = complete_threshold;
+ bcp->cong_response_us = congested_respns_us;
+ bcp->cong_reps = congested_reps;
+ bcp->cong_period = congested_period;
+ bcp->clocks_per_100_usec = usec_2_cycles(100);
+ spin_lock_init(&bcp->queue_lock);
+ spin_lock_init(&bcp->uvhub_lock);
+ }
+}
+
+/*
+ * Scan all cpus to collect blade and socket summaries.
+ */
+static int __init get_cpu_topology(int base_pnode,
+ struct uvhub_desc *uvhub_descs,
+ unsigned char *uvhub_mask)
+{
+ int cpu;
+ int pnode;
+ int uvhub;
+ int socket;
+ struct bau_control *bcp;
+ struct uvhub_desc *bdp;
+ struct socket_desc *sdp;
+
+ for_each_present_cpu(cpu) {
+ bcp = &per_cpu(bau_control, cpu);
+
+ memset(bcp, 0, sizeof(struct bau_control));
+
+ pnode = uv_cpu_hub_info(cpu)->pnode;
+ if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) {
+ printk(KERN_EMERG
+ "cpu %d pnode %d-%d beyond %d; BAU disabled\n",
+ cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE);
+ return 1;
+ }
+
+ bcp->osnode = cpu_to_node(cpu);
+ bcp->partition_base_pnode = base_pnode;
+
+ uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
+ *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
+ bdp = &uvhub_descs[uvhub];
+
+ bdp->num_cpus++;
+ bdp->uvhub = uvhub;
+ bdp->pnode = pnode;
+
+ /* kludge: 'assuming' one node per socket, and assuming that
+ disabling a socket just leaves a gap in node numbers */
+ socket = bcp->osnode & 1;
+ bdp->socket_mask |= (1 << socket);
+ sdp = &bdp->socket[socket];
+ sdp->cpu_number[sdp->num_cpus] = cpu;
+ sdp->num_cpus++;
+ if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
+ printk(KERN_EMERG "%d cpus per socket invalid\n",
+ sdp->num_cpus);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Each socket is to get a local array of pnodes/hubs.
+ */
+static void make_per_cpu_thp(struct bau_control *smaster)
+{
+ int cpu;
+ size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus();
+
+ smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
+ memset(smaster->thp, 0, hpsz);
+ for_each_present_cpu(cpu) {
+ smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode;
+ smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
+ }
+}
+
+/*
+ * Each uvhub is to get a local cpumask.
+ */
+static void make_per_hub_cpumask(struct bau_control *hmaster)
+{
+ int sz = sizeof(cpumask_t);
+
+ hmaster->cpumask = kzalloc_node(sz, GFP_KERNEL, hmaster->osnode);
+}
+
+/*
+ * Initialize all the per_cpu information for the cpu's on a given socket,
+ * given what has been gathered into the socket_desc struct.
+ * And reports the chosen hub and socket masters back to the caller.
+ */
+static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
+ struct bau_control **smasterp,
+ struct bau_control **hmasterp)
+{
+ int i;
+ int cpu;
+ struct bau_control *bcp;
+
+ for (i = 0; i < sdp->num_cpus; i++) {
+ cpu = sdp->cpu_number[i];
+ bcp = &per_cpu(bau_control, cpu);
+ bcp->cpu = cpu;
+ if (i == 0) {
+ *smasterp = bcp;
+ if (!(*hmasterp))
+ *hmasterp = bcp;
+ }
+ bcp->cpus_in_uvhub = bdp->num_cpus;
+ bcp->cpus_in_socket = sdp->num_cpus;
+ bcp->socket_master = *smasterp;
+ bcp->uvhub = bdp->uvhub;
+ if (is_uv1_hub())
+ bcp->uvhub_version = 1;
+ else if (is_uv2_hub())
+ bcp->uvhub_version = 2;
+ else {
+ printk(KERN_EMERG "uvhub version not 1 or 2\n");
+ return 1;
+ }
+ bcp->uvhub_master = *hmasterp;
+ bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ bcp->using_desc = bcp->uvhub_cpu;
+ if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
+ printk(KERN_EMERG "%d cpus per uvhub invalid\n",
+ bcp->uvhub_cpu);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Summarize the blade and socket topology into the per_cpu structures.
+ */
+static int __init summarize_uvhub_sockets(int nuvhubs,
+ struct uvhub_desc *uvhub_descs,
+ unsigned char *uvhub_mask)
+{
+ int socket;
+ int uvhub;
+ unsigned short socket_mask;
+
+ for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
+ struct uvhub_desc *bdp;
+ struct bau_control *smaster = NULL;
+ struct bau_control *hmaster = NULL;
+
+ if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
+ continue;
+
+ bdp = &uvhub_descs[uvhub];
+ socket_mask = bdp->socket_mask;
+ socket = 0;
+ while (socket_mask) {
+ struct socket_desc *sdp;
+ if ((socket_mask & 1)) {
+ sdp = &bdp->socket[socket];
+ if (scan_sock(sdp, bdp, &smaster, &hmaster))
+ return 1;
+ make_per_cpu_thp(smaster);
+ }
+ socket++;
+ socket_mask = (socket_mask >> 1);
+ }
+ make_per_hub_cpumask(hmaster);
+ }
+ return 0;
+}
+
+/*
+ * initialize the bau_control structure for each cpu
+ */
+static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
+{
+ unsigned char *uvhub_mask;
+ void *vp;
+ struct uvhub_desc *uvhub_descs;
+
+ timeout_us = calculate_destination_timeout();
+
+ vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
+ uvhub_descs = (struct uvhub_desc *)vp;
+ memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
+ uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
+
+ if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
+ goto fail;
+
+ if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask))
+ goto fail;
+
+ kfree(uvhub_descs);
+ kfree(uvhub_mask);
+ init_per_cpu_tunables();
+ return 0;
+
+fail:
+ kfree(uvhub_descs);
+ kfree(uvhub_mask);
+ return 1;
+}
+
+/*
+ * Initialization of BAU-related structures
+ */
+static int __init uv_bau_init(void)
+{
+ int uvhub;
+ int pnode;
+ int nuvhubs;
+ int cur_cpu;
+ int cpus;
+ int vector;
+ cpumask_var_t *mask;
+
+ if (!is_uv_system())
+ return 0;
+
+ if (nobau)
+ return 0;
+
+ for_each_possible_cpu(cur_cpu) {
+ mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
+ zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
+ }
+
+ nuvhubs = uv_num_possible_blades();
+ spin_lock_init(&disable_lock);
+ congested_cycles = usec_2_cycles(congested_respns_us);
+
+ uv_base_pnode = 0x7fffffff;
+ for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
+ cpus = uv_blade_nr_possible_cpus(uvhub);
+ if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode))
+ uv_base_pnode = uv_blade_to_pnode(uvhub);
+ }
+
+ enable_timeouts();
+
+ if (init_per_cpu(nuvhubs, uv_base_pnode)) {
+ nobau = 1;
+ return 0;
+ }
+
+ vector = UV_BAU_MESSAGE;
+ for_each_possible_blade(uvhub)
+ if (uv_blade_nr_possible_cpus(uvhub))
+ init_uvhub(uvhub, vector, uv_base_pnode);
+
+ alloc_intr_gate(vector, uv_bau_message_intr1);
+
+ for_each_possible_blade(uvhub) {
+ if (uv_blade_nr_possible_cpus(uvhub)) {
+ unsigned long val;
+ unsigned long mmr;
+ pnode = uv_blade_to_pnode(uvhub);
+ /* INIT the bau */
+ val = 1L << 63;
+ write_gmmr_activation(pnode, val);
+ mmr = 1; /* should be 1 to broadcast to both sockets */
+ if (!is_uv1_hub())
+ write_mmr_data_broadcast(pnode, mmr);
+ }
+ }
+
+ return 0;
+}
+core_initcall(uv_bau_init);
+fs_initcall(uv_ptc_init);
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
new file mode 100644
index 00000000000..f25c2765a5c
--- /dev/null
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -0,0 +1,285 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV IRQ functions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+
+#include <asm/apic.h>
+#include <asm/uv/uv_irq.h>
+#include <asm/uv/uv_hub.h>
+
+/* MMR offset and pnode of hub sourcing interrupts for a given irq */
+struct uv_irq_2_mmr_pnode{
+ struct rb_node list;
+ unsigned long offset;
+ int pnode;
+ int irq;
+};
+
+static DEFINE_SPINLOCK(uv_irq_lock);
+static struct rb_root uv_irq_root;
+
+static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
+
+static void uv_noop(struct irq_data *data) { }
+
+static void uv_ack_apic(struct irq_data *data)
+{
+ ack_APIC_irq();
+}
+
+static struct irq_chip uv_irq_chip = {
+ .name = "UV-CORE",
+ .irq_mask = uv_noop,
+ .irq_unmask = uv_noop,
+ .irq_eoi = uv_ack_apic,
+ .irq_set_affinity = uv_set_irq_affinity,
+};
+
+/*
+ * Add offset and pnode information of the hub sourcing interrupts to the
+ * rb tree for a specific irq.
+ */
+static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
+{
+ struct rb_node **link = &uv_irq_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct uv_irq_2_mmr_pnode *n;
+ struct uv_irq_2_mmr_pnode *e;
+ unsigned long irqflags;
+
+ n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
+ uv_blade_to_memory_nid(blade));
+ if (!n)
+ return -ENOMEM;
+
+ n->irq = irq;
+ n->offset = offset;
+ n->pnode = uv_blade_to_pnode(blade);
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ /* Find the right place in the rbtree: */
+ while (*link) {
+ parent = *link;
+ e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
+
+ if (unlikely(irq == e->irq)) {
+ /* irq entry exists */
+ e->pnode = uv_blade_to_pnode(blade);
+ e->offset = offset;
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ kfree(n);
+ return 0;
+ }
+
+ if (irq < e->irq)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ /* Insert the node into the rbtree. */
+ rb_link_node(&n->list, parent, link);
+ rb_insert_color(&n->list, &uv_irq_root);
+
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return 0;
+}
+
+/* Retrieve offset and pnode information from the rb tree for a specific irq */
+int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
+{
+ struct uv_irq_2_mmr_pnode *e;
+ struct rb_node *n;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ n = uv_irq_root.rb_node;
+ while (n) {
+ e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+
+ if (e->irq == irq) {
+ *offset = e->offset;
+ *pnode = e->pnode;
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return 0;
+ }
+
+ if (irq < e->irq)
+ n = n->rb_left;
+ else
+ n = n->rb_right;
+ }
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return -1;
+}
+
+/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+static int
+arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+ unsigned long mmr_offset, int limit)
+{
+ const struct cpumask *eligible_cpu = cpumask_of(cpu);
+ struct irq_cfg *cfg = irq_get_chip_data(irq);
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ int mmr_pnode, err;
+
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+ sizeof(unsigned long));
+
+ err = assign_irq_vector(irq, cfg, eligible_cpu);
+ if (err != 0)
+ return err;
+
+ if (limit == UV_AFFINITY_CPU)
+ irq_set_status_flags(irq, IRQ_NO_BALANCING);
+ else
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+
+ irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+ irq_name);
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+
+ mmr_pnode = uv_blade_to_pnode(mmr_blade);
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * longer allowed to be sent.
+ */
+static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
+{
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+ sizeof(unsigned long));
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ entry->mask = 1;
+
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+
+static int
+uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
+{
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned int dest;
+ unsigned long mmr_value, mmr_offset;
+ struct uv_IO_APIC_route_entry *entry;
+ int mmr_pnode;
+
+ if (__ioapic_set_affinity(data, mask, &dest))
+ return -1;
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = dest;
+
+ /* Get previously stored MMR and pnode of hub sourcing interrupts */
+ if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode))
+ return -1;
+
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ return 0;
+}
+
+/*
+ * Set up a mapping of an available irq and vector, and enable the specified
+ * MMR that defines the MSI that is to be sent to the specified CPU when an
+ * interrupt is raised.
+ */
+int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
+ unsigned long mmr_offset, int limit)
+{
+ int irq, ret;
+
+ irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
+
+ if (irq <= 0)
+ return -EBUSY;
+
+ ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
+ limit);
+ if (ret == irq)
+ uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
+ else
+ destroy_irq(irq);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(uv_setup_irq);
+
+/*
+ * Tear down a mapping of an irq and vector, and disable the specified MMR that
+ * defined the MSI that was to be sent to the specified CPU when an interrupt
+ * was raised.
+ *
+ * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
+ */
+void uv_teardown_irq(unsigned int irq)
+{
+ struct uv_irq_2_mmr_pnode *e;
+ struct rb_node *n;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ n = uv_irq_root.rb_node;
+ while (n) {
+ e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+ if (e->irq == irq) {
+ arch_disable_uv_irq(e->pnode, e->offset);
+ rb_erase(n, &uv_irq_root);
+ kfree(e);
+ break;
+ }
+ if (irq < e->irq)
+ n = n->rb_left;
+ else
+ n = n->rb_right;
+ }
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ destroy_irq(irq);
+}
+EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c
new file mode 100644
index 00000000000..5d4ba301e77
--- /dev/null
+++ b/arch/x86/platform/uv/uv_sysfs.c
@@ -0,0 +1,76 @@
+/*
+ * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) Russ Anderson
+ */
+
+#include <linux/device.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+
+struct kobject *sgi_uv_kobj;
+
+static ssize_t partition_id_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
+}
+
+static ssize_t coherence_id_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
+}
+
+static struct kobj_attribute partition_id_attr =
+ __ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
+
+static struct kobj_attribute coherence_id_attr =
+ __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
+
+
+static int __init sgi_uv_sysfs_init(void)
+{
+ unsigned long ret;
+
+ if (!is_uv_system())
+ return -ENODEV;
+
+ if (!sgi_uv_kobj)
+ sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
+ if (!sgi_uv_kobj) {
+ printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n");
+ return -EINVAL;
+ }
+
+ ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
+ if (ret) {
+ printk(KERN_WARNING "sysfs_create_file partition_id failed\n");
+ return ret;
+ }
+
+ ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
+ if (ret) {
+ printk(KERN_WARNING "sysfs_create_file coherence_id failed\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+device_initcall(sgi_uv_sysfs_init);
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
new file mode 100644
index 00000000000..9f29a01ee1b
--- /dev/null
+++ b/arch/x86/platform/uv/uv_time.c
@@ -0,0 +1,429 @@
+/*
+ * SGI RTC clock/timer routines.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) Dimitri Sivanich
+ */
+#include <linux/clockchips.h>
+#include <linux/slab.h>
+
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+#include <asm/apic.h>
+#include <asm/cpu.h>
+
+#define RTC_NAME "sgi_rtc"
+
+static cycle_t uv_read_rtc(struct clocksource *cs);
+static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
+static void uv_rtc_timer_setup(enum clock_event_mode,
+ struct clock_event_device *);
+
+static struct clocksource clocksource_uv = {
+ .name = RTC_NAME,
+ .rating = 400,
+ .read = uv_read_rtc,
+ .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static struct clock_event_device clock_event_device_uv = {
+ .name = RTC_NAME,
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ .shift = 20,
+ .rating = 400,
+ .irq = -1,
+ .set_next_event = uv_rtc_next_event,
+ .set_mode = uv_rtc_timer_setup,
+ .event_handler = NULL,
+};
+
+static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
+
+/* There is one of these allocated per node */
+struct uv_rtc_timer_head {
+ spinlock_t lock;
+ /* next cpu waiting for timer, local node relative: */
+ int next_cpu;
+ /* number of cpus on this node: */
+ int ncpus;
+ struct {
+ int lcpu; /* systemwide logical cpu number */
+ u64 expires; /* next timer expiration for this cpu */
+ } cpu[1];
+};
+
+/*
+ * Access to uv_rtc_timer_head via blade id.
+ */
+static struct uv_rtc_timer_head **blade_info __read_mostly;
+
+static int uv_rtc_evt_enable;
+
+/*
+ * Hardware interface routines
+ */
+
+/* Send IPIs to another node */
+static void uv_rtc_send_IPI(int cpu)
+{
+ unsigned long apicid, val;
+ int pnode;
+
+ apicid = cpu_physical_id(cpu);
+ pnode = uv_apicid_to_pnode(apicid);
+ apicid |= uv_apicid_hibits;
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+ (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+ (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
+
+ uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+}
+
+/* Check for an RTC interrupt pending */
+static int uv_intr_pending(int pnode)
+{
+ if (is_uv1_hub())
+ return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
+ UV1H_EVENT_OCCURRED0_RTC1_MASK;
+ else
+ return uv_read_global_mmr64(pnode, UV2H_EVENT_OCCURRED2) &
+ UV2H_EVENT_OCCURRED2_RTC_1_MASK;
+}
+
+/* Setup interrupt and return non-zero if early expiration occurred. */
+static int uv_setup_intr(int cpu, u64 expires)
+{
+ u64 val;
+ unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits;
+ int pnode = uv_cpu_to_pnode(cpu);
+
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+ UVH_RTC1_INT_CONFIG_M_MASK);
+ uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
+
+ if (is_uv1_hub())
+ uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
+ UV1H_EVENT_OCCURRED0_RTC1_MASK);
+ else
+ uv_write_global_mmr64(pnode, UV2H_EVENT_OCCURRED2_ALIAS,
+ UV2H_EVENT_OCCURRED2_RTC_1_MASK);
+
+ val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
+ ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
+
+ /* Set configuration */
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
+ /* Initialize comparator value */
+ uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
+
+ if (uv_read_rtc(NULL) <= expires)
+ return 0;
+
+ return !uv_intr_pending(pnode);
+}
+
+/*
+ * Per-cpu timer tracking routines
+ */
+
+static __init void uv_rtc_deallocate_timers(void)
+{
+ int bid;
+
+ for_each_possible_blade(bid) {
+ kfree(blade_info[bid]);
+ }
+ kfree(blade_info);
+}
+
+/* Allocate per-node list of cpu timer expiration times. */
+static __init int uv_rtc_allocate_timers(void)
+{
+ int cpu;
+
+ blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL);
+ if (!blade_info)
+ return -ENOMEM;
+ memset(blade_info, 0, uv_possible_blades * sizeof(void *));
+
+ for_each_present_cpu(cpu) {
+ int nid = cpu_to_node(cpu);
+ int bid = uv_cpu_to_blade_id(cpu);
+ int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ struct uv_rtc_timer_head *head = blade_info[bid];
+
+ if (!head) {
+ head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
+ (uv_blade_nr_possible_cpus(bid) *
+ 2 * sizeof(u64)),
+ GFP_KERNEL, nid);
+ if (!head) {
+ uv_rtc_deallocate_timers();
+ return -ENOMEM;
+ }
+ spin_lock_init(&head->lock);
+ head->ncpus = uv_blade_nr_possible_cpus(bid);
+ head->next_cpu = -1;
+ blade_info[bid] = head;
+ }
+
+ head->cpu[bcpu].lcpu = cpu;
+ head->cpu[bcpu].expires = ULLONG_MAX;
+ }
+
+ return 0;
+}
+
+/* Find and set the next expiring timer. */
+static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode)
+{
+ u64 lowest = ULLONG_MAX;
+ int c, bcpu = -1;
+
+ head->next_cpu = -1;
+ for (c = 0; c < head->ncpus; c++) {
+ u64 exp = head->cpu[c].expires;
+ if (exp < lowest) {
+ bcpu = c;
+ lowest = exp;
+ }
+ }
+ if (bcpu >= 0) {
+ head->next_cpu = bcpu;
+ c = head->cpu[bcpu].lcpu;
+ if (uv_setup_intr(c, lowest))
+ /* If we didn't set it up in time, trigger */
+ uv_rtc_send_IPI(c);
+ } else {
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+ UVH_RTC1_INT_CONFIG_M_MASK);
+ }
+}
+
+/*
+ * Set expiration time for current cpu.
+ *
+ * Returns 1 if we missed the expiration time.
+ */
+static int uv_rtc_set_timer(int cpu, u64 expires)
+{
+ int pnode = uv_cpu_to_pnode(cpu);
+ int bid = uv_cpu_to_blade_id(cpu);
+ struct uv_rtc_timer_head *head = blade_info[bid];
+ int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ u64 *t = &head->cpu[bcpu].expires;
+ unsigned long flags;
+ int next_cpu;
+
+ spin_lock_irqsave(&head->lock, flags);
+
+ next_cpu = head->next_cpu;
+ *t = expires;
+
+ /* Will this one be next to go off? */
+ if (next_cpu < 0 || bcpu == next_cpu ||
+ expires < head->cpu[next_cpu].expires) {
+ head->next_cpu = bcpu;
+ if (uv_setup_intr(cpu, expires)) {
+ *t = ULLONG_MAX;
+ uv_rtc_find_next_timer(head, pnode);
+ spin_unlock_irqrestore(&head->lock, flags);
+ return -ETIME;
+ }
+ }
+
+ spin_unlock_irqrestore(&head->lock, flags);
+ return 0;
+}
+
+/*
+ * Unset expiration time for current cpu.
+ *
+ * Returns 1 if this timer was pending.
+ */
+static int uv_rtc_unset_timer(int cpu, int force)
+{
+ int pnode = uv_cpu_to_pnode(cpu);
+ int bid = uv_cpu_to_blade_id(cpu);
+ struct uv_rtc_timer_head *head = blade_info[bid];
+ int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ u64 *t = &head->cpu[bcpu].expires;
+ unsigned long flags;
+ int rc = 0;
+
+ spin_lock_irqsave(&head->lock, flags);
+
+ if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
+ rc = 1;
+
+ if (rc) {
+ *t = ULLONG_MAX;
+ /* Was the hardware setup for this timer? */
+ if (head->next_cpu == bcpu)
+ uv_rtc_find_next_timer(head, pnode);
+ }
+
+ spin_unlock_irqrestore(&head->lock, flags);
+
+ return rc;
+}
+
+
+/*
+ * Kernel interface routines.
+ */
+
+/*
+ * Read the RTC.
+ *
+ * Starting with HUB rev 2.0, the UV RTC register is replicated across all
+ * cachelines of it's own page. This allows faster simultaneous reads
+ * from a given socket.
+ */
+static cycle_t uv_read_rtc(struct clocksource *cs)
+{
+ unsigned long offset;
+
+ if (uv_get_min_hub_revision_id() == 1)
+ offset = 0;
+ else
+ offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
+
+ return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
+}
+
+/*
+ * Program the next event, relative to now
+ */
+static int uv_rtc_next_event(unsigned long delta,
+ struct clock_event_device *ced)
+{
+ int ced_cpu = cpumask_first(ced->cpumask);
+
+ return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL));
+}
+
+/*
+ * Setup the RTC timer in oneshot mode
+ */
+static void uv_rtc_timer_setup(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+ int ced_cpu = cpumask_first(evt->cpumask);
+
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ case CLOCK_EVT_MODE_ONESHOT:
+ case CLOCK_EVT_MODE_RESUME:
+ /* Nothing to do here yet */
+ break;
+ case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ uv_rtc_unset_timer(ced_cpu, 1);
+ break;
+ }
+}
+
+static void uv_rtc_interrupt(void)
+{
+ int cpu = smp_processor_id();
+ struct clock_event_device *ced = &per_cpu(cpu_ced, cpu);
+
+ if (!ced || !ced->event_handler)
+ return;
+
+ if (uv_rtc_unset_timer(cpu, 0) != 1)
+ return;
+
+ ced->event_handler(ced);
+}
+
+static int __init uv_enable_evt_rtc(char *str)
+{
+ uv_rtc_evt_enable = 1;
+
+ return 1;
+}
+__setup("uvrtcevt", uv_enable_evt_rtc);
+
+static __init void uv_rtc_register_clockevents(struct work_struct *dummy)
+{
+ struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
+
+ *ced = clock_event_device_uv;
+ ced->cpumask = cpumask_of(smp_processor_id());
+ clockevents_register_device(ced);
+}
+
+static __init int uv_rtc_setup_clock(void)
+{
+ int rc;
+
+ if (!is_uv_system())
+ return -ENODEV;
+
+ /* If single blade, prefer tsc */
+ if (uv_num_possible_blades() == 1)
+ clocksource_uv.rating = 250;
+
+ rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second);
+ if (rc)
+ printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
+ else
+ printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n",
+ sn_rtc_cycles_per_second/(unsigned long)1E6);
+
+ if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback)
+ return rc;
+
+ /* Setup and register clockevents */
+ rc = uv_rtc_allocate_timers();
+ if (rc)
+ goto error;
+
+ x86_platform_ipi_callback = uv_rtc_interrupt;
+
+ clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
+ NSEC_PER_SEC, clock_event_device_uv.shift);
+
+ clock_event_device_uv.min_delta_ns = NSEC_PER_SEC /
+ sn_rtc_cycles_per_second;
+
+ clock_event_device_uv.max_delta_ns = clocksource_uv.mask *
+ (NSEC_PER_SEC / sn_rtc_cycles_per_second);
+
+ rc = schedule_on_each_cpu(uv_rtc_register_clockevents);
+ if (rc) {
+ x86_platform_ipi_callback = NULL;
+ uv_rtc_deallocate_timers();
+ goto error;
+ }
+
+ printk(KERN_INFO "UV RTC clockevents registered\n");
+
+ return 0;
+
+error:
+ clocksource_unregister(&clocksource_uv);
+ printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc);
+
+ return rc;
+}
+arch_initcall(uv_rtc_setup_clock);
diff --git a/arch/x86/platform/visws/Makefile b/arch/x86/platform/visws/Makefile
new file mode 100644
index 00000000000..91bc17ab2fd
--- /dev/null
+++ b/arch/x86/platform/visws/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_VISWS) += visws_quirks.o
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
new file mode 100644
index 00000000000..c7abf13a213
--- /dev/null
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -0,0 +1,608 @@
+/*
+ * SGI Visual Workstation support and quirks, unmaintained.
+ *
+ * Split out from setup.c by davej@suse.de
+ *
+ * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
+ *
+ * SGI Visual Workstation interrupt controller
+ *
+ * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
+ * which serves as the main interrupt controller in the system. Non-legacy
+ * hardware in the system uses this controller directly. Legacy devices
+ * are connected to the PIIX4 which in turn has its 8259(s) connected to
+ * a of the Cobalt APIC entry.
+ *
+ * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
+ *
+ * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
+ */
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/visws/cobalt.h>
+#include <asm/visws/piix4.h>
+#include <asm/io_apic.h>
+#include <asm/fixmap.h>
+#include <asm/reboot.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/e820.h>
+#include <asm/time.h>
+#include <asm/io.h>
+
+#include <linux/kernel_stat.h>
+
+#include <asm/i8259.h>
+#include <asm/irq_vectors.h>
+#include <asm/visws/lithium.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+extern int no_broadcast;
+
+char visws_board_type = -1;
+char visws_board_rev = -1;
+
+static void __init visws_time_init(void)
+{
+ printk(KERN_INFO "Starting Cobalt Timer system clock\n");
+
+ /* Set the countdown value */
+ co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
+
+ /* Start the timer */
+ co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
+
+ /* Enable (unmask) the timer interrupt */
+ co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
+
+ setup_default_timer_irq();
+}
+
+/* Replaces the default init_ISA_irqs in the generic setup */
+static void __init visws_pre_intr_init(void);
+
+/* Quirk for machine specific memory setup. */
+
+#define MB (1024 * 1024)
+
+unsigned long sgivwfb_mem_phys;
+unsigned long sgivwfb_mem_size;
+EXPORT_SYMBOL(sgivwfb_mem_phys);
+EXPORT_SYMBOL(sgivwfb_mem_size);
+
+long long mem_size __initdata = 0;
+
+static char * __init visws_memory_setup(void)
+{
+ long long gfx_mem_size = 8 * MB;
+
+ mem_size = boot_params.alt_mem_k;
+
+ if (!mem_size) {
+ printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
+ mem_size = 128 * MB;
+ }
+
+ /*
+ * this hardcodes the graphics memory to 8 MB
+ * it really should be sized dynamically (or at least
+ * set as a boot param)
+ */
+ if (!sgivwfb_mem_size) {
+ printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
+ sgivwfb_mem_size = 8 * MB;
+ }
+
+ /*
+ * Trim to nearest MB
+ */
+ sgivwfb_mem_size &= ~((1 << 20) - 1);
+ sgivwfb_mem_phys = mem_size - gfx_mem_size;
+
+ e820_add_region(0, LOWMEMSIZE(), E820_RAM);
+ e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
+ e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
+
+ return "PROM";
+}
+
+static void visws_machine_emergency_restart(void)
+{
+ /*
+ * Visual Workstations restart after this
+ * register is poked on the PIIX4
+ */
+ outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
+}
+
+static void visws_machine_power_off(void)
+{
+ unsigned short pm_status;
+/* extern unsigned int pci_bus0; */
+
+ while ((pm_status = inw(PMSTS_PORT)) & 0x100)
+ outw(pm_status, PMSTS_PORT);
+
+ outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
+
+ mdelay(10);
+
+#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
+ (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
+
+/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */
+ outl(PIIX_SPECIAL_STOP, 0xCFC);
+}
+
+static void __init visws_get_smp_config(unsigned int early)
+{
+}
+
+/*
+ * The Visual Workstation is Intel MP compliant in the hardware
+ * sense, but it doesn't have a BIOS(-configuration table).
+ * No problem for Linux.
+ */
+
+static void __init MP_processor_info(struct mpc_cpu *m)
+{
+ int ver, logical_apicid;
+ physid_mask_t apic_cpus;
+
+ if (!(m->cpuflag & CPU_ENABLED))
+ return;
+
+ logical_apicid = m->apicid;
+ printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
+ m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
+ m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver);
+
+ if (m->cpuflag & CPU_BOOTPROCESSOR)
+ boot_cpu_physical_apicid = m->apicid;
+
+ ver = m->apicver;
+ if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
+ printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
+ m->apicid, MAX_LOCAL_APIC);
+ return;
+ }
+
+ apic->apicid_to_cpu_present(m->apicid, &apic_cpus);
+ physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
+ /*
+ * Validate version
+ */
+ if (ver == 0x0) {
+ printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
+ "fixing up to 0x10. (tell your hw vendor)\n",
+ m->apicid);
+ ver = 0x10;
+ }
+ apic_version[m->apicid] = ver;
+}
+
+static void __init visws_find_smp_config(void)
+{
+ struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
+ unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
+
+ if (ncpus > CO_CPU_MAX) {
+ printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
+ ncpus, mp);
+
+ ncpus = CO_CPU_MAX;
+ }
+
+ if (ncpus > setup_max_cpus)
+ ncpus = setup_max_cpus;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ smp_found_config = 1;
+#endif
+ while (ncpus--)
+ MP_processor_info(mp++);
+
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+}
+
+static void visws_trap_init(void);
+
+void __init visws_early_detect(void)
+{
+ int raw;
+
+ visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
+ >> PIIX_GPI_BD_SHIFT;
+
+ if (visws_board_type < 0)
+ return;
+
+ /*
+ * Override the default platform setup functions
+ */
+ x86_init.resources.memory_setup = visws_memory_setup;
+ x86_init.mpparse.get_smp_config = visws_get_smp_config;
+ x86_init.mpparse.find_smp_config = visws_find_smp_config;
+ x86_init.irqs.pre_vector_init = visws_pre_intr_init;
+ x86_init.irqs.trap_init = visws_trap_init;
+ x86_init.timers.timer_init = visws_time_init;
+ x86_init.pci.init = pci_visws_init;
+ x86_init.pci.init_irq = x86_init_noop;
+
+ /*
+ * Install reboot quirks:
+ */
+ pm_power_off = visws_machine_power_off;
+ machine_ops.emergency_restart = visws_machine_emergency_restart;
+
+ /*
+ * Do not use broadcast IPIs:
+ */
+ no_broadcast = 0;
+
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Turn off IO-APIC detection and initialization:
+ */
+ skip_ioapic_setup = 1;
+#endif
+
+ /*
+ * Get Board rev.
+ * First, we have to initialize the 307 part to allow us access
+ * to the GPIO registers. Let's map them at 0x0fc0 which is right
+ * after the PIIX4 PM section.
+ */
+ outb_p(SIO_DEV_SEL, SIO_INDEX);
+ outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
+
+ outb_p(SIO_DEV_MSB, SIO_INDEX);
+ outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
+
+ outb_p(SIO_DEV_LSB, SIO_INDEX);
+ outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
+
+ outb_p(SIO_DEV_ENB, SIO_INDEX);
+ outb_p(1, SIO_DATA); /* Enable GPIO registers. */
+
+ /*
+ * Now, we have to map the power management section to write
+ * a bit which enables access to the GPIO registers.
+ * What lunatic came up with this shit?
+ */
+ outb_p(SIO_DEV_SEL, SIO_INDEX);
+ outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */
+
+ outb_p(SIO_DEV_MSB, SIO_INDEX);
+ outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
+
+ outb_p(SIO_DEV_LSB, SIO_INDEX);
+ outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
+
+ outb_p(SIO_DEV_ENB, SIO_INDEX);
+ outb_p(1, SIO_DATA); /* Enable PM registers. */
+
+ /*
+ * Now, write the PM register which enables the GPIO registers.
+ */
+ outb_p(SIO_PM_FER2, SIO_PM_INDEX);
+ outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
+
+ /*
+ * Now, initialize the GPIO registers.
+ * We want them all to be inputs which is the
+ * power on default, so let's leave them alone.
+ * So, let's just read the board rev!
+ */
+ raw = inb_p(SIO_GP_DATA1);
+ raw &= 0x7f; /* 7 bits of valid board revision ID. */
+
+ if (visws_board_type == VISWS_320) {
+ if (raw < 0x6) {
+ visws_board_rev = 4;
+ } else if (raw < 0xc) {
+ visws_board_rev = 5;
+ } else {
+ visws_board_rev = 6;
+ }
+ } else if (visws_board_type == VISWS_540) {
+ visws_board_rev = 2;
+ } else {
+ visws_board_rev = raw;
+ }
+
+ printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
+ (visws_board_type == VISWS_320 ? "320" :
+ (visws_board_type == VISWS_540 ? "540" :
+ "unknown")), visws_board_rev);
+}
+
+#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
+#define BCD (LI_INTB | LI_INTC | LI_INTD)
+#define ALLDEVS (A01234 | BCD)
+
+static __init void lithium_init(void)
+{
+ set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
+ set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
+
+ if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
+ (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
+ printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
+/* panic("This machine is not SGI Visual Workstation 320/540"); */
+ }
+
+ if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
+ (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
+ printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
+/* panic("This machine is not SGI Visual Workstation 320/540"); */
+ }
+
+ li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
+ li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
+}
+
+static __init void cobalt_init(void)
+{
+ /*
+ * On normal SMP PC this is used only with SMP, but we have to
+ * use it and set it up here to start the Cobalt clock
+ */
+ set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
+ setup_local_APIC();
+ printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
+ (unsigned int)apic_read(APIC_LVR),
+ (unsigned int)apic_read(APIC_ID));
+
+ set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
+ set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
+ printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
+ co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
+
+ /* Enable Cobalt APIC being careful to NOT change the ID! */
+ co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
+
+ printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
+ co_apic_read(CO_APIC_ID));
+}
+
+static void __init visws_trap_init(void)
+{
+ lithium_init();
+ cobalt_init();
+}
+
+/*
+ * IRQ controller / APIC support:
+ */
+
+static DEFINE_SPINLOCK(cobalt_lock);
+
+/*
+ * Set the given Cobalt APIC Redirection Table entry to point
+ * to the given IDT vector/index.
+ */
+static inline void co_apic_set(int entry, int irq)
+{
+ co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
+ co_apic_write(CO_APIC_HI(entry), 0);
+}
+
+/*
+ * Cobalt (IO)-APIC functions to handle PCI devices.
+ */
+static inline int co_apic_ide0_hack(void)
+{
+ extern char visws_board_type;
+ extern char visws_board_rev;
+
+ if (visws_board_type == VISWS_320 && visws_board_rev == 5)
+ return 5;
+ return CO_APIC_IDE0;
+}
+
+static int is_co_apic(unsigned int irq)
+{
+ if (IS_CO_APIC(irq))
+ return CO_APIC(irq);
+
+ switch (irq) {
+ case 0: return CO_APIC_CPU;
+ case CO_IRQ_IDE0: return co_apic_ide0_hack();
+ case CO_IRQ_IDE1: return CO_APIC_IDE1;
+ default: return -1;
+ }
+}
+
+
+/*
+ * This is the SGI Cobalt (IO-)APIC:
+ */
+static void enable_cobalt_irq(struct irq_data *data)
+{
+ co_apic_set(is_co_apic(data->irq), data->irq);
+}
+
+static void disable_cobalt_irq(struct irq_data *data)
+{
+ int entry = is_co_apic(data->irq);
+
+ co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
+ co_apic_read(CO_APIC_LO(entry));
+}
+
+static void ack_cobalt_irq(struct irq_data *data)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cobalt_lock, flags);
+ disable_cobalt_irq(data);
+ apic_write(APIC_EOI, APIC_EIO_ACK);
+ spin_unlock_irqrestore(&cobalt_lock, flags);
+}
+
+static struct irq_chip cobalt_irq_type = {
+ .name = "Cobalt-APIC",
+ .irq_enable = enable_cobalt_irq,
+ .irq_disable = disable_cobalt_irq,
+ .irq_ack = ack_cobalt_irq,
+};
+
+
+/*
+ * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
+ * -- not the manner expected by the code in i8259.c.
+ *
+ * there is a 'master' physical interrupt source that gets sent to
+ * the CPU. But in the chipset there are various 'virtual' interrupts
+ * waiting to be handled. We represent this to Linux through a 'master'
+ * interrupt controller type, and through a special virtual interrupt-
+ * controller. Device drivers only see the virtual interrupt sources.
+ */
+static unsigned int startup_piix4_master_irq(struct irq_data *data)
+{
+ legacy_pic->init(0);
+ enable_cobalt_irq(data);
+ return 0;
+}
+
+static struct irq_chip piix4_master_irq_type = {
+ .name = "PIIX4-master",
+ .irq_startup = startup_piix4_master_irq,
+ .irq_ack = ack_cobalt_irq,
+};
+
+static void pii4_mask(struct irq_data *data) { }
+
+static struct irq_chip piix4_virtual_irq_type = {
+ .name = "PIIX4-virtual",
+ .irq_mask = pii4_mask,
+};
+
+/*
+ * PIIX4-8259 master/virtual functions to handle interrupt requests
+ * from legacy devices: floppy, parallel, serial, rtc.
+ *
+ * None of these get Cobalt APIC entries, neither do they have IDT
+ * entries. These interrupts are purely virtual and distributed from
+ * the 'master' interrupt source: CO_IRQ_8259.
+ *
+ * When the 8259 interrupts its handler figures out which of these
+ * devices is interrupting and dispatches to its handler.
+ *
+ * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
+ * enable_irq gets the right irq. This 'master' irq is never directly
+ * manipulated by any driver.
+ */
+static irqreturn_t piix4_master_intr(int irq, void *dev_id)
+{
+ unsigned long flags;
+ int realirq;
+
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
+
+ /* Find out what's interrupting in the PIIX4 master 8259 */
+ outb(0x0c, 0x20); /* OCW3 Poll command */
+ realirq = inb(0x20);
+
+ /*
+ * Bit 7 == 0 means invalid/spurious
+ */
+ if (unlikely(!(realirq & 0x80)))
+ goto out_unlock;
+
+ realirq &= 7;
+
+ if (unlikely(realirq == 2)) {
+ outb(0x0c, 0xa0);
+ realirq = inb(0xa0);
+
+ if (unlikely(!(realirq & 0x80)))
+ goto out_unlock;
+
+ realirq = (realirq & 7) + 8;
+ }
+
+ /* mask and ack interrupt */
+ cached_irq_mask |= 1 << realirq;
+ if (unlikely(realirq > 7)) {
+ inb(0xa1);
+ outb(cached_slave_mask, 0xa1);
+ outb(0x60 + (realirq & 7), 0xa0);
+ outb(0x60 + 2, 0x20);
+ } else {
+ inb(0x21);
+ outb(cached_master_mask, 0x21);
+ outb(0x60 + realirq, 0x20);
+ }
+
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ /*
+ * handle this 'virtual interrupt' as a Cobalt one now.
+ */
+ generic_handle_irq(realirq);
+
+ return IRQ_HANDLED;
+
+out_unlock:
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+ return IRQ_NONE;
+}
+
+static struct irqaction master_action = {
+ .handler = piix4_master_intr,
+ .name = "PIIX4-8259",
+ .flags = IRQF_NO_THREAD,
+};
+
+static struct irqaction cascade_action = {
+ .handler = no_action,
+ .name = "cascade",
+ .flags = IRQF_NO_THREAD,
+};
+
+static inline void set_piix4_virtual_irq_type(void)
+{
+ piix4_virtual_irq_type.irq_enable = i8259A_chip.irq_unmask;
+ piix4_virtual_irq_type.irq_disable = i8259A_chip.irq_mask;
+ piix4_virtual_irq_type.irq_unmask = i8259A_chip.irq_unmask;
+}
+
+static void __init visws_pre_intr_init(void)
+{
+ int i;
+
+ set_piix4_virtual_irq_type();
+
+ for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
+ struct irq_chip *chip = NULL;
+
+ if (i == 0)
+ chip = &cobalt_irq_type;
+ else if (i == CO_IRQ_IDE0)
+ chip = &cobalt_irq_type;
+ else if (i == CO_IRQ_IDE1)
+ chip = &cobalt_irq_type;
+ else if (i == CO_IRQ_8259)
+ chip = &piix4_master_irq_type;
+ else if (i < CO_IRQ_APIC0)
+ chip = &piix4_virtual_irq_type;
+ else if (IS_CO_APIC(i))
+ chip = &cobalt_irq_type;
+
+ if (chip)
+ irq_set_chip(i, chip);
+ }
+
+ setup_irq(CO_IRQ_8259, &master_action);
+ setup_irq(2, &cascade_action);
+}