summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/apic
diff options
context:
space:
mode:
authorAnton Arapov <anton@redhat.com>2012-04-16 10:05:28 +0200
committerAnton Arapov <anton@redhat.com>2012-04-16 10:05:28 +0200
commitb4b6116a13633898cf868f2f103c96a90c4c20f8 (patch)
tree93d1b7e2cfcdf473d8d4ff3ad141fa864f8491f6 /arch/x86/kernel/apic
parentedd4be777c953e5faafc80d091d3084b4343f5d3 (diff)
downloadkernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.gz
kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.xz
kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.zip
fedora kernel: d9aad82f3319f3cfd1aebc01234254ef0c37ad84v3.3.2-1
Signed-off-by: Anton Arapov <anton@redhat.com>
Diffstat (limited to 'arch/x86/kernel/apic')
-rw-r--r--arch/x86/kernel/apic/Makefile27
-rw-r--r--arch/x86/kernel/apic/apic.c2464
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c396
-rw-r--r--arch/x86/kernel/apic/apic_noop.c191
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c294
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c278
-rw-r--r--arch/x86/kernel/apic/es7000_32.c755
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c90
-rw-r--r--arch/x86/kernel/apic/io_apic.c4043
-rw-r--r--arch/x86/kernel/apic/ipi.c167
-rw-r--r--arch/x86/kernel/apic/numaq_32.c541
-rw-r--r--arch/x86/kernel/apic/probe_32.c270
-rw-r--r--arch/x86/kernel/apic/probe_64.c79
-rw-r--r--arch/x86/kernel/apic/summit_32.c556
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c268
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c174
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c889
17 files changed, 11482 insertions, 0 deletions
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
new file mode 100644
index 00000000000..0ae0323b1f9
--- /dev/null
+++ b/arch/x86/kernel/apic/Makefile
@@ -0,0 +1,27 @@
+#
+# Makefile for local APIC drivers and for the IO-APIC code
+#
+
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o
+obj-y += hw_nmi.o
+
+obj-$(CONFIG_X86_IO_APIC) += io_apic.o
+obj-$(CONFIG_SMP) += ipi.o
+
+ifeq ($(CONFIG_X86_64),y)
+# APIC probe will depend on the listing order here
+obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o
+obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
+obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
+obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
+obj-y += apic_flat_64.o
+endif
+
+# APIC probe will depend on the listing order here
+obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
+obj-$(CONFIG_X86_SUMMIT) += summit_32.o
+obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o
+obj-$(CONFIG_X86_ES7000) += es7000_32.o
+
+# For 32bit, probe_32 need to be listed last
+obj-$(CONFIG_X86_LOCAL_APIC) += probe_$(BITS).o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
new file mode 100644
index 00000000000..2eec05b6d1b
--- /dev/null
+++ b/arch/x86/kernel/apic/apic.c
@@ -0,0 +1,2464 @@
+/*
+ * Local APIC handling, local APIC timers
+ *
+ * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively.
+ * Maciej W. Rozycki : Various updates and fixes.
+ * Mikael Pettersson : Power Management for UP-APIC.
+ * Pavel Machek and
+ * Mikael Pettersson : PM converted to driver model.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/acpi_pmtmr.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/bootmem.h>
+#include <linux/ftrace.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/syscore_ops.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/i8253.h>
+#include <linux/dmar.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/dmi.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+
+#include <asm/perf_event.h>
+#include <asm/x86_init.h>
+#include <asm/pgalloc.h>
+#include <linux/atomic.h>
+#include <asm/mpspec.h>
+#include <asm/i8259.h>
+#include <asm/proto.h>
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/desc.h>
+#include <asm/hpet.h>
+#include <asm/idle.h>
+#include <asm/mtrr.h>
+#include <asm/time.h>
+#include <asm/smp.h>
+#include <asm/mce.h>
+#include <asm/tsc.h>
+#include <asm/hypervisor.h>
+
+unsigned int num_processors;
+
+unsigned disabled_cpus __cpuinitdata;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+
+/*
+ * The highest APIC ID seen during enumeration.
+ */
+unsigned int max_physical_apicid;
+
+/*
+ * Bitmask of physically existing CPUs:
+ */
+physid_mask_t phys_cpu_present_map;
+
+/*
+ * Map cpu index to physical APIC ID
+ */
+DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
+#ifdef CONFIG_X86_32
+
+/*
+ * On x86_32, the mapping between cpu and logical apicid may vary
+ * depending on apic in use. The following early percpu variable is
+ * used for the mapping. This is where the behaviors of x86_64 and 32
+ * actually diverge. Let's keep it ugly for now.
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+
+/*
+ * Knob to control our willingness to enable the local APIC.
+ *
+ * +1=force-enable
+ */
+static int force_enable_local_apic __initdata;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+ force_enable_local_apic = 1;
+ return 0;
+}
+early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
+/*
+ * Handle interrupt mode configuration register (IMCR).
+ * This register controls whether the interrupt signals
+ * that reach the BSP come from the master PIC or from the
+ * local APIC. Before entering Symmetric I/O Mode, either
+ * the BIOS or the operating system must switch out of
+ * PIC Mode by changing the IMCR.
+ */
+static inline void imcr_pic_to_apic(void)
+{
+ /* select IMCR register */
+ outb(0x70, 0x22);
+ /* NMI and 8259 INTR go through APIC */
+ outb(0x01, 0x23);
+}
+
+static inline void imcr_apic_to_pic(void)
+{
+ /* select IMCR register */
+ outb(0x70, 0x22);
+ /* NMI and 8259 INTR go directly to BSP */
+ outb(0x00, 0x23);
+}
+#endif
+
+#ifdef CONFIG_X86_64
+static int apic_calibrate_pmtmr __initdata;
+static __init int setup_apicpmtimer(char *s)
+{
+ apic_calibrate_pmtmr = 1;
+ notsc_setup(NULL);
+ return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
+int x2apic_mode;
+#ifdef CONFIG_X86_X2APIC
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
+static int x2apic_disabled;
+static int nox2apic;
+static __init int setup_nox2apic(char *str)
+{
+ if (x2apic_enabled()) {
+ int apicid = native_apic_msr_read(APIC_ID);
+
+ if (apicid >= 255) {
+ pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
+ apicid);
+ return 0;
+ }
+
+ pr_warning("x2apic already enabled. will disable it\n");
+ } else
+ setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+
+ nox2apic = 1;
+
+ return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
+
+unsigned long mp_lapic_addr;
+int disable_apic;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk */
+static int disable_apic_timer __initdata;
+/* Local APIC timer works in C2 */
+int local_apic_timer_c2_ok;
+EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
+
+int first_system_vector = 0xfe;
+
+/*
+ * Debug level, exported for io_apic.c
+ */
+unsigned int apic_verbosity;
+
+int pic_mode;
+
+/* Have we found an MP table */
+int smp_found_config;
+
+static struct resource lapic_resource = {
+ .name = "Local APIC",
+ .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
+unsigned int lapic_timer_frequency = 0;
+
+static void apic_pm_activate(void);
+
+static unsigned long apic_phys;
+
+/*
+ * Get the LAPIC version
+ */
+static inline int lapic_get_version(void)
+{
+ return GET_APIC_VERSION(apic_read(APIC_LVR));
+}
+
+/*
+ * Check, if the APIC is integrated or a separate chip
+ */
+static inline int lapic_is_integrated(void)
+{
+#ifdef CONFIG_X86_64
+ return 1;
+#else
+ return APIC_INTEGRATED(lapic_get_version());
+#endif
+}
+
+/*
+ * Check, whether this is a modern or a first generation APIC
+ */
+static int modern_apic(void)
+{
+ /* AMD systems use old APIC versions, so check the CPU */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ boot_cpu_data.x86 >= 0xf)
+ return 1;
+ return lapic_get_version() >= 0x14;
+}
+
+/*
+ * right after this call apic become NOOP driven
+ * so apic->write/read doesn't do anything
+ */
+static void __init apic_disable(void)
+{
+ pr_info("APIC: switched to apic NOOP\n");
+ apic = &apic_noop;
+}
+
+void native_apic_wait_icr_idle(void)
+{
+ while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
+ cpu_relax();
+}
+
+u32 native_safe_apic_wait_icr_idle(void)
+{
+ u32 send_status;
+ int timeout;
+
+ timeout = 0;
+ do {
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ if (!send_status)
+ break;
+ inc_irq_stat(icr_read_retry_count);
+ udelay(100);
+ } while (timeout++ < 1000);
+
+ return send_status;
+}
+
+void native_apic_icr_write(u32 low, u32 id)
+{
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+ apic_write(APIC_ICR, low);
+}
+
+u64 native_apic_icr_read(void)
+{
+ u32 icr1, icr2;
+
+ icr2 = apic_read(APIC_ICR2);
+ icr1 = apic_read(APIC_ICR);
+
+ return icr1 | ((u64)icr2 << 32);
+}
+
+#ifdef CONFIG_X86_32
+/**
+ * get_physical_broadcast - Get number of physical broadcast IDs
+ */
+int get_physical_broadcast(void)
+{
+ return modern_apic() ? 0xff : 0xf;
+}
+#endif
+
+/**
+ * lapic_get_maxlvt - get the maximum number of local vector table entries
+ */
+int lapic_get_maxlvt(void)
+{
+ unsigned int v;
+
+ v = apic_read(APIC_LVR);
+ /*
+ * - we always have APIC integrated on 64bit mode
+ * - 82489DXs do not report # of LVT entries
+ */
+ return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
+}
+
+/*
+ * Local APIC timer
+ */
+
+/* Clock divisor */
+#define APIC_DIVISOR 16
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clock. During calibration we actually call
+ * this function twice on the boot CPU, once with a bogus timeout
+ * value, second time for real. The other (noncalibrating) CPUs
+ * call this function only once, with the real, calibrated value.
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
+{
+ unsigned int lvtt_value, tmp_value;
+
+ lvtt_value = LOCAL_TIMER_VECTOR;
+ if (!oneshot)
+ lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+ if (!lapic_is_integrated())
+ lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+
+ if (!irqen)
+ lvtt_value |= APIC_LVT_MASKED;
+
+ apic_write(APIC_LVTT, lvtt_value);
+
+ /*
+ * Divide PICLK by 16
+ */
+ tmp_value = apic_read(APIC_TDCR);
+ apic_write(APIC_TDCR,
+ (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
+ APIC_TDR_DIV_16);
+
+ if (!oneshot)
+ apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
+}
+
+/*
+ * Setup extended LVT, AMD specific
+ *
+ * Software should use the LVT offsets the BIOS provides. The offsets
+ * are determined by the subsystems using it like those for MCE
+ * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts
+ * are supported. Beginning with family 10h at least 4 offsets are
+ * available.
+ *
+ * Since the offsets must be consistent for all cores, we keep track
+ * of the LVT offsets in software and reserve the offset for the same
+ * vector also to be used on other cores. An offset is freed by
+ * setting the entry to APIC_EILVT_MASKED.
+ *
+ * If the BIOS is right, there should be no conflicts. Otherwise a
+ * "[Firmware Bug]: ..." error message is generated. However, if
+ * software does not properly determines the offsets, it is not
+ * necessarily a BIOS bug.
+ */
+
+static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
+
+static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
+{
+ return (old & APIC_EILVT_MASKED)
+ || (new == APIC_EILVT_MASKED)
+ || ((new & ~APIC_EILVT_MASKED) == old);
+}
+
+static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
+{
+ unsigned int rsvd; /* 0: uninitialized */
+
+ if (offset >= APIC_EILVT_NR_MAX)
+ return ~0;
+
+ rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED;
+ do {
+ if (rsvd &&
+ !eilvt_entry_is_changeable(rsvd, new))
+ /* may not change if vectors are different */
+ return rsvd;
+ rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
+ } while (rsvd != new);
+
+ return new;
+}
+
+/*
+ * If mask=1, the LVT entry does not generate interrupts while mask=0
+ * enables the vector. See also the BKDGs. Must be called with
+ * preemption disabled.
+ */
+
+int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
+{
+ unsigned long reg = APIC_EILVTn(offset);
+ unsigned int new, old, reserved;
+
+ new = (mask << 16) | (msg_type << 8) | vector;
+ old = apic_read(reg);
+ reserved = reserve_eilvt_offset(offset, new);
+
+ if (reserved != new) {
+ pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+ "vector 0x%x, but the register is already in use for "
+ "vector 0x%x on another cpu\n",
+ smp_processor_id(), reg, offset, new, reserved);
+ return -EINVAL;
+ }
+
+ if (!eilvt_entry_is_changeable(old, new)) {
+ pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+ "vector 0x%x, but the register is already in use for "
+ "vector 0x%x on this cpu\n",
+ smp_processor_id(), reg, offset, new, old);
+ return -EBUSY;
+ }
+
+ apic_write(reg, new);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
+
+/*
+ * Program the next event, relative to now
+ */
+static int lapic_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ apic_write(APIC_TMICT, delta);
+ return 0;
+}
+
+/*
+ * Setup the lapic timer in periodic or oneshot mode
+ */
+static void lapic_timer_setup(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+ unsigned long flags;
+ unsigned int v;
+
+ /* Lapic used as dummy for broadcast ? */
+ if (evt->features & CLOCK_EVT_FEAT_DUMMY)
+ return;
+
+ local_irq_save(flags);
+
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ case CLOCK_EVT_MODE_ONESHOT:
+ __setup_APIC_LVTT(lapic_timer_frequency,
+ mode != CLOCK_EVT_MODE_PERIODIC, 1);
+ break;
+ case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ v = apic_read(APIC_LVTT);
+ v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+ apic_write(APIC_LVTT, v);
+ apic_write(APIC_TMICT, 0);
+ break;
+ case CLOCK_EVT_MODE_RESUME:
+ /* Nothing to do here */
+ break;
+ }
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Local APIC timer broadcast function
+ */
+static void lapic_timer_broadcast(const struct cpumask *mask)
+{
+#ifdef CONFIG_SMP
+ apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+#endif
+}
+
+
+/*
+ * The local apic timer can be used for any function which is CPU local.
+ */
+static struct clock_event_device lapic_clockevent = {
+ .name = "lapic",
+ .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
+ | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
+ .shift = 32,
+ .set_mode = lapic_timer_setup,
+ .set_next_event = lapic_next_event,
+ .broadcast = lapic_timer_broadcast,
+ .rating = 100,
+ .irq = -1,
+};
+static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+
+/*
+ * Setup the local APIC timer for this CPU. Copy the initialized values
+ * of the boot CPU and register the clock event in the framework.
+ */
+static void __cpuinit setup_APIC_timer(void)
+{
+ struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+
+ if (this_cpu_has(X86_FEATURE_ARAT)) {
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
+ /* Make LAPIC timer preferrable over percpu HPET */
+ lapic_clockevent.rating = 150;
+ }
+
+ memcpy(levt, &lapic_clockevent, sizeof(*levt));
+ levt->cpumask = cpumask_of(smp_processor_id());
+
+ clockevents_register_device(levt);
+}
+
+/*
+ * In this functions we calibrate APIC bus clocks to the external timer.
+ *
+ * We want to do the calibration only once since we want to have local timer
+ * irqs syncron. CPUs connected by the same APIC bus have the very same bus
+ * frequency.
+ *
+ * This was previously done by reading the PIT/HPET and waiting for a wrap
+ * around to find out, that a tick has elapsed. I have a box, where the PIT
+ * readout is broken, so it never gets out of the wait loop again. This was
+ * also reported by others.
+ *
+ * Monitoring the jiffies value is inaccurate and the clockevents
+ * infrastructure allows us to do a simple substitution of the interrupt
+ * handler.
+ *
+ * The calibration routine also uses the pm_timer when possible, as the PIT
+ * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
+ * back to normal later in the boot process).
+ */
+
+#define LAPIC_CAL_LOOPS (HZ/10)
+
+static __initdata int lapic_cal_loops = -1;
+static __initdata long lapic_cal_t1, lapic_cal_t2;
+static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
+static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
+static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
+
+/*
+ * Temporary interrupt handler.
+ */
+static void __init lapic_cal_handler(struct clock_event_device *dev)
+{
+ unsigned long long tsc = 0;
+ long tapic = apic_read(APIC_TMCCT);
+ unsigned long pm = acpi_pm_read_early();
+
+ if (cpu_has_tsc)
+ rdtscll(tsc);
+
+ switch (lapic_cal_loops++) {
+ case 0:
+ lapic_cal_t1 = tapic;
+ lapic_cal_tsc1 = tsc;
+ lapic_cal_pm1 = pm;
+ lapic_cal_j1 = jiffies;
+ break;
+
+ case LAPIC_CAL_LOOPS:
+ lapic_cal_t2 = tapic;
+ lapic_cal_tsc2 = tsc;
+ if (pm < lapic_cal_pm1)
+ pm += ACPI_PM_OVRRUN;
+ lapic_cal_pm2 = pm;
+ lapic_cal_j2 = jiffies;
+ break;
+ }
+}
+
+static int __init
+calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
+{
+ const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
+ const long pm_thresh = pm_100ms / 100;
+ unsigned long mult;
+ u64 res;
+
+#ifndef CONFIG_X86_PM_TIMER
+ return -1;
+#endif
+
+ apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
+
+ /* Check, if the PM timer is available */
+ if (!deltapm)
+ return -1;
+
+ mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+ if (deltapm > (pm_100ms - pm_thresh) &&
+ deltapm < (pm_100ms + pm_thresh)) {
+ apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
+ return 0;
+ }
+
+ res = (((u64)deltapm) * mult) >> 22;
+ do_div(res, 1000000);
+ pr_warning("APIC calibration not consistent "
+ "with PM-Timer: %ldms instead of 100ms\n",(long)res);
+
+ /* Correct the lapic counter value */
+ res = (((u64)(*delta)) * pm_100ms);
+ do_div(res, deltapm);
+ pr_info("APIC delta adjusted to PM-Timer: "
+ "%lu (%ld)\n", (unsigned long)res, *delta);
+ *delta = (long)res;
+
+ /* Correct the tsc counter value */
+ if (cpu_has_tsc) {
+ res = (((u64)(*deltatsc)) * pm_100ms);
+ do_div(res, deltapm);
+ apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
+ "PM-Timer: %lu (%ld)\n",
+ (unsigned long)res, *deltatsc);
+ *deltatsc = (long)res;
+ }
+
+ return 0;
+}
+
+static int __init calibrate_APIC_clock(void)
+{
+ struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+ void (*real_handler)(struct clock_event_device *dev);
+ unsigned long deltaj;
+ long delta, deltatsc;
+ int pm_referenced = 0;
+
+ /**
+ * check if lapic timer has already been calibrated by platform
+ * specific routine, such as tsc calibration code. if so, we just fill
+ * in the clockevent structure and return.
+ */
+
+ if (lapic_timer_frequency) {
+ apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
+ lapic_timer_frequency);
+ lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
+ TICK_NSEC, lapic_clockevent.shift);
+ lapic_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+ lapic_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &lapic_clockevent);
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+ return 0;
+ }
+
+ local_irq_disable();
+
+ /* Replace the global interrupt handler */
+ real_handler = global_clock_event->event_handler;
+ global_clock_event->event_handler = lapic_cal_handler;
+
+ /*
+ * Setup the APIC counter to maximum. There is no way the lapic
+ * can underflow in the 100ms detection time frame
+ */
+ __setup_APIC_LVTT(0xffffffff, 0, 0);
+
+ /* Let the interrupts run */
+ local_irq_enable();
+
+ while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+ cpu_relax();
+
+ local_irq_disable();
+
+ /* Restore the real event handler */
+ global_clock_event->event_handler = real_handler;
+
+ /* Build delta t1-t2 as apic timer counts down */
+ delta = lapic_cal_t1 - lapic_cal_t2;
+ apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+
+ deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
+
+ /* we trust the PM based calibration if possible */
+ pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
+ &delta, &deltatsc);
+
+ /* Calculate the scaled math multiplication factor */
+ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
+ lapic_clockevent.shift);
+ lapic_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
+ lapic_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &lapic_clockevent);
+
+ lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+
+ apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
+ apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
+ apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
+ lapic_timer_frequency);
+
+ if (cpu_has_tsc) {
+ apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
+ "%ld.%04ld MHz.\n",
+ (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
+ (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
+ }
+
+ apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
+ "%u.%04u MHz.\n",
+ lapic_timer_frequency / (1000000 / HZ),
+ lapic_timer_frequency % (1000000 / HZ));
+
+ /*
+ * Do a sanity check on the APIC calibration result
+ */
+ if (lapic_timer_frequency < (1000000 / HZ)) {
+ local_irq_enable();
+ pr_warning("APIC frequency too slow, disabling apic timer\n");
+ return -1;
+ }
+
+ levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
+
+ /*
+ * PM timer calibration failed or not turned on
+ * so lets try APIC timer based calibration
+ */
+ if (!pm_referenced) {
+ apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
+
+ /*
+ * Setup the apic timer manually
+ */
+ levt->event_handler = lapic_cal_handler;
+ lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
+ lapic_cal_loops = -1;
+
+ /* Let the interrupts run */
+ local_irq_enable();
+
+ while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+ cpu_relax();
+
+ /* Stop the lapic timer */
+ lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
+
+ /* Jiffies delta */
+ deltaj = lapic_cal_j2 - lapic_cal_j1;
+ apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+
+ /* Check, if the jiffies result is consistent */
+ if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
+ apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
+ else
+ levt->features |= CLOCK_EVT_FEAT_DUMMY;
+ } else
+ local_irq_enable();
+
+ if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
+ pr_warning("APIC timer disabled due to verification failure\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Setup the boot APIC
+ *
+ * Calibrate and verify the result.
+ */
+void __init setup_boot_APIC_clock(void)
+{
+ /*
+ * The local apic timer can be disabled via the kernel
+ * commandline or from the CPU detection code. Register the lapic
+ * timer as a dummy clock event source on SMP systems, so the
+ * broadcast mechanism is used. On UP systems simply ignore it.
+ */
+ if (disable_apic_timer) {
+ pr_info("Disabling APIC timer\n");
+ /* No broadcast on UP ! */
+ if (num_possible_cpus() > 1) {
+ lapic_clockevent.mult = 1;
+ setup_APIC_timer();
+ }
+ return;
+ }
+
+ apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
+ "calibrating APIC timer ...\n");
+
+ if (calibrate_APIC_clock()) {
+ /* No broadcast on UP ! */
+ if (num_possible_cpus() > 1)
+ setup_APIC_timer();
+ return;
+ }
+
+ /*
+ * If nmi_watchdog is set to IO_APIC, we need the
+ * PIT/HPET going. Otherwise register lapic as a dummy
+ * device.
+ */
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+
+ /* Setup the lapic or request the broadcast */
+ setup_APIC_timer();
+}
+
+void __cpuinit setup_secondary_APIC_clock(void)
+{
+ setup_APIC_timer();
+}
+
+/*
+ * The guts of the apic timer interrupt
+ */
+static void local_apic_timer_interrupt(void)
+{
+ int cpu = smp_processor_id();
+ struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
+
+ /*
+ * Normally we should not be here till LAPIC has been initialized but
+ * in some cases like kdump, its possible that there is a pending LAPIC
+ * timer interrupt from previous kernel's context and is delivered in
+ * new kernel the moment interrupts are enabled.
+ *
+ * Interrupts are enabled early and LAPIC is setup much later, hence
+ * its possible that when we get here evt->event_handler is NULL.
+ * Check for event_handler being NULL and discard the interrupt as
+ * spurious.
+ */
+ if (!evt->event_handler) {
+ pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+ /* Switch it off */
+ lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
+ return;
+ }
+
+ /*
+ * the NMI deadlock-detector uses this.
+ */
+ inc_irq_stat(apic_timer_irqs);
+
+ evt->event_handler(evt);
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ * interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ /*
+ * NOTE! We'd better ACK the irq immediately,
+ * because timer handling can be slow.
+ */
+ ack_APIC_irq();
+ /*
+ * update_process_times() expects us to have done irq_enter().
+ * Besides, if we don't timer interrupts ignore the global
+ * interrupt lock, which is the WrongThing (tm) to do.
+ */
+ irq_enter();
+ exit_idle();
+ local_apic_timer_interrupt();
+ irq_exit();
+
+ set_irq_regs(old_regs);
+}
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+ return -EINVAL;
+}
+
+/*
+ * Local APIC start and shutdown
+ */
+
+/**
+ * clear_local_APIC - shutdown the local APIC
+ *
+ * This is called, when a CPU is disabled and before rebooting, so the state of
+ * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
+ * leftovers during boot.
+ */
+void clear_local_APIC(void)
+{
+ int maxlvt;
+ u32 v;
+
+ /* APIC hasn't been mapped yet */
+ if (!x2apic_mode && !apic_phys)
+ return;
+
+ maxlvt = lapic_get_maxlvt();
+ /*
+ * Masking an LVT entry can trigger a local APIC error
+ * if the vector is zero. Mask LVTERR first to prevent this.
+ */
+ if (maxlvt >= 3) {
+ v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
+ apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+ }
+ /*
+ * Careful: we have to set masks only first to deassert
+ * any level-triggered sources.
+ */
+ v = apic_read(APIC_LVTT);
+ apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
+ v = apic_read(APIC_LVT0);
+ apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+ v = apic_read(APIC_LVT1);
+ apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
+ if (maxlvt >= 4) {
+ v = apic_read(APIC_LVTPC);
+ apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
+ }
+
+ /* lets not touch this if we didn't frob it */
+#ifdef CONFIG_X86_THERMAL_VECTOR
+ if (maxlvt >= 5) {
+ v = apic_read(APIC_LVTTHMR);
+ apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
+ }
+#endif
+#ifdef CONFIG_X86_MCE_INTEL
+ if (maxlvt >= 6) {
+ v = apic_read(APIC_LVTCMCI);
+ if (!(v & APIC_LVT_MASKED))
+ apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
+ }
+#endif
+
+ /*
+ * Clean APIC state for other OSs:
+ */
+ apic_write(APIC_LVTT, APIC_LVT_MASKED);
+ apic_write(APIC_LVT0, APIC_LVT_MASKED);
+ apic_write(APIC_LVT1, APIC_LVT_MASKED);
+ if (maxlvt >= 3)
+ apic_write(APIC_LVTERR, APIC_LVT_MASKED);
+ if (maxlvt >= 4)
+ apic_write(APIC_LVTPC, APIC_LVT_MASKED);
+
+ /* Integrated APIC (!82489DX) ? */
+ if (lapic_is_integrated()) {
+ if (maxlvt > 3)
+ /* Clear ESR due to Pentium errata 3AP and 11AP */
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+}
+
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
+void disable_local_APIC(void)
+{
+ unsigned int value;
+
+ /* APIC hasn't been mapped yet */
+ if (!x2apic_mode && !apic_phys)
+ return;
+
+ clear_local_APIC();
+
+ /*
+ * Disable APIC (implies clearing of registers
+ * for 82489DX!).
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_SPIV_APIC_ENABLED;
+ apic_write(APIC_SPIV, value);
+
+#ifdef CONFIG_X86_32
+ /*
+ * When LAPIC was disabled by the BIOS and enabled by the kernel,
+ * restore the disabled state.
+ */
+ if (enabled_via_apicbase) {
+ unsigned int l, h;
+
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ l &= ~MSR_IA32_APICBASE_ENABLE;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ }
+#endif
+}
+
+/*
+ * If Linux enabled the LAPIC against the BIOS default disable it down before
+ * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
+ * not power-off. Additionally clear all LVT entries before disable_local_APIC
+ * for the case where Linux didn't enable the LAPIC.
+ */
+void lapic_shutdown(void)
+{
+ unsigned long flags;
+
+ if (!cpu_has_apic && !apic_from_smp_config())
+ return;
+
+ local_irq_save(flags);
+
+#ifdef CONFIG_X86_32
+ if (!enabled_via_apicbase)
+ clear_local_APIC();
+ else
+#endif
+ disable_local_APIC();
+
+
+ local_irq_restore(flags);
+}
+
+/*
+ * This is to verify that we're looking at a real local APIC.
+ * Check these against your board if the CPUs aren't getting
+ * started for no apparent reason.
+ */
+int __init verify_local_APIC(void)
+{
+ unsigned int reg0, reg1;
+
+ /*
+ * The version register is read-only in a real APIC.
+ */
+ reg0 = apic_read(APIC_LVR);
+ apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
+ apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
+ reg1 = apic_read(APIC_LVR);
+ apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
+
+ /*
+ * The two version reads above should print the same
+ * numbers. If the second one is different, then we
+ * poke at a non-APIC.
+ */
+ if (reg1 != reg0)
+ return 0;
+
+ /*
+ * Check if the version looks reasonably.
+ */
+ reg1 = GET_APIC_VERSION(reg0);
+ if (reg1 == 0x00 || reg1 == 0xff)
+ return 0;
+ reg1 = lapic_get_maxlvt();
+ if (reg1 < 0x02 || reg1 == 0xff)
+ return 0;
+
+ /*
+ * The ID register is read/write in a real APIC.
+ */
+ reg0 = apic_read(APIC_ID);
+ apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
+ apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
+ reg1 = apic_read(APIC_ID);
+ apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
+ apic_write(APIC_ID, reg0);
+ if (reg1 != (reg0 ^ apic->apic_id_mask))
+ return 0;
+
+ /*
+ * The next two are just to see if we have sane values.
+ * They're only really relevant if we're in Virtual Wire
+ * compatibility mode, but most boxes are anymore.
+ */
+ reg0 = apic_read(APIC_LVT0);
+ apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
+ reg1 = apic_read(APIC_LVT1);
+ apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
+
+ return 1;
+}
+
+/**
+ * sync_Arb_IDs - synchronize APIC bus arbitration IDs
+ */
+void __init sync_Arb_IDs(void)
+{
+ /*
+ * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
+ * needed on AMD.
+ */
+ if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return;
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
+ apic_write(APIC_ICR, APIC_DEST_ALLINC |
+ APIC_INT_LEVELTRIG | APIC_DM_INIT);
+}
+
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+ unsigned int value;
+
+ /*
+ * Don't do the setup now if we have a SMP BIOS as the
+ * through-I/O-APIC virtual wire mode might be active.
+ */
+ if (smp_found_config || !cpu_has_apic)
+ return;
+
+ /*
+ * Do not trust the local APIC being empty at bootup.
+ */
+ clear_local_APIC();
+
+ /*
+ * Enable APIC.
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+ /* This bit is reserved on P4/Xeon and should be cleared */
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ (boot_cpu_data.x86 == 15))
+ value &= ~APIC_SPIV_FOCUS_DISABLED;
+ else
+#endif
+ value |= APIC_SPIV_FOCUS_DISABLED;
+ value |= SPURIOUS_APIC_VECTOR;
+ apic_write(APIC_SPIV, value);
+
+ /*
+ * Set up the virtual wire mode.
+ */
+ apic_write(APIC_LVT0, APIC_DM_EXTINT);
+ value = APIC_DM_NMI;
+ if (!lapic_is_integrated()) /* 82489DX */
+ value |= APIC_LVT_LEVEL_TRIGGER;
+ apic_write(APIC_LVT1, value);
+}
+
+static void __cpuinit lapic_setup_esr(void)
+{
+ unsigned int oldvalue, value, maxlvt;
+
+ if (!lapic_is_integrated()) {
+ pr_info("No ESR for 82489DX.\n");
+ return;
+ }
+
+ if (apic->disable_esr) {
+ /*
+ * Something untraceable is creating bad interrupts on
+ * secondary quads ... for the moment, just leave the
+ * ESR disabled - we can't do anything useful with the
+ * errors anyway - mbligh
+ */
+ pr_info("Leaving ESR disabled.\n");
+ return;
+ }
+
+ maxlvt = lapic_get_maxlvt();
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ oldvalue = apic_read(APIC_ESR);
+
+ /* enables sending errors */
+ value = ERROR_APIC_VECTOR;
+ apic_write(APIC_LVTERR, value);
+
+ /*
+ * spec says clear errors after enabling vector.
+ */
+ if (maxlvt > 3)
+ apic_write(APIC_ESR, 0);
+ value = apic_read(APIC_ESR);
+ if (value != oldvalue)
+ apic_printk(APIC_VERBOSE, "ESR value before enabling "
+ "vector: 0x%08x after: 0x%08x\n",
+ oldvalue, value);
+}
+
+/**
+ * setup_local_APIC - setup the local APIC
+ *
+ * Used to setup local APIC while initializing BSP or bringin up APs.
+ * Always called with preemption disabled.
+ */
+void __cpuinit setup_local_APIC(void)
+{
+ int cpu = smp_processor_id();
+ unsigned int value, queued;
+ int i, j, acked = 0;
+ unsigned long long tsc = 0, ntsc;
+ long long max_loops = cpu_khz;
+
+ if (cpu_has_tsc)
+ rdtscll(tsc);
+
+ if (disable_apic) {
+ disable_ioapic_support();
+ return;
+ }
+
+#ifdef CONFIG_X86_32
+ /* Pound the ESR really hard over the head with a big hammer - mbligh */
+ if (lapic_is_integrated() && apic->disable_esr) {
+ apic_write(APIC_ESR, 0);
+ apic_write(APIC_ESR, 0);
+ apic_write(APIC_ESR, 0);
+ apic_write(APIC_ESR, 0);
+ }
+#endif
+ perf_events_lapic_init();
+
+ /*
+ * Double-check whether this APIC is really registered.
+ * This is meaningless in clustered apic mode, so we skip it.
+ */
+ BUG_ON(!apic->apic_id_registered());
+
+ /*
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+ apic->init_apic_ldr();
+
+#ifdef CONFIG_X86_32
+ /*
+ * APIC LDR is initialized. If logical_apicid mapping was
+ * initialized during get_smp_config(), make sure it matches the
+ * actual value.
+ */
+ i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
+ /* always use the value from LDR */
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ logical_smp_processor_id();
+
+ /*
+ * Some NUMA implementations (NUMAQ) don't initialize apicid to
+ * node mapping during NUMA init. Now that logical apicid is
+ * guaranteed to be known, give it another chance. This is already
+ * a bit too late - percpu allocation has already happened without
+ * proper NUMA affinity.
+ */
+ if (apic->x86_32_numa_cpu_node)
+ set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
+ apic->x86_32_numa_cpu_node(cpu));
+#endif
+
+ /*
+ * Set Task Priority to 'accept all'. We never change this
+ * later on.
+ */
+ value = apic_read(APIC_TASKPRI);
+ value &= ~APIC_TPRI_MASK;
+ apic_write(APIC_TASKPRI, value);
+
+ /*
+ * After a crash, we no longer service the interrupts and a pending
+ * interrupt from previous kernel might still have ISR bit set.
+ *
+ * Most probably by now CPU has serviced that pending interrupt and
+ * it might not have done the ack_APIC_irq() because it thought,
+ * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
+ * does not clear the ISR bit and cpu thinks it has already serivced
+ * the interrupt. Hence a vector might get locked. It was noticed
+ * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
+ */
+ do {
+ queued = 0;
+ for (i = APIC_ISR_NR - 1; i >= 0; i--)
+ queued |= apic_read(APIC_IRR + i*0x10);
+
+ for (i = APIC_ISR_NR - 1; i >= 0; i--) {
+ value = apic_read(APIC_ISR + i*0x10);
+ for (j = 31; j >= 0; j--) {
+ if (value & (1<<j)) {
+ ack_APIC_irq();
+ acked++;
+ }
+ }
+ }
+ if (acked > 256) {
+ printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n",
+ acked);
+ break;
+ }
+ if (cpu_has_tsc) {
+ rdtscll(ntsc);
+ max_loops = (cpu_khz << 10) - (ntsc - tsc);
+ } else
+ max_loops--;
+ } while (queued && max_loops > 0);
+ WARN_ON(max_loops <= 0);
+
+ /*
+ * Now that we are all set up, enable the APIC
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ /*
+ * Enable APIC
+ */
+ value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+ /*
+ * Some unknown Intel IO/APIC (or APIC) errata is biting us with
+ * certain networking cards. If high frequency interrupts are
+ * happening on a particular IOAPIC pin, plus the IOAPIC routing
+ * entry is masked/unmasked at a high rate as well then sooner or
+ * later IOAPIC line gets 'stuck', no more interrupts are received
+ * from the device. If focus CPU is disabled then the hang goes
+ * away, oh well :-(
+ *
+ * [ This bug can be reproduced easily with a level-triggered
+ * PCI Ne2000 networking cards and PII/PIII processors, dual
+ * BX chipset. ]
+ */
+ /*
+ * Actually disabling the focus CPU check just makes the hang less
+ * frequent as it makes the interrupt distributon model be more
+ * like LRU than MRU (the short-term load is more even across CPUs).
+ * See also the comment in end_level_ioapic_irq(). --macro
+ */
+
+ /*
+ * - enable focus processor (bit==0)
+ * - 64bit mode always use processor focus
+ * so no need to set it
+ */
+ value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
+
+ /*
+ * Set spurious IRQ vector
+ */
+ value |= SPURIOUS_APIC_VECTOR;
+ apic_write(APIC_SPIV, value);
+
+ /*
+ * Set up LVT0, LVT1:
+ *
+ * set up through-local-APIC on the BP's LINT0. This is not
+ * strictly necessary in pure symmetric-IO mode, but sometimes
+ * we delegate interrupts to the 8259A.
+ */
+ /*
+ * TODO: set up through-local-APIC from through-I/O-APIC? --macro
+ */
+ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
+ if (!cpu && (pic_mode || !value)) {
+ value = APIC_DM_EXTINT;
+ apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
+ } else {
+ value = APIC_DM_EXTINT | APIC_LVT_MASKED;
+ apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
+ }
+ apic_write(APIC_LVT0, value);
+
+ /*
+ * only the BP should see the LINT1 NMI signal, obviously.
+ */
+ if (!cpu)
+ value = APIC_DM_NMI;
+ else
+ value = APIC_DM_NMI | APIC_LVT_MASKED;
+ if (!lapic_is_integrated()) /* 82489DX */
+ value |= APIC_LVT_LEVEL_TRIGGER;
+ apic_write(APIC_LVT1, value);
+
+#ifdef CONFIG_X86_MCE_INTEL
+ /* Recheck CMCI information after local APIC is up on CPU #0 */
+ if (!cpu)
+ cmci_recheck();
+#endif
+}
+
+void __cpuinit end_local_APIC_setup(void)
+{
+ lapic_setup_esr();
+
+#ifdef CONFIG_X86_32
+ {
+ unsigned int value;
+ /* Disable the local apic timer */
+ value = apic_read(APIC_LVTT);
+ value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+ apic_write(APIC_LVTT, value);
+ }
+#endif
+
+ apic_pm_activate();
+}
+
+void __init bsp_end_local_APIC_setup(void)
+{
+ end_local_APIC_setup();
+
+ /*
+ * Now that local APIC setup is completed for BP, configure the fault
+ * handling for interrupt remapping.
+ */
+ if (intr_remapping_enabled)
+ enable_drhd_fault_handling();
+
+}
+
+#ifdef CONFIG_X86_X2APIC
+/*
+ * Need to disable xapic and x2apic at the same time and then enable xapic mode
+ */
+static inline void __disable_x2apic(u64 msr)
+{
+ wrmsrl(MSR_IA32_APICBASE,
+ msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
+ wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
+}
+
+static __init void disable_x2apic(void)
+{
+ u64 msr;
+
+ if (!cpu_has_x2apic)
+ return;
+
+ rdmsrl(MSR_IA32_APICBASE, msr);
+ if (msr & X2APIC_ENABLE) {
+ u32 x2apic_id = read_apic_id();
+
+ if (x2apic_id >= 255)
+ panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
+
+ pr_info("Disabling x2apic\n");
+ __disable_x2apic(msr);
+
+ if (nox2apic) {
+ clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC);
+ setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+ }
+
+ x2apic_disabled = 1;
+ x2apic_mode = 0;
+
+ register_lapic_address(mp_lapic_addr);
+ }
+}
+
+void check_x2apic(void)
+{
+ if (x2apic_enabled()) {
+ pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
+ x2apic_preenabled = x2apic_mode = 1;
+ }
+}
+
+void enable_x2apic(void)
+{
+ u64 msr;
+
+ rdmsrl(MSR_IA32_APICBASE, msr);
+ if (x2apic_disabled) {
+ __disable_x2apic(msr);
+ return;
+ }
+
+ if (!x2apic_mode)
+ return;
+
+ if (!(msr & X2APIC_ENABLE)) {
+ printk_once(KERN_INFO "Enabling x2apic\n");
+ wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
+ }
+}
+#endif /* CONFIG_X86_X2APIC */
+
+int __init enable_IR(void)
+{
+#ifdef CONFIG_IRQ_REMAP
+ if (!intr_remapping_supported()) {
+ pr_debug("intr-remapping not supported\n");
+ return -1;
+ }
+
+ if (!x2apic_preenabled && skip_ioapic_setup) {
+ pr_info("Skipped enabling intr-remap because of skipping "
+ "io-apic setup\n");
+ return -1;
+ }
+
+ return enable_intr_remapping();
+#endif
+ return -1;
+}
+
+void __init enable_IR_x2apic(void)
+{
+ unsigned long flags;
+ int ret, x2apic_enabled = 0;
+ int dmar_table_init_ret;
+
+ dmar_table_init_ret = dmar_table_init();
+ if (dmar_table_init_ret && !x2apic_supported())
+ return;
+
+ ret = save_ioapic_entries();
+ if (ret) {
+ pr_info("Saving IO-APIC state failed: %d\n", ret);
+ return;
+ }
+
+ local_irq_save(flags);
+ legacy_pic->mask_all();
+ mask_ioapic_entries();
+
+ if (x2apic_preenabled && nox2apic)
+ disable_x2apic();
+
+ if (dmar_table_init_ret)
+ ret = -1;
+ else
+ ret = enable_IR();
+
+ if (!x2apic_supported())
+ goto skip_x2apic;
+
+ if (ret < 0) {
+ /* IR is required if there is APIC ID > 255 even when running
+ * under KVM
+ */
+ if (max_physical_apicid > 255 ||
+ !hypervisor_x2apic_available()) {
+ if (x2apic_preenabled)
+ disable_x2apic();
+ goto skip_x2apic;
+ }
+ /*
+ * without IR all CPUs can be addressed by IOAPIC/MSI
+ * only in physical mode
+ */
+ x2apic_force_phys();
+ }
+
+ if (ret == IRQ_REMAP_XAPIC_MODE) {
+ pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
+ goto skip_x2apic;
+ }
+
+ x2apic_enabled = 1;
+
+ if (x2apic_supported() && !x2apic_mode) {
+ x2apic_mode = 1;
+ enable_x2apic();
+ pr_info("Enabled x2apic\n");
+ }
+
+skip_x2apic:
+ if (ret < 0) /* IR enabling failed */
+ restore_ioapic_entries();
+ legacy_pic->restore_mask();
+ local_irq_restore(flags);
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+ if (!cpu_has_apic) {
+ pr_info("No local APIC present\n");
+ return -1;
+ }
+
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+ return 0;
+}
+#else
+
+static int __init apic_verify(void)
+{
+ u32 features, h, l;
+
+ /*
+ * The APIC feature bit should now be enabled
+ * in `cpuid'
+ */
+ features = cpuid_edx(1);
+ if (!(features & (1 << X86_FEATURE_APIC))) {
+ pr_warning("Could not enable APIC!\n");
+ return -1;
+ }
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ /* The BIOS may have set up the APIC at some other address */
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ if (l & MSR_IA32_APICBASE_ENABLE)
+ mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+ pr_info("Found and enabled local APIC!\n");
+ return 0;
+}
+
+int __init apic_force_enable(unsigned long addr)
+{
+ u32 h, l;
+
+ if (disable_apic)
+ return -1;
+
+ /*
+ * Some BIOSes disable the local APIC in the APIC_BASE
+ * MSR. This can only be done in software for Intel P6 or later
+ * and AMD K7 (Model > 1) or later.
+ */
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+ pr_info("Local APIC disabled by BIOS -- reenabling.\n");
+ l &= ~MSR_IA32_APICBASE_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | addr;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ enabled_via_apicbase = 1;
+ }
+ return apic_verify();
+}
+
+/*
+ * Detect and initialize APIC
+ */
+static int __init detect_init_APIC(void)
+{
+ /* Disabled by kernel option? */
+ if (disable_apic)
+ return -1;
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
+ (boot_cpu_data.x86 >= 15))
+ break;
+ goto no_apic;
+ case X86_VENDOR_INTEL:
+ if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
+ (boot_cpu_data.x86 == 5 && cpu_has_apic))
+ break;
+ goto no_apic;
+ default:
+ goto no_apic;
+ }
+
+ if (!cpu_has_apic) {
+ /*
+ * Over-ride BIOS and try to enable the local APIC only if
+ * "lapic" specified.
+ */
+ if (!force_enable_local_apic) {
+ pr_info("Local APIC disabled by BIOS -- "
+ "you can enable it with \"lapic\"\n");
+ return -1;
+ }
+ if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
+ return -1;
+ } else {
+ if (apic_verify())
+ return -1;
+ }
+
+ apic_pm_activate();
+
+ return 0;
+
+no_apic:
+ pr_info("No local APIC present or hardware disabled\n");
+ return -1;
+}
+#endif
+
+/**
+ * init_apic_mappings - initialize APIC mappings
+ */
+void __init init_apic_mappings(void)
+{
+ unsigned int new_apicid;
+
+ if (x2apic_mode) {
+ boot_cpu_physical_apicid = read_apic_id();
+ return;
+ }
+
+ /* If no local APIC can be found return early */
+ if (!smp_found_config && detect_init_APIC()) {
+ /* lets NOP'ify apic operations */
+ pr_info("APIC: disable apic facility\n");
+ apic_disable();
+ } else {
+ apic_phys = mp_lapic_addr;
+
+ /*
+ * acpi lapic path already maps that address in
+ * acpi_register_lapic_address()
+ */
+ if (!acpi_lapic && !smp_found_config)
+ register_lapic_address(apic_phys);
+ }
+
+ /*
+ * Fetch the APIC ID of the BSP in case we have a
+ * default configuration (or the MP table is broken).
+ */
+ new_apicid = read_apic_id();
+ if (boot_cpu_physical_apicid != new_apicid) {
+ boot_cpu_physical_apicid = new_apicid;
+ /*
+ * yeah -- we lie about apic_version
+ * in case if apic was disabled via boot option
+ * but it's not a problem for SMP compiled kernel
+ * since smp_sanity_check is prepared for such a case
+ * and disable smp mode
+ */
+ apic_version[new_apicid] =
+ GET_APIC_VERSION(apic_read(APIC_LVR));
+ }
+}
+
+void __init register_lapic_address(unsigned long address)
+{
+ mp_lapic_addr = address;
+
+ if (!x2apic_mode) {
+ set_fixmap_nocache(FIX_APIC_BASE, address);
+ apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+ APIC_BASE, mp_lapic_addr);
+ }
+ if (boot_cpu_physical_apicid == -1U) {
+ boot_cpu_physical_apicid = read_apic_id();
+ apic_version[boot_cpu_physical_apicid] =
+ GET_APIC_VERSION(apic_read(APIC_LVR));
+ }
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int apic_version[MAX_LOCAL_APIC];
+
+int __init APIC_init_uniprocessor(void)
+{
+ if (disable_apic) {
+ pr_info("Apic disabled\n");
+ return -1;
+ }
+#ifdef CONFIG_X86_64
+ if (!cpu_has_apic) {
+ disable_apic = 1;
+ pr_info("Apic disabled by BIOS\n");
+ return -1;
+ }
+#else
+ if (!smp_found_config && !cpu_has_apic)
+ return -1;
+
+ /*
+ * Complain if the BIOS pretends there is one.
+ */
+ if (!cpu_has_apic &&
+ APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+ pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
+ boot_cpu_physical_apicid);
+ return -1;
+ }
+#endif
+
+ default_setup_apic_routing();
+
+ verify_local_APIC();
+ connect_bsp_APIC();
+
+#ifdef CONFIG_X86_64
+ apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
+ /*
+ * Hack: In case of kdump, after a crash, kernel might be booting
+ * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
+ * might be zero if read from MP tables. Get it from LAPIC.
+ */
+# ifdef CONFIG_CRASH_DUMP
+ boot_cpu_physical_apicid = read_apic_id();
+# endif
+#endif
+ physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
+ setup_local_APIC();
+
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Now enable IO-APICs, actually call clear_IO_APIC
+ * We need clear_IO_APIC before enabling error vector
+ */
+ if (!skip_ioapic_setup && nr_ioapics)
+ enable_IO_APIC();
+#endif
+
+ bsp_end_local_APIC_setup();
+
+#ifdef CONFIG_X86_IO_APIC
+ if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+ else {
+ nr_ioapics = 0;
+ }
+#endif
+
+ x86_init.timers.setup_percpu_clockev();
+ return 0;
+}
+
+/*
+ * Local APIC interrupts
+ */
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+void smp_spurious_interrupt(struct pt_regs *regs)
+{
+ u32 v;
+
+ irq_enter();
+ exit_idle();
+ /*
+ * Check if this really is a spurious interrupt and ACK it
+ * if it is a vectored one. Just in case...
+ * Spurious interrupts should not be ACKed.
+ */
+ v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+ if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+ ack_APIC_irq();
+
+ inc_irq_stat(irq_spurious_count);
+
+ /* see sw-dev-man vol 3, chapter 7.4.13.5 */
+ pr_info("spurious APIC interrupt on CPU#%d, "
+ "should never happen.\n", smp_processor_id());
+ irq_exit();
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+void smp_error_interrupt(struct pt_regs *regs)
+{
+ u32 v0, v1;
+ u32 i = 0;
+ static const char * const error_interrupt_reason[] = {
+ "Send CS error", /* APIC Error Bit 0 */
+ "Receive CS error", /* APIC Error Bit 1 */
+ "Send accept error", /* APIC Error Bit 2 */
+ "Receive accept error", /* APIC Error Bit 3 */
+ "Redirectable IPI", /* APIC Error Bit 4 */
+ "Send illegal vector", /* APIC Error Bit 5 */
+ "Received illegal vector", /* APIC Error Bit 6 */
+ "Illegal register address", /* APIC Error Bit 7 */
+ };
+
+ irq_enter();
+ exit_idle();
+ /* First tickle the hardware, only then report what went on. -- REW */
+ v0 = apic_read(APIC_ESR);
+ apic_write(APIC_ESR, 0);
+ v1 = apic_read(APIC_ESR);
+ ack_APIC_irq();
+ atomic_inc(&irq_err_count);
+
+ apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)",
+ smp_processor_id(), v0 , v1);
+
+ v1 = v1 & 0xff;
+ while (v1) {
+ if (v1 & 0x1)
+ apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
+ i++;
+ v1 >>= 1;
+ };
+
+ apic_printk(APIC_DEBUG, KERN_CONT "\n");
+
+ irq_exit();
+}
+
+/**
+ * connect_bsp_APIC - attach the APIC to the interrupt system
+ */
+void __init connect_bsp_APIC(void)
+{
+#ifdef CONFIG_X86_32
+ if (pic_mode) {
+ /*
+ * Do not trust the local APIC being empty at bootup.
+ */
+ clear_local_APIC();
+ /*
+ * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
+ * local APIC to INT and NMI lines.
+ */
+ apic_printk(APIC_VERBOSE, "leaving PIC mode, "
+ "enabling APIC mode.\n");
+ imcr_pic_to_apic();
+ }
+#endif
+ if (apic->enable_apic_mode)
+ apic->enable_apic_mode();
+}
+
+/**
+ * disconnect_bsp_APIC - detach the APIC from the interrupt system
+ * @virt_wire_setup: indicates, whether virtual wire mode is selected
+ *
+ * Virtual wire mode is necessary to deliver legacy interrupts even when the
+ * APIC is disabled.
+ */
+void disconnect_bsp_APIC(int virt_wire_setup)
+{
+ unsigned int value;
+
+#ifdef CONFIG_X86_32
+ if (pic_mode) {
+ /*
+ * Put the board back into PIC mode (has an effect only on
+ * certain older boards). Note that APIC interrupts, including
+ * IPIs, won't work beyond this point! The only exception are
+ * INIT IPIs.
+ */
+ apic_printk(APIC_VERBOSE, "disabling APIC mode, "
+ "entering PIC mode.\n");
+ imcr_apic_to_pic();
+ return;
+ }
+#endif
+
+ /* Go back to Virtual Wire compatibility mode */
+
+ /* For the spurious interrupt use vector F, and enable it */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ value |= APIC_SPIV_APIC_ENABLED;
+ value |= 0xf;
+ apic_write(APIC_SPIV, value);
+
+ if (!virt_wire_setup) {
+ /*
+ * For LVT0 make it edge triggered, active high,
+ * external and enabled
+ */
+ value = apic_read(APIC_LVT0);
+ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+ apic_write(APIC_LVT0, value);
+ } else {
+ /* Disable LVT0 */
+ apic_write(APIC_LVT0, APIC_LVT_MASKED);
+ }
+
+ /*
+ * For LVT1 make it edge triggered, active high,
+ * nmi and enabled
+ */
+ value = apic_read(APIC_LVT1);
+ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+ apic_write(APIC_LVT1, value);
+}
+
+void __cpuinit generic_processor_info(int apicid, int version)
+{
+ int cpu, max = nr_cpu_ids;
+ bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
+ phys_cpu_present_map);
+
+ /*
+ * If boot cpu has not been detected yet, then only allow upto
+ * nr_cpu_ids - 1 processors and keep one slot free for boot cpu
+ */
+ if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 &&
+ apicid != boot_cpu_physical_apicid) {
+ int thiscpu = max + disabled_cpus - 1;
+
+ pr_warning(
+ "ACPI: NR_CPUS/possible_cpus limit of %i almost"
+ " reached. Keeping one slot for boot cpu."
+ " Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+
+ disabled_cpus++;
+ return;
+ }
+
+ if (num_processors >= nr_cpu_ids) {
+ int thiscpu = max + disabled_cpus;
+
+ pr_warning(
+ "ACPI: NR_CPUS/possible_cpus limit of %i reached."
+ " Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+
+ disabled_cpus++;
+ return;
+ }
+
+ num_processors++;
+ if (apicid == boot_cpu_physical_apicid) {
+ /*
+ * x86_bios_cpu_apicid is required to have processors listed
+ * in same order as logical cpu numbers. Hence the first
+ * entry is BSP, and so on.
+ * boot_cpu_init() already hold bit 0 in cpu_present_mask
+ * for BSP.
+ */
+ cpu = 0;
+ } else
+ cpu = cpumask_next_zero(-1, cpu_present_mask);
+
+ /*
+ * Validate version
+ */
+ if (version == 0x0) {
+ pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
+ cpu, apicid);
+ version = 0x10;
+ }
+ apic_version[apicid] = version;
+
+ if (version != apic_version[boot_cpu_physical_apicid]) {
+ pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+ apic_version[boot_cpu_physical_apicid], cpu, version);
+ }
+
+ physid_set(apicid, phys_cpu_present_map);
+ if (apicid > max_physical_apicid)
+ max_physical_apicid = apicid;
+
+#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
+ early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+ early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
+#endif
+#ifdef CONFIG_X86_32
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ apic->x86_32_early_logical_apicid(cpu);
+#endif
+ set_cpu_possible(cpu, true);
+ set_cpu_present(cpu, true);
+}
+
+int hard_smp_processor_id(void)
+{
+ return read_apic_id();
+}
+
+void default_init_apic_ldr(void)
+{
+ unsigned long val;
+
+ apic_write(APIC_DFR, APIC_DFR_VALUE);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
+ apic_write(APIC_LDR, val);
+}
+
+/*
+ * Power management
+ */
+#ifdef CONFIG_PM
+
+static struct {
+ /*
+ * 'active' is true if the local APIC was enabled by us and
+ * not the BIOS; this signifies that we are also responsible
+ * for disabling it before entering apm/acpi suspend
+ */
+ int active;
+ /* r/w apic fields */
+ unsigned int apic_id;
+ unsigned int apic_taskpri;
+ unsigned int apic_ldr;
+ unsigned int apic_dfr;
+ unsigned int apic_spiv;
+ unsigned int apic_lvtt;
+ unsigned int apic_lvtpc;
+ unsigned int apic_lvt0;
+ unsigned int apic_lvt1;
+ unsigned int apic_lvterr;
+ unsigned int apic_tmict;
+ unsigned int apic_tdcr;
+ unsigned int apic_thmr;
+} apic_pm_state;
+
+static int lapic_suspend(void)
+{
+ unsigned long flags;
+ int maxlvt;
+
+ if (!apic_pm_state.active)
+ return 0;
+
+ maxlvt = lapic_get_maxlvt();
+
+ apic_pm_state.apic_id = apic_read(APIC_ID);
+ apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
+ apic_pm_state.apic_ldr = apic_read(APIC_LDR);
+ apic_pm_state.apic_dfr = apic_read(APIC_DFR);
+ apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
+ apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
+ if (maxlvt >= 4)
+ apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+ apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
+ apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
+ apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
+ apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
+ apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
+#ifdef CONFIG_X86_THERMAL_VECTOR
+ if (maxlvt >= 5)
+ apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#endif
+
+ local_irq_save(flags);
+ disable_local_APIC();
+
+ if (intr_remapping_enabled)
+ disable_intr_remapping();
+
+ local_irq_restore(flags);
+ return 0;
+}
+
+static void lapic_resume(void)
+{
+ unsigned int l, h;
+ unsigned long flags;
+ int maxlvt;
+
+ if (!apic_pm_state.active)
+ return;
+
+ local_irq_save(flags);
+ if (intr_remapping_enabled) {
+ /*
+ * IO-APIC and PIC have their own resume routines.
+ * We just mask them here to make sure the interrupt
+ * subsystem is completely quiet while we enable x2apic
+ * and interrupt-remapping.
+ */
+ mask_ioapic_entries();
+ legacy_pic->mask_all();
+ }
+
+ if (x2apic_mode)
+ enable_x2apic();
+ else {
+ /*
+ * Make sure the APICBASE points to the right address
+ *
+ * FIXME! This will be wrong if we ever support suspend on
+ * SMP! We'll need to do this as part of the CPU restore!
+ */
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ l &= ~MSR_IA32_APICBASE_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ }
+
+ maxlvt = lapic_get_maxlvt();
+ apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
+ apic_write(APIC_ID, apic_pm_state.apic_id);
+ apic_write(APIC_DFR, apic_pm_state.apic_dfr);
+ apic_write(APIC_LDR, apic_pm_state.apic_ldr);
+ apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
+ apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
+ apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
+ apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
+#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+ if (maxlvt >= 5)
+ apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+#endif
+ if (maxlvt >= 4)
+ apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+ apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
+ apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
+ apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+
+ if (intr_remapping_enabled)
+ reenable_intr_remapping(x2apic_mode);
+
+ local_irq_restore(flags);
+}
+
+/*
+ * This device has no shutdown method - fully functioning local APICs
+ * are needed on every CPU up until machine_halt/restart/poweroff.
+ */
+
+static struct syscore_ops lapic_syscore_ops = {
+ .resume = lapic_resume,
+ .suspend = lapic_suspend,
+};
+
+static void __cpuinit apic_pm_activate(void)
+{
+ apic_pm_state.active = 1;
+}
+
+static int __init init_lapic_sysfs(void)
+{
+ /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
+ if (cpu_has_apic)
+ register_syscore_ops(&lapic_syscore_ops);
+
+ return 0;
+}
+
+/* local apic needs to resume before other devices access its registers. */
+core_initcall(init_lapic_sysfs);
+
+#else /* CONFIG_PM */
+
+static void apic_pm_activate(void) { }
+
+#endif /* CONFIG_PM */
+
+#ifdef CONFIG_X86_64
+
+static int __cpuinit apic_cluster_num(void)
+{
+ int i, clusters, zeros;
+ unsigned id;
+ u16 *bios_cpu_apicid;
+ DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
+
+ bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
+ bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
+
+ for (i = 0; i < nr_cpu_ids; i++) {
+ /* are we being called early in kernel startup? */
+ if (bios_cpu_apicid) {
+ id = bios_cpu_apicid[i];
+ } else if (i < nr_cpu_ids) {
+ if (cpu_present(i))
+ id = per_cpu(x86_bios_cpu_apicid, i);
+ else
+ continue;
+ } else
+ break;
+
+ if (id != BAD_APICID)
+ __set_bit(APIC_CLUSTERID(id), clustermap);
+ }
+
+ /* Problem: Partially populated chassis may not have CPUs in some of
+ * the APIC clusters they have been allocated. Only present CPUs have
+ * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+ * Since clusters are allocated sequentially, count zeros only if
+ * they are bounded by ones.
+ */
+ clusters = 0;
+ zeros = 0;
+ for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+ if (test_bit(i, clustermap)) {
+ clusters += 1 + zeros;
+ zeros = 0;
+ } else
+ ++zeros;
+ }
+
+ return clusters;
+}
+
+static int __cpuinitdata multi_checked;
+static int __cpuinitdata multi;
+
+static int __cpuinit set_multi(const struct dmi_system_id *d)
+{
+ if (multi)
+ return 0;
+ pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
+ multi = 1;
+ return 0;
+}
+
+static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
+ {
+ .callback = set_multi,
+ .ident = "IBM System Summit2",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
+ },
+ },
+ {}
+};
+
+static void __cpuinit dmi_check_multi(void)
+{
+ if (multi_checked)
+ return;
+
+ dmi_check_system(multi_dmi_table);
+ multi_checked = 1;
+}
+
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis.
+ * Use DMI to check them
+ */
+__cpuinit int apic_is_clustered_box(void)
+{
+ dmi_check_multi();
+ if (multi)
+ return 1;
+
+ if (!is_vsmp_box())
+ return 0;
+
+ /*
+ * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+ * not guaranteed to be synced between boards
+ */
+ if (apic_cluster_num() > 1)
+ return 1;
+
+ return 0;
+}
+#endif
+
+/*
+ * APIC command line parameters
+ */
+static int __init setup_disableapic(char *arg)
+{
+ disable_apic = 1;
+ setup_clear_cpu_cap(X86_FEATURE_APIC);
+ return 0;
+}
+early_param("disableapic", setup_disableapic);
+
+/* same as disableapic, for compatibility */
+static int __init setup_nolapic(char *arg)
+{
+ return setup_disableapic(arg);
+}
+early_param("nolapic", setup_nolapic);
+
+static int __init parse_lapic_timer_c2_ok(char *arg)
+{
+ local_apic_timer_c2_ok = 1;
+ return 0;
+}
+early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
+
+static int __init parse_disable_apic_timer(char *arg)
+{
+ disable_apic_timer = 1;
+ return 0;
+}
+early_param("noapictimer", parse_disable_apic_timer);
+
+static int __init parse_nolapic_timer(char *arg)
+{
+ disable_apic_timer = 1;
+ return 0;
+}
+early_param("nolapic_timer", parse_nolapic_timer);
+
+static int __init apic_set_verbosity(char *arg)
+{
+ if (!arg) {
+#ifdef CONFIG_X86_64
+ skip_ioapic_setup = 0;
+ return 0;
+#endif
+ return -EINVAL;
+ }
+
+ if (strcmp("debug", arg) == 0)
+ apic_verbosity = APIC_DEBUG;
+ else if (strcmp("verbose", arg) == 0)
+ apic_verbosity = APIC_VERBOSE;
+ else {
+ pr_warning("APIC Verbosity level %s not recognised"
+ " use apic=verbose or apic=debug\n", arg);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+early_param("apic", apic_set_verbosity);
+
+static int __init lapic_insert_resource(void)
+{
+ if (!apic_phys)
+ return -1;
+
+ /* Put local APIC into the resource map. */
+ lapic_resource.start = apic_phys;
+ lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+ insert_resource(&iomem_resource, &lapic_resource);
+
+ return 0;
+}
+
+/*
+ * need call insert after e820_reserve_resources()
+ * that is using request_resource
+ */
+late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
new file mode 100644
index 00000000000..8c3cdded6f2
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Flat APIC subarch code.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/module.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+
+static struct apic apic_physflat;
+static struct apic apic_flat;
+
+struct apic __read_mostly *apic = &apic_flat;
+EXPORT_SYMBOL_GPL(apic);
+
+static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return 1;
+}
+
+static const struct cpumask *flat_target_cpus(void)
+{
+ return cpu_online_mask;
+}
+
+static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ /* Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+ * priority interrupt delivery mode.
+ *
+ * In particular there was a hyperthreading cpu observed to
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+void flat_init_apic_ldr(void)
+{
+ unsigned long val;
+ unsigned long num, id;
+
+ num = smp_processor_id();
+ id = 1UL << num;
+ apic_write(APIC_DFR, APIC_DFR_FLAT);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val |= SET_APIC_LOGICAL_ID(id);
+ apic_write(APIC_LDR, val);
+}
+
+static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
+ local_irq_restore(flags);
+}
+
+static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+ unsigned long mask = cpumask_bits(cpumask)[0];
+
+ _flat_send_IPI_mask(mask, vector);
+}
+
+static void
+ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
+{
+ unsigned long mask = cpumask_bits(cpumask)[0];
+ int cpu = smp_processor_id();
+
+ if (cpu < BITS_PER_LONG)
+ clear_bit(cpu, &mask);
+
+ _flat_send_IPI_mask(mask, vector);
+}
+
+static void flat_send_IPI_allbutself(int vector)
+{
+ int cpu = smp_processor_id();
+#ifdef CONFIG_HOTPLUG_CPU
+ int hotplug = 1;
+#else
+ int hotplug = 0;
+#endif
+ if (hotplug || vector == NMI_VECTOR) {
+ if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
+ unsigned long mask = cpumask_bits(cpu_online_mask)[0];
+
+ if (cpu < BITS_PER_LONG)
+ clear_bit(cpu, &mask);
+
+ _flat_send_IPI_mask(mask, vector);
+ }
+ } else if (num_online_cpus() > 1) {
+ __default_send_IPI_shortcut(APIC_DEST_ALLBUT,
+ vector, apic->dest_logical);
+ }
+}
+
+static void flat_send_IPI_all(int vector)
+{
+ if (vector == NMI_VECTOR) {
+ flat_send_IPI_mask(cpu_online_mask, vector);
+ } else {
+ __default_send_IPI_shortcut(APIC_DEST_ALLINC,
+ vector, apic->dest_logical);
+ }
+}
+
+static unsigned int flat_get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ id = (((x)>>24) & 0xFFu);
+
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ x = ((id & 0xFFu)<<24);
+ return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+ unsigned int id;
+
+ id = flat_get_apic_id(apic_read(APIC_ID));
+ return id;
+}
+
+static int flat_apic_id_registered(void)
+{
+ return physid_isset(read_xapic_id(), phys_cpu_present_map);
+}
+
+static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+ return initial_apic_id >> index_msb;
+}
+
+static int flat_probe(void)
+{
+ return 1;
+}
+
+static struct apic apic_flat = {
+ .name = "flat",
+ .probe = flat_probe,
+ .acpi_madt_oem_check = flat_acpi_madt_oem_check,
+ .apic_id_registered = flat_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ .irq_dest_mode = 1, /* logical */
+
+ .target_cpus = flat_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = flat_vector_allocation_domain,
+ .init_apic_ldr = flat_init_apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = flat_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = flat_get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = 0xFFu << 24,
+
+ .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = flat_send_IPI_mask,
+ .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = flat_send_IPI_allbutself,
+ .send_IPI_all = flat_send_IPI_all,
+ .send_IPI_self = apic_send_IPI_self,
+
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
+
+/*
+ * Physflat mode is used when there are more than 8 CPUs on a system.
+ * We cannot use logical delivery in this case because the mask
+ * overflows, so use physical mode.
+ */
+static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+#ifdef CONFIG_ACPI
+ /*
+ * Quirk: some x86_64 machines can only use physical APIC mode
+ * regardless of how many processors are present (x86_64 ES7000
+ * is an example).
+ */
+ if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+ printk(KERN_DEBUG "system APIC only can use physical flat");
+ return 1;
+ }
+
+ if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) {
+ printk(KERN_DEBUG "IBM Summit detected, will use apic physical");
+ return 1;
+ }
+#endif
+
+ return 0;
+}
+
+static const struct cpumask *physflat_target_cpus(void)
+{
+ return cpu_online_mask;
+}
+
+static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
+}
+
+static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+ default_send_IPI_mask_sequence_phys(cpumask, vector);
+}
+
+static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+ int vector)
+{
+ default_send_IPI_mask_allbutself_phys(cpumask, vector);
+}
+
+static void physflat_send_IPI_allbutself(int vector)
+{
+ default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
+}
+
+static void physflat_send_IPI_all(int vector)
+{
+ physflat_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = cpumask_first(cpumask);
+ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int
+physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ }
+ return per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+static int physflat_probe(void)
+{
+ if (apic == &apic_physflat || num_possible_cpus() > 8)
+ return 1;
+
+ return 0;
+}
+
+static struct apic apic_physflat = {
+
+ .name = "physical flat",
+ .probe = physflat_probe,
+ .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
+ .apic_id_registered = flat_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ .irq_dest_mode = 0, /* physical */
+
+ .target_cpus = physflat_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = 0,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = physflat_vector_allocation_domain,
+ /* not needed, but shouldn't hurt: */
+ .init_apic_ldr = flat_init_apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = flat_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = flat_get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = 0xFFu << 24,
+
+ .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = physflat_send_IPI_mask,
+ .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = physflat_send_IPI_allbutself,
+ .send_IPI_all = physflat_send_IPI_all,
+ .send_IPI_self = apic_send_IPI_self,
+
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
+
+/*
+ * We need to check for physflat first, so this order is important.
+ */
+apic_drivers(apic_physflat, apic_flat);
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
new file mode 100644
index 00000000000..775b82bc655
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -0,0 +1,191 @@
+/*
+ * NOOP APIC driver.
+ *
+ * Does almost nothing and should be substituted by a real apic driver via
+ * probe routine.
+ *
+ * Though in case if apic is disabled (for some reason) we try
+ * to not uglify the caller's code and allow to call (some) apic routines
+ * like self-ipi, etc...
+ */
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+#include <asm/apic.h>
+#include <asm/setup.h>
+
+#include <linux/smp.h>
+#include <asm/ipi.h>
+
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/e820.h>
+
+static void noop_init_apic_ldr(void) { }
+static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_allbutself(int vector) { }
+static void noop_send_IPI_all(int vector) { }
+static void noop_send_IPI_self(int vector) { }
+static void noop_apic_wait_icr_idle(void) { }
+static void noop_apic_icr_write(u32 low, u32 id) { }
+
+static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
+{
+ return -1;
+}
+
+static u32 noop_safe_apic_wait_icr_idle(void)
+{
+ return 0;
+}
+
+static u64 noop_apic_icr_read(void)
+{
+ return 0;
+}
+
+static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return 0;
+}
+
+static unsigned int noop_get_apic_id(unsigned long x)
+{
+ return 0;
+}
+
+static int noop_probe(void)
+{
+ /*
+ * NOOP apic should not ever be
+ * enabled via probe routine
+ */
+ return 0;
+}
+
+static int noop_apic_id_registered(void)
+{
+ /*
+ * if we would be really "pedantic"
+ * we should pass read_apic_id() here
+ * but since NOOP suppose APIC ID = 0
+ * lets save a few cycles
+ */
+ return physid_isset(0, phys_cpu_present_map);
+}
+
+static const struct cpumask *noop_target_cpus(void)
+{
+ /* only BSP here */
+ return cpumask_of(0);
+}
+
+static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid)
+{
+ return physid_isset(apicid, *map);
+}
+
+static unsigned long noop_check_apicid_present(int bit)
+{
+ return physid_isset(bit, phys_cpu_present_map);
+}
+
+static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ if (cpu != 0)
+ pr_warning("APIC: Vector allocated for non-BSP cpu\n");
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
+}
+
+static u32 noop_apic_read(u32 reg)
+{
+ WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+ return 0;
+}
+
+static void noop_apic_write(u32 reg, u32 v)
+{
+ WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+}
+
+struct apic apic_noop = {
+ .name = "noop",
+ .probe = noop_probe,
+ .acpi_madt_oem_check = NULL,
+
+ .apic_id_registered = noop_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ /* logical delivery broadcast to all CPUs: */
+ .irq_dest_mode = 1,
+
+ .target_cpus = noop_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = noop_check_apicid_used,
+ .check_apicid_present = noop_check_apicid_present,
+
+ .vector_allocation_domain = noop_vector_allocation_domain,
+ .init_apic_ldr = noop_init_apic_ldr,
+
+ .ioapic_phys_id_map = default_ioapic_phys_id_map,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = physid_set_mask_of_physid,
+
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+
+ .phys_pkg_id = noop_phys_pkg_id,
+
+ .mps_oem_check = NULL,
+
+ .get_apic_id = noop_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0x0F << 24,
+
+ .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = noop_send_IPI_mask,
+ .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = noop_send_IPI_allbutself,
+ .send_IPI_all = noop_send_IPI_all,
+ .send_IPI_self = noop_send_IPI_self,
+
+ .wakeup_secondary_cpu = noop_wakeup_secondary_cpu,
+
+ /* should be safe */
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+
+ .wait_for_init_deassert = NULL,
+
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL,
+
+ .read = noop_apic_read,
+ .write = noop_apic_write,
+ .icr_read = noop_apic_icr_read,
+ .icr_write = noop_apic_icr_write,
+ .wait_icr_idle = noop_apic_wait_icr_idle,
+ .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
+
+#ifdef CONFIG_X86_32
+ .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
+#endif
+};
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
new file mode 100644
index 00000000000..09d3d8c1cd9
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -0,0 +1,294 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific APIC Code
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/delay.h>
+
+#include <asm/numachip/numachip_csr.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/apic_flat_64.h>
+
+static int numachip_system __read_mostly;
+
+static struct apic apic_numachip __read_mostly;
+
+static unsigned int get_apic_id(unsigned long x)
+{
+ unsigned long value;
+ unsigned int id;
+
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+ id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
+
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ x = ((id & 0xffU) << 24);
+ return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+ return get_apic_id(apic_read(APIC_ID));
+}
+
+static int numachip_apic_id_registered(void)
+{
+ return physid_isset(read_xapic_id(), phys_cpu_present_map);
+}
+
+static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+ return initial_apic_id >> index_msb;
+}
+
+static const struct cpumask *numachip_target_cpus(void)
+{
+ return cpu_online_mask;
+}
+
+static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
+}
+
+static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+{
+ union numachip_csr_g3_ext_irq_gen int_gen;
+
+ int_gen.s._destination_apic_id = phys_apicid;
+ int_gen.s._vector = 0;
+ int_gen.s._msgtype = APIC_DM_INIT >> 8;
+ int_gen.s._index = 0;
+
+ write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+
+ int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
+ int_gen.s._vector = start_rip >> 12;
+
+ write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+
+ atomic_set(&init_deasserted, 1);
+ return 0;
+}
+
+static void numachip_send_IPI_one(int cpu, int vector)
+{
+ union numachip_csr_g3_ext_irq_gen int_gen;
+ int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+
+ int_gen.s._destination_apic_id = apicid;
+ int_gen.s._vector = vector;
+ int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
+ int_gen.s._index = 0;
+
+ write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+}
+
+static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask)
+ numachip_send_IPI_one(cpu, vector);
+}
+
+static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector)
+{
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask) {
+ if (cpu != this_cpu)
+ numachip_send_IPI_one(cpu, vector);
+ }
+}
+
+static void numachip_send_IPI_allbutself(int vector)
+{
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu) {
+ if (cpu != this_cpu)
+ numachip_send_IPI_one(cpu, vector);
+ }
+}
+
+static void numachip_send_IPI_all(int vector)
+{
+ numachip_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static void numachip_send_IPI_self(int vector)
+{
+ __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
+
+static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = cpumask_first(cpumask);
+ if (likely((unsigned)cpu < nr_cpu_ids))
+ return per_cpu(x86_cpu_to_apicid, cpu);
+
+ return BAD_APICID;
+}
+
+static unsigned int
+numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ }
+ return per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+static int __init numachip_probe(void)
+{
+ return apic == &apic_numachip;
+}
+
+static void __init map_csrs(void)
+{
+ printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
+ NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
+ init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
+
+ printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
+ NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
+ init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
+}
+
+static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
+{
+ c->phys_proc_id = node;
+ per_cpu(cpu_llc_id, smp_processor_id()) = node;
+}
+
+static int __init numachip_system_init(void)
+{
+ unsigned int val;
+
+ if (!numachip_system)
+ return 0;
+
+ x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+
+ map_csrs();
+
+ val = read_lcsr(CSR_G0_NODE_IDS);
+ printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
+
+ return 0;
+}
+early_initcall(numachip_system_init);
+
+static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (!strncmp(oem_id, "NUMASC", 6)) {
+ numachip_system = 1;
+ return 1;
+ }
+
+ return 0;
+}
+
+static struct apic apic_numachip __refconst = {
+
+ .name = "NumaConnect system",
+ .probe = numachip_probe,
+ .acpi_madt_oem_check = numachip_acpi_madt_oem_check,
+ .apic_id_registered = numachip_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ .irq_dest_mode = 0, /* physical */
+
+ .target_cpus = numachip_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = 0,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = numachip_vector_allocation_domain,
+ .init_apic_ldr = flat_init_apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = numachip_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = 0xffU << 24,
+
+ .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = numachip_send_IPI_mask,
+ .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = numachip_send_IPI_allbutself,
+ .send_IPI_all = numachip_send_IPI_all,
+ .send_IPI_self = numachip_send_IPI_self,
+
+ .wakeup_secondary_cpu = numachip_wakeup_secondary,
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL, /* REMRD not supported */
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
+apic_driver(apic_numachip);
+
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
new file mode 100644
index 00000000000..521bead0113
--- /dev/null
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -0,0 +1,278 @@
+/*
+ * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs.
+ *
+ * Drives the local APIC in "clustered mode".
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+#include <linux/smp.h>
+
+#include <asm/apicdef.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+static unsigned bigsmp_get_apic_id(unsigned long x)
+{
+ return (x >> 24) & 0xFF;
+}
+
+static int bigsmp_apic_id_registered(void)
+{
+ return 1;
+}
+
+static const struct cpumask *bigsmp_target_cpus(void)
+{
+#ifdef CONFIG_SMP
+ return cpu_online_mask;
+#else
+ return cpumask_of(0);
+#endif
+}
+
+static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
+{
+ return 0;
+}
+
+static unsigned long bigsmp_check_apicid_present(int bit)
+{
+ return 1;
+}
+
+static int bigsmp_early_logical_apicid(int cpu)
+{
+ /* on bigsmp, logical apicid is the same as physical */
+ return early_per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+static inline unsigned long calculate_ldr(int cpu)
+{
+ unsigned long val, id;
+
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ id = per_cpu(x86_bios_cpu_apicid, cpu);
+ val |= SET_APIC_LOGICAL_ID(id);
+
+ return val;
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+static void bigsmp_init_apic_ldr(void)
+{
+ unsigned long val;
+ int cpu = smp_processor_id();
+
+ apic_write(APIC_DFR, APIC_DFR_FLAT);
+ val = calculate_ldr(cpu);
+ apic_write(APIC_LDR, val);
+}
+
+static void bigsmp_setup_apic_routing(void)
+{
+ printk(KERN_INFO
+ "Enabling APIC mode: Physflat. Using %d I/O APICs\n",
+ nr_ioapics);
+}
+
+static int bigsmp_cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < nr_cpu_ids)
+ return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
+
+ return BAD_APICID;
+}
+
+static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
+{
+ /* For clustered we don't have a good way to do this yet - hack */
+ physids_promote(0xFFL, retmap);
+}
+
+static int bigsmp_check_phys_apicid_present(int phys_apicid)
+{
+ return 1;
+}
+
+/* As we are using single CPU as destination, pick only one CPU here */
+static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ int cpu = cpumask_first(cpumask);
+
+ if (cpu < nr_cpu_ids)
+ return cpu_physical_id(cpu);
+ return BAD_APICID;
+}
+
+static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ return cpu_physical_id(cpu);
+ }
+ return BAD_APICID;
+}
+
+static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return cpuid_apic >> index_msb;
+}
+
+static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ default_send_IPI_mask_sequence_phys(mask, vector);
+}
+
+static void bigsmp_send_IPI_allbutself(int vector)
+{
+ default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
+}
+
+static void bigsmp_send_IPI_all(int vector)
+{
+ bigsmp_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int dmi_bigsmp; /* can be set by dmi scanners */
+
+static int hp_ht_bigsmp(const struct dmi_system_id *d)
+{
+ printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
+ dmi_bigsmp = 1;
+
+ return 0;
+}
+
+
+static const struct dmi_system_id bigsmp_dmi_table[] = {
+ { hp_ht_bigsmp, "HP ProLiant DL760 G2",
+ { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+ DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
+ }
+ },
+
+ { hp_ht_bigsmp, "HP ProLiant DL740",
+ { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+ DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
+ }
+ },
+ { } /* NULL entry stops DMI scanning */
+};
+
+static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
+}
+
+static int probe_bigsmp(void)
+{
+ if (def_to_bigsmp)
+ dmi_bigsmp = 1;
+ else
+ dmi_check_system(bigsmp_dmi_table);
+
+ return dmi_bigsmp;
+}
+
+static struct apic apic_bigsmp = {
+
+ .name = "bigsmp",
+ .probe = probe_bigsmp,
+ .acpi_madt_oem_check = NULL,
+ .apic_id_registered = bigsmp_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ /* phys delivery to target CPU: */
+ .irq_dest_mode = 0,
+
+ .target_cpus = bigsmp_target_cpus,
+ .disable_esr = 1,
+ .dest_logical = 0,
+ .check_apicid_used = bigsmp_check_apicid_used,
+ .check_apicid_present = bigsmp_check_apicid_present,
+
+ .vector_allocation_domain = bigsmp_vector_allocation_domain,
+ .init_apic_ldr = bigsmp_init_apic_ldr,
+
+ .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
+ .setup_apic_routing = bigsmp_setup_apic_routing,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
+ .apicid_to_cpu_present = physid_set_mask_of_physid,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = bigsmp_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = bigsmp_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = bigsmp_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0xFF << 24,
+
+ .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = bigsmp_send_IPI_mask,
+ .send_IPI_mask_allbutself = NULL,
+ .send_IPI_allbutself = bigsmp_send_IPI_allbutself,
+ .send_IPI_all = bigsmp_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+
+ .wait_for_init_deassert = default_wait_for_init_deassert,
+
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
+};
+
+void __init generic_bigsmp_probe(void)
+{
+ unsigned int cpu;
+
+ if (!probe_bigsmp())
+ return;
+
+ apic = &apic_bigsmp;
+
+ for_each_possible_cpu(cpu) {
+ if (early_per_cpu(x86_cpu_to_logical_apicid,
+ cpu) == BAD_APICID)
+ continue;
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ bigsmp_early_logical_apicid(cpu);
+ }
+
+ pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name);
+}
+
+apic_driver(apic_bigsmp);
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
new file mode 100644
index 00000000000..5d513bc47b6
--- /dev/null
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -0,0 +1,755 @@
+/*
+ * Written by: Garry Forsgren, Unisys Corporation
+ * Natalie Protasevich, Unisys Corporation
+ *
+ * This file contains the code to configure and interface
+ * with Unisys ES7000 series hardware system manager.
+ *
+ * Copyright (c) 2003 Unisys Corporation.
+ * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
+ *
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Unisys Corporation, Township Line & Union Meeting
+ * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
+ *
+ * http://www.unisys.com
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/notifier.h>
+#include <linux/spinlock.h>
+#include <linux/cpumask.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <linux/nmi.h>
+#include <linux/smp.h>
+#include <linux/io.h>
+
+#include <asm/apicdef.h>
+#include <linux/atomic.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+/*
+ * ES7000 chipsets
+ */
+
+#define NON_UNISYS 0
+#define ES7000_CLASSIC 1
+#define ES7000_ZORRO 2
+
+#define MIP_REG 1
+#define MIP_PSAI_REG 4
+
+#define MIP_BUSY 1
+#define MIP_SPIN 0xf0000
+#define MIP_VALID 0x0100000000000000ULL
+#define MIP_SW_APIC 0x1020b
+
+#define MIP_PORT(val) ((val >> 32) & 0xffff)
+
+#define MIP_RD_LO(val) (val & 0xffffffff)
+
+struct mip_reg {
+ unsigned long long off_0x00;
+ unsigned long long off_0x08;
+ unsigned long long off_0x10;
+ unsigned long long off_0x18;
+ unsigned long long off_0x20;
+ unsigned long long off_0x28;
+ unsigned long long off_0x30;
+ unsigned long long off_0x38;
+};
+
+struct mip_reg_info {
+ unsigned long long mip_info;
+ unsigned long long delivery_info;
+ unsigned long long host_reg;
+ unsigned long long mip_reg;
+};
+
+struct psai {
+ unsigned long long entry_type;
+ unsigned long long addr;
+ unsigned long long bep_addr;
+};
+
+#ifdef CONFIG_ACPI
+
+struct es7000_oem_table {
+ struct acpi_table_header Header;
+ u32 OEMTableAddr;
+ u32 OEMTableSize;
+};
+
+static unsigned long oem_addrX;
+static unsigned long oem_size;
+
+#endif
+
+/*
+ * ES7000 Globals
+ */
+
+static volatile unsigned long *psai;
+static struct mip_reg *mip_reg;
+static struct mip_reg *host_reg;
+static int mip_port;
+static unsigned long mip_addr;
+static unsigned long host_addr;
+
+int es7000_plat;
+
+/*
+ * GSI override for ES7000 platforms.
+ */
+
+
+static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+{
+ unsigned long vect = 0, psaival = 0;
+
+ if (psai == NULL)
+ return -1;
+
+ vect = ((unsigned long)__pa(eip)/0x1000) << 16;
+ psaival = (0x1000000 | vect | cpu);
+
+ while (*psai & 0x1000000)
+ ;
+
+ *psai = psaival;
+
+ return 0;
+}
+
+static int es7000_apic_is_cluster(void)
+{
+ /* MPENTIUMIII */
+ if (boot_cpu_data.x86 == 6 &&
+ (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11))
+ return 1;
+
+ return 0;
+}
+
+static void setup_unisys(void)
+{
+ /*
+ * Determine the generation of the ES7000 currently running.
+ *
+ * es7000_plat = 1 if the machine is a 5xx ES7000 box
+ * es7000_plat = 2 if the machine is a x86_64 ES7000 box
+ *
+ */
+ if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
+ es7000_plat = ES7000_ZORRO;
+ else
+ es7000_plat = ES7000_CLASSIC;
+}
+
+/*
+ * Parse the OEM Table:
+ */
+static int parse_unisys_oem(char *oemptr)
+{
+ int i;
+ int success = 0;
+ unsigned char type, size;
+ unsigned long val;
+ char *tp = NULL;
+ struct psai *psaip = NULL;
+ struct mip_reg_info *mi;
+ struct mip_reg *host, *mip;
+
+ tp = oemptr;
+
+ tp += 8;
+
+ for (i = 0; i <= 6; i++) {
+ type = *tp++;
+ size = *tp++;
+ tp -= 2;
+ switch (type) {
+ case MIP_REG:
+ mi = (struct mip_reg_info *)tp;
+ val = MIP_RD_LO(mi->host_reg);
+ host_addr = val;
+ host = (struct mip_reg *)val;
+ host_reg = __va(host);
+ val = MIP_RD_LO(mi->mip_reg);
+ mip_port = MIP_PORT(mi->mip_info);
+ mip_addr = val;
+ mip = (struct mip_reg *)val;
+ mip_reg = __va(mip);
+ pr_debug("host_reg = 0x%lx\n",
+ (unsigned long)host_reg);
+ pr_debug("mip_reg = 0x%lx\n",
+ (unsigned long)mip_reg);
+ success++;
+ break;
+ case MIP_PSAI_REG:
+ psaip = (struct psai *)tp;
+ if (tp != NULL) {
+ if (psaip->addr)
+ psai = __va(psaip->addr);
+ else
+ psai = NULL;
+ success++;
+ }
+ break;
+ default:
+ break;
+ }
+ tp += size;
+ }
+
+ if (success < 2)
+ es7000_plat = NON_UNISYS;
+ else
+ setup_unisys();
+
+ return es7000_plat;
+}
+
+#ifdef CONFIG_ACPI
+static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
+{
+ struct acpi_table_header *header = NULL;
+ struct es7000_oem_table *table;
+ acpi_size tbl_size;
+ acpi_status ret;
+ int i = 0;
+
+ for (;;) {
+ ret = acpi_get_table_with_size("OEM1", i++, &header, &tbl_size);
+ if (!ACPI_SUCCESS(ret))
+ return -1;
+
+ if (!memcmp((char *) &header->oem_id, "UNISYS", 6))
+ break;
+
+ early_acpi_os_unmap_memory(header, tbl_size);
+ }
+
+ table = (void *)header;
+
+ oem_addrX = table->OEMTableAddr;
+ oem_size = table->OEMTableSize;
+
+ early_acpi_os_unmap_memory(header, tbl_size);
+
+ *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size);
+
+ return 0;
+}
+
+static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
+{
+ if (!oem_addr)
+ return;
+
+ __acpi_unmap_table((char *)oem_addr, oem_size);
+}
+
+static int es7000_check_dsdt(void)
+{
+ struct acpi_table_header header;
+
+ if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) &&
+ !strncmp(header.oem_id, "UNISYS", 6))
+ return 1;
+ return 0;
+}
+
+static int es7000_acpi_ret;
+
+/* Hook from generic ACPI tables.c */
+static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ unsigned long oem_addr = 0;
+ int check_dsdt;
+ int ret = 0;
+
+ /* check dsdt at first to avoid clear fix_map for oem_addr */
+ check_dsdt = es7000_check_dsdt();
+
+ if (!find_unisys_acpi_oem_table(&oem_addr)) {
+ if (check_dsdt) {
+ ret = parse_unisys_oem((char *)oem_addr);
+ } else {
+ setup_unisys();
+ ret = 1;
+ }
+ /*
+ * we need to unmap it
+ */
+ unmap_unisys_acpi_oem_table(oem_addr);
+ }
+
+ es7000_acpi_ret = ret;
+
+ return ret && !es7000_apic_is_cluster();
+}
+
+static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
+{
+ int ret = es7000_acpi_ret;
+
+ return ret && es7000_apic_is_cluster();
+}
+
+#else /* !CONFIG_ACPI: */
+static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return 0;
+}
+
+static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
+{
+ return 0;
+}
+#endif /* !CONFIG_ACPI */
+
+static void es7000_spin(int n)
+{
+ int i = 0;
+
+ while (i++ < n)
+ rep_nop();
+}
+
+static int es7000_mip_write(struct mip_reg *mip_reg)
+{
+ int status = 0;
+ int spin;
+
+ spin = MIP_SPIN;
+ while ((host_reg->off_0x38 & MIP_VALID) != 0) {
+ if (--spin <= 0) {
+ WARN(1, "Timeout waiting for Host Valid Flag\n");
+ return -1;
+ }
+ es7000_spin(MIP_SPIN);
+ }
+
+ memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
+ outb(1, mip_port);
+
+ spin = MIP_SPIN;
+
+ while ((mip_reg->off_0x38 & MIP_VALID) == 0) {
+ if (--spin <= 0) {
+ WARN(1, "Timeout waiting for MIP Valid Flag\n");
+ return -1;
+ }
+ es7000_spin(MIP_SPIN);
+ }
+
+ status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48;
+ mip_reg->off_0x38 &= ~MIP_VALID;
+
+ return status;
+}
+
+static void es7000_enable_apic_mode(void)
+{
+ struct mip_reg es7000_mip_reg;
+ int mip_status;
+
+ if (!es7000_plat)
+ return;
+
+ pr_info("Enabling APIC mode.\n");
+ memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
+ es7000_mip_reg.off_0x00 = MIP_SW_APIC;
+ es7000_mip_reg.off_0x38 = MIP_VALID;
+
+ while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
+ WARN(1, "Command failed, status = %x\n", mip_status);
+}
+
+static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ /* Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+ * priority interrupt delivery mode.
+ *
+ * In particular there was a hyperthreading cpu observed to
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
+
+
+static void es7000_wait_for_init_deassert(atomic_t *deassert)
+{
+ while (!atomic_read(deassert))
+ cpu_relax();
+}
+
+static unsigned int es7000_get_apic_id(unsigned long x)
+{
+ return (x >> 24) & 0xFF;
+}
+
+static void es7000_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ default_send_IPI_mask_sequence_phys(mask, vector);
+}
+
+static void es7000_send_IPI_allbutself(int vector)
+{
+ default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
+}
+
+static void es7000_send_IPI_all(int vector)
+{
+ es7000_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int es7000_apic_id_registered(void)
+{
+ return 1;
+}
+
+static const struct cpumask *target_cpus_cluster(void)
+{
+ return cpu_all_mask;
+}
+
+static const struct cpumask *es7000_target_cpus(void)
+{
+ return cpumask_of(smp_processor_id());
+}
+
+static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid)
+{
+ return 0;
+}
+
+static unsigned long es7000_check_apicid_present(int bit)
+{
+ return physid_isset(bit, phys_cpu_present_map);
+}
+
+static int es7000_early_logical_apicid(int cpu)
+{
+ /* on es7000, logical apicid is the same as physical */
+ return early_per_cpu(x86_bios_cpu_apicid, cpu);
+}
+
+static unsigned long calculate_ldr(int cpu)
+{
+ unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
+
+ return SET_APIC_LOGICAL_ID(id);
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LdR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+static void es7000_init_apic_ldr_cluster(void)
+{
+ unsigned long val;
+ int cpu = smp_processor_id();
+
+ apic_write(APIC_DFR, APIC_DFR_CLUSTER);
+ val = calculate_ldr(cpu);
+ apic_write(APIC_LDR, val);
+}
+
+static void es7000_init_apic_ldr(void)
+{
+ unsigned long val;
+ int cpu = smp_processor_id();
+
+ apic_write(APIC_DFR, APIC_DFR_FLAT);
+ val = calculate_ldr(cpu);
+ apic_write(APIC_LDR, val);
+}
+
+static void es7000_setup_apic_routing(void)
+{
+ int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
+
+ pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
+ (apic_version[apic] == 0x14) ?
+ "Physical Cluster" : "Logical Cluster",
+ nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
+}
+
+static int es7000_cpu_present_to_apicid(int mps_cpu)
+{
+ if (!mps_cpu)
+ return boot_cpu_physical_apicid;
+ else if (mps_cpu < nr_cpu_ids)
+ return per_cpu(x86_bios_cpu_apicid, mps_cpu);
+ else
+ return BAD_APICID;
+}
+
+static int cpu_id;
+
+static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
+{
+ physid_set_mask_of_physid(cpu_id, retmap);
+ ++cpu_id;
+}
+
+static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
+{
+ /* For clustered we don't have a good way to do this yet - hack */
+ physids_promote(0xFFL, retmap);
+}
+
+static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
+{
+ boot_cpu_physical_apicid = read_apic_id();
+ return 1;
+}
+
+static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ unsigned int round = 0;
+ int cpu, uninitialized_var(apicid);
+
+ /*
+ * The cpus in the mask must all be on the apic cluster.
+ */
+ for_each_cpu(cpu, cpumask) {
+ int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+ if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
+ WARN(1, "Not a valid mask!");
+
+ return BAD_APICID;
+ }
+ apicid = new_apicid;
+ round++;
+ }
+ return apicid;
+}
+
+static unsigned int
+es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
+ const struct cpumask *andmask)
+{
+ int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
+ cpumask_var_t cpumask;
+
+ if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+ return apicid;
+
+ cpumask_and(cpumask, inmask, andmask);
+ cpumask_and(cpumask, cpumask, cpu_online_mask);
+ apicid = es7000_cpu_mask_to_apicid(cpumask);
+
+ free_cpumask_var(cpumask);
+
+ return apicid;
+}
+
+static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return cpuid_apic >> index_msb;
+}
+
+static int probe_es7000(void)
+{
+ /* probed later in mptable/ACPI hooks */
+ return 0;
+}
+
+static int es7000_mps_ret;
+static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem,
+ char *productid)
+{
+ int ret = 0;
+
+ if (mpc->oemptr) {
+ struct mpc_oemtable *oem_table =
+ (struct mpc_oemtable *)mpc->oemptr;
+
+ if (!strncmp(oem, "UNISYS", 6))
+ ret = parse_unisys_oem((char *)oem_table);
+ }
+
+ es7000_mps_ret = ret;
+
+ return ret && !es7000_apic_is_cluster();
+}
+
+static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem,
+ char *productid)
+{
+ int ret = es7000_mps_ret;
+
+ return ret && es7000_apic_is_cluster();
+}
+
+/* We've been warned by a false positive warning.Use __refdata to keep calm. */
+static struct apic __refdata apic_es7000_cluster = {
+
+ .name = "es7000",
+ .probe = probe_es7000,
+ .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster,
+ .apic_id_registered = es7000_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ /* logical delivery broadcast to all procs: */
+ .irq_dest_mode = 1,
+
+ .target_cpus = target_cpus_cluster,
+ .disable_esr = 1,
+ .dest_logical = 0,
+ .check_apicid_used = es7000_check_apicid_used,
+ .check_apicid_present = es7000_check_apicid_present,
+
+ .vector_allocation_domain = es7000_vector_allocation_domain,
+ .init_apic_ldr = es7000_init_apic_ldr_cluster,
+
+ .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
+ .setup_apic_routing = es7000_setup_apic_routing,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
+ .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = es7000_check_phys_apicid_present,
+ .enable_apic_mode = es7000_enable_apic_mode,
+ .phys_pkg_id = es7000_phys_pkg_id,
+ .mps_oem_check = es7000_mps_oem_check_cluster,
+
+ .get_apic_id = es7000_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0xFF << 24,
+
+ .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = es7000_send_IPI_mask,
+ .send_IPI_mask_allbutself = NULL,
+ .send_IPI_allbutself = es7000_send_IPI_allbutself,
+ .send_IPI_all = es7000_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip,
+
+ .trampoline_phys_low = 0x467,
+ .trampoline_phys_high = 0x469,
+
+ .wait_for_init_deassert = NULL,
+
+ /* Nothing to do for most platforms, since cleared by the INIT cycle: */
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = es7000_early_logical_apicid,
+};
+
+static struct apic __refdata apic_es7000 = {
+
+ .name = "es7000",
+ .probe = probe_es7000,
+ .acpi_madt_oem_check = es7000_acpi_madt_oem_check,
+ .apic_id_registered = es7000_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ /* phys delivery to target CPUs: */
+ .irq_dest_mode = 0,
+
+ .target_cpus = es7000_target_cpus,
+ .disable_esr = 1,
+ .dest_logical = 0,
+ .check_apicid_used = es7000_check_apicid_used,
+ .check_apicid_present = es7000_check_apicid_present,
+
+ .vector_allocation_domain = es7000_vector_allocation_domain,
+ .init_apic_ldr = es7000_init_apic_ldr,
+
+ .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
+ .setup_apic_routing = es7000_setup_apic_routing,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
+ .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = es7000_check_phys_apicid_present,
+ .enable_apic_mode = es7000_enable_apic_mode,
+ .phys_pkg_id = es7000_phys_pkg_id,
+ .mps_oem_check = es7000_mps_oem_check,
+
+ .get_apic_id = es7000_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0xFF << 24,
+
+ .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = es7000_send_IPI_mask,
+ .send_IPI_mask_allbutself = NULL,
+ .send_IPI_allbutself = es7000_send_IPI_allbutself,
+ .send_IPI_all = es7000_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .trampoline_phys_low = 0x467,
+ .trampoline_phys_high = 0x469,
+
+ .wait_for_init_deassert = es7000_wait_for_init_deassert,
+
+ /* Nothing to do for most platforms, since cleared by the INIT cycle: */
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = es7000_early_logical_apicid,
+};
+
+/*
+ * Need to check for es7000 followed by es7000_cluster, so this order
+ * in apic_drivers is important.
+ */
+apic_drivers(apic_es7000, apic_es7000_cluster);
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
new file mode 100644
index 00000000000..31cb9ae992b
--- /dev/null
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -0,0 +1,90 @@
+/*
+ * HW NMI watchdog support
+ *
+ * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Arch specific calls to support NMI watchdog
+ *
+ * Bits copied from original nmi.c file
+ *
+ */
+#include <asm/apic.h>
+
+#include <linux/cpumask.h>
+#include <linux/kdebug.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+ return (u64)(cpu_khz) * 1000 * watchdog_thresh;
+}
+#endif
+
+#ifdef arch_trigger_all_cpu_backtrace
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+
+/* "in progress" flag of arch_trigger_all_cpu_backtrace */
+static unsigned long backtrace_flag;
+
+void arch_trigger_all_cpu_backtrace(void)
+{
+ int i;
+
+ if (test_and_set_bit(0, &backtrace_flag))
+ /*
+ * If there is already a trigger_all_cpu_backtrace() in progress
+ * (backtrace_flag == 1), don't output double cpu dump infos.
+ */
+ return;
+
+ cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
+
+ printk(KERN_INFO "sending NMI to all CPUs:\n");
+ apic->send_IPI_all(NMI_VECTOR);
+
+ /* Wait for up to 10 seconds for all CPUs to do the backtrace */
+ for (i = 0; i < 10 * 1000; i++) {
+ if (cpumask_empty(to_cpumask(backtrace_mask)))
+ break;
+ mdelay(1);
+ }
+
+ clear_bit(0, &backtrace_flag);
+ smp_mb__after_clear_bit();
+}
+
+static int __kprobes
+arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
+{
+ int cpu;
+
+ cpu = smp_processor_id();
+
+ if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
+ static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+ arch_spin_lock(&lock);
+ printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
+ show_regs(regs);
+ arch_spin_unlock(&lock);
+ cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
+ return NMI_HANDLED;
+ }
+
+ return NMI_DONE;
+}
+
+static int __init register_trigger_all_cpu_backtrace(void)
+{
+ register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler,
+ 0, "arch_bt");
+ return 0;
+}
+early_initcall(register_trigger_all_cpu_backtrace);
+#endif
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
new file mode 100644
index 00000000000..9a15d4b9055
--- /dev/null
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -0,0 +1,4043 @@
+/*
+ * Intel IO-APIC support for multi-Pentium hosts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *
+ * Many thanks to Stig Venaas for trying out countless experimental
+ * patches and reporting/debugging problems patiently!
+ *
+ * (c) 1999, Multiple IO-APIC support, developed by
+ * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ * further tested and cleaned up by Zach Brown <zab@redhat.com>
+ * and Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively
+ * Paul Diefenbaugh : Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/syscore_ops.h>
+#include <linux/msi.h>
+#include <linux/htirq.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h> /* time_after() */
+#include <linux/slab.h>
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+#include <linux/bootmem.h>
+#include <linux/dmar.h>
+#include <linux/hpet.h>
+
+#include <asm/idle.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+#include <asm/desc.h>
+#include <asm/proto.h>
+#include <asm/acpi.h>
+#include <asm/dma.h>
+#include <asm/timer.h>
+#include <asm/i8259.h>
+#include <asm/msidef.h>
+#include <asm/hypertransport.h>
+#include <asm/setup.h>
+#include <asm/irq_remapping.h>
+#include <asm/hpet.h>
+#include <asm/hw_irq.h>
+
+#include <asm/apic.h>
+
+#define __apicdebuginit(type) static type __init
+#define for_each_irq_pin(entry, head) \
+ for (entry = head; entry; entry = entry->next)
+
+/*
+ * Is the SiS APIC rmw bug present ?
+ * -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
+
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
+static DEFINE_RAW_SPINLOCK(vector_lock);
+
+static struct ioapic {
+ /*
+ * # of IRQ routing registers
+ */
+ int nr_registers;
+ /*
+ * Saved state during suspend/resume, or while enabling intr-remap.
+ */
+ struct IO_APIC_route_entry *saved_registers;
+ /* I/O APIC config */
+ struct mpc_ioapic mp_config;
+ /* IO APIC gsi routing info */
+ struct mp_ioapic_gsi gsi_config;
+ DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} ioapics[MAX_IO_APICS];
+
+#define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver
+
+int mpc_ioapic_id(int ioapic_idx)
+{
+ return ioapics[ioapic_idx].mp_config.apicid;
+}
+
+unsigned int mpc_ioapic_addr(int ioapic_idx)
+{
+ return ioapics[ioapic_idx].mp_config.apicaddr;
+}
+
+struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
+{
+ return &ioapics[ioapic_idx].gsi_config;
+}
+
+int nr_ioapics;
+
+/* The one past the highest gsi number used */
+u32 gsi_top;
+
+/* MP IRQ source entries */
+struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
+/* GSI interrupts */
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
+
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
+
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
+int skip_ioapic_setup;
+
+/**
+ * disable_ioapic_support() - disables ioapic support at runtime
+ */
+void disable_ioapic_support(void)
+{
+#ifdef CONFIG_PCI
+ noioapicquirk = 1;
+ noioapicreroute = -1;
+#endif
+ skip_ioapic_setup = 1;
+}
+
+static int __init parse_noapic(char *str)
+{
+ /* disable IO-APIC */
+ disable_ioapic_support();
+ return 0;
+}
+early_param("noapic", parse_noapic);
+
+static int io_apic_setup_irq_pin(unsigned int irq, int node,
+ struct io_apic_irq_attr *attr);
+
+/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
+void mp_save_irq(struct mpc_intsrc *m)
+{
+ int i;
+
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
+ m->srcbusirq, m->dstapic, m->dstirq);
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
+ return;
+ }
+
+ memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!!\n");
+}
+
+struct irq_pin_list {
+ int apic, pin;
+ struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *alloc_irq_pin_list(int node)
+{
+ return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
+}
+
+
+/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
+
+int __init arch_early_irq_init(void)
+{
+ struct irq_cfg *cfg;
+ int count, node, i;
+
+ if (!legacy_pic->nr_legacy_irqs)
+ io_apic_irqs = ~0UL;
+
+ for (i = 0; i < nr_ioapics; i++) {
+ ioapics[i].saved_registers =
+ kzalloc(sizeof(struct IO_APIC_route_entry) *
+ ioapics[i].nr_registers, GFP_KERNEL);
+ if (!ioapics[i].saved_registers)
+ pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
+ }
+
+ cfg = irq_cfgx;
+ count = ARRAY_SIZE(irq_cfgx);
+ node = cpu_to_node(0);
+
+ /* Make sure the legacy interrupts are marked in the bitmap */
+ irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
+
+ for (i = 0; i < count; i++) {
+ irq_set_chip_data(i, &cfg[i]);
+ zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
+ zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
+ /*
+ * For legacy IRQ's, start with assigning irq0 to irq15 to
+ * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
+ */
+ if (i < legacy_pic->nr_legacy_irqs) {
+ cfg[i].vector = IRQ0_VECTOR + i;
+ cpumask_set_cpu(0, cfg[i].domain);
+ }
+ }
+
+ return 0;
+}
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+ return irq_get_chip_data(irq);
+}
+
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
+{
+ struct irq_cfg *cfg;
+
+ cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
+ if (!cfg)
+ return NULL;
+ if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
+ goto out_cfg;
+ if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
+ goto out_domain;
+ return cfg;
+out_domain:
+ free_cpumask_var(cfg->domain);
+out_cfg:
+ kfree(cfg);
+ return NULL;
+}
+
+static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
+{
+ if (!cfg)
+ return;
+ irq_set_chip_data(at, NULL);
+ free_cpumask_var(cfg->domain);
+ free_cpumask_var(cfg->old_domain);
+ kfree(cfg);
+}
+
+static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
+{
+ int res = irq_alloc_desc_at(at, node);
+ struct irq_cfg *cfg;
+
+ if (res < 0) {
+ if (res != -EEXIST)
+ return NULL;
+ cfg = irq_get_chip_data(at);
+ if (cfg)
+ return cfg;
+ }
+
+ cfg = alloc_irq_cfg(at, node);
+ if (cfg)
+ irq_set_chip_data(at, cfg);
+ else
+ irq_free_desc(at);
+ return cfg;
+}
+
+static int alloc_irq_from(unsigned int from, int node)
+{
+ return irq_alloc_desc_from(from, node);
+}
+
+static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
+{
+ free_irq_cfg(at, cfg);
+ irq_free_desc(at);
+}
+
+struct io_apic {
+ unsigned int index;
+ unsigned int unused[3];
+ unsigned int data;
+ unsigned int unused2[11];
+ unsigned int eoi;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+ + (mpc_ioapic_addr(idx) & ~PAGE_MASK);
+}
+
+static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(vector, &io_apic->eoi);
+}
+
+unsigned int native_ioapic_read(unsigned int apic, unsigned int reg)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ return readl(&io_apic->data);
+}
+
+void native_ioapic_write(unsigned int apic, unsigned int reg,
+ unsigned int value)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ writel(value, &io_apic->data);
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+void native_ioapic_modify(unsigned int apic, unsigned int reg,
+ unsigned int value)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+
+ if (sis_apic_bug)
+ writel(reg, &io_apic->index);
+ writel(value, &io_apic->data);
+}
+
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
+{
+ struct irq_pin_list *entry;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
+ unsigned int reg;
+ int pin;
+
+ pin = entry->pin;
+ reg = io_apic_read(entry->apic, 0x10 + pin*2);
+ /* Is the remote IRR bit set? */
+ if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ return true;
+ }
+ }
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return false;
+}
+
+union entry_union {
+ struct { u32 w1, w2; };
+ struct IO_APIC_route_entry entry;
+};
+
+static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
+{
+ union entry_union eu;
+
+ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+ return eu.entry;
+}
+
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+{
+ union entry_union eu;
+ unsigned long flags;
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ eu.entry = __ioapic_read_entry(apic, pin);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ return eu.entry;
+}
+
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
+static void
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+ union entry_union eu = {{0, 0}};
+
+ eu.entry = e;
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+ unsigned long flags;
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ __ioapic_write_entry(apic, pin, e);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+ unsigned long flags;
+ union entry_union eu = { .entry.mask = 1 };
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static int
+__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+{
+ struct irq_pin_list **last, *entry;
+
+ /* don't allow duplicates */
+ last = &cfg->irq_2_pin;
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
+ if (entry->apic == apic && entry->pin == pin)
+ return 0;
+ last = &entry->next;
+ }
+
+ entry = alloc_irq_pin_list(node);
+ if (!entry) {
+ printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
+ node, apic, pin);
+ return -ENOMEM;
+ }
+ entry->apic = apic;
+ entry->pin = pin;
+
+ *last = entry;
+ return 0;
+}
+
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+{
+ if (__add_pin_to_irq_node(cfg, node, apic, pin))
+ panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
+}
+
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
+ int oldapic, int oldpin,
+ int newapic, int newpin)
+{
+ struct irq_pin_list *entry;
+
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
+ if (entry->apic == oldapic && entry->pin == oldpin) {
+ entry->apic = newapic;
+ entry->pin = newpin;
+ /* every one is different, right? */
+ return;
+ }
+ }
+
+ /* old apic/pin didn't exist, so just add new ones */
+ add_pin_to_irq_node(cfg, node, newapic, newpin);
+}
+
+static void __io_apic_modify_irq(struct irq_pin_list *entry,
+ int mask_and, int mask_or,
+ void (*final)(struct irq_pin_list *entry))
+{
+ unsigned int reg, pin;
+
+ pin = entry->pin;
+ reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+ reg &= mask_and;
+ reg |= mask_or;
+ io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+ if (final)
+ final(entry);
+}
+
+static void io_apic_modify_irq(struct irq_cfg *cfg,
+ int mask_and, int mask_or,
+ void (*final)(struct irq_pin_list *entry))
+{
+ struct irq_pin_list *entry;
+
+ for_each_irq_pin(entry, cfg->irq_2_pin)
+ __io_apic_modify_irq(entry, mask_and, mask_or, final);
+}
+
+static void io_apic_sync(struct irq_pin_list *entry)
+{
+ /*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+ struct io_apic __iomem *io_apic;
+ io_apic = io_apic_base(entry->apic);
+ readl(&io_apic->data);
+}
+
+static void mask_ioapic(struct irq_cfg *cfg)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void mask_ioapic_irq(struct irq_data *data)
+{
+ mask_ioapic(data->chip_data);
+}
+
+static void __unmask_ioapic(struct irq_cfg *cfg)
+{
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
+}
+
+static void unmask_ioapic(struct irq_cfg *cfg)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ __unmask_ioapic(cfg);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_ioapic_irq(struct irq_data *data)
+{
+ unmask_ioapic(data->chip_data);
+}
+
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ * 0Xh 82489DX
+ * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ * 2Xh I/O(x)APIC which is PCI 2.2 Compliant
+ * 30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+ */
+static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
+{
+ if (mpc_ioapic_ver(apic) >= 0x20) {
+ /*
+ * Intr-remapping uses pin number as the virtual vector
+ * in the RTE. Actual vector is programmed in
+ * intr-remapping table entry. Hence for the io-apic
+ * EOI we use the pin number.
+ */
+ if (cfg && irq_remapped(cfg))
+ io_apic_eoi(apic, pin);
+ else
+ io_apic_eoi(apic, vector);
+ } else {
+ struct IO_APIC_route_entry entry, entry1;
+
+ entry = entry1 = __ioapic_read_entry(apic, pin);
+
+ /*
+ * Mask the entry and change the trigger mode to edge.
+ */
+ entry1.mask = 1;
+ entry1.trigger = IOAPIC_EDGE;
+
+ __ioapic_write_entry(apic, pin, entry1);
+
+ /*
+ * Restore the previous level triggered entry.
+ */
+ __ioapic_write_entry(apic, pin, entry);
+ }
+}
+
+static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+ struct irq_pin_list *entry;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ for_each_irq_pin(entry, cfg->irq_2_pin)
+ __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+ struct IO_APIC_route_entry entry;
+
+ /* Check delivery_mode to be sure we're not clearing an SMI pin */
+ entry = ioapic_read_entry(apic, pin);
+ if (entry.delivery_mode == dest_SMI)
+ return;
+
+ /*
+ * Make sure the entry is masked and re-read the contents to check
+ * if it is a level triggered pin and if the remote-IRR is set.
+ */
+ if (!entry.mask) {
+ entry.mask = 1;
+ ioapic_write_entry(apic, pin, entry);
+ entry = ioapic_read_entry(apic, pin);
+ }
+
+ if (entry.irr) {
+ unsigned long flags;
+
+ /*
+ * Make sure the trigger mode is set to level. Explicit EOI
+ * doesn't clear the remote-IRR if the trigger mode is not
+ * set to level.
+ */
+ if (!entry.trigger) {
+ entry.trigger = IOAPIC_LEVEL;
+ ioapic_write_entry(apic, pin, entry);
+ }
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ __eoi_ioapic_pin(apic, pin, entry.vector, NULL);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+ /*
+ * Clear the rest of the bits in the IO-APIC RTE except for the mask
+ * bit.
+ */
+ ioapic_mask_entry(apic, pin);
+ entry = ioapic_read_entry(apic, pin);
+ if (entry.irr)
+ printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n",
+ mpc_ioapic_id(apic), pin);
+}
+
+static void clear_IO_APIC (void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
+ clear_IO_APIC_pin(apic, pin);
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries[MAX_PIRQS] = {
+ [0 ... MAX_PIRQS - 1] = -1
+};
+
+static int __init ioapic_pirq_setup(char *str)
+{
+ int i, max;
+ int ints[MAX_PIRQS+1];
+
+ get_options(str, ARRAY_SIZE(ints), ints);
+
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "PIRQ redirection, working around broken MP-BIOS.\n");
+ max = MAX_PIRQS;
+ if (ints[0] < MAX_PIRQS)
+ max = ints[0];
+
+ for (i = 0; i < max; i++) {
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+ /*
+ * PIRQs are mapped upside down, usually.
+ */
+ pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+ }
+ return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Saves all the IO-APIC RTE's
+ */
+int save_ioapic_entries(void)
+{
+ int apic, pin;
+ int err = 0;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ if (!ioapics[apic].saved_registers) {
+ err = -ENOMEM;
+ continue;
+ }
+
+ for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
+ ioapics[apic].saved_registers[pin] =
+ ioapic_read_entry(apic, pin);
+ }
+
+ return err;
+}
+
+/*
+ * Mask all IO APIC entries.
+ */
+void mask_ioapic_entries(void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ if (!ioapics[apic].saved_registers)
+ continue;
+
+ for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
+ struct IO_APIC_route_entry entry;
+
+ entry = ioapics[apic].saved_registers[pin];
+ if (!entry.mask) {
+ entry.mask = 1;
+ ioapic_write_entry(apic, pin, entry);
+ }
+ }
+ }
+}
+
+/*
+ * Restore IO APIC entries which was saved in the ioapic structure.
+ */
+int restore_ioapic_entries(void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ if (!ioapics[apic].saved_registers)
+ continue;
+
+ for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
+ ioapic_write_entry(apic, pin,
+ ioapics[apic].saved_registers[pin]);
+ }
+ return 0;
+}
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int find_irq_entry(int ioapic_idx, int pin, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].irqtype == type &&
+ (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) ||
+ mp_irqs[i].dstapic == MP_APIC_ALL) &&
+ mp_irqs[i].dstirq == pin)
+ return i;
+
+ return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].srcbus;
+
+ if (test_bit(lbus, mp_bus_not_pci) &&
+ (mp_irqs[i].irqtype == type) &&
+ (mp_irqs[i].srcbusirq == irq))
+
+ return mp_irqs[i].dstirq;
+ }
+ return -1;
+}
+
+static int __init find_isa_irq_apic(int irq, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].srcbus;
+
+ if (test_bit(lbus, mp_bus_not_pci) &&
+ (mp_irqs[i].irqtype == type) &&
+ (mp_irqs[i].srcbusirq == irq))
+ break;
+ }
+
+ if (i < mp_irq_entries) {
+ int ioapic_idx;
+
+ for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
+ return ioapic_idx;
+ }
+
+ return -1;
+}
+
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+ if (irq < legacy_pic->nr_legacy_irqs) {
+ unsigned int port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+ }
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "Broken MPtable reports ISA irq %d\n", irq);
+ return 0;
+}
+
+#endif
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx) (0)
+#define default_ISA_polarity(idx) (0)
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value. If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
+#define default_EISA_polarity(idx) default_ISA_polarity(idx)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx) (1)
+#define default_PCI_polarity(idx) (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx) (1)
+#define default_MCA_polarity(idx) default_ISA_polarity(idx)
+
+static int irq_polarity(int idx)
+{
+ int bus = mp_irqs[idx].srcbus;
+ int polarity;
+
+ /*
+ * Determine IRQ line polarity (high active or low active):
+ */
+ switch (mp_irqs[idx].irqflag & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent polarity */
+ if (test_bit(bus, mp_bus_not_pci))
+ polarity = default_ISA_polarity(idx);
+ else
+ polarity = default_PCI_polarity(idx);
+ break;
+ case 1: /* high active */
+ {
+ polarity = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ case 3: /* low active */
+ {
+ polarity = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ return polarity;
+}
+
+static int irq_trigger(int idx)
+{
+ int bus = mp_irqs[idx].srcbus;
+ int trigger;
+
+ /*
+ * Determine IRQ trigger mode (edge or level sensitive):
+ */
+ switch ((mp_irqs[idx].irqflag>>2) & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent */
+ if (test_bit(bus, mp_bus_not_pci))
+ trigger = default_ISA_trigger(idx);
+ else
+ trigger = default_PCI_trigger(idx);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+ switch (mp_bus_id_to_type[bus]) {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ /* set before the switch */
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ trigger = default_EISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ /* set before the switch */
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ trigger = default_MCA_trigger(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ }
+#endif
+ break;
+ case 1: /* edge */
+ {
+ trigger = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ case 3: /* level */
+ {
+ trigger = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 0;
+ break;
+ }
+ }
+ return trigger;
+}
+
+static int pin_2_irq(int idx, int apic, int pin)
+{
+ int irq;
+ int bus = mp_irqs[idx].srcbus;
+ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic);
+
+ /*
+ * Debugging check, we are in big trouble if this message pops up!
+ */
+ if (mp_irqs[idx].dstirq != pin)
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+ if (test_bit(bus, mp_bus_not_pci)) {
+ irq = mp_irqs[idx].srcbusirq;
+ } else {
+ u32 gsi = gsi_cfg->gsi_base + pin;
+
+ if (gsi >= NR_IRQS_LEGACY)
+ irq = gsi;
+ else
+ irq = gsi_top + gsi;
+ }
+
+#ifdef CONFIG_X86_32
+ /*
+ * PCI IRQ command line redirection. Yes, limits are hardcoded.
+ */
+ if ((pin >= 16) && (pin <= 23)) {
+ if (pirq_entries[pin-16] != -1) {
+ if (!pirq_entries[pin-16]) {
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "disabling PIRQ%d\n", pin-16);
+ } else {
+ irq = pirq_entries[pin-16];
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "using PIRQ%d -> IRQ %d\n",
+ pin-16, irq);
+ }
+ }
+ }
+#endif
+
+ return irq;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+ struct io_apic_irq_attr *irq_attr)
+{
+ int ioapic_idx, i, best_guess = -1;
+
+ apic_printk(APIC_DEBUG,
+ "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+ bus, slot, pin);
+ if (test_bit(bus, mp_bus_not_pci)) {
+ apic_printk(APIC_VERBOSE,
+ "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+ return -1;
+ }
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].srcbus;
+
+ for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
+ mp_irqs[i].dstapic == MP_APIC_ALL)
+ break;
+
+ if (!test_bit(lbus, mp_bus_not_pci) &&
+ !mp_irqs[i].irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+ int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq);
+
+ if (!(ioapic_idx || IO_APIC_IRQ(irq)))
+ continue;
+
+ if (pin == (mp_irqs[i].srcbusirq & 3)) {
+ set_io_apic_irq_attr(irq_attr, ioapic_idx,
+ mp_irqs[i].dstirq,
+ irq_trigger(i),
+ irq_polarity(i));
+ return irq;
+ }
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_guess < 0) {
+ set_io_apic_irq_attr(irq_attr, ioapic_idx,
+ mp_irqs[i].dstirq,
+ irq_trigger(i),
+ irq_polarity(i));
+ best_guess = irq;
+ }
+ }
+ }
+ return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+void lock_vector_lock(void)
+{
+ /* Used to the online set of cpus does not change
+ * during assign_irq_vector.
+ */
+ raw_spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+ raw_spin_unlock(&vector_lock);
+}
+
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+ /*
+ * NOTE! The local APIC isn't very good at handling
+ * multiple interrupts at the same interrupt level.
+ * As the interrupt level is determined by taking the
+ * vector number and shifting that right by 4, we
+ * want to spread these out a bit so that they don't
+ * all fall in the same interrupt level.
+ *
+ * Also, we've got to be careful not to trash gate
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
+ static int current_offset = VECTOR_OFFSET_START % 8;
+ unsigned int old_vector;
+ int cpu, err;
+ cpumask_var_t tmp_mask;
+
+ if (cfg->move_in_progress)
+ return -EBUSY;
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
+
+ old_vector = cfg->vector;
+ if (old_vector) {
+ cpumask_and(tmp_mask, mask, cpu_online_mask);
+ cpumask_and(tmp_mask, cfg->domain, tmp_mask);
+ if (!cpumask_empty(tmp_mask)) {
+ free_cpumask_var(tmp_mask);
+ return 0;
+ }
+ }
+
+ /* Only try and allocate irqs on cpus that are present */
+ err = -ENOSPC;
+ for_each_cpu_and(cpu, mask, cpu_online_mask) {
+ int new_cpu;
+ int vector, offset;
+
+ apic->vector_allocation_domain(cpu, tmp_mask);
+
+ vector = current_vector;
+ offset = current_offset;
+next:
+ vector += 8;
+ if (vector >= first_system_vector) {
+ /* If out of vectors on large boxen, must share them. */
+ offset = (offset + 1) % 8;
+ vector = FIRST_EXTERNAL_VECTOR + offset;
+ }
+ if (unlikely(current_vector == vector))
+ continue;
+
+ if (test_bit(vector, used_vectors))
+ goto next;
+
+ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+ if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+ goto next;
+ /* Found one! */
+ current_vector = vector;
+ current_offset = offset;
+ if (old_vector) {
+ cfg->move_in_progress = 1;
+ cpumask_copy(cfg->old_domain, cfg->domain);
+ }
+ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+ per_cpu(vector_irq, new_cpu)[vector] = irq;
+ cfg->vector = vector;
+ cpumask_copy(cfg->domain, tmp_mask);
+ err = 0;
+ break;
+ }
+ free_cpumask_var(tmp_mask);
+ return err;
+}
+
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+ int err;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ err = __assign_irq_vector(irq, cfg, mask);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ return err;
+}
+
+static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
+{
+ int cpu, vector;
+
+ BUG_ON(!cfg->vector);
+
+ vector = cfg->vector;
+ for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+ per_cpu(vector_irq, cpu)[vector] = -1;
+
+ cfg->vector = 0;
+ cpumask_clear(cfg->domain);
+
+ if (likely(!cfg->move_in_progress))
+ return;
+ for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+ vector++) {
+ if (per_cpu(vector_irq, cpu)[vector] != irq)
+ continue;
+ per_cpu(vector_irq, cpu)[vector] = -1;
+ break;
+ }
+ }
+ cfg->move_in_progress = 0;
+}
+
+void __setup_vector_irq(int cpu)
+{
+ /* Initialize vector_irq on a new cpu */
+ int irq, vector;
+ struct irq_cfg *cfg;
+
+ /*
+ * vector_lock will make sure that we don't run into irq vector
+ * assignments that might be happening on another cpu in parallel,
+ * while we setup our initial vector to irq mappings.
+ */
+ raw_spin_lock(&vector_lock);
+ /* Mark the inuse vectors */
+ for_each_active_irq(irq) {
+ cfg = irq_get_chip_data(irq);
+ if (!cfg)
+ continue;
+ /*
+ * If it is a legacy IRQ handled by the legacy PIC, this cpu
+ * will be part of the irq_cfg's domain.
+ */
+ if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
+ cpumask_set_cpu(cpu, cfg->domain);
+
+ if (!cpumask_test_cpu(cpu, cfg->domain))
+ continue;
+ vector = cfg->vector;
+ per_cpu(vector_irq, cpu)[vector] = irq;
+ }
+ /* Mark the free vectors */
+ for (vector = 0; vector < NR_VECTORS; ++vector) {
+ irq = per_cpu(vector_irq, cpu)[vector];
+ if (irq < 0)
+ continue;
+
+ cfg = irq_cfg(irq);
+ if (!cpumask_test_cpu(cpu, cfg->domain))
+ per_cpu(vector_irq, cpu)[vector] = -1;
+ }
+ raw_spin_unlock(&vector_lock);
+}
+
+static struct irq_chip ioapic_chip;
+
+#ifdef CONFIG_X86_32
+static inline int IO_APIC_irq_trigger(int irq)
+{
+ int apic, idx, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
+ idx = find_irq_entry(apic, pin, mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+ return irq_trigger(idx);
+ }
+ }
+ /*
+ * nonexistent IRQs are edge default
+ */
+ return 0;
+}
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+ return 1;
+}
+#endif
+
+static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
+ unsigned long trigger)
+{
+ struct irq_chip *chip = &ioapic_chip;
+ irq_flow_handler_t hdl;
+ bool fasteoi;
+
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL) {
+ irq_set_status_flags(irq, IRQ_LEVEL);
+ fasteoi = true;
+ } else {
+ irq_clear_status_flags(irq, IRQ_LEVEL);
+ fasteoi = false;
+ }
+
+ if (irq_remapped(cfg)) {
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+ irq_remap_modify_chip_defaults(chip);
+ fasteoi = trigger != 0;
+ }
+
+ hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
+ irq_set_chip_and_handler_name(irq, chip, hdl,
+ fasteoi ? "fasteoi" : "edge");
+}
+
+
+static int setup_ir_ioapic_entry(int irq,
+ struct IR_IO_APIC_route_entry *entry,
+ unsigned int destination, int vector,
+ struct io_apic_irq_attr *attr)
+{
+ int index;
+ struct irte irte;
+ int ioapic_id = mpc_ioapic_id(attr->ioapic);
+ struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id);
+
+ if (!iommu) {
+ pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
+ return -ENODEV;
+ }
+
+ index = alloc_irte(iommu, irq, 1);
+ if (index < 0) {
+ pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id);
+ return -ENOMEM;
+ }
+
+ prepare_irte(&irte, vector, destination);
+
+ /* Set source-id of interrupt request */
+ set_ioapic_sid(&irte, ioapic_id);
+
+ modify_irte(irq, &irte);
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
+ "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
+ "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
+ "Avail:%X Vector:%02X Dest:%08X "
+ "SID:%04X SQ:%X SVT:%X)\n",
+ attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
+ irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
+ irte.avail, irte.vector, irte.dest_id,
+ irte.sid, irte.sq, irte.svt);
+
+ memset(entry, 0, sizeof(*entry));
+
+ entry->index2 = (index >> 15) & 0x1;
+ entry->zero = 0;
+ entry->format = 1;
+ entry->index = (index & 0x7fff);
+ /*
+ * IO-APIC RTE will be configured with virtual vector.
+ * irq handler will do the explicit EOI to the io-apic.
+ */
+ entry->vector = attr->ioapic_pin;
+ entry->mask = 0; /* enable IRQ */
+ entry->trigger = attr->trigger;
+ entry->polarity = attr->polarity;
+
+ /* Mask level triggered irqs.
+ * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+ */
+ if (attr->trigger)
+ entry->mask = 1;
+
+ return 0;
+}
+
+static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
+ unsigned int destination, int vector,
+ struct io_apic_irq_attr *attr)
+{
+ if (intr_remapping_enabled)
+ return setup_ir_ioapic_entry(irq,
+ (struct IR_IO_APIC_route_entry *)entry,
+ destination, vector, attr);
+
+ memset(entry, 0, sizeof(*entry));
+
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->dest = destination;
+ entry->vector = vector;
+ entry->mask = 0; /* enable IRQ */
+ entry->trigger = attr->trigger;
+ entry->polarity = attr->polarity;
+
+ /*
+ * Mask level triggered irqs.
+ * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+ */
+ if (attr->trigger)
+ entry->mask = 1;
+
+ return 0;
+}
+
+static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
+ struct io_apic_irq_attr *attr)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned int dest;
+
+ if (!IO_APIC_IRQ(irq))
+ return;
+ /*
+ * For legacy irqs, cfg->domain starts with cpu 0 for legacy
+ * controllers like 8259. Now that IO-APIC can handle this irq, update
+ * the cfg->domain.
+ */
+ if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
+ apic->vector_allocation_domain(0, cfg->domain);
+
+ if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+ return;
+
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+
+ apic_printk(APIC_VERBOSE,KERN_DEBUG
+ "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
+ "IRQ %d Mode:%i Active:%i Dest:%d)\n",
+ attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
+ cfg->vector, irq, attr->trigger, attr->polarity, dest);
+
+ if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
+ pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+ mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
+ __clear_irq_vector(irq, cfg);
+
+ return;
+ }
+
+ ioapic_register_intr(irq, cfg, attr->trigger);
+ if (irq < legacy_pic->nr_legacy_irqs)
+ legacy_pic->mask(irq);
+
+ ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
+}
+
+static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin)
+{
+ if (idx != -1)
+ return false;
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
+ mpc_ioapic_id(ioapic_idx), pin);
+ return true;
+}
+
+static void __init __io_apic_setup_irqs(unsigned int ioapic_idx)
+{
+ int idx, node = cpu_to_node(0);
+ struct io_apic_irq_attr attr;
+ unsigned int pin, irq;
+
+ for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) {
+ idx = find_irq_entry(ioapic_idx, pin, mp_INT);
+ if (io_apic_pin_not_connected(idx, ioapic_idx, pin))
+ continue;
+
+ irq = pin_2_irq(idx, ioapic_idx, pin);
+
+ if ((ioapic_idx > 0) && (irq > 16))
+ continue;
+
+ /*
+ * Skip the timer IRQ if there's a quirk handler
+ * installed and if it returns 1:
+ */
+ if (apic->multi_timer_check &&
+ apic->multi_timer_check(ioapic_idx, irq))
+ continue;
+
+ set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
+ irq_polarity(idx));
+
+ io_apic_setup_irq_pin(irq, node, &attr);
+ }
+}
+
+static void __init setup_IO_APIC_irqs(void)
+{
+ unsigned int ioapic_idx;
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ __io_apic_setup_irqs(ioapic_idx);
+}
+
+/*
+ * for the gsit that is not in first ioapic
+ * but could not use acpi_register_gsi()
+ * like some special sci in IBM x3330
+ */
+void setup_IO_APIC_irq_extra(u32 gsi)
+{
+ int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0);
+ struct io_apic_irq_attr attr;
+
+ /*
+ * Convert 'gsi' to 'ioapic.pin'.
+ */
+ ioapic_idx = mp_find_ioapic(gsi);
+ if (ioapic_idx < 0)
+ return;
+
+ pin = mp_find_ioapic_pin(ioapic_idx, gsi);
+ idx = find_irq_entry(ioapic_idx, pin, mp_INT);
+ if (idx == -1)
+ return;
+
+ irq = pin_2_irq(idx, ioapic_idx, pin);
+
+ /* Only handle the non legacy irqs on secondary ioapics */
+ if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY)
+ return;
+
+ set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
+ irq_polarity(idx));
+
+ io_apic_setup_irq_pin_once(irq, node, &attr);
+}
+
+/*
+ * Set up the timer pin, possibly with the 8259A-master behind.
+ */
+static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
+ unsigned int pin, int vector)
+{
+ struct IO_APIC_route_entry entry;
+
+ if (intr_remapping_enabled)
+ return;
+
+ memset(&entry, 0, sizeof(entry));
+
+ /*
+ * We use logical delivery to get the timer IRQ
+ * to the first CPU.
+ */
+ entry.dest_mode = apic->irq_dest_mode;
+ entry.mask = 0; /* don't mask IRQ for edge */
+ entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
+ entry.delivery_mode = apic->irq_delivery_mode;
+ entry.polarity = 0;
+ entry.trigger = 0;
+ entry.vector = vector;
+
+ /*
+ * The timer IRQ doesn't have to know that behind the
+ * scene we may have a 8259A-master in AEOI mode ...
+ */
+ irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+ "edge");
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ ioapic_write_entry(ioapic_idx, pin, entry);
+}
+
+__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+{
+ int i;
+ union IO_APIC_reg_00 reg_00;
+ union IO_APIC_reg_01 reg_01;
+ union IO_APIC_reg_02 reg_02;
+ union IO_APIC_reg_03 reg_03;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(ioapic_idx, 0);
+ reg_01.raw = io_apic_read(ioapic_idx, 1);
+ if (reg_01.bits.version >= 0x10)
+ reg_02.raw = io_apic_read(ioapic_idx, 2);
+ if (reg_01.bits.version >= 0x20)
+ reg_03.raw = io_apic_read(ioapic_idx, 3);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk("\n");
+ printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
+ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
+ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
+ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
+
+ printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
+ printk(KERN_DEBUG "....... : max redirection entries: %02X\n",
+ reg_01.bits.entries);
+
+ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
+ printk(KERN_DEBUG "....... : IO APIC version: %02X\n",
+ reg_01.bits.version);
+
+ /*
+ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+ * but the value of reg_02 is read as the previous read register
+ * value, so ignore it if reg_02 == reg_01.
+ */
+ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
+ }
+
+ /*
+ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+ * or reg_03, but the value of reg_0[23] is read as the previous read
+ * register value, so ignore it if reg_03 == reg_0[12].
+ */
+ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+ reg_03.raw != reg_01.raw) {
+ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
+ }
+
+ printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+ if (intr_remapping_enabled) {
+ printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
+ " Pol Stat Indx2 Zero Vect:\n");
+ } else {
+ printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
+ " Stat Dmod Deli Vect:\n");
+ }
+
+ for (i = 0; i <= reg_01.bits.entries; i++) {
+ if (intr_remapping_enabled) {
+ struct IO_APIC_route_entry entry;
+ struct IR_IO_APIC_route_entry *ir_entry;
+
+ entry = ioapic_read_entry(ioapic_idx, i);
+ ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
+ printk(KERN_DEBUG " %02x %04X ",
+ i,
+ ir_entry->index
+ );
+ printk("%1d %1d %1d %1d %1d "
+ "%1d %1d %X %02X\n",
+ ir_entry->format,
+ ir_entry->mask,
+ ir_entry->trigger,
+ ir_entry->irr,
+ ir_entry->polarity,
+ ir_entry->delivery_status,
+ ir_entry->index2,
+ ir_entry->zero,
+ ir_entry->vector
+ );
+ } else {
+ struct IO_APIC_route_entry entry;
+
+ entry = ioapic_read_entry(ioapic_idx, i);
+ printk(KERN_DEBUG " %02x %02X ",
+ i,
+ entry.dest
+ );
+ printk("%1d %1d %1d %1d %1d "
+ "%1d %1d %02X\n",
+ entry.mask,
+ entry.trigger,
+ entry.irr,
+ entry.polarity,
+ entry.delivery_status,
+ entry.dest_mode,
+ entry.delivery_mode,
+ entry.vector
+ );
+ }
+ }
+}
+
+__apicdebuginit(void) print_IO_APICs(void)
+{
+ int ioapic_idx;
+ struct irq_cfg *cfg;
+ unsigned int irq;
+ struct irq_chip *chip;
+
+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+ for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+ mpc_ioapic_id(ioapic_idx),
+ ioapics[ioapic_idx].nr_registers);
+
+ /*
+ * We are a bit conservative about what we expect. We have to
+ * know about every hardware change ASAP.
+ */
+ printk(KERN_INFO "testing the IO APIC.......................\n");
+
+ for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ print_IO_APIC(ioapic_idx);
+
+ printk(KERN_DEBUG "IRQ to pin mappings:\n");
+ for_each_active_irq(irq) {
+ struct irq_pin_list *entry;
+
+ chip = irq_get_chip(irq);
+ if (chip != &ioapic_chip)
+ continue;
+
+ cfg = irq_get_chip_data(irq);
+ if (!cfg)
+ continue;
+ entry = cfg->irq_2_pin;
+ if (!entry)
+ continue;
+ printk(KERN_DEBUG "IRQ%d ", irq);
+ for_each_irq_pin(entry, cfg->irq_2_pin)
+ printk("-> %d:%d", entry->apic, entry->pin);
+ printk("\n");
+ }
+
+ printk(KERN_INFO ".................................... done.\n");
+}
+
+__apicdebuginit(void) print_APIC_field(int base)
+{
+ int i;
+
+ printk(KERN_DEBUG);
+
+ for (i = 0; i < 8; i++)
+ printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+
+ printk(KERN_CONT "\n");
+}
+
+__apicdebuginit(void) print_local_APIC(void *dummy)
+{
+ unsigned int i, v, ver, maxlvt;
+ u64 icr;
+
+ printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+ smp_processor_id(), hard_smp_processor_id());
+ v = apic_read(APIC_ID);
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
+ v = apic_read(APIC_LVR);
+ printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+ ver = GET_APIC_VERSION(v);
+ maxlvt = lapic_get_maxlvt();
+
+ v = apic_read(APIC_TASKPRI);
+ printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (!APIC_XAPIC(ver)) {
+ v = apic_read(APIC_ARBPRI);
+ printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+ v & APIC_ARBPRI_MASK);
+ }
+ v = apic_read(APIC_PROCPRI);
+ printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+ }
+
+ /*
+ * Remote read supported only in the 82489DX and local APIC for
+ * Pentium processors.
+ */
+ if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+ v = apic_read(APIC_RRR);
+ printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_LDR);
+ printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+ if (!x2apic_enabled()) {
+ v = apic_read(APIC_DFR);
+ printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+ }
+ v = apic_read(APIC_SPIV);
+ printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+ printk(KERN_DEBUG "... APIC ISR field:\n");
+ print_APIC_field(APIC_ISR);
+ printk(KERN_DEBUG "... APIC TMR field:\n");
+ print_APIC_field(APIC_TMR);
+ printk(KERN_DEBUG "... APIC IRR field:\n");
+ print_APIC_field(APIC_IRR);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+
+ v = apic_read(APIC_ESR);
+ printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+ }
+
+ icr = apic_icr_read();
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
+
+ v = apic_read(APIC_LVTT);
+ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+ if (maxlvt > 3) { /* PC is LVT#4. */
+ v = apic_read(APIC_LVTPC);
+ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+ }
+ v = apic_read(APIC_LVT0);
+ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+ v = apic_read(APIC_LVT1);
+ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+ if (maxlvt > 2) { /* ERR is LVT#3. */
+ v = apic_read(APIC_LVTERR);
+ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_TMICT);
+ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+ v = apic_read(APIC_TMCCT);
+ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+ v = apic_read(APIC_TDCR);
+ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+
+ if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+ v = apic_read(APIC_EFEAT);
+ maxlvt = (v >> 16) & 0xff;
+ printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
+ v = apic_read(APIC_ECTRL);
+ printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
+ for (i = 0; i < maxlvt; i++) {
+ v = apic_read(APIC_EILVTn(i));
+ printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
+ }
+ }
+ printk("\n");
+}
+
+__apicdebuginit(void) print_local_APICs(int maxcpu)
+{
+ int cpu;
+
+ if (!maxcpu)
+ return;
+
+ preempt_disable();
+ for_each_online_cpu(cpu) {
+ if (cpu >= maxcpu)
+ break;
+ smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+ }
+ preempt_enable();
+}
+
+__apicdebuginit(void) print_PIC(void)
+{
+ unsigned int v;
+ unsigned long flags;
+
+ if (!legacy_pic->nr_legacy_irqs)
+ return;
+
+ printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
+
+ v = inb(0xa1) << 8 | inb(0x21);
+ printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
+
+ v = inb(0xa0) << 8 | inb(0x20);
+ printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
+
+ outb(0x0b,0xa0);
+ outb(0x0b,0x20);
+ v = inb(0xa0) << 8 | inb(0x20);
+ outb(0x0a,0xa0);
+ outb(0x0a,0x20);
+
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
+
+ v = inb(0x4d1) << 8 | inb(0x4d0);
+ printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+static int __initdata show_lapic = 1;
+static __init int setup_show_lapic(char *arg)
+{
+ int num = -1;
+
+ if (strcmp(arg, "all") == 0) {
+ show_lapic = CONFIG_NR_CPUS;
+ } else {
+ get_option(&arg, &num);
+ if (num >= 0)
+ show_lapic = num;
+ }
+
+ return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+__apicdebuginit(int) print_ICs(void)
+{
+ if (apic_verbosity == APIC_QUIET)
+ return 0;
+
+ print_PIC();
+
+ /* don't print out if apic is not there */
+ if (!cpu_has_apic && !apic_from_smp_config())
+ return 0;
+
+ print_local_APICs(show_lapic);
+ print_IO_APICs();
+
+ return 0;
+}
+
+late_initcall(print_ICs);
+
+
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
+void __init enable_IO_APIC(void)
+{
+ int i8259_apic, i8259_pin;
+ int apic;
+
+ if (!legacy_pic->nr_legacy_irqs)
+ return;
+
+ for(apic = 0; apic < nr_ioapics; apic++) {
+ int pin;
+ /* See if any of the pins is in ExtINT mode */
+ for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
+ struct IO_APIC_route_entry entry;
+ entry = ioapic_read_entry(apic, pin);
+
+ /* If the interrupt line is enabled and in ExtInt mode
+ * I have found the pin where the i8259 is connected.
+ */
+ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+ ioapic_i8259.apic = apic;
+ ioapic_i8259.pin = pin;
+ goto found_i8259;
+ }
+ }
+ }
+ found_i8259:
+ /* Look to see what if the MP table has reported the ExtINT */
+ /* If we could not find the appropriate pin by looking at the ioapic
+ * the i8259 probably is not connected the ioapic but give the
+ * mptable a chance anyway.
+ */
+ i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
+ i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+ /* Trust the MP table if nothing is setup in the hardware */
+ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+ printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+ ioapic_i8259.pin = i8259_pin;
+ ioapic_i8259.apic = i8259_apic;
+ }
+ /* Complain if the MP table and the hardware disagree */
+ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
+ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+ {
+ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
+ }
+
+ /*
+ * Do not trust the IO-APIC being empty at bootup
+ */
+ clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+ /*
+ * Clear the IO-APIC before rebooting:
+ */
+ clear_IO_APIC();
+
+ if (!legacy_pic->nr_legacy_irqs)
+ return;
+
+ /*
+ * If the i8259 is routed through an IOAPIC
+ * Put that IOAPIC in virtual wire mode
+ * so legacy interrupts can be delivered.
+ *
+ * With interrupt-remapping, for now we will use virtual wire A mode,
+ * as virtual wire B is little complex (need to configure both
+ * IOAPIC RTE as well as interrupt-remapping table entry).
+ * As this gets called during crash dump, keep this simple for now.
+ */
+ if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
+ struct IO_APIC_route_entry entry;
+
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 0; /* Enabled */
+ entry.trigger = 0; /* Edge */
+ entry.irr = 0;
+ entry.polarity = 0; /* High */
+ entry.delivery_status = 0;
+ entry.dest_mode = 0; /* Physical */
+ entry.delivery_mode = dest_ExtINT; /* ExtInt */
+ entry.vector = 0;
+ entry.dest = read_apic_id();
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
+ }
+
+ /*
+ * Use virtual wire A mode when interrupt remapping is enabled.
+ */
+ if (cpu_has_apic || apic_from_smp_config())
+ disconnect_bsp_APIC(!intr_remapping_enabled &&
+ ioapic_i8259.pin != -1);
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
+ */
+void __init setup_ioapic_ids_from_mpc_nocheck(void)
+{
+ union IO_APIC_reg_00 reg_00;
+ physid_mask_t phys_id_present_map;
+ int ioapic_idx;
+ int i;
+ unsigned char old_id;
+ unsigned long flags;
+
+ /*
+ * This is broken; anything with a real cpu count has to
+ * circumvent this idiocy regardless.
+ */
+ apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
+
+ /*
+ * Set the IOAPIC ID to the value stored in the MPC table.
+ */
+ for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
+ /* Read the register 0 value */
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(ioapic_idx, 0);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ old_id = mpc_ioapic_id(ioapic_idx);
+
+ if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+ ioapic_idx, mpc_ioapic_id(ioapic_idx));
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ reg_00.bits.ID);
+ ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;
+ }
+
+ /*
+ * Sanity check, is the ID really free? Every APIC in a
+ * system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (apic->check_apicid_used(&phys_id_present_map,
+ mpc_ioapic_id(ioapic_idx))) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+ ioapic_idx, mpc_ioapic_id(ioapic_idx));
+ for (i = 0; i < get_physical_broadcast(); i++)
+ if (!physid_isset(i, phys_id_present_map))
+ break;
+ if (i >= get_physical_broadcast())
+ panic("Max APIC ID exceeded!\n");
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ i);
+ physid_set(i, phys_id_present_map);
+ ioapics[ioapic_idx].mp_config.apicid = i;
+ } else {
+ physid_mask_t tmp;
+ apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx),
+ &tmp);
+ apic_printk(APIC_VERBOSE, "Setting %d in the "
+ "phys_id_present_map\n",
+ mpc_ioapic_id(ioapic_idx));
+ physids_or(phys_id_present_map, phys_id_present_map, tmp);
+ }
+
+ /*
+ * We need to adjust the IRQ routing table
+ * if the ID changed.
+ */
+ if (old_id != mpc_ioapic_id(ioapic_idx))
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].dstapic == old_id)
+ mp_irqs[i].dstapic
+ = mpc_ioapic_id(ioapic_idx);
+
+ /*
+ * Update the ID register according to the right value
+ * from the MPC table if they are different.
+ */
+ if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID)
+ continue;
+
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "...changing IO-APIC physical APIC ID to %d ...",
+ mpc_ioapic_id(ioapic_idx));
+
+ reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic_idx, 0, reg_00.raw);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /*
+ * Sanity check
+ */
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(ioapic_idx, 0);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
+ printk("could not set ID!\n");
+ else
+ apic_printk(APIC_VERBOSE, " ok.\n");
+ }
+}
+
+void __init setup_ioapic_ids_from_mpc(void)
+{
+
+ if (acpi_ioapic)
+ return;
+ /*
+ * Don't check I/O APIC IDs for xAPIC systems. They have
+ * no meaning without the serial APIC bus.
+ */
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return;
+ setup_ioapic_ids_from_mpc_nocheck();
+}
+#endif
+
+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+ no_timer_check = 1;
+ return 1;
+}
+__setup("no_timer_check", notimercheck);
+
+/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ * - timer IRQ defaults to IO-APIC IRQ
+ * - if this function detects that timer IRQs are defunct, then we fall
+ * back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+ unsigned long t1 = jiffies;
+ unsigned long flags;
+
+ if (no_timer_check)
+ return 1;
+
+ local_save_flags(flags);
+ local_irq_enable();
+ /* Let ten ticks pass... */
+ mdelay((10 * 1000) / HZ);
+ local_irq_restore(flags);
+
+ /*
+ * Expect a few ticks at least, to be sure some possible
+ * glue logic does not lock up after one or two first
+ * ticks in a non-ExtINT mode. Also the local APIC
+ * might have cached one ExtINT interrupt. Finally, at
+ * least one tick may be lost due to delays.
+ */
+
+ /* jiffies wrap? */
+ if (time_after(jiffies, t1 + 4))
+ return 1;
+ return 0;
+}
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+
+/*
+ * Starting up a edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+ * return 1 to indicate that is was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+static unsigned int startup_ioapic_irq(struct irq_data *data)
+{
+ int was_pending = 0, irq = data->irq;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ if (irq < legacy_pic->nr_legacy_irqs) {
+ legacy_pic->mask(irq);
+ if (legacy_pic->irq_pending(irq))
+ was_pending = 1;
+ }
+ __unmask_ioapic(data->chip_data);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return was_pending;
+}
+
+static int ioapic_retrigger_irq(struct irq_data *data)
+{
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+ return 1;
+}
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+#ifdef CONFIG_SMP
+void send_cleanup_vector(struct irq_cfg *cfg)
+{
+ cpumask_var_t cleanup_mask;
+
+ if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+ unsigned int i;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+ } else {
+ cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+ apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ free_cpumask_var(cleanup_mask);
+ }
+ cfg->move_in_progress = 0;
+}
+
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+ int apic, pin;
+ struct irq_pin_list *entry;
+ u8 vector = cfg->vector;
+
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
+ unsigned int reg;
+
+ apic = entry->apic;
+ pin = entry->pin;
+ /*
+ * With interrupt-remapping, destination information comes
+ * from interrupt-remapping table entry.
+ */
+ if (!irq_remapped(cfg))
+ io_apic_write(apic, 0x11 + pin*2, dest);
+ reg = io_apic_read(apic, 0x10 + pin*2);
+ reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+ reg |= vector;
+ io_apic_modify(apic, 0x10 + pin*2, reg);
+ }
+}
+
+/*
+ * Either sets data->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
+ * leaves data->affinity untouched.
+ */
+int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ unsigned int *dest_id)
+{
+ struct irq_cfg *cfg = data->chip_data;
+
+ if (!cpumask_intersects(mask, cpu_online_mask))
+ return -1;
+
+ if (assign_irq_vector(data->irq, data->chip_data, mask))
+ return -1;
+
+ cpumask_copy(data->affinity, mask);
+
+ *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
+ return 0;
+}
+
+static int
+ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
+{
+ unsigned int dest, irq = data->irq;
+ unsigned long flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ ret = __ioapic_set_affinity(data, mask, &dest);
+ if (!ret) {
+ /* Only the high 8 bits are valid. */
+ dest = SET_APIC_LOGICAL_ID(dest);
+ __target_IO_APIC_irq(irq, dest, data->chip_data);
+ }
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ return ret;
+}
+
+#ifdef CONFIG_IRQ_REMAP
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For both level and edge triggered, irq migration is a simple atomic
+ * update(of vector and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we eliminate the io-apic RTE modification (with the
+ * updated vector information), by using a virtual vector (io-apic pin number).
+ * Real vector that is used for interrupting cpu will be coming from
+ * the interrupt-remapping table entry.
+ *
+ * As the migration is a simple atomic update of IRTE, the same mechanism
+ * is used to migrate MSI irq's in the presence of interrupt-remapping.
+ */
+static int
+ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
+{
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned int dest, irq = data->irq;
+ struct irte irte;
+
+ if (!cpumask_intersects(mask, cpu_online_mask))
+ return -EINVAL;
+
+ if (get_irte(irq, &irte))
+ return -EBUSY;
+
+ if (assign_irq_vector(irq, cfg, mask))
+ return -EBUSY;
+
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ /*
+ * Atomically updates the IRTE with the new destination, vector
+ * and flushes the interrupt entry cache.
+ */
+ modify_irte(irq, &irte);
+
+ /*
+ * After this point, all the interrupts will start arriving
+ * at the new destination. So, time to cleanup the previous
+ * vector allocation.
+ */
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ cpumask_copy(data->affinity, mask);
+ return 0;
+}
+
+#else
+static inline int
+ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
+{
+ return 0;
+}
+#endif
+
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+ unsigned vector, me;
+
+ ack_APIC_irq();
+ irq_enter();
+ exit_idle();
+
+ me = smp_processor_id();
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+ unsigned int irq;
+ unsigned int irr;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ irq = __this_cpu_read(vector_irq[vector]);
+
+ if (irq == -1)
+ continue;
+
+ desc = irq_to_desc(irq);
+ if (!desc)
+ continue;
+
+ cfg = irq_cfg(irq);
+ raw_spin_lock(&desc->lock);
+
+ /*
+ * Check if the irq migration is in progress. If so, we
+ * haven't received the cleanup request yet for this irq.
+ */
+ if (cfg->move_in_progress)
+ goto unlock;
+
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+ goto unlock;
+
+ irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+ /*
+ * Check if the vector that needs to be cleanedup is
+ * registered at the cpu's IRR. If so, then this is not
+ * the best time to clean it up. Lets clean it up in the
+ * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
+ * to myself.
+ */
+ if (irr & (1 << (vector % 32))) {
+ apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
+ goto unlock;
+ }
+ __this_cpu_write(vector_irq[vector], -1);
+unlock:
+ raw_spin_unlock(&desc->lock);
+ }
+
+ irq_exit();
+}
+
+static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
+{
+ unsigned me;
+
+ if (likely(!cfg->move_in_progress))
+ return;
+
+ me = smp_processor_id();
+
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+ send_cleanup_vector(cfg);
+}
+
+static void irq_complete_move(struct irq_cfg *cfg)
+{
+ __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+ struct irq_cfg *cfg = irq_get_chip_data(irq);
+
+ if (!cfg)
+ return;
+
+ __irq_complete_move(cfg, cfg->vector);
+}
+#else
+static inline void irq_complete_move(struct irq_cfg *cfg) { }
+#endif
+
+static void ack_apic_edge(struct irq_data *data)
+{
+ irq_complete_move(data->chip_data);
+ irq_move_irq(data);
+ ack_APIC_irq();
+}
+
+atomic_t irq_mis_count;
+
+static void ack_apic_level(struct irq_data *data)
+{
+ struct irq_cfg *cfg = data->chip_data;
+ int i, do_unmask_irq = 0, irq = data->irq;
+ unsigned long v;
+
+ irq_complete_move(cfg);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ /* If we are moving the irq we need to mask it */
+ if (unlikely(irqd_is_setaffinity_pending(data))) {
+ do_unmask_irq = 1;
+ mask_ioapic(cfg);
+ }
+#endif
+
+ /*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets). Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless. As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source. The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually. We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt. We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul. --macro
+ *
+ * Also in the case when cpu goes offline, fixup_irqs() will forward
+ * any unhandled interrupt on the offlined cpu to the new cpu
+ * destination that is handling the corresponding interrupt. This
+ * interrupt forwarding is done via IPI's. Hence, in this case also
+ * level-triggered io-apic interrupt will be seen as an edge
+ * interrupt in the IRR. And we can't rely on the cpu's EOI
+ * to be broadcasted to the IO-APIC's which will clear the remoteIRR
+ * corresponding to the level-triggered interrupt. Hence on IO-APIC's
+ * supporting EOI register, we do an explicit EOI to clear the
+ * remote IRR and on IO-APIC's which don't have an EOI register,
+ * we use the above logic (mask+edge followed by unmask+level) from
+ * Manfred Spraul to clear the remote IRR.
+ */
+ i = cfg->vector;
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+ /*
+ * We must acknowledge the irq before we move it or the acknowledge will
+ * not propagate properly.
+ */
+ ack_APIC_irq();
+
+ /*
+ * Tail end of clearing remote IRR bit (either by delivering the EOI
+ * message via io-apic EOI register write or simulating it using
+ * mask+edge followed by unnask+level logic) manually when the
+ * level triggered interrupt is seen as the edge triggered interrupt
+ * at the cpu.
+ */
+ if (!(v & (1 << (i & 0x1f)))) {
+ atomic_inc(&irq_mis_count);
+
+ eoi_ioapic_irq(irq, cfg);
+ }
+
+ /* Now we can move and renable the irq */
+ if (unlikely(do_unmask_irq)) {
+ /* Only migrate the irq if the ack has been received.
+ *
+ * On rare occasions the broadcast level triggered ack gets
+ * delayed going to ioapics, and if we reprogram the
+ * vector while Remote IRR is still set the irq will never
+ * fire again.
+ *
+ * To prevent this scenario we read the Remote IRR bit
+ * of the ioapic. This has two effects.
+ * - On any sane system the read of the ioapic will
+ * flush writes (and acks) going to the ioapic from
+ * this cpu.
+ * - We get to see if the ACK has actually been delivered.
+ *
+ * Based on failed experiments of reprogramming the
+ * ioapic entry from outside of irq context starting
+ * with masking the ioapic entry and then polling until
+ * Remote IRR was clear before reprogramming the
+ * ioapic I don't trust the Remote IRR bit to be
+ * completey accurate.
+ *
+ * However there appears to be no other way to plug
+ * this race, so if the Remote IRR bit is not
+ * accurate and is causing problems then it is a hardware bug
+ * and you can go talk to the chipset vendor about it.
+ */
+ if (!io_apic_level_ack_pending(cfg))
+ irq_move_masked_irq(data);
+ unmask_ioapic(cfg);
+ }
+}
+
+#ifdef CONFIG_IRQ_REMAP
+static void ir_ack_apic_edge(struct irq_data *data)
+{
+ ack_APIC_irq();
+}
+
+static void ir_ack_apic_level(struct irq_data *data)
+{
+ ack_APIC_irq();
+ eoi_ioapic_irq(data->irq, data->chip_data);
+}
+
+static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
+{
+ seq_printf(p, " IR-%s", data->chip->name);
+}
+
+static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+{
+ chip->irq_print_chip = ir_print_prefix;
+ chip->irq_ack = ir_ack_apic_edge;
+ chip->irq_eoi = ir_ack_apic_level;
+
+#ifdef CONFIG_SMP
+ chip->irq_set_affinity = ir_ioapic_set_affinity;
+#endif
+}
+#endif /* CONFIG_IRQ_REMAP */
+
+static struct irq_chip ioapic_chip __read_mostly = {
+ .name = "IO-APIC",
+ .irq_startup = startup_ioapic_irq,
+ .irq_mask = mask_ioapic_irq,
+ .irq_unmask = unmask_ioapic_irq,
+ .irq_ack = ack_apic_edge,
+ .irq_eoi = ack_apic_level,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = ioapic_set_affinity,
+#endif
+ .irq_retrigger = ioapic_retrigger_irq,
+};
+
+static inline void init_IO_APIC_traps(void)
+{
+ struct irq_cfg *cfg;
+ unsigned int irq;
+
+ /*
+ * NOTE! The local APIC isn't very good at handling
+ * multiple interrupts at the same interrupt level.
+ * As the interrupt level is determined by taking the
+ * vector number and shifting that right by 4, we
+ * want to spread these out a bit so that they don't
+ * all fall in the same interrupt level.
+ *
+ * Also, we've got to be careful not to trash gate
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ for_each_active_irq(irq) {
+ cfg = irq_get_chip_data(irq);
+ if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+ * interrupt if we can..
+ */
+ if (irq < legacy_pic->nr_legacy_irqs)
+ legacy_pic->make_irq(irq);
+ else
+ /* Strange. Oh, well.. */
+ irq_set_chip(irq, &no_irq_chip);
+ }
+ }
+}
+
+/*
+ * The local APIC irq-chip implementation:
+ */
+
+static void mask_lapic_irq(struct irq_data *data)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void unmask_lapic_irq(struct irq_data *data)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq(struct irq_data *data)
+{
+ ack_APIC_irq();
+}
+
+static struct irq_chip lapic_chip __read_mostly = {
+ .name = "local-APIC",
+ .irq_mask = mask_lapic_irq,
+ .irq_unmask = unmask_lapic_irq,
+ .irq_ack = ack_lapic_irq,
+};
+
+static void lapic_register_intr(int irq)
+{
+ irq_clear_status_flags(irq, IRQ_LEVEL);
+ irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+ "edge");
+}
+
+/*
+ * This looks a bit hackish but it's about the only one way of sending
+ * a few INTA cycles to 8259As and any associated glue logic. ICR does
+ * not support the ExtINT mode, unfortunately. We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA. --macro
+ */
+static inline void __init unlock_ExtINT_logic(void)
+{
+ int apic, pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+ unsigned char save_control, save_freq_select;
+
+ pin = find_isa_irq_pin(8, mp_INT);
+ if (pin == -1) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+ apic = find_isa_irq_apic(8, mp_INT);
+ if (apic == -1) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ entry0 = ioapic_read_entry(apic, pin);
+ clear_IO_APIC_pin(apic, pin);
+
+ memset(&entry1, 0, sizeof(entry1));
+
+ entry1.dest_mode = 0; /* physical delivery */
+ entry1.mask = 0; /* unmask IRQ now */
+ entry1.dest = hard_smp_processor_id();
+ entry1.delivery_mode = dest_ExtINT;
+ entry1.polarity = entry0.polarity;
+ entry1.trigger = 0;
+ entry1.vector = 0;
+
+ ioapic_write_entry(apic, pin, entry1);
+
+ save_control = CMOS_READ(RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+ RTC_FREQ_SELECT);
+ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+ i = 100;
+ while (i-- > 0) {
+ mdelay(10);
+ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+ i -= 10;
+ }
+
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ clear_IO_APIC_pin(apic, pin);
+
+ ioapic_write_entry(apic, pin, entry0);
+}
+
+static int disable_timer_pin_1 __initdata;
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+ disable_timer_pin_1 = 1;
+ return 0;
+}
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+int timer_through_8259 __initdata;
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
+ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ *
+ * FIXME: really need to revamp this for all platforms.
+ */
+static inline void __init check_timer(void)
+{
+ struct irq_cfg *cfg = irq_get_chip_data(0);
+ int node = cpu_to_node(0);
+ int apic1, pin1, apic2, pin2;
+ unsigned long flags;
+ int no_pin1 = 0;
+
+ local_irq_save(flags);
+
+ /*
+ * get/set the timer IRQ vector:
+ */
+ legacy_pic->mask(0);
+ assign_irq_vector(0, cfg, apic->target_cpus());
+
+ /*
+ * As IRQ0 is to be enabled in the 8259A, the virtual
+ * wire has to be disabled in the local APIC. Also
+ * timer interrupts need to be acknowledged manually in
+ * the 8259A for the i82489DX when using the NMI
+ * watchdog as that APIC treats NMIs as level-triggered.
+ * The AEOI mode will finish them in the 8259A
+ * automatically.
+ */
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ legacy_pic->init(1);
+
+ pin1 = find_isa_irq_pin(0, mp_INT);
+ apic1 = find_isa_irq_apic(0, mp_INT);
+ pin2 = ioapic_i8259.pin;
+ apic2 = ioapic_i8259.apic;
+
+ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
+ "apic1=%d pin1=%d apic2=%d pin2=%d\n",
+ cfg->vector, apic1, pin1, apic2, pin2);
+
+ /*
+ * Some BIOS writers are clueless and report the ExtINTA
+ * I/O APIC input from the cascaded 8259A as the timer
+ * interrupt input. So just in case, if only one pin
+ * was found above, try it both directly and through the
+ * 8259A.
+ */
+ if (pin1 == -1) {
+ if (intr_remapping_enabled)
+ panic("BIOS bug: timer not connected to IO-APIC");
+ pin1 = pin2;
+ apic1 = apic2;
+ no_pin1 = 1;
+ } else if (pin2 == -1) {
+ pin2 = pin1;
+ apic2 = apic1;
+ }
+
+ if (pin1 != -1) {
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ if (no_pin1) {
+ add_pin_to_irq_node(cfg, node, apic1, pin1);
+ setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
+ } else {
+ /* for edge trigger, setup_ioapic_irq already
+ * leave it unmasked.
+ * so only need to unmask if it is level-trigger
+ * do we really have level trigger timer?
+ */
+ int idx;
+ idx = find_irq_entry(apic1, pin1, mp_INT);
+ if (idx != -1 && irq_trigger(idx))
+ unmask_ioapic(cfg);
+ }
+ if (timer_irq_works()) {
+ if (disable_timer_pin_1 > 0)
+ clear_IO_APIC_pin(0, pin1);
+ goto out;
+ }
+ if (intr_remapping_enabled)
+ panic("timer doesn't work through Interrupt-remapped IO-APIC");
+ local_irq_disable();
+ clear_IO_APIC_pin(apic1, pin1);
+ if (!no_pin1)
+ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
+ "8254 timer not connected to IO-APIC\n");
+
+ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
+ "(IRQ0) through the 8259A ...\n");
+ apic_printk(APIC_QUIET, KERN_INFO
+ "..... (found apic %d pin %d) ...\n", apic2, pin2);
+ /*
+ * legacy devices should be connected to IO APIC #0
+ */
+ replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
+ setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
+ legacy_pic->unmask(0);
+ if (timer_irq_works()) {
+ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
+ timer_through_8259 = 1;
+ goto out;
+ }
+ /*
+ * Cleanup, just in case ...
+ */
+ local_irq_disable();
+ legacy_pic->mask(0);
+ clear_IO_APIC_pin(apic2, pin2);
+ apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
+ }
+
+ apic_printk(APIC_QUIET, KERN_INFO
+ "...trying to set up timer as Virtual Wire IRQ...\n");
+
+ lapic_register_intr(0);
+ apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
+ legacy_pic->unmask(0);
+
+ if (timer_irq_works()) {
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+ goto out;
+ }
+ local_irq_disable();
+ legacy_pic->mask(0);
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
+
+ apic_printk(APIC_QUIET, KERN_INFO
+ "...trying to set up timer as ExtINT IRQ...\n");
+
+ legacy_pic->init(0);
+ legacy_pic->make_irq(0);
+ apic_write(APIC_LVT0, APIC_DM_EXTINT);
+
+ unlock_ExtINT_logic();
+
+ if (timer_irq_works()) {
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+ goto out;
+ }
+ local_irq_disable();
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
+ if (x2apic_preenabled)
+ apic_printk(APIC_QUIET, KERN_INFO
+ "Perhaps problem with the pre-enabled x2apic mode\n"
+ "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
+ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
+ "report. Then try booting with the 'noapic' option.\n");
+out:
+ local_irq_restore(flags);
+}
+
+/*
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ * to devices. However there may be an I/O APIC pin available for
+ * this interrupt regardless. The pin may be left unconnected, but
+ * typically it will be reused as an ExtINT cascade interrupt for
+ * the master 8259A. In the MPS case such a pin will normally be
+ * reported as an ExtINT interrupt in the MP table. With ACPI
+ * there is no provision for ExtINT interrupts, and in the absence
+ * of an override it would be treated as an ordinary ISA I/O APIC
+ * interrupt, that is edge-triggered and unmasked by default. We
+ * used to do this, but it caused problems on some systems because
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ * the same ExtINT cascade interrupt to drive the local APIC of the
+ * bootstrap processor. Therefore we refrain from routing IRQ2 to
+ * the I/O APIC in all cases now. No actual device should request
+ * it anyway. --macro
+ */
+#define PIC_IRQS (1UL << PIC_CASCADE_IR)
+
+void __init setup_IO_APIC(void)
+{
+
+ /*
+ * calling enable_IO_APIC() is moved to setup_local_APIC for BP
+ */
+ io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
+
+ apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+ /*
+ * Set up IO-APIC IRQ routing.
+ */
+ x86_init.mpparse.setup_ioapic_ids();
+
+ sync_Arb_IDs();
+ setup_IO_APIC_irqs();
+ init_IO_APIC_traps();
+ if (legacy_pic->nr_legacy_irqs)
+ check_timer();
+}
+
+/*
+ * Called after all the initialization is done. If we didn't find any
+ * APIC bugs then we can allow the modify fast path
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+ if (sis_apic_bug == -1)
+ sis_apic_bug = 0;
+ return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
+static void resume_ioapic_id(int ioapic_idx)
+{
+ unsigned long flags;
+ union IO_APIC_reg_00 reg_00;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(ioapic_idx, 0);
+ if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {
+ reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
+ io_apic_write(ioapic_idx, 0, reg_00.raw);
+ }
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void ioapic_resume(void)
+{
+ int ioapic_idx;
+
+ for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--)
+ resume_ioapic_id(ioapic_idx);
+
+ restore_ioapic_entries();
+}
+
+static struct syscore_ops ioapic_syscore_ops = {
+ .suspend = save_ioapic_entries,
+ .resume = ioapic_resume,
+};
+
+static int __init ioapic_init_ops(void)
+{
+ register_syscore_ops(&ioapic_syscore_ops);
+
+ return 0;
+}
+
+device_initcall(ioapic_init_ops);
+
+/*
+ * Dynamic irq allocate and deallocation
+ */
+unsigned int create_irq_nr(unsigned int from, int node)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int ret = 0;
+ int irq;
+
+ if (from < nr_irqs_gsi)
+ from = nr_irqs_gsi;
+
+ irq = alloc_irq_from(from, node);
+ if (irq < 0)
+ return 0;
+ cfg = alloc_irq_cfg(irq, node);
+ if (!cfg) {
+ free_irq_at(irq, NULL);
+ return 0;
+ }
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
+ ret = irq;
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+ if (ret) {
+ irq_set_chip_data(irq, cfg);
+ irq_clear_status_flags(irq, IRQ_NOREQUEST);
+ } else {
+ free_irq_at(irq, cfg);
+ }
+ return ret;
+}
+
+int create_irq(void)
+{
+ int node = cpu_to_node(0);
+ unsigned int irq_want;
+ int irq;
+
+ irq_want = nr_irqs_gsi;
+ irq = create_irq_nr(irq_want, node);
+
+ if (irq == 0)
+ irq = -1;
+
+ return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+ struct irq_cfg *cfg = irq_get_chip_data(irq);
+ unsigned long flags;
+
+ irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
+
+ if (irq_remapped(cfg))
+ free_irte(irq);
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ __clear_irq_vector(irq, cfg);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ free_irq_at(irq, cfg);
+}
+
+/*
+ * MSI message composition
+ */
+#ifdef CONFIG_PCI_MSI
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+ struct msi_msg *msg, u8 hpet_id)
+{
+ struct irq_cfg *cfg;
+ int err;
+ unsigned dest;
+
+ if (disable_apic)
+ return -ENXIO;
+
+ cfg = irq_cfg(irq);
+ err = assign_irq_vector(irq, cfg, apic->target_cpus());
+ if (err)
+ return err;
+
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+
+ if (irq_remapped(cfg)) {
+ struct irte irte;
+ int ir_index;
+ u16 sub_handle;
+
+ ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+ BUG_ON(ir_index == -1);
+
+ prepare_irte(&irte, cfg->vector, dest);
+
+ /* Set source-id of interrupt request */
+ if (pdev)
+ set_msi_sid(&irte, pdev);
+ else
+ set_hpet_sid(&irte, hpet_id);
+
+ modify_irte(irq, &irte);
+
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->data = sub_handle;
+ msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+ MSI_ADDR_IR_SHV |
+ MSI_ADDR_IR_INDEX1(ir_index) |
+ MSI_ADDR_IR_INDEX2(ir_index);
+ } else {
+ if (x2apic_enabled())
+ msg->address_hi = MSI_ADDR_BASE_HI |
+ MSI_ADDR_EXT_DEST_ID(dest);
+ else
+ msg->address_hi = MSI_ADDR_BASE_HI;
+
+ msg->address_lo =
+ MSI_ADDR_BASE_LO |
+ ((apic->irq_dest_mode == 0) ?
+ MSI_ADDR_DEST_MODE_PHYSICAL:
+ MSI_ADDR_DEST_MODE_LOGICAL) |
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
+ MSI_ADDR_REDIRECTION_CPU:
+ MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_DEST_ID(dest);
+
+ msg->data =
+ MSI_DATA_TRIGGER_EDGE |
+ MSI_DATA_LEVEL_ASSERT |
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
+ MSI_DATA_DELIVERY_FIXED:
+ MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_VECTOR(cfg->vector);
+ }
+ return err;
+}
+
+#ifdef CONFIG_SMP
+static int
+msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+ struct irq_cfg *cfg = data->chip_data;
+ struct msi_msg msg;
+ unsigned int dest;
+
+ if (__ioapic_set_affinity(data, mask, &dest))
+ return -1;
+
+ __get_cached_msi_msg(data->msi_desc, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+ __write_msi_msg(data->msi_desc, &msg);
+
+ return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+ .name = "PCI-MSI",
+ .irq_unmask = unmask_msi_irq,
+ .irq_mask = mask_msi_irq,
+ .irq_ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = msi_set_affinity,
+#endif
+ .irq_retrigger = ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+ struct intel_iommu *iommu;
+ int index;
+
+ iommu = map_dev_to_ir(dev);
+ if (!iommu) {
+ printk(KERN_ERR
+ "Unable to map PCI %s to iommu\n", pci_name(dev));
+ return -ENOENT;
+ }
+
+ index = alloc_irte(iommu, irq, nvec);
+ if (index < 0) {
+ printk(KERN_ERR
+ "Unable to allocate %d IRTE for PCI %s\n", nvec,
+ pci_name(dev));
+ return -ENOSPC;
+ }
+ return index;
+}
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
+{
+ struct irq_chip *chip = &msi_chip;
+ struct msi_msg msg;
+ int ret;
+
+ ret = msi_compose_msg(dev, irq, &msg, -1);
+ if (ret < 0)
+ return ret;
+
+ irq_set_msi_desc(irq, msidesc);
+ write_msi_msg(irq, &msg);
+
+ if (irq_remapped(irq_get_chip_data(irq))) {
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+ irq_remap_modify_chip_defaults(chip);
+ }
+
+ irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+
+ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
+ return 0;
+}
+
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ int node, ret, sub_handle, index = 0;
+ unsigned int irq, irq_want;
+ struct msi_desc *msidesc;
+ struct intel_iommu *iommu = NULL;
+
+ /* x86 doesn't support multiple MSI yet */
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+
+ node = dev_to_node(&dev->dev);
+ irq_want = nr_irqs_gsi;
+ sub_handle = 0;
+ list_for_each_entry(msidesc, &dev->msi_list, list) {
+ irq = create_irq_nr(irq_want, node);
+ if (irq == 0)
+ return -1;
+ irq_want = irq + 1;
+ if (!intr_remapping_enabled)
+ goto no_ir;
+
+ if (!sub_handle) {
+ /*
+ * allocate the consecutive block of IRTE's
+ * for 'nvec'
+ */
+ index = msi_alloc_irte(dev, irq, nvec);
+ if (index < 0) {
+ ret = index;
+ goto error;
+ }
+ } else {
+ iommu = map_dev_to_ir(dev);
+ if (!iommu) {
+ ret = -ENOENT;
+ goto error;
+ }
+ /*
+ * setup the mapping between the irq and the IRTE
+ * base index, the sub_handle pointing to the
+ * appropriate interrupt remap table entry.
+ */
+ set_irte_irq(irq, iommu, index, sub_handle);
+ }
+no_ir:
+ ret = setup_msi_irq(dev, msidesc, irq);
+ if (ret < 0)
+ goto error;
+ sub_handle++;
+ }
+ return 0;
+
+error:
+ destroy_irq(irq);
+ return ret;
+}
+
+void native_teardown_msi_irq(unsigned int irq)
+{
+ destroy_irq(irq);
+}
+
+#ifdef CONFIG_DMAR_TABLE
+#ifdef CONFIG_SMP
+static int
+dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
+{
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned int dest, irq = data->irq;
+ struct msi_msg msg;
+
+ if (__ioapic_set_affinity(data, mask, &dest))
+ return -1;
+
+ dmar_msi_read(irq, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+ msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
+
+ dmar_msi_write(irq, &msg);
+
+ return 0;
+}
+
+#endif /* CONFIG_SMP */
+
+static struct irq_chip dmar_msi_type = {
+ .name = "DMAR_MSI",
+ .irq_unmask = dmar_msi_unmask,
+ .irq_mask = dmar_msi_mask,
+ .irq_ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = dmar_msi_set_affinity,
+#endif
+ .irq_retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+ int ret;
+ struct msi_msg msg;
+
+ ret = msi_compose_msg(NULL, irq, &msg, -1);
+ if (ret < 0)
+ return ret;
+ dmar_msi_write(irq, &msg);
+ irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+ "edge");
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_HPET_TIMER
+
+#ifdef CONFIG_SMP
+static int hpet_msi_set_affinity(struct irq_data *data,
+ const struct cpumask *mask, bool force)
+{
+ struct irq_cfg *cfg = data->chip_data;
+ struct msi_msg msg;
+ unsigned int dest;
+
+ if (__ioapic_set_affinity(data, mask, &dest))
+ return -1;
+
+ hpet_msi_read(data->handler_data, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+ hpet_msi_write(data->handler_data, &msg);
+
+ return 0;
+}
+
+#endif /* CONFIG_SMP */
+
+static struct irq_chip hpet_msi_type = {
+ .name = "HPET_MSI",
+ .irq_unmask = hpet_msi_unmask,
+ .irq_mask = hpet_msi_mask,
+ .irq_ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = hpet_msi_set_affinity,
+#endif
+ .irq_retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
+{
+ struct irq_chip *chip = &hpet_msi_type;
+ struct msi_msg msg;
+ int ret;
+
+ if (intr_remapping_enabled) {
+ struct intel_iommu *iommu = map_hpet_to_ir(id);
+ int index;
+
+ if (!iommu)
+ return -1;
+
+ index = alloc_irte(iommu, irq, 1);
+ if (index < 0)
+ return -1;
+ }
+
+ ret = msi_compose_msg(NULL, irq, &msg, id);
+ if (ret < 0)
+ return ret;
+
+ hpet_msi_write(irq_get_handler_data(irq), &msg);
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+ if (irq_remapped(irq_get_chip_data(irq)))
+ irq_remap_modify_chip_defaults(chip);
+
+ irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+ return 0;
+}
+#endif
+
+#endif /* CONFIG_PCI_MSI */
+/*
+ * Hypertransport interrupt support
+ */
+#ifdef CONFIG_HT_IRQ
+
+#ifdef CONFIG_SMP
+
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+ struct ht_irq_msg msg;
+ fetch_ht_irq_msg(irq, &msg);
+
+ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+ write_ht_irq_msg(irq, &msg);
+}
+
+static int
+ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned int dest;
+
+ if (__ioapic_set_affinity(data, mask, &dest))
+ return -1;
+
+ target_ht_irq(data->irq, dest, cfg->vector);
+ return 0;
+}
+
+#endif
+
+static struct irq_chip ht_irq_chip = {
+ .name = "PCI-HT",
+ .irq_mask = mask_ht_irq,
+ .irq_unmask = unmask_ht_irq,
+ .irq_ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = ht_set_affinity,
+#endif
+ .irq_retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+ struct irq_cfg *cfg;
+ int err;
+
+ if (disable_apic)
+ return -ENXIO;
+
+ cfg = irq_cfg(irq);
+ err = assign_irq_vector(irq, cfg, apic->target_cpus());
+ if (!err) {
+ struct ht_irq_msg msg;
+ unsigned dest;
+
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain,
+ apic->target_cpus());
+
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+ msg.address_lo =
+ HT_IRQ_LOW_BASE |
+ HT_IRQ_LOW_DEST_ID(dest) |
+ HT_IRQ_LOW_VECTOR(cfg->vector) |
+ ((apic->irq_dest_mode == 0) ?
+ HT_IRQ_LOW_DM_PHYSICAL :
+ HT_IRQ_LOW_DM_LOGICAL) |
+ HT_IRQ_LOW_RQEOI_EDGE |
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
+ HT_IRQ_LOW_MT_FIXED :
+ HT_IRQ_LOW_MT_ARBITRATED) |
+ HT_IRQ_LOW_IRQ_MASKED;
+
+ write_ht_irq_msg(irq, &msg);
+
+ irq_set_chip_and_handler_name(irq, &ht_irq_chip,
+ handle_edge_irq, "edge");
+
+ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
+ }
+ return err;
+}
+#endif /* CONFIG_HT_IRQ */
+
+static int
+io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
+{
+ struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
+ int ret;
+
+ if (!cfg)
+ return -EINVAL;
+ ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
+ if (!ret)
+ setup_ioapic_irq(irq, cfg, attr);
+ return ret;
+}
+
+int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+ struct io_apic_irq_attr *attr)
+{
+ unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin;
+ int ret;
+
+ /* Avoid redundant programming */
+ if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) {
+ pr_debug("Pin %d-%d already programmed\n",
+ mpc_ioapic_id(ioapic_idx), pin);
+ return 0;
+ }
+ ret = io_apic_setup_irq_pin(irq, node, attr);
+ if (!ret)
+ set_bit(pin, ioapics[ioapic_idx].pin_programmed);
+ return ret;
+}
+
+static int __init io_apic_get_redir_entries(int ioapic)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /* The register returns the maximum index redir index
+ * supported, which is one less than the total number of redir
+ * entries.
+ */
+ return reg_01.bits.entries + 1;
+}
+
+static void __init probe_nr_irqs_gsi(void)
+{
+ int nr;
+
+ nr = gsi_top + NR_IRQS_LEGACY;
+ if (nr > nr_irqs_gsi)
+ nr_irqs_gsi = nr;
+
+ printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
+}
+
+int get_nr_irqs_gsi(void)
+{
+ return nr_irqs_gsi;
+}
+
+int __init arch_probe_nr_irqs(void)
+{
+ int nr;
+
+ if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
+ nr_irqs = NR_VECTORS * nr_cpu_ids;
+
+ nr = nr_irqs_gsi + 8 * nr_cpu_ids;
+#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+ /*
+ * for MSI and HT dyn irq
+ */
+ nr += nr_irqs_gsi * 16;
+#endif
+ if (nr < nr_irqs)
+ nr_irqs = nr;
+
+ return NR_IRQS_LEGACY;
+}
+
+int io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr)
+{
+ int node;
+
+ if (!IO_APIC_IRQ(irq)) {
+ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+ irq_attr->ioapic);
+ return -EINVAL;
+ }
+
+ node = dev ? dev_to_node(dev) : cpu_to_node(0);
+
+ return io_apic_setup_irq_pin_once(irq, node, irq_attr);
+}
+
+#ifdef CONFIG_X86_32
+static int __init io_apic_get_unique_id(int ioapic, int apic_id)
+{
+ union IO_APIC_reg_00 reg_00;
+ static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+ physid_mask_t tmp;
+ unsigned long flags;
+ int i = 0;
+
+ /*
+ * The P4 platform supports up to 256 APIC IDs on two separate APIC
+ * buses (one for LAPICs, one for IOAPICs), where predecessors only
+ * supports up to 16 on one shared APIC bus.
+ *
+ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+ * advantage of new APIC bus architecture.
+ */
+
+ if (physids_empty(apic_id_map))
+ apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ if (apic_id >= get_physical_broadcast()) {
+ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+ "%d\n", ioapic, apic_id, reg_00.bits.ID);
+ apic_id = reg_00.bits.ID;
+ }
+
+ /*
+ * Every APIC in a system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (apic->check_apicid_used(&apic_id_map, apic_id)) {
+
+ for (i = 0; i < get_physical_broadcast(); i++) {
+ if (!apic->check_apicid_used(&apic_id_map, i))
+ break;
+ }
+
+ if (i == get_physical_broadcast())
+ panic("Max apic_id exceeded!\n");
+
+ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+ "trying %d\n", ioapic, apic_id, i);
+
+ apic_id = i;
+ }
+
+ apic->apicid_to_cpu_present(apic_id, &tmp);
+ physids_or(apic_id_map, apic_id_map, tmp);
+
+ if (reg_00.bits.ID != apic_id) {
+ reg_00.bits.ID = apic_id;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /* Sanity check */
+ if (reg_00.bits.ID != apic_id) {
+ printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+ return -1;
+ }
+ }
+
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+ return apic_id;
+}
+
+static u8 __init io_apic_unique_id(u8 id)
+{
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return io_apic_get_unique_id(nr_ioapics, id);
+ else
+ return id;
+}
+#else
+static u8 __init io_apic_unique_id(u8 id)
+{
+ int i;
+ DECLARE_BITMAP(used, 256);
+
+ bitmap_zero(used, 256);
+ for (i = 0; i < nr_ioapics; i++) {
+ __set_bit(mpc_ioapic_id(i), used);
+ }
+ if (!test_bit(id, used))
+ return id;
+ return find_first_zero_bit(used, 256);
+}
+#endif
+
+static int __init io_apic_get_version(int ioapic)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.bits.version;
+}
+
+int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
+{
+ int ioapic, pin, idx;
+
+ if (skip_ioapic_setup)
+ return -1;
+
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return -1;
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+ if (pin < 0)
+ return -1;
+
+ idx = find_irq_entry(ioapic, pin, mp_INT);
+ if (idx < 0)
+ return -1;
+
+ *trigger = irq_trigger(idx);
+ *polarity = irq_polarity(idx);
+ return 0;
+}
+
+/*
+ * This function currently is only a helper for the i386 smp boot process where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online
+ * so mask in all cases should simply be apic->target_cpus()
+ */
+#ifdef CONFIG_SMP
+void __init setup_ioapic_dest(void)
+{
+ int pin, ioapic, irq, irq_entry;
+ const struct cpumask *mask;
+ struct irq_data *idata;
+
+ if (skip_ioapic_setup == 1)
+ return;
+
+ for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
+ for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) {
+ irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+ if (irq_entry == -1)
+ continue;
+ irq = pin_2_irq(irq_entry, ioapic, pin);
+
+ if ((ioapic > 0) && (irq > 16))
+ continue;
+
+ idata = irq_get_irq_data(irq);
+
+ /*
+ * Honour affinities which have been set in early boot
+ */
+ if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
+ mask = idata->affinity;
+ else
+ mask = apic->target_cpus();
+
+ if (intr_remapping_enabled)
+ ir_ioapic_set_affinity(idata, mask, false);
+ else
+ ioapic_set_affinity(idata, mask, false);
+ }
+
+}
+#endif
+
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+
+static struct resource *ioapic_resources;
+
+static struct resource * __init ioapic_setup_resources(int nr_ioapics)
+{
+ unsigned long n;
+ struct resource *res;
+ char *mem;
+ int i;
+
+ if (nr_ioapics <= 0)
+ return NULL;
+
+ n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+ n *= nr_ioapics;
+
+ mem = alloc_bootmem(n);
+ res = (void *)mem;
+
+ mem += sizeof(struct resource) * nr_ioapics;
+
+ for (i = 0; i < nr_ioapics; i++) {
+ res[i].name = mem;
+ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
+ mem += IOAPIC_RESOURCE_NAME_SIZE;
+ }
+
+ ioapic_resources = res;
+
+ return res;
+}
+
+void __init native_ioapic_init_mappings(void)
+{
+ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+ struct resource *ioapic_res;
+ int i;
+
+ ioapic_res = ioapic_setup_resources(nr_ioapics);
+ for (i = 0; i < nr_ioapics; i++) {
+ if (smp_found_config) {
+ ioapic_phys = mpc_ioapic_addr(i);
+#ifdef CONFIG_X86_32
+ if (!ioapic_phys) {
+ printk(KERN_ERR
+ "WARNING: bogus zero IO-APIC "
+ "address found in MPTABLE, "
+ "disabling IO/APIC support!\n");
+ smp_found_config = 0;
+ skip_ioapic_setup = 1;
+ goto fake_ioapic_page;
+ }
+#endif
+ } else {
+#ifdef CONFIG_X86_32
+fake_ioapic_page:
+#endif
+ ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
+ ioapic_phys = __pa(ioapic_phys);
+ }
+ set_fixmap_nocache(idx, ioapic_phys);
+ apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
+ ioapic_phys);
+ idx++;
+
+ ioapic_res->start = ioapic_phys;
+ ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
+ ioapic_res++;
+ }
+
+ probe_nr_irqs_gsi();
+}
+
+void __init ioapic_insert_resources(void)
+{
+ int i;
+ struct resource *r = ioapic_resources;
+
+ if (!r) {
+ if (nr_ioapics > 0)
+ printk(KERN_ERR
+ "IO APIC resources couldn't be allocated.\n");
+ return;
+ }
+
+ for (i = 0; i < nr_ioapics; i++) {
+ insert_resource(&iomem_resource, r);
+ r++;
+ }
+}
+
+int mp_find_ioapic(u32 gsi)
+{
+ int i = 0;
+
+ if (nr_ioapics == 0)
+ return -1;
+
+ /* Find the IOAPIC that manages this GSI. */
+ for (i = 0; i < nr_ioapics; i++) {
+ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
+ if ((gsi >= gsi_cfg->gsi_base)
+ && (gsi <= gsi_cfg->gsi_end))
+ return i;
+ }
+
+ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+ return -1;
+}
+
+int mp_find_ioapic_pin(int ioapic, u32 gsi)
+{
+ struct mp_ioapic_gsi *gsi_cfg;
+
+ if (WARN_ON(ioapic == -1))
+ return -1;
+
+ gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+ if (WARN_ON(gsi > gsi_cfg->gsi_end))
+ return -1;
+
+ return gsi - gsi_cfg->gsi_base;
+}
+
+static __init int bad_ioapic(unsigned long address)
+{
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
+ "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
+ return 1;
+ }
+ if (!address) {
+ printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
+ " found in table, skipping!\n");
+ return 1;
+ }
+ return 0;
+}
+
+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+{
+ int idx = 0;
+ int entries;
+ struct mp_ioapic_gsi *gsi_cfg;
+
+ if (bad_ioapic(address))
+ return;
+
+ idx = nr_ioapics;
+
+ ioapics[idx].mp_config.type = MP_IOAPIC;
+ ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
+ ioapics[idx].mp_config.apicaddr = address;
+
+ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+ ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
+ ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
+
+ /*
+ * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+ * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+ */
+ entries = io_apic_get_redir_entries(idx);
+ gsi_cfg = mp_ioapic_gsi_routing(idx);
+ gsi_cfg->gsi_base = gsi_base;
+ gsi_cfg->gsi_end = gsi_base + entries - 1;
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ ioapics[idx].nr_registers = entries;
+
+ if (gsi_cfg->gsi_end >= gsi_top)
+ gsi_top = gsi_cfg->gsi_end + 1;
+
+ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+ "GSI %d-%d\n", idx, mpc_ioapic_id(idx),
+ mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
+ gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+
+ nr_ioapics++;
+}
+
+/* Enable IOAPIC early just for system timer */
+void __init pre_init_apic_IRQ0(void)
+{
+ struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
+
+ printk(KERN_INFO "Early APIC setup for system timer0\n");
+#ifndef CONFIG_SMP
+ physid_set_mask_of_physid(boot_cpu_physical_apicid,
+ &phys_cpu_present_map);
+#endif
+ setup_local_APIC();
+
+ io_apic_setup_irq_pin(0, 0, &attr);
+ irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+ "edge");
+}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
new file mode 100644
index 00000000000..cce91bf2667
--- /dev/null
+++ b/arch/x86/kernel/apic/ipi.c
@@ -0,0 +1,167 @@
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/apic.h>
+#include <asm/proto.h>
+#include <asm/ipi.h>
+
+void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
+{
+ unsigned long query_cpu;
+ unsigned long flags;
+
+ /*
+ * Hack. The clustered APIC addressing mode doesn't allow us to send
+ * to an arbitrary mask, so I do a unicast to each CPU instead.
+ * - mbligh
+ */
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
+ query_cpu), vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
+
+void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
+ int vector)
+{
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int query_cpu;
+ unsigned long flags;
+
+ /* See Hack comment above */
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ if (query_cpu == this_cpu)
+ continue;
+ __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
+ query_cpu), vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
+
+#ifdef CONFIG_X86_32
+
+void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
+ int vector)
+{
+ unsigned long flags;
+ unsigned int query_cpu;
+
+ /*
+ * Hack. The clustered APIC addressing mode doesn't allow us to send
+ * to an arbitrary mask, so I do a unicasts to each CPU instead. This
+ * should be modified to do 1 message per cluster ID - mbligh
+ */
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask)
+ __default_send_IPI_dest_field(
+ early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
+ local_irq_restore(flags);
+}
+
+void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+ int vector)
+{
+ unsigned long flags;
+ unsigned int query_cpu;
+ unsigned int this_cpu = smp_processor_id();
+
+ /* See Hack comment above */
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ if (query_cpu == this_cpu)
+ continue;
+ __default_send_IPI_dest_field(
+ early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
+ }
+ local_irq_restore(flags);
+}
+
+/*
+ * This is only used on smaller machines.
+ */
+void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
+{
+ unsigned long mask = cpumask_bits(cpumask)[0];
+ unsigned long flags;
+
+ if (WARN_ONCE(!mask, "empty IPI mask"))
+ return;
+
+ local_irq_save(flags);
+ WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
+ __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
+ local_irq_restore(flags);
+}
+
+void default_send_IPI_allbutself(int vector)
+{
+ /*
+ * if there are no other CPUs in the system then we get an APIC send
+ * error if we try to broadcast, thus avoid sending IPIs in this case.
+ */
+ if (!(num_online_cpus() > 1))
+ return;
+
+ __default_local_send_IPI_allbutself(vector);
+}
+
+void default_send_IPI_all(int vector)
+{
+ __default_local_send_IPI_all(vector);
+}
+
+void default_send_IPI_self(int vector)
+{
+ __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical);
+}
+
+/* must come after the send_IPI functions above for inlining */
+static int convert_apicid_to_cpu(int apic_id)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
+ return i;
+ }
+ return -1;
+}
+
+int safe_smp_processor_id(void)
+{
+ int apicid, cpuid;
+
+ if (!cpu_has_apic)
+ return 0;
+
+ apicid = hard_smp_processor_id();
+ if (apicid == BAD_APICID)
+ return 0;
+
+ cpuid = convert_apicid_to_cpu(apicid);
+
+ return cpuid >= 0 ? cpuid : 0;
+}
+#endif
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
new file mode 100644
index 00000000000..c4a61ca1349
--- /dev/null
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -0,0 +1,541 @@
+/*
+ * Written by: Patricia Gaughen, IBM Corporation
+ *
+ * Copyright (C) 2002, IBM Corp.
+ * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <gone@us.ibm.com>
+ */
+#include <linux/nodemask.h>
+#include <linux/topology.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/numa.h>
+#include <linux/smp.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/numaq.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/e820.h>
+#include <asm/ipi.h>
+
+int found_numaq;
+
+/*
+ * Have to match translation table entries to main table entries by counter
+ * hence the mpc_record variable .... can't see a less disgusting way of
+ * doing this ....
+ */
+struct mpc_trans {
+ unsigned char mpc_type;
+ unsigned char trans_len;
+ unsigned char trans_type;
+ unsigned char trans_quad;
+ unsigned char trans_global;
+ unsigned char trans_local;
+ unsigned short trans_reserved;
+};
+
+static int mpc_record;
+
+static struct mpc_trans *translation_table[MAX_MPC_ENTRY];
+
+int mp_bus_id_to_node[MAX_MP_BUSSES];
+int mp_bus_id_to_local[MAX_MP_BUSSES];
+int quad_local_to_mp_bus_id[NR_CPUS/4][4];
+
+
+static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
+{
+ struct eachquadmem *eq = scd->eq + node;
+ u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20;
+ u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20;
+ int ret;
+
+ node_set(node, numa_nodes_parsed);
+ ret = numa_add_memblk(node, start, end);
+ BUG_ON(ret < 0);
+}
+
+/*
+ * Function: smp_dump_qct()
+ *
+ * Description: gets memory layout from the quad config table. This
+ * function also updates numa_nodes_parsed with the nodes (quads) present.
+ */
+static void __init smp_dump_qct(void)
+{
+ struct sys_cfg_data *scd;
+ int node;
+
+ scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
+
+ for_each_node(node) {
+ if (scd->quads_present31_0 & (1 << node))
+ numaq_register_node(node, scd);
+ }
+}
+
+void __cpuinit numaq_tsc_disable(void)
+{
+ if (!found_numaq)
+ return;
+
+ if (num_online_nodes() > 1) {
+ printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
+ setup_clear_cpu_cap(X86_FEATURE_TSC);
+ }
+}
+
+static void __init numaq_tsc_init(void)
+{
+ numaq_tsc_disable();
+}
+
+static inline int generate_logical_apicid(int quad, int phys_apicid)
+{
+ return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
+}
+
+/* x86_quirks member */
+static int mpc_apic_id(struct mpc_cpu *m)
+{
+ int quad = translation_table[mpc_record]->trans_quad;
+ int logical_apicid = generate_logical_apicid(quad, m->apicid);
+
+ printk(KERN_DEBUG
+ "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
+ m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->cpufeature & CPU_MODEL_MASK) >> 4,
+ m->apicver, quad, logical_apicid);
+
+ return logical_apicid;
+}
+
+/* x86_quirks member */
+static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
+{
+ int quad = translation_table[mpc_record]->trans_quad;
+ int local = translation_table[mpc_record]->trans_local;
+
+ mp_bus_id_to_node[m->busid] = quad;
+ mp_bus_id_to_local[m->busid] = local;
+
+ printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad);
+}
+
+/* x86_quirks member */
+static void mpc_oem_pci_bus(struct mpc_bus *m)
+{
+ int quad = translation_table[mpc_record]->trans_quad;
+ int local = translation_table[mpc_record]->trans_local;
+
+ quad_local_to_mp_bus_id[quad][local] = m->busid;
+}
+
+/*
+ * Called from mpparse code.
+ * mode = 0: prescan
+ * mode = 1: one mpc entry scanned
+ */
+static void numaq_mpc_record(unsigned int mode)
+{
+ if (!mode)
+ mpc_record = 0;
+ else
+ mpc_record++;
+}
+
+static void __init MP_translation_info(struct mpc_trans *m)
+{
+ printk(KERN_INFO
+ "Translation: record %d, type %d, quad %d, global %d, local %d\n",
+ mpc_record, m->trans_type, m->trans_quad, m->trans_global,
+ m->trans_local);
+
+ if (mpc_record >= MAX_MPC_ENTRY)
+ printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+ else
+ translation_table[mpc_record] = m; /* stash this for later */
+
+ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+ node_set_online(m->trans_quad);
+}
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+ int sum = 0;
+
+ while (len--)
+ sum += *mp++;
+
+ return sum & 0xFF;
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+static void __init smp_read_mpc_oem(struct mpc_table *mpc)
+{
+ struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr;
+ int count = sizeof(*oemtable); /* the header size */
+ unsigned char *oemptr = ((unsigned char *)oemtable) + count;
+
+ mpc_record = 0;
+ printk(KERN_INFO
+ "Found an OEM MPC table at %8p - parsing it...\n", oemtable);
+
+ if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
+ printk(KERN_WARNING
+ "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+ oemtable->signature[0], oemtable->signature[1],
+ oemtable->signature[2], oemtable->signature[3]);
+ return;
+ }
+
+ if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
+ printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+ return;
+ }
+
+ while (count < oemtable->length) {
+ switch (*oemptr) {
+ case MP_TRANSLATION:
+ {
+ struct mpc_trans *m = (void *)oemptr;
+
+ MP_translation_info(m);
+ oemptr += sizeof(*m);
+ count += sizeof(*m);
+ ++mpc_record;
+ break;
+ }
+ default:
+ printk(KERN_WARNING
+ "Unrecognised OEM table entry type! - %d\n",
+ (int)*oemptr);
+ return;
+ }
+ }
+}
+
+static __init void early_check_numaq(void)
+{
+ /*
+ * get boot-time SMP configuration:
+ */
+ if (smp_found_config)
+ early_get_smp_config();
+
+ if (found_numaq) {
+ x86_init.mpparse.mpc_record = numaq_mpc_record;
+ x86_init.mpparse.setup_ioapic_ids = x86_init_noop;
+ x86_init.mpparse.mpc_apic_id = mpc_apic_id;
+ x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem;
+ x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus;
+ x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info;
+ x86_init.timers.tsc_pre_init = numaq_tsc_init;
+ x86_init.pci.init = pci_numaq_init;
+ }
+}
+
+int __init numaq_numa_init(void)
+{
+ early_check_numaq();
+ if (!found_numaq)
+ return -ENOENT;
+ smp_dump_qct();
+
+ return 0;
+}
+
+#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
+
+static inline unsigned int numaq_get_apic_id(unsigned long x)
+{
+ return (x >> 24) & 0x0F;
+}
+
+static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ default_send_IPI_mask_sequence_logical(mask, vector);
+}
+
+static inline void numaq_send_IPI_allbutself(int vector)
+{
+ default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
+}
+
+static inline void numaq_send_IPI_all(int vector)
+{
+ numaq_send_IPI_mask(cpu_online_mask, vector);
+}
+
+#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8)
+#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa)
+
+/*
+ * Because we use NMIs rather than the INIT-STARTUP sequence to
+ * bootstrap the CPUs, the APIC may be in a weird state. Kick it:
+ */
+static inline void numaq_smp_callin_clear_local_apic(void)
+{
+ clear_local_APIC();
+}
+
+static inline const struct cpumask *numaq_target_cpus(void)
+{
+ return cpu_all_mask;
+}
+
+static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid)
+{
+ return physid_isset(apicid, *map);
+}
+
+static inline unsigned long numaq_check_apicid_present(int bit)
+{
+ return physid_isset(bit, phys_cpu_present_map);
+}
+
+static inline int numaq_apic_id_registered(void)
+{
+ return 1;
+}
+
+static inline void numaq_init_apic_ldr(void)
+{
+ /* Already done in NUMA-Q firmware */
+}
+
+static inline void numaq_setup_apic_routing(void)
+{
+ printk(KERN_INFO
+ "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n",
+ nr_ioapics);
+}
+
+/*
+ * Skip adding the timer int on secondary nodes, which causes
+ * a small but painful rift in the time-space continuum.
+ */
+static inline int numaq_multi_timer_check(int apic, int irq)
+{
+ return apic != 0 && irq == 0;
+}
+
+static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
+{
+ /* We don't have a good way to do this yet - hack */
+ return physids_promote(0xFUL, retmap);
+}
+
+/*
+ * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
+ * cpu to APIC ID relation to properly interact with the intelligent
+ * mode of the cluster controller.
+ */
+static inline int numaq_cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < 60)
+ return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
+ else
+ return BAD_APICID;
+}
+
+static inline int numaq_apicid_to_node(int logical_apicid)
+{
+ return logical_apicid >> 4;
+}
+
+static int numaq_numa_cpu_node(int cpu)
+{
+ int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+ if (logical_apicid != BAD_APICID)
+ return numaq_apicid_to_node(logical_apicid);
+ return NUMA_NO_NODE;
+}
+
+static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
+{
+ int node = numaq_apicid_to_node(logical_apicid);
+ int cpu = __ffs(logical_apicid & 0xf);
+
+ physid_set_mask_of_physid(cpu + 4*node, retmap);
+}
+
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio;
+
+static inline int numaq_check_phys_apicid_present(int phys_apicid)
+{
+ return 1;
+}
+
+/*
+ * We use physical apicids here, not logical, so just return the default
+ * physical broadcast to stop people from breaking us
+ */
+static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ return 0x0F;
+}
+
+static inline unsigned int
+numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ return 0x0F;
+}
+
+/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
+static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return cpuid_apic >> index_msb;
+}
+
+static int
+numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
+{
+ if (strncmp(oem, "IBM NUMA", 8))
+ printk(KERN_ERR "Warning! Not a NUMA-Q system!\n");
+ else
+ found_numaq = 1;
+
+ return found_numaq;
+}
+
+static int probe_numaq(void)
+{
+ /* already know from get_memcfg_numaq() */
+ return found_numaq;
+}
+
+static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ /* Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+ * priority interrupt delivery mode.
+ *
+ * In particular there was a hyperthreading cpu observed to
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
+
+static void numaq_setup_portio_remap(void)
+{
+ int num_quads = num_online_nodes();
+
+ if (num_quads <= 1)
+ return;
+
+ printk(KERN_INFO
+ "Remapping cross-quad port I/O for %d quads\n", num_quads);
+
+ xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD);
+
+ printk(KERN_INFO
+ "xquad_portio vaddr 0x%08lx, len %08lx\n",
+ (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
+}
+
+/* Use __refdata to keep false positive warning calm. */
+static struct apic __refdata apic_numaq = {
+
+ .name = "NUMAQ",
+ .probe = probe_numaq,
+ .acpi_madt_oem_check = NULL,
+ .apic_id_registered = numaq_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ /* physical delivery on LOCAL quad: */
+ .irq_dest_mode = 0,
+
+ .target_cpus = numaq_target_cpus,
+ .disable_esr = 1,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = numaq_check_apicid_used,
+ .check_apicid_present = numaq_check_apicid_present,
+
+ .vector_allocation_domain = numaq_vector_allocation_domain,
+ .init_apic_ldr = numaq_init_apic_ldr,
+
+ .ioapic_phys_id_map = numaq_ioapic_phys_id_map,
+ .setup_apic_routing = numaq_setup_apic_routing,
+ .multi_timer_check = numaq_multi_timer_check,
+ .cpu_present_to_apicid = numaq_cpu_present_to_apicid,
+ .apicid_to_cpu_present = numaq_apicid_to_cpu_present,
+ .setup_portio_remap = numaq_setup_portio_remap,
+ .check_phys_apicid_present = numaq_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = numaq_phys_pkg_id,
+ .mps_oem_check = numaq_mps_oem_check,
+
+ .get_apic_id = numaq_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0x0F << 24,
+
+ .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = numaq_send_IPI_mask,
+ .send_IPI_mask_allbutself = NULL,
+ .send_IPI_allbutself = numaq_send_IPI_allbutself,
+ .send_IPI_all = numaq_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi,
+ .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH,
+
+ /* We don't do anything here because we use NMI's to boot instead */
+ .wait_for_init_deassert = NULL,
+
+ .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic,
+ .inquire_remote_apic = NULL,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = numaq_numa_cpu_node,
+};
+
+apic_driver(apic_numaq);
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
new file mode 100644
index 00000000000..0787bb3412f
--- /dev/null
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -0,0 +1,270 @@
+/*
+ * Default generic APIC driver. This handles up to 8 CPUs.
+ *
+ * Copyright 2003 Andi Kleen, SuSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic x86 APIC driver probe layer.
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+#include <asm/apic.h>
+#include <asm/setup.h>
+
+#include <linux/smp.h>
+#include <asm/ipi.h>
+
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/e820.h>
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define DEFAULT_SEND_IPI (1)
+#else
+#define DEFAULT_SEND_IPI (0)
+#endif
+
+int no_broadcast = DEFAULT_SEND_IPI;
+
+static __init int no_ipi_broadcast(char *str)
+{
+ get_option(&str, &no_broadcast);
+ pr_info("Using %s mode\n",
+ no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
+ return 1;
+}
+__setup("no_ipi_broadcast=", no_ipi_broadcast);
+
+static int __init print_ipi_mode(void)
+{
+ pr_info("Using IPI %s mode\n",
+ no_broadcast ? "No-Shortcut" : "Shortcut");
+ return 0;
+}
+late_initcall(print_ipi_mode);
+
+static int default_x86_32_early_logical_apicid(int cpu)
+{
+ return 1 << cpu;
+}
+
+static void setup_apic_flat_routing(void)
+{
+#ifdef CONFIG_X86_IO_APIC
+ printk(KERN_INFO
+ "Enabling APIC mode: Flat. Using %d I/O APICs\n",
+ nr_ioapics);
+#endif
+}
+
+static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ /*
+ * Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+ * priority interrupt delivery mode.
+ *
+ * In particular there was a hyperthreading cpu observed to
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
+
+/* should be called last. */
+static int probe_default(void)
+{
+ return 1;
+}
+
+static struct apic apic_default = {
+
+ .name = "default",
+ .probe = probe_default,
+ .acpi_madt_oem_check = NULL,
+ .apic_id_registered = default_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ /* logical delivery broadcast to all CPUs: */
+ .irq_dest_mode = 1,
+
+ .target_cpus = default_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = default_check_apicid_used,
+ .check_apicid_present = default_check_apicid_present,
+
+ .vector_allocation_domain = default_vector_allocation_domain,
+ .init_apic_ldr = default_init_apic_ldr,
+
+ .ioapic_phys_id_map = default_ioapic_phys_id_map,
+ .setup_apic_routing = setup_apic_flat_routing,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = physid_set_mask_of_physid,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = default_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = default_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0x0F << 24,
+
+ .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = default_send_IPI_mask_logical,
+ .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical,
+ .send_IPI_allbutself = default_send_IPI_allbutself,
+ .send_IPI_all = default_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+
+ .wait_for_init_deassert = default_wait_for_init_deassert,
+
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid,
+};
+
+apic_driver(apic_default);
+
+struct apic *apic = &apic_default;
+EXPORT_SYMBOL_GPL(apic);
+
+static int cmdline_apic __initdata;
+static int __init parse_apic(char *arg)
+{
+ struct apic **drv;
+
+ if (!arg)
+ return -EINVAL;
+
+ for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+ if (!strcmp((*drv)->name, arg)) {
+ apic = *drv;
+ cmdline_apic = 1;
+ return 0;
+ }
+ }
+
+ /* Parsed again by __setup for debug/verbose */
+ return 0;
+}
+early_param("apic", parse_apic);
+
+void __init default_setup_apic_routing(void)
+{
+ int version = apic_version[boot_cpu_physical_apicid];
+
+ if (num_possible_cpus() > 8) {
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ if (!APIC_XAPIC(version)) {
+ def_to_bigsmp = 0;
+ break;
+ }
+ /* If P4 and above fall through */
+ case X86_VENDOR_AMD:
+ def_to_bigsmp = 1;
+ }
+ }
+
+#ifdef CONFIG_X86_BIGSMP
+ /*
+ * This is used to switch to bigsmp mode when
+ * - There is no apic= option specified by the user
+ * - generic_apic_probe() has chosen apic_default as the sub_arch
+ * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
+ */
+
+ if (!cmdline_apic && apic == &apic_default)
+ generic_bigsmp_probe();
+#endif
+
+ if (apic->setup_apic_routing)
+ apic->setup_apic_routing();
+}
+
+void __init generic_apic_probe(void)
+{
+ if (!cmdline_apic) {
+ struct apic **drv;
+
+ for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+ if ((*drv)->probe()) {
+ apic = *drv;
+ break;
+ }
+ }
+ /* Not visible without early console */
+ if (drv == __apicdrivers_end)
+ panic("Didn't find an APIC driver");
+ }
+ printk(KERN_INFO "Using APIC driver %s\n", apic->name);
+}
+
+/* These functions can switch the APIC even after the initial ->probe() */
+
+int __init
+generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
+{
+ struct apic **drv;
+
+ for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+ if (!((*drv)->mps_oem_check))
+ continue;
+ if (!(*drv)->mps_oem_check(mpc, oem, productid))
+ continue;
+
+ if (!cmdline_apic) {
+ apic = *drv;
+ printk(KERN_INFO "Switched to APIC driver `%s'.\n",
+ apic->name);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ struct apic **drv;
+
+ for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+ if (!(*drv)->acpi_madt_oem_check)
+ continue;
+ if (!(*drv)->acpi_madt_oem_check(oem_id, oem_table_id))
+ continue;
+
+ if (!cmdline_apic) {
+ apic = *drv;
+ printk(KERN_INFO "Switched to APIC driver `%s'.\n",
+ apic->name);
+ }
+ return 1;
+ }
+ return 0;
+}
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
new file mode 100644
index 00000000000..3fe98669892
--- /dev/null
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch probe layer.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/setup.h>
+
+static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+ return hard_smp_processor_id() >> index_msb;
+}
+
+/*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ */
+void __init default_setup_apic_routing(void)
+{
+ struct apic **drv;
+
+ enable_IR_x2apic();
+
+ for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+ if ((*drv)->probe && (*drv)->probe()) {
+ if (apic != *drv) {
+ apic = *drv;
+ pr_info("Switched APIC routing to %s.\n",
+ apic->name);
+ }
+ break;
+ }
+ }
+
+ if (is_vsmp_box()) {
+ /* need to update phys_pkg_id */
+ apic->phys_pkg_id = apicid_phys_pkg_id;
+ }
+}
+
+/* Same for both flat and physical. */
+
+void apic_send_IPI_self(int vector)
+{
+ __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
+
+int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ struct apic **drv;
+
+ for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+ if ((*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) {
+ if (apic != *drv) {
+ apic = *drv;
+ pr_info("Setting APIC routing to %s.\n",
+ apic->name);
+ }
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
new file mode 100644
index 00000000000..19114423c58
--- /dev/null
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -0,0 +1,556 @@
+/*
+ * IBM Summit-Specific Code
+ *
+ * Written By: Matthew Dobson, IBM Corporation
+ *
+ * Copyright (c) 2003 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <asm/io.h>
+#include <asm/bios_ebda.h>
+
+/*
+ * APIC driver for the IBM "Summit" chipset.
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <asm/smp.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <asm/ipi.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/gfp.h>
+#include <linux/smp.h>
+
+static unsigned summit_get_apic_id(unsigned long x)
+{
+ return (x >> 24) & 0xFF;
+}
+
+static inline void summit_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ default_send_IPI_mask_sequence_logical(mask, vector);
+}
+
+static void summit_send_IPI_allbutself(int vector)
+{
+ default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
+}
+
+static void summit_send_IPI_all(int vector)
+{
+ summit_send_IPI_mask(cpu_online_mask, vector);
+}
+
+#include <asm/tsc.h>
+
+extern int use_cyclone;
+
+#ifdef CONFIG_X86_SUMMIT_NUMA
+static void setup_summit(void);
+#else
+static inline void setup_summit(void) {}
+#endif
+
+static int summit_mps_oem_check(struct mpc_table *mpc, char *oem,
+ char *productid)
+{
+ if (!strncmp(oem, "IBM ENSW", 8) &&
+ (!strncmp(productid, "VIGIL SMP", 9)
+ || !strncmp(productid, "EXA", 3)
+ || !strncmp(productid, "RUTHLESS SMP", 12))){
+ mark_tsc_unstable("Summit based system");
+ use_cyclone = 1; /*enable cyclone-timer*/
+ setup_summit();
+ return 1;
+ }
+ return 0;
+}
+
+/* Hook from generic ACPI tables.c */
+static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (!strncmp(oem_id, "IBM", 3) &&
+ (!strncmp(oem_table_id, "SERVIGIL", 8)
+ || !strncmp(oem_table_id, "EXA", 3))){
+ mark_tsc_unstable("Summit based system");
+ use_cyclone = 1; /*enable cyclone-timer*/
+ setup_summit();
+ return 1;
+ }
+ return 0;
+}
+
+struct rio_table_hdr {
+ unsigned char version; /* Version number of this data structure */
+ /* Version 3 adds chassis_num & WP_index */
+ unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */
+ unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */
+} __attribute__((packed));
+
+struct scal_detail {
+ unsigned char node_id; /* Scalability Node ID */
+ unsigned long CBAR; /* Address of 1MB register space */
+ unsigned char port0node; /* Node ID port connected to: 0xFF=None */
+ unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char port1node; /* Node ID port connected to: 0xFF = None */
+ unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char port2node; /* Node ID port connected to: 0xFF = None */
+ unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */
+} __attribute__((packed));
+
+struct rio_detail {
+ unsigned char node_id; /* RIO Node ID */
+ unsigned long BBAR; /* Address of 1MB register space */
+ unsigned char type; /* Type of device */
+ unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/
+ /* For CYC: Node ID of Twister that owns this CYC */
+ unsigned char port0node; /* Node ID port connected to: 0xFF=None */
+ unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char port1node; /* Node ID port connected to: 0xFF=None */
+ unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */
+ /* For CYC: 0 */
+ unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */
+ /* = 0 : the XAPIC is not used, ie:*/
+ /* ints fwded to another XAPIC */
+ /* Bits1:7 Reserved */
+ /* For CYC: Bits0:7 Reserved */
+ unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */
+ /* lower slot numbers/PCI bus numbers */
+ /* For CYC: No meaning */
+ unsigned char chassis_num; /* 1 based Chassis number */
+ /* For LookOut WPEGs this field indicates the */
+ /* Expansion Chassis #, enumerated from Boot */
+ /* Node WPEG external port, then Boot Node CYC */
+ /* external port, then Next Vigil chassis WPEG */
+ /* external port, etc. */
+ /* Shared Lookouts have only 1 chassis number (the */
+ /* first one assigned) */
+} __attribute__((packed));
+
+
+typedef enum {
+ CompatTwister = 0, /* Compatibility Twister */
+ AltTwister = 1, /* Alternate Twister of internal 8-way */
+ CompatCyclone = 2, /* Compatibility Cyclone */
+ AltCyclone = 3, /* Alternate Cyclone of internal 8-way */
+ CompatWPEG = 4, /* Compatibility WPEG */
+ AltWPEG = 5, /* Second Planar WPEG */
+ LookOutAWPEG = 6, /* LookOut WPEG */
+ LookOutBWPEG = 7, /* LookOut WPEG */
+} node_type;
+
+static inline int is_WPEG(struct rio_detail *rio){
+ return (rio->type == CompatWPEG || rio->type == AltWPEG ||
+ rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
+}
+
+#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
+
+static const struct cpumask *summit_target_cpus(void)
+{
+ /* CPU_MASK_ALL (0xff) has undefined behaviour with
+ * dest_LowestPrio mode logical clustered apic interrupt routing
+ * Just start on cpu 0. IRQ balancing will spread load
+ */
+ return cpumask_of(0);
+}
+
+static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid)
+{
+ return 0;
+}
+
+/* we don't use the phys_cpu_present_map to indicate apicid presence */
+static unsigned long summit_check_apicid_present(int bit)
+{
+ return 1;
+}
+
+static int summit_early_logical_apicid(int cpu)
+{
+ int count = 0;
+ u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
+ u8 my_cluster = APIC_CLUSTER(my_id);
+#ifdef CONFIG_SMP
+ u8 lid;
+ int i;
+
+ /* Create logical APIC IDs by counting CPUs already in cluster. */
+ for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
+ lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
+ if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
+ ++count;
+ }
+#endif
+ /* We only have a 4 wide bitmap in cluster mode. If a deranged
+ * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
+ BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
+ return my_cluster | (1UL << count);
+}
+
+static void summit_init_apic_ldr(void)
+{
+ int cpu = smp_processor_id();
+ unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ unsigned long val;
+
+ apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val |= SET_APIC_LOGICAL_ID(id);
+ apic_write(APIC_LDR, val);
+}
+
+static int summit_apic_id_registered(void)
+{
+ return 1;
+}
+
+static void summit_setup_apic_routing(void)
+{
+ printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
+ nr_ioapics);
+}
+
+static int summit_cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < nr_cpu_ids)
+ return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
+ else
+ return BAD_APICID;
+}
+
+static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap)
+{
+ /* For clustered we don't have a good way to do this yet - hack */
+ physids_promote(0x0FL, retmap);
+}
+
+static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap)
+{
+ physid_set_mask_of_physid(0, retmap);
+}
+
+static int summit_check_phys_apicid_present(int physical_apicid)
+{
+ return 1;
+}
+
+static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ unsigned int round = 0;
+ int cpu, apicid = 0;
+
+ /*
+ * The cpus in the mask must all be on the apic cluster.
+ */
+ for_each_cpu(cpu, cpumask) {
+ int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+ if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
+ printk("%s: Not a valid mask!\n", __func__);
+ return BAD_APICID;
+ }
+ apicid |= new_apicid;
+ round++;
+ }
+ return apicid;
+}
+
+static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
+ const struct cpumask *andmask)
+{
+ int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
+ cpumask_var_t cpumask;
+
+ if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+ return apicid;
+
+ cpumask_and(cpumask, inmask, andmask);
+ cpumask_and(cpumask, cpumask, cpu_online_mask);
+ apicid = summit_cpu_mask_to_apicid(cpumask);
+
+ free_cpumask_var(cpumask);
+
+ return apicid;
+}
+
+/*
+ * cpuid returns the value latched in the HW at reset, not the APIC ID
+ * register's value. For any box whose BIOS changes APIC IDs, like
+ * clustered APIC systems, we must use hard_smp_processor_id.
+ *
+ * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
+ */
+static int summit_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return hard_smp_processor_id() >> index_msb;
+}
+
+static int probe_summit(void)
+{
+ /* probed later in mptable/ACPI hooks */
+ return 0;
+}
+
+static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ /* Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+ * priority interrupt delivery mode.
+ *
+ * In particular there was a hyperthreading cpu observed to
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
+
+#ifdef CONFIG_X86_SUMMIT_NUMA
+static struct rio_table_hdr *rio_table_hdr;
+static struct scal_detail *scal_devs[MAX_NUMNODES];
+static struct rio_detail *rio_devs[MAX_NUMNODES*4];
+
+#ifndef CONFIG_X86_NUMAQ
+static int mp_bus_id_to_node[MAX_MP_BUSSES];
+#endif
+
+static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
+{
+ int twister = 0, node = 0;
+ int i, bus, num_buses;
+
+ for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
+ if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) {
+ twister = rio_devs[i]->owner_id;
+ break;
+ }
+ }
+ if (i == rio_table_hdr->num_rio_dev) {
+ printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
+ return last_bus;
+ }
+
+ for (i = 0; i < rio_table_hdr->num_scal_dev; i++) {
+ if (scal_devs[i]->node_id == twister) {
+ node = scal_devs[i]->node_id;
+ break;
+ }
+ }
+ if (i == rio_table_hdr->num_scal_dev) {
+ printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
+ return last_bus;
+ }
+
+ switch (rio_devs[wpeg_num]->type) {
+ case CompatWPEG:
+ /*
+ * The Compatibility Winnipeg controls the 2 legacy buses,
+ * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
+ * a PCI-PCI bridge card is used in either slot: total 5 buses.
+ */
+ num_buses = 5;
+ break;
+ case AltWPEG:
+ /*
+ * The Alternate Winnipeg controls the 2 133MHz buses [1 slot
+ * each], their 2 "extra" buses, the 100MHz bus [2 slots] and
+ * the "extra" buses for each of those slots: total 7 buses.
+ */
+ num_buses = 7;
+ break;
+ case LookOutAWPEG:
+ case LookOutBWPEG:
+ /*
+ * A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
+ * & the "extra" buses for each of those slots: total 9 buses.
+ */
+ num_buses = 9;
+ break;
+ default:
+ printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
+ return last_bus;
+ }
+
+ for (bus = last_bus; bus < last_bus + num_buses; bus++)
+ mp_bus_id_to_node[bus] = node;
+ return bus;
+}
+
+static int build_detail_arrays(void)
+{
+ unsigned long ptr;
+ int i, scal_detail_size, rio_detail_size;
+
+ if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
+ printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+ return 0;
+ }
+
+ switch (rio_table_hdr->version) {
+ default:
+ printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
+ return 0;
+ case 2:
+ scal_detail_size = 11;
+ rio_detail_size = 13;
+ break;
+ case 3:
+ scal_detail_size = 12;
+ rio_detail_size = 15;
+ break;
+ }
+
+ ptr = (unsigned long)rio_table_hdr + 3;
+ for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
+ scal_devs[i] = (struct scal_detail *)ptr;
+
+ for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
+ rio_devs[i] = (struct rio_detail *)ptr;
+
+ return 1;
+}
+
+void setup_summit(void)
+{
+ unsigned long ptr;
+ unsigned short offset;
+ int i, next_wpeg, next_bus = 0;
+
+ /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
+ ptr = get_bios_ebda();
+ ptr = (unsigned long)phys_to_virt(ptr);
+
+ rio_table_hdr = NULL;
+ offset = 0x180;
+ while (offset) {
+ /* The block id is stored in the 2nd word */
+ if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) {
+ /* set the pointer past the offset & block id */
+ rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
+ break;
+ }
+ /* The next offset is stored in the 1st word. 0 means no more */
+ offset = *((unsigned short *)(ptr + offset));
+ }
+ if (!rio_table_hdr) {
+ printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
+ return;
+ }
+
+ if (!build_detail_arrays())
+ return;
+
+ /* The first Winnipeg we're looking for has an index of 0 */
+ next_wpeg = 0;
+ do {
+ for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
+ if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) {
+ /* It's the Winnipeg we're looking for! */
+ next_bus = setup_pci_node_map_for_wpeg(i, next_bus);
+ next_wpeg++;
+ break;
+ }
+ }
+ /*
+ * If we go through all Rio devices and don't find one with
+ * the next index, it means we've found all the Winnipegs,
+ * and thus all the PCI buses.
+ */
+ if (i == rio_table_hdr->num_rio_dev)
+ next_wpeg = 0;
+ } while (next_wpeg != 0);
+}
+#endif
+
+static struct apic apic_summit = {
+
+ .name = "summit",
+ .probe = probe_summit,
+ .acpi_madt_oem_check = summit_acpi_madt_oem_check,
+ .apic_id_registered = summit_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ /* logical delivery broadcast to all CPUs: */
+ .irq_dest_mode = 1,
+
+ .target_cpus = summit_target_cpus,
+ .disable_esr = 1,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = summit_check_apicid_used,
+ .check_apicid_present = summit_check_apicid_present,
+
+ .vector_allocation_domain = summit_vector_allocation_domain,
+ .init_apic_ldr = summit_init_apic_ldr,
+
+ .ioapic_phys_id_map = summit_ioapic_phys_id_map,
+ .setup_apic_routing = summit_setup_apic_routing,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = summit_cpu_present_to_apicid,
+ .apicid_to_cpu_present = summit_apicid_to_cpu_present,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = summit_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = summit_phys_pkg_id,
+ .mps_oem_check = summit_mps_oem_check,
+
+ .get_apic_id = summit_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0xFF << 24,
+
+ .cpu_mask_to_apicid = summit_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = summit_send_IPI_mask,
+ .send_IPI_mask_allbutself = NULL,
+ .send_IPI_allbutself = summit_send_IPI_allbutself,
+ .send_IPI_all = summit_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+
+ .wait_for_init_deassert = default_wait_for_init_deassert,
+
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = summit_early_logical_apicid,
+};
+
+apic_driver(apic_summit);
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
new file mode 100644
index 00000000000..50079587582
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -0,0 +1,268 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+#include <linux/cpu.h>
+
+#include <asm/smp.h>
+#include <asm/x2apic.h>
+
+static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster);
+static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return x2apic_enabled();
+}
+
+static inline u32 x2apic_cluster(int cpu)
+{
+ return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16;
+}
+
+static void
+__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
+{
+ struct cpumask *cpus_in_cluster_ptr;
+ struct cpumask *ipi_mask_ptr;
+ unsigned int cpu, this_cpu;
+ unsigned long flags;
+ u32 dest;
+
+ x2apic_wrmsr_fence();
+
+ local_irq_save(flags);
+
+ this_cpu = smp_processor_id();
+
+ /*
+ * We are to modify mask, so we need an own copy
+ * and be sure it's manipulated with irq off.
+ */
+ ipi_mask_ptr = __raw_get_cpu_var(ipi_mask);
+ cpumask_copy(ipi_mask_ptr, mask);
+
+ /*
+ * The idea is to send one IPI per cluster.
+ */
+ for_each_cpu(cpu, ipi_mask_ptr) {
+ unsigned long i;
+
+ cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu);
+ dest = 0;
+
+ /* Collect cpus in cluster. */
+ for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) {
+ if (apic_dest == APIC_DEST_ALLINC || i != this_cpu)
+ dest |= per_cpu(x86_cpu_to_logical_apicid, i);
+ }
+
+ if (!dest)
+ continue;
+
+ __x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
+ /*
+ * Cluster sibling cpus should be discared now so
+ * we would not send IPI them second time.
+ */
+ cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr);
+ }
+
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC);
+}
+
+static void
+ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+ __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ /*
+ * We're using fixed IRQ delivery, can only return one logical APIC ID.
+ * May as well be the first.
+ */
+ int cpu = cpumask_first(cpumask);
+
+ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_logical_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int
+x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one logical APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ }
+
+ return per_cpu(x86_cpu_to_logical_apicid, cpu);
+}
+
+static void init_x2apic_ldr(void)
+{
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
+
+ per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
+
+ __cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
+ for_each_online_cpu(cpu) {
+ if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
+ continue;
+ __cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu));
+ __cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu));
+ }
+}
+
+ /*
+ * At CPU state changes, update the x2apic cluster sibling info.
+ */
+static int __cpuinit
+update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int this_cpu = (unsigned long)hcpu;
+ unsigned int cpu;
+ int err = 0;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu),
+ GFP_KERNEL)) {
+ err = -ENOMEM;
+ } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu),
+ GFP_KERNEL)) {
+ free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
+ err = -ENOMEM;
+ }
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ case CPU_DEAD:
+ for_each_online_cpu(cpu) {
+ if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
+ continue;
+ __cpu_clear(this_cpu, per_cpu(cpus_in_cluster, cpu));
+ __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu));
+ }
+ free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
+ free_cpumask_var(per_cpu(ipi_mask, this_cpu));
+ break;
+ }
+
+ return notifier_from_errno(err);
+}
+
+static struct notifier_block __refdata x2apic_cpu_notifier = {
+ .notifier_call = update_clusterinfo,
+};
+
+static int x2apic_init_cpu_notifier(void)
+{
+ int cpu = smp_processor_id();
+
+ zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL);
+
+ BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
+
+ __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu));
+ register_hotcpu_notifier(&x2apic_cpu_notifier);
+ return 1;
+}
+
+static int x2apic_cluster_probe(void)
+{
+ if (x2apic_mode)
+ return x2apic_init_cpu_notifier();
+ else
+ return 0;
+}
+
+static struct apic apic_x2apic_cluster = {
+
+ .name = "cluster x2apic",
+ .probe = x2apic_cluster_probe,
+ .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .apic_id_registered = x2apic_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ .irq_dest_mode = 1, /* logical */
+
+ .target_cpus = x2apic_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = x2apic_vector_allocation_domain,
+ .init_apic_ldr = init_x2apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = x2apic_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = x2apic_get_apic_id,
+ .set_apic_id = x2apic_set_apic_id,
+ .apic_id_mask = 0xFFFFFFFFu,
+
+ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+ .send_IPI_all = x2apic_send_IPI_all,
+ .send_IPI_self = x2apic_send_IPI_self,
+
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL,
+
+ .read = native_apic_msr_read,
+ .write = native_apic_msr_write,
+ .icr_read = native_x2apic_icr_read,
+ .icr_write = native_x2apic_icr_write,
+ .wait_icr_idle = native_x2apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
+};
+
+apic_driver(apic_x2apic_cluster);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
new file mode 100644
index 00000000000..f5373dfde21
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -0,0 +1,174 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/x2apic.h>
+
+int x2apic_phys;
+
+static struct apic apic_x2apic_phys;
+
+static int set_x2apic_phys_mode(char *arg)
+{
+ x2apic_phys = 1;
+ return 0;
+}
+early_param("x2apic_phys", set_x2apic_phys_mode);
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (x2apic_phys)
+ return x2apic_enabled();
+ else
+ return 0;
+}
+
+static void
+__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
+{
+ unsigned long query_cpu;
+ unsigned long this_cpu;
+ unsigned long flags;
+
+ x2apic_wrmsr_fence();
+
+ local_irq_save(flags);
+
+ this_cpu = smp_processor_id();
+ for_each_cpu(query_cpu, mask) {
+ if (apic_dest == APIC_DEST_ALLBUT && this_cpu == query_cpu)
+ continue;
+ __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC);
+}
+
+static void
+ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+ __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ int cpu = cpumask_first(cpumask);
+
+ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int
+x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ }
+
+ return per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+static void init_x2apic_ldr(void)
+{
+}
+
+static int x2apic_phys_probe(void)
+{
+ if (x2apic_mode && x2apic_phys)
+ return 1;
+
+ return apic == &apic_x2apic_phys;
+}
+
+static struct apic apic_x2apic_phys = {
+
+ .name = "physical x2apic",
+ .probe = x2apic_phys_probe,
+ .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .apic_id_registered = x2apic_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ .irq_dest_mode = 0, /* physical */
+
+ .target_cpus = x2apic_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = 0,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = x2apic_vector_allocation_domain,
+ .init_apic_ldr = init_x2apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = x2apic_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = x2apic_get_apic_id,
+ .set_apic_id = x2apic_set_apic_id,
+ .apic_id_mask = 0xFFFFFFFFu,
+
+ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+ .send_IPI_all = x2apic_send_IPI_all,
+ .send_IPI_self = x2apic_send_IPI_self,
+
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL,
+
+ .read = native_apic_msr_read,
+ .write = native_apic_msr_write,
+ .icr_read = native_x2apic_icr_read,
+ .icr_write = native_x2apic_icr_write,
+ .wait_icr_idle = native_x2apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
+};
+
+apic_driver(apic_x2apic_phys);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
new file mode 100644
index 00000000000..79b05b88aa1
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -0,0 +1,889 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV APIC functions (note: not an Intel compatible APIC)
+ *
+ * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ */
+#include <linux/cpumask.h>
+#include <linux/hardirq.h>
+#include <linux/proc_fs.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/kdebug.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
+
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/current.h>
+#include <asm/pgtable.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/smp.h>
+#include <asm/x86_init.h>
+#include <asm/emergency-restart.h>
+#include <asm/nmi.h>
+
+/* BMC sets a bit this MMR non-zero before sending an NMI */
+#define UVH_NMI_MMR UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK (1UL << 63)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
+
+DEFINE_PER_CPU(int, x2apic_extra_bits);
+
+#define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args)
+
+static enum uv_system_type uv_system_type;
+static u64 gru_start_paddr, gru_end_paddr;
+static union uvh_apicid uvh_apicid;
+int uv_min_hub_revision_id;
+EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
+unsigned int uv_apicid_hibits;
+EXPORT_SYMBOL_GPL(uv_apicid_hibits);
+static DEFINE_SPINLOCK(uv_nmi_lock);
+
+static struct apic apic_x2apic_uv_x;
+
+static unsigned long __init uv_early_read_mmr(unsigned long addr)
+{
+ unsigned long val, *mmr;
+
+ mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
+ val = *mmr;
+ early_iounmap(mmr, sizeof(*mmr));
+ return val;
+}
+
+static inline bool is_GRU_range(u64 start, u64 end)
+{
+ return start >= gru_start_paddr && end <= gru_end_paddr;
+}
+
+static bool uv_is_untracked_pat_range(u64 start, u64 end)
+{
+ return is_ISA_range(start, end) || is_GRU_range(start, end);
+}
+
+static int __init early_get_pnodeid(void)
+{
+ union uvh_node_id_u node_id;
+ union uvh_rh_gam_config_mmr_u m_n_config;
+ int pnode;
+
+ /* Currently, all blades have same revision number */
+ node_id.v = uv_early_read_mmr(UVH_NODE_ID);
+ m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
+ uv_min_hub_revision_id = node_id.s.revision;
+
+ if (node_id.s.part_number == UV2_HUB_PART_NUMBER)
+ uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
+ if (node_id.s.part_number == UV2_HUB_PART_NUMBER_X)
+ uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
+
+ uv_hub_info->hub_revision = uv_min_hub_revision_id;
+ pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
+ return pnode;
+}
+
+static void __init early_get_apic_pnode_shift(void)
+{
+ uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
+ if (!uvh_apicid.v)
+ /*
+ * Old bios, use default value
+ */
+ uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
+}
+
+/*
+ * Add an extra bit as dictated by bios to the destination apicid of
+ * interrupts potentially passing through the UV HUB. This prevents
+ * a deadlock between interrupts and IO port operations.
+ */
+static void __init uv_set_apicid_hibit(void)
+{
+ union uv1h_lb_target_physical_apic_id_mask_u apicid_mask;
+
+ if (is_uv1_hub()) {
+ apicid_mask.v =
+ uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK);
+ uv_apicid_hibits =
+ apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK;
+ }
+}
+
+static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ int pnodeid, is_uv1, is_uv2;
+
+ is_uv1 = !strcmp(oem_id, "SGI");
+ is_uv2 = !strcmp(oem_id, "SGI2");
+ if (is_uv1 || is_uv2) {
+ uv_hub_info->hub_revision =
+ is_uv1 ? UV1_HUB_REVISION_BASE : UV2_HUB_REVISION_BASE;
+ pnodeid = early_get_pnodeid();
+ early_get_apic_pnode_shift();
+ x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
+ x86_platform.nmi_init = uv_nmi_init;
+ if (!strcmp(oem_table_id, "UVL"))
+ uv_system_type = UV_LEGACY_APIC;
+ else if (!strcmp(oem_table_id, "UVX"))
+ uv_system_type = UV_X2APIC;
+ else if (!strcmp(oem_table_id, "UVH")) {
+ __this_cpu_write(x2apic_extra_bits,
+ pnodeid << uvh_apicid.s.pnode_shift);
+ uv_system_type = UV_NON_UNIQUE_APIC;
+ uv_set_apicid_hibit();
+ return 1;
+ }
+ }
+ return 0;
+}
+
+enum uv_system_type get_uv_system_type(void)
+{
+ return uv_system_type;
+}
+
+int is_uv_system(void)
+{
+ return uv_system_type != UV_NONE;
+}
+EXPORT_SYMBOL_GPL(is_uv_system);
+
+DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
+
+struct uv_blade_info *uv_blade_info;
+EXPORT_SYMBOL_GPL(uv_blade_info);
+
+short *uv_node_to_blade;
+EXPORT_SYMBOL_GPL(uv_node_to_blade);
+
+short *uv_cpu_to_blade;
+EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
+
+short uv_possible_blades;
+EXPORT_SYMBOL_GPL(uv_possible_blades);
+
+unsigned long sn_rtc_cycles_per_second;
+EXPORT_SYMBOL(sn_rtc_cycles_per_second);
+
+static const struct cpumask *uv_target_cpus(void)
+{
+ return cpu_online_mask;
+}
+
+static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
+}
+
+static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+{
+#ifdef CONFIG_SMP
+ unsigned long val;
+ int pnode;
+
+ pnode = uv_apicid_to_pnode(phys_apicid);
+ phys_apicid |= uv_apicid_hibits;
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+ (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+ ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+ APIC_DM_INIT;
+ uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+ (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+ ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+ APIC_DM_STARTUP;
+ uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+
+ atomic_set(&init_deasserted, 1);
+#endif
+ return 0;
+}
+
+static void uv_send_IPI_one(int cpu, int vector)
+{
+ unsigned long apicid;
+ int pnode;
+
+ apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ pnode = uv_apicid_to_pnode(apicid);
+ uv_hub_send_ipi(pnode, apicid, vector);
+}
+
+static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask)
+ uv_send_IPI_one(cpu, vector);
+}
+
+static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask) {
+ if (cpu != this_cpu)
+ uv_send_IPI_one(cpu, vector);
+ }
+}
+
+static void uv_send_IPI_allbutself(int vector)
+{
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu) {
+ if (cpu != this_cpu)
+ uv_send_IPI_one(cpu, vector);
+ }
+}
+
+static void uv_send_IPI_all(int vector)
+{
+ uv_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int uv_apic_id_registered(void)
+{
+ return 1;
+}
+
+static void uv_init_apic_ldr(void)
+{
+}
+
+static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ int cpu = cpumask_first(cpumask);
+
+ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
+ else
+ return BAD_APICID;
+}
+
+static unsigned int
+uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ }
+ return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
+}
+
+static unsigned int x2apic_get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ WARN_ON(preemptible() && num_online_cpus() > 1);
+ id = x | __this_cpu_read(x2apic_extra_bits);
+
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ /* maskout x2apic_extra_bits ? */
+ x = id;
+ return x;
+}
+
+static unsigned int uv_read_apic_id(void)
+{
+
+ return x2apic_get_apic_id(apic_read(APIC_ID));
+}
+
+static int uv_phys_pkg_id(int initial_apicid, int index_msb)
+{
+ return uv_read_apic_id() >> index_msb;
+}
+
+static void uv_send_IPI_self(int vector)
+{
+ apic_write(APIC_SELF_IPI, vector);
+}
+
+static int uv_probe(void)
+{
+ return apic == &apic_x2apic_uv_x;
+}
+
+static struct apic __refdata apic_x2apic_uv_x = {
+
+ .name = "UV large system",
+ .probe = uv_probe,
+ .acpi_madt_oem_check = uv_acpi_madt_oem_check,
+ .apic_id_registered = uv_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ .irq_dest_mode = 0, /* physical */
+
+ .target_cpus = uv_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = uv_vector_allocation_domain,
+ .init_apic_ldr = uv_init_apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = uv_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = x2apic_get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = 0xFFFFFFFFu,
+
+ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = uv_send_IPI_mask,
+ .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = uv_send_IPI_allbutself,
+ .send_IPI_all = uv_send_IPI_all,
+ .send_IPI_self = uv_send_IPI_self,
+
+ .wakeup_secondary_cpu = uv_wakeup_secondary,
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL,
+
+ .read = native_apic_msr_read,
+ .write = native_apic_msr_write,
+ .icr_read = native_x2apic_icr_read,
+ .icr_write = native_x2apic_icr_write,
+ .wait_icr_idle = native_x2apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
+};
+
+static __cpuinit void set_x2apic_extra_bits(int pnode)
+{
+ __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
+}
+
+/*
+ * Called on boot cpu.
+ */
+static __init int boot_pnode_to_blade(int pnode)
+{
+ int blade;
+
+ for (blade = 0; blade < uv_num_possible_blades(); blade++)
+ if (pnode == uv_blade_info[blade].pnode)
+ return blade;
+ BUG();
+}
+
+struct redir_addr {
+ unsigned long redirect;
+ unsigned long alias;
+};
+
+#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
+
+static __initdata struct redir_addr redir_addrs[] = {
+ {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
+ {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
+ {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
+};
+
+static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
+{
+ union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
+ union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
+ alias.v = uv_read_local_mmr(redir_addrs[i].alias);
+ if (alias.s.enable && alias.s.base == 0) {
+ *size = (1UL << alias.s.m_alias);
+ redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
+ *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
+ return;
+ }
+ }
+ *base = *size = 0;
+}
+
+enum map_type {map_wb, map_uc};
+
+static __init void map_high(char *id, unsigned long base, int pshift,
+ int bshift, int max_pnode, enum map_type map_type)
+{
+ unsigned long bytes, paddr;
+
+ paddr = base << pshift;
+ bytes = (1UL << bshift) * (max_pnode + 1);
+ printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
+ paddr + bytes);
+ if (map_type == map_uc)
+ init_extra_mapping_uc(paddr, bytes);
+ else
+ init_extra_mapping_wb(paddr, bytes);
+
+}
+static __init void map_gru_high(int max_pnode)
+{
+ union uvh_rh_gam_gru_overlay_config_mmr_u gru;
+ int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+ gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
+ if (gru.s.enable) {
+ map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
+ gru_start_paddr = ((u64)gru.s.base << shift);
+ gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
+
+ }
+}
+
+static __init void map_mmr_high(int max_pnode)
+{
+ union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
+ int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+ mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
+ if (mmr.s.enable)
+ map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
+}
+
+static __init void map_mmioh_high(int max_pnode)
+{
+ union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
+ int shift;
+
+ mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
+ if (is_uv1_hub() && mmioh.s1.enable) {
+ shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+ map_high("MMIOH", mmioh.s1.base, shift, mmioh.s1.m_io,
+ max_pnode, map_uc);
+ }
+ if (is_uv2_hub() && mmioh.s2.enable) {
+ shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+ map_high("MMIOH", mmioh.s2.base, shift, mmioh.s2.m_io,
+ max_pnode, map_uc);
+ }
+}
+
+static __init void map_low_mmrs(void)
+{
+ init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+ init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+}
+
+static __init void uv_rtc_init(void)
+{
+ long status;
+ u64 ticks_per_sec;
+
+ status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
+ &ticks_per_sec);
+ if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
+ printk(KERN_WARNING
+ "unable to determine platform RTC clock frequency, "
+ "guessing.\n");
+ /* BIOS gives wrong value for clock freq. so guess */
+ sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
+ } else
+ sn_rtc_cycles_per_second = ticks_per_sec;
+}
+
+/*
+ * percpu heartbeat timer
+ */
+static void uv_heartbeat(unsigned long ignored)
+{
+ struct timer_list *timer = &uv_hub_info->scir.timer;
+ unsigned char bits = uv_hub_info->scir.state;
+
+ /* flip heartbeat bit */
+ bits ^= SCIR_CPU_HEARTBEAT;
+
+ /* is this cpu idle? */
+ if (idle_cpu(raw_smp_processor_id()))
+ bits &= ~SCIR_CPU_ACTIVITY;
+ else
+ bits |= SCIR_CPU_ACTIVITY;
+
+ /* update system controller interface reg */
+ uv_set_scir_bits(bits);
+
+ /* enable next timer period */
+ mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+}
+
+static void __cpuinit uv_heartbeat_enable(int cpu)
+{
+ while (!uv_cpu_hub_info(cpu)->scir.enabled) {
+ struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+
+ uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
+ setup_timer(timer, uv_heartbeat, cpu);
+ timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
+ add_timer_on(timer, cpu);
+ uv_cpu_hub_info(cpu)->scir.enabled = 1;
+
+ /* also ensure that boot cpu is enabled */
+ cpu = 0;
+ }
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void __cpuinit uv_heartbeat_disable(int cpu)
+{
+ if (uv_cpu_hub_info(cpu)->scir.enabled) {
+ uv_cpu_hub_info(cpu)->scir.enabled = 0;
+ del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+ }
+ uv_set_cpu_scir_bits(cpu, 0xff);
+}
+
+/*
+ * cpu hotplug notifier
+ */
+static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ long cpu = (long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ uv_heartbeat_enable(cpu);
+ break;
+ case CPU_DOWN_PREPARE:
+ uv_heartbeat_disable(cpu);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+ hotcpu_notifier(uv_scir_cpu_notify, 0);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+}
+
+static __init int uv_init_heartbeat(void)
+{
+ int cpu;
+
+ if (is_uv_system())
+ for_each_online_cpu(cpu)
+ uv_heartbeat_enable(cpu);
+ return 0;
+}
+
+late_initcall(uv_init_heartbeat);
+
+#endif /* !CONFIG_HOTPLUG_CPU */
+
+/* Direct Legacy VGA I/O traffic to designated IOH */
+int uv_set_vga_state(struct pci_dev *pdev, bool decode,
+ unsigned int command_bits, u32 flags)
+{
+ int domain, bus, rc;
+
+ PR_DEVEL("devfn %x decode %d cmd %x flags %d\n",
+ pdev->devfn, decode, command_bits, flags);
+
+ if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
+ return 0;
+
+ if ((command_bits & PCI_COMMAND_IO) == 0)
+ return 0;
+
+ domain = pci_domain_nr(pdev->bus);
+ bus = pdev->bus->number;
+
+ rc = uv_bios_set_legacy_vga_target(decode, domain, bus);
+ PR_DEVEL("vga decode %d %x:%x, rc: %d\n", decode, domain, bus, rc);
+
+ return rc;
+}
+
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ * FIXME: hotplug not supported yet
+ */
+void __cpuinit uv_cpu_init(void)
+{
+ /* CPU 0 initilization will be done via uv_system_init. */
+ if (!uv_blade_info)
+ return;
+
+ uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+ if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+ set_x2apic_extra_bits(uv_hub_info->pnode);
+}
+
+/*
+ * When NMI is received, print a stack trace.
+ */
+int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
+{
+ unsigned long real_uv_nmi;
+ int bid;
+
+ /*
+ * Each blade has an MMR that indicates when an NMI has been sent
+ * to cpus on the blade. If an NMI is detected, atomically
+ * clear the MMR and update a per-blade NMI count used to
+ * cause each cpu on the blade to notice a new NMI.
+ */
+ bid = uv_numa_blade_id();
+ real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+
+ if (unlikely(real_uv_nmi)) {
+ spin_lock(&uv_blade_info[bid].nmi_lock);
+ real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+ if (real_uv_nmi) {
+ uv_blade_info[bid].nmi_count++;
+ uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+ }
+ spin_unlock(&uv_blade_info[bid].nmi_lock);
+ }
+
+ if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
+ return NMI_DONE;
+
+ __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
+
+ /*
+ * Use a lock so only one cpu prints at a time.
+ * This prevents intermixed output.
+ */
+ spin_lock(&uv_nmi_lock);
+ pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
+ dump_stack();
+ spin_unlock(&uv_nmi_lock);
+
+ return NMI_HANDLED;
+}
+
+void uv_register_nmi_notifier(void)
+{
+ if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
+ printk(KERN_WARNING "UV NMI handler failed to register\n");
+}
+
+void uv_nmi_init(void)
+{
+ unsigned int value;
+
+ /*
+ * Unmask NMI on all cpus
+ */
+ value = apic_read(APIC_LVT1) | APIC_DM_NMI;
+ value &= ~APIC_LVT_MASKED;
+ apic_write(APIC_LVT1, value);
+}
+
+void __init uv_system_init(void)
+{
+ union uvh_rh_gam_config_mmr_u m_n_config;
+ union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
+ union uvh_node_id_u node_id;
+ unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
+ int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
+ int gnode_extra, max_pnode = 0;
+ unsigned long mmr_base, present, paddr;
+ unsigned short pnode_mask, pnode_io_mask;
+
+ printk(KERN_INFO "UV: Found %s hub\n", is_uv1_hub() ? "UV1" : "UV2");
+ map_low_mmrs();
+
+ m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
+ m_val = m_n_config.s.m_skt;
+ n_val = m_n_config.s.n_skt;
+ mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
+ n_io = is_uv1_hub() ? mmioh.s1.n_io : mmioh.s2.n_io;
+ mmr_base =
+ uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+ ~UV_MMR_ENABLE;
+ pnode_mask = (1 << n_val) - 1;
+ pnode_io_mask = (1 << n_io) - 1;
+
+ node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+ gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
+ gnode_upper = ((unsigned long)gnode_extra << m_val);
+ printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
+ n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);
+
+ printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
+
+ for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
+ uv_possible_blades +=
+ hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
+
+ /* uv_num_possible_blades() is really the hub count */
+ printk(KERN_INFO "UV: Found %d blades, %d hubs\n",
+ is_uv1_hub() ? uv_num_possible_blades() :
+ (uv_num_possible_blades() + 1) / 2,
+ uv_num_possible_blades());
+
+ bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
+ uv_blade_info = kzalloc(bytes, GFP_KERNEL);
+ BUG_ON(!uv_blade_info);
+
+ for (blade = 0; blade < uv_num_possible_blades(); blade++)
+ uv_blade_info[blade].memory_nid = -1;
+
+ get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
+
+ bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
+ uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
+ BUG_ON(!uv_node_to_blade);
+ memset(uv_node_to_blade, 255, bytes);
+
+ bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
+ uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
+ BUG_ON(!uv_cpu_to_blade);
+ memset(uv_cpu_to_blade, 255, bytes);
+
+ blade = 0;
+ for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
+ present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
+ for (j = 0; j < 64; j++) {
+ if (!test_bit(j, &present))
+ continue;
+ pnode = (i * 64 + j) & pnode_mask;
+ uv_blade_info[blade].pnode = pnode;
+ uv_blade_info[blade].nr_possible_cpus = 0;
+ uv_blade_info[blade].nr_online_cpus = 0;
+ spin_lock_init(&uv_blade_info[blade].nmi_lock);
+ max_pnode = max(pnode, max_pnode);
+ blade++;
+ }
+ }
+
+ uv_bios_init();
+ uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
+ &sn_region_size, &system_serial_number);
+ uv_rtc_init();
+
+ for_each_present_cpu(cpu) {
+ int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+
+ nid = cpu_to_node(cpu);
+ /*
+ * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
+ */
+ uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
+ uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
+ uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
+
+ uv_cpu_hub_info(cpu)->m_shift = 64 - m_val;
+ uv_cpu_hub_info(cpu)->n_lshift = is_uv2_1_hub() ?
+ (m_val == 40 ? 40 : 39) : m_val;
+
+ pnode = uv_apicid_to_pnode(apicid);
+ blade = boot_pnode_to_blade(pnode);
+ lcpu = uv_blade_info[blade].nr_possible_cpus;
+ uv_blade_info[blade].nr_possible_cpus++;
+
+ /* Any node on the blade, else will contain -1. */
+ uv_blade_info[blade].memory_nid = nid;
+
+ uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
+ uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
+ uv_cpu_hub_info(cpu)->m_val = m_val;
+ uv_cpu_hub_info(cpu)->n_val = n_val;
+ uv_cpu_hub_info(cpu)->numa_blade_id = blade;
+ uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
+ uv_cpu_hub_info(cpu)->pnode = pnode;
+ uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
+ uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
+ uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
+ uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
+ uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
+ uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid);
+ uv_node_to_blade[nid] = blade;
+ uv_cpu_to_blade[cpu] = blade;
+ }
+
+ /* Add blade/pnode info for nodes without cpus */
+ for_each_online_node(nid) {
+ if (uv_node_to_blade[nid] >= 0)
+ continue;
+ paddr = node_start_pfn(nid) << PAGE_SHIFT;
+ pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
+ blade = boot_pnode_to_blade(pnode);
+ uv_node_to_blade[nid] = blade;
+ }
+
+ map_gru_high(max_pnode);
+ map_mmr_high(max_pnode);
+ map_mmioh_high(max_pnode & pnode_io_mask);
+
+ uv_cpu_init();
+ uv_scir_register_cpu_notifier();
+ uv_register_nmi_notifier();
+ proc_mkdir("sgi_uv", NULL);
+
+ /* register Legacy VGA I/O redirection handler */
+ pci_register_set_vga_state(uv_set_vga_state);
+
+ /*
+ * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as
+ * EFI is not enabled in the kdump kernel.
+ */
+ if (is_kdump_kernel())
+ reboot_type = BOOT_ACPI;
+}
+
+apic_driver(apic_x2apic_uv_x);