summaryrefslogtreecommitdiffstats
path: root/arch/ia64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64')
-rw-r--r--arch/ia64/Kconfig20
-rw-r--r--arch/ia64/ia32/sys_ia32.c2
-rw-r--r--arch/ia64/kernel/Makefile5
-rw-r--r--arch/ia64/kernel/acpi.c13
-rw-r--r--arch/ia64/kernel/entry.S4
-rw-r--r--arch/ia64/kernel/esi.c205
-rw-r--r--arch/ia64/kernel/esi_stub.S96
-rw-r--r--arch/ia64/kernel/ia64_ksyms.c4
-rw-r--r--arch/ia64/kernel/kprobes.c61
-rw-r--r--arch/ia64/kernel/mca.c234
-rw-r--r--arch/ia64/kernel/mca_asm.S9
-rw-r--r--arch/ia64/kernel/mca_drv.c54
-rw-r--r--arch/ia64/kernel/mca_drv.h4
-rw-r--r--arch/ia64/kernel/numa.c34
-rw-r--r--arch/ia64/kernel/perfmon.c114
-rw-r--r--arch/ia64/kernel/salinfo.c4
-rw-r--r--arch/ia64/kernel/setup.c41
-rw-r--r--arch/ia64/kernel/smpboot.c24
-rw-r--r--arch/ia64/kernel/topology.c4
-rw-r--r--arch/ia64/kernel/uncached.c2
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S8
-rw-r--r--arch/ia64/mm/contig.c84
-rw-r--r--arch/ia64/mm/discontig.c72
-rw-r--r--arch/ia64/mm/init.c12
-rw-r--r--arch/ia64/pci/pci.c3
-rw-r--r--arch/ia64/sn/kernel/bte.c3
-rw-r--r--arch/ia64/sn/kernel/sn2/sn_hwperf.c3
27 files changed, 833 insertions, 286 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index db274da7dba..0b7f701d5cf 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -66,15 +66,6 @@ config IA64_UNCACHED_ALLOCATOR
bool
select GENERIC_ALLOCATOR
-config DMA_IS_DMA32
- bool
- default y
-
-config DMA_IS_NORMAL
- bool
- depends on IA64_SGI_SN2
- default y
-
config AUDIT_ARCH
bool
default y
@@ -365,6 +356,9 @@ config NODES_SHIFT
MAX_NUMNODES will be 2^(This value).
If in doubt, use the default.
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
# VIRTUAL_MEM_MAP has been retained for historical reasons.
config VIRTUAL_MEM_MAP
@@ -429,6 +423,14 @@ config IA64_PALINFO
config SGI_SN
def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
+config IA64_ESI
+ bool "ESI (Extensible SAL Interface) support"
+ help
+ If you say Y here, support is built into the kernel to
+ make ESI calls. ESI calls are used to support vendor-specific
+ firmware extensions, such as the ability to inject memory-errors
+ for test-purposes. If you're unsure, say N.
+
source "drivers/sn/Kconfig"
source "drivers/firmware/Kconfig"
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 6aa3c51619c..bddbd22706e 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1942,7 +1942,7 @@ struct sysctl32 {
unsigned int __unused[4];
};
-#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_SYSCTL_SYSCALL
asmlinkage long
sys32_sysctl (struct sysctl32 __user *args)
{
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index ad8215a3c58..31497496eb4 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -32,6 +32,11 @@ obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
obj-$(CONFIG_AUDIT) += audit.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
+obj-$(CONFIG_IA64_ESI) += esi.o
+ifneq ($(CONFIG_IA64_ESI),)
+obj-y += esi_stub.o # must be in kernel proper
+endif
+
# The gate DSO image is built using a special linker script.
targets += gate.so gate-syms.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 0176556aeec..32c3abededc 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -771,16 +771,19 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
{
#ifdef CONFIG_ACPI_NUMA
int pxm_id;
+ int nid;
pxm_id = acpi_get_pxm(handle);
-
/*
- * Assuming that the container driver would have set the proximity
- * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag
+ * We don't have cpu-only-node hotadd. But if the system equips
+ * SRAT table, pxm is already found and node is ready.
+ * So, just pxm_to_nid(pxm) is OK.
+ * This code here is for the system which doesn't have full SRAT
+ * table for possible cpus.
*/
- node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id);
-
+ nid = acpi_map_pxm_to_node(pxm_id);
node_cpuid[cpu].phys_id = physid;
+ node_cpuid[cpu].nid = nid;
#endif
return (0);
}
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index fef06571be9..12701cf32d9 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1605,8 +1605,8 @@ sys_call_table:
data8 sys_ni_syscall // 1295 reserved for ppoll
data8 sys_unshare
data8 sys_splice
- data8 sys_ni_syscall // reserved for set_robust_list
- data8 sys_ni_syscall // reserved for get_robust_list
+ data8 sys_set_robust_list
+ data8 sys_get_robust_list
data8 sys_sync_file_range // 1300
data8 sys_tee
data8 sys_vmsplice
diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c
new file mode 100644
index 00000000000..ebf4e988e78
--- /dev/null
+++ b/arch/ia64/kernel/esi.c
@@ -0,0 +1,205 @@
+/*
+ * Extensible SAL Interface (ESI) support routines.
+ *
+ * Copyright (C) 2006 Hewlett-Packard Co
+ * Alex Williamson <alex.williamson@hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <asm/esi.h>
+#include <asm/sal.h>
+
+MODULE_AUTHOR("Alex Williamson <alex.williamson@hp.com>");
+MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support");
+MODULE_LICENSE("GPL");
+
+#define MODULE_NAME "esi"
+
+#define ESI_TABLE_GUID \
+ EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3, \
+ 0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4)
+
+enum esi_systab_entry_type {
+ ESI_DESC_ENTRY_POINT = 0
+};
+
+/*
+ * Entry type: Size:
+ * 0 48
+ */
+#define ESI_DESC_SIZE(type) "\060"[(unsigned) (type)]
+
+typedef struct ia64_esi_desc_entry_point {
+ u8 type;
+ u8 reserved1[15];
+ u64 esi_proc;
+ u64 gp;
+ efi_guid_t guid;
+} ia64_esi_desc_entry_point_t;
+
+struct pdesc {
+ void *addr;
+ void *gp;
+};
+
+static struct ia64_sal_systab *esi_systab;
+
+static int __init esi_init (void)
+{
+ efi_config_table_t *config_tables;
+ struct ia64_sal_systab *systab;
+ unsigned long esi = 0;
+ char *p;
+ int i;
+
+ config_tables = __va(efi.systab->tables);
+
+ for (i = 0; i < (int) efi.systab->nr_tables; ++i) {
+ if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) {
+ esi = config_tables[i].table;
+ break;
+ }
+ }
+
+ if (!esi)
+ return -ENODEV;;
+
+ systab = __va(esi);
+
+ if (strncmp(systab->signature, "ESIT", 4) != 0) {
+ printk(KERN_ERR "bad signature in ESI system table!");
+ return -ENODEV;
+ }
+
+ p = (char *) (systab + 1);
+ for (i = 0; i < systab->entry_count; i++) {
+ /*
+ * The first byte of each entry type contains the type
+ * descriptor.
+ */
+ switch (*p) {
+ case ESI_DESC_ENTRY_POINT:
+ break;
+ default:
+ printk(KERN_WARNING "Unkown table type %d found in "
+ "ESI table, ignoring rest of table\n", *p);
+ return -ENODEV;
+ }
+
+ p += ESI_DESC_SIZE(*p);
+ }
+
+ esi_systab = systab;
+ return 0;
+}
+
+
+int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+ enum esi_proc_type proc_type, u64 func,
+ u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
+ u64 arg7)
+{
+ struct ia64_fpreg fr[6];
+ unsigned long flags = 0;
+ int i;
+ char *p;
+
+ if (!esi_systab)
+ return -1;
+
+ p = (char *) (esi_systab + 1);
+ for (i = 0; i < esi_systab->entry_count; i++) {
+ if (*p == ESI_DESC_ENTRY_POINT) {
+ ia64_esi_desc_entry_point_t *esi = (void *)p;
+ if (!efi_guidcmp(guid, esi->guid)) {
+ ia64_sal_handler esi_proc;
+ struct pdesc pdesc;
+
+ pdesc.addr = __va(esi->esi_proc);
+ pdesc.gp = __va(esi->gp);
+
+ esi_proc = (ia64_sal_handler) &pdesc;
+
+ ia64_save_scratch_fpregs(fr);
+ if (proc_type == ESI_PROC_SERIALIZED)
+ spin_lock_irqsave(&sal_lock, flags);
+ else if (proc_type == ESI_PROC_MP_SAFE)
+ local_irq_save(flags);
+ else
+ preempt_disable();
+ *isrvp = (*esi_proc)(func, arg1, arg2, arg3,
+ arg4, arg5, arg6, arg7);
+ if (proc_type == ESI_PROC_SERIALIZED)
+ spin_unlock_irqrestore(&sal_lock,
+ flags);
+ else if (proc_type == ESI_PROC_MP_SAFE)
+ local_irq_restore(flags);
+ else
+ preempt_enable();
+ ia64_load_scratch_fpregs(fr);
+ return 0;
+ }
+ }
+ p += ESI_DESC_SIZE(*p);
+ }
+ return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call);
+
+int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+ u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4,
+ u64 arg5, u64 arg6, u64 arg7)
+{
+ struct ia64_fpreg fr[6];
+ unsigned long flags;
+ u64 esi_params[8];
+ char *p;
+ int i;
+
+ if (!esi_systab)
+ return -1;
+
+ p = (char *) (esi_systab + 1);
+ for (i = 0; i < esi_systab->entry_count; i++) {
+ if (*p == ESI_DESC_ENTRY_POINT) {
+ ia64_esi_desc_entry_point_t *esi = (void *)p;
+ if (!efi_guidcmp(guid, esi->guid)) {
+ ia64_sal_handler esi_proc;
+ struct pdesc pdesc;
+
+ pdesc.addr = (void *)esi->esi_proc;
+ pdesc.gp = (void *)esi->gp;
+
+ esi_proc = (ia64_sal_handler) &pdesc;
+
+ esi_params[0] = func;
+ esi_params[1] = arg1;
+ esi_params[2] = arg2;
+ esi_params[3] = arg3;
+ esi_params[4] = arg4;
+ esi_params[5] = arg5;
+ esi_params[6] = arg6;
+ esi_params[7] = arg7;
+ ia64_save_scratch_fpregs(fr);
+ spin_lock_irqsave(&sal_lock, flags);
+ *isrvp = esi_call_phys(esi_proc, esi_params);
+ spin_unlock_irqrestore(&sal_lock, flags);
+ ia64_load_scratch_fpregs(fr);
+ return 0;
+ }
+ }
+ p += ESI_DESC_SIZE(*p);
+ }
+ return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call_phys);
+
+static void __exit esi_exit (void)
+{
+}
+
+module_init(esi_init);
+module_exit(esi_exit); /* makes module removable... */
diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S
new file mode 100644
index 00000000000..6b3d6c1f99b
--- /dev/null
+++ b/arch/ia64/kernel/esi_stub.S
@@ -0,0 +1,96 @@
+/*
+ * ESI call stub.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ * Alex Williamson <alex.williamson@hp.com>
+ *
+ * Based on EFI call stub by David Mosberger. The stub is virtually
+ * identical to the one for EFI phys-mode calls, except that ESI
+ * calls may have up to 8 arguments, so they get passed to this routine
+ * through memory.
+ *
+ * This stub allows us to make ESI calls in physical mode with interrupts
+ * turned off. ESI calls may not support calling from virtual mode.
+ *
+ * Google for "Extensible SAL specification" for a document describing the
+ * ESI standard.
+ */
+
+/*
+ * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
+ * Abstraction Layer Specification", revision 2.6e). Note that
+ * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
+ * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
+ * (the br.ia instruction fails unless psr.dfl and psr.dfh are
+ * cleared). Fortunately, SAL promises not to touch the floating
+ * point regs, so at least we don't have to save f2-f127.
+ */
+#define PSR_BITS_TO_CLEAR \
+ (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
+ IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
+ IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PSR_BITS_TO_SET \
+ (IA64_PSR_BN)
+
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+
+/*
+ * Inputs:
+ * in0 = address of function descriptor of ESI routine to call
+ * in1 = address of array of ESI parameters
+ *
+ * Outputs:
+ * r8 = result returned by called function
+ */
+GLOBAL_ENTRY(esi_call_phys)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+ alloc loc1=ar.pfs,2,7,8,0
+ ld8 r2=[in0],8 // load ESI function's entry point
+ mov loc0=rp
+ .body
+ ;;
+ ld8 out0=[in1],8 // ESI params loaded from array
+ ;; // passing all as inputs doesn't work
+ ld8 out1=[in1],8
+ ;;
+ ld8 out2=[in1],8
+ ;;
+ ld8 out3=[in1],8
+ ;;
+ ld8 out4=[in1],8
+ ;;
+ ld8 out5=[in1],8
+ ;;
+ ld8 out6=[in1],8
+ ;;
+ ld8 out7=[in1]
+ mov loc2=gp // save global pointer
+ mov loc4=ar.rsc // save RSE configuration
+ mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ ;;
+ ld8 gp=[in0] // load ESI function's global pointer
+ movl r16=PSR_BITS_TO_CLEAR
+ mov loc3=psr // save processor status word
+ movl r17=PSR_BITS_TO_SET
+ ;;
+ or loc3=loc3,r17
+ mov b6=r2
+ ;;
+ andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
+ br.call.sptk.many rp=ia64_switch_mode_phys
+.ret0: mov loc5=r19 // old ar.bsp
+ mov loc6=r20 // old sp
+ br.call.sptk.many rp=b6 // call the ESI function
+.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ mov r16=loc3 // save virtual mode psr
+ mov r19=loc5 // save virtual mode bspstore
+ mov r20=loc6 // save virtual mode sp
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+.ret2: mov ar.rsc=loc4 // restore RSE configuration
+ mov ar.pfs=loc1
+ mov rp=loc0
+ mov gp=loc2
+ br.ret.sptk.many rp
+END(esi_call_phys)
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 3ead20fb6f4..879c1817bd1 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -105,5 +105,9 @@ EXPORT_SYMBOL(ia64_spinlock_contention);
# endif
#endif
+#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE)
+extern void esi_call_phys (void);
+EXPORT_SYMBOL_GPL(esi_call_phys);
+#endif
extern char ia64_ivt[];
EXPORT_SYMBOL(ia64_ivt);
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 781960f80b6..169ec3a7156 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -136,10 +136,8 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot,
static int __kprobes unsupported_inst(uint template, uint slot,
uint major_opcode,
unsigned long kprobe_inst,
- struct kprobe *p)
+ unsigned long addr)
{
- unsigned long addr = (unsigned long)p->addr;
-
if (bundle_encoding[template][slot] == I) {
switch (major_opcode) {
case 0x0: //I_UNIT_MISC_OPCODE:
@@ -217,7 +215,7 @@ static void __kprobes prepare_break_inst(uint template, uint slot,
struct kprobe *p)
{
unsigned long break_inst = BREAK_INST;
- bundle_t *bundle = &p->ainsn.insn.bundle;
+ bundle_t *bundle = &p->opcode.bundle;
/*
* Copy the original kprobe_inst qualifying predicate(qp)
@@ -423,11 +421,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL);
unsigned long kprobe_inst=0;
unsigned int slot = addr & 0xf, template, major_opcode = 0;
- bundle_t *bundle = &p->ainsn.insn.bundle;
-
- memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t));
- memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t));
+ bundle_t *bundle;
+ bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle;
template = bundle->quad0.template;
if(valid_kprobe_addr(template, slot, addr))
@@ -440,20 +436,19 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
/* Get kprobe_inst and major_opcode from the bundle */
get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
- if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p))
+ if (unsupported_inst(template, slot, major_opcode, kprobe_inst, addr))
return -EINVAL;
- prepare_break_inst(template, slot, major_opcode, kprobe_inst, p);
- return 0;
-}
+ p->ainsn.insn = get_insn_slot();
+ if (!p->ainsn.insn)
+ return -ENOMEM;
+ memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t));
+ memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t));
-void __kprobes flush_insn_slot(struct kprobe *p)
-{
- unsigned long arm_addr;
+ prepare_break_inst(template, slot, major_opcode, kprobe_inst, p);
- arm_addr = ((unsigned long)&p->opcode.bundle) & ~0xFULL;
- flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+ return 0;
}
void __kprobes arch_arm_kprobe(struct kprobe *p)
@@ -461,9 +456,10 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
unsigned long addr = (unsigned long)p->addr;
unsigned long arm_addr = addr & ~0xFULL;
- flush_insn_slot(p);
- memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t));
- flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+ flush_icache_range((unsigned long)p->ainsn.insn,
+ (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
+ memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
+ flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}
void __kprobes arch_disarm_kprobe(struct kprobe *p)
@@ -471,11 +467,18 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
unsigned long addr = (unsigned long)p->addr;
unsigned long arm_addr = addr & ~0xFULL;
- /* p->opcode contains the original unaltered bundle */
- memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t));
- flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+ /* p->ainsn.insn contains the original unaltered kprobe_opcode_t */
+ memcpy((char *) arm_addr, (char *) p->ainsn.insn,
+ sizeof(kprobe_opcode_t));
+ flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+ mutex_lock(&kprobe_mutex);
+ free_insn_slot(p->ainsn.insn);
+ mutex_unlock(&kprobe_mutex);
+}
/*
* We are resuming execution after a single step fault, so the pt_regs
* structure reflects the register state after we executed the instruction
@@ -486,12 +489,12 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
*/
static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
{
- unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL;
+ unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle);
unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
unsigned long template;
int slot = ((unsigned long)p->addr & 0xf);
- template = p->opcode.bundle.quad0.template;
+ template = p->ainsn.insn->bundle.quad0.template;
if (slot == 1 && bundle_encoding[template][1] == L)
slot = 2;
@@ -553,7 +556,7 @@ turn_ss_off:
static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
{
- unsigned long bundle_addr = (unsigned long) &p->opcode.bundle;
+ unsigned long bundle_addr = (unsigned long) &p->ainsn.insn->bundle;
unsigned long slot = (unsigned long)p->addr & 0xf;
/* single step inline if break instruction */
@@ -768,6 +771,12 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
*/
if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
return 1;
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+ if (ia64_done_with_exception(regs))
+ return 1;
/*
* Let ia64_do_page_fault() fix it.
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 2fbe4536fe1..bfbd8986153 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -54,6 +54,9 @@
*
* 2005-10-07 Keith Owens <kaos@sgi.com>
* Add notify_die() hooks.
+ *
+ * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ * Add printing support for MCA/INIT.
*/
#include <linux/types.h>
#include <linux/init.h>
@@ -136,11 +139,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
static int mca_init __initdata;
+/*
+ * limited & delayed printing support for MCA/INIT handler
+ */
+
+#define mprintk(fmt...) ia64_mca_printk(fmt)
+
+#define MLOGBUF_SIZE (512+256*NR_CPUS)
+#define MLOGBUF_MSGMAX 256
+static char mlogbuf[MLOGBUF_SIZE];
+static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */
+static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */
+static unsigned long mlogbuf_start;
+static unsigned long mlogbuf_end;
+static unsigned int mlogbuf_finished = 0;
+static unsigned long mlogbuf_timestamp = 0;
+
+static int loglevel_save = -1;
+#define BREAK_LOGLEVEL(__console_loglevel) \
+ oops_in_progress = 1; \
+ if (loglevel_save < 0) \
+ loglevel_save = __console_loglevel; \
+ __console_loglevel = 15;
+
+#define RESTORE_LOGLEVEL(__console_loglevel) \
+ if (loglevel_save >= 0) { \
+ __console_loglevel = loglevel_save; \
+ loglevel_save = -1; \
+ } \
+ mlogbuf_finished = 0; \
+ oops_in_progress = 0;
+
+/*
+ * Push messages into buffer, print them later if not urgent.
+ */
+void ia64_mca_printk(const char *fmt, ...)
+{
+ va_list args;
+ int printed_len;
+ char temp_buf[MLOGBUF_MSGMAX];
+ char *p;
+
+ va_start(args, fmt);
+ printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args);
+ va_end(args);
+
+ /* Copy the output into mlogbuf */
+ if (oops_in_progress) {
+ /* mlogbuf was abandoned, use printk directly instead. */
+ printk(temp_buf);
+ } else {
+ spin_lock(&mlogbuf_wlock);
+ for (p = temp_buf; *p; p++) {
+ unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE;
+ if (next != mlogbuf_start) {
+ mlogbuf[mlogbuf_end] = *p;
+ mlogbuf_end = next;
+ } else {
+ /* buffer full */
+ break;
+ }
+ }
+ mlogbuf[mlogbuf_end] = '\0';
+ spin_unlock(&mlogbuf_wlock);
+ }
+}
+EXPORT_SYMBOL(ia64_mca_printk);
+
+/*
+ * Print buffered messages.
+ * NOTE: call this after returning normal context. (ex. from salinfod)
+ */
+void ia64_mlogbuf_dump(void)
+{
+ char temp_buf[MLOGBUF_MSGMAX];
+ char *p;
+ unsigned long index;
+ unsigned long flags;
+ unsigned int printed_len;
+
+ /* Get output from mlogbuf */
+ while (mlogbuf_start != mlogbuf_end) {
+ temp_buf[0] = '\0';
+ p = temp_buf;
+ printed_len = 0;
+
+ spin_lock_irqsave(&mlogbuf_rlock, flags);
+
+ index = mlogbuf_start;
+ while (index != mlogbuf_end) {
+ *p = mlogbuf[index];
+ index = (index + 1) % MLOGBUF_SIZE;
+ if (!*p)
+ break;
+ p++;
+ if (++printed_len >= MLOGBUF_MSGMAX - 1)
+ break;
+ }
+ *p = '\0';
+ if (temp_buf[0])
+ printk(temp_buf);
+ mlogbuf_start = index;
+
+ mlogbuf_timestamp = 0;
+ spin_unlock_irqrestore(&mlogbuf_rlock, flags);
+ }
+}
+EXPORT_SYMBOL(ia64_mlogbuf_dump);
+
+/*
+ * Call this if system is going to down or if immediate flushing messages to
+ * console is required. (ex. recovery was failed, crash dump is going to be
+ * invoked, long-wait rendezvous etc.)
+ * NOTE: this should be called from monarch.
+ */
+static void ia64_mlogbuf_finish(int wait)
+{
+ BREAK_LOGLEVEL(console_loglevel);
+
+ spin_lock_init(&mlogbuf_rlock);
+ ia64_mlogbuf_dump();
+ printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, "
+ "MCA/INIT might be dodgy or fail.\n");
+
+ if (!wait)
+ return;
+
+ /* wait for console */
+ printk("Delaying for 5 seconds...\n");
+ udelay(5*1000000);
+
+ mlogbuf_finished = 1;
+}
+EXPORT_SYMBOL(ia64_mlogbuf_finish);
+
+/*
+ * Print buffered messages from INIT context.
+ */
+static void ia64_mlogbuf_dump_from_init(void)
+{
+ if (mlogbuf_finished)
+ return;
+
+ if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
+ printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
+ " and the system seems to be messed up.\n");
+ ia64_mlogbuf_finish(0);
+ return;
+ }
+
+ if (!spin_trylock(&mlogbuf_rlock)) {
+ printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. "
+ "Generated messages other than stack dump will be "
+ "buffered to mlogbuf and will be printed later.\n");
+ printk(KERN_ERR "INIT: If messages would not printed after "
+ "this INIT, wait 30sec and assert INIT again.\n");
+ if (!mlogbuf_timestamp)
+ mlogbuf_timestamp = jiffies;
+ return;
+ }
+ spin_unlock(&mlogbuf_rlock);
+ ia64_mlogbuf_dump();
+}
static void inline
ia64_mca_spin(const char *func)
{
- printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
+ if (monarch_cpu == smp_processor_id())
+ ia64_mlogbuf_finish(0);
+ mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
while (1)
cpu_relax();
}
@@ -344,9 +511,6 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
/* SAL spec states this should run w/ interrupts enabled */
local_irq_enable();
- /* Get the CPE error record and log it */
- ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
-
spin_lock(&cpe_history_lock);
if (!cpe_poll_enabled && cpe_vector >= 0) {
@@ -375,7 +539,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL);
/* lock already released, get out now */
- return IRQ_HANDLED;
+ goto out;
} else {
cpe_history[index++] = now;
if (index == CPE_HISTORY_LENGTH)
@@ -383,6 +547,10 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
}
}
spin_unlock(&cpe_history_lock);
+out:
+ /* Get the CPE error record and log it */
+ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
+
return IRQ_HANDLED;
}
@@ -988,18 +1156,22 @@ ia64_wait_for_slaves(int monarch, const char *type)
}
if (!missing)
goto all_in;
- printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
+ /*
+ * Maybe slave(s) dead. Print buffered messages immediately.
+ */
+ ia64_mlogbuf_finish(0);
+ mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
for_each_online_cpu(c) {
if (c == monarch)
continue;
if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
- printk(" %d", c);
+ mprintk(" %d", c);
}
- printk("\n");
+ mprintk("\n");
return;
all_in:
- printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
+ mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
return;
}
@@ -1027,10 +1199,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
struct ia64_mca_notify_die nd =
{ .sos = sos, .monarch_cpu = &monarch_cpu };
- oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */
- console_loglevel = 15; /* make sure printks make it to console */
- printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n",
- sos->proc_state_param, cpu, sos->monarch);
+ mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
+ "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch);
previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
monarch_cpu = cpu;
@@ -1066,6 +1236,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
rh->severity = sal_log_severity_corrected;
ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
sos->os_status = IA64_MCA_CORRECTED;
+ } else {
+ /* Dump buffered message to console */
+ ia64_mlogbuf_finish(1);
}
if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
== NOTIFY_STOP)
@@ -1106,9 +1279,6 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
/* SAL spec states this should run w/ interrupts enabled */
local_irq_enable();
- /* Get the CMC error record and log it */
- ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
-
spin_lock(&cmc_history_lock);
if (!cmc_polling_enabled) {
int i, count = 1; /* we know 1 happened now */
@@ -1141,7 +1311,7 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
/* lock already released, get out now */
- return IRQ_HANDLED;
+ goto out;
} else {
cmc_history[index++] = now;
if (index == CMC_HISTORY_LENGTH)
@@ -1149,6 +1319,10 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
}
}
spin_unlock(&cmc_history_lock);
+out:
+ /* Get the CMC error record and log it */
+ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
+
return IRQ_HANDLED;
}
@@ -1305,6 +1479,15 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
struct task_struct *g, *t;
if (val != DIE_INIT_MONARCH_PROCESS)
return NOTIFY_DONE;
+
+ /*
+ * FIXME: mlogbuf will brim over with INIT stack dumps.
+ * To enable show_stack from INIT, we use oops_in_progress which should
+ * be used in real oops. This would cause something wrong after INIT.
+ */
+ BREAK_LOGLEVEL(console_loglevel);
+ ia64_mlogbuf_dump_from_init();
+
printk(KERN_ERR "Processes interrupted by INIT -");
for_each_online_cpu(c) {
struct ia64_sal_os_state *s;
@@ -1326,6 +1509,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
} while_each_thread (g, t);
read_unlock(&tasklist_lock);
}
+ /* FIXME: This will not restore zapped printk locks. */
+ RESTORE_LOGLEVEL(console_loglevel);
return NOTIFY_DONE;
}
@@ -1357,12 +1542,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
struct ia64_mca_notify_die nd =
{ .sos = sos, .monarch_cpu = &monarch_cpu };
- oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */
- console_loglevel = 15; /* make sure printks make it to console */
-
(void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0);
- printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
+ mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
sos->proc_state_param, cpu, sos->monarch);
salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
@@ -1375,7 +1557,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
* fix their proms and get their customers updated.
*/
if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
- printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
+ mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
__FUNCTION__, cpu);
atomic_dec(&slaves);
sos->monarch = 1;
@@ -1387,7 +1569,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
* fix their proms and get their customers updated.
*/
if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
- printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
+ mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
__FUNCTION__, cpu);
atomic_dec(&monarchs);
sos->monarch = 0;
@@ -1408,7 +1590,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0)
== NOTIFY_STOP)
ia64_mca_spin(__FUNCTION__);
- printk("Slave on cpu %d returning to normal service.\n", cpu);
+ mprintk("Slave on cpu %d returning to normal service.\n", cpu);
set_curr_task(cpu, previous_current);
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
atomic_dec(&slaves);
@@ -1426,7 +1608,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
* same serial line, the user will need some time to switch out of the BMC before
* the dump begins.
*/
- printk("Delaying for 5 seconds...\n");
+ mprintk("Delaying for 5 seconds...\n");
udelay(5*1000000);
ia64_wait_for_slaves(cpu, "INIT");
/* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through
@@ -1439,7 +1621,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0)
== NOTIFY_STOP)
ia64_mca_spin(__FUNCTION__);
- printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu);
+ mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu);
atomic_dec(&monarchs);
set_curr_task(cpu, previous_current);
monarch_cpu = -1;
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 96047491d1b..c6b607c00de 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -1025,18 +1025,13 @@ ia64_old_stack:
ia64_set_kernel_registers:
add temp3=MCA_SP_OFFSET, r3
- add temp4=MCA_SOS_OFFSET+SOS(OS_GP), r3
mov b0=r2 // save return address
GET_IA64_MCA_DATA(temp1)
;;
- add temp4=temp4, temp1 // &struct ia64_sal_os_state.os_gp
add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack
add r13=temp1, r3 // set current to start of MCA/INIT stack
add r20=temp1, r3 // physical start of MCA/INIT stack
;;
- ld8 r1=[temp4] // OS GP from SAL OS state
- ;;
- DATA_PA_TO_VA(r1,temp1)
DATA_PA_TO_VA(r12,temp2)
DATA_PA_TO_VA(r13,temp3)
;;
@@ -1067,6 +1062,10 @@ ia64_set_kernel_registers:
mov cr.itir=r18
mov cr.ifa=r13
mov r20=IA64_TR_CURRENT_STACK
+
+ movl r17=FPSR_DEFAULT
+ ;;
+ mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value
;;
itr.d dtr[r20]=r21
;;
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index 8db6e0cedad..a45009d2bc9 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -79,14 +79,30 @@ static int
fatal_mca(const char *fmt, ...)
{
va_list args;
+ char buf[256];
va_start(args, fmt);
- vprintk(fmt, args);
+ vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
+ ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf);
return MCA_NOT_RECOVERED;
}
+static int
+mca_recovered(const char *fmt, ...)
+{
+ va_list args;
+ char buf[256];
+
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+ ia64_mca_printk(KERN_INFO "MCA: %s\n", buf);
+
+ return MCA_RECOVERED;
+}
+
/**
* mca_page_isolate - isolate a poisoned page in order not to use it later
* @paddr: poisoned memory location
@@ -140,6 +156,7 @@ mca_page_isolate(unsigned long paddr)
void
mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
{
+ ia64_mlogbuf_dump();
printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
"iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
raw_smp_processor_id(), current->pid, current->uid,
@@ -440,7 +457,7 @@ recover_from_read_error(slidx_table_t *slidx,
/* Is target address valid? */
if (!pbci->tv)
- return fatal_mca(KERN_ALERT "MCA: target address not valid\n");
+ return fatal_mca("target address not valid");
/*
* cpu read or memory-mapped io read
@@ -458,7 +475,7 @@ recover_from_read_error(slidx_table_t *slidx,
/* Is minstate valid? */
if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
- return fatal_mca(KERN_ALERT "MCA: minstate not valid\n");
+ return fatal_mca("minstate not valid");
psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
@@ -492,13 +509,14 @@ recover_from_read_error(slidx_table_t *slidx,
psr2->bn = 1;
psr2->i = 0;
- return MCA_RECOVERED;
+ return mca_recovered("user memory corruption. "
+ "kill affected process - recovered.");
}
}
- return fatal_mca(KERN_ALERT "MCA: kernel context not recovered,"
- " iip 0x%lx\n", pmsa->pmsa_iip);
+ return fatal_mca("kernel context not recovered, iip 0x%lx\n",
+ pmsa->pmsa_iip);
}
/**
@@ -584,13 +602,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
* The machine check is corrected.
*/
if (psp->cm == 1)
- return MCA_RECOVERED;
+ return mca_recovered("machine check is already corrected.");
/*
* The error was not contained. Software must be reset.
*/
if (psp->us || psp->ci == 0)
- return fatal_mca(KERN_ALERT "MCA: error not contained\n");
+ return fatal_mca("error not contained");
/*
* The cache check and bus check bits have four possible states
@@ -601,22 +619,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
* 1 1 Memory error, attempt recovery
*/
if (psp->bc == 0 || pbci == NULL)
- return fatal_mca(KERN_ALERT "MCA: No bus check\n");
+ return fatal_mca("No bus check");
/*
* Sorry, we cannot handle so many.
*/
if (peidx_bus_check_num(peidx) > 1)
- return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n");
+ return fatal_mca("Too many bus checks");
/*
* Well, here is only one bus error.
*/
if (pbci->ib)
- return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n");
+ return fatal_mca("Internal Bus error");
if (pbci->cc)
- return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n");
+ return fatal_mca("Cache-cache error");
if (pbci->eb && pbci->bsi > 0)
- return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n");
+ return fatal_mca("External bus check fatal status");
/*
* This is a local MCA and estimated as recoverble external bus error.
@@ -628,7 +646,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
/*
* On account of strange SAL error record, we cannot recover.
*/
- return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n");
+ return fatal_mca("Strange SAL record");
}
/**
@@ -657,10 +675,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
/* Now, OS can recover when there is one processor error section */
if (n_proc_err > 1)
- return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n");
+ return fatal_mca("Too Many Errors");
else if (n_proc_err == 0)
- /* Weird SAL record ... We need not to recover */
- return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n");
+ /* Weird SAL record ... We can't do anything */
+ return fatal_mca("Weird SAL record");
/* Make index of processor error section */
mca_make_peidx((sal_log_processor_info_t*)
@@ -671,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
/* Check whether MCA is global or not */
if (is_mca_global(&peidx, &pbci, sos))
- return fatal_mca(KERN_ALERT "MCA: global MCA\n");
+ return fatal_mca("global MCA");
/* Try to recover a processor error */
return recover_from_processor_error(platform_err, &slidx, &peidx,
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h
index 31a2e52bb16..c85e943ba5f 100644
--- a/arch/ia64/kernel/mca_drv.h
+++ b/arch/ia64/kernel/mca_drv.h
@@ -118,3 +118,7 @@ struct mca_table_entry {
extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
extern int mca_recover_range(unsigned long);
+extern void ia64_mca_printk(const char * fmt, ...)
+ __attribute__ ((format (printf, 1, 2)));
+extern void ia64_mlogbuf_dump(void);
+
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index 1cc360c83e7..20340631179 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -29,6 +29,36 @@ EXPORT_SYMBOL(cpu_to_node_map);
cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+void __cpuinit map_cpu_to_node(int cpu, int nid)
+{
+ int oldnid;
+ if (nid < 0) { /* just initialize by zero */
+ cpu_to_node_map[cpu] = 0;
+ return;
+ }
+ /* sanity check first */
+ oldnid = cpu_to_node_map[cpu];
+ if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) {
+ return; /* nothing to do */
+ }
+ /* we don't have cpu-driven node hot add yet...
+ In usual case, node is created from SRAT at boot time. */
+ if (!node_online(nid))
+ nid = first_online_node;
+ cpu_to_node_map[cpu] = nid;
+ cpu_set(cpu, node_to_cpu_mask[nid]);
+ return;
+}
+
+void __cpuinit unmap_cpu_from_node(int cpu, int nid)
+{
+ WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid]));
+ WARN_ON(cpu_to_node_map[cpu] != nid);
+ cpu_to_node_map[cpu] = 0;
+ cpu_clear(cpu, node_to_cpu_mask[nid]);
+}
+
+
/**
* build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
*
@@ -49,8 +79,6 @@ void __init build_cpu_to_node_map(void)
node = node_cpuid[i].nid;
break;
}
- cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
- if (node >= 0)
- cpu_set(cpu, node_to_cpu_mask[node]);
+ map_cpu_to_node(cpu, node);
}
}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 84a7e52f56f..281004ff7b0 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -34,6 +34,7 @@
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfs.h>
+#include <linux/smp.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/bitops.h>
@@ -62,6 +63,9 @@
#define PFM_INVALID_ACTIVATION (~0UL)
+#define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */
+#define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */
+
/*
* depth of message queue
*/
@@ -296,14 +300,17 @@ typedef struct pfm_context {
unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */
unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */
- unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */
+ unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */
unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */
unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */
unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */
unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */
- pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
+ pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */
+
+ unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */
+ unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */
u64 ctx_saved_psr_up; /* only contains psr.up value */
@@ -867,7 +874,6 @@ static void
pfm_mask_monitoring(struct task_struct *task)
{
pfm_context_t *ctx = PFM_GET_CTX(task);
- struct thread_struct *th = &task->thread;
unsigned long mask, val, ovfl_mask;
int i;
@@ -888,7 +894,7 @@ pfm_mask_monitoring(struct task_struct *task)
* So in both cases, the live register contains the owner's
* state. We can ONLY touch the PMU registers and NOT the PSR.
*
- * As a consequence to this call, the thread->pmds[] array
+ * As a consequence to this call, the ctx->th_pmds[] array
* contains stale information which must be ignored
* when context is reloaded AND monitoring is active (see
* pfm_restart).
@@ -923,9 +929,9 @@ pfm_mask_monitoring(struct task_struct *task)
mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
if ((mask & 0x1) == 0UL) continue;
- ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
- th->pmcs[i] &= ~0xfUL;
- DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
+ ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL);
+ ctx->th_pmcs[i] &= ~0xfUL;
+ DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
}
/*
* make all of this visible
@@ -942,7 +948,6 @@ static void
pfm_restore_monitoring(struct task_struct *task)
{
pfm_context_t *ctx = PFM_GET_CTX(task);
- struct thread_struct *th = &task->thread;
unsigned long mask, ovfl_mask;
unsigned long psr, val;
int i, is_system;
@@ -1008,9 +1013,9 @@ pfm_restore_monitoring(struct task_struct *task)
mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
if ((mask & 0x1) == 0UL) continue;
- th->pmcs[i] = ctx->ctx_pmcs[i];
- ia64_set_pmc(i, th->pmcs[i]);
- DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
+ ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
+ ia64_set_pmc(i, ctx->th_pmcs[i]);
+ DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i]));
}
ia64_srlz_d();
@@ -1069,7 +1074,6 @@ pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
static inline void
pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
{
- struct thread_struct *thread = &task->thread;
unsigned long ovfl_val = pmu_conf->ovfl_val;
unsigned long mask = ctx->ctx_all_pmds[0];
unsigned long val;
@@ -1091,11 +1095,11 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
ctx->ctx_pmds[i].val = val & ~ovfl_val;
val &= ovfl_val;
}
- thread->pmds[i] = val;
+ ctx->th_pmds[i] = val;
DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
i,
- thread->pmds[i],
+ ctx->th_pmds[i],
ctx->ctx_pmds[i].val));
}
}
@@ -1106,7 +1110,6 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
static inline void
pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
{
- struct thread_struct *thread = &task->thread;
unsigned long mask = ctx->ctx_all_pmcs[0];
int i;
@@ -1114,8 +1117,8 @@ pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
for (i=0; mask; i++, mask>>=1) {
/* masking 0 with ovfl_val yields 0 */
- thread->pmcs[i] = ctx->ctx_pmcs[i];
- DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
+ ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
+ DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
}
}
@@ -2859,7 +2862,6 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
static int
pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
- struct thread_struct *thread = NULL;
struct task_struct *task;
pfarg_reg_t *req = (pfarg_reg_t *)arg;
unsigned long value, pmc_pm;
@@ -2880,7 +2882,6 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
if (state == PFM_CTX_ZOMBIE) return -EINVAL;
if (is_loaded) {
- thread = &task->thread;
/*
* In system wide and when the context is loaded, access can only happen
* when the caller is running on the CPU being monitored by the session.
@@ -3035,7 +3036,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
*
* The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
*
- * The value in thread->pmcs[] may be modified on overflow, i.e., when
+ * The value in th_pmcs[] may be modified on overflow, i.e., when
* monitoring needs to be stopped.
*/
if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);
@@ -3049,7 +3050,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
/*
* write thread state
*/
- if (is_system == 0) thread->pmcs[cnum] = value;
+ if (is_system == 0) ctx->th_pmcs[cnum] = value;
/*
* write hardware register if we can
@@ -3101,7 +3102,6 @@ error:
static int
pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
- struct thread_struct *thread = NULL;
struct task_struct *task;
pfarg_reg_t *req = (pfarg_reg_t *)arg;
unsigned long value, hw_value, ovfl_mask;
@@ -3125,7 +3125,6 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
* the owner of the local PMU.
*/
if (likely(is_loaded)) {
- thread = &task->thread;
/*
* In system wide and when the context is loaded, access can only happen
* when the caller is running on the CPU being monitored by the session.
@@ -3233,7 +3232,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
/*
* write thread state
*/
- if (is_system == 0) thread->pmds[cnum] = hw_value;
+ if (is_system == 0) ctx->th_pmds[cnum] = hw_value;
/*
* write hardware register if we can
@@ -3299,7 +3298,6 @@ abort_mission:
static int
pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
- struct thread_struct *thread = NULL;
struct task_struct *task;
unsigned long val = 0UL, lval, ovfl_mask, sval;
pfarg_reg_t *req = (pfarg_reg_t *)arg;
@@ -3323,7 +3321,6 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
if (state == PFM_CTX_ZOMBIE) return -EINVAL;
if (likely(is_loaded)) {
- thread = &task->thread;
/*
* In system wide and when the context is loaded, access can only happen
* when the caller is running on the CPU being monitored by the session.
@@ -3385,7 +3382,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
* if context is zombie, then task does not exist anymore.
* In this case, we use the full value saved in the context (pfm_flush_regs()).
*/
- val = is_loaded ? thread->pmds[cnum] : 0UL;
+ val = is_loaded ? ctx->th_pmds[cnum] : 0UL;
}
rd_func = pmu_conf->pmd_desc[cnum].read_check;
@@ -4354,8 +4351,8 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
pfm_copy_pmds(task, ctx);
pfm_copy_pmcs(task, ctx);
- pmcs_source = thread->pmcs;
- pmds_source = thread->pmds;
+ pmcs_source = ctx->th_pmcs;
+ pmds_source = ctx->th_pmds;
/*
* always the case for system-wide
@@ -5864,14 +5861,12 @@ void
pfm_save_regs(struct task_struct *task)
{
pfm_context_t *ctx;
- struct thread_struct *t;
unsigned long flags;
u64 psr;
ctx = PFM_GET_CTX(task);
if (ctx == NULL) return;
- t = &task->thread;
/*
* we always come here with interrupts ALREADY disabled by
@@ -5929,19 +5924,19 @@ pfm_save_regs(struct task_struct *task)
* guarantee we will be schedule at that same
* CPU again.
*/
- pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
+ pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);
/*
* save pmc0 ia64_srlz_d() done in pfm_save_pmds()
* we will need it on the restore path to check
* for pending overflow.
*/
- t->pmcs[0] = ia64_get_pmc(0);
+ ctx->th_pmcs[0] = ia64_get_pmc(0);
/*
* unfreeze PMU if had pending overflows
*/
- if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+ if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
/*
* finally, allow context access.
@@ -5986,7 +5981,6 @@ static void
pfm_lazy_save_regs (struct task_struct *task)
{
pfm_context_t *ctx;
- struct thread_struct *t;
unsigned long flags;
{ u64 psr = pfm_get_psr();
@@ -5994,7 +5988,6 @@ pfm_lazy_save_regs (struct task_struct *task)
}
ctx = PFM_GET_CTX(task);
- t = &task->thread;
/*
* we need to mask PMU overflow here to
@@ -6019,19 +6012,19 @@ pfm_lazy_save_regs (struct task_struct *task)
/*
* save all the pmds we use
*/
- pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
+ pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);
/*
* save pmc0 ia64_srlz_d() done in pfm_save_pmds()
* it is needed to check for pended overflow
* on the restore path
*/
- t->pmcs[0] = ia64_get_pmc(0);
+ ctx->th_pmcs[0] = ia64_get_pmc(0);
/*
* unfreeze PMU if had pending overflows
*/
- if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+ if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
/*
* now get can unmask PMU interrupts, they will
@@ -6050,7 +6043,6 @@ void
pfm_load_regs (struct task_struct *task)
{
pfm_context_t *ctx;
- struct thread_struct *t;
unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
unsigned long flags;
u64 psr, psr_up;
@@ -6061,11 +6053,10 @@ pfm_load_regs (struct task_struct *task)
BUG_ON(GET_PMU_OWNER());
- t = &task->thread;
/*
* possible on unload
*/
- if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return;
+ if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return;
/*
* we always come here with interrupts ALREADY disabled by
@@ -6147,21 +6138,21 @@ pfm_load_regs (struct task_struct *task)
*
* XXX: optimize here
*/
- if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask);
- if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask);
+ if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask);
+ if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask);
/*
* check for pending overflow at the time the state
* was saved.
*/
- if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
+ if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) {
/*
* reload pmc0 with the overflow information
* On McKinley PMU, this will trigger a PMU interrupt
*/
- ia64_set_pmc(0, t->pmcs[0]);
+ ia64_set_pmc(0, ctx->th_pmcs[0]);
ia64_srlz_d();
- t->pmcs[0] = 0UL;
+ ctx->th_pmcs[0] = 0UL;
/*
* will replay the PMU interrupt
@@ -6214,7 +6205,6 @@ pfm_load_regs (struct task_struct *task)
void
pfm_load_regs (struct task_struct *task)
{
- struct thread_struct *t;
pfm_context_t *ctx;
struct task_struct *owner;
unsigned long pmd_mask, pmc_mask;
@@ -6223,7 +6213,6 @@ pfm_load_regs (struct task_struct *task)
owner = GET_PMU_OWNER();
ctx = PFM_GET_CTX(task);
- t = &task->thread;
psr = pfm_get_psr();
BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
@@ -6286,22 +6275,22 @@ pfm_load_regs (struct task_struct *task)
*/
pmc_mask = ctx->ctx_all_pmcs[0];
- pfm_restore_pmds(t->pmds, pmd_mask);
- pfm_restore_pmcs(t->pmcs, pmc_mask);
+ pfm_restore_pmds(ctx->th_pmds, pmd_mask);
+ pfm_restore_pmcs(ctx->th_pmcs, pmc_mask);
/*
* check for pending overflow at the time the state
* was saved.
*/
- if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
+ if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) {
/*
* reload pmc0 with the overflow information
* On McKinley PMU, this will trigger a PMU interrupt
*/
- ia64_set_pmc(0, t->pmcs[0]);
+ ia64_set_pmc(0, ctx->th_pmcs[0]);
ia64_srlz_d();
- t->pmcs[0] = 0UL;
+ ctx->th_pmcs[0] = 0UL;
/*
* will replay the PMU interrupt
@@ -6376,11 +6365,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
*/
pfm_unfreeze_pmu();
} else {
- pmc0 = task->thread.pmcs[0];
+ pmc0 = ctx->th_pmcs[0];
/*
* clear whatever overflow status bits there were
*/
- task->thread.pmcs[0] = 0;
+ ctx->th_pmcs[0] = 0;
}
ovfl_val = pmu_conf->ovfl_val;
/*
@@ -6401,7 +6390,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
/*
* can access PMU always true in system wide mode
*/
- val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i];
+ val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i];
if (PMD_IS_COUNTING(i)) {
DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
@@ -6433,7 +6422,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
- if (is_self) task->thread.pmds[i] = pmd_val;
+ if (is_self) ctx->th_pmds[i] = pmd_val;
ctx->ctx_pmds[i].val = val;
}
@@ -6677,7 +6666,7 @@ pfm_init(void)
ffz(pmu_conf->ovfl_val));
/* sanity check */
- if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) {
+ if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) {
printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
pmu_conf = NULL;
return -1;
@@ -6752,7 +6741,6 @@ void
dump_pmu_state(const char *from)
{
struct task_struct *task;
- struct thread_struct *t;
struct pt_regs *regs;
pfm_context_t *ctx;
unsigned long psr, dcr, info, flags;
@@ -6797,16 +6785,14 @@ dump_pmu_state(const char *from)
ia64_psr(regs)->up = 0;
ia64_psr(regs)->pp = 0;
- t = &current->thread;
-
for (i=1; PMC_IS_LAST(i) == 0; i++) {
if (PMC_IS_IMPL(i) == 0) continue;
- printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]);
+ printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]);
}
for (i=1; PMD_IS_LAST(i) == 0; i++) {
if (PMD_IS_IMPL(i) == 0) continue;
- printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]);
+ printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]);
}
if (ctx) {
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 9065f0f01ba..e63b8ca5344 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -266,6 +266,7 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
/* Check for outstanding MCA/INIT records every minute (arbitrary) */
#define SALINFO_TIMER_DELAY (60*HZ)
static struct timer_list salinfo_timer;
+extern void ia64_mlogbuf_dump(void);
static void
salinfo_timeout_check(struct salinfo_data *data)
@@ -283,6 +284,7 @@ salinfo_timeout_check(struct salinfo_data *data)
static void
salinfo_timeout (unsigned long arg)
{
+ ia64_mlogbuf_dump();
salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA);
salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT);
salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
@@ -332,6 +334,8 @@ retry:
if (cpu == -1)
goto retry;
+ ia64_mlogbuf_dump();
+
/* for next read, start checking at next CPU */
data->cpu_check = cpu;
if (++data->cpu_check == NR_CPUS)
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 7ad0d9cc6db..84f93c0f2c6 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -509,7 +509,7 @@ show_cpuinfo (struct seq_file *m, void *v)
{ 1UL << 1, "spontaneous deferral"},
{ 1UL << 2, "16-byte atomic ops" }
};
- char family[32], features[128], *cp, sep;
+ char features[128], *cp, sep;
struct cpuinfo_ia64 *c = v;
unsigned long mask;
unsigned long proc_freq;
@@ -517,12 +517,6 @@ show_cpuinfo (struct seq_file *m, void *v)
mask = c->features;
- switch (c->family) {
- case 0x07: memcpy(family, "Itanium", 8); break;
- case 0x1f: memcpy(family, "Itanium 2", 10); break;
- default: sprintf(family, "%u", c->family); break;
- }
-
/* build the feature string: */
memcpy(features, " standard", 10);
cp = features;
@@ -553,8 +547,9 @@ show_cpuinfo (struct seq_file *m, void *v)
"processor : %d\n"
"vendor : %s\n"
"arch : IA-64\n"
- "family : %s\n"
+ "family : %u\n"
"model : %u\n"
+ "model name : %s\n"
"revision : %u\n"
"archrev : %u\n"
"features :%s\n" /* don't change this---it _is_ right! */
@@ -563,7 +558,8 @@ show_cpuinfo (struct seq_file *m, void *v)
"cpu MHz : %lu.%06lu\n"
"itc MHz : %lu.%06lu\n"
"BogoMIPS : %lu.%02lu\n",
- cpunum, c->vendor, family, c->model, c->revision, c->archrev,
+ cpunum, c->vendor, c->family, c->model,
+ c->model_name, c->revision, c->archrev,
features, c->ppn, c->number,
proc_freq / 1000, proc_freq % 1000,
c->itc_freq / 1000000, c->itc_freq % 1000000,
@@ -611,6 +607,31 @@ struct seq_operations cpuinfo_op = {
.show = show_cpuinfo
};
+static char brandname[128];
+
+static char * __cpuinit
+get_model_name(__u8 family, __u8 model)
+{
+ char brand[128];
+
+ if (ia64_pal_get_brand_info(brand)) {
+ if (family == 0x7)
+ memcpy(brand, "Merced", 7);
+ else if (family == 0x1f) switch (model) {
+ case 0: memcpy(brand, "McKinley", 9); break;
+ case 1: memcpy(brand, "Madison", 8); break;
+ case 2: memcpy(brand, "Madison up to 9M cache", 23); break;
+ } else
+ memcpy(brand, "Unknown", 8);
+ }
+ if (brandname[0] == '\0')
+ return strcpy(brandname, brand);
+ else if (strcmp(brandname, brand) == 0)
+ return brandname;
+ else
+ return kstrdup(brand, GFP_KERNEL);
+}
+
static void __cpuinit
identify_cpu (struct cpuinfo_ia64 *c)
{
@@ -640,7 +661,6 @@ identify_cpu (struct cpuinfo_ia64 *c)
pal_status_t status;
unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */
int i;
-
for (i = 0; i < 5; ++i)
cpuid.bits[i] = ia64_get_cpuid(i);
@@ -663,6 +683,7 @@ identify_cpu (struct cpuinfo_ia64 *c)
c->family = cpuid.field.family;
c->archrev = cpuid.field.archrev;
c->features = cpuid.field.features;
+ c->model_name = get_model_name(c->family, c->model);
status = ia64_pal_vm_summary(&vm1, &vm2);
if (status == PAL_STATUS_SUCCESS) {
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 6203ed4ec8c..f7d7f566814 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -879,3 +879,27 @@ identify_siblings(struct cpuinfo_ia64 *c)
c->core_id = info.log1_cid;
c->thread_id = info.log1_tid;
}
+
+/*
+ * returns non zero, if multi-threading is enabled
+ * on at least one physical package. Due to hotplug cpu
+ * and (maxcpus=), all threads may not necessarily be enabled
+ * even though the processor supports multi-threading.
+ */
+int is_multithreading_enabled(void)
+{
+ int i, j;
+
+ for_each_present_cpu(i) {
+ for_each_present_cpu(j) {
+ if (j == i)
+ continue;
+ if ((cpu_data(j)->socket_id == cpu_data(i)->socket_id)) {
+ if (cpu_data(j)->core_id == cpu_data(i)->core_id)
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(is_multithreading_enabled);
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index f648c610b10..5629b45e89c 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,6 +36,7 @@ int arch_register_cpu(int num)
*/
if (!can_cpei_retarget() && is_cpu_cpei_target(num))
sysfs_cpus[num].cpu.no_control = 1;
+ map_cpu_to_node(num, node_cpuid[num].nid);
#endif
return register_cpu(&sysfs_cpus[num].cpu, num);
@@ -45,7 +46,8 @@ int arch_register_cpu(int num)
void arch_unregister_cpu(int num)
{
- return unregister_cpu(&sysfs_cpus[num].cpu);
+ unregister_cpu(&sysfs_cpus[num].cpu);
+ unmap_cpu_from_node(num, cpu_to_node(num));
}
EXPORT_SYMBOL(arch_register_cpu);
EXPORT_SYMBOL(arch_unregister_cpu);
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 4c73a676366..c58e933694d 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -98,7 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
/* attempt to allocate a granule's worth of cached memory pages */
- page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO,
+ page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
IA64_GRANULE_SHIFT-PAGE_SHIFT);
if (!page) {
mutex_unlock(&uc_pool->add_chunk_mutex);
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 5b0d5f64a9b..b3b2e389d6b 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -184,7 +184,9 @@ SECTIONS
*(.data.gate)
__stop_gate_section = .;
}
- . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */
+ . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose
+ * kernel data
+ */
.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET)
{ *(.data.read_mostly) }
@@ -202,7 +204,9 @@ SECTIONS
*(.data.percpu)
__per_cpu_end = .;
}
- . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */
+ . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits
+ * into percpu page size
+ */
data : { } :data
.data : AT(ADDR(.data) - LOAD_OFFSET)
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index e004143ba86..daf977ff292 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -26,7 +26,6 @@
#include <asm/mca.h>
#ifdef CONFIG_VIRTUAL_MEM_MAP
-static unsigned long num_dma_physpages;
static unsigned long max_gap;
#endif
@@ -41,10 +40,11 @@ show_mem (void)
int i, total = 0, reserved = 0;
int shared = 0, cached = 0;
- printk("Mem-info:\n");
+ printk(KERN_INFO "Mem-info:\n");
show_free_areas();
- printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+ printk(KERN_INFO "Free swap: %6ldkB\n",
+ nr_swap_pages<<(PAGE_SHIFT-10));
i = max_mapnr;
for (i = 0; i < max_mapnr; i++) {
if (!pfn_valid(i)) {
@@ -63,12 +63,12 @@ show_mem (void)
else if (page_count(mem_map + i))
shared += page_count(mem_map + i) - 1;
}
- printk("%d pages of RAM\n", total);
- printk("%d reserved pages\n", reserved);
- printk("%d pages shared\n", shared);
- printk("%d pages swap cached\n", cached);
- printk("%ld pages in page table cache\n",
- pgtable_quicklist_total_size());
+ printk(KERN_INFO "%d pages of RAM\n", total);
+ printk(KERN_INFO "%d reserved pages\n", reserved);
+ printk(KERN_INFO "%d pages shared\n", shared);
+ printk(KERN_INFO "%d pages swap cached\n", cached);
+ printk(KERN_INFO "%ld pages in page table cache\n",
+ pgtable_quicklist_total_size());
}
/* physical address where the bootmem map is located */
@@ -218,18 +218,6 @@ count_pages (u64 start, u64 end, void *arg)
return 0;
}
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-static int
-count_dma_pages (u64 start, u64 end, void *arg)
-{
- unsigned long *count = arg;
-
- if (start < MAX_DMA_ADDRESS)
- *count += (min(end, MAX_DMA_ADDRESS) - start) >> PAGE_SHIFT;
- return 0;
-}
-#endif
-
/*
* Set up the page tables.
*/
@@ -238,45 +226,22 @@ void __init
paging_init (void)
{
unsigned long max_dma;
- unsigned long zones_size[MAX_NR_ZONES];
-#ifdef CONFIG_VIRTUAL_MEM_MAP
- unsigned long zholes_size[MAX_NR_ZONES];
-#endif
-
- /* initialize mem_map[] */
-
- memset(zones_size, 0, sizeof(zones_size));
+ unsigned long nid = 0;
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
num_physpages = 0;
efi_memmap_walk(count_pages, &num_physpages);
max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+ max_zone_pfns[ZONE_DMA] = max_dma;
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_VIRTUAL_MEM_MAP
- memset(zholes_size, 0, sizeof(zholes_size));
-
- num_dma_physpages = 0;
- efi_memmap_walk(count_dma_pages, &num_dma_physpages);
-
- if (max_low_pfn < max_dma) {
- zones_size[ZONE_DMA] = max_low_pfn;
- zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages;
- } else {
- zones_size[ZONE_DMA] = max_dma;
- zholes_size[ZONE_DMA] = max_dma - num_dma_physpages;
- if (num_physpages > num_dma_physpages) {
- zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
- zholes_size[ZONE_NORMAL] =
- ((max_low_pfn - max_dma) -
- (num_physpages - num_dma_physpages));
- }
- }
-
+ efi_memmap_walk(register_active_ranges, &nid);
efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
if (max_gap < LARGE_GAP) {
vmem_map = (struct page *) 0;
- free_area_init_node(0, NODE_DATA(0), zones_size, 0,
- zholes_size);
+ free_area_init_nodes(max_zone_pfns);
} else {
unsigned long map_size;
@@ -288,20 +253,19 @@ paging_init (void)
vmem_map = (struct page *) vmalloc_end;
efi_memmap_walk(create_mem_map_page_table, NULL);
- NODE_DATA(0)->node_mem_map = vmem_map;
- free_area_init_node(0, NODE_DATA(0), zones_size,
- 0, zholes_size);
+ /*
+ * alloc_node_mem_map makes an adjustment for mem_map
+ * which isn't compatible with vmem_map.
+ */
+ NODE_DATA(0)->node_mem_map = vmem_map +
+ find_min_pfn_with_active_regions();
+ free_area_init_nodes(max_zone_pfns);
printk("Virtual mem_map starts at 0x%p\n", mem_map);
}
#else /* !CONFIG_VIRTUAL_MEM_MAP */
- if (max_low_pfn < max_dma)
- zones_size[ZONE_DMA] = max_low_pfn;
- else {
- zones_size[ZONE_DMA] = max_dma;
- zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
- }
- free_area_init(zones_size);
+ add_active_range(0, 0, max_low_pfn);
+ free_area_init_nodes(max_zone_pfns);
#endif /* !CONFIG_VIRTUAL_MEM_MAP */
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index d260bffa01a..d497b6b0f5b 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -547,15 +547,16 @@ void show_mem(void)
unsigned long total_present = 0;
pg_data_t *pgdat;
- printk("Mem-info:\n");
+ printk(KERN_INFO "Mem-info:\n");
show_free_areas();
- printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+ printk(KERN_INFO "Free swap: %6ldkB\n",
+ nr_swap_pages<<(PAGE_SHIFT-10));
+ printk(KERN_INFO "Node memory in pages:\n");
for_each_online_pgdat(pgdat) {
unsigned long present;
unsigned long flags;
int shared = 0, cached = 0, reserved = 0;
- printk("Node ID: %d\n", pgdat->node_id);
pgdat_resize_lock(pgdat, &flags);
present = pgdat->node_present_pages;
for(i = 0; i < pgdat->node_spanned_pages; i++) {
@@ -579,18 +580,17 @@ void show_mem(void)
total_reserved += reserved;
total_cached += cached;
total_shared += shared;
- printk("\t%ld pages of RAM\n", present);
- printk("\t%d reserved pages\n", reserved);
- printk("\t%d pages shared\n", shared);
- printk("\t%d pages swap cached\n", cached);
+ printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, "
+ "shrd: %10d, swpd: %10d\n", pgdat->node_id,
+ present, reserved, shared, cached);
}
- printk("%ld pages of RAM\n", total_present);
- printk("%d reserved pages\n", total_reserved);
- printk("%d pages shared\n", total_shared);
- printk("%d pages swap cached\n", total_cached);
- printk("Total of %ld pages in page table cache\n",
- pgtable_quicklist_total_size());
- printk("%d free buffer pages\n", nr_free_buffer_pages());
+ printk(KERN_INFO "%ld pages of RAM\n", total_present);
+ printk(KERN_INFO "%d reserved pages\n", total_reserved);
+ printk(KERN_INFO "%d pages shared\n", total_shared);
+ printk(KERN_INFO "%d pages swap cached\n", total_cached);
+ printk(KERN_INFO "Total of %ld pages in page table cache\n",
+ pgtable_quicklist_total_size());
+ printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
}
/**
@@ -654,6 +654,7 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n
{
unsigned long end = start + len;
+ add_active_range(node, start >> PAGE_SHIFT, end >> PAGE_SHIFT);
mem_data[node].num_physpages += len >> PAGE_SHIFT;
if (start <= __pa(MAX_DMA_ADDRESS))
mem_data[node].num_dma_physpages +=
@@ -678,10 +679,10 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n
void __init paging_init(void)
{
unsigned long max_dma;
- unsigned long zones_size[MAX_NR_ZONES];
- unsigned long zholes_size[MAX_NR_ZONES];
unsigned long pfn_offset = 0;
+ unsigned long max_pfn = 0;
int node;
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
@@ -698,47 +699,20 @@ void __init paging_init(void)
#endif
for_each_online_node(node) {
- memset(zones_size, 0, sizeof(zones_size));
- memset(zholes_size, 0, sizeof(zholes_size));
-
num_physpages += mem_data[node].num_physpages;
-
- if (mem_data[node].min_pfn >= max_dma) {
- /* All of this node's memory is above ZONE_DMA */
- zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
- mem_data[node].min_pfn;
- zholes_size[ZONE_NORMAL] = mem_data[node].max_pfn -
- mem_data[node].min_pfn -
- mem_data[node].num_physpages;
- } else if (mem_data[node].max_pfn < max_dma) {
- /* All of this node's memory is in ZONE_DMA */
- zones_size[ZONE_DMA] = mem_data[node].max_pfn -
- mem_data[node].min_pfn;
- zholes_size[ZONE_DMA] = mem_data[node].max_pfn -
- mem_data[node].min_pfn -
- mem_data[node].num_dma_physpages;
- } else {
- /* This node has memory in both zones */
- zones_size[ZONE_DMA] = max_dma -
- mem_data[node].min_pfn;
- zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
- mem_data[node].num_dma_physpages;
- zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
- max_dma;
- zholes_size[ZONE_NORMAL] = zones_size[ZONE_NORMAL] -
- (mem_data[node].num_physpages -
- mem_data[node].num_dma_physpages);
- }
-
pfn_offset = mem_data[node].min_pfn;
#ifdef CONFIG_VIRTUAL_MEM_MAP
NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
#endif
- free_area_init_node(node, NODE_DATA(node), zones_size,
- pfn_offset, zholes_size);
+ if (mem_data[node].max_pfn > max_pfn)
+ max_pfn = mem_data[node].max_pfn;
}
+ max_zone_pfns[ZONE_DMA] = max_dma;
+ max_zone_pfns[ZONE_NORMAL] = max_pfn;
+ free_area_init_nodes(max_zone_pfns);
+
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 30617ccb4f7..ff87a5cba39 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -593,6 +593,18 @@ find_largest_hole (u64 start, u64 end, void *arg)
last_end = end;
return 0;
}
+
+int __init
+register_active_ranges(u64 start, u64 end, void *nid)
+{
+ BUG_ON(nid == NULL);
+ BUG_ON(*(unsigned long *)nid >= MAX_NUMNODES);
+
+ add_active_range(*(unsigned long *)nid,
+ __pa(start) >> PAGE_SHIFT,
+ __pa(end) >> PAGE_SHIFT);
+ return 0;
+}
#endif /* CONFIG_VIRTUAL_MEM_MAP */
static int __init
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 60b45e79f08..15c7c670da3 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -562,7 +562,8 @@ pcibios_enable_device (struct pci_dev *dev, int mask)
void
pcibios_disable_device (struct pci_dev *dev)
{
- acpi_pci_irq_disable(dev);
+ if (dev->is_enabled)
+ acpi_pci_irq_disable(dev);
}
void
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index 27dee458406..7f73ad4408a 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -277,8 +277,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
}
/* temporary buffer used during unaligned transfers */
- bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES,
- GFP_KERNEL | GFP_DMA);
+ bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL);
if (bteBlock_unaligned == NULL) {
return BTEFAIL_NOTAVAIL;
}
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 9a8a29339d2..b632b9c1e3b 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -32,9 +32,10 @@
#include <linux/cpumask.h>
#include <linux/smp_lock.h>
#include <linux/nodemask.h>
+#include <linux/smp.h>
+
#include <asm/processor.h>
#include <asm/topology.h>
-#include <asm/smp.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
#include <asm/sal.h>