Diffstat (limited to 'arch/ppc64/kernel')
58 files changed, 3897 insertions, 1821 deletions
diff --git a/arch/ppc64/kernel/ItLpQueue.c b/arch/ppc64/kernel/ItLpQueue.c
index cdea00d7707..4231861288a 100644
--- a/arch/ppc64/kernel/ItLpQueue.c
+++ b/arch/ppc64/kernel/ItLpQueue.c
@@ -1,7 +1,7 @@
 /*
  * ItLpQueue.c
  * Copyright (C) 2001 Mike Corrigan IBM Corporation
- * 
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -11,156 +11,252 @@
 #include <linux/stddef.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
 #include <asm/system.h>
 #include <asm/paca.h>
 #include <asm/iSeries/ItLpQueue.h>
 #include <asm/iSeries/HvLpEvent.h>
 #include <asm/iSeries/HvCallEvent.h>
 
-static __inline__ int set_inUse( struct ItLpQueue * lpQueue )
-{
-	int t;
-	u32 * inUseP = &(lpQueue->xInUseWord);
-
-	__asm__ __volatile__("\n\
-1:	lwarx	%0,0,%2		\n\
-	cmpwi	0,%0,0		\n\
-	li	%0,0		\n\
-	bne-	2f		\n\
-	addi	%0,%0,1		\n\
-	stwcx.	%0,0,%2		\n\
-	bne-	1b		\n\
-2:	eieio"
-			: "=&r" (t), "=m" (lpQueue->xInUseWord)
-			: "r" (inUseP), "m" (lpQueue->xInUseWord)
-			: "cc");
-
-	return t;
-}
+/*
+ * The LpQueue is used to pass event data from the hypervisor to
+ * the partition. This is where I/O interrupt events are communicated.
+ *
+ * It is written to by the hypervisor so cannot end up in the BSS.
+ */
+struct hvlpevent_queue hvlpevent_queue __attribute__((__section__(".data")));
 
-static __inline__ void clear_inUse( struct ItLpQueue * lpQueue )
-{
-	lpQueue->xInUseWord = 0;
-}
+DEFINE_PER_CPU(unsigned long[HvLpEvent_Type_NumTypes], hvlpevent_counts);
+
+static char *event_types[HvLpEvent_Type_NumTypes] = {
+	"Hypervisor",
+	"Machine Facilities",
+	"Session Manager",
+	"SPD I/O",
+	"Virtual Bus",
+	"PCI I/O",
+	"RIO I/O",
+	"Virtual Lan",
+	"Virtual I/O"
+};
 
 /* Array of LpEvent handler functions */
 extern LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes];
-unsigned long ItLpQueueInProcess = 0;
 
-struct HvLpEvent * ItLpQueue_getNextLpEvent( struct ItLpQueue * lpQueue )
+static struct HvLpEvent * get_next_hvlpevent(void)
 {
-	struct HvLpEvent * nextLpEvent = 
-		(struct HvLpEvent *)lpQueue->xSlicCurEventPtr;
-	if ( nextLpEvent->xFlags.xValid ) {
+	struct HvLpEvent * event;
+	event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr;
+
+	if (event->xFlags.xValid) {
 		/* rmb() needed only for weakly consistent machines (regatta) */
 		rmb();
 		/* Set pointer to next potential event */
-		lpQueue->xSlicCurEventPtr += ((nextLpEvent->xSizeMinus1 +
-				      LpEventAlign ) /
-				      LpEventAlign ) *
-				      LpEventAlign;
+		hvlpevent_queue.xSlicCurEventPtr += ((event->xSizeMinus1 +
+				LpEventAlign) / LpEventAlign) * LpEventAlign;
+
 		/* Wrap to beginning if no room at end */
-		if (lpQueue->xSlicCurEventPtr > lpQueue->xSlicLastValidEventPtr)
-			lpQueue->xSlicCurEventPtr = lpQueue->xSlicEventStackPtr;
+		if (hvlpevent_queue.xSlicCurEventPtr >
+				hvlpevent_queue.xSlicLastValidEventPtr) {
+			hvlpevent_queue.xSlicCurEventPtr =
+				hvlpevent_queue.xSlicEventStackPtr;
+		}
+	} else {
+		event = NULL;
 	}
-	else 
-		nextLpEvent = NULL;
 
-	return nextLpEvent;
+	return event;
 }
 
-int ItLpQueue_isLpIntPending( struct ItLpQueue * lpQueue )
+static unsigned long spread_lpevents = NR_CPUS;
+
+int hvlpevent_is_pending(void)
 {
-	int retval = 0;
-	struct HvLpEvent * nextLpEvent;
-	if ( lpQueue ) {
-		nextLpEvent = (struct HvLpEvent *)lpQueue->xSlicCurEventPtr;
-		retval = nextLpEvent->xFlags.xValid |
-			lpQueue->xPlicOverflowIntPending;
-	}
-	return retval;
+	struct HvLpEvent *next_event;
+
+	if (smp_processor_id() >= spread_lpevents)
+		return 0;
+
+	next_event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr;
+
+	return next_event->xFlags.xValid |
+		hvlpevent_queue.xPlicOverflowIntPending;
 }
 
-void ItLpQueue_clearValid( struct HvLpEvent * event )
+static void hvlpevent_clear_valid(struct HvLpEvent * event)
 {
-	/* Clear the valid bit of the event
-	 * Also clear bits within this event that might
-	 * look like valid bits (on 64-byte boundaries)
-	 */
-	unsigned extra = (( event->xSizeMinus1 + LpEventAlign ) /
-						 LpEventAlign ) - 1;
-	switch ( extra ) {
-	  case 3:
-	   ((struct HvLpEvent*)((char*)event+3*LpEventAlign))->xFlags.xValid=0;
-	  case 2:
-	   ((struct HvLpEvent*)((char*)event+2*LpEventAlign))->xFlags.xValid=0;
-	  case 1:
-	   ((struct HvLpEvent*)((char*)event+1*LpEventAlign))->xFlags.xValid=0;
-	  case 0:
-	   ;
+	/* Tell the Hypervisor that we're done with this event.
+	 * Also clear bits within this event that might look like valid bits.
+	 * ie. on 64-byte boundaries.
+	 */
+	struct HvLpEvent *tmp;
+	unsigned extra = ((event->xSizeMinus1 + LpEventAlign) /
+			  LpEventAlign) - 1;
+
+	switch (extra) {
+	case 3:
+		tmp = (struct HvLpEvent*)((char*)event + 3 * LpEventAlign);
+		tmp->xFlags.xValid = 0;
+	case 2:
+		tmp = (struct HvLpEvent*)((char*)event + 2 * LpEventAlign);
+		tmp->xFlags.xValid = 0;
+	case 1:
+		tmp = (struct HvLpEvent*)((char*)event + 1 * LpEventAlign);
+		tmp->xFlags.xValid = 0;
 	}
+
+	mb();
+
+	event->xFlags.xValid = 0;
 }
 
-unsigned ItLpQueue_process( struct ItLpQueue * lpQueue, struct pt_regs *regs )
+void process_hvlpevents(struct pt_regs *regs)
 {
-	unsigned numIntsProcessed = 0;
-	struct HvLpEvent * nextLpEvent;
+	struct HvLpEvent * event;
 
 	/* If we have recursed, just return */
-	if ( !set_inUse( lpQueue ) )
-		return 0;
-
-	if (ItLpQueueInProcess == 0)
-		ItLpQueueInProcess = 1;
-	else
-		BUG();
+	if (!spin_trylock(&hvlpevent_queue.lock))
+		return;
 
 	for (;;) {
-		nextLpEvent = ItLpQueue_getNextLpEvent( lpQueue );
-		if ( nextLpEvent ) {
-			/* Count events to return to caller
-			 * and count processed events in lpQueue
-			 */
-			++numIntsProcessed;
-			lpQueue->xLpIntCount++;
-			/* Call appropriate handler here, passing 
+		event = get_next_hvlpevent();
+		if (event) {
+			/* Call appropriate handler here, passing
 			 * a pointer to the LpEvent. The handler
 			 * must make a copy of the LpEvent if it
 			 * needs it in a bottom half. (perhaps for
 			 * an ACK)
-			 * 
-			 * Handlers are responsible for ACK processing 
+			 *
+			 * Handlers are responsible for ACK processing
 			 *
 			 * The Hypervisor guarantees that LpEvents will
 			 * only be delivered with types that we have
 			 * registered for, so no type check is necessary
 			 * here!
-			 */
-			if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes )
-				lpQueue->xLpIntCountByType[nextLpEvent->xType]++;
-			if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes &&
-			     lpEventHandler[nextLpEvent->xType] ) 
-				lpEventHandler[nextLpEvent->xType](nextLpEvent, regs);
+			 */
+			if (event->xType < HvLpEvent_Type_NumTypes)
+				__get_cpu_var(hvlpevent_counts)[event->xType]++;
+			if (event->xType < HvLpEvent_Type_NumTypes &&
+					lpEventHandler[event->xType])
+				lpEventHandler[event->xType](event, regs);
 			else
-				printk(KERN_INFO "Unexpected Lp Event type=%d\n", nextLpEvent->xType );
-
-			ItLpQueue_clearValid( nextLpEvent );
-		} else if ( lpQueue->xPlicOverflowIntPending )
+				printk(KERN_INFO "Unexpected Lp Event type=%d\n", event->xType );
+
+			hvlpevent_clear_valid(event);
+		} else if (hvlpevent_queue.xPlicOverflowIntPending)
 			/*
 			 * No more valid events. If overflow events are
 			 * pending process them
 			 */
-			HvCallEvent_getOverflowLpEvents( lpQueue->xIndex);
+			HvCallEvent_getOverflowLpEvents(hvlpevent_queue.xIndex);
 		else
 			break;
 	}
 
-	ItLpQueueInProcess = 0;
-	mb();
-	clear_inUse( lpQueue );
-
-	get_paca()->lpevent_count += numIntsProcessed;
-
-	return numIntsProcessed;
+	spin_unlock(&hvlpevent_queue.lock);
+}
+
+static int set_spread_lpevents(char *str)
+{
+	unsigned long val = simple_strtoul(str, NULL, 0);
+
+	/*
+	 * The parameter is the number of processors to share in processing
+	 * lp events.
+	 */
+	if (( val > 0) && (val <= NR_CPUS)) {
+		spread_lpevents = val;
+		printk("lpevent processing spread over %ld processors\n", val);
+	} else {
+		printk("invalid spread_lpevents %ld\n", val);
+	}
+
+	return 1;
+}
+__setup("spread_lpevents=", set_spread_lpevents);
+
+void setup_hvlpevent_queue(void)
+{
+	void *eventStack;
+
+	/*
+	 * Allocate a page for the Event Stack. The Hypervisor needs the
+	 * absolute real address, so we subtract out the KERNELBASE and add
+	 * in the absolute real address of the kernel load area.
+	 */
+	eventStack = alloc_bootmem_pages(LpEventStackSize);
+	memset(eventStack, 0, LpEventStackSize);
+
+	/* Invoke the hypervisor to initialize the event stack */
+	HvCallEvent_setLpEventStack(0, eventStack, LpEventStackSize);
+
+	hvlpevent_queue.xSlicEventStackPtr = (char *)eventStack;
+	hvlpevent_queue.xSlicCurEventPtr = (char *)eventStack;
+	hvlpevent_queue.xSlicLastValidEventPtr = (char *)eventStack +
+					(LpEventStackSize - LpEventMaxSize);
+	hvlpevent_queue.xIndex = 0;
+}
+
+static int proc_lpevents_show(struct seq_file *m, void *v)
+{
+	int cpu, i;
+	unsigned long sum;
+	static unsigned long cpu_totals[NR_CPUS];
+
+	/* FIXME: do we care that there's no locking here? */
+	sum = 0;
+	for_each_online_cpu(cpu) {
+		cpu_totals[cpu] = 0;
+		for (i = 0; i < HvLpEvent_Type_NumTypes; i++) {
+			cpu_totals[cpu] += per_cpu(hvlpevent_counts, cpu)[i];
+		}
+		sum += cpu_totals[cpu];
+	}
+
+	seq_printf(m, "LpEventQueue 0\n");
+	seq_printf(m, "  events processed:\t%lu\n", sum);
+
+	for (i = 0; i < HvLpEvent_Type_NumTypes; ++i) {
+		sum = 0;
+		for_each_online_cpu(cpu) {
+			sum += per_cpu(hvlpevent_counts, cpu)[i];
+		}
+
+		seq_printf(m, "    %-20s %10lu\n", event_types[i], sum);
+	}
+
+	seq_printf(m, "\n  events processed by processor:\n");
+
+	for_each_online_cpu(cpu) {
+		seq_printf(m, "    CPU%02d  %10lu\n", cpu, cpu_totals[cpu]);
+	}
+
+	return 0;
+}
+
+static int proc_lpevents_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_lpevents_show, NULL);
+}
+
+static struct file_operations proc_lpevents_operations = {
+	.open		= proc_lpevents_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init proc_lpevents_init(void)
+{
+	struct proc_dir_entry *e;
+
+	e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL);
+	if (e)
+		e->proc_fops = &proc_lpevents_operations;
+
+	return 0;
+}
+__initcall(proc_lpevents_init);
+
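Note on the queue walk above: get_next_hvlpevent() advances the cursor by the event's size rounded up to the LpEventAlign grain, and wraps to the start of the event stack once the cursor passes the last valid slot. A minimal standalone sketch of that arithmetic, outside kernel context (the names and the 32-byte grain here are illustrative, not the kernel's):

	#include <stddef.h>

	#define LP_EVENT_ALIGN 32	/* illustration only; the real grain comes from the iSeries headers */

	/* Round a size (stored as size-minus-one) up to the alignment grain. */
	static size_t lp_event_stride(unsigned size_minus_1)
	{
		return ((size_minus_1 + LP_EVENT_ALIGN) / LP_EVENT_ALIGN) * LP_EVENT_ALIGN;
	}

	/* Advance a cursor through the event stack, wrapping past the last valid slot. */
	static char *lp_next_cursor(char *cur, unsigned size_minus_1,
				    char *stack, char *last_valid)
	{
		cur += lp_event_stride(size_minus_1);
		return (cur > last_valid) ? stack : cur;
	}

Because xSizeMinus1 stores size-1, an event of exactly one grain (size_minus_1 = 31 with a 32-byte grain) yields a stride of 32, and a 33-byte event yields 64.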
diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c
index badc5a44361..76cfd1449d5 100644
--- a/arch/ppc64/kernel/LparData.c
+++ b/arch/ppc64/kernel/LparData.c
@@ -28,29 +28,41 @@
 #include <asm/iSeries/IoHriProcessorVpd.h>
 #include <asm/iSeries/ItSpCommArea.h>
 
-/* The LpQueue is used to pass event data from the hypervisor to
- * the partition. This is where I/O interrupt events are communicated.
- */
-
-/* May be filled in by the hypervisor so cannot end up in the BSS */
-struct ItLpQueue xItLpQueue __attribute__((__section__(".data")));
-
 /* The HvReleaseData is the root of the information shared between
  * the hypervisor and Linux.
  */
+/*
+ * WARNING - magic here
+ *
+ * Ok, this is a horrid hack below, but marginally better than the
+ * alternatives. What we really want is just to initialize
+ * hvReleaseData in C as in the #if 0 section here. However, gcc
+ * refuses to believe that (u32)&x is a constant expression, so will
+ * not allow the xMsNucDataOffset field to be properly initialized.
+ * So, we declare hvReleaseData in inline asm instead. We use inline
+ * asm, rather than a .S file, because the assembler won't generate
+ * the necessary relocation for the LparMap either, unless that symbol
+ * is declared in the same source file. Finally, we put the asm in a
+ * dummy, attribute-used function, instead of at file scope, because
+ * file scope asms don't allow constraints. We want to use the "i"
+ * constraints to put sizeof() and offsetof() expressions in there,
+ * because including asm/offsets.h in C code then stringifying causes
+ * all manner of warnings.
+ */
+#if 0
 struct HvReleaseData hvReleaseData = {
 	.xDesc = 0xc8a5d9c4,	/* "HvRD" ebcdic */
 	.xSize = sizeof(struct HvReleaseData),
 	.xVpdAreasPtrOffset = offsetof(struct naca_struct, xItVpdAreas),
 	.xSlicNacaAddr = &naca,		/* 64-bit Naca address */
-	.xMsNucDataOffset = 0x4800,	/* offset of LparMap within loadarea (see head.S) */
-	.xTagsMode = 1,			/* tags inactive */
-	.xAddressSize = 0,		/* 64 bit */
-	.xNoSharedProcs = 0,		/* shared processors */
-	.xNoHMT = 0,			/* HMT allowed */
-	.xRsvd2 = 6,			/* TEMP: This allows non-GA driver */
+	.xMsNucDataOffset = (u32)((unsigned long)&xLparMap - KERNELBASE),
+	.xFlags = HVREL_TAGSINACTIVE	/* tags inactive */
+					/* 64 bit */
+					/* shared processors */
+					/* HMT allowed */
+		  | 6,			/* TEMP: This allows non-GA driver */
 	.xVrmIndex = 4,		/* We are v5r2m0 */
 	.xMinSupportedPlicVrmIndex = 3,		/* v5r1m0 */
 	.xMinCompatablePlicVrmIndex = 3,	/* v5r1m0 */
@@ -58,6 +70,63 @@ struct HvReleaseData hvReleaseData = {
 		0xa7, 0x40, 0xf2, 0x4b, 0xf4, 0x4b, 0xf6, 0xf4 },
 };
+#endif
+
+
+extern struct HvReleaseData hvReleaseData;
+
+static void __attribute_used__ hvReleaseData_wrapper(void)
+{
+	/* This doesn't appear to need any alignment (even 4 byte) */
+	asm volatile (
+		"	lparMapPhys = xLparMap - %3\n"
+		"	.data\n"
+		"	.globl	hvReleaseData\n"
+		"hvReleaseData:\n"
+		"	.long	0xc8a5d9c4\n"	/* xDesc */
+						/* "HvRD" in ebcdic */
+		"	.short	%0\n"		/* xSize */
+		"	.short	%1\n"		/* xVpdAreasPtrOffset */
+		"	.llong	naca\n"		/* xSlicNacaAddr */
+		"	.long	lparMapPhys\n"	/* xMsNucDataOffset */
+		"	.long	0\n"		/* xRsvd1 */
+		"	.short	%2\n"		/* xFlags */
+		"	.short	4\n"	/* xVrmIndex - v5r2m0 */
+		"	.short	3\n"	/* xMinSupportedPlicVrmIndex - v5r1m0 */
+		"	.short	3\n"	/* xMinCompatablePlicVrmIndex - v5r1m0 */
+		"	.long	0xd38995a4\n"	/* xVrmName */
+		"	.long	0xa740f24b\n"	/* "Linux 2.4.64" ebcdic */
+		"	.long	0xf44bf6f4\n"
+		"	. = hvReleaseData + %0\n"
+		"	.previous\n"
+		: : "i"(sizeof(hvReleaseData)),
+		    "i"(offsetof(struct naca_struct, xItVpdAreas)),
+		    "i"(HVREL_TAGSINACTIVE	/* tags inactive, 64 bit, */
+						/* shared processors, HMT allowed */
+			| 6),			/* TEMP: This allows non-GA drivers */
+		    "i"(KERNELBASE)
+	);
+}
+
+struct LparMap __attribute__((aligned (16))) xLparMap = {
+	.xNumberEsids = HvEsidsToMap,
+	.xNumberRanges = HvRangesToMap,
+	.xSegmentTableOffs = STAB0_PAGE,
+
+	.xEsids = {
+		{ .xKernelEsid = GET_ESID(KERNELBASE),
+		  .xKernelVsid = KERNEL_VSID(KERNELBASE), },
+		{ .xKernelEsid = GET_ESID(VMALLOCBASE),
+		  .xKernelVsid = KERNEL_VSID(VMALLOCBASE), },
+	},
+
+	.xRanges = {
+		{ .xPages = HvPagesToMap,
+		  .xOffset = 0,
+		  .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT),
+		},
+	},
+};
 
 extern void system_reset_iSeries(void);
 extern void machine_check_iSeries(void);
@@ -200,7 +269,7 @@ struct ItVpdAreas itVpdAreas = {
 		0,0,0,			/* 13 - 15 */
 		sizeof(struct IoHriProcessorVpd),/* 16 length of Proc Vpd */
 		0,0,0,0,0,0,		/* 17 - 22 */
-		sizeof(struct ItLpQueue),/* 23 length of Lp Queue */
+		sizeof(struct hvlpevent_queue),	/* 23 length of Lp Queue */
 		0,0			/* 24 - 25 */
 		},
 	.xSlicVpdAdrs = {			/* VPD addresses */
@@ -218,7 +287,7 @@ struct ItVpdAreas itVpdAreas = {
 		0,0,0,			/* 13 - 15 */
 		&xIoHriProcessorVpd,	/* 16 Proc Vpd */
 		0,0,0,0,0,0,		/* 17 - 22 */
-		&xItLpQueue,		/* 23 Lp Queue */
+		&hvlpevent_queue,	/* 23 Lp Queue */
 		0,0
 	}
 };
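The "WARNING - magic here" comment above is worth spelling out: gcc rejects `(u32)&x` as a static initializer because a truncated address is not a link-time constant, but an asm "i" constraint happily accepts sizeof()/offsetof() expressions, and an asm placed inside a dummy attribute-used function may carry constraints while a file-scope asm may not. A stripped-down sketch of the same pattern, with a hypothetical struct rather than the kernel's HvReleaseData:

	#include <stddef.h>

	struct shared_desc {
		unsigned int magic;
		unsigned short size;
		unsigned short payload_off;
	};

	struct payload { char pad[16]; char data[8]; };

	/* Emit `desc` into .data from asm so that field values can come
	 * from "i" constraints; the dummy used function exists because
	 * file-scope asm statements cannot take constraints. */
	static void __attribute__((used)) emit_desc(void)
	{
		asm volatile(
			"	.data\n"
			"	.globl	desc\n"
			"desc:\n"
			"	.long	0x12345678\n"	/* magic */
			"	.short	%0\n"		/* size */
			"	.short	%1\n"		/* payload_off */
			"	.previous\n"
			: : "i"(sizeof(struct shared_desc)),
			    "i"(offsetof(struct payload, data)));
	}

The trailing `. = hvReleaseData + %0` in the real version serves the same purpose as padding: it asserts that exactly sizeof(hvReleaseData) bytes were emitted.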
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile
index b5e167cf1a0..d9b2660ef22 100644
--- a/arch/ppc64/kernel/Makefile
+++ b/arch/ppc64/kernel/Makefile
@@ -27,17 +27,22 @@ obj-$(CONFIG_PPC_ISERIES) += HvCall.o HvLpConfig.o LparData.o \
 			     mf.o HvLpEvent.o iSeries_proc.o iSeries_htab.o \
 			     iSeries_iommu.o
 
-obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o i8259.o prom_init.o prom.o mpic.o
+obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o i8259.o prom_init.o prom.o
 
 obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \
 			     pSeries_nvram.o rtasd.o ras.o pSeries_reconfig.o \
-			     xics.o rtas.o pSeries_setup.o pSeries_iommu.o
+			     pSeries_setup.o pSeries_iommu.o
+
+obj-$(CONFIG_PPC_BPA) += bpa_setup.o bpa_iommu.o bpa_nvram.o \
+			 bpa_iic.o spider-pic.o
+
+obj-$(CONFIG_KEXEC) += machine_kexec.o
 obj-$(CONFIG_EEH) += eeh.o
 obj-$(CONFIG_PROC_FS) += proc_ppc64.o
 obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
 obj-$(CONFIG_SMP) += smp.o
 obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o
+obj-$(CONFIG_PPC_RTAS) += rtas.o rtas_pci.o
 obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
 obj-$(CONFIG_SCANLOG) += scanlog.o
 obj-$(CONFIG_VIOPATH) += viopath.o
@@ -46,6 +51,8 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
 obj-$(CONFIG_BOOTX_TEXT) += btext.o
 obj-$(CONFIG_HVCS) += hvcserver.o
 obj-$(CONFIG_IBMVIO) += vio.o
+obj-$(CONFIG_XICS) += xics.o
+obj-$(CONFIG_MPIC) += mpic.o
 
 obj-$(CONFIG_PPC_PMAC) += pmac_setup.o pmac_feature.o pmac_pci.o \
 			  pmac_time.o pmac_nvram.o pmac_low_i2c.o
@@ -58,6 +65,7 @@ ifdef CONFIG_SMP
 obj-$(CONFIG_PPC_PMAC) += pmac_smp.o smp-tbsync.o
 obj-$(CONFIG_PPC_ISERIES) += iSeries_smp.o
 obj-$(CONFIG_PPC_PSERIES) += pSeries_smp.o
+obj-$(CONFIG_PPC_BPA) += pSeries_smp.o
 obj-$(CONFIG_PPC_MAPLE) += smp-tbsync.o
 endif
diff --git a/arch/ppc64/kernel/bpa_iic.c b/arch/ppc64/kernel/bpa_iic.c
new file mode 100644
index 00000000000..c8f3dc3fad7
--- /dev/null
+++ b/arch/ppc64/kernel/bpa_iic.c
@@ -0,0 +1,270 @@
+/*
+ * BPA Internal Interrupt Controller
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+
+#include "bpa_iic.h"
+
+struct iic_pending_bits {
+	u32 data;
+	u8 flags;
+	u8 class;
+	u8 source;
+	u8 prio;
+};
+
+enum iic_pending_flags {
+	IIC_VALID = 0x80,
+	IIC_IPI   = 0x40,
+};
+
+struct iic_regs {
+	struct iic_pending_bits pending;
+	struct iic_pending_bits pending_destr;
+	u64 generate;
+	u64 prio;
+};
+
+struct iic {
+	struct iic_regs __iomem *regs;
+};
+
+static DEFINE_PER_CPU(struct iic, iic);
+
+void iic_local_enable(void)
+{
+	out_be64(&__get_cpu_var(iic).regs->prio, 0xff);
+}
+
+void iic_local_disable(void)
+{
+	out_be64(&__get_cpu_var(iic).regs->prio, 0x0);
+}
+
+static unsigned int iic_startup(unsigned int irq)
+{
+	return 0;
+}
+
+static void iic_enable(unsigned int irq)
+{
+	iic_local_enable();
+}
+
+static void iic_disable(unsigned int irq)
+{
+}
+
+static void iic_end(unsigned int irq)
+{
+	iic_local_enable();
+}
+
+static struct hw_interrupt_type iic_pic = {
+	.typename = " BPA-IIC  ",
+	.startup = iic_startup,
+	.enable = iic_enable,
+	.disable = iic_disable,
+	.end = iic_end,
+};
+
+static int iic_external_get_irq(struct iic_pending_bits pending)
+{
+	int irq;
+	unsigned char node, unit;
+
+	node = pending.source >> 4;
+	unit = pending.source & 0xf;
+	irq = -1;
+
+	/*
+	 * This mapping is specific to the Broadband
+	 * Engine. We might need to get the numbers
+	 * from the device tree to support future CPUs.
+	 */
+	switch (unit) {
+	case 0x00:
+	case 0x0b:
+		/*
+		 * One of these units can be connected
+		 * to an external interrupt controller.
+		 */
+		if (pending.prio > 0x3f ||
+		    pending.class != 2)
+			break;
+		irq = IIC_EXT_OFFSET
+			+ spider_get_irq(pending.prio + node * IIC_NODE_STRIDE)
+			+ node * IIC_NODE_STRIDE;
+		break;
+	case 0x01 ... 0x04:
+	case 0x07 ... 0x0a:
+		/*
+		 * These units are connected to the SPEs
+		 */
+		if (pending.class > 2)
+			break;
+		irq = IIC_SPE_OFFSET
+			+ pending.class * IIC_CLASS_STRIDE
+			+ node * IIC_NODE_STRIDE
+			+ unit;
+		break;
+	}
+	if (irq == -1)
+		printk(KERN_WARNING "Unexpected interrupt class %02x, "
+			"source %02x, prio %02x, cpu %02x\n", pending.class,
+			pending.source, pending.prio, smp_processor_id());
+	return irq;
+}
+
+/* Get an IRQ number from the pending state register of the IIC */
+int iic_get_irq(struct pt_regs *regs)
+{
+	struct iic *iic;
+	int irq;
+	struct iic_pending_bits pending;
+
+	iic = &__get_cpu_var(iic);
+	*(unsigned long *) &pending =
+		in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
+
+	irq = -1;
+	if (pending.flags & IIC_VALID) {
+		if (pending.flags & IIC_IPI) {
+			irq = IIC_IPI_OFFSET + (pending.prio >> 4);
+/*
+			if (irq > 0x80)
+				printk(KERN_WARNING "Unexpected IPI prio %02x"
+					"on CPU %02x\n", pending.prio,
+							smp_processor_id());
+*/
+		} else {
+			irq = iic_external_get_irq(pending);
+		}
+	}
+	return irq;
+}
+
+static struct iic_regs __iomem *find_iic(int cpu)
+{
+	struct device_node *np;
+	int nodeid = cpu / 2;
+	unsigned long regs;
+	struct iic_regs __iomem *iic_regs;
+
+	for (np = of_find_node_by_type(NULL, "cpu");
+	     np;
+	     np = of_find_node_by_type(np, "cpu")) {
+		if (nodeid == *(int *)get_property(np, "node-id", NULL))
+			break;
+	}
+
+	if (!np) {
+		printk(KERN_WARNING "IIC: CPU %d not found\n", cpu);
+		iic_regs = NULL;
+	} else {
+		regs = *(long *)get_property(np, "iic", NULL);
+
+		/* hack until we have decided on the devtree info */
+		regs += 0x400;
+		if (cpu & 1)
+			regs += 0x20;
+
+		printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs);
+		iic_regs = __ioremap(regs, sizeof(struct iic_regs),
+							 _PAGE_NO_CACHE);
+	}
+	return iic_regs;
+}
+
+#ifdef CONFIG_SMP
+void iic_setup_cpu(void)
+{
+	out_be64(&__get_cpu_var(iic).regs->prio, 0xff);
+}
+
+void iic_cause_IPI(int cpu, int mesg)
+{
+	out_be64(&per_cpu(iic, cpu).regs->generate, mesg);
+}
+
+static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
+{
+
+	smp_message_recv(irq - IIC_IPI_OFFSET, regs);
+	return IRQ_HANDLED;
+}
+
+static void iic_request_ipi(int irq, const char *name)
+{
+	/* IPIs are marked SA_INTERRUPT as they must run with irqs
+	 * disabled */
+	get_irq_desc(irq)->handler = &iic_pic;
+	get_irq_desc(irq)->status |= IRQ_PER_CPU;
+	request_irq(irq, iic_ipi_action, SA_INTERRUPT, name, NULL);
+}
+
+void iic_request_IPIs(void)
+{
+	iic_request_ipi(IIC_IPI_OFFSET + PPC_MSG_CALL_FUNCTION, "IPI-call");
+	iic_request_ipi(IIC_IPI_OFFSET + PPC_MSG_RESCHEDULE, "IPI-resched");
+#ifdef CONFIG_DEBUGGER
+	iic_request_ipi(IIC_IPI_OFFSET + PPC_MSG_DEBUGGER_BREAK, "IPI-debug");
+#endif /* CONFIG_DEBUGGER */
+}
+#endif /* CONFIG_SMP */
+
+static void iic_setup_spe_handlers(void)
+{
+	int be, isrc;
+
+	/* Assume two threads per BE are present */
+	for (be=0; be < num_present_cpus() / 2; be++) {
+		for (isrc = 0; isrc < IIC_CLASS_STRIDE * 3; isrc++) {
+			int irq = IIC_NODE_STRIDE * be + IIC_SPE_OFFSET + isrc;
+			get_irq_desc(irq)->handler = &iic_pic;
+		}
+	}
+}
+
+void iic_init_IRQ(void)
+{
+	int cpu, irq_offset;
+	struct iic *iic;
+
+	irq_offset = 0;
+	for_each_cpu(cpu) {
+		iic = &per_cpu(iic, cpu);
+		iic->regs = find_iic(cpu);
+		if (iic->regs)
+			out_be64(&iic->regs->prio, 0xff);
+	}
+	iic_setup_spe_handlers();
+}
diff --git a/arch/ppc64/kernel/bpa_iic.h b/arch/ppc64/kernel/bpa_iic.h
new file mode 100644
index 00000000000..6833c302216
--- /dev/null
+++ b/arch/ppc64/kernel/bpa_iic.h
@@ -0,0 +1,62 @@
+#ifndef ASM_BPA_IIC_H
+#define ASM_BPA_IIC_H
+#ifdef __KERNEL__
+/*
+ * Mapping of IIC pending bits into per-node
+ * interrupt numbers.
+ *
+ * IRQ     FF CC SS PP - FF CC SS PP	Description
+ *
+ * 00-3f   80 02 +0 00 - 80 02 +0 3f	South Bridge
+ * 00-3f   80 02 +b 00 - 80 02 +b 3f	South Bridge
+ * 41-4a   80 00 +1 ** - 80 00 +a **	SPU Class 0
+ * 51-5a   80 01 +1 ** - 80 01 +a **	SPU Class 1
+ * 61-6a   80 02 +1 ** - 80 02 +a **	SPU Class 2
+ * 70-7f   C0 ** ** 00 - C0 ** ** 0f	IPI
+ *
+ *    F flags
+ *    C class
+ *    S source
+ *    P Priority
+ *    + node number
+ *    * don't care
+ *
+ * A node consists of a Broadband Engine and an optional
+ * south bridge device providing a maximum of 64 IRQs.
+ * The south bridge may be connected to either IOIF0
+ * or IOIF1.
+ * Each SPE is represented as three IRQ lines, one per
+ * interrupt class.
+ * 16 IRQ numbers are reserved for inter processor
+ * interrupts, although these are only used in the
+ * range of the first node.
+ *
+ * This scheme needs 128 IRQ numbers per BIF node ID,
+ * which means that with the total of 512 lines
+ * available, we can have a maximum of four nodes.
+ */
+
+enum {
+	IIC_EXT_OFFSET   = 0x00, /* Start of south bridge IRQs */
+	IIC_NUM_EXT      = 0x40, /* Number of south bridge IRQs */
+	IIC_SPE_OFFSET   = 0x40, /* Start of SPE interrupts */
+	IIC_CLASS_STRIDE = 0x10, /* SPE IRQs per class    */
+	IIC_IPI_OFFSET   = 0x70, /* Start of IPI IRQs */
+	IIC_NUM_IPIS     = 0x10, /* IRQs reserved for IPI */
+	IIC_NODE_STRIDE  = 0x80, /* Total IRQs per node   */
+};
+
+extern void iic_init_IRQ(void);
+extern int  iic_get_irq(struct pt_regs *regs);
+extern void iic_cause_IPI(int cpu, int mesg);
+extern void iic_request_IPIs(void);
+extern void iic_setup_cpu(void);
+extern void iic_local_enable(void);
+extern void iic_local_disable(void);
+
+
+extern void spider_init_IRQ(void);
+extern int spider_get_irq(unsigned long int_pending);
+
+#endif
+#endif /* ASM_BPA_IIC_H */
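The table in bpa_iic.h fixes how a node's 0x80 IRQ numbers are carved up: south-bridge lines first, then three classes of SPE lines, then IPIs. Going by that table alone, an IRQ number can be decoded back into its node and per-node meaning; a hedged sketch (the decode helper and its result struct are invented here for illustration, the constants mirror the header):

	enum {	/* constants mirrored from bpa_iic.h */
		IIC_EXT_OFFSET   = 0x00,
		IIC_SPE_OFFSET   = 0x40,
		IIC_CLASS_STRIDE = 0x10,
		IIC_IPI_OFFSET   = 0x70,
		IIC_NODE_STRIDE  = 0x80,
	};

	enum iic_kind { IIC_KIND_EXT, IIC_KIND_SPE, IIC_KIND_IPI };
	struct iic_decoded { int node; enum iic_kind kind; int index; };

	/* Split an IRQ number into its node and per-node meaning. */
	static struct iic_decoded iic_decode_irq(int irq)
	{
		struct iic_decoded d;
		int off = irq % IIC_NODE_STRIDE;

		d.node = irq / IIC_NODE_STRIDE;
		if (off < IIC_SPE_OFFSET) {
			d.kind = IIC_KIND_EXT;	/* south bridge line */
			d.index = off;
		} else if (off < IIC_IPI_OFFSET) {
			d.kind = IIC_KIND_SPE;	/* class*stride + unit */
			d.index = off - IIC_SPE_OFFSET;
		} else {
			d.kind = IIC_KIND_IPI;	/* inter-processor message */
			d.index = off - IIC_IPI_OFFSET;
		}
		return d;
	}

This is the inverse of what iic_external_get_irq() computes when it builds an IRQ number from class, node and unit.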
diff --git a/arch/ppc64/kernel/bpa_iommu.c b/arch/ppc64/kernel/bpa_iommu.c
new file mode 100644
index 00000000000..f33a7bccb0d
--- /dev/null
+++ b/arch/ppc64/kernel/bpa_iommu.c
@@ -0,0 +1,377 @@
+/*
+ * IOMMU implementation for Broadband Processor Architecture
+ * We just establish a linear mapping at boot by setting all the
+ * IOPT cache entries in the CPU.
+ * The mapping functions should be identical to pci_direct_iommu,
+ * except for the handling of the high order bit that is required
+ * by the Spider bridge. These should be split into a separate
+ * file at the point where we get a different bridge chip.
+ *
+ * Copyright (C) 2005 IBM Deutschland Entwicklung GmbH,
+ *			 Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * Based on linear mapping
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/sections.h>
+#include <asm/iommu.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/abs_addr.h>
+#include <asm/system.h>
+
+#include "pci.h"
+#include "bpa_iommu.h"
+
+static inline unsigned long
+get_iopt_entry(unsigned long real_address, unsigned long ioid,
+	 unsigned long prot)
+{
+	return (prot & IOPT_PROT_MASK)
+	     | (IOPT_COHERENT)
+	     | (IOPT_ORDER_VC)
+	     | (real_address & IOPT_RPN_MASK)
+	     | (ioid & IOPT_IOID_MASK);
+}
+
+typedef struct {
+	unsigned long val;
+} ioste;
+
+static inline ioste
+mk_ioste(unsigned long val)
+{
+	ioste ioste = { .val = val, };
+	return ioste;
+}
+
+static inline ioste
+get_iost_entry(unsigned long iopt_base, unsigned long io_address, unsigned page_size)
+{
+	unsigned long ps;
+	unsigned long iostep;
+	unsigned long nnpt;
+	unsigned long shift;
+
+	switch (page_size) {
+	case 0x1000000:
+		ps = IOST_PS_16M;
+		nnpt = 0;  /* one page per segment */
+		shift = 5; /* segment has 16 iopt entries */
+		break;
+
+	case 0x100000:
+		ps = IOST_PS_1M;
+		nnpt = 0;  /* one page per segment */
+		shift = 1; /* segment has 256 iopt entries */
+		break;
+
+	case 0x10000:
+		ps = IOST_PS_64K;
+		nnpt = 0x07; /* 8 pages per io page table */
+		shift = 0;   /* all entries are used */
+		break;
+
+	case 0x1000:
+		ps = IOST_PS_4K;
+		nnpt = 0x7f; /* 128 pages per io page table */
+		shift = 0;   /* all entries are used */
+		break;
+
+	default: /* not a known compile time constant */
+		BUILD_BUG_ON(1);
+		break;
+	}
+
+	iostep = iopt_base +
+			 /* need 8 bytes per iopte */
+			(((io_address / page_size * 8)
+			 /* align io page tables on 4k page boundaries */
+				 << shift)
+			 /* nnpt+1 pages go into each iopt */
+				 & ~(nnpt << 12));
+
+	nnpt++;	/* this seems to work, but the documentation is not clear
+		   about whether we put nnpt or nnpt-1 into the ioste bits.
+		   In theory, this can't work for 4k pages. */
+	return mk_ioste(IOST_VALID_MASK
+			| (iostep & IOST_PT_BASE_MASK)
+			| ((nnpt << 5) & IOST_NNPT_MASK)
+			| (ps & IOST_PS_MASK));
+}
+
+/* compute the address of an io pte */
+static inline unsigned long
+get_ioptep(ioste iost_entry, unsigned long io_address)
+{
+	unsigned long iopt_base;
+	unsigned long page_size;
+	unsigned long page_number;
+	unsigned long iopt_offset;
+
+	iopt_base = iost_entry.val & IOST_PT_BASE_MASK;
+	page_size = iost_entry.val & IOST_PS_MASK;
+
+	/* decode page size to compute page number */
+	page_number = (io_address & 0x0fffffff) >> (10 + 2 * page_size);
+	/* page number is an offset into the io page table */
+	iopt_offset = (page_number << 3) & 0x7fff8ul;
+	return iopt_base + iopt_offset;
+}
+
+/* compute the tag field of the iopt cache entry */
+static inline unsigned long
+get_ioc_tag(ioste iost_entry, unsigned long io_address)
+{
+	unsigned long iopte = get_ioptep(iost_entry, io_address);
+
+	return IOPT_VALID_MASK
+	     | ((iopte & 0x00000000000000ff8ul) >> 3)
+	     | ((iopte & 0x0000003fffffc0000ul) >> 9);
+}
+
+/* compute the hashed 6 bit index for the 4-way associative pte cache */
+static inline unsigned long
+get_ioc_hash(ioste iost_entry, unsigned long io_address)
+{
+	unsigned long iopte = get_ioptep(iost_entry, io_address);
+
+	return ((iopte & 0x000000000000001f8ul) >> 3)
+	     ^ ((iopte & 0x00000000000020000ul) >> 17)
+	     ^ ((iopte & 0x00000000000010000ul) >> 15)
+	     ^ ((iopte & 0x00000000000008000ul) >> 13)
+	     ^ ((iopte & 0x00000000000004000ul) >> 11)
+	     ^ ((iopte & 0x00000000000002000ul) >> 9)
+	     ^ ((iopte & 0x00000000000001000ul) >> 7);
+}
+
+/* same as above, but pretend that we have a simpler 1-way associative
+   pte cache with an 8 bit index */
+static inline unsigned long
+get_ioc_hash_1way(ioste iost_entry, unsigned long io_address)
+{
+	unsigned long iopte = get_ioptep(iost_entry, io_address);
+
+	return ((iopte & 0x000000000000001f8ul) >> 3)
+	     ^ ((iopte & 0x00000000000020000ul) >> 17)
+	     ^ ((iopte & 0x00000000000010000ul) >> 15)
+	     ^ ((iopte & 0x00000000000008000ul) >> 13)
+	     ^ ((iopte & 0x00000000000004000ul) >> 11)
+	     ^ ((iopte & 0x00000000000002000ul) >> 9)
+	     ^ ((iopte & 0x00000000000001000ul) >> 7)
+	     ^ ((iopte & 0x0000000000000c000ul) >> 8);
+}
+
+static inline ioste
+get_iost_cache(void __iomem *base, unsigned long index)
+{
+	unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR);
+	return mk_ioste(in_be64(&p[index]));
+}
+
+static inline void
+set_iost_cache(void __iomem *base, unsigned long index, ioste ste)
+{
+	unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR);
+	pr_debug("ioste %02lx was %016lx, store %016lx", index,
+			get_iost_cache(base, index).val, ste.val);
+	out_be64(&p[index], ste.val);
+	pr_debug(" now %016lx\n", get_iost_cache(base, index).val);
+}
+
+static inline unsigned long
+get_iopt_cache(void __iomem *base, unsigned long index, unsigned long *tag)
+{
+	unsigned long __iomem *tags = (void *)(base + IOC_PT_CACHE_DIR);
+	unsigned long __iomem *p = (void *)(base + IOC_PT_CACHE_REG);
+
+	*tag = tags[index];
+	rmb();
+	return *p;
+}
+
+static inline void
+set_iopt_cache(void __iomem *base, unsigned long index,
+		 unsigned long tag, unsigned long val)
+{
+	unsigned long __iomem *tags = base + IOC_PT_CACHE_DIR;
+	unsigned long __iomem *p = base + IOC_PT_CACHE_REG;
+	pr_debug("iopt %02lx was v%016lx/t%016lx, store v%016lx/t%016lx\n",
+		index, get_iopt_cache(base, index, &oldtag), oldtag, val, tag);
+
+	out_be64(p, val);
+	out_be64(&tags[index], tag);
+}
+
+static inline void
+set_iost_origin(void __iomem *base)
+{
+	unsigned long __iomem *p = base + IOC_ST_ORIGIN;
+	unsigned long origin = IOSTO_ENABLE | IOSTO_SW;
+
+	pr_debug("iost_origin %016lx, now %016lx\n", in_be64(p), origin);
+	out_be64(p, origin);
+}
+
+static inline void
+set_iocmd_config(void __iomem *base)
+{
+	unsigned long __iomem *p = base + 0xc00;
+	unsigned long conf;
+
+	conf = in_be64(p);
+	pr_debug("iost_conf %016lx, now %016lx\n", conf, conf | IOCMD_CONF_TE);
+	out_be64(p, conf | IOCMD_CONF_TE);
+}
+
+/* FIXME: get these from the device tree */
+#define ioc_base	0x20000511000ull
+#define ioc_mmio_base	0x20000510000ull
+#define ioid		0x48a
+#define iopt_phys_offset (- 0x20000000) /* We have a 512MB offset from the SB */
+#define io_page_size	0x1000000
+
+static unsigned long map_iopt_entry(unsigned long address)
+{
+	switch (address >> 20) {
+	case 0x600:
+		address = 0x24020000000ull; /* spider i/o */
+		break;
+	default:
+		address += iopt_phys_offset;
+		break;
+	}
+
+	return get_iopt_entry(address, ioid, IOPT_PROT_RW);
+}
+
+static void iommu_bus_setup_null(struct pci_bus *b) { }
+static void iommu_dev_setup_null(struct pci_dev *d) { }
+
+/* initialize the iommu to support a simple linear mapping
+ * for each DMA window used by any device. For now, we
+ * happen to know that there is only one DMA window in use,
+ * starting at iopt_phys_offset. */
+static void bpa_map_iommu(void)
+{
+	unsigned long address;
+	void __iomem *base;
+	ioste ioste;
+	unsigned long index;
+
+	base = __ioremap(ioc_base, 0x1000, _PAGE_NO_CACHE);
+	pr_debug("%lx mapped to %p\n", ioc_base, base);
+	set_iocmd_config(base);
+	iounmap(base);
+
+	base = __ioremap(ioc_mmio_base, 0x1000, _PAGE_NO_CACHE);
+	pr_debug("%lx mapped to %p\n", ioc_mmio_base, base);
+
+	set_iost_origin(base);
+
+	for (address = 0; address < 0x100000000ul; address += io_page_size) {
+		ioste = get_iost_entry(0x10000000000ul, address, io_page_size);
+		if ((address & 0xfffffff) == 0) /* segment start */
+			set_iost_cache(base, address >> 28, ioste);
+		index = get_ioc_hash_1way(ioste, address);
+		pr_debug("addr %08lx, index %02lx, ioste %016lx\n",
+					 address, index, ioste.val);
+		set_iopt_cache(base,
+			get_ioc_hash_1way(ioste, address),
+			get_ioc_tag(ioste, address),
+			map_iopt_entry(address));
+	}
+	iounmap(base);
+}
+
+
+static void *bpa_alloc_coherent(struct device *hwdev, size_t size,
+			   dma_addr_t *dma_handle, unsigned int __nocast flag)
+{
+	void *ret;
+
+	ret = (void *)__get_free_pages(flag, get_order(size));
+	if (ret != NULL) {
+		memset(ret, 0, size);
+		*dma_handle = virt_to_abs(ret) | BPA_DMA_VALID;
+	}
+	return ret;
+}
+
+static void bpa_free_coherent(struct device *hwdev, size_t size,
+				 void *vaddr, dma_addr_t dma_handle)
+{
+	free_pages((unsigned long)vaddr, get_order(size));
+}
+
+static dma_addr_t bpa_map_single(struct device *hwdev, void *ptr,
+		size_t size, enum dma_data_direction direction)
+{
+	return virt_to_abs(ptr) | BPA_DMA_VALID;
+}
+
+static void bpa_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
+		size_t size, enum dma_data_direction direction)
+{
+}
+
+static int bpa_map_sg(struct device *hwdev, struct scatterlist *sg,
+		int nents, enum dma_data_direction direction)
+{
+	int i;
+
+	for (i = 0; i < nents; i++, sg++) {
+		sg->dma_address = (page_to_phys(sg->page) + sg->offset)
+					| BPA_DMA_VALID;
+		sg->dma_length = sg->length;
+	}
+
+	return nents;
+}
+
+static void bpa_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+		int nents, enum dma_data_direction direction)
+{
+}
+
+static int bpa_dma_supported(struct device *dev, u64 mask)
+{
+	return mask < 0x100000000ull;
+}
+
+void bpa_init_iommu(void)
+{
+	bpa_map_iommu();
+
+	/* Direct I/O, IOMMU off */
+	ppc_md.iommu_dev_setup = iommu_dev_setup_null;
+	ppc_md.iommu_bus_setup = iommu_bus_setup_null;
+
+	pci_dma_ops.alloc_coherent = bpa_alloc_coherent;
+	pci_dma_ops.free_coherent = bpa_free_coherent;
+	pci_dma_ops.map_single = bpa_map_single;
+	pci_dma_ops.unmap_single = bpa_unmap_single;
+	pci_dma_ops.map_sg = bpa_map_sg;
+	pci_dma_ops.unmap_sg = bpa_unmap_sg;
+	pci_dma_ops.dma_supported = bpa_dma_supported;
+}
diff --git a/arch/ppc64/kernel/bpa_iommu.h b/arch/ppc64/kernel/bpa_iommu.h
new file mode 100644
index 00000000000..e547d77dfa0
--- /dev/null
+++ b/arch/ppc64/kernel/bpa_iommu.h
@@ -0,0 +1,65 @@
+#ifndef BPA_IOMMU_H
+#define BPA_IOMMU_H
+
+/* some constants */
+enum {
+	/* segment table entries */
+	IOST_VALID_MASK	  = 0x8000000000000000ul,
+	IOST_TAG_MASK     = 0x3000000000000000ul,
+	IOST_PT_BASE_MASK = 0x000003fffffff000ul,
+	IOST_NNPT_MASK	  = 0x0000000000000fe0ul,
+	IOST_PS_MASK	  = 0x000000000000000ful,
+
+	IOST_PS_4K	  = 0x1,
+	IOST_PS_64K	  = 0x3,
+	IOST_PS_1M	  = 0x5,
+	IOST_PS_16M	  = 0x7,
+
+	/* iopt tag register */
+	IOPT_VALID_MASK   = 0x0000000200000000ul,
+	IOPT_TAG_MASK	  = 0x00000001fffffffful,
+
+	/* iopt cache register */
+	IOPT_PROT_MASK	  = 0xc000000000000000ul,
+	IOPT_PROT_NONE	  = 0x0000000000000000ul,
+	IOPT_PROT_READ	  = 0x4000000000000000ul,
+	IOPT_PROT_WRITE	  = 0x8000000000000000ul,
+	IOPT_PROT_RW	  = 0xc000000000000000ul,
+	IOPT_COHERENT	  = 0x2000000000000000ul,
+
+	IOPT_ORDER_MASK	  = 0x1800000000000000ul,
+	/* order access to same IOID/VC on same address */
+	IOPT_ORDER_ADDR	  = 0x0800000000000000ul,
+	/* similar, but only after a write access */
+	IOPT_ORDER_WRITES = 0x1000000000000000ul,
+	/* Order all accesses to same IOID/VC */
+	IOPT_ORDER_VC	  = 0x1800000000000000ul,
+
+	IOPT_RPN_MASK	  = 0x000003fffffff000ul,
+	IOPT_HINT_MASK	  = 0x0000000000000800ul,
+	IOPT_IOID_MASK	  = 0x00000000000007fful,
+
+	IOSTO_ENABLE	  = 0x8000000000000000ul,
+	IOSTO_ORIGIN	  = 0x000003fffffff000ul,
+	IOSTO_HW	  = 0x0000000000000800ul,
+	IOSTO_SW	  = 0x0000000000000400ul,
+
+	IOCMD_CONF_TE	  = 0x0000800000000000ul,
+
+	/* memory mapped registers */
+	IOC_PT_CACHE_DIR  = 0x000,
+	IOC_ST_CACHE_DIR  = 0x800,
+	IOC_PT_CACHE_REG  = 0x910,
+	IOC_ST_ORIGIN     = 0x918,
+	IOC_CONF	  = 0x930,
+
+	/* The high bit needs to be set on every DMA address,
+	   only 2GB are addressable */
+	BPA_DMA_VALID	  = 0x80000000,
+	BPA_DMA_MASK	  = 0x7fffffff,
+};
+
+
+void bpa_init_iommu(void);
+
+#endif
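Because the IOMMU window is linear, the mapping functions above reduce to tagging every bus address with the high bit (BPA_DMA_VALID) and masking it off again; this is also why bpa_dma_supported() only accepts DMA masks below 4GB, since just 2GB remain addressable under the tag. A small sketch of that address arithmetic, using the constants from bpa_iommu.h (the helper names are invented for illustration):

	#include <stdint.h>

	#define BPA_DMA_VALID 0x80000000u
	#define BPA_DMA_MASK  0x7fffffffu

	/* Physical -> bus address in the linear window: set the valid bit,
	 * as bpa_map_single() and bpa_map_sg() do above. */
	static uint64_t bpa_phys_to_bus(uint64_t phys)
	{
		return phys | BPA_DMA_VALID;
	}

	/* Bus -> physical: strip the tag again; everything outside the
	 * low 2GB cannot be expressed in this scheme. */
	static uint64_t bpa_bus_to_phys(uint64_t bus)
	{
		return bus & BPA_DMA_MASK;
	}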
diff --git a/arch/ppc64/kernel/bpa_nvram.c b/arch/ppc64/kernel/bpa_nvram.c
new file mode 100644
index 00000000000..06a119cfceb
--- /dev/null
+++ b/arch/ppc64/kernel/bpa_nvram.c
@@ -0,0 +1,118 @@
+/*
+ * NVRAM for CPBW
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Authors : Utz Bacher <utz.bacher@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/machdep.h>
+#include <asm/nvram.h>
+#include <asm/prom.h>
+
+static void __iomem *bpa_nvram_start;
+static long bpa_nvram_len;
+static spinlock_t bpa_nvram_lock = SPIN_LOCK_UNLOCKED;
+
+static ssize_t bpa_nvram_read(char *buf, size_t count, loff_t *index)
+{
+	unsigned long flags;
+
+	if (*index >= bpa_nvram_len)
+		return 0;
+	if (*index + count > bpa_nvram_len)
+		count = bpa_nvram_len - *index;
+
+	spin_lock_irqsave(&bpa_nvram_lock, flags);
+
+	memcpy_fromio(buf, bpa_nvram_start + *index, count);
+
+	spin_unlock_irqrestore(&bpa_nvram_lock, flags);
+
+	*index += count;
+	return count;
+}
+
+static ssize_t bpa_nvram_write(char *buf, size_t count, loff_t *index)
+{
+	unsigned long flags;
+
+	if (*index >= bpa_nvram_len)
+		return 0;
+	if (*index + count > bpa_nvram_len)
+		count = bpa_nvram_len - *index;
+
+	spin_lock_irqsave(&bpa_nvram_lock, flags);
+
+	memcpy_toio(bpa_nvram_start + *index, buf, count);
+
+	spin_unlock_irqrestore(&bpa_nvram_lock, flags);
+
+	*index += count;
+	return count;
+}
+
+static ssize_t bpa_nvram_get_size(void)
+{
+	return bpa_nvram_len;
+}
+
+int __init bpa_nvram_init(void)
+{
+	struct device_node *nvram_node;
+	unsigned long *buffer;
+	int proplen;
+	unsigned long nvram_addr;
+	int ret;
+
+	ret = -ENODEV;
+	nvram_node = of_find_node_by_type(NULL, "nvram");
+	if (!nvram_node)
+		goto out;
+
+	ret = -EIO;
+	buffer = (unsigned long *)get_property(nvram_node, "reg", &proplen);
+	if (proplen != 2*sizeof(unsigned long))
+		goto out;
+
+	ret = -ENODEV;
+	nvram_addr = buffer[0];
+	bpa_nvram_len = buffer[1];
+	if ( (!bpa_nvram_len) || (!nvram_addr) )
+		goto out;
+
+	bpa_nvram_start = ioremap(nvram_addr, bpa_nvram_len);
+	if (!bpa_nvram_start)
+		goto out;
+
+	printk(KERN_INFO "BPA NVRAM, %luk mapped to %p\n",
+	       bpa_nvram_len >> 10, bpa_nvram_start);
+
+	ppc_md.nvram_read	= bpa_nvram_read;
+	ppc_md.nvram_write	= bpa_nvram_write;
+	ppc_md.nvram_size	= bpa_nvram_get_size;
+
+out:
+	of_node_put(nvram_node);
+	return ret;
+}
diff --git a/arch/ppc64/kernel/bpa_setup.c b/arch/ppc64/kernel/bpa_setup.c
new file mode 100644
index 00000000000..57b3db66f45
--- /dev/null
+++ b/arch/ppc64/kernel/bpa_setup.c
@@ -0,0 +1,140 @@
+/*
+ *  linux/arch/ppc/kernel/bpa_setup.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Adapted from 'alpha' version by Gary Thomas
+ *  Modified by Cort Dougan (cort@cs.nmt.edu)
+ *  Modified by PPC64 Team, IBM Corp
+ *  Modified by BPA Team, IBM Deutschland Entwicklung GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#undef DEBUG
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+#include <asm/cputable.h>
+
+#include "pci.h"
+#include "bpa_iic.h"
+#include "bpa_iommu.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+void bpa_get_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = "";
+
+	root = of_find_node_by_path("/");
+	if (root)
+		model = get_property(root, "model", NULL);
+	seq_printf(m, "machine\t\t: BPA %s\n", model);
+	of_node_put(root);
+}
+
+static void bpa_progress(char *s, unsigned short hex)
+{
+	printk("*** %04x : %s\n", hex, s ? s : "");
+}
+
+static void __init bpa_setup_arch(void)
+{
+	ppc_md.init_IRQ       = iic_init_IRQ;
+	ppc_md.get_irq        = iic_get_irq;
+
+#ifdef CONFIG_SMP
+	smp_init_pSeries();
+#endif
+
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000;
+
+	if (ROOT_DEV == 0) {
+		printk("No ramdisk, default root is /dev/hda2\n");
+		ROOT_DEV = Root_HDA2;
+	}
+
+	/* Find and initialize PCI host bridges */
+	init_pci_config_tokens();
+	find_and_init_phbs();
+	spider_init_IRQ();
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+
+	bpa_nvram_init();
+}
+
+/*
+ * Early initialization. Relocation is on but do not reference unbolted pages
+ */
+static void __init bpa_init_early(void)
+{
+	DBG(" -> bpa_init_early()\n");
+
+	hpte_init_native();
+
+	bpa_init_iommu();
+
+	ppc64_interrupt_controller = IC_BPA_IIC;
+
+	DBG(" <- bpa_init_early()\n");
+}
+
+
+static int __init bpa_probe(int platform)
+{
+	if (platform != PLATFORM_BPA)
+		return 0;
+
+	return 1;
+}
+
+struct machdep_calls __initdata bpa_md = {
+	.probe			= bpa_probe,
+	.setup_arch		= bpa_setup_arch,
+	.init_early		= bpa_init_early,
+	.get_cpuinfo		= bpa_get_cpuinfo,
+	.restart		= rtas_restart,
+	.power_off		= rtas_power_off,
+	.halt			= rtas_halt,
+	.get_boot_time		= rtas_get_boot_time,
+	.get_rtc_time		= rtas_get_rtc_time,
+	.set_rtc_time		= rtas_set_rtc_time,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= bpa_progress,
+};
diff --git a/arch/ppc64/kernel/cpu_setup_power4.S b/arch/ppc64/kernel/cpu_setup_power4.S
index 3bd95182085..0482c063c26 100644
--- a/arch/ppc64/kernel/cpu_setup_power4.S
+++ b/arch/ppc64/kernel/cpu_setup_power4.S
@@ -31,10 +31,13 @@ _GLOBAL(__970_cpu_preinit)
 	 */
 	mfspr	r0,SPRN_PVR
 	srwi	r0,r0,16
-	cmpwi	cr0,r0,0x39
-	cmpwi	cr1,r0,0x3c
-	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
+	cmpwi	r0,0x39
+	beq	1f
+	cmpwi	r0,0x3c
+	beq	1f
+	cmpwi	r0,0x44
 	bnelr
+1:
 
 	/* Make sure HID4:rm_ci is off before MMU is turned off, that large
 	 * pages are enabled with HID4:61 and clear HID5:DCBZ_size and
@@ -73,7 +76,21 @@ _GLOBAL(__970_cpu_preinit)
 
 _GLOBAL(__setup_cpu_power4)
 	blr
-	
+
+_GLOBAL(__setup_cpu_be)
+	/* Set large page sizes LP=0: 16MB, LP=1: 64KB */
+	addi	r3, 0, 0
+	ori	r3, r3, HID6_LB
+	sldi	r3, r3, 32
+	nor	r3, r3, r3
+	mfspr	r4, SPRN_HID6
+	and	r4, r4, r3
+	addi	r3, 0, 0x02000
+	sldi	r3, r3, 32
+	or	r4, r4, r3
+	mtspr	SPRN_HID6, r4
+	blr
+
 _GLOBAL(__setup_cpu_ppc970)
 	mfspr	r0,SPRN_HID0
 	li	r11,5			/* clear DOZE and SLEEP */
@@ -119,12 +136,14 @@ _GLOBAL(__save_cpu_setup)
 	/* We only deal with 970 for now */
 	mfspr	r0,SPRN_PVR
 	srwi	r0,r0,16
-	cmpwi	cr0,r0,0x39
-	cmpwi	cr1,r0,0x3c
-	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
-	bne	1f
-
-	/* Save HID0,1,4 and 5 */
+	cmpwi	r0,0x39
+	beq	1f
+	cmpwi	r0,0x3c
+	beq	1f
+	cmpwi	r0,0x44
+	bne	2f
+
+1:	/* Save HID0,1,4 and 5 */
 	mfspr	r3,SPRN_HID0
 	std	r3,CS_HID0(r5)
 	mfspr	r3,SPRN_HID1
@@ -134,7 +153,7 @@ _GLOBAL(__save_cpu_setup)
 	mfspr	r3,SPRN_HID5
 	std	r3,CS_HID5(r5)
 
-1:
+2:
 	mtcr	r7
 	blr
 
@@ -151,12 +170,14 @@ _GLOBAL(__restore_cpu_setup)
 	/* We only deal with 970 for now */
 	mfspr	r0,SPRN_PVR
 	srwi	r0,r0,16
-	cmpwi	cr0,r0,0x39
-	cmpwi	cr1,r0,0x3c
-	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
-	bne	1f
+	cmpwi	r0,0x39
+	beq	1f
+	cmpwi	r0,0x3c
+	beq	1f
+	cmpwi	r0,0x44
+	bnelr
 
-	/* Before accessing memory, we make sure rm_ci is clear */
+1:	/* Before accessing memory, we make sure rm_ci is clear */
 	li	r0,0
 	mfspr	r3,SPRN_HID4
 	rldimi	r3,r0,40,23	/* clear bit 23 (rm_ci) */
@@ -209,6 +230,5 @@ _GLOBAL(__restore_cpu_setup)
 	mtspr	SPRN_HID5,r3
 	sync
 	isync
-1:
 	blr
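The rewritten PVR checks in cpu_setup_power4.S replace the old cr0/cr1 cror trick with a plain branch chain, so a third processor revision can be added: the top halfword of the PVR is now compared against 0x39 (PPC970), 0x3c (PPC970FX) and, newly, 0x44 (PPC970MP). In C the same test is simply:

	/* C rendering of the assembly's PVR test (values from the diff above) */
	static int is_970_family(unsigned int pvr)
	{
		switch (pvr >> 16) {
		case 0x39:	/* PPC970 */
		case 0x3c:	/* PPC970FX */
		case 0x44:	/* PPC970MP */
			return 1;
		default:
			return 0;
		}
	}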
diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c
index 8644a864805..77cec42f952 100644
--- a/arch/ppc64/kernel/cputable.c
+++ b/arch/ppc64/kernel/cputable.c
@@ -34,6 +34,7 @@ EXPORT_SYMBOL(cur_cpu_spec);
 extern void __setup_cpu_power3(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_power4(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_be(unsigned long offset, struct cpu_spec* spec);
 
 /* We only set the altivec features if the kernel was compiled with altivec
@@ -48,150 +49,234 @@ extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
 #endif
 
 struct cpu_spec	cpu_specs[] = {
-    {	/* Power3 */
-	    0xffff0000, 0x00400000, "POWER3 (630)",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_IABR | CPU_FTR_PMC8,
-	    COMMON_USER_PPC64,
-	    128, 128,
-	    __setup_cpu_power3,
-	    COMMON_PPC64_FW
-    },
-    {	/* Power3+ */
-	    0xffff0000, 0x00410000, "POWER3 (630+)",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_IABR | CPU_FTR_PMC8,
-	    COMMON_USER_PPC64,
-	    128, 128,
-	    __setup_cpu_power3,
-	    COMMON_PPC64_FW
-    },
-    {	/* Northstar */
-	    0xffff0000, 0x00330000, "RS64-II (northstar)",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
-	    COMMON_USER_PPC64,
-	    128, 128,
-	    __setup_cpu_power3,
-	    COMMON_PPC64_FW
-    },
-    {	/* Pulsar */
-	    0xffff0000, 0x00340000, "RS64-III (pulsar)",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
-	    COMMON_USER_PPC64,
-	    128, 128,
-	    __setup_cpu_power3,
-	    COMMON_PPC64_FW
-    },
-    {	/* I-star */
-	    0xffff0000, 0x00360000, "RS64-III (icestar)",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
-	    COMMON_USER_PPC64,
-	    128, 128,
-	    __setup_cpu_power3,
-	    COMMON_PPC64_FW
-    },
-    {	/* S-star */
-	    0xffff0000, 0x00370000, "RS64-IV (sstar)",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
-	    COMMON_USER_PPC64,
-	    128, 128,
-	    __setup_cpu_power3,
-	    COMMON_PPC64_FW
-    },
-    {	/* Power4 */
-	    0xffff0000, 0x00350000, "POWER4 (gp)",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
-	    COMMON_USER_PPC64,
-	    128, 128,
-	    __setup_cpu_power4,
-	    COMMON_PPC64_FW
-    },
-    {	/* Power4+ */
-	    0xffff0000, 0x00380000, "POWER4+ (gq)",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
-	    COMMON_USER_PPC64,
-	    128, 128,
-	    __setup_cpu_power4,
-	    COMMON_PPC64_FW
-    },
-    {	/* PPC970 */
-	    0xffff0000, 0x00390000, "PPC970",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
-		    CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
-	    COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP,
-	    128, 128,
-	    __setup_cpu_ppc970,
-	    COMMON_PPC64_FW
-    },
-    {	/* PPC970FX */
-	    0xffff0000, 0x003c0000, "PPC970FX",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
-		    CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
-	    COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP,
-	    128, 128,
-	    __setup_cpu_ppc970,
-	    COMMON_PPC64_FW
-    },
-    {	/* Power5 */
-	    0xffff0000, 0x003a0000, "POWER5 (gr)",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
-		    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
-		    CPU_FTR_MMCRA_SIHV,
-	    COMMON_USER_PPC64,
-	    128, 128,
-	    __setup_cpu_power4,
-	    COMMON_PPC64_FW
-    },
-    {	/* Power5 */
-	    0xffff0000, 0x003b0000, "POWER5 (gs)",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
-		    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
-		    CPU_FTR_MMCRA_SIHV,
-	    COMMON_USER_PPC64,
-	    128, 128,
-	    __setup_cpu_power4,
-	    COMMON_PPC64_FW
-    },
-    {	/* default match */
-	    0x00000000, 0x00000000, "POWER4 (compatible)",
-	    CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
-		    CPU_FTR_PPCAS_ARCH_V2,
-	    COMMON_USER_PPC64,
-	    128, 128,
-	    __setup_cpu_power4,
-	    COMMON_PPC64_FW
-    }
+	{	/* Power3 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00400000,
+		.cpu_name		= "POWER3 (630)",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+			CPU_FTR_PMC8,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power3,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* Power3+ */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00410000,
+		.cpu_name		= "POWER3 (630+)",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+			CPU_FTR_PMC8,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power3,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* Northstar */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00330000,
+		.cpu_name		= "RS64-II (northstar)",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+			CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power3,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* Pulsar */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00340000,
+		.cpu_name		= "RS64-III (pulsar)",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+			CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power3,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* I-star */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00360000,
+		.cpu_name		= "RS64-III (icestar)",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+			CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power3,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* S-star */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00370000,
+		.cpu_name		= "RS64-IV (sstar)",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+			CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power3,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* Power4 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00350000,
+		.cpu_name		= "POWER4 (gp)",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+			CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power4,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* Power4+ */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00380000,
+		.cpu_name		= "POWER4+ (gq)",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+			CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power4,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* PPC970 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00390000,
+		.cpu_name		= "PPC970",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+			CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
+			CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
+		.cpu_user_features	= COMMON_USER_PPC64 |
+			PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* PPC970FX */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003c0000,
+		.cpu_name		= "PPC970FX",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+			CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
+			CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
+		.cpu_user_features	= COMMON_USER_PPC64 |
+			PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* PPC970MP */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00440000,
+		.cpu_name		= "PPC970MP",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+			CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
+			CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
+		.cpu_user_features	= COMMON_USER_PPC64 |
+			PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* Power5 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003a0000,
+		.cpu_name		= "POWER5 (gr)",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+			CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
+			CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
+			CPU_FTR_MMCRA_SIHV,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power4,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* Power5 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003b0000,
+		.cpu_name		= "POWER5 (gs)",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+			CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
+			CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
+			CPU_FTR_MMCRA_SIHV,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power4,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* BE DD1.x */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00700000,
+		.cpu_name		= "Broadband Engine",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+			CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
+			CPU_FTR_SMT,
+		.cpu_user_features	= COMMON_USER_PPC64 |
+			PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_be,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
+	{	/* default match */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "POWER4 (compatible)",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+			CPU_FTR_PPCAS_ARCH_V2,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power4,
+		.firmware_features	= COMMON_PPC64_FW,
+	}
 };
 
 firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
-    {FW_FEATURE_PFT,		"hcall-pft"},
-    {FW_FEATURE_TCE,		"hcall-tce"},
-    {FW_FEATURE_SPRG0,		"hcall-sprg0"},
-    {FW_FEATURE_DABR,		"hcall-dabr"},
-    {FW_FEATURE_COPY,		"hcall-copy"},
-    {FW_FEATURE_ASR,		"hcall-asr"},
-    {FW_FEATURE_DEBUG,		"hcall-debug"},
-    {FW_FEATURE_PERF,		"hcall-perf"},
-    {FW_FEATURE_DUMP,		"hcall-dump"},
-    {FW_FEATURE_INTERRUPT,	"hcall-interrupt"},
-    {FW_FEATURE_MIGRATE,	"hcall-migrate"},
-    {FW_FEATURE_PERFMON,	"hcall-perfmon"},
-    {FW_FEATURE_CRQ,		"hcall-crq"},
-    {FW_FEATURE_VIO,		"hcall-vio"},
-    {FW_FEATURE_RDMA,		"hcall-rdma"},
-    {FW_FEATURE_LLAN,		"hcall-lLAN"},
-    {FW_FEATURE_BULK,		"hcall-bulk"},
-    {FW_FEATURE_XDABR,		"hcall-xdabr"},
-    {FW_FEATURE_MULTITCE,	"hcall-multi-tce"},
-    {FW_FEATURE_SPLPAR,		"hcall-splpar"},
+	{FW_FEATURE_PFT,		"hcall-pft"},
+	{FW_FEATURE_TCE,		"hcall-tce"},
+	{FW_FEATURE_SPRG0,		"hcall-sprg0"},
+	{FW_FEATURE_DABR,		"hcall-dabr"},
+	{FW_FEATURE_COPY,		"hcall-copy"},
+	{FW_FEATURE_ASR,		"hcall-asr"},
+	{FW_FEATURE_DEBUG,		"hcall-debug"},
+	{FW_FEATURE_PERF,		"hcall-perf"},
+	{FW_FEATURE_DUMP,		"hcall-dump"},
+	{FW_FEATURE_INTERRUPT,		"hcall-interrupt"},
+	{FW_FEATURE_MIGRATE,		"hcall-migrate"},
+	{FW_FEATURE_PERFMON,		"hcall-perfmon"},
+	{FW_FEATURE_CRQ,		"hcall-crq"},
+	{FW_FEATURE_VIO,		"hcall-vio"},
+	{FW_FEATURE_RDMA,		"hcall-rdma"},
+	{FW_FEATURE_LLAN,		"hcall-lLAN"},
+	{FW_FEATURE_BULK,		"hcall-bulk"},
+	{FW_FEATURE_XDABR,		"hcall-xdabr"},
+	{FW_FEATURE_MULTITCE,		"hcall-multi-tce"},
+	{FW_FEATURE_SPLPAR,		"hcall-splpar"},
 };
= 0x4800
-
-	.llong	2		/* # ESIDs to be mapped by hypervisor */
-	.llong	1		/* # memory ranges to be mapped by hypervisor */
-	.llong	STAB0_PAGE	/* Page # of segment table within load area */
-	.llong	0		/* Reserved */
-	.llong	0		/* Reserved */
-	.llong	0		/* Reserved */
-	.llong	0		/* Reserved */
-	.llong	0		/* Reserved */
-	.llong	(KERNELBASE>>SID_SHIFT)
-	.llong	0x408f92c94	/* KERNELBASE VSID */
-	/* We have to list the bolted VMALLOC segment here, too, so that it
-	 * will be restored on shared processor switch */
-	.llong	(VMALLOCBASE>>SID_SHIFT)
-	.llong	0xf09b89af5	/* VMALLOCBASE VSID */
-	.llong	8192		/* # pages to map (32 MB) */
-	.llong	0		/* Offset from start of loadarea to start of map */
-	.llong	0x408f92c940000	/* VPN of first page to map */
+	.llong itVpdAreas
+	.llong 0		/* xRamDisk */
+	.llong 0		/* xRamDiskSize */

 	. = 0x6100

@@ -707,11 +687,13 @@ fwnmi_data_area:
 system_reset_fwnmi:
 	HMT_MEDIUM
 	mtspr	SPRG1,r13		/* save r13 */
+	RUNLATCH_ON(r13)
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)

 	.globl machine_check_fwnmi
 machine_check_fwnmi:
 	HMT_MEDIUM
 	mtspr	SPRG1,r13		/* save r13 */
+	RUNLATCH_ON(r13)
 	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)

 /*
@@ -848,6 +830,7 @@ unrecov_fer:
 	.align	7
 	.globl data_access_common
 data_access_common:
+	RUNLATCH_ON(r10)	/* It won't fit in the 0x300 handler */
 	mfspr	r10,DAR
 	std	r10,PACA_EXGEN+EX_DAR(r13)
 	mfspr	r10,DSISR
@@ -1194,7 +1177,7 @@ _GLOBAL(pSeries_secondary_smp_init)
 	bl	.__restore_cpu_setup

 	/* Set up a paca value for this processor. Since we have the
-	 * physical cpu id in r3, we need to search the pacas to find
+	 * physical cpu id in r24, we need to search the pacas to find
 	 * which logical id maps to our physical one.
 	 */
 	LOADADDR(r13, paca)		/* Get base vaddr of paca array	 */
@@ -1207,8 +1190,8 @@ _GLOBAL(pSeries_secondary_smp_init)
 	cmpwi	r5,NR_CPUS
 	blt	1b

-99:	HMT_LOW				/* Couldn't find our CPU id  */
-	b	99b
+	mr	r3,r24			/* not found, copy phys to r3	 */
+	b	.kexec_wait		/* next kernel might do better	 */

 2:	mtspr	SPRG3,r13		/* Save vaddr of paca in SPRG3	 */
 	/* From now on, r24 is expected to be logical cpuid */
@@ -2088,7 +2071,7 @@ _GLOBAL(hmt_start_secondary)
 	blr
 #endif

-#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES)
+#if defined(CONFIG_KEXEC) || (defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES))
 _GLOBAL(smp_release_cpus)
 	/* All secondary cpus are spinning on a common
 	 * spinloop, release them all now so they can start
@@ -2121,13 +2104,6 @@ empty_zero_page:
 swapper_pg_dir:
 	.space 4096

-#ifdef CONFIG_SMP
-/* 1 page segment table per cpu (max 48, cpu0 allocated at STAB0_PHYS_ADDR) */
-	.globl stab_array
-stab_array:
-	.space 4096 * 48
-#endif
-
 /*
  * This space gets a copy of optional info passed to us by the bootstrap
  * Used to pass parameters into the kernel like root=/dev/sda1, etc.
diff --git a/arch/ppc64/kernel/hvconsole.c b/arch/ppc64/kernel/hvconsole.c
index c72fb8ffe97..138e128a388 100644
--- a/arch/ppc64/kernel/hvconsole.c
+++ b/arch/ppc64/kernel/hvconsole.c
@@ -27,7 +27,6 @@
 #include <linux/module.h>
 #include <asm/hvcall.h>
 #include <asm/hvconsole.h>
-#include <asm/prom.h>

 /**
  * hvc_get_chars - retrieve characters from firmware for denoted vterm adapter
@@ -42,29 +41,14 @@ int hvc_get_chars(uint32_t vtermno, char *buf, int count)
 	unsigned long got;

 	if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got,
-		(unsigned long *)buf, (unsigned long *)buf+1) == H_Success) {
-		/*
-		 * Work around a HV bug where it gives us a null
-		 * after every \r.
-- paulus - */ - if (got > 0) { - int i; - for (i = 1; i < got; ++i) { - if (buf[i] == 0 && buf[i-1] == '\r') { - --got; - if (i < got) - memmove(&buf[i], &buf[i+1], - got - i); - } - } - } + (unsigned long *)buf, (unsigned long *)buf+1) == H_Success) return got; - } return 0; } EXPORT_SYMBOL(hvc_get_chars); + /** * hvc_put_chars: send characters to firmware for denoted vterm adapter * @vtermno: The vtermno or unit_address of the adapter from which the data @@ -88,34 +72,3 @@ int hvc_put_chars(uint32_t vtermno, const char *buf, int count) } EXPORT_SYMBOL(hvc_put_chars); - -/* - * We hope/assume that the first vty found corresponds to the first console - * device. - */ -int hvc_find_vtys(void) -{ - struct device_node *vty; - int num_found = 0; - - for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL; - vty = of_find_node_by_name(vty, "vty")) { - uint32_t *vtermno; - - /* We have statically defined space for only a certain number of - * console adapters. */ - if (num_found >= MAX_NR_HVC_CONSOLES) - break; - - vtermno = (uint32_t *)get_property(vty, "reg", NULL); - if (!vtermno) - continue; - - if (device_is_compatible(vty, "hvterm1")) { - hvc_instantiate(*vtermno, num_found); - ++num_found; - } - } - - return num_found; -} diff --git a/arch/ppc64/kernel/iSeries_htab.c b/arch/ppc64/kernel/iSeries_htab.c index aa9e8fdd1a4..b0250ae4a72 100644 --- a/arch/ppc64/kernel/iSeries_htab.c +++ b/arch/ppc64/kernel/iSeries_htab.c @@ -38,11 +38,12 @@ static inline void iSeries_hunlock(unsigned long slot) } static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, - unsigned long prpn, int secondary, - unsigned long hpteflags, int bolted, int large) + unsigned long prpn, unsigned long vflags, + unsigned long rflags) { long slot; - HPTE lhpte; + hpte_t lhpte; + int secondary = 0; /* * The hypervisor tries both primary and secondary. @@ -50,13 +51,13 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, * it means we have already tried both primary and secondary, * so we return failure immediately. 
 */
-	if (secondary)
+	if (vflags & HPTE_V_SECONDARY)
 		return -1;

 	iSeries_hlock(hpte_group);

 	slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
-	BUG_ON(lhpte.dw0.dw0.v);
+	BUG_ON(lhpte.v & HPTE_V_VALID);

 	if (slot == -1)	{ /* No available entry found in either group */
 		iSeries_hunlock(hpte_group);
@@ -64,19 +65,13 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
 	}
 	if (slot < 0) {		/* MSB set means secondary group */
+		vflags |= HPTE_V_SECONDARY;
 		secondary = 1;
 		slot &= 0x7fffffffffffffff;
 	}

-	lhpte.dw1.dword1 = 0;
-	lhpte.dw1.dw1.rpn = physRpn_to_absRpn(prpn);
-	lhpte.dw1.flags.flags = hpteflags;
-
-	lhpte.dw0.dword0 = 0;
-	lhpte.dw0.dw0.avpn = va >> 23;
-	lhpte.dw0.dw0.h = secondary;
-	lhpte.dw0.dw0.bolted = bolted;
-	lhpte.dw0.dw0.v = 1;
+	lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
+	lhpte.r = (physRpn_to_absRpn(prpn) << HPTE_R_RPN_SHIFT) | rflags;

 	/* Now fill in the actual HPTE */
 	HvCallHpt_addValidate(slot, secondary, &lhpte);
@@ -88,20 +83,17 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,

 static unsigned long iSeries_hpte_getword0(unsigned long slot)
 {
-	unsigned long dword0;
-	HPTE hpte;
+	hpte_t hpte;

 	HvCallHpt_get(&hpte, slot);

-	dword0 = hpte.dw0.dword0;
-
-	return dword0;
+	return hpte.v;
 }

 static long iSeries_hpte_remove(unsigned long hpte_group)
 {
 	unsigned long slot_offset;
 	int i;
-	HPTE lhpte;
+	unsigned long hpte_v;

 	/* Pick a random slot to start at */
 	slot_offset = mftb() & 0x7;
@@ -109,10 +101,9 @@ static long iSeries_hpte_remove(unsigned long hpte_group)
 	iSeries_hlock(hpte_group);

 	for (i = 0; i < HPTES_PER_GROUP; i++) {
-		lhpte.dw0.dword0 =
-			iSeries_hpte_getword0(hpte_group + slot_offset);
+		hpte_v = iSeries_hpte_getword0(hpte_group + slot_offset);

-		if (!lhpte.dw0.dw0.bolted) {
+		if (! (hpte_v & HPTE_V_BOLTED)) {
 			HvCallHpt_invalidateSetSwBitsGet(hpte_group +
 							 slot_offset, 0, 0);
 			iSeries_hunlock(hpte_group);
@@ -137,13 +128,13 @@ static long iSeries_hpte_remove(unsigned long hpte_group)
 static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
 				  unsigned long va, int large, int local)
 {
-	HPTE hpte;
+	hpte_t hpte;
 	unsigned long avpn = va >> 23;

 	iSeries_hlock(slot);

 	HvCallHpt_get(&hpte, slot);
-	if ((hpte.dw0.dw0.avpn == avpn) && (hpte.dw0.dw0.v)) {
+	if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) {
 		/*
 		 * Hypervisor expects bits as NPPP, which is
 		 * different from how they are mapped in our PP.
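The hunks above are part of a tree-wide conversion from the old HPTE bitfield struct (dw0/dw1) to the flat two-word hpte_t. Here is a minimal standalone sketch of the new encoding; the constant values are copied by hand from the 2.6-era asm-ppc64/mmu.h and are not part of this patch, so treat them as assumptions:

#include <stdint.h>

/* Assumed flag values, per 2.6-era asm-ppc64/mmu.h */
#define HPTE_V_AVPN_SHIFT	7
#define HPTE_V_BOLTED		0x0000000000000010UL
#define HPTE_V_LARGE		0x0000000000000004UL
#define HPTE_V_SECONDARY	0x0000000000000002UL
#define HPTE_V_VALID		0x0000000000000001UL
#define HPTE_R_RPN_SHIFT	12

/* Flat replacement for the old dw0/dw1 bitfield HPTE */
typedef struct {
	uint64_t v;	/* AVPN plus bolted/secondary/valid flag bits */
	uint64_t r;	/* real page number plus protection flags */
} hpte_t;

/*
 * Compose an entry the way the converted insert paths do: shift the
 * AVPN (va >> 23) into place and OR in the caller's flag words.
 * physRpn_to_absRpn() is omitted here; prpn stands in for its result.
 */
static hpte_t make_hpte(uint64_t va, uint64_t prpn,
			uint64_t vflags, uint64_t rflags)
{
	hpte_t h;

	h.v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID;
	h.r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
	return h;
}

A bolted kernel mapping would then be make_hpte(va, prpn, HPTE_V_BOLTED, mode), which is the same shape iSeries_make_pte builds further down in this patch.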
@@ -167,7 +158,7 @@ static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp, */ static long iSeries_hpte_find(unsigned long vpn) { - HPTE hpte; + hpte_t hpte; long slot; /* @@ -177,7 +168,7 @@ static long iSeries_hpte_find(unsigned long vpn) * 0x80000000xxxxxxxx : Entry found in secondary group, slot x */ slot = HvCallHpt_findValid(&hpte, vpn); - if (hpte.dw0.dw0.v) { + if (hpte.v & HPTE_V_VALID) { if (slot < 0) { slot &= 0x7fffffffffffffff; slot = -slot; @@ -212,7 +203,7 @@ static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va, int large, int local) { - HPTE lhpte; + unsigned long hpte_v; unsigned long avpn = va >> 23; unsigned long flags; @@ -220,9 +211,9 @@ static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va, iSeries_hlock(slot); - lhpte.dw0.dword0 = iSeries_hpte_getword0(slot); + hpte_v = iSeries_hpte_getword0(slot); - if ((lhpte.dw0.dw0.avpn == avpn) && lhpte.dw0.dw0.v) + if ((HPTE_V_AVPN_VAL(hpte_v) == avpn) && (hpte_v & HPTE_V_VALID)) HvCallHpt_invalidateSetSwBitsGet(slot, 0, 0); iSeries_hunlock(slot); diff --git a/arch/ppc64/kernel/iSeries_proc.c b/arch/ppc64/kernel/iSeries_proc.c index 356bd9931fc..0fe3116eba2 100644 --- a/arch/ppc64/kernel/iSeries_proc.c +++ b/arch/ppc64/kernel/iSeries_proc.c @@ -40,50 +40,6 @@ static int __init iseries_proc_create(void) } core_initcall(iseries_proc_create); -static char *event_types[9] = { - "Hypervisor\t\t", - "Machine Facilities\t", - "Session Manager\t", - "SPD I/O\t\t", - "Virtual Bus\t\t", - "PCI I/O\t\t", - "RIO I/O\t\t", - "Virtual Lan\t\t", - "Virtual I/O\t\t" -}; - -static int proc_lpevents_show(struct seq_file *m, void *v) -{ - unsigned int i; - - seq_printf(m, "LpEventQueue 0\n"); - seq_printf(m, " events processed:\t%lu\n", - (unsigned long)xItLpQueue.xLpIntCount); - - for (i = 0; i < 9; ++i) - seq_printf(m, " %s %10lu\n", event_types[i], - (unsigned long)xItLpQueue.xLpIntCountByType[i]); - - seq_printf(m, "\n events processed by processor:\n"); - - for_each_online_cpu(i) - seq_printf(m, " CPU%02d %10u\n", i, paca[i].lpevent_count); - - return 0; -} - -static int proc_lpevents_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_lpevents_show, NULL); -} - -static struct file_operations proc_lpevents_operations = { - .open = proc_lpevents_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static unsigned long startTitan = 0; static unsigned long startTb = 0; @@ -148,10 +104,6 @@ static int __init iseries_proc_init(void) { struct proc_dir_entry *e; - e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL); - if (e) - e->proc_fops = &proc_lpevents_operations; - e = create_proc_entry("iSeries/titanTod", S_IFREG|S_IRUGO, NULL); if (e) e->proc_fops = &proc_titantod_operations; diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index b31962436fe..a649edbb23b 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -24,7 +24,6 @@ #include <linux/smp.h> #include <linux/param.h> #include <linux/string.h> -#include <linux/bootmem.h> #include <linux/initrd.h> #include <linux/seq_file.h> #include <linux/kdev_t.h> @@ -504,7 +503,7 @@ static void __init build_iSeries_Memory_Map(void) /* Fill in the hashed page table hash mask */ num_ptegs = hptSizePages * - (PAGE_SIZE / (sizeof(HPTE) * HPTES_PER_GROUP)); + (PAGE_SIZE / (sizeof(hpte_t) * HPTES_PER_GROUP)); htab_hash_mask = num_ptegs 
- 1; /* @@ -619,25 +618,23 @@ static void __init setup_iSeries_cache_sizes(void) static void iSeries_make_pte(unsigned long va, unsigned long pa, int mode) { - HPTE local_hpte, rhpte; + hpte_t local_hpte, rhpte; unsigned long hash, vpn; long slot; vpn = va >> PAGE_SHIFT; hash = hpt_hash(vpn, 0); - local_hpte.dw1.dword1 = pa | mode; - local_hpte.dw0.dword0 = 0; - local_hpte.dw0.dw0.avpn = va >> 23; - local_hpte.dw0.dw0.bolted = 1; /* bolted */ - local_hpte.dw0.dw0.v = 1; + local_hpte.r = pa | mode; + local_hpte.v = ((va >> 23) << HPTE_V_AVPN_SHIFT) + | HPTE_V_BOLTED | HPTE_V_VALID; slot = HvCallHpt_findValid(&rhpte, vpn); if (slot < 0) { /* Must find space in primary group */ panic("hash_page: hpte already exists\n"); } - HvCallHpt_addValidate(slot, 0, (HPTE *)&local_hpte ); + HvCallHpt_addValidate(slot, 0, &local_hpte); } /* @@ -647,7 +644,7 @@ static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr) { unsigned long pa; unsigned long mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX; - HPTE hpte; + hpte_t hpte; for (pa = saddr; pa < eaddr ;pa += PAGE_SIZE) { unsigned long ea = (unsigned long)__va(pa); @@ -660,7 +657,7 @@ static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr) if (!in_kernel_text(ea)) mode_rw |= HW_NO_EXEC; - if (hpte.dw0.dw0.v) { + if (hpte.v & HPTE_V_VALID) { /* HPTE exists, so just bolt it */ HvCallHpt_setSwBits(slot, 0x10, 0); /* And make sure the pp bits are correct */ @@ -671,15 +668,11 @@ static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr) } } -extern unsigned long ppc_proc_freq; -extern unsigned long ppc_tb_freq; - /* * Document me. */ static void __init iSeries_setup_arch(void) { - void *eventStack; unsigned procIx = get_paca()->lppaca.dyn_hv_phys_proc_index; /* Add an eye catcher and the systemcfg layout version number */ @@ -688,24 +681,7 @@ static void __init iSeries_setup_arch(void) systemcfg->version.minor = SYSTEMCFG_MINOR; /* Setup the Lp Event Queue */ - - /* Allocate a page for the Event Stack - * The hypervisor wants the absolute real address, so - * we subtract out the KERNELBASE and add in the - * absolute real address of the kernel load area - */ - eventStack = alloc_bootmem_pages(LpEventStackSize); - memset(eventStack, 0, LpEventStackSize); - - /* Invoke the hypervisor to initialize the event stack */ - HvCallEvent_setLpEventStack(0, eventStack, LpEventStackSize); - - /* Initialize fields in our Lp Event Queue */ - xItLpQueue.xSlicEventStackPtr = (char *)eventStack; - xItLpQueue.xSlicCurEventPtr = (char *)eventStack; - xItLpQueue.xSlicLastValidEventPtr = (char *)eventStack + - (LpEventStackSize - LpEventMaxSize); - xItLpQueue.xIndex = 0; + setup_hvlpevent_queue(); /* Compute processor frequency */ procFreqHz = ((1UL << 34) * 1000000) / @@ -772,8 +748,6 @@ static void iSeries_halt(void) mf_power_off(); } -extern void setup_default_decr(void); - /* * void __init iSeries_calibrate_decr() * @@ -858,27 +832,91 @@ static int __init iSeries_src_init(void) late_initcall(iSeries_src_init); -static int set_spread_lpevents(char *str) +static inline void process_iSeries_events(void) { - unsigned long i; - unsigned long val = simple_strtoul(str, NULL, 0); + asm volatile ("li 0,0x5555; sc" : : : "r0", "r3"); +} + +static void yield_shared_processor(void) +{ + unsigned long tb; + + HvCall_setEnabledInterrupts(HvCall_MaskIPI | + HvCall_MaskLpEvent | + HvCall_MaskLpProd | + HvCall_MaskTimeout); + + tb = get_tb(); + /* Compute future tb value when yield should expire */ + 
HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); /* - * The parameter is the number of processors to share in processing - * lp events. + * The decrementer stops during the yield. Force a fake decrementer + * here and let the timer_interrupt code sort out the actual time. */ - if (( val > 0) && (val <= NR_CPUS)) { - for (i = 1; i < val; ++i) - paca[i].lpqueue_ptr = paca[0].lpqueue_ptr; + get_paca()->lppaca.int_dword.fields.decr_int = 1; + process_iSeries_events(); +} - printk("lpevent processing spread over %ld processors\n", val); - } else { - printk("invalid spread_lpevents %ld\n", val); +static int iseries_shared_idle(void) +{ + while (1) { + while (!need_resched() && !hvlpevent_is_pending()) { + local_irq_disable(); + ppc64_runlatch_off(); + + /* Recheck with irqs off */ + if (!need_resched() && !hvlpevent_is_pending()) + yield_shared_processor(); + + HMT_medium(); + local_irq_enable(); + } + + ppc64_runlatch_on(); + + if (hvlpevent_is_pending()) + process_iSeries_events(); + + schedule(); } - return 1; + return 0; +} + +static int iseries_dedicated_idle(void) +{ + long oldval; + + while (1) { + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + while (!need_resched()) { + ppc64_runlatch_off(); + HMT_low(); + + if (hvlpevent_is_pending()) { + HMT_medium(); + ppc64_runlatch_on(); + process_iSeries_events(); + } + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + ppc64_runlatch_on(); + schedule(); + } + + return 0; } -__setup("spread_lpevents=", set_spread_lpevents); #ifndef CONFIG_PCI void __init iSeries_init_IRQ(void) { } @@ -905,5 +943,13 @@ void __init iSeries_early_setup(void) ppc_md.get_rtc_time = iSeries_get_rtc_time; ppc_md.calibrate_decr = iSeries_calibrate_decr; ppc_md.progress = iSeries_progress; + + if (get_paca()->lppaca.shared_proc) { + ppc_md.idle_loop = iseries_shared_idle; + printk(KERN_INFO "Using shared processor idle loop\n"); + } else { + ppc_md.idle_loop = iseries_dedicated_idle; + printk(KERN_INFO "Using dedicated idle loop\n"); + } } diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index bdf13b4dc1c..954395d4263 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -20,109 +20,18 @@ #include <linux/kernel.h> #include <linux/smp.h> #include <linux/cpu.h> -#include <linux/module.h> #include <linux/sysctl.h> -#include <linux/smp.h> #include <asm/system.h> #include <asm/processor.h> -#include <asm/mmu.h> #include <asm/cputable.h> #include <asm/time.h> -#include <asm/iSeries/HvCall.h> -#include <asm/iSeries/ItLpQueue.h> -#include <asm/plpar_wrappers.h> #include <asm/systemcfg.h> +#include <asm/machdep.h> extern void power4_idle(void); -static int (*idle_loop)(void); - -#ifdef CONFIG_PPC_ISERIES -static unsigned long maxYieldTime = 0; -static unsigned long minYieldTime = 0xffffffffffffffffUL; - -static inline void process_iSeries_events(void) -{ - asm volatile ("li 0,0x5555; sc" : : : "r0", "r3"); -} - -static void yield_shared_processor(void) -{ - unsigned long tb; - unsigned long yieldTime; - - HvCall_setEnabledInterrupts(HvCall_MaskIPI | - HvCall_MaskLpEvent | - HvCall_MaskLpProd | - HvCall_MaskTimeout); - - tb = get_tb(); - /* Compute future tb value when yield should expire */ - HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); - - yieldTime = get_tb() - tb; - if (yieldTime > maxYieldTime) - maxYieldTime = yieldTime; - - if (yieldTime < minYieldTime) - minYieldTime = yieldTime; - - /* 
- * The decrementer stops during the yield. Force a fake decrementer - * here and let the timer_interrupt code sort out the actual time. - */ - get_paca()->lppaca.int_dword.fields.decr_int = 1; - process_iSeries_events(); -} - -static int iSeries_idle(void) -{ - struct paca_struct *lpaca; - long oldval; - - /* ensure iSeries run light will be out when idle */ - ppc64_runlatch_off(); - - lpaca = get_paca(); - - while (1) { - if (lpaca->lppaca.shared_proc) { - if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr)) - process_iSeries_events(); - if (!need_resched()) - yield_shared_processor(); - } else { - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - - while (!need_resched()) { - HMT_medium(); - if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr)) - process_iSeries_events(); - HMT_low(); - } - - HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } - } - - ppc64_runlatch_on(); - schedule(); - ppc64_runlatch_off(); - } - - return 0; -} - -#else - -static int default_idle(void) +int default_idle(void) { long oldval; unsigned int cpu = smp_processor_id(); @@ -134,7 +43,8 @@ static int default_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (!need_resched() && !cpu_is_offline(cpu)) { - barrier(); + ppc64_runlatch_off(); + /* * Go into low thread priority and possibly * low power mode. @@ -149,6 +59,7 @@ static int default_idle(void) set_need_resched(); } + ppc64_runlatch_on(); schedule(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); @@ -157,127 +68,19 @@ static int default_idle(void) return 0; } -#ifdef CONFIG_PPC_PSERIES - -DECLARE_PER_CPU(unsigned long, smt_snooze_delay); - -int dedicated_idle(void) +int native_idle(void) { - long oldval; - struct paca_struct *lpaca = get_paca(), *ppaca; - unsigned long start_snooze; - unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); - unsigned int cpu = smp_processor_id(); - - ppaca = &paca[cpu ^ 1]; - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - lpaca->lppaca.idle = 1; - - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - start_snooze = __get_tb() + - *smt_snooze_delay * tb_ticks_per_usec; - while (!need_resched() && !cpu_is_offline(cpu)) { - /* - * Go into low thread priority and possibly - * low power mode. - */ - HMT_low(); - HMT_very_low(); - - if (*smt_snooze_delay == 0 || - __get_tb() < start_snooze) - continue; - - HMT_medium(); - - if (!(ppaca->lppaca.idle)) { - local_irq_disable(); - - /* - * We are about to sleep the thread - * and so wont be polling any - * more. - */ - clear_thread_flag(TIF_POLLING_NRFLAG); - - /* - * SMT dynamic mode. Cede will result - * in this thread going dormant, if the - * partner thread is still doing work. - * Thread wakes up if partner goes idle, - * an interrupt is presented, or a prod - * occurs. Returning from the cede - * enables external interrupts. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - } else { - /* - * Give the HV an opportunity at the - * processor, since we are not doing - * any work. 
- */ - poll_pending(); - } - } - - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } - - HMT_medium(); - lpaca->lppaca.idle = 0; - schedule(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); - } - return 0; -} - -static int shared_idle(void) -{ - struct paca_struct *lpaca = get_paca(); - unsigned int cpu = smp_processor_id(); - - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - lpaca->lppaca.idle = 1; + ppc64_runlatch_off(); - while (!need_resched() && !cpu_is_offline(cpu)) { - local_irq_disable(); + if (!need_resched()) + power4_idle(); - /* - * Yield the processor to the hypervisor. We return if - * an external interrupt occurs (which are driven prior - * to returning here) or if a prod occurs from another - * processor. When returning here, external interrupts - * are enabled. - * - * Check need_resched() again with interrupts disabled - * to avoid a race. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); + if (need_resched()) { + ppc64_runlatch_on(); + schedule(); } - HMT_medium(); - lpaca->lppaca.idle = 0; - schedule(); if (cpu_is_offline(smp_processor_id()) && system_state == SYSTEM_RUNNING) cpu_die(); @@ -286,29 +89,10 @@ static int shared_idle(void) return 0; } -#endif /* CONFIG_PPC_PSERIES */ - -static int native_idle(void) -{ - while(1) { - /* check CPU type here */ - if (!need_resched()) - power4_idle(); - if (need_resched()) - schedule(); - - if (cpu_is_offline(raw_smp_processor_id()) && - system_state == SYSTEM_RUNNING) - cpu_die(); - } - return 0; -} - -#endif /* CONFIG_PPC_ISERIES */ - void cpu_idle(void) { - idle_loop(); + BUG_ON(NULL == ppc_md.idle_loop); + ppc_md.idle_loop(); } int powersave_nap; @@ -342,42 +126,3 @@ register_powersave_nap_sysctl(void) } __initcall(register_powersave_nap_sysctl); #endif - -int idle_setup(void) -{ - /* - * Move that junk to each platform specific file, eventually define - * a pSeries_idle for shared processor stuff - */ -#ifdef CONFIG_PPC_ISERIES - idle_loop = iSeries_idle; - return 1; -#else - idle_loop = default_idle; -#endif -#ifdef CONFIG_PPC_PSERIES - if (systemcfg->platform & PLATFORM_PSERIES) { - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { - if (get_paca()->lppaca.shared_proc) { - printk(KERN_INFO "Using shared processor idle loop\n"); - idle_loop = shared_idle; - } else { - printk(KERN_INFO "Using dedicated idle loop\n"); - idle_loop = dedicated_idle; - } - } else { - printk(KERN_INFO "Using default idle loop\n"); - idle_loop = default_idle; - } - } -#endif /* CONFIG_PPC_PSERIES */ -#ifndef CONFIG_PPC_ISERIES - if (systemcfg->platform == PLATFORM_POWERMAC || - systemcfg->platform == PLATFORM_MAPLE) { - printk(KERN_INFO "Using native/NAP idle loop\n"); - idle_loop = native_idle; - } -#endif /* CONFIG_PPC_ISERIES */ - - return 1; -} diff --git a/arch/ppc64/kernel/irq.c b/arch/ppc64/kernel/irq.c index d860467b8f0..f41afe54504 100644 --- a/arch/ppc64/kernel/irq.c +++ b/arch/ppc64/kernel/irq.c @@ -66,7 +66,6 @@ EXPORT_SYMBOL(irq_desc); int distribute_irqs = 1; int __irq_offset_value; int ppc_spurious_interrupts; -unsigned long lpevent_count; u64 ppc64_interrupt_controller; int show_interrupts(struct seq_file *p, void *v) @@ -245,7 +244,7 @@ void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq) spin_lock(&desc->lock); if (!noirqdebug) - note_interrupt(irq, desc, action_ret); + note_interrupt(irq, desc, action_ret, regs); if (likely(!(desc->status & IRQ_PENDING))) 
break; desc->status &= ~IRQ_PENDING; @@ -269,7 +268,6 @@ out: void do_IRQ(struct pt_regs *regs) { struct paca_struct *lpaca; - struct ItLpQueue *lpq; irq_enter(); @@ -295,9 +293,8 @@ void do_IRQ(struct pt_regs *regs) iSeries_smp_message_recv(regs); } #endif /* CONFIG_SMP */ - lpq = lpaca->lpqueue_ptr; - if (lpq && ItLpQueue_isLpIntPending(lpq)) - lpevent_count += ItLpQueue_process(lpq, regs); + if (hvlpevent_is_pending()) + process_hvlpevents(regs); irq_exit(); @@ -395,6 +392,9 @@ int virt_irq_create_mapping(unsigned int real_irq) if (ppc64_interrupt_controller == IC_OPEN_PIC) return real_irq; /* no mapping for openpic (for now) */ + if (ppc64_interrupt_controller == IC_BPA_IIC) + return real_irq; /* no mapping for iic either */ + /* don't map interrupts < MIN_VIRT_IRQ */ if (real_irq < MIN_VIRT_IRQ) { virt_irq_to_real_map[real_irq] = real_irq; diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index e950a2058a1..a3d519518fb 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -32,15 +32,16 @@ #include <linux/ptrace.h> #include <linux/spinlock.h> #include <linux/preempt.h> +#include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/sstep.h> -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 +static DECLARE_MUTEX(kprobe_mutex); static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_saved_msr; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_status_prev, kprobe_saved_msr_prev; static struct pt_regs jprobe_saved_regs; int arch_prepare_kprobe(struct kprobe *p) @@ -55,32 +56,87 @@ int arch_prepare_kprobe(struct kprobe *p) printk("Cannot register a kprobe on rfid or mtmsrd\n"); ret = -EINVAL; } + + /* insn must be on a special executable page on ppc64 */ + if (!ret) { + up(&kprobe_mutex); + p->ainsn.insn = get_insn_slot(); + down(&kprobe_mutex); + if (!p->ainsn.insn) + ret = -ENOMEM; + } return ret; } void arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; } -void arch_remove_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +void arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; - regs->nip = (unsigned long)p->addr; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ + up(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + down(&kprobe_mutex); } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { + kprobe_opcode_t insn = *p->ainsn.insn; + regs->msr |= MSR_SE; - /*single step inline if it a breakpoint instruction*/ - if (p->opcode == BREAKPOINT_INSTRUCTION) + + /* single step inline if it is a trap variant */ + if (IS_TW(insn) || IS_TD(insn) || IS_TWI(insn) || IS_TDI(insn)) regs->nip = (unsigned long)p->addr; else - regs->nip = (unsigned long)&p->ainsn.insn; + regs->nip = (unsigned long)p->ainsn.insn; +} + +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; + kprobe_saved_msr_prev = kprobe_saved_msr; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; + kprobe_saved_msr 
= kprobe_saved_msr_prev; +} + +void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +{ + struct kretprobe_instance *ri; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->task = current; + ri->ret_addr = (kprobe_opcode_t *)regs->link; + + /* Replace the return addr with trampoline addr */ + regs->link = (unsigned long)kretprobe_trampoline; + add_rp_inst(ri); + } else { + rp->nmissed++; + } } static inline int kprobe_handler(struct pt_regs *regs) @@ -101,8 +157,19 @@ static inline int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + current_kprobe = p; + kprobe_saved_msr = regs->msr; + p->nmissed++; + prepare_singlestep(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) { @@ -162,6 +229,78 @@ no_kprobe: } /* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +void kretprobe_trampoline_holder(void) +{ + asm volatile(".global kretprobe_trampoline\n" + "kretprobe_trampoline:\n" + "nop\n"); +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head; + struct hlist_node *node, *tmp; + unsigned long orig_ret_address = 0; + unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; + + head = kretprobe_inst_table_head(current); + + /* + * It is possible to have multiple instances associated with a given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more then one return + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); + regs->nip = orig_ret_address; + + unlock_kprobes(); + + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we have handled unlocking + * and re-enabling preemption. + */ + return 1; +} + +/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "breakpoint" * instruction. 
To avoid the SMP problems that can occur when we @@ -172,9 +311,10 @@ no_kprobe: static void resume_execution(struct kprobe *p, struct pt_regs *regs) { int ret; + unsigned int insn = *p->ainsn.insn; regs->nip = (unsigned long)p->addr; - ret = emulate_step(regs, p->ainsn.insn[0]); + ret = emulate_step(regs, insn); if (ret == 0) regs->nip = (unsigned long)p->addr + 4; } @@ -184,13 +324,21 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } resume_execution(current_kprobe, regs); regs->msr |= kprobe_saved_msr; + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); /* @@ -290,3 +438,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs)); return 1; } + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c index 387923fcf9b..02e96627fa6 100644 --- a/arch/ppc64/kernel/lparcfg.c +++ b/arch/ppc64/kernel/lparcfg.c @@ -34,6 +34,7 @@ #include <asm/system.h> #include <asm/time.h> #include <asm/iSeries/ItExtVpdPanel.h> +#include <asm/prom.h> #define MODULE_VERS "1.6" #define MODULE_NAME "lparcfg" diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c new file mode 100644 index 00000000000..4775f12a013 --- /dev/null +++ b/arch/ppc64/kernel/machine_kexec.c @@ -0,0 +1,306 @@ +/* + * machine_kexec.c - handle transition of Linux booting another kernel + * + * Copyright (C) 2004-2005, IBM Corp. + * + * Created by: Milton D Miller II + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +#include <linux/cpumask.h> +#include <linux/kexec.h> +#include <linux/smp.h> +#include <linux/thread_info.h> +#include <linux/errno.h> + +#include <asm/page.h> +#include <asm/current.h> +#include <asm/machdep.h> +#include <asm/cacheflush.h> +#include <asm/paca.h> +#include <asm/mmu.h> +#include <asm/sections.h> /* _end */ +#include <asm/prom.h> + +#define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ + +/* Have this around till we move it into crash specific file */ +note_buf_t crash_notes[NR_CPUS]; + +/* Dummy for now. Not sure if we need to have a crash shutdown in here + * and if what it will achieve. 
Letting it be for now so the code compiles
+ * in a generic kexec environment
+ */
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/* do nothing right now */
+	/* smp_release_cpus() if we want smp on panic kernel */
+	/* cpu_irq_down to isolate us until we are ready */
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	int i;
+	unsigned long begin, end;	/* limits of segment */
+	unsigned long low, high;	/* limits of blocked memory range */
+	struct device_node *node;
+	unsigned long *basep;
+	unsigned int *sizep;
+
+	if (!ppc_md.hpte_clear_all)
+		return -ENOENT;
+
+	/*
+	 * Since we use the kernel fault handlers and paging code to
+	 * handle the virtual mode, we must make sure no destination
+	 * overlaps kernel static data or bss.
+	 */
+	for (i = 0; i < image->nr_segments; i++)
+		if (image->segment[i].mem < __pa(_end))
+			return -ETXTBSY;
+
+	/*
+	 * For non-LPAR, we absolutely can not overwrite the mmu hash
+	 * table, since we are still using the bolted entries in it to
+	 * do the copy.  Check that here.
+	 *
+	 * It is safe if the end is below the start of the blocked
+	 * region (end <= low), or if the beginning is after the
+	 * end of the blocked region (begin >= high).  Use the
+	 * boolean identity !(a || b) === (!a && !b).
+	 */
+	if (htab_address) {
+		low = __pa(htab_address);
+		high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;
+
+		for (i = 0; i < image->nr_segments; i++) {
+			begin = image->segment[i].mem;
+			end = begin + image->segment[i].memsz;
+
+			if ((begin < high) && (end > low))
+				return -ETXTBSY;
+		}
+	}
+
+	/* We also should not overwrite the tce tables */
+	for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
+			node = of_find_node_by_type(node, "pci")) {
+		basep = (unsigned long *)get_property(node, "linux,tce-base",
+							NULL);
+		sizep = (unsigned int *)get_property(node, "linux,tce-size",
+							NULL);
+		if (basep == NULL || sizep == NULL)
+			continue;
+
+		low = *basep;
+		high = low + (*sizep);
+
+		for (i = 0; i < image->nr_segments; i++) {
+			begin = image->segment[i].mem;
+			end = begin + image->segment[i].memsz;
+
+			if ((begin < high) && (end > low))
+				return -ETXTBSY;
+		}
+	}
+
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+	/* we do nothing in prepare that needs to be undone */
+}
+
+#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
+
+static void copy_segments(unsigned long ind)
+{
+	unsigned long entry;
+	unsigned long *ptr;
+	void *dest;
+	void *addr;
+
+	/*
+	 * We rely on kexec_load to create lists that properly
+	 * initialize these pointers before they are used.
+	 * We will still crash if the list is wrong, but at least
+	 * the compiler will be quiet.
+	 */
+	ptr = NULL;
+	dest = NULL;
+
+	for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+		addr = __va(entry & PAGE_MASK);
+
+		switch (entry & IND_FLAGS) {
+		case IND_DESTINATION:
+			dest = addr;
+			break;
+		case IND_INDIRECTION:
+			ptr = addr;
+			break;
+		case IND_SOURCE:
+			copy_page(dest, addr);
+			dest += PAGE_SIZE;
+		}
+	}
+}
+
+void kexec_copy_flush(struct kimage *image)
+{
+	long i, nr_segments = image->nr_segments;
+	struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+	/* save the ranges on the stack to efficiently flush the icache */
+	memcpy(ranges, image->segment, sizeof(ranges));
+
+	/*
+	 * After this call we may not use anything allocated in dynamic
+	 * memory, including *image.
+	 *
+	 * Only globals and the stack are allowed.
+	 */
+	copy_segments(image->head);
+
+	/*
+	 * we need to clear the icache for all dest pages sometime,
+	 * including ones that were in place on the original copy
+	 */
+	for (i = 0; i < nr_segments; i++)
+		flush_icache_range(ranges[i].mem + KERNELBASE,
+				ranges[i].mem + KERNELBASE +
+				ranges[i].memsz);
+}
+
+#ifdef CONFIG_SMP
+
+/* FIXME: we should schedule this function to be called on all cpus based
+ * on calling the interrupts, but we would like to call it off irq level
+ * so that the interrupt controller is clean.
+ */
+void kexec_smp_down(void *arg)
+{
+	if (ppc_md.cpu_irq_down)
+		ppc_md.cpu_irq_down(1);
+
+	local_irq_disable();
+	kexec_smp_wait();
+	/* NOTREACHED */
+}
+
+static void kexec_prepare_cpus(void)
+{
+	int my_cpu, i, notified=-1;
+
+	smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
+	my_cpu = get_cpu();
+
+	/* check the other cpus are now down (via paca hw cpu id == -1) */
+	for (i=0; i < NR_CPUS; i++) {
+		if (i == my_cpu)
+			continue;
+
+		while (paca[i].hw_cpu_id != -1) {
+			if (!cpu_possible(i)) {
+				printk("kexec: cpu %d hw_cpu_id %d is not"
+						" possible, ignoring\n",
+						i, paca[i].hw_cpu_id);
+				break;
+			}
+			if (!cpu_online(i)) {
+				/* Fixme: this can be spinning in
+				 * pSeries_secondary_wait with a paca
+				 * waiting for it to go online.
+				 */
+				printk("kexec: cpu %d hw_cpu_id %d is not"
+						" online, ignoring\n",
+						i, paca[i].hw_cpu_id);
+				break;
+			}
+			if (i != notified) {
+				printk( "kexec: waiting for cpu %d (physical"
+						" %d) to go down\n",
+						i, paca[i].hw_cpu_id);
+				notified = i;
+			}
+		}
+	}
+
+	/* after we tell the others to go down */
+	if (ppc_md.cpu_irq_down)
+		ppc_md.cpu_irq_down(0);
+
+	put_cpu();
+
+	local_irq_disable();
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+	extern void smp_release_cpus(void);
+	/*
+	 * move the secondaries to us so that we can copy
+	 * the new kernel 0-0x100 safely
+	 *
+	 * do this if kexec in setup.c ?
+	 *
+	 * We need to release the cpus if we are ever going from an
+	 * UP to an SMP kernel.
+	 */
+	smp_release_cpus();
+	if (ppc_md.cpu_irq_down)
+		ppc_md.cpu_irq_down(0);
+	local_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled.  It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image.  We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+union thread_union kexec_stack
+	__attribute__((__section__(".data.init_task"))) = { };
+
+/* Our assembly helper, in kexec_stub.S */
+extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
+					void *image, void *control,
+					void (*clear_all)(void)) ATTRIB_NORET;
+
+/* too late to fail here */
+void machine_kexec(struct kimage *image)
+{
+
+	/* prepare control code if any */
+
+	/* shutdown other cpus into our wait loop and quiesce interrupts */
+	kexec_prepare_cpus();
+
+	/* switch to a statically allocated stack.  Based on irq stack code.
+	 * XXX: the task struct will likely be invalid once we do the copy!
+	 */
+	kexec_stack.thread_info.task = current_thread_info()->task;
+	kexec_stack.thread_info.flags = 0;
+
+	/* Some things are best done in assembly.  Finding globals with
+	 * a toc is easier in C, so pass in what we can.
+ */ + kexec_sequence(&kexec_stack, image->start, image, + page_address(image->control_code_page), + ppc_md.hpte_clear_all); + /* NOTREACHED */ +} diff --git a/arch/ppc64/kernel/maple_setup.c b/arch/ppc64/kernel/maple_setup.c index 8cf95a27178..bb55b5a5691 100644 --- a/arch/ppc64/kernel/maple_setup.c +++ b/arch/ppc64/kernel/maple_setup.c @@ -78,17 +78,77 @@ extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel); extern void generic_find_legacy_serial_ports(u64 *physport, unsigned int *default_speed); - static void maple_restart(char *cmd) { + unsigned int maple_nvram_base; + unsigned int maple_nvram_offset; + unsigned int maple_nvram_command; + struct device_node *rtcs; + + /* find NVRAM device */ + rtcs = find_compatible_devices("nvram", "AMD8111"); + if (rtcs && rtcs->addrs) { + maple_nvram_base = rtcs->addrs[0].address; + } else { + printk(KERN_EMERG "Maple: Unable to find NVRAM\n"); + printk(KERN_EMERG "Maple: Manual Restart Required\n"); + return; + } + + /* find service processor device */ + rtcs = find_devices("service-processor"); + if (!rtcs) { + printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); + printk(KERN_EMERG "Maple: Manual Restart Required\n"); + return; + } + maple_nvram_offset = *(unsigned int*) get_property(rtcs, + "restart-addr", NULL); + maple_nvram_command = *(unsigned int*) get_property(rtcs, + "restart-value", NULL); + + /* send command */ + outb_p(maple_nvram_command, maple_nvram_base + maple_nvram_offset); + for (;;) ; } static void maple_power_off(void) { + unsigned int maple_nvram_base; + unsigned int maple_nvram_offset; + unsigned int maple_nvram_command; + struct device_node *rtcs; + + /* find NVRAM device */ + rtcs = find_compatible_devices("nvram", "AMD8111"); + if (rtcs && rtcs->addrs) { + maple_nvram_base = rtcs->addrs[0].address; + } else { + printk(KERN_EMERG "Maple: Unable to find NVRAM\n"); + printk(KERN_EMERG "Maple: Manual Power-Down Required\n"); + return; + } + + /* find service processor device */ + rtcs = find_devices("service-processor"); + if (!rtcs) { + printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); + printk(KERN_EMERG "Maple: Manual Power-Down Required\n"); + return; + } + maple_nvram_offset = *(unsigned int*) get_property(rtcs, + "power-off-addr", NULL); + maple_nvram_command = *(unsigned int*) get_property(rtcs, + "power-off-value", NULL); + + /* send command */ + outb_p(maple_nvram_command, maple_nvram_base + maple_nvram_offset); + for (;;) ; } static void maple_halt(void) { + maple_power_off(); } #ifdef CONFIG_SMP @@ -117,6 +177,8 @@ void __init maple_setup_arch(void) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif + + printk(KERN_INFO "Using native/NAP idle loop\n"); } /* @@ -235,6 +297,7 @@ struct machdep_calls __initdata maple_md = { .get_boot_time = maple_get_boot_time, .set_rtc_time = maple_set_rtc_time, .get_rtc_time = maple_get_rtc_time, - .calibrate_decr = maple_calibrate_decr, + .calibrate_decr = generic_calibrate_decr, .progress = maple_progress, + .idle_loop = native_idle, }; diff --git a/arch/ppc64/kernel/maple_time.c b/arch/ppc64/kernel/maple_time.c index 07ce7895b43..d65210abcd0 100644 --- a/arch/ppc64/kernel/maple_time.c +++ b/arch/ppc64/kernel/maple_time.c @@ -42,11 +42,8 @@ #define DBG(x...) 
#endif -extern void setup_default_decr(void); extern void GregorianDay(struct rtc_time * tm); -extern unsigned long ppc_tb_freq; -extern unsigned long ppc_proc_freq; static int maple_rtc_addr; static int maple_clock_read(int addr) @@ -176,51 +173,3 @@ void __init maple_get_boot_time(struct rtc_time *tm) maple_get_rtc_time(tm); } -/* XXX FIXME: Some sane defaults: 125 MHz timebase, 1GHz processor */ -#define DEFAULT_TB_FREQ 125000000UL -#define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8) - -void __init maple_calibrate_decr(void) -{ - struct device_node *cpu; - struct div_result divres; - unsigned int *fp = NULL; - - /* - * The cpu node should have a timebase-frequency property - * to tell us the rate at which the decrementer counts. - */ - cpu = of_find_node_by_type(NULL, "cpu"); - - ppc_tb_freq = DEFAULT_TB_FREQ; - if (cpu != 0) - fp = (unsigned int *)get_property(cpu, "timebase-frequency", NULL); - if (fp != NULL) - ppc_tb_freq = *fp; - else - printk(KERN_ERR "WARNING: Estimating decrementer frequency (not found)\n"); - fp = NULL; - ppc_proc_freq = DEFAULT_PROC_FREQ; - if (cpu != 0) - fp = (unsigned int *)get_property(cpu, "clock-frequency", NULL); - if (fp != NULL) - ppc_proc_freq = *fp; - else - printk(KERN_ERR "WARNING: Estimating processor frequency (not found)\n"); - - of_node_put(cpu); - - printk(KERN_INFO "time_init: decrementer frequency = %lu.%.6lu MHz\n", - ppc_tb_freq/1000000, ppc_tb_freq%1000000); - printk(KERN_INFO "time_init: processor frequency = %lu.%.6lu MHz\n", - ppc_proc_freq/1000000, ppc_proc_freq%1000000); - - tb_ticks_per_jiffy = ppc_tb_freq / HZ; - tb_ticks_per_sec = tb_ticks_per_jiffy * HZ; - tb_ticks_per_usec = ppc_tb_freq / 1000000; - tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); - div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres); - tb_to_xs = divres.result_low; - - setup_default_decr(); -} diff --git a/arch/ppc64/kernel/mf.c b/arch/ppc64/kernel/mf.c index d98bebf7042..ef4a338ebd0 100644 --- a/arch/ppc64/kernel/mf.c +++ b/arch/ppc64/kernel/mf.c @@ -801,10 +801,8 @@ int mf_get_boot_rtc(struct rtc_time *tm) return rc; /* We need to poll here as we are not yet taking interrupts */ while (rtc_data.busy) { - extern unsigned long lpevent_count; - struct ItLpQueue *lpq = get_paca()->lpqueue_ptr; - if (lpq && ItLpQueue_isLpIntPending(lpq)) - lpevent_count += ItLpQueue_process(lpq, NULL); + if (hvlpevent_is_pending()) + process_hvlpevents(NULL); } return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm); } diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index e3c73b3425d..a05b50b738e 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -680,6 +680,177 @@ _GLOBAL(kernel_thread) ld r30,-16(r1) blr +/* kexec_wait(phys_cpu) + * + * wait for the flag to change, indicating this kernel is going away but + * the slave code for the next one is at addresses 0 to 100. + * + * This is used by all slaves. + * + * Physical (hardware) cpu id should be in r3. 
+ */ +_GLOBAL(kexec_wait) + bl 1f +1: mflr r5 + addi r5,r5,kexec_flag-1b + +99: HMT_LOW +#ifdef CONFIG_KEXEC /* use no memory without kexec */ + lwz r4,0(r5) + cmpwi 0,r4,0 + bnea 0x60 +#endif + b 99b + +/* this can be in text because we won't change it until we are + * running in real anyways + */ +kexec_flag: + .long 0 + + +#ifdef CONFIG_KEXEC + +/* kexec_smp_wait(void) + * + * call with interrupts off + * note: this is a terminal routine, it does not save lr + * + * get phys id from paca + * set paca id to -1 to say we got here + * switch to real mode + * join other cpus in kexec_wait(phys_id) + */ +_GLOBAL(kexec_smp_wait) + lhz r3,PACAHWCPUID(r13) + li r4,-1 + sth r4,PACAHWCPUID(r13) /* let others know we left */ + bl real_mode + b .kexec_wait + +/* + * switch to real mode (turn mmu off) + * we use the early kernel trick that the hardware ignores bits + * 0 and 1 (big endian) of the effective address in real mode + * + * don't overwrite r3 here, it is live for kexec_wait above. + */ +real_mode: /* assume normal blr return */ +1: li r9,MSR_RI + li r10,MSR_DR|MSR_IR + mflr r11 /* return address to SRR0 */ + mfmsr r12 + andc r9,r12,r9 + andc r10,r12,r10 + + mtmsrd r9,1 + mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r11 + rfid + + +/* + * kexec_sequence(newstack, start, image, control, clear_all()) + * + * does the grungy work with stack switching and real mode switches + * also does simple calls to other code + */ + +_GLOBAL(kexec_sequence) + mflr r0 + std r0,16(r1) + + /* switch stacks to newstack -- &kexec_stack.stack */ + stdu r1,THREAD_SIZE-112(r3) + mr r1,r3 + + li r0,0 + std r0,16(r1) + + /* save regs for local vars on new stack. + * yes, we won't go back, but ... + */ + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + stdu r1,-112-64(r1) + + /* save args into preserved regs */ + mr r31,r3 /* newstack (both) */ + mr r30,r4 /* start (real) */ + mr r29,r5 /* image (virt) */ + mr r28,r6 /* control, unused */ + mr r27,r7 /* clear_all() fn desc */ + mr r26,r8 /* spare */ + lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */ + + /* disable interrupts, we are overwriting kernel data next */ + mfmsr r3 + rlwinm r3,r3,0,17,15 + mtmsrd r3,1 + + /* copy dest pages, flush whole dest image */ + mr r3,r29 + bl .kexec_copy_flush /* (image) */ + + /* turn off mmu */ + bl real_mode + + /* clear out hardware hash page table and tlb */ + ld r5,0(r27) /* deref function descriptor */ + mtctr r5 + bctrl /* ppc_md.hash_clear_all(void); */ + +/* + * kexec image calling is: + * the first 0x100 bytes of the entry point are copied to 0 + * + * all slaves branch to slave = 0x60 (absolute) + * slave(phys_cpu_id); + * + * master goes to start = entry point + * start(phys_cpu_id, start, 0); + * + * + * a wrapper is needed to call existing kernels, here is an approximate + * description of one method: + * + * v2: (2.6.10) + * start will be near the boot_block (maybe 0x100 bytes before it?) + * it will have a 0x60, which will b to boot_block, where it will wait + * and 0 will store phys into struct boot-block and load r3 from there, + * copy kernel 0-0x100 and tell slaves to back down to 0x60 again + * + * v1: (2.6.9) + * boot block will have all cpus scanning device tree to see if they + * are the boot cpu ????? + * other device tree differences (prop sizes, va vs pa, etc)... 
+ */ + + /* copy 0x100 bytes starting at start to 0 */ + li r3,0 + mr r4,r30 + li r5,0x100 + li r6,0 + bl .copy_and_flush /* (dest, src, copy limit, start offset) */ +1: /* assume normal blr return */ + + /* release other cpus to the new kernel secondary start at 0x60 */ + mflr r5 + li r6,1 + stw r6,kexec_flag-1b(5) + mr r3,r25 # my phys cpu + mr r4,r30 # start, aka phys mem offset + mtlr 4 + li r5,0 + blr /* image->start(physid, image->start, 0); */ +#endif /* CONFIG_KEXEC */ + /* Why isn't this a) automatic, b) written in 'C'? */ .balign 8 _GLOBAL(sys_call_table32) @@ -951,11 +1122,16 @@ _GLOBAL(sys_call_table32) .llong .compat_sys_mq_timedreceive /* 265 */ .llong .compat_sys_mq_notify .llong .compat_sys_mq_getsetattr - .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */ + .llong .compat_sys_kexec_load .llong .sys32_add_key - .llong .sys32_request_key + .llong .sys32_request_key /* 270 */ .llong .compat_sys_keyctl .llong .compat_sys_waitid + .llong .sys32_ioprio_set + .llong .sys32_ioprio_get + .llong .sys_inotify_init /* 275 */ + .llong .sys_inotify_add_watch + .llong .sys_inotify_rm_watch .balign 8 _GLOBAL(sys_call_table) @@ -1227,8 +1403,13 @@ _GLOBAL(sys_call_table) .llong .sys_mq_timedreceive /* 265 */ .llong .sys_mq_notify .llong .sys_mq_getsetattr - .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */ + .llong .sys_kexec_load .llong .sys_add_key .llong .sys_request_key /* 270 */ .llong .sys_keyctl .llong .sys_waitid + .llong .sys_ioprio_set + .llong .sys_ioprio_get + .llong .sys_inotify_init /* 275 */ + .llong .sys_inotify_add_watch + .llong .sys_inotify_rm_watch diff --git a/arch/ppc64/kernel/mpic.c b/arch/ppc64/kernel/mpic.c index 593ea5b82af..cc262a05ddb 100644 --- a/arch/ppc64/kernel/mpic.c +++ b/arch/ppc64/kernel/mpic.c @@ -792,6 +792,35 @@ void mpic_setup_this_cpu(void) #endif /* CONFIG_SMP */ } +/* + * XXX: someone who knows mpic should check this. + * do we need to eoi the ipi including for kexec cpu here (see xics comments)? + * or can we reset the mpic in the new kernel? + */ +void mpic_teardown_this_cpu(int secondary) +{ + struct mpic *mpic = mpic_primary; + unsigned long flags; + u32 msk = 1 << hard_smp_processor_id(); + unsigned int i; + + BUG_ON(mpic == NULL); + + DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); + spin_lock_irqsave(&mpic_lock, flags); + + /* let the mpic know we don't want intrs. */ + for (i = 0; i < mpic->num_sources ; i++) + mpic_irq_write(i, MPIC_IRQ_DESTINATION, + mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk); + + /* Set current processor priority to max */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf); + + spin_unlock_irqrestore(&mpic_lock, flags); +} + + void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask) { struct mpic *mpic = mpic_primary; diff --git a/arch/ppc64/kernel/mpic.h b/arch/ppc64/kernel/mpic.h index 571b3c99e06..ca78a7f1052 100644 --- a/arch/ppc64/kernel/mpic.h +++ b/arch/ppc64/kernel/mpic.h @@ -255,6 +255,9 @@ extern unsigned int mpic_irq_get_priority(unsigned int irq); /* Setup a non-boot CPU */ extern void mpic_setup_this_cpu(void); +/* Clean up for kexec (or cpu offline or ...) 
*/ +extern void mpic_teardown_this_cpu(int secondary); + /* Request IPIs on primary mpic */ extern void mpic_request_ipis(void); @@ -265,3 +268,6 @@ extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask); extern int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs); /* This one gets to the primary mpic */ extern int mpic_get_irq(struct pt_regs *regs); + +/* global mpic for pSeries */ +extern struct mpic *pSeries_mpic; diff --git a/arch/ppc64/kernel/nvram.c b/arch/ppc64/kernel/nvram.c index 4e71781a441..4fb1a9f5060 100644 --- a/arch/ppc64/kernel/nvram.c +++ b/arch/ppc64/kernel/nvram.c @@ -338,9 +338,8 @@ static int nvram_remove_os_partition(void) */ static int nvram_create_os_partition(void) { - struct list_head * p; - struct nvram_partition *part = NULL; - struct nvram_partition *new_part = NULL; + struct nvram_partition *part; + struct nvram_partition *new_part; struct nvram_partition *free_part = NULL; int seq_init[2] = { 0, 0 }; loff_t tmp_index; @@ -349,8 +348,7 @@ static int nvram_create_os_partition(void) /* Find a free partition that will give us the maximum needed size If can't find one that will give us the minimum size needed */ - list_for_each(p, &nvram_part->partition) { - part = list_entry(p, struct nvram_partition, partition); + list_for_each_entry(part, &nvram_part->partition, partition) { if (part->header.signature != NVRAM_SIG_FREE) continue; diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c index 66bd5ab7c25..b80e81984ba 100644 --- a/arch/ppc64/kernel/of_device.c +++ b/arch/ppc64/kernel/of_device.c @@ -3,6 +3,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/mod_devicetable.h> #include <asm/errno.h> #include <asm/of_device.h> @@ -15,20 +16,20 @@ * Used by a driver to check whether an of_device present in the * system is in its list of supported devices. 
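The of_device.c conversion below replaces struct of_match, where a wildcard was spelled OF_ANY_MATCH, with struct of_device_id, whose fields are fixed-size strings and where a wildcard is simply an empty field. A sketch of a match table in the new style -- the device strings are invented:

	#include <linux/mod_devicetable.h>

	/* Illustrative table only; the strings are made up. */
	static struct of_device_id example_match_table[] = {
		{ .type = "network", .compatible = "example,fake-enet" },
		{ .name = "fake-serial" }, /* empty .type/.compatible match anything */
		{},	/* an all-empty entry terminates the match walk below */
	};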
*/ -const struct of_match * of_match_device(const struct of_match *matches, +const struct of_device_id *of_match_device(const struct of_device_id *matches, const struct of_device *dev) { if (!dev->node) return NULL; - while (matches->name || matches->type || matches->compatible) { + while (matches->name[0] || matches->type[0] || matches->compatible[0]) { int match = 1; - if (matches->name && matches->name != OF_ANY_MATCH) + if (matches->name[0]) match &= dev->node->name && !strcmp(matches->name, dev->node->name); - if (matches->type && matches->type != OF_ANY_MATCH) + if (matches->type[0]) match &= dev->node->type && !strcmp(matches->type, dev->node->type); - if (matches->compatible && matches->compatible != OF_ANY_MATCH) + if (matches->compatible[0]) match &= device_is_compatible(dev->node, matches->compatible); if (match) @@ -42,7 +43,7 @@ static int of_platform_bus_match(struct device *dev, struct device_driver *drv) { struct of_device * of_dev = to_of_device(dev); struct of_platform_driver * of_drv = to_of_platform_driver(drv); - const struct of_match * matches = of_drv->match_table; + const struct of_device_id * matches = of_drv->match_table; if (!matches) return 0; @@ -75,7 +76,7 @@ static int of_device_probe(struct device *dev) int error = -ENODEV; struct of_platform_driver *drv; struct of_device *of_dev; - const struct of_match *match; + const struct of_device_id *match; drv = to_of_platform_driver(dev->driver); of_dev = to_of_device(dev); diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c index 6534812db43..74dd144dcce 100644 --- a/arch/ppc64/kernel/pSeries_lpar.c +++ b/arch/ppc64/kernel/pSeries_lpar.c @@ -277,31 +277,20 @@ void vpa_init(int cpu) long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, - int secondary, unsigned long hpteflags, - int bolted, int large) + unsigned long vflags, unsigned long rflags) { unsigned long arpn = physRpn_to_absRpn(prpn); unsigned long lpar_rc; unsigned long flags; unsigned long slot; - HPTE lhpte; + unsigned long hpte_v, hpte_r; unsigned long dummy0, dummy1; - /* Fill in the local HPTE with absolute rpn, avpn and flags */ - lhpte.dw1.dword1 = 0; - lhpte.dw1.dw1.rpn = arpn; - lhpte.dw1.flags.flags = hpteflags; + hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID; + if (vflags & HPTE_V_LARGE) + hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); - lhpte.dw0.dword0 = 0; - lhpte.dw0.dw0.avpn = va >> 23; - lhpte.dw0.dw0.h = secondary; - lhpte.dw0.dw0.bolted = bolted; - lhpte.dw0.dw0.v = 1; - - if (large) { - lhpte.dw0.dw0.l = 1; - lhpte.dw0.dw0.avpn &= ~0x1UL; - } + hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags; /* Now fill in the actual HPTE */ /* Set CEC cookie to 0 */ @@ -312,11 +301,11 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, flags = 0; /* XXX why is this here? 
- Anton */ - if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) - lhpte.dw1.flags.flags &= ~_PAGE_COHERENT; + if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) + hpte_r &= ~_PAGE_COHERENT; - lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, lhpte.dw0.dword0, - lhpte.dw1.dword1, &slot, &dummy0, &dummy1); + lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v, + hpte_r, &slot, &dummy0, &dummy1); if (unlikely(lpar_rc == H_PTEG_Full)) return -1; @@ -332,7 +321,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, /* Because of iSeries, we have to pass down the secondary * bucket bit here as well */ - return (slot & 7) | (secondary << 3); + return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3); } static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); @@ -427,22 +416,18 @@ static long pSeries_lpar_hpte_find(unsigned long vpn) unsigned long hash; unsigned long i, j; long slot; - union { - unsigned long dword0; - Hpte_dword0 dw0; - } hpte_dw0; - Hpte_dword0 dw0; + unsigned long hpte_v; hash = hpt_hash(vpn, 0); for (j = 0; j < 2; j++) { slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; for (i = 0; i < HPTES_PER_GROUP; i++) { - hpte_dw0.dword0 = pSeries_lpar_hpte_getword0(slot); - dw0 = hpte_dw0.dw0; + hpte_v = pSeries_lpar_hpte_getword0(slot); - if ((dw0.avpn == (vpn >> 11)) && dw0.v && - (dw0.h == j)) { + if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) + && (hpte_v & HPTE_V_VALID) + && (!!(hpte_v & HPTE_V_SECONDARY) == j)) { /* HPTE matches */ if (j) slot = -slot; diff --git a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c index 0b1cca28140..1f5f141fb7a 100644 --- a/arch/ppc64/kernel/pSeries_pci.c +++ b/arch/ppc64/kernel/pSeries_pci.c @@ -1,13 +1,11 @@ /* - * pSeries_pci.c + * arch/ppc64/kernel/pSeries_pci.c * * Copyright (C) 2001 Dave Engebretsen, IBM Corporation * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM * * pSeries specific routines for PCI. 
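The pSeries_lpar.c rework above drops the old bitfield HPTE union in favour of two bare 64-bit words. Pulled out of the insert path into a helper purely for illustration (the helper is not part of the patch; the HPTE_* constants are the ones the patch now uses):

	/* Sketch: how the new insert path packs an HPTE. */
	static inline void pack_hpte_sketch(unsigned long va, unsigned long arpn,
					    unsigned long vflags, unsigned long rflags,
					    unsigned long *hpte_v, unsigned long *hpte_r)
	{
		*hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID;
		if (vflags & HPTE_V_LARGE)
			/* the low AVPN bit is not part of the AVPN for large pages */
			*hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT);
		*hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
	}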
* - * Based on code from pci.c and chrp_pci.c - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -23,430 +21,18 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/init.h> +#include <linux/ioport.h> #include <linux/kernel.h> -#include <linux/threads.h> #include <linux/pci.h> #include <linux/string.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/irq.h> -#include <asm/prom.h> -#include <asm/machdep.h> #include <asm/pci-bridge.h> -#include <asm/iommu.h> -#include <asm/rtas.h> +#include <asm/prom.h> -#include "mpic.h" #include "pci.h" -/* RTAS tokens */ -static int read_pci_config; -static int write_pci_config; -static int ibm_read_pci_config; -static int ibm_write_pci_config; - -static int s7a_workaround; - -extern struct mpic *pSeries_mpic; - -static int config_access_valid(struct device_node *dn, int where) -{ - if (where < 256) - return 1; - if (where < 4096 && dn->pci_ext_config_space) - return 1; - - return 0; -} - -static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) -{ - int returnval = -1; - unsigned long buid, addr; - int ret; - - if (!dn) - return PCIBIOS_DEVICE_NOT_FOUND; - if (!config_access_valid(dn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = ((where & 0xf00) << 20) | (dn->busno << 16) | - (dn->devfn << 8) | (where & 0xff); - buid = dn->phb->buid; - if (buid) { - ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval, - addr, buid >> 32, buid & 0xffffffff, size); - } else { - ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size); - } - *val = returnval; - - if (ret) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (returnval == EEH_IO_ERROR_VALUE(size) - && eeh_dn_check_failure (dn, NULL)) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int rtas_pci_read_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 *val) -{ - struct device_node *busdn, *dn; - - if (bus->self) - busdn = pci_device_to_OF_node(bus->self); - else - busdn = bus->sysdata; /* must be a phb */ - - /* Search only direct children of the bus */ - for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->devfn == devfn) - return rtas_read_config(dn, where, size, val); - return PCIBIOS_DEVICE_NOT_FOUND; -} - -static int rtas_write_config(struct device_node *dn, int where, int size, u32 val) -{ - unsigned long buid, addr; - int ret; - - if (!dn) - return PCIBIOS_DEVICE_NOT_FOUND; - if (!config_access_valid(dn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = ((where & 0xf00) << 20) | (dn->busno << 16) | - (dn->devfn << 8) | (where & 0xff); - buid = dn->phb->buid; - if (buid) { - ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, buid >> 32, buid & 0xffffffff, size, (ulong) val); - } else { - ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val); - } - - if (ret) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int rtas_pci_write_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 val) -{ - struct device_node *busdn, *dn; - - if (bus->self) - busdn = pci_device_to_OF_node(bus->self); - else - busdn = bus->sysdata; /* must be a phb */ - - /* Search only direct children of the bus */ - for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->devfn == devfn) - return 
rtas_write_config(dn, where, size, val); - return PCIBIOS_DEVICE_NOT_FOUND; -} - -struct pci_ops rtas_pci_ops = { - rtas_pci_read_config, - rtas_pci_write_config -}; - -int is_python(struct device_node *dev) -{ - char *model = (char *)get_property(dev, "model", NULL); - - if (model && strstr(model, "Python")) - return 1; - - return 0; -} - -static int get_phb_reg_prop(struct device_node *dev, - unsigned int addr_size_words, - struct reg_property64 *reg) -{ - unsigned int *ui_ptr = NULL, len; - - /* Found a PHB, now figure out where his registers are mapped. */ - ui_ptr = (unsigned int *)get_property(dev, "reg", &len); - if (ui_ptr == NULL) - return 1; - - if (addr_size_words == 1) { - reg->address = ((struct reg_property32 *)ui_ptr)->address; - reg->size = ((struct reg_property32 *)ui_ptr)->size; - } else { - *reg = *((struct reg_property64 *)ui_ptr); - } - - return 0; -} - -static void python_countermeasures(struct device_node *dev, - unsigned int addr_size_words) -{ - struct reg_property64 reg_struct; - void __iomem *chip_regs; - volatile u32 val; - - if (get_phb_reg_prop(dev, addr_size_words, ®_struct)) - return; - - /* Python's register file is 1 MB in size. */ - chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000); - - /* - * Firmware doesn't always clear this bit which is critical - * for good performance - Anton - */ - -#define PRG_CL_RESET_VALID 0x00010000 - - val = in_be32(chip_regs + 0xf6030); - if (val & PRG_CL_RESET_VALID) { - printk(KERN_INFO "Python workaround: "); - val &= ~PRG_CL_RESET_VALID; - out_be32(chip_regs + 0xf6030, val); - /* - * We must read it back for changes to - * take effect - */ - val = in_be32(chip_regs + 0xf6030); - printk("reg0: %x\n", val); - } - - iounmap(chip_regs); -} - -void __init init_pci_config_tokens (void) -{ - read_pci_config = rtas_token("read-pci-config"); - write_pci_config = rtas_token("write-pci-config"); - ibm_read_pci_config = rtas_token("ibm,read-pci-config"); - ibm_write_pci_config = rtas_token("ibm,write-pci-config"); -} - -unsigned long __devinit get_phb_buid (struct device_node *phb) -{ - int addr_cells; - unsigned int *buid_vals; - unsigned int len; - unsigned long buid; - - if (ibm_read_pci_config == -1) return 0; - - /* PHB's will always be children of the root node, - * or so it is promised by the current firmware. 
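The RTAS config accessors deleted above move, essentially verbatim, into the new rtas_pci.c added later in this patch. The address word they hand to RTAS packs the extended-config bits of `where` into bits 28-31, the bus number into bits 16-23, devfn into bits 8-15 and the low register byte into bits 0-7; a self-contained check of that layout, with arbitrary values:

	#include <stdio.h>

	/* Worked example of the rtas_{read,write}_config() address word. */
	int main(void)
	{
		unsigned int busno = 0x40, devfn = 0x08, where = 0x144;
		unsigned long addr = ((where & 0xf00) << 20) | (busno << 16) |
				     (devfn << 8) | (where & 0xff);

		printf("addr = 0x%lx\n", addr);	/* prints 0x10400844 */
		return 0;
	}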
*/ - if (phb->parent == NULL) - return 0; - if (phb->parent->parent) - return 0; - - buid_vals = (unsigned int *) get_property(phb, "reg", &len); - if (buid_vals == NULL) - return 0; - - addr_cells = prom_n_addr_cells(phb); - if (addr_cells == 1) { - buid = (unsigned long) buid_vals[0]; - } else { - buid = (((unsigned long)buid_vals[0]) << 32UL) | - (((unsigned long)buid_vals[1]) & 0xffffffff); - } - return buid; -} - -static int phb_set_bus_ranges(struct device_node *dev, - struct pci_controller *phb) -{ - int *bus_range; - unsigned int len; - - bus_range = (int *) get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { - return 1; - } - - phb->first_busno = bus_range[0]; - phb->last_busno = bus_range[1]; - - return 0; -} - -static int __devinit setup_phb(struct device_node *dev, - struct pci_controller *phb, - unsigned int addr_size_words) -{ - pci_setup_pci_controller(phb); - - if (is_python(dev)) - python_countermeasures(dev, addr_size_words); - - if (phb_set_bus_ranges(dev, phb)) - return 1; - - phb->arch_data = dev; - phb->ops = &rtas_pci_ops; - phb->buid = get_phb_buid(dev); - - return 0; -} - -static void __devinit add_linux_pci_domain(struct device_node *dev, - struct pci_controller *phb, - struct property *of_prop) -{ - memset(of_prop, 0, sizeof(struct property)); - of_prop->name = "linux,pci-domain"; - of_prop->length = sizeof(phb->global_number); - of_prop->value = (unsigned char *)&of_prop[1]; - memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number)); - prom_add_property(dev, of_prop); -} - -static struct pci_controller * __init alloc_phb(struct device_node *dev, - unsigned int addr_size_words) -{ - struct pci_controller *phb; - struct property *of_prop; - - phb = alloc_bootmem(sizeof(struct pci_controller)); - if (phb == NULL) - return NULL; - - of_prop = alloc_bootmem(sizeof(struct property) + - sizeof(phb->global_number)); - if (!of_prop) - return NULL; - - if (setup_phb(dev, phb, addr_size_words)) - return NULL; - - add_linux_pci_domain(dev, phb, of_prop); - - return phb; -} - -static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words) -{ - struct pci_controller *phb; - - phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), - GFP_KERNEL); - if (phb == NULL) - return NULL; - - if (setup_phb(dev, phb, addr_size_words)) - return NULL; - - phb->is_dynamic = 1; - - /* TODO: linux,pci-domain? 
*/ - - return phb; -} - -unsigned long __init find_and_init_phbs(void) -{ - struct device_node *node; - struct pci_controller *phb; - unsigned int root_size_cells = 0; - unsigned int index; - unsigned int *opprop = NULL; - struct device_node *root = of_find_node_by_path("/"); - - if (ppc64_interrupt_controller == IC_OPEN_PIC) { - opprop = (unsigned int *)get_property(root, - "platform-open-pic", NULL); - } - - root_size_cells = prom_n_size_cells(root); - - index = 0; - - for (node = of_get_next_child(root, NULL); - node != NULL; - node = of_get_next_child(root, node)) { - if (node->type == NULL || strcmp(node->type, "pci") != 0) - continue; - - phb = alloc_phb(node, root_size_cells); - if (!phb) - continue; - - pci_process_bridge_OF_ranges(phb, node); - pci_setup_phb_io(phb, index == 0); - - if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) { - int addr = root_size_cells * (index + 2) - 1; - mpic_assign_isu(pSeries_mpic, index, opprop[addr]); - } - - index++; - } - - of_node_put(root); - pci_devs_phb_init(); - - /* - * pci_probe_only and pci_assign_all_buses can be set via properties - * in chosen. - */ - if (of_chosen) { - int *prop; - - prop = (int *)get_property(of_chosen, "linux,pci-probe-only", - NULL); - if (prop) - pci_probe_only = *prop; - - prop = (int *)get_property(of_chosen, - "linux,pci-assign-all-buses", NULL); - if (prop) - pci_assign_all_buses = *prop; - } - - return 0; -} - -struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) -{ - struct device_node *root = of_find_node_by_path("/"); - unsigned int root_size_cells = 0; - struct pci_controller *phb; - struct pci_bus *bus; - int primary; - - root_size_cells = prom_n_size_cells(root); - - primary = list_empty(&hose_list); - phb = alloc_phb_dynamic(dn, root_size_cells); - if (!phb) - return NULL; - - pci_process_bridge_OF_ranges(phb, dn); - - pci_setup_phb_io_dynamic(phb, primary); - of_node_put(root); - - pci_devs_phb_init_dynamic(phb); - phb->last_busno = 0xff; - bus = pci_scan_bus(phb->first_busno, phb->ops, phb->arch_data); - phb->bus = bus; - phb->last_busno = bus->subordinate; - - return phb; -} -EXPORT_SYMBOL(init_phb_dynamic); +static int __initdata s7a_workaround = -1; #if 0 void pcibios_name_device(struct pci_dev *dev) @@ -474,11 +60,12 @@ void pcibios_name_device(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); #endif -static void check_s7a(void) +static void __init check_s7a(void) { struct device_node *root; char *model; + s7a_workaround = 0; root = of_find_node_by_path("/"); if (root) { model = get_property(root, "model", NULL); @@ -488,55 +75,23 @@ static void check_s7a(void) } } -/* RPA-specific bits for removing PHBs */ -int pcibios_remove_root_bus(struct pci_controller *phb) +void __devinit pSeries_irq_bus_setup(struct pci_bus *bus) { - struct pci_bus *b = phb->bus; - struct resource *res; - int rc, i; - - res = b->resource[0]; - if (!res->flags) { - printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__, - b->name); - return 1; - } - - rc = unmap_bus_range(b); - if (rc) { - printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", - __FUNCTION__, b->name); - return 1; - } + struct pci_dev *dev; - if (release_resource(res)) { - printk(KERN_ERR "%s: failed to release IO on bus %s\n", - __FUNCTION__, b->name); - return 1; - } - - for (i = 1; i < 3; ++i) { - res = b->resource[i]; - if (!res->flags && i == 0) { - printk(KERN_ERR "%s: no MEM resource for PHB %s\n", - __FUNCTION__, b->name); - return 1; - } - if (res->flags && 
release_resource(res)) { - printk(KERN_ERR - "%s: failed to release IO %d on bus %s\n", - __FUNCTION__, i, b->name); - return 1; + if (s7a_workaround < 0) + check_s7a(); + list_for_each_entry(dev, &bus->devices, bus_list) { + pci_read_irq_line(dev); + if (s7a_workaround) { + if (dev->irq > 16) { + dev->irq -= 3; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, + dev->irq); + } } } - - list_del(&phb->list_node); - if (phb->is_dynamic) - kfree(phb); - - return 0; } -EXPORT_SYMBOL(pcibios_remove_root_bus); static void __init pSeries_request_regions(void) { @@ -553,20 +108,6 @@ static void __init pSeries_request_regions(void) void __init pSeries_final_fixup(void) { - struct pci_dev *dev = NULL; - - check_s7a(); - - for_each_pci_dev(dev) { - pci_read_irq_line(dev); - if (s7a_workaround) { - if (dev->irq > 16) { - dev->irq -= 3; - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); - } - } - } - phbs_remap_io(); pSeries_request_regions(); diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 6c0d1d58a55..5bec956e44a 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -19,6 +19,7 @@ #undef DEBUG #include <linux/config.h> +#include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -71,11 +72,6 @@ #define DBG(fmt...) #endif -extern void pSeries_final_fixup(void); - -extern void pSeries_get_boot_time(struct rtc_time *rtc_time); -extern void pSeries_get_rtc_time(struct rtc_time *rtc_time); -extern int pSeries_set_rtc_time(struct rtc_time *rtc_time); extern void find_udbg_vterm(void); extern void system_reset_fwnmi(void); /* from head.S */ extern void machine_check_fwnmi(void); /* from head.S */ @@ -84,12 +80,12 @@ extern void generic_find_legacy_serial_ports(u64 *physport, int fwnmi_active; /* TRUE if an FWNMI handler is present */ -extern unsigned long ppc_proc_freq; -extern unsigned long ppc_tb_freq; - extern void pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); +static int pseries_shared_idle(void); +static int pseries_dedicated_idle(void); + static volatile void __iomem * chrp_int_ack_special; struct mpic *pSeries_mpic; @@ -195,14 +191,16 @@ static void __init pSeries_setup_arch(void) { /* Fixup ppc_md depending on the type of interrupt controller */ if (ppc64_interrupt_controller == IC_OPEN_PIC) { - ppc_md.init_IRQ = pSeries_init_mpic; + ppc_md.init_IRQ = pSeries_init_mpic; ppc_md.get_irq = mpic_get_irq; + ppc_md.cpu_irq_down = mpic_teardown_this_cpu; /* Allocate the mpic now, so that find_and_init_phbs() can * fill the ISUs */ pSeries_setup_mpic(); } else { ppc_md.init_IRQ = xics_init_IRQ; ppc_md.get_irq = xics_get_irq; + ppc_md.cpu_irq_down = xics_teardown_cpu; } #ifdef CONFIG_SMP @@ -235,6 +233,20 @@ static void __init pSeries_setup_arch(void) if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) vpa_init(boot_cpuid); + + /* Choose an idle loop */ + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (get_paca()->lppaca.shared_proc) { + printk(KERN_INFO "Using shared processor idle loop\n"); + ppc_md.idle_loop = pseries_shared_idle; + } else { + printk(KERN_INFO "Using dedicated idle loop\n"); + ppc_md.idle_loop = pseries_dedicated_idle; + } + } else { + printk(KERN_INFO "Using default idle loop\n"); + ppc_md.idle_loop = default_idle; + } } static int __init pSeries_init_panel(void) @@ -381,171 +393,6 @@ static void __init pSeries_init_early(void) } -static void pSeries_progress(char *s, 
unsigned short hex) -{ - struct device_node *root; - int width, *p; - char *os; - static int display_character, set_indicator; - static int max_width; - static DEFINE_SPINLOCK(progress_lock); - static int pending_newline = 0; /* did last write end with unprinted newline? */ - - if (!rtas.base) - return; - - if (max_width == 0) { - if ((root = find_path_device("/rtas")) && - (p = (unsigned int *)get_property(root, - "ibm,display-line-length", - NULL))) - max_width = *p; - else - max_width = 0x10; - display_character = rtas_token("display-character"); - set_indicator = rtas_token("set-indicator"); - } - - if (display_character == RTAS_UNKNOWN_SERVICE) { - /* use hex display if available */ - if (set_indicator != RTAS_UNKNOWN_SERVICE) - rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex); - return; - } - - spin_lock(&progress_lock); - - /* - * Last write ended with newline, but we didn't print it since - * it would just clear the bottom line of output. Print it now - * instead. - * - * If no newline is pending, print a CR to start output at the - * beginning of the line. - */ - if (pending_newline) { - rtas_call(display_character, 1, 1, NULL, '\r'); - rtas_call(display_character, 1, 1, NULL, '\n'); - pending_newline = 0; - } else { - rtas_call(display_character, 1, 1, NULL, '\r'); - } - - width = max_width; - os = s; - while (*os) { - if (*os == '\n' || *os == '\r') { - /* Blank to end of line. */ - while (width-- > 0) - rtas_call(display_character, 1, 1, NULL, ' '); - - /* If newline is the last character, save it - * until next call to avoid bumping up the - * display output. - */ - if (*os == '\n' && !os[1]) { - pending_newline = 1; - spin_unlock(&progress_lock); - return; - } - - /* RTAS wants CR-LF, not just LF */ - - if (*os == '\n') { - rtas_call(display_character, 1, 1, NULL, '\r'); - rtas_call(display_character, 1, 1, NULL, '\n'); - } else { - /* CR might be used to re-draw a line, so we'll - * leave it alone and not add LF. - */ - rtas_call(display_character, 1, 1, NULL, *os); - } - - width = max_width; - } else { - width--; - rtas_call(display_character, 1, 1, NULL, *os); - } - - os++; - - /* if we overwrite the screen length */ - if (width <= 0) - while ((*os != 0) && (*os != '\n') && (*os != '\r')) - os++; - } - - /* Blank to end of line. */ - while (width-- > 0) - rtas_call(display_character, 1, 1, NULL, ' '); - - spin_unlock(&progress_lock); -} - -extern void setup_default_decr(void); - -/* Some sane defaults: 125 MHz timebase, 1GHz processor */ -#define DEFAULT_TB_FREQ 125000000UL -#define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8) - -static void __init pSeries_calibrate_decr(void) -{ - struct device_node *cpu; - struct div_result divres; - unsigned int *fp; - int node_found; - - /* - * The cpu node should have a timebase-frequency property - * to tell us the rate at which the decrementer counts. 
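pSeries_calibrate_decr(), whose deletion begins here, is superseded by generic_calibrate_decr (see the machdep table change further down). The generic routine is not shown in this diff; judging from the code being removed, and from the matching FIXME in pmac_time.c below, it presumably keeps the same device-tree lookup, roughly:

	/* Hedged sketch of the lookup the generic routine is assumed to
	 * perform; the deleted pSeries code here is the reference. */
	static unsigned long dt_freq_sketch(const char *prop, unsigned long fallback)
	{
		struct device_node *cpu = of_find_node_by_type(NULL, "cpu");
		unsigned int *fp = NULL;
		unsigned long freq = fallback;

		if (cpu)
			fp = (unsigned int *)get_property(cpu, prop, NULL);
		if (fp)
			freq = *fp;	/* e.g. "timebase-frequency" */
		else
			printk(KERN_ERR "WARNING: estimating %s\n", prop);
		of_node_put(cpu);
		return freq;
	}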
- */
-	cpu = of_find_node_by_type(NULL, "cpu");
-
-	ppc_tb_freq = DEFAULT_TB_FREQ;		/* hardcoded default */
-	node_found = 0;
-	if (cpu != 0) {
-		fp = (unsigned int *)get_property(cpu, "timebase-frequency",
-						  NULL);
-		if (fp != 0) {
-			node_found = 1;
-			ppc_tb_freq = *fp;
-		}
-	}
-	if (!node_found)
-		printk(KERN_ERR "WARNING: Estimating decrementer frequency "
-				"(not found)\n");
-
-	ppc_proc_freq = DEFAULT_PROC_FREQ;
-	node_found = 0;
-	if (cpu != 0) {
-		fp = (unsigned int *)get_property(cpu, "clock-frequency",
-						  NULL);
-		if (fp != 0) {
-			node_found = 1;
-			ppc_proc_freq = *fp;
-		}
-	}
-	if (!node_found)
-		printk(KERN_ERR "WARNING: Estimating processor frequency "
-				"(not found)\n");
-
-	of_node_put(cpu);
-
-	printk(KERN_INFO "time_init: decrementer frequency = %lu.%.6lu MHz\n",
-	       ppc_tb_freq/1000000, ppc_tb_freq%1000000);
-	printk(KERN_INFO "time_init: processor frequency = %lu.%.6lu MHz\n",
-	       ppc_proc_freq/1000000, ppc_proc_freq%1000000);
-
-	tb_ticks_per_jiffy = ppc_tb_freq / HZ;
-	tb_ticks_per_sec = tb_ticks_per_jiffy * HZ;
-	tb_ticks_per_usec = ppc_tb_freq / 1000000;
-	tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
-	div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres);
-	tb_to_xs = divres.result_low;
-
-	setup_default_decr();
-}
-
 static int pSeries_check_legacy_ioport(unsigned int baseport)
 {
 	struct device_node *np;
@@ -589,6 +436,144 @@ static int __init pSeries_probe(int platform)
 	return 1;
 }
 
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+static inline void dedicated_idle_sleep(unsigned int cpu)
+{
+	struct paca_struct *ppaca = &paca[cpu ^ 1];
+
+	/* Only sleep if the other thread is not idle */
+	if (!(ppaca->lppaca.idle)) {
+		local_irq_disable();
+
+		/*
+		 * We are about to sleep the thread and so won't be polling
+		 * any more.
+		 */
+		clear_thread_flag(TIF_POLLING_NRFLAG);
+
+		/*
+		 * SMT dynamic mode. Cede will result in this thread going
+		 * dormant, if the partner thread is still doing work. Thread
+		 * wakes up if partner goes idle, an interrupt is presented, or
+		 * a prod occurs. Returning from the cede enables external
+		 * interrupts.
+		 */
+		if (!need_resched())
+			cede_processor();
+		else
+			local_irq_enable();
+	} else {
+		/*
+		 * Give the HV an opportunity at the processor, since we are
+		 * not doing any work.
+		 */
+		poll_pending();
+	}
+}
+
+static int pseries_dedicated_idle(void)
+{
+	long oldval;
+	struct paca_struct *lpaca = get_paca();
+	unsigned int cpu = smp_processor_id();
+	unsigned long start_snooze;
+	unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
+
+	while (1) {
+		/*
+		 * Indicate to the HV that we are idle. Now would be
+		 * a good time to find other work to dispatch.
+		 */
+		lpaca->lppaca.idle = 1;
+
+		oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
+		if (!oldval) {
+			set_thread_flag(TIF_POLLING_NRFLAG);
+
+			start_snooze = __get_tb() +
+				*smt_snooze_delay * tb_ticks_per_usec;
+
+			while (!need_resched() && !cpu_is_offline(cpu)) {
+				ppc64_runlatch_off();
+
+				/*
+				 * Go into low thread priority and possibly
+				 * low power mode.
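cede_processor() and poll_pending(), used throughout these idle loops, are thin hypervisor-call wrappers defined elsewhere in the tree; they are not shown in this diff, but they amount to the following (a sketch under that assumption, using the standard H_CEDE/H_POLL_PENDING hcall tokens):

	/* Assumed shape of the wrappers the idle loops rely on. */
	static inline long cede_processor_sketch(void)
	{
		/* go dormant until an interrupt or a prod from another cpu;
		 * external interrupts are enabled on return */
		return plpar_hcall_norets(H_CEDE);
	}

	static inline long poll_pending_sketch(void)
	{
		/* brief visit to the hypervisor without going dormant */
		return plpar_hcall_norets(H_POLL_PENDING);
	}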
+ */ + HMT_low(); + HMT_very_low(); + + if (*smt_snooze_delay != 0 && + __get_tb() > start_snooze) { + HMT_medium(); + dedicated_idle_sleep(cpu); + } + + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + + schedule(); + + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } +} + +static int pseries_shared_idle(void) +{ + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + while (!need_resched() && !cpu_is_offline(cpu)) { + local_irq_disable(); + ppc64_runlatch_off(); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + * + * Check need_resched() again with interrupts disabled + * to avoid a race. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + + HMT_medium(); + } + + lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + + schedule(); + + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } + + return 0; +} + struct machdep_calls __initdata pSeries_md = { .probe = pSeries_probe, .setup_arch = pSeries_setup_arch, @@ -596,16 +581,17 @@ struct machdep_calls __initdata pSeries_md = { .get_cpuinfo = pSeries_get_cpuinfo, .log_error = pSeries_log_error, .pcibios_fixup = pSeries_final_fixup, + .irq_bus_setup = pSeries_irq_bus_setup, .restart = rtas_restart, .power_off = rtas_power_off, .halt = rtas_halt, .panic = rtas_os_term, .cpu_die = pSeries_mach_cpu_die, - .get_boot_time = pSeries_get_boot_time, - .get_rtc_time = pSeries_get_rtc_time, - .set_rtc_time = pSeries_set_rtc_time, - .calibrate_decr = pSeries_calibrate_decr, - .progress = pSeries_progress, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = rtas_progress, .check_legacy_ioport = pSeries_check_legacy_ioport, .system_reset_exception = pSeries_system_reset_exception, .machine_check_exception = pSeries_machine_check_exception, diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index 4203bd020c8..62c55a12356 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -1,5 +1,5 @@ /* - * SMP support for pSeries machines. + * SMP support for pSeries and BPA machines. * * Dave Engebretsen, Peter Bergner, and * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com @@ -47,6 +47,7 @@ #include <asm/pSeries_reconfig.h> #include "mpic.h" +#include "bpa_iic.h" #ifdef DEBUG #define DBG(fmt...) 
udbg_printf(fmt) @@ -92,10 +93,13 @@ static int query_cpu_stopped(unsigned int pcpu) int pSeries_cpu_disable(void) { + int cpu = smp_processor_id(); + + cpu_clear(cpu, cpu_online_map); systemcfg->processorCount--; /*fix boot_cpuid here*/ - if (smp_processor_id() == boot_cpuid) + if (cpu == boot_cpuid) boot_cpuid = any_online_cpu(cpu_online_map); /* FIXME: abstract this to not be platform specific later on */ @@ -286,6 +290,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) return 1; } +#ifdef CONFIG_XICS static inline void smp_xics_do_message(int cpu, int msg) { set_bit(msg, &xics_ipi_message[cpu].value); @@ -327,6 +332,37 @@ static void __devinit smp_xics_setup_cpu(int cpu) cpu_clear(cpu, of_spin_map); } +#endif /* CONFIG_XICS */ +#ifdef CONFIG_BPA_IIC +static void smp_iic_message_pass(int target, int msg) +{ + unsigned int i; + + if (target < NR_CPUS) { + iic_cause_IPI(target, msg); + } else { + for_each_online_cpu(i) { + if (target == MSG_ALL_BUT_SELF + && i == smp_processor_id()) + continue; + iic_cause_IPI(i, msg); + } + } +} + +static int __init smp_iic_probe(void) +{ + iic_request_IPIs(); + + return cpus_weight(cpu_possible_map); +} + +static void __devinit smp_iic_setup_cpu(int cpu) +{ + if (cpu != boot_cpuid) + iic_setup_cpu(); +} +#endif /* CONFIG_BPA_IIC */ static DEFINE_SPINLOCK(timebase_lock); static unsigned long timebase = 0; @@ -381,14 +417,15 @@ static int smp_pSeries_cpu_bootable(unsigned int nr) return 1; } - +#ifdef CONFIG_MPIC static struct smp_ops_t pSeries_mpic_smp_ops = { .message_pass = smp_mpic_message_pass, .probe = smp_mpic_probe, .kick_cpu = smp_pSeries_kick_cpu, .setup_cpu = smp_mpic_setup_cpu, }; - +#endif +#ifdef CONFIG_XICS static struct smp_ops_t pSeries_xics_smp_ops = { .message_pass = smp_xics_message_pass, .probe = smp_xics_probe, @@ -396,6 +433,16 @@ static struct smp_ops_t pSeries_xics_smp_ops = { .setup_cpu = smp_xics_setup_cpu, .cpu_bootable = smp_pSeries_cpu_bootable, }; +#endif +#ifdef CONFIG_BPA_IIC +static struct smp_ops_t bpa_iic_smp_ops = { + .message_pass = smp_iic_message_pass, + .probe = smp_iic_probe, + .kick_cpu = smp_pSeries_kick_cpu, + .setup_cpu = smp_iic_setup_cpu, + .cpu_bootable = smp_pSeries_cpu_bootable, +}; +#endif /* This is called very early */ void __init smp_init_pSeries(void) @@ -404,10 +451,25 @@ void __init smp_init_pSeries(void) DBG(" -> smp_init_pSeries()\n"); - if (ppc64_interrupt_controller == IC_OPEN_PIC) + switch (ppc64_interrupt_controller) { +#ifdef CONFIG_MPIC + case IC_OPEN_PIC: smp_ops = &pSeries_mpic_smp_ops; - else + break; +#endif +#ifdef CONFIG_XICS + case IC_PPC_XIC: smp_ops = &pSeries_xics_smp_ops; + break; +#endif +#ifdef CONFIG_BPA_IIC + case IC_BPA_IIC: + smp_ops = &bpa_iic_smp_ops; + break; +#endif + default: + panic("Invalid interrupt controller"); + } #ifdef CONFIG_HOTPLUG_CPU smp_ops->cpu_disable = pSeries_cpu_disable; diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c index a3e0975c26c..6316188737b 100644 --- a/arch/ppc64/kernel/pacaData.c +++ b/arch/ppc64/kernel/pacaData.c @@ -42,21 +42,7 @@ extern unsigned long __toc_start; * processors. The processor VPD array needs one entry per physical * processor (not thread). 
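The pacaData.c hunk below collapses 128 hand-written PACAINITDATA(n, 0, NULL, 0, 0) entries into PACA_INIT(n), folding the start/asrr/asrv arguments, which only the boot cpu ever varied, into a separate BOOTCPU_PACA_INIT(), and keeping the iSeries-only fields in one place. The shape of the refactor, with the real field lists elided (they follow in the diff):

	/* Pattern sketch only -- see the genuine macros below. */
	#define PACA_INIT_COMMON_SKETCH(number)	\
		.paca_index = (number),		/* ...fields shared by every cpu... */

	#define PACA_INIT_SKETCH(number)	\
		{ PACA_INIT_COMMON_SKETCH(number) /* + iSeries extras if configured */ }

	static struct paca_struct paca_sketch[] = {
		PACA_INIT_SKETCH(1), PACA_INIT_SKETCH(2), PACA_INIT_SKETCH(3),
	};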
*/ -#ifdef CONFIG_PPC_ISERIES -#define EXTRA_INITS(number, lpq) \ - .lppaca_ptr = &paca[number].lppaca, \ - .lpqueue_ptr = (lpq), /* &xItLpQueue, */ \ - .reg_save_ptr = &paca[number].reg_save, \ - .reg_save = { \ - .xDesc = 0xd397d9e2, /* "LpRS" */ \ - .xSize = sizeof(struct ItLpRegSave) \ - }, -#else -#define EXTRA_INITS(number, lpq) -#endif - -#define PACAINITDATA(number,start,lpq,asrr,asrv) \ -{ \ +#define PACA_INIT_COMMON(number, start, asrr, asrv) \ .lock_token = 0x8000, \ .paca_index = (number), /* Paca Index */ \ .default_decr = 0x00ff0000, /* Initial Decr */ \ @@ -74,147 +60,79 @@ extern unsigned long __toc_start; .end_of_quantum = 0xfffffffffffffffful, \ .slb_count = 64, \ }, \ - EXTRA_INITS((number), (lpq)) \ -} -struct paca_struct paca[] = { #ifdef CONFIG_PPC_ISERIES - PACAINITDATA( 0, 1, &xItLpQueue, 0, STAB0_VIRT_ADDR), +#define PACA_INIT_ISERIES(number) \ + .lppaca_ptr = &paca[number].lppaca, \ + .reg_save_ptr = &paca[number].reg_save, \ + .reg_save = { \ + .xDesc = 0xd397d9e2, /* "LpRS" */ \ + .xSize = sizeof(struct ItLpRegSave) \ + } + +#define PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 0, 0, 0) \ + PACA_INIT_ISERIES(number) \ +} + +#define BOOTCPU_PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 1, 0, STAB0_VIRT_ADDR) \ + PACA_INIT_ISERIES(number) \ +} + #else - PACAINITDATA( 0, 1, NULL, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR), +#define PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 0, 0, 0) \ +} + +#define BOOTCPU_PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR) \ +} #endif + +struct paca_struct paca[] = { + BOOTCPU_PACA_INIT(0), #if NR_CPUS > 1 - PACAINITDATA( 1, 0, NULL, 0, 0), - PACAINITDATA( 2, 0, NULL, 0, 0), - PACAINITDATA( 3, 0, NULL, 0, 0), + PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3), #if NR_CPUS > 4 - PACAINITDATA( 4, 0, NULL, 0, 0), - PACAINITDATA( 5, 0, NULL, 0, 0), - PACAINITDATA( 6, 0, NULL, 0, 0), - PACAINITDATA( 7, 0, NULL, 0, 0), + PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7), #if NR_CPUS > 8 - PACAINITDATA( 8, 0, NULL, 0, 0), - PACAINITDATA( 9, 0, NULL, 0, 0), - PACAINITDATA(10, 0, NULL, 0, 0), - PACAINITDATA(11, 0, NULL, 0, 0), - PACAINITDATA(12, 0, NULL, 0, 0), - PACAINITDATA(13, 0, NULL, 0, 0), - PACAINITDATA(14, 0, NULL, 0, 0), - PACAINITDATA(15, 0, NULL, 0, 0), - PACAINITDATA(16, 0, NULL, 0, 0), - PACAINITDATA(17, 0, NULL, 0, 0), - PACAINITDATA(18, 0, NULL, 0, 0), - PACAINITDATA(19, 0, NULL, 0, 0), - PACAINITDATA(20, 0, NULL, 0, 0), - PACAINITDATA(21, 0, NULL, 0, 0), - PACAINITDATA(22, 0, NULL, 0, 0), - PACAINITDATA(23, 0, NULL, 0, 0), - PACAINITDATA(24, 0, NULL, 0, 0), - PACAINITDATA(25, 0, NULL, 0, 0), - PACAINITDATA(26, 0, NULL, 0, 0), - PACAINITDATA(27, 0, NULL, 0, 0), - PACAINITDATA(28, 0, NULL, 0, 0), - PACAINITDATA(29, 0, NULL, 0, 0), - PACAINITDATA(30, 0, NULL, 0, 0), - PACAINITDATA(31, 0, NULL, 0, 0), + PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11), + PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15), + PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19), + PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23), + PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27), + PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31), #if NR_CPUS > 32 - PACAINITDATA(32, 0, NULL, 0, 0), - PACAINITDATA(33, 0, NULL, 0, 0), - PACAINITDATA(34, 0, NULL, 0, 0), - PACAINITDATA(35, 0, NULL, 0, 0), - PACAINITDATA(36, 0, NULL, 0, 0), - PACAINITDATA(37, 0, NULL, 0, 0), - PACAINITDATA(38, 0, NULL, 0, 0), - PACAINITDATA(39, 0, NULL, 0, 
0), - PACAINITDATA(40, 0, NULL, 0, 0), - PACAINITDATA(41, 0, NULL, 0, 0), - PACAINITDATA(42, 0, NULL, 0, 0), - PACAINITDATA(43, 0, NULL, 0, 0), - PACAINITDATA(44, 0, NULL, 0, 0), - PACAINITDATA(45, 0, NULL, 0, 0), - PACAINITDATA(46, 0, NULL, 0, 0), - PACAINITDATA(47, 0, NULL, 0, 0), - PACAINITDATA(48, 0, NULL, 0, 0), - PACAINITDATA(49, 0, NULL, 0, 0), - PACAINITDATA(50, 0, NULL, 0, 0), - PACAINITDATA(51, 0, NULL, 0, 0), - PACAINITDATA(52, 0, NULL, 0, 0), - PACAINITDATA(53, 0, NULL, 0, 0), - PACAINITDATA(54, 0, NULL, 0, 0), - PACAINITDATA(55, 0, NULL, 0, 0), - PACAINITDATA(56, 0, NULL, 0, 0), - PACAINITDATA(57, 0, NULL, 0, 0), - PACAINITDATA(58, 0, NULL, 0, 0), - PACAINITDATA(59, 0, NULL, 0, 0), - PACAINITDATA(60, 0, NULL, 0, 0), - PACAINITDATA(61, 0, NULL, 0, 0), - PACAINITDATA(62, 0, NULL, 0, 0), - PACAINITDATA(63, 0, NULL, 0, 0), + PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35), + PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39), + PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43), + PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47), + PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51), + PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55), + PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59), + PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63), #if NR_CPUS > 64 - PACAINITDATA(64, 0, NULL, 0, 0), - PACAINITDATA(65, 0, NULL, 0, 0), - PACAINITDATA(66, 0, NULL, 0, 0), - PACAINITDATA(67, 0, NULL, 0, 0), - PACAINITDATA(68, 0, NULL, 0, 0), - PACAINITDATA(69, 0, NULL, 0, 0), - PACAINITDATA(70, 0, NULL, 0, 0), - PACAINITDATA(71, 0, NULL, 0, 0), - PACAINITDATA(72, 0, NULL, 0, 0), - PACAINITDATA(73, 0, NULL, 0, 0), - PACAINITDATA(74, 0, NULL, 0, 0), - PACAINITDATA(75, 0, NULL, 0, 0), - PACAINITDATA(76, 0, NULL, 0, 0), - PACAINITDATA(77, 0, NULL, 0, 0), - PACAINITDATA(78, 0, NULL, 0, 0), - PACAINITDATA(79, 0, NULL, 0, 0), - PACAINITDATA(80, 0, NULL, 0, 0), - PACAINITDATA(81, 0, NULL, 0, 0), - PACAINITDATA(82, 0, NULL, 0, 0), - PACAINITDATA(83, 0, NULL, 0, 0), - PACAINITDATA(84, 0, NULL, 0, 0), - PACAINITDATA(85, 0, NULL, 0, 0), - PACAINITDATA(86, 0, NULL, 0, 0), - PACAINITDATA(87, 0, NULL, 0, 0), - PACAINITDATA(88, 0, NULL, 0, 0), - PACAINITDATA(89, 0, NULL, 0, 0), - PACAINITDATA(90, 0, NULL, 0, 0), - PACAINITDATA(91, 0, NULL, 0, 0), - PACAINITDATA(92, 0, NULL, 0, 0), - PACAINITDATA(93, 0, NULL, 0, 0), - PACAINITDATA(94, 0, NULL, 0, 0), - PACAINITDATA(95, 0, NULL, 0, 0), - PACAINITDATA(96, 0, NULL, 0, 0), - PACAINITDATA(97, 0, NULL, 0, 0), - PACAINITDATA(98, 0, NULL, 0, 0), - PACAINITDATA(99, 0, NULL, 0, 0), - PACAINITDATA(100, 0, NULL, 0, 0), - PACAINITDATA(101, 0, NULL, 0, 0), - PACAINITDATA(102, 0, NULL, 0, 0), - PACAINITDATA(103, 0, NULL, 0, 0), - PACAINITDATA(104, 0, NULL, 0, 0), - PACAINITDATA(105, 0, NULL, 0, 0), - PACAINITDATA(106, 0, NULL, 0, 0), - PACAINITDATA(107, 0, NULL, 0, 0), - PACAINITDATA(108, 0, NULL, 0, 0), - PACAINITDATA(109, 0, NULL, 0, 0), - PACAINITDATA(110, 0, NULL, 0, 0), - PACAINITDATA(111, 0, NULL, 0, 0), - PACAINITDATA(112, 0, NULL, 0, 0), - PACAINITDATA(113, 0, NULL, 0, 0), - PACAINITDATA(114, 0, NULL, 0, 0), - PACAINITDATA(115, 0, NULL, 0, 0), - PACAINITDATA(116, 0, NULL, 0, 0), - PACAINITDATA(117, 0, NULL, 0, 0), - PACAINITDATA(118, 0, NULL, 0, 0), - PACAINITDATA(119, 0, NULL, 0, 0), - PACAINITDATA(120, 0, NULL, 0, 0), - PACAINITDATA(121, 0, NULL, 0, 0), - PACAINITDATA(122, 0, NULL, 0, 0), - PACAINITDATA(123, 0, NULL, 0, 0), - PACAINITDATA(124, 0, NULL, 0, 0), - 
PACAINITDATA(125, 0, NULL, 0, 0), - PACAINITDATA(126, 0, NULL, 0, 0), - PACAINITDATA(127, 0, NULL, 0, 0), + PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67), + PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71), + PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75), + PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79), + PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83), + PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87), + PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91), + PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95), + PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99), + PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103), + PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107), + PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111), + PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115), + PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119), + PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123), + PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127), #endif #endif #endif diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index 2bf0513f3ec..d0d55c7908e 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -108,8 +108,28 @@ void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region region->end = res->end - offset; } +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + unsigned long offset = 0; + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + if (!hose) + return; + + if (res->flags & IORESOURCE_IO) + offset = (unsigned long)hose->io_base_virt - pci_io_base; + + if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + + res->start = region->start + offset; + res->end = region->end + offset; +} + #ifdef CONFIG_HOTPLUG EXPORT_SYMBOL(pcibios_resource_to_bus); +EXPORT_SYMBOL(pcibios_bus_to_resource); #endif /* @@ -351,7 +371,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, *offset += hose->pci_mem_offset; res_bit = IORESOURCE_MEM; } else { - io_offset = (unsigned long)hose->io_base_virt; + io_offset = (unsigned long)hose->io_base_virt - pci_io_base; *offset += io_offset; res_bit = IORESOURCE_IO; } @@ -378,7 +398,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, /* found it! 
construct the final physical address */ if (mmap_state == pci_mmap_io) - *offset += hose->io_base_phys - io_offset; + *offset += hose->io_base_phys - io_offset; return rp; } @@ -902,6 +922,9 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) ppc_md.iommu_dev_setup(dev); + if (ppc_md.irq_bus_setup) + ppc_md.irq_bus_setup(bus); + if (!pci_probe_only) return; @@ -941,4 +964,22 @@ int pci_read_irq_line(struct pci_dev *pci_dev) } EXPORT_SYMBOL(pci_read_irq_line); +void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, + u64 *start, u64 *end) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned long offset = 0; + + if (hose == NULL) + return; + + if (rsrc->flags & IORESOURCE_IO) + offset = pci_io_base - (unsigned long)hose->io_base_virt + + hose->io_base_phys; + + *start = rsrc->start + offset; + *end = rsrc->end + offset; +} + #endif /* CONFIG_PPC_MULTIPLATFORM */ diff --git a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h index 0fd7d849aa7..26be78b13af 100644 --- a/arch/ppc64/kernel/pci.h +++ b/arch/ppc64/kernel/pci.h @@ -40,10 +40,14 @@ struct device_node *fetch_dev_dn(struct pci_dev *dev); void pci_addr_cache_insert_device(struct pci_dev *dev); void pci_addr_cache_remove_device(struct pci_dev *dev); -/* From pSeries_pci.h */ +/* From rtas_pci.h */ void init_pci_config_tokens (void); unsigned long get_phb_buid (struct device_node *); +/* From pSeries_pci.h */ +extern void pSeries_final_fixup(void); +extern void pSeries_irq_bus_setup(struct pci_bus *bus); + extern unsigned long pci_probe_only; extern unsigned long pci_assign_all_buses; extern int pci_read_irq_line(struct pci_dev *pci_dev); diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index 6cf03d387b9..e40877fa67c 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -97,7 +97,7 @@ EXPORT_SYMBOL(smu_cmdbuf_abs); extern void udbg_init_scc(struct device_node *np); -void __pmac pmac_show_cpuinfo(struct seq_file *m) +static void __pmac pmac_show_cpuinfo(struct seq_file *m) { struct device_node *np; char *pp; @@ -144,7 +144,7 @@ void __pmac pmac_show_cpuinfo(struct seq_file *m) } -void __init pmac_setup_arch(void) +static void __init pmac_setup_arch(void) { /* init to some ~sane value until calibrate_delay() runs */ loops_per_jiffy = 50000000; @@ -186,6 +186,8 @@ void __init pmac_setup_arch(void) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif + + printk(KERN_INFO "Using native/NAP idle loop\n"); } #ifdef CONFIG_SCSI @@ -228,7 +230,7 @@ void __pmac note_bootable_part(dev_t dev, int part, int goodness) } } -void __pmac pmac_restart(char *cmd) +static void __pmac pmac_restart(char *cmd) { switch(sys_ctrler) { #ifdef CONFIG_ADB_PMU @@ -247,7 +249,7 @@ void __pmac pmac_restart(char *cmd) } } -void __pmac pmac_power_off(void) +static void __pmac pmac_power_off(void) { switch(sys_ctrler) { #ifdef CONFIG_ADB_PMU @@ -265,7 +267,7 @@ void __pmac pmac_power_off(void) } } -void __pmac pmac_halt(void) +static void __pmac pmac_halt(void) { pmac_power_off(); } @@ -325,7 +327,7 @@ static void __init init_boot_display(void) /* * Early initialization. 
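Three address spaces meet in the pci.c changes above: bus addresses (what a BAR holds), kernel resource addresses (bus plus hose->io_base_virt - pci_io_base for I/O ranges), and the CPU physical addresses handed back by pci_resource_to_user(). A toy round trip with invented hose values; only the offset arithmetic mirrors the patch:

	#include <assert.h>

	/* Made-up numbers; mirrors pcibios_bus_to_resource() and
	 * pci_resource_to_user() from the hunks above. */
	int main(void)
	{
		unsigned long pci_io_base  = 0xd000000000000000UL;
		unsigned long io_base_virt = 0xd000000000100000UL;
		unsigned long io_base_phys = 0x00000400f8000000UL;
		unsigned long bus_addr     = 0x1000;	/* from a BAR */

		/* pcibios_bus_to_resource(): bus -> kernel resource */
		unsigned long res = bus_addr + (io_base_virt - pci_io_base);

		/* pci_resource_to_user(): resource -> CPU physical */
		unsigned long user = res + pci_io_base - io_base_virt + io_base_phys;

		assert(user == bus_addr + io_base_phys);
		return 0;
	}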
*/ -void __init pmac_init_early(void) +static void __init pmac_init_early(void) { DBG(" -> pmac_init_early\n"); @@ -507,5 +509,6 @@ struct machdep_calls __initdata pmac_md = { .calibrate_decr = pmac_calibrate_decr, .feature_call = pmac_do_feature_call, .progress = pmac_progress, - .check_legacy_ioport = pmac_check_legacy_ioport + .check_legacy_ioport = pmac_check_legacy_ioport, + .idle_loop = native_idle, }; diff --git a/arch/ppc64/kernel/pmac_time.c b/arch/ppc64/kernel/pmac_time.c index f24827581dd..3059edb09cc 100644 --- a/arch/ppc64/kernel/pmac_time.c +++ b/arch/ppc64/kernel/pmac_time.c @@ -40,11 +40,6 @@ #define DBG(x...) #endif -extern void setup_default_decr(void); - -extern unsigned long ppc_tb_freq; -extern unsigned long ppc_proc_freq; - /* Apparently the RTC stores seconds since 1 Jan 1904 */ #define RTC_OFFSET 2082844800 @@ -161,8 +156,7 @@ void __init pmac_get_boot_time(struct rtc_time *tm) /* * Query the OF and get the decr frequency. - * This was taken from the pmac time_init() when merging the prep/pmac - * time functions. + * FIXME: merge this with generic_calibrate_decr */ void __init pmac_calibrate_decr(void) { diff --git a/arch/ppc64/kernel/ppc_ksyms.c b/arch/ppc64/kernel/ppc_ksyms.c index b230a63fe4c..705742f4eec 100644 --- a/arch/ppc64/kernel/ppc_ksyms.c +++ b/arch/ppc64/kernel/ppc_ksyms.c @@ -75,6 +75,7 @@ EXPORT_SYMBOL(giveup_fpu); EXPORT_SYMBOL(giveup_altivec); #endif EXPORT_SYMBOL(__flush_icache_range); +EXPORT_SYMBOL(flush_dcache_range); #ifdef CONFIG_SMP #ifdef CONFIG_PPC_ISERIES diff --git a/arch/ppc64/kernel/proc_ppc64.c b/arch/ppc64/kernel/proc_ppc64.c index 0914b0669b0..a87c66a9652 100644 --- a/arch/ppc64/kernel/proc_ppc64.c +++ b/arch/ppc64/kernel/proc_ppc64.c @@ -53,7 +53,7 @@ static int __init proc_ppc64_create(void) if (!root) return 1; - if (!(systemcfg->platform & PLATFORM_PSERIES)) + if (!(systemcfg->platform & (PLATFORM_PSERIES | PLATFORM_BPA))) return 0; if (!proc_mkdir("rtas", root)) diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c index aba89554d89..f7cae05e40f 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c @@ -36,6 +36,7 @@ #include <linux/kallsyms.h> #include <linux/interrupt.h> #include <linux/utsname.h> +#include <linux/kprobes.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -307,6 +308,8 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { + kprobe_flush_task(current); + #ifndef CONFIG_SMP if (last_task_used_math == current) last_task_used_math = NULL; @@ -321,6 +324,7 @@ void flush_thread(void) { struct thread_info *t = current_thread_info(); + kprobe_flush_task(current); if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT); diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 47727a6f734..5aca01ddd81 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -916,6 +916,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, } } +#ifdef CONFIG_ALTIVEC /* Check if we have a VMX and eventually update CPU features */ prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", NULL); if (prop && (*prop) > 0) { @@ -929,6 +930,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; } +#endif /* CONFIG_ALTIVEC */ /* * Check for an SMT capable CPU and set the CPU feature. 
diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c
index b7683abfbe6..e248a7950ae 100644
--- a/arch/ppc64/kernel/prom_init.c
+++ b/arch/ppc64/kernel/prom_init.c
@@ -1915,9 +1915,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long
 	prom_send_capabilities();
 
 	/*
-	 * On pSeries, copy the CPU hold code
+	 * On pSeries and BPA, copy the CPU hold code
 	 */
-	if (RELOC(of_platform) & PLATFORM_PSERIES)
+	if (RELOC(of_platform) & (PLATFORM_PSERIES | PLATFORM_BPA))
 		copy_and_flush(0, KERNELBASE - offset, 0x100, 0);
 
 	/*
diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c
index 9f8c6087ae5..2993f108d96 100644
--- a/arch/ppc64/kernel/ptrace.c
+++ b/arch/ppc64/kernel/ptrace.c
@@ -305,6 +305,8 @@ static void do_syscall_trace(void)
 
 void do_syscall_trace_enter(struct pt_regs *regs)
 {
+	secure_computing(regs->gpr[0]);
+
 	if (test_thread_flag(TIF_SYSCALL_TRACE)
 	    && (current->ptrace & PT_PTRACED))
 		do_syscall_trace();
@@ -320,8 +322,6 @@ void do_syscall_trace_enter(struct pt_regs *regs)
 
 void do_syscall_trace_leave(struct pt_regs *regs)
 {
-	secure_computing(regs->gpr[0]);
-
 	if (unlikely(current->audit_context))
 		audit_syscall_exit(current,
 				   (regs->ccr&0x1000)?AUDITSC_FAILURE:AUDITSC_SUCCESS,
diff --git a/arch/ppc64/kernel/rtas-proc.c b/arch/ppc64/kernel/rtas-proc.c
index 28b1f1521f2..1f3ff860fdf 100644
--- a/arch/ppc64/kernel/rtas-proc.c
+++ b/arch/ppc64/kernel/rtas-proc.c
@@ -371,11 +371,11 @@ static ssize_t ppc_rtas_progress_write(struct file *file,
 	/* Lets see if the user passed hexdigits */
 	hex = simple_strtoul(progress_led, NULL, 10);
 
-	ppc_md.progress ((char *)progress_led, hex);
+	rtas_progress ((char *)progress_led, hex);
 	return count;
 
 	/* clear the line */
-	/* ppc_md.progress(" ", 0xffff);*/
+	/* rtas_progress(" ", 0xffff);*/
 }
 /* ****************************************************************** */
 static int ppc_rtas_progress_show(struct seq_file *m, void *v)
diff --git a/arch/ppc64/kernel/rtas.c b/arch/ppc64/kernel/rtas.c
index 5575603def2..5e8eb33b8e5 100644
--- a/arch/ppc64/kernel/rtas.c
+++ b/arch/ppc64/kernel/rtas.c
@@ -91,6 +91,123 @@ call_rtas_display_status_delay(unsigned char c)
 	}
 }
 
+void
+rtas_progress(char *s, unsigned short hex)
+{
+	struct device_node *root;
+	int width, *p;
+	char *os;
+	static int display_character, set_indicator;
+	static int display_width, display_lines, *row_width, form_feed;
+	static DEFINE_SPINLOCK(progress_lock);
+	static int current_line;
+	static int pending_newline = 0;  /* did last write end with unprinted newline? */
+
+	if (!rtas.base)
+		return;
+
+	if (display_width == 0) {
+		display_width = 0x10;
+		if ((root = find_path_device("/rtas"))) {
+			if ((p = (unsigned int *)get_property(root,
+					"ibm,display-line-length", NULL)))
+				display_width = *p;
+			if ((p = (unsigned int *)get_property(root,
+					"ibm,form-feed", NULL)))
+				form_feed = *p;
+			if ((p = (unsigned int *)get_property(root,
+					"ibm,display-number-of-lines", NULL)))
+				display_lines = *p;
+			row_width = (unsigned int *)get_property(root,
+					"ibm,display-truncation-length", NULL);
+		}
+		display_character = rtas_token("display-character");
+		set_indicator = rtas_token("set-indicator");
+	}
+
+	if (display_character == RTAS_UNKNOWN_SERVICE) {
+		/* use hex display if available */
+		if (set_indicator != RTAS_UNKNOWN_SERVICE)
+			rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex);
+		return;
+	}
+
+	spin_lock(&progress_lock);
+
+	/*
+	 * Last write ended with newline, but we didn't print it since
+	 * it would just clear the bottom line of output. Print it now
+	 * instead.
+	 *
+	 * If no newline is pending and form feed is supported, clear the
+	 * display with a form feed; otherwise, print a CR to start output
+	 * at the beginning of the line.
+	 */
+	if (pending_newline) {
+		rtas_call(display_character, 1, 1, NULL, '\r');
+		rtas_call(display_character, 1, 1, NULL, '\n');
+		pending_newline = 0;
+	} else {
+		current_line = 0;
+		if (form_feed)
+			rtas_call(display_character, 1, 1, NULL,
+				  (char)form_feed);
+		else
+			rtas_call(display_character, 1, 1, NULL, '\r');
+	}
+
+	if (row_width)
+		width = row_width[current_line];
+	else
+		width = display_width;
+	os = s;
+	while (*os) {
+		if (*os == '\n' || *os == '\r') {
+			/* If newline is the last character, save it
+			 * until next call to avoid bumping up the
+			 * display output.
+			 */
+			if (*os == '\n' && !os[1]) {
+				pending_newline = 1;
+				current_line++;
+				if (current_line > display_lines-1)
+					current_line = display_lines-1;
+				spin_unlock(&progress_lock);
+				return;
+			}
+
+			/* RTAS wants CR-LF, not just LF */
+
+			if (*os == '\n') {
+				rtas_call(display_character, 1, 1, NULL, '\r');
+				rtas_call(display_character, 1, 1, NULL, '\n');
+			} else {
+				/* CR might be used to re-draw a line, so we'll
+				 * leave it alone and not add LF.
+				 */
+				rtas_call(display_character, 1, 1, NULL, *os);
+			}
+
+			if (row_width)
+				width = row_width[current_line];
+			else
+				width = display_width;
+		} else {
+			width--;
+			rtas_call(display_character, 1, 1, NULL, *os);
+		}
+
+		os++;
+
+		/* if we overwrite the screen length */
+		if (width <= 0)
+			while ((*os != 0) && (*os != '\n') && (*os != '\r'))
+				os++;
+	}
+
+	spin_unlock(&progress_lock);
+}
+
 int
 rtas_token(const char *service)
 {
@@ -425,8 +542,8 @@ rtas_flash_firmware(void)
 	printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size);
 	printk(KERN_ALERT "FLASH: performing flash and reboot\n");
-	ppc_md.progress("Flashing \n", 0x0);
-	ppc_md.progress("Please Wait... ", 0x0);
+	rtas_progress("Flashing \n", 0x0);
+	rtas_progress("Please Wait... ", 0x0);
 	printk(KERN_ALERT "FLASH: this will take several minutes. Do not power off!\n");
 	status = rtas_call(update_token, 1, 1, NULL, rtas_block_list);
 	switch (status) {	/* should only get "bad" status */
diff --git a/arch/ppc64/kernel/rtas_pci.c b/arch/ppc64/kernel/rtas_pci.c
new file mode 100644
index 00000000000..1048817befb
--- /dev/null
+++ b/arch/ppc64/kernel/rtas_pci.c
@@ -0,0 +1,495 @@
+/*
+ * arch/ppc64/kernel/rtas_pci.c
+ *
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * RTAS specific routines for PCI.
+ *
+ * Based on code from pci.c, chrp_pci.c and pSeries_pci.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/rtas.h>
+
+#include "mpic.h"
+#include "pci.h"
+
+/* RTAS tokens */
+static int read_pci_config;
+static int write_pci_config;
+static int ibm_read_pci_config;
+static int ibm_write_pci_config;
+
+static int config_access_valid(struct device_node *dn, int where)
+{
+	if (where < 256)
+		return 1;
+	if (where < 4096 && dn->pci_ext_config_space)
+		return 1;
+
+	return 0;
+}
+
+static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val)
+{
+	int returnval = -1;
+	unsigned long buid, addr;
+	int ret;
+
+	if (!dn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (!config_access_valid(dn, where))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = ((where & 0xf00) << 20) | (dn->busno << 16) |
+		(dn->devfn << 8) | (where & 0xff);
+	buid = dn->phb->buid;
+	if (buid) {
+		ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
+				addr, buid >> 32, buid & 0xffffffff, size);
+	} else {
+		ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
+	}
+	*val = returnval;
+
+	if (ret)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (returnval == EEH_IO_ERROR_VALUE(size)
+	    && eeh_dn_check_failure (dn, NULL))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_read_config(struct pci_bus *bus,
+				unsigned int devfn,
+				int where, int size, u32 *val)
+{
+	struct device_node *busdn, *dn;
+
+	if (bus->self)
+		busdn = pci_device_to_OF_node(bus->self);
+	else
+		busdn = bus->sysdata;	/* must be a phb */
+
+	/* Search only direct children of the bus */
+	for (dn = busdn->child; dn; dn = dn->sibling)
+		if (dn->devfn == devfn)
+			return rtas_read_config(dn, where, size, val);
+	return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static int rtas_write_config(struct device_node *dn, int where, int size, u32 val)
+{
+	unsigned long buid, addr;
+	int ret;
+
+	if (!dn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (!config_access_valid(dn, where))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = ((where & 0xf00) << 20) | (dn->busno << 16) |
+		(dn->devfn << 8) | (where & 0xff);
+	buid = dn->phb->buid;
+	if (buid) {
+		ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, buid >> 32, buid & 0xffffffff, size, (ulong) val);
+	} else {
+		ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
+	}
+
+	if (ret)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_write_config(struct pci_bus *bus,
+				 unsigned int devfn,
+				 int where, int size, u32 val)
+{
+	struct device_node *busdn, *dn;
+
+	if (bus->self)
+		busdn = pci_device_to_OF_node(bus->self);
+	else
+		busdn = bus->sysdata;	/* must be a phb */
+
+	/* Search only direct children of the bus */
+	for (dn = busdn->child; dn; dn = dn->sibling)
+		if (dn->devfn == devfn)
+			return rtas_write_config(dn, where, size, val);
+	return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+struct pci_ops rtas_pci_ops = {
+	rtas_pci_read_config,
+	rtas_pci_write_config
+};
+
+int is_python(struct device_node *dev)
+{
+	char *model = (char *)get_property(dev, "model", NULL);
+
+	if (model && strstr(model, "Python"))
+		return 1;
+
+	return 0;
+}
+
+static int get_phb_reg_prop(struct device_node *dev,
+			    unsigned int addr_size_words,
+			    struct reg_property64 *reg)
+{
+	unsigned int *ui_ptr = NULL, len;
+
+	/* Found a PHB, now figure out where his registers are mapped. */
+	ui_ptr = (unsigned int *)get_property(dev, "reg", &len);
+	if (ui_ptr == NULL)
+		return 1;
+
+	if (addr_size_words == 1) {
+		reg->address = ((struct reg_property32 *)ui_ptr)->address;
+		reg->size = ((struct reg_property32 *)ui_ptr)->size;
+	} else {
+		*reg = *((struct reg_property64 *)ui_ptr);
+	}
+
+	return 0;
+}
+
+static void python_countermeasures(struct device_node *dev,
+				   unsigned int addr_size_words)
+{
+	struct reg_property64 reg_struct;
+	void __iomem *chip_regs;
+	volatile u32 val;
+
+	if (get_phb_reg_prop(dev, addr_size_words, &reg_struct))
+		return;
+
+	/* Python's register file is 1 MB in size. */
+	chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000);
+
+	/*
+	 * Firmware doesn't always clear this bit which is critical
+	 * for good performance - Anton
+	 */
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+	val = in_be32(chip_regs + 0xf6030);
+	if (val & PRG_CL_RESET_VALID) {
+		printk(KERN_INFO "Python workaround: ");
+		val &= ~PRG_CL_RESET_VALID;
+		out_be32(chip_regs + 0xf6030, val);
+		/*
+		 * We must read it back for changes to
+		 * take effect
+		 */
+		val = in_be32(chip_regs + 0xf6030);
+		printk("reg0: %x\n", val);
+	}
+
+	iounmap(chip_regs);
+}
+
+void __init init_pci_config_tokens (void)
+{
+	read_pci_config = rtas_token("read-pci-config");
+	write_pci_config = rtas_token("write-pci-config");
+	ibm_read_pci_config = rtas_token("ibm,read-pci-config");
+	ibm_write_pci_config = rtas_token("ibm,write-pci-config");
+}
+
+unsigned long __devinit get_phb_buid (struct device_node *phb)
+{
+	int addr_cells;
+	unsigned int *buid_vals;
+	unsigned int len;
+	unsigned long buid;
+
+	if (ibm_read_pci_config == -1) return 0;
+
+	/* PHB's will always be children of the root node,
+	 * or so it is promised by the current firmware.
+	 */
+	if (phb->parent == NULL)
+		return 0;
+	if (phb->parent->parent)
+		return 0;
+
+	buid_vals = (unsigned int *) get_property(phb, "reg", &len);
+	if (buid_vals == NULL)
+		return 0;
+
+	addr_cells = prom_n_addr_cells(phb);
+	if (addr_cells == 1) {
+		buid = (unsigned long) buid_vals[0];
+	} else {
+		buid = (((unsigned long)buid_vals[0]) << 32UL) |
+			(((unsigned long)buid_vals[1]) & 0xffffffff);
+	}
+	return buid;
+}
+
+static int phb_set_bus_ranges(struct device_node *dev,
+			      struct pci_controller *phb)
+{
+	int *bus_range;
+	unsigned int len;
+
+	bus_range = (int *) get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		return 1;
+	}
+
+	phb->first_busno = bus_range[0];
+	phb->last_busno = bus_range[1];
+
+	return 0;
+}
+
+static int __devinit setup_phb(struct device_node *dev,
+			       struct pci_controller *phb,
+			       unsigned int addr_size_words)
+{
+	pci_setup_pci_controller(phb);
+
+	if (is_python(dev))
+		python_countermeasures(dev, addr_size_words);
+
+	if (phb_set_bus_ranges(dev, phb))
+		return 1;
+
+	phb->arch_data = dev;
+	phb->ops = &rtas_pci_ops;
+	phb->buid = get_phb_buid(dev);
+
+	return 0;
+}
+
+static void __devinit add_linux_pci_domain(struct device_node *dev,
+					   struct pci_controller *phb,
+					   struct property *of_prop)
+{
+	memset(of_prop, 0, sizeof(struct property));
+	of_prop->name = "linux,pci-domain";
+	of_prop->length = sizeof(phb->global_number);
+	of_prop->value = (unsigned char *)&of_prop[1];
+	memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number));
+	prom_add_property(dev, of_prop);
+}
+
+static struct pci_controller * __init alloc_phb(struct device_node *dev,
+						unsigned int addr_size_words)
+{
+	struct pci_controller *phb;
+	struct property *of_prop;
+
+	phb = alloc_bootmem(sizeof(struct pci_controller));
+	if (phb == NULL)
+		return NULL;
+
+	of_prop = alloc_bootmem(sizeof(struct property) +
+				sizeof(phb->global_number));
+	if (!of_prop)
+		return NULL;
+
+	if (setup_phb(dev, phb, addr_size_words))
+		return NULL;
+
+	add_linux_pci_domain(dev, phb, of_prop);
+
+	return phb;
+}
+
+static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words)
+{
+	struct pci_controller *phb;
+
+	phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller),
+					       GFP_KERNEL);
+	if (phb == NULL)
+		return NULL;
+
+	if (setup_phb(dev, phb, addr_size_words))
+		return NULL;
+
+	phb->is_dynamic = 1;
+
+	/* TODO: linux,pci-domain? */
+
+	return phb;
+}
+
+unsigned long __init find_and_init_phbs(void)
+{
+	struct device_node *node;
+	struct pci_controller *phb;
+	unsigned int root_size_cells = 0;
+	unsigned int index;
+	unsigned int *opprop = NULL;
+	struct device_node *root = of_find_node_by_path("/");
+
+	if (ppc64_interrupt_controller == IC_OPEN_PIC) {
+		opprop = (unsigned int *)get_property(root,
+				"platform-open-pic", NULL);
+	}
+
+	root_size_cells = prom_n_size_cells(root);
+
+	index = 0;
+
+	for (node = of_get_next_child(root, NULL);
+	     node != NULL;
+	     node = of_get_next_child(root, node)) {
+		if (node->type == NULL || strcmp(node->type, "pci") != 0)
+			continue;
+
+		phb = alloc_phb(node, root_size_cells);
+		if (!phb)
+			continue;
+
+		pci_process_bridge_OF_ranges(phb, node);
+		pci_setup_phb_io(phb, index == 0);
+#ifdef CONFIG_PPC_PSERIES
+		if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) {
+			int addr = root_size_cells * (index + 2) - 1;
+			mpic_assign_isu(pSeries_mpic, index, opprop[addr]);
+		}
+#endif
+		index++;
+	}
+
+	of_node_put(root);
+	pci_devs_phb_init();
+
+	/*
+	 * pci_probe_only and pci_assign_all_buses can be set via properties
+	 * in chosen.
+	 */
+	if (of_chosen) {
+		int *prop;
+
+		prop = (int *)get_property(of_chosen, "linux,pci-probe-only",
+					   NULL);
+		if (prop)
+			pci_probe_only = *prop;
+
+		prop = (int *)get_property(of_chosen,
+				"linux,pci-assign-all-buses", NULL);
+		if (prop)
+			pci_assign_all_buses = *prop;
+	}
+
+	return 0;
+}
+
+struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	unsigned int root_size_cells = 0;
+	struct pci_controller *phb;
+	struct pci_bus *bus;
+	int primary;
+
+	root_size_cells = prom_n_size_cells(root);
+
+	primary = list_empty(&hose_list);
+	phb = alloc_phb_dynamic(dn, root_size_cells);
+	if (!phb)
+		return NULL;
+
+	pci_process_bridge_OF_ranges(phb, dn);
+
+	pci_setup_phb_io_dynamic(phb, primary);
+	of_node_put(root);
+
+	pci_devs_phb_init_dynamic(phb);
+	phb->last_busno = 0xff;
+	bus = pci_scan_bus(phb->first_busno, phb->ops, phb->arch_data);
+	phb->bus = bus;
+	phb->last_busno = bus->subordinate;
+
+	return phb;
+}
+EXPORT_SYMBOL(init_phb_dynamic);
+
+/* RPA-specific bits for removing PHBs */
+int pcibios_remove_root_bus(struct pci_controller *phb)
+{
+	struct pci_bus *b = phb->bus;
+	struct resource *res;
+	int rc, i;
+
+	res = b->resource[0];
+	if (!res->flags) {
+		printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__,
+				b->name);
+		return 1;
+	}
+
+	rc = unmap_bus_range(b);
+	if (rc) {
+		printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+			__FUNCTION__, b->name);
+		return 1;
+	}
+
+	if (release_resource(res)) {
+		printk(KERN_ERR "%s: failed to release IO on bus %s\n",
+			__FUNCTION__, b->name);
+		return 1;
+	}
+
+	for (i = 1; i < 3; ++i) {
+		res = b->resource[i];
+		if (!res->flags && i == 0) {
+			printk(KERN_ERR "%s: no MEM resource for PHB %s\n",
+				__FUNCTION__, b->name);
+			return 1;
+		}
+		if (res->flags && release_resource(res)) {
+			printk(KERN_ERR
+			       "%s: failed to release IO %d on bus %s\n",
+				__FUNCTION__, i, b->name);
+			return 1;
+		}
+	}
+
+	list_del(&phb->list_node);
+	if (phb->is_dynamic)
+		kfree(phb);
+
+	return 0;
+}
+EXPORT_SYMBOL(pcibios_remove_root_bus);
diff --git a/arch/ppc64/kernel/rtc.c b/arch/ppc64/kernel/rtc.c
index de02aedbe08..d729fefa0df 100644
--- a/arch/ppc64/kernel/rtc.c
+++ b/arch/ppc64/kernel/rtc.c
@@ -301,7 +301,7 @@ void iSeries_get_boot_time(struct rtc_time *tm)
 #ifdef CONFIG_PPC_RTAS
 #define MAX_RTC_WAIT 5000	/* 5 sec */
 #define RTAS_CLOCK_BUSY (-2)
-void pSeries_get_boot_time(struct rtc_time *rtc_tm)
+void rtas_get_boot_time(struct rtc_time *rtc_tm)
 {
 	int ret[8];
 	int error, wait_time;
@@ -336,7 +336,7 @@ void pSeries_get_boot_time(struct rtc_time *rtc_tm)
  * and if a delay is needed to read the clock.  In this case we just
  * silently return without updating rtc_tm.
  */
-void pSeries_get_rtc_time(struct rtc_time *rtc_tm)
+void rtas_get_rtc_time(struct rtc_time *rtc_tm)
 {
 	int ret[8];
 	int error, wait_time;
@@ -371,7 +371,7 @@ void pSeries_get_rtc_time(struct rtc_time *rtc_tm)
 	rtc_tm->tm_year = ret[0] - 1900;
 }
 
-int pSeries_set_rtc_time(struct rtc_time *tm)
+int rtas_set_rtc_time(struct rtc_time *tm)
 {
 	int error, wait_time;
 	unsigned long max_wait_tb;
diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c
index 8e439a81764..687e8559520 100644
--- a/arch/ppc64/kernel/setup.c
+++ b/arch/ppc64/kernel/setup.c
@@ -96,7 +96,6 @@ extern void udbg_init_maple_realmode(void);
 extern unsigned long klimit;
 
 extern void mm_init_ppc64(void);
-extern int idle_setup(void);
 extern void stab_initialize(unsigned long stab);
 extern void htab_initialize(void);
 extern void early_init_devtree(void *flat_dt);
@@ -344,6 +343,7 @@ static void __init setup_cpu_maps(void)
 extern struct machdep_calls pSeries_md;
 extern struct machdep_calls pmac_md;
 extern struct machdep_calls maple_md;
+extern struct machdep_calls bpa_md;
 
 /* Ultimately, stuff them in an elf section like initcalls... */
 static struct machdep_calls __initdata *machines[] = {
@@ -356,6 +356,9 @@ static struct machdep_calls __initdata *machines[] = {
 #ifdef CONFIG_PPC_MAPLE
 	&maple_md,
 #endif /* CONFIG_PPC_MAPLE */
+#ifdef CONFIG_PPC_BPA
+	&bpa_md,
+#endif
 	NULL
 };
 
@@ -673,37 +676,49 @@ void __init setup_system(void)
 	DBG(" <- setup_system()\n");
 }
 
-
-void machine_restart(char *cmd)
+/* also used by kexec */
+void machine_shutdown(void)
 {
 	if (ppc_md.nvram_sync)
 		ppc_md.nvram_sync();
+}
+
+void machine_restart(char *cmd)
+{
+	machine_shutdown();
 	ppc_md.restart(cmd);
+#ifdef CONFIG_SMP
+	smp_send_stop();
+#endif
+	printk(KERN_EMERG "System Halted, OK to turn off power\n");
+	local_irq_disable();
+	while (1) ;
 }
-EXPORT_SYMBOL(machine_restart);
 
 void machine_power_off(void)
 {
-	if (ppc_md.nvram_sync)
-		ppc_md.nvram_sync();
+	machine_shutdown();
 	ppc_md.power_off();
+#ifdef CONFIG_SMP
+	smp_send_stop();
+#endif
+	printk(KERN_EMERG "System Halted, OK to turn off power\n");
+	local_irq_disable();
+	while (1) ;
 }
-EXPORT_SYMBOL(machine_power_off);
 
 void machine_halt(void)
 {
-	if (ppc_md.nvram_sync)
-		ppc_md.nvram_sync();
+	machine_shutdown();
 	ppc_md.halt();
+#ifdef CONFIG_SMP
+	smp_send_stop();
+#endif
+	printk(KERN_EMERG "System Halted, OK to turn off power\n");
+	local_irq_disable();
+	while (1) ;
 }
-EXPORT_SYMBOL(machine_halt);
-
-unsigned long ppc_proc_freq;
-unsigned long ppc_tb_freq;
-
 static int ppc64_panic_event(struct notifier_block *this,
                              unsigned long event, void *ptr)
 {
@@ -1053,16 +1068,22 @@ void __init setup_arch(char **cmdline_p)
 	irqstack_early_init();
 	emergency_stack_init();
 
+	stabs_alloc();
+
 	/* set up the bootmem stuff with available memory */
 	do_init_bootmem();
+	sparse_init();
 
 	/* initialize the syscall map in systemcfg */
 	setup_syscall_map();
 
 	ppc_md.setup_arch();
 
-	/* Select the correct idle loop for the platform. */
-	idle_setup();
+	/* Use the default idle loop if the platform hasn't provided one. */
+	if (NULL == ppc_md.idle_loop) {
+		ppc_md.idle_loop = default_idle;
+		printk(KERN_INFO "Using default idle loop\n");
+	}
 
 	paging_init();
 	ppc64_boot_msg(0x15, "Setup Done");
@@ -1079,11 +1100,11 @@ void __init setup_arch(char **cmdline_p)
 static void ppc64_do_msg(unsigned int src, const char *msg)
 {
 	if (ppc_md.progress) {
-		char buf[32];
+		char buf[128];
 
-		sprintf(buf, "%08x \n", src);
+		sprintf(buf, "%08X\n", src);
 		ppc_md.progress(buf, 0);
-		sprintf(buf, "%-16s", msg);
+		snprintf(buf, 128, "%s", msg);
 		ppc_md.progress(buf, 0);
 	}
 }
@@ -1117,7 +1138,7 @@ void ppc64_dump_msg(unsigned int src, const char *msg)
 }
 
 /* This should only be called on processor 0 during calibrate decr */
-void setup_default_decr(void)
+void __init setup_default_decr(void)
 {
 	struct paca_struct *lpaca = get_paca();
diff --git a/arch/ppc64/kernel/smp.c b/arch/ppc64/kernel/smp.c
index 9ef5d36d6b2..793b562da65 100644
--- a/arch/ppc64/kernel/smp.c
+++ b/arch/ppc64/kernel/smp.c
@@ -65,13 +65,11 @@ struct smp_ops_t *smp_ops;
 
 static volatile unsigned int cpu_callin_map[NR_CPUS];
 
-extern unsigned char stab_array[];
-
 void smp_call_function_interrupt(void);
 
 int smt_enabled_at_boot = 1;
 
-#ifdef CONFIG_PPC_MULTIPLATFORM
+#ifdef CONFIG_MPIC
 void smp_mpic_message_pass(int target, int msg)
 {
 	/* make sure we're sending something that translates to an IPI */
@@ -128,7 +126,7 @@ void __devinit smp_generic_kick_cpu(int nr)
 	smp_mb();
 }
 
-#endif /* CONFIG_PPC_MULTIPLATFORM */
+#endif /* CONFIG_MPIC */
 
 static void __init smp_space_timers(unsigned int max_cpus)
 {
@@ -492,19 +490,6 @@ int __devinit __cpu_up(unsigned int cpu)
 
 	paca[cpu].default_decr = tb_ticks_per_jiffy;
 
-	if (!cpu_has_feature(CPU_FTR_SLB)) {
-		void *tmp;
-
-		/* maximum of 48 CPUs on machines with a segment table */
-		if (cpu >= 48)
-			BUG();
-
-		tmp = &stab_array[PAGE_SIZE * cpu];
-		memset(tmp, 0, PAGE_SIZE);
-		paca[cpu].stab_addr = (unsigned long)tmp;
-		paca[cpu].stab_real = virt_to_abs(tmp);
-	}
-
 	/* Make sure callin-map entry is 0 (can be leftover a CPU
 	 * hotplug
 	 */
diff --git a/arch/ppc64/kernel/spider-pic.c b/arch/ppc64/kernel/spider-pic.c
new file mode 100644
index 00000000000..d5c9a02fb11
--- /dev/null
+++ b/arch/ppc64/kernel/spider-pic.c
@@ -0,0 +1,191 @@
+/*
+ * External Interrupt Controller on Spider South Bridge
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+
+#include "bpa_iic.h"
+
+/* register layout taken from Spider spec, table 7.4-4 */
+enum {
+	TIR_DEN		= 0x004, /* Detection Enable Register */
+	TIR_MSK		= 0x084, /* Mask Level Register */
+	TIR_EDC		= 0x0c0, /* Edge Detection Clear Register */
+	TIR_PNDA	= 0x100, /* Pending Register A */
+	TIR_PNDB	= 0x104, /* Pending Register B */
+	TIR_CS		= 0x144, /* Current Status Register */
+	TIR_LCSA	= 0x150, /* Level Current Status Register A */
+	TIR_LCSB	= 0x154, /* Level Current Status Register B */
+	TIR_LCSC	= 0x158, /* Level Current Status Register C */
+	TIR_LCSD	= 0x15c, /* Level Current Status Register D */
+	TIR_CFGA	= 0x200, /* Setting Register A0 */
+	TIR_CFGB	= 0x204, /* Setting Register B0 */
+			/* 0x208 ... 0x3ff Setting Register An/Bn */
+	TIR_PPNDA	= 0x400, /* Packet Pending Register A */
+	TIR_PPNDB	= 0x404, /* Packet Pending Register B */
+	TIR_PIERA	= 0x408, /* Packet Output Error Register A */
+	TIR_PIERB	= 0x40c, /* Packet Output Error Register B */
+	TIR_PIEN	= 0x444, /* Packet Output Enable Register */
+	TIR_PIPND	= 0x454, /* Packet Output Pending Register */
+	TIRDID		= 0x484, /* Spider Device ID Register */
+	REISTIM		= 0x500, /* Reissue Command Timeout Time Setting */
+	REISTIMEN	= 0x504, /* Reissue Command Timeout Setting */
+	REISWAITEN	= 0x508, /* Reissue Wait Control*/
+};
+
+static void __iomem *spider_pics[4];
+
+static void __iomem *spider_get_pic(int irq)
+{
+	int node = irq / IIC_NODE_STRIDE;
+	irq %= IIC_NODE_STRIDE;
+
+	if (irq >= IIC_EXT_OFFSET &&
+	    irq < IIC_EXT_OFFSET + IIC_NUM_EXT &&
+	    spider_pics)
+		return spider_pics[node];
+	return NULL;
+}
+
+static int spider_get_nr(unsigned int irq)
+{
+	return (irq % IIC_NODE_STRIDE) - IIC_EXT_OFFSET;
+}
+
+static void __iomem *spider_get_irq_config(int irq)
+{
+	void __iomem *pic;
+	pic = spider_get_pic(irq);
+	return pic + TIR_CFGA + 8 * spider_get_nr(irq);
+}
+
+static void spider_enable_irq(unsigned int irq)
+{
+	void __iomem *cfg = spider_get_irq_config(irq);
+	irq = spider_get_nr(irq);
+
+	out_be32(cfg, in_be32(cfg) | 0x3107000eu);
+	out_be32(cfg + 4, in_be32(cfg + 4) | 0x00020000u | irq);
+}
+
+static void spider_disable_irq(unsigned int irq)
+{
+	void __iomem *cfg = spider_get_irq_config(irq);
+	irq = spider_get_nr(irq);
+
+	out_be32(cfg, in_be32(cfg) & ~0x30000000u);
+}
+
+static unsigned int spider_startup_irq(unsigned int irq)
+{
+	spider_enable_irq(irq);
+	return 0;
+}
+
+static void spider_shutdown_irq(unsigned int irq)
+{
+	spider_disable_irq(irq);
+}
+
+static void spider_end_irq(unsigned int irq)
+{
+	spider_enable_irq(irq);
+}
+
+static void spider_ack_irq(unsigned int irq)
+{
+	spider_disable_irq(irq);
+	iic_local_enable();
+}
+
+static struct hw_interrupt_type spider_pic = {
+	.typename = " SPIDER ",
+	.startup = spider_startup_irq,
+	.shutdown = spider_shutdown_irq,
+	.enable = spider_enable_irq,
+	.disable = spider_disable_irq,
+	.ack = spider_ack_irq,
+	.end = spider_end_irq,
+};
+
+
+int spider_get_irq(unsigned long int_pending)
+{
+	void __iomem *regs = spider_get_pic(int_pending);
+	unsigned long cs;
+	int irq;
+
+	cs = in_be32(regs + TIR_CS);
+
+	irq = cs >> 24;
+	if (irq != 63)
+		return irq;
+
+	return -1;
+}
+
+void spider_init_IRQ(void)
+{
+	int node;
+	struct device_node *dn;
+	unsigned int *property;
+	long spiderpic;
+	int n;
+
+/* FIXME: detect multiple PICs as soon as the device tree has them */
+	for (node = 0; node < 1; node++) {
+		dn = of_find_node_by_path("/");
+		n = prom_n_addr_cells(dn);
+		property = (unsigned int *) get_property(dn,
+				"platform-spider-pic", NULL);
+
+		if (!property)
+			continue;
+		for (spiderpic = 0; n > 0; --n)
+			spiderpic = (spiderpic << 32) + *property++;
+		printk(KERN_DEBUG "SPIDER addr: %lx\n", spiderpic);
+		spider_pics[node] = __ioremap(spiderpic, 0x800, _PAGE_NO_CACHE);
+		for (n = 0; n < IIC_NUM_EXT; n++) {
+			int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE;
+			get_irq_desc(irq)->handler = &spider_pic;
+
+			/* do not mask any interrupts because of level */
+			out_be32(spider_pics[node] + TIR_MSK, 0x0);
+
+			/* disable edge detection clear */
+			/* out_be32(spider_pics[node] + TIR_EDC, 0x0); */
+
+			/* enable interrupt packets to be output */
+			out_be32(spider_pics[node] + TIR_PIEN,
+				in_be32(spider_pics[node] + TIR_PIEN) | 0x1);
+
+			/* Enable the interrupt detection enable bit. Do this last! */
+			out_be32(spider_pics[node] + TIR_DEN,
+				in_be32(spider_pics[node] +TIR_DEN) | 0x1);
+
+		}
+	}
+}
diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c
index 118436e8085..206619080e6 100644
--- a/arch/ppc64/kernel/sys_ppc32.c
+++ b/arch/ppc64/kernel/sys_ppc32.c
@@ -30,47 +30,26 @@
 #include <linux/sem.h>
 #include <linux/msg.h>
 #include <linux/shm.h>
-#include <linux/slab.h>
-#include <linux/uio.h>
-#include <linux/aio.h>
-#include <linux/nfs_fs.h>
-#include <linux/module.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
-#include <linux/nfsd/syscall.h>
 #include <linux/poll.h>
 #include <linux/personality.h>
 #include <linux/stat.h>
-#include <linux/filter.h>
-#include <linux/highmem.h>
-#include <linux/highuid.h>
 #include <linux/mman.h>
-#include <linux/ipv6.h>
 #include <linux/in.h>
-#include <linux/icmpv6.h>
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
 #include <linux/sysctl.h>
 #include <linux/binfmts.h>
-#include <linux/dnotify.h>
 #include <linux/security.h>
 #include <linux/compat.h>
 #include <linux/ptrace.h>
-#include <linux/aio_abi.h>
 #include <linux/elf.h>
-#include <net/scm.h>
-#include <net/sock.h>
-
 #include <asm/ptrace.h>
 #include <asm/types.h>
 #include <asm/ipc.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/semaphore.h>
-#include <asm/ppcdebug.h>
 #include <asm/time.h>
 #include <asm/mmu_context.h>
 #include <asm/systemcfg.h>
@@ -350,8 +329,6 @@ asmlinkage long sys32_adjtimex(struct timex32 __user *utp)
 	return ret;
 }
 
-
-/* These are here just in case some old sparc32 binary calls it. */
 asmlinkage long sys32_pause(void)
 {
 	current->state = TASK_INTERRUPTIBLE;
@@ -360,8 +337,6 @@ asmlinkage long sys32_pause(void)
 	return -ERESTARTNOHAND;
 }
 
-
-
 static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i)
 {
 	long usec;
@@ -847,16 +822,6 @@ asmlinkage long sys32_getpgid(u32 pid)
 }
 
 
-/* Note: it is necessary to treat which and who as unsigned ints,
- * with the corresponding cast to a signed int to insure that the
- * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
- * and the register representation of a signed int (msr in 64-bit mode) is performed.
- */
-asmlinkage long sys32_getpriority(u32 which, u32 who)
-{
-	return sys_getpriority((int)which, (int)who);
-}
-
 /* Note: it is necessary to treat pid as an unsigned int,
  * with the corresponding cast to a signed int to insure that the
@@ -1048,6 +1013,11 @@ asmlinkage long sys32_setpgid(u32 pid, u32 pgid)
 	return sys_setpgid((int)pid, (int)pgid);
 }
 
+long sys32_getpriority(u32 which, u32 who)
+{
+	/* sign extend which and who */
+	return sys_getpriority((int)which, (int)who);
+}
 
 long sys32_setpriority(u32 which, u32 who, u32 niceval)
 {
@@ -1055,6 +1025,18 @@ long sys32_setpriority(u32 which, u32 who, u32 niceval)
 	return sys_setpriority((int)which, (int)who, (int)niceval);
 }
 
+long sys32_ioprio_get(u32 which, u32 who)
+{
+	/* sign extend which and who */
+	return sys_ioprio_get((int)which, (int)who);
+}
+
+long sys32_ioprio_set(u32 which, u32 who, u32 ioprio)
+{
+	/* sign extend which, who and ioprio */
+	return sys_ioprio_set((int)which, (int)who, (int)ioprio);
+}
+
 /* Note: it is necessary to treat newmask as an unsigned int,
  * with the corresponding cast to a signed int to insure that the
  * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
@@ -1273,8 +1255,6 @@ long ppc32_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
 			(u64)len_high << 32 | len_low, advice);
 }
 
-extern asmlinkage long sys_timer_create(clockid_t, sigevent_t __user *, timer_t __user *);
-
 long ppc32_timer_create(clockid_t clock,
 			struct compat_sigevent __user *ev32,
 			timer_t __user *timer_id)
diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c
index c8fa6569b2f..02b8ac4e016 100644
--- a/arch/ppc64/kernel/sysfs.c
+++ b/arch/ppc64/kernel/sysfs.c
@@ -112,7 +112,6 @@ void ppc64_enable_pmcs(void)
 	unsigned long hid0;
 #ifdef CONFIG_PPC_PSERIES
 	unsigned long set, reset;
-	int ret;
 #endif /* CONFIG_PPC_PSERIES */
 
 	/* Only need to enable them once */
@@ -145,11 +144,7 @@ void ppc64_enable_pmcs(void)
 	case PLATFORM_PSERIES_LPAR:
 		set = 1UL << 63;
 		reset = 0;
-		ret = plpar_hcall_norets(H_PERFMON, set, reset);
-		if (ret)
-			printk(KERN_ERR "H_PERFMON call on cpu %u "
-			       "returned %d\n",
-			       smp_processor_id(), ret);
+		plpar_hcall_norets(H_PERFMON, set, reset);
 		break;
 #endif /* CONFIG_PPC_PSERIES */
 
@@ -161,13 +156,6 @@ void ppc64_enable_pmcs(void)
 	/* instruct hypervisor to maintain PMCs */
 	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
 		get_paca()->lppaca.pmcregs_in_use = 1;
-
-	/*
-	 * On SMT machines we have to set the run latch in the ctrl register
-	 * in order to make PMC6 spin.
-	 */
-	if (cpu_has_feature(CPU_FTR_SMT))
-		ppc64_runlatch_on();
 #endif /* CONFIG_PPC_PSERIES */
 }
 
@@ -400,7 +388,12 @@ static int __init topology_init(void)
 		struct cpu *c = &per_cpu(cpu_devices, cpu);
 
 #ifdef CONFIG_NUMA
-		parent = &node_devices[cpu_to_node(cpu)];
+		/* The node to which a cpu belongs can't be known
+		 * until the cpu is made present.
+		 */
+		parent = NULL;
+		if (cpu_present(cpu))
+			parent = &node_devices[cpu_to_node(cpu)];
#endif
 		/*
 		 * For now, we just see if the system supports making
diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c
index 33364a7d2cd..909462e1ade 100644
--- a/arch/ppc64/kernel/time.c
+++ b/arch/ppc64/kernel/time.c
@@ -91,6 +91,7 @@ unsigned long tb_to_xs;
 unsigned tb_to_us;
 unsigned long processor_freq;
 DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL_GPL(rtc_lock);
 
 unsigned long tb_to_ns_scale;
 unsigned long tb_to_ns_shift;
@@ -98,7 +99,6 @@ unsigned long tb_to_ns_shift;
 struct gettimeofday_struct do_gtod;
 
 extern unsigned long wall_jiffies;
-extern unsigned long lpevent_count;
 extern int smp_tb_synchronized;
 
 extern struct timezone sys_tz;
@@ -107,6 +107,9 @@ void ppc_adjtimex(void);
 
 static unsigned adjusting_time = 0;
 
+unsigned long ppc_proc_freq;
+unsigned long ppc_tb_freq;
+
 static __inline__ void timer_check_rtc(void)
 {
 	/*
@@ -363,11 +366,8 @@ int timer_interrupt(struct pt_regs * regs)
 	set_dec(next_dec);
 
 #ifdef CONFIG_PPC_ISERIES
-	{
-		struct ItLpQueue *lpq = lpaca->lpqueue_ptr;
-		if (lpq && ItLpQueue_isLpIntPending(lpq))
-			lpevent_count += ItLpQueue_process(lpq, regs);
-	}
+	if (hvlpevent_is_pending())
+		process_hvlpevents(regs);
 #endif
 
 	/* collect purr register values often, for accurate calculations */
@@ -472,6 +472,66 @@ int do_settimeofday(struct timespec *tv)
 
 EXPORT_SYMBOL(do_settimeofday);
 
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_MAPLE) || defined(CONFIG_PPC_BPA)
+void __init generic_calibrate_decr(void)
+{
+	struct device_node *cpu;
+	struct div_result divres;
+	unsigned int *fp;
+	int node_found;
+
+	/*
+	 * The cpu node should have a timebase-frequency property
+	 * to tell us the rate at which the decrementer counts.
+	 */
+	cpu = of_find_node_by_type(NULL, "cpu");
+
+	ppc_tb_freq = DEFAULT_TB_FREQ;	/* hardcoded default */
+	node_found = 0;
+	if (cpu != 0) {
+		fp = (unsigned int *)get_property(cpu, "timebase-frequency",
+						  NULL);
+		if (fp != 0) {
+			node_found = 1;
+			ppc_tb_freq = *fp;
+		}
+	}
+	if (!node_found)
+		printk(KERN_ERR "WARNING: Estimating decrementer frequency "
+				"(not found)\n");
+
+	ppc_proc_freq = DEFAULT_PROC_FREQ;
+	node_found = 0;
+	if (cpu != 0) {
+		fp = (unsigned int *)get_property(cpu, "clock-frequency",
+						  NULL);
+		if (fp != 0) {
+			node_found = 1;
+			ppc_proc_freq = *fp;
+		}
+	}
+	if (!node_found)
+		printk(KERN_ERR "WARNING: Estimating processor frequency "
+				"(not found)\n");
+
+	of_node_put(cpu);
+
+	printk(KERN_INFO "time_init: decrementer frequency = %lu.%.6lu MHz\n",
+	       ppc_tb_freq/1000000, ppc_tb_freq%1000000);
+	printk(KERN_INFO "time_init: processor frequency = %lu.%.6lu MHz\n",
+	       ppc_proc_freq/1000000, ppc_proc_freq%1000000);
+
+	tb_ticks_per_jiffy = ppc_tb_freq / HZ;
+	tb_ticks_per_sec = tb_ticks_per_jiffy * HZ;
+	tb_ticks_per_usec = ppc_tb_freq / 1000000;
+	tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
+	div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres);
+	tb_to_xs = divres.result_low;
+
+	setup_default_decr();
+}
+#endif
+
 void __init time_init(void)
 {
 	/* This function is only called on the boot processor */
diff --git a/arch/ppc64/kernel/traps.c b/arch/ppc64/kernel/traps.c
index 7e52cb2605e..a8d5e83ee89 100644
--- a/arch/ppc64/kernel/traps.c
+++ b/arch/ppc64/kernel/traps.c
@@ -126,6 +126,10 @@ int die(const char *str, struct pt_regs *regs, long err)
 		printk("POWERMAC ");
 		nl = 1;
 		break;
+	case PLATFORM_BPA:
+		printk("BPA ");
+		nl = 1;
+		break;
 	}
 	if (nl)
 		printk("\n");
diff --git a/arch/ppc64/kernel/udbg.c b/arch/ppc64/kernel/udbg.c
index d4ccd6f1ef4..c0da45540f0 100644
--- a/arch/ppc64/kernel/udbg.c
+++ b/arch/ppc64/kernel/udbg.c
@@ -141,7 +141,7 @@ void udbg_init_scc(struct device_node *np)
 
 #endif /* CONFIG_PPC_PMAC */
 
-#if CONFIG_PPC_PMAC
+#ifdef CONFIG_PPC_PMAC
 static void udbg_real_putc(unsigned char c)
 {
 	while ((real_readb(sccc) & SCC_TXRDY) == 0)
diff --git a/arch/ppc64/kernel/vdso32/vdso32.lds.S b/arch/ppc64/kernel/vdso32/vdso32.lds.S
index 11290c902ba..6f87a916a39 100644
--- a/arch/ppc64/kernel/vdso32/vdso32.lds.S
+++ b/arch/ppc64/kernel/vdso32/vdso32.lds.S
@@ -40,9 +40,9 @@ SECTIONS
   .gcc_except_table : { *(.gcc_except_table) }
   .fixup : { *(.fixup) }
 
-  .got ALIGN(4) : { *(.got.plt) *(.got) }
-
   .dynamic : { *(.dynamic) } :text :dynamic
+  .got : { *(.got) }
+  .plt : { *(.plt) }
 
   _end = .;
   __end = .;
diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c
index 879f39b90a3..d9dc6f28d05 100644
--- a/arch/ppc64/kernel/xics.c
+++ b/arch/ppc64/kernel/xics.c
@@ -647,6 +647,32 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 	}
 }
 
+void xics_teardown_cpu(int secondary)
+{
+	int cpu = smp_processor_id();
+
+	ops->cppr_info(cpu, 0x00);
+	iosync();
+
+	/*
+	 * Some machines need to have at least one cpu in the GIQ,
+	 * so leave the master cpu in the group.
+	 */
+	if (secondary) {
+		/*
+		 * we need to EOI the IPI if we got here from kexec down IPI
+		 *
+		 * probably need to check all the other interrupts too
+		 * should we be flagging idle loop instead?
+		 * or creating some task to be scheduled?
+		 */
+		ops->xirr_info_set(cpu, XICS_IPI);
+		rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
+			(1UL << interrupt_server_size) - 1 -
+			default_distrib_server, 0);
+	}
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 /* Interrupts are disabled. */
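The pending_newline logic in the new rtas_progress() defers a trailing '\n' instead of sending it right away. The call sequence from rtas_flash_firmware() in this same patch illustrates the effect:

	/* The trailing '\n' of the first call is only recorded in
	 * pending_newline, so "Flashing" stays on the operator panel's
	 * bottom line; the second call first emits the deferred CR-LF
	 * and then prints on a fresh line, instead of scrolling early
	 * and leaving a blank bottom row.
	 */
	rtas_progress("Flashing \n", 0x0);
	rtas_progress("Please Wait... ", 0x0);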
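rtas_read_config() and rtas_write_config() in the new rtas_pci.c pack the whole config-space location into a single address argument. A standalone sketch of that encoding; the helper name is hypothetical and not part of the patch:

	/* Hypothetical helper mirroring the encoding used above:
	 * bits 28-31: register bits 8-11 (extended space, where >= 0x100)
	 * bits 16-23: bus number
	 * bits  8-15: devfn
	 * bits  0-7 : register bits 0-7
	 */
	static unsigned long rtas_config_addr(int busno, int devfn, int where)
	{
		return ((where & 0xf00) << 20) | ((busno & 0xff) << 16) |
		       ((devfn & 0xff) << 8) | (where & 0xff);
	}

For example, bus 0x40, devfn 0x08, register 0x44 encodes to 0x00400844; extended register 0x144 additionally sets bit 28, giving 0x10400844.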
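get_phb_buid() turns the PHB's "reg" property into the 64-bit BUID that the ibm,read-pci-config call expects. With two address cells the two 32-bit values are simply concatenated; a worked example with made-up cell values:

	/* Example cell values only, not from any real device tree */
	unsigned int buid_vals[2] = { 0x00000080, 0x00000100 };
	unsigned long buid;

	buid = (((unsigned long)buid_vals[0]) << 32UL) |
	       (((unsigned long)buid_vals[1]) & 0xffffffff);
	/* buid == 0x0000008000000100; rtas_call() later passes it back
	 * as buid >> 32 and buid & 0xffffffff, recovering the two cells */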
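The new spider-pic.c maps a flat Linux irq number onto a (node, external line) pair. A sketch of the decomposition, assuming the IIC_NODE_STRIDE and IIC_EXT_OFFSET constants from bpa_iic.h, which is not part of this diff:

	/* Mirrors spider_get_pic()/spider_get_nr() above; the concrete
	 * constant values live in bpa_iic.h and are assumed here. */
	int node = irq / IIC_NODE_STRIDE;			/* which spider chip */
	int ext = (irq % IIC_NODE_STRIDE) - IIC_EXT_OFFSET;	/* line on it */
	void __iomem *cfg = spider_pics[node] + TIR_CFGA + 8 * ext;
	/* cfg and cfg + 4 address that line's Setting Register A/B pair */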
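The sys32_getpriority, sys32_setpriority and sys32_ioprio_* wrappers in sys_ppc32.c all cast their u32 arguments to int before calling the native syscall. A minimal illustration of why the cast matters:

	/* A 32-bit task passing who = -1 shows up in the 64-bit syscall
	 * register as 0xffffffff; without the (int) cast it would be taken
	 * as 4294967295 rather than -1. */
	u32 who = 0xffffffff;	/* -1 as written by 32-bit userspace */
	long wrong = (long)who;	/* 4294967295 */
	long right = (int)who;	/* -1, what sys_getpriority() expects */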
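The tail of generic_calibrate_decr() derives the kernel's tick constants from the device-tree frequency. Worked through with assumed example numbers (a 512 MHz timebase and HZ == 1000, neither taken from the patch), and assuming div128_by_32() divides a 128-bit dividend by a 32-bit divisor:

	ppc_tb_freq = 512000000;			/* assumed example */
	tb_ticks_per_jiffy = ppc_tb_freq / HZ;		/* 512000 */
	tb_ticks_per_sec = tb_ticks_per_jiffy * HZ;	/* 512000000 */
	tb_ticks_per_usec = ppc_tb_freq / 1000000;	/* 512 */
	/* div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres) divides
	 * 2^20 * 2^64 by tb_ticks_per_sec, so tb_to_xs comes out as about
	 * 2^84 / 512000000: a fixed-point factor that converts timebase
	 * ticks to 1/2^20-second units via a 64-bit multiply-high. */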