diff options
Diffstat (limited to 'arch/powerpc')
151 files changed, 58285 insertions, 0 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig new file mode 100644 index 00000000000..cd55c9b0f7e --- /dev/null +++ b/arch/powerpc/Kconfig @@ -0,0 +1,863 @@ +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. +# + +mainmenu "Linux/PowerPC Kernel Configuration" + +config PPC64 + bool "64-bit kernel" + default n + help + This option selects whether a 32-bit or a 64-bit kernel + will be built. + +config PPC32 + bool + default y if !PPC64 + +config 64BIT + bool + default y if PPC64 + +config PPC_MERGE + def_bool y + +config MMU + bool + default y + +config UID16 + bool + +config GENERIC_HARDIRQS + bool + default y + +config RWSEM_GENERIC_SPINLOCK + bool + +config RWSEM_XCHGADD_ALGORITHM + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config PPC + bool + default y + +config EARLY_PRINTK + bool + default y if PPC64 + +config COMPAT + bool + default y if PPC64 + +config SYSVIPC_COMPAT + bool + depends on COMPAT && SYSVIPC + default y + +# All PPC32s use generic nvram driver through ppc_md +config GENERIC_NVRAM + bool + default y if PPC32 + +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool + default y + +config ARCH_MAY_HAVE_PC_FDC + bool + default y + +menu "Processor support" +choice + prompt "Processor Type" + depends on PPC32 + default 6xx + +config 6xx + bool "6xx/7xx/74xx" + select PPC_FPU + help + There are four families of PowerPC chips supported. The more common + types (601, 603, 604, 740, 750, 7400), the Motorola embedded + versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC + embedded versions (403 and 405) and the high end 64 bit Power + processors (POWER 3, POWER4, and IBM PPC970 also known as G5). + + Unless you are building a kernel for one of the embedded processor + systems, 64 bit IBM RS/6000 or an Apple G5, choose 6xx. + Note that the kernel runs in 32-bit mode even on 64-bit chips. + +config PPC_52xx + bool "Freescale 52xx" + +config PPC_82xx + bool "Freescale 82xx" + +config PPC_83xx + bool "Freescale 83xx" + +config 40x + bool "AMCC 40x" + +config 44x + bool "AMCC 44x" + +config 8xx + bool "Freescale 8xx" + +config E200 + bool "Freescale e200" + +config E500 + bool "Freescale e500" +endchoice + +config POWER4_ONLY + bool "Optimize for POWER4" + depends on PPC64 + default n + ---help--- + Cause the compiler to optimize for POWER4/POWER5/PPC970 processors. + The resulting binary will not work on POWER3 or RS64 processors + when compiled with binutils 2.15 or later. + +config POWER3 + bool + depends on PPC64 + default y if !POWER4_ONLY + +config POWER4 + depends on PPC64 + def_bool y + +config PPC_FPU + bool + default y if PPC64 + +config BOOKE + bool + depends on E200 || E500 + default y + +config FSL_BOOKE + bool + depends on E200 || E500 + default y + +config PTE_64BIT + bool + depends on 44x || E500 + default y if 44x + default y if E500 && PHYS_64BIT + +config PHYS_64BIT + bool 'Large physical address support' if E500 + depends on 44x || E500 + default y if 44x + ---help--- + This option enables kernel support for larger than 32-bit physical + addresses. This features is not be available on all e500 cores. + + If in doubt, say N here. + +config ALTIVEC + bool "AltiVec Support" + depends on 6xx || POWER4 + ---help--- + This option enables kernel support for the Altivec extensions to the + PowerPC processor. The kernel currently supports saving and restoring + altivec registers, and turning on the 'altivec enable' bit so user + processes can execute altivec instructions. + + This option is only usefully if you have a processor that supports + altivec (G4, otherwise known as 74xx series), but does not have + any affect on a non-altivec cpu (it does, however add code to the + kernel). + + If in doubt, say Y here. + +config SPE + bool "SPE Support" + depends on E200 || E500 + ---help--- + This option enables kernel support for the Signal Processing + Extensions (SPE) to the PowerPC processor. The kernel currently + supports saving and restoring SPE registers, and turning on the + 'spe enable' bit so user processes can execute SPE instructions. + + This option is only useful if you have a processor that supports + SPE (e500, otherwise known as 85xx series), but does not have any + effect on a non-spe cpu (it does, however add code to the kernel). + + If in doubt, say Y here. + +config PPC_STD_MMU + bool + depends on 6xx || POWER3 || POWER4 || PPC64 + default y + +config PPC_STD_MMU_32 + def_bool y + depends on PPC_STD_MMU && PPC32 + +config SMP + depends on PPC_STD_MMU + bool "Symmetric multi-processing support" + ---help--- + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. Note that the kernel does not currently + support SMP machines with 603/603e/603ev or PPC750 ("G3") processors + since they have inadequate hardware support for multiprocessor + operation. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on single-processor machines. + On a single-processor machine, the kernel will run faster if you say + N here. + + If you don't know what to do here, say N. + +config NR_CPUS + int "Maximum number of CPUs (2-32)" + range 2 128 + depends on SMP + default "32" if PPC64 + default "4" + +config NOT_COHERENT_CACHE + bool + depends on 4xx || 8xx || E200 + default y +endmenu + +source "init/Kconfig" + +menu "Platform support" + depends on PPC64 || 6xx + +choice + prompt "Machine type" + default PPC_MULTIPLATFORM + +config PPC_MULTIPLATFORM + bool "Generic desktop/server/laptop" + help + Select this option if configuring for an IBM pSeries or + RS/6000 machine, an Apple machine, or a PReP, CHRP, + Maple or Cell-based machine. + +config PPC_ISERIES + bool "IBM Legacy iSeries" + depends on PPC64 + +config EMBEDDED6xx + bool "Embedded 6xx/7xx/7xxx-based board" + depends on PPC32 + +config APUS + bool "Amiga-APUS" + depends on PPC32 && BROKEN + help + Select APUS if configuring for a PowerUP Amiga. + More information is available at: + <http://linux-apus.sourceforge.net/>. +endchoice + +config PPC_PSERIES + depends on PPC_MULTIPLATFORM && PPC64 + bool " IBM pSeries & new (POWER5-based) iSeries" + default y + +config PPC_CHRP + bool " Common Hardware Reference Platform (CHRP) based machines" + depends on PPC_MULTIPLATFORM && PPC32 + default y + +config PPC_PMAC + bool " Apple PowerMac based machines" + depends on PPC_MULTIPLATFORM + default y + +config PPC_PMAC64 + bool + depends on PPC_PMAC && POWER4 + default y + +config PPC_PREP + bool " PowerPC Reference Platform (PReP) based machines" + depends on PPC_MULTIPLATFORM && PPC32 + default y + +config PPC_MAPLE + depends on PPC_MULTIPLATFORM && PPC64 + bool " Maple 970FX Evaluation Board" + select U3_DART + select MPIC_BROKEN_U3 + default n + help + This option enables support for the Maple 970FX Evaluation Board. + For more informations, refer to <http://www.970eval.com> + +config PPC_BPA + bool " Broadband Processor Architecture" + depends on PPC_MULTIPLATFORM && PPC64 + +config PPC_OF + bool + depends on PPC_MULTIPLATFORM # for now + default y + +config XICS + depends on PPC_PSERIES + bool + default y + +config U3_DART + bool + depends on PPC_MULTIPLATFORM && PPC64 + default n + +config MPIC + depends on PPC_PSERIES || PPC_PMAC || PPC_MAPLE + bool + default y + +config MPIC_BROKEN_U3 + bool + depends on PPC_MAPLE + default y + +config BPA_IIC + depends on PPC_BPA + bool + default y + +config IBMVIO + depends on PPC_PSERIES || PPC_ISERIES + bool + default y + +source "drivers/cpufreq/Kconfig" + +config CPU_FREQ_PMAC + bool "Support for Apple PowerBooks" + depends on CPU_FREQ && ADB_PMU && PPC32 + select CPU_FREQ_TABLE + help + This adds support for frequency switching on Apple PowerBooks, + this currently includes some models of iBook & Titanium + PowerBook. + +config PPC601_SYNC_FIX + bool "Workarounds for PPC601 bugs" + depends on 6xx && (PPC_PREP || PPC_PMAC) + help + Some versions of the PPC601 (the first PowerPC chip) have bugs which + mean that extra synchronization instructions are required near + certain instructions, typically those that make major changes to the + CPU state. These extra instructions reduce performance slightly. + If you say N here, these extra instructions will not be included, + resulting in a kernel which will run faster but may not run at all + on some systems with the PPC601 chip. + + If in doubt, say Y here. + +config TAU + bool "Thermal Management Support" + depends on 6xx + help + G3 and G4 processors have an on-chip temperature sensor called the + 'Thermal Assist Unit (TAU)', which, in theory, can measure the on-die + temperature within 2-4 degrees Celsius. This option shows the current + on-die temperature in /proc/cpuinfo if the cpu supports it. + + Unfortunately, on some chip revisions, this sensor is very inaccurate + and in some cases, does not work at all, so don't assume the cpu + temp is actually what /proc/cpuinfo says it is. + +config TAU_INT + bool "Interrupt driven TAU driver (DANGEROUS)" + depends on TAU + ---help--- + The TAU supports an interrupt driven mode which causes an interrupt + whenever the temperature goes out of range. This is the fastest way + to get notified the temp has exceeded a range. With this option off, + a timer is used to re-check the temperature periodically. + + However, on some cpus it appears that the TAU interrupt hardware + is buggy and can cause a situation which would lead unexplained hard + lockups. + + Unless you are extending the TAU driver, or enjoy kernel/hardware + debugging, leave this option off. + +config TAU_AVERAGE + bool "Average high and low temp" + depends on TAU + ---help--- + The TAU hardware can compare the temperature to an upper and lower + bound. The default behavior is to show both the upper and lower + bound in /proc/cpuinfo. If the range is large, the temperature is + either changing a lot, or the TAU hardware is broken (likely on some + G4's). If the range is small (around 4 degrees), the temperature is + relatively stable. If you say Y here, a single temperature value, + halfway between the upper and lower bounds, will be reported in + /proc/cpuinfo. + + If in doubt, say N here. +endmenu + +source arch/powerpc/platforms/embedded6xx/Kconfig +source arch/powerpc/platforms/4xx/Kconfig +source arch/powerpc/platforms/85xx/Kconfig +source arch/powerpc/platforms/8xx/Kconfig + +menu "Kernel options" + +config HIGHMEM + bool "High memory support" + depends on PPC32 + +source kernel/Kconfig.hz +source kernel/Kconfig.preempt +source "fs/Kconfig.binfmt" + +# We optimistically allocate largepages from the VM, so make the limit +# large enough (16MB). This badly named config option is actually +# max order + 1 +config FORCE_MAX_ZONEORDER + int + depends on PPC64 + default "13" + +config MATH_EMULATION + bool "Math emulation" + depends on 4xx || 8xx || E200 || E500 + ---help--- + Some PowerPC chips designed for embedded applications do not have + a floating-point unit and therefore do not implement the + floating-point instructions in the PowerPC instruction set. If you + say Y here, the kernel will include code to emulate a floating-point + unit, which will allow programs that use floating-point + instructions to run. + +config IOMMU_VMERGE + bool "Enable IOMMU virtual merging (EXPERIMENTAL)" + depends on EXPERIMENTAL && PPC64 + default n + help + Cause IO segments sent to a device for DMA to be merged virtually + by the IOMMU when they happen to have been allocated contiguously. + This doesn't add pressure to the IOMMU allocator. However, some + drivers don't support getting large merged segments coming back + from *_map_sg(). Say Y if you know the drivers you are using are + properly handling this case. + +config HOTPLUG_CPU + bool "Support for enabling/disabling CPUs" + depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) + ---help--- + Say Y here to be able to disable and re-enable individual + CPUs at runtime on SMP machines. + + Say N if you are unsure. + +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on PPC_MULTIPLATFORM && EXPERIMENTAL + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is indepedent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similiarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + +config EMBEDDEDBOOT + bool + depends on 8xx || 8260 + default y + +config PC_KEYBOARD + bool "PC PS/2 style Keyboard" + depends on 4xx || CPM2 + +config PPCBUG_NVRAM + bool "Enable reading PPCBUG NVRAM during boot" if PPLUS || LOPEC + default y if PPC_PREP + +config IRQ_ALL_CPUS + bool "Distribute interrupts on all CPUs by default" + depends on SMP && !MV64360 + help + This option gives the kernel permission to distribute IRQs across + multiple CPUs. Saying N here will route all IRQs to the first + CPU. Generally saying Y is safe, although some problems have been + reported with SMP Power Macintoshes with this option enabled. + +source "arch/powerpc/platforms/pseries/Kconfig" + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + depends on PPC64 + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on PPC64 && !NUMA + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on SMP && PPC_PSERIES + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on PPC64 + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool y + depends on NEED_MULTIPLE_NODES + +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. +# +# This is a relatively temporary hack that should +# be able to go away when sparsemem is fully in +# place + +config NODES_SPAN_OTHER_NODES + def_bool y + depends on NEED_MULTIPLE_NODES + +config NUMA + bool "NUMA support" + default y if DISCONTIGMEM || SPARSEMEM + +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on PPC64 && SMP + default off + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with POWER5 cpus at a cost of slightly increased + overhead in some places. If unsure say N here. + +config PROC_DEVICETREE + bool "Support for Open Firmware device tree in /proc" + depends on (PPC_OF || PPC_ISERIES) && PROC_FS + help + This option adds a device-tree directory under /proc which contains + an image of the device tree that the kernel copies from Open + Firmware. If unsure, say Y here. + +source "arch/powerpc/platforms/prep/Kconfig" + +config CMDLINE_BOOL + bool "Default bootloader kernel arguments" + depends on !PPC_ISERIES + +config CMDLINE + string "Initial kernel command string" + depends on CMDLINE_BOOL + default "console=ttyS0,9600 console=tty0 root=/dev/sda2" + help + On some platforms, there is currently no way for the boot loader to + pass arguments to the kernel. For these platforms, you can supply + some command-line options at build time by entering them here. In + most cases you will need to specify the root device here. + +if !44x || BROKEN +source kernel/power/Kconfig +endif + +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + default y + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc/<pid>/seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. Only embedded should say N here. + +endmenu + +config ISA_DMA_API + bool + default y + +menu "Bus options" + +config ISA + bool "Support for ISA-bus hardware" + depends on PPC_PREP || PPC_CHRP + help + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff + inside your box. If you have an Apple machine, say N here; if you + have an IBM RS/6000 or pSeries machine or a PReP machine, say Y. If + you have an embedded board, consult your board documentation. + +config GENERIC_ISA_DMA + bool + depends on PPC64 || POWER4 || 6xx && !CPM2 + default y + +config EISA + bool + +config SBUS + bool + +# Yes MCA RS/6000s exist but Linux-PPC does not currently support any +config MCA + bool + +config PCI + bool "PCI support" if 40x || CPM2 || 83xx || 85xx || PPC_MPC52xx || (EMBEDDED && PPC_ISERIES) + default y if !40x && !CPM2 && !8xx && !APUS && !83xx && !85xx + default PCI_PERMEDIA if !4xx && !CPM2 && !8xx && APUS + default PCI_QSPAN if !4xx && !CPM2 && 8xx + help + Find out whether your system includes a PCI bus. PCI is the name of + a bus system, i.e. the way the CPU talks to the other stuff inside + your box. If you say Y here, the kernel will include drivers and + infrastructure code to support PCI bus devices. + +config PCI_DOMAINS + bool + default PCI + +config MPC83xx_PCI2 + bool " Supprt for 2nd PCI host controller" + depends on PCI && MPC834x + default y if MPC834x_SYS + +config PCI_QSPAN + bool "QSpan PCI" + depends on !4xx && !CPM2 && 8xx + help + Say Y here if you have a system based on a Motorola 8xx-series + embedded processor with a QSPAN PCI interface, otherwise say N. + +config PCI_8260 + bool + depends on PCI && 8260 + default y + +config 8260_PCI9 + bool " Enable workaround for MPC826x erratum PCI 9" + depends on PCI_8260 && !ADS8272 + default y + +choice + prompt " IDMA channel for PCI 9 workaround" + depends on 8260_PCI9 + +config 8260_PCI9_IDMA1 + bool "IDMA1" + +config 8260_PCI9_IDMA2 + bool "IDMA2" + +config 8260_PCI9_IDMA3 + bool "IDMA3" + +config 8260_PCI9_IDMA4 + bool "IDMA4" + +endchoice + +source "drivers/pci/Kconfig" + +source "drivers/pcmcia/Kconfig" + +source "drivers/pci/hotplug/Kconfig" + +endmenu + +menu "Advanced setup" + depends on PPC32 + +config ADVANCED_OPTIONS + bool "Prompt for advanced kernel configuration options" + help + This option will enable prompting for a variety of advanced kernel + configuration options. These options can cause the kernel to not + work if they are set incorrectly, but can be used to optimize certain + aspects of kernel memory management. + + Unless you know what you are doing, say N here. + +comment "Default settings for advanced configuration options are used" + depends on !ADVANCED_OPTIONS + +config HIGHMEM_START_BOOL + bool "Set high memory pool address" + depends on ADVANCED_OPTIONS && HIGHMEM + help + This option allows you to set the base address of the kernel virtual + area used to map high memory pages. This can be useful in + optimizing the layout of kernel virtual memory. + + Say N here unless you know what you are doing. + +config HIGHMEM_START + hex "Virtual start address of high memory pool" if HIGHMEM_START_BOOL + default "0xfe000000" + +config LOWMEM_SIZE_BOOL + bool "Set maximum low memory" + depends on ADVANCED_OPTIONS + help + This option allows you to set the maximum amount of memory which + will be used as "low memory", that is, memory which the kernel can + access directly, without having to set up a kernel virtual mapping. + This can be useful in optimizing the layout of kernel virtual + memory. + + Say N here unless you know what you are doing. + +config LOWMEM_SIZE + hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL + default "0x30000000" + +config KERNEL_START_BOOL + bool "Set custom kernel base address" + depends on ADVANCED_OPTIONS + help + This option allows you to set the kernel virtual address at which + the kernel will map low memory (the kernel image will be linked at + this address). This can be useful in optimizing the virtual memory + layout of the system. + + Say N here unless you know what you are doing. + +config KERNEL_START + hex "Virtual address of kernel base" if KERNEL_START_BOOL + default "0xc0000000" + +config TASK_SIZE_BOOL + bool "Set custom user task size" + depends on ADVANCED_OPTIONS + help + This option allows you to set the amount of virtual address space + allocated to user tasks. This can be useful in optimizing the + virtual memory layout of the system. + + Say N here unless you know what you are doing. + +config TASK_SIZE + hex "Size of user task space" if TASK_SIZE_BOOL + default "0x80000000" + +config CONSISTENT_START_BOOL + bool "Set custom consistent memory pool address" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the base virtual address + of the the consistent memory pool. This pool of virtual + memory is used to make consistent memory allocations. + +config CONSISTENT_START + hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL + default "0xff100000" if NOT_COHERENT_CACHE + +config CONSISTENT_SIZE_BOOL + bool "Set custom consistent memory pool size" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the size of the the + consistent memory pool. This pool of virtual memory + is used to make consistent memory allocations. + +config CONSISTENT_SIZE + hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL + default "0x00200000" if NOT_COHERENT_CACHE + +config BOOT_LOAD_BOOL + bool "Set the boot link/load address" + depends on ADVANCED_OPTIONS && !PPC_MULTIPLATFORM + help + This option allows you to set the initial load address of the zImage + or zImage.initrd file. This can be useful if you are on a board + which has a small amount of memory. + + Say N here unless you know what you are doing. + +config BOOT_LOAD + hex "Link/load address for booting" if BOOT_LOAD_BOOL + default "0x00400000" if 40x || 8xx || 8260 + default "0x01000000" if 44x + default "0x00800000" + +config PIN_TLB + bool "Pinned Kernel TLBs (860 ONLY)" + depends on ADVANCED_OPTIONS && 8xx +endmenu + +if PPC64 +config KERNEL_START + hex + default "0xc000000000000000" +endif + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +# XXX source "arch/ppc/8xx_io/Kconfig" + +# XXX source "arch/ppc/8260_io/Kconfig" + +source "arch/powerpc/platforms/iseries/Kconfig" + +source "lib/Kconfig" + +source "arch/powerpc/oprofile/Kconfig" + +source "arch/powerpc/Kconfig.debug" + +source "security/Kconfig" + +config KEYS_COMPAT + bool + depends on COMPAT && KEYS + default y + +source "crypto/Kconfig" diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug new file mode 100644 index 00000000000..0baf64ec80d --- /dev/null +++ b/arch/powerpc/Kconfig.debug @@ -0,0 +1,128 @@ +menu "Kernel hacking" + +source "lib/Kconfig.debug" + +config DEBUG_STACKOVERFLOW + bool "Check for stack overflows" + depends on DEBUG_KERNEL && PPC64 + help + This option will cause messages to be printed if free stack space + drops below a certain limit. + +config KPROBES + bool "Kprobes" + depends on DEBUG_KERNEL && PPC64 + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + +config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL && PPC64 + help + Enables the display of the minimum amount of free stack which each + task has ever had available in the sysrq-T and sysrq-P debug output. + + This option will slow down process creation somewhat. + +config DEBUGGER + bool "Enable debugger hooks" + depends on DEBUG_KERNEL + help + Include in-kernel hooks for kernel debuggers. Unless you are + intending to debug the kernel, say N here. + +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUGGER && (BROKEN || PPC_GEN550 || 4xx) + select DEBUG_INFO + help + Include in-kernel hooks for kgdb, the Linux kernel source level + debugger. See <http://kgdb.sourceforge.net/> for more information. + Unless you are intending to debug the kernel, say N here. + +choice + prompt "Serial Port" + depends on KGDB + default KGDB_TTYS1 + +config KGDB_TTYS0 + bool "ttyS0" + +config KGDB_TTYS1 + bool "ttyS1" + +config KGDB_TTYS2 + bool "ttyS2" + +config KGDB_TTYS3 + bool "ttyS3" + +endchoice + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB && 8xx || CPM2 + help + If you enable this, all serial console messages will be sent + over the gdb stub. + If unsure, say N. + +config XMON + bool "Include xmon kernel debugger" + depends on DEBUGGER && !PPC_ISERIES + help + Include in-kernel hooks for the xmon kernel monitor/debugger. + Unless you are intending to debug the kernel, say N here. + Make sure to enable also CONFIG_BOOTX_TEXT on Macs. Otherwise + nothing will appear on the screen (xmon writes directly to the + framebuffer memory). + The cmdline option 'xmon' or 'xmon=early' will drop into xmon + very early during boot. 'xmon=on' will just enable the xmon + debugger hooks. 'xmon=off' will disable the debugger hooks + if CONFIG_XMON_DEFAULT is set. + +config XMON_DEFAULT + bool "Enable xmon by default" + depends on XMON + help + xmon is normally disabled unless booted with 'xmon=on'. + Use 'xmon=off' to disable xmon init during runtime. + +config IRQSTACKS + bool "Use separate kernel stacks when processing interrupts" + depends on PPC64 + help + If you say Y here the kernel will use separate kernel stacks + for handling hard and soft interrupts. This can help avoid + overflowing the process kernel stacks. + +config BDI_SWITCH + bool "Include BDI-2000 user context switcher" + depends on DEBUG_KERNEL && PPC32 + help + Include in-kernel support for the Abatron BDI2000 debugger. + Unless you are intending to debug the kernel with one of these + machines, say N here. + +config BOOTX_TEXT + bool "Support for early boot text console (BootX or OpenFirmware only)" + depends PPC_OF && !PPC_ISERIES + help + Say Y here to see progress messages from the boot firmware in text + mode. Requires either BootX or Open Firmware. + +config SERIAL_TEXT_DEBUG + bool "Support for early boot texts over serial port" + depends on 4xx || LOPEC || MV64X60 || PPLUS || PRPMC800 || \ + PPC_GEN550 || PPC_MPC52xx + +config PPC_OCP + bool + depends on IBM_OCP || XILINX_OCP + default y + +endmenu diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile new file mode 100644 index 00000000000..8d1e7bd14c5 --- /dev/null +++ b/arch/powerpc/Makefile @@ -0,0 +1,218 @@ +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to do have actions +# for "archclean" and "archdep" for cleaning up and making dependencies for +# this architecture. +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# Changes for PPC by Gary Thomas +# Rewritten by Cort Dougan and Paul Mackerras +# + +# This must match PAGE_OFFSET in include/asm-powerpc/page.h. +KERNELLOAD := $(CONFIG_KERNEL_START) + +HAS_BIARCH := $(call cc-option-yn, -m32) + +ifeq ($(CONFIG_PPC64),y) +OLDARCH := ppc64 +SZ := 64 + +# Set default 32 bits cross compilers for vdso and boot wrapper +CROSS32_COMPILE ?= + +CROSS32CC := $(CROSS32_COMPILE)gcc +CROSS32AS := $(CROSS32_COMPILE)as +CROSS32LD := $(CROSS32_COMPILE)ld +CROSS32OBJCOPY := $(CROSS32_COMPILE)objcopy + +ifeq ($(HAS_BIARCH),y) +ifeq ($(CROSS32_COMPILE),) +CROSS32CC := $(CC) -m32 +CROSS32AS := $(AS) -a32 +CROSS32LD := $(LD) -m elf32ppc +CROSS32OBJCOPY := $(OBJCOPY) +endif +endif + +export CROSS32CC CROSS32AS CROSS32LD CROSS32OBJCOPY + +new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi) + +ifeq ($(new_nm),y) +NM := $(NM) --synthetic +endif + +else +OLDARCH := ppc +SZ := 32 +endif + +ifeq ($(HAS_BIARCH),y) +override AS += -a$(SZ) +override LD += -m elf$(SZ)ppc +override CC += -m$(SZ) +endif + +LDFLAGS_vmlinux := -Ttext $(KERNELLOAD) -Bstatic -e $(KERNELLOAD) + +# The -Iarch/$(ARCH)/include is temporary while we are merging +CPPFLAGS += -Iarch/$(ARCH) -Iarch/$(ARCH)/include +AFLAGS += -Iarch/$(ARCH) +CFLAGS += -Iarch/$(ARCH) -msoft-float -pipe +CFLAGS-$(CONFIG_PPC64) := -mminimal-toc -mtraceback=none -mcall-aixdesc +CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 -mmultiple +CFLAGS += $(CFLAGS-y) +CPP = $(CC) -E $(CFLAGS) +# Temporary hack until we have migrated to asm-powerpc +LINUXINCLUDE += -Iarch/$(ARCH)/include + +CHECKFLAGS += -m$(SZ) -D__powerpc__ -D__powerpc$(SZ)__ + +ifeq ($(CONFIG_PPC64),y) +GCC_VERSION := $(call cc-version) +GCC_BROKEN_VEC := $(shell if [ $(GCC_VERSION) -lt 0400 ] ; then echo "y"; fi) + +ifeq ($(CONFIG_POWER4_ONLY),y) +ifeq ($(CONFIG_ALTIVEC),y) +ifeq ($(GCC_BROKEN_VEC),y) + CFLAGS += $(call cc-option,-mcpu=970) +else + CFLAGS += $(call cc-option,-mcpu=power4) +endif +else + CFLAGS += $(call cc-option,-mcpu=power4) +endif +else + CFLAGS += $(call cc-option,-mtune=power4) +endif +endif + +# Enable unit-at-a-time mode when possible. It shrinks the +# kernel considerably. +CFLAGS += $(call cc-option,-funit-at-a-time) + +ifndef CONFIG_FSL_BOOKE +CFLAGS += -mstring +endif + +cpu-as-$(CONFIG_PPC64BRIDGE) += -Wa,-mppc64bridge +cpu-as-$(CONFIG_4xx) += -Wa,-m405 +cpu-as-$(CONFIG_6xx) += -Wa,-maltivec +cpu-as-$(CONFIG_POWER4) += -Wa,-maltivec +cpu-as-$(CONFIG_E500) += -Wa,-me500 +cpu-as-$(CONFIG_E200) += -Wa,-me200 + +AFLAGS += $(cpu-as-y) +CFLAGS += $(cpu-as-y) + +# Default to the common case. +KBUILD_DEFCONFIG := common_defconfig + +head-y := arch/powerpc/kernel/head_32.o +head-$(CONFIG_PPC64) := arch/powerpc/kernel/head_64.o +head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o +head-$(CONFIG_4xx) := arch/powerpc/kernel/head_4xx.o +head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o +head-$(CONFIG_FSL_BOOKE) := arch/powerpc/kernel/head_fsl_booke.o + +head-$(CONFIG_6xx) += arch/powerpc/kernel/idle_6xx.o +head-$(CONFIG_PPC64) += arch/powerpc/kernel/entry_64.o +head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o + +core-y += arch/powerpc/kernel/ \ + arch/$(OLDARCH)/kernel/ \ + arch/powerpc/mm/ \ + arch/powerpc/lib/ \ + arch/powerpc/sysdev/ \ + arch/powerpc/platforms/ +core-$(CONFIG_MATH_EMULATION) += arch/ppc/math-emu/ +core-$(CONFIG_XMON) += arch/powerpc/xmon/ +core-$(CONFIG_APUS) += arch/ppc/amiga/ +drivers-$(CONFIG_8xx) += arch/ppc/8xx_io/ +drivers-$(CONFIG_4xx) += arch/ppc/4xx_io/ +drivers-$(CONFIG_CPM2) += arch/ppc/8260_io/ + +drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ + +defaultimage-$(CONFIG_PPC32) := uImage zImage +defaultimage-$(CONFIG_PPC_ISERIES) := vmlinux +defaultimage-$(CONFIG_PPC_PSERIES) := zImage +KBUILD_IMAGE := $(defaultimage-y) +all: $(KBUILD_IMAGE) + +CPPFLAGS_vmlinux.lds := -Upowerpc + +# All the instructions talk about "make bzImage". +bzImage: zImage + +BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm + +.PHONY: $(BOOT_TARGETS) + +boot := arch/$(OLDARCH)/boot + +# urk +ifeq ($(CONFIG_PPC64),y) +$(BOOT_TARGETS): vmlinux + $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) +else +$(BOOT_TARGETS): vmlinux + $(Q)$(MAKE) ARCH=ppc $(build)=$(boot) $@ +endif + +uImage: vmlinux + $(Q)$(MAKE) ARCH=$(OLDARCH) $(build)=$(boot)/images $(boot)/images/$@ + +define archhelp + @echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/images/zImage.*)' + @echo ' uImage - Create a bootable image for U-Boot / PPCBoot' + @echo ' install - Install kernel using' + @echo ' (your) ~/bin/installkernel or' + @echo ' (distribution) /sbin/installkernel or' + @echo ' install to $$(INSTALL_PATH) and run lilo' + @echo ' *_defconfig - Select default config from arch/$(ARCH)/ppc/configs' +endef + +archclean: + $(Q)$(MAKE) $(clean)=$(boot) + # Temporary hack until we have migrated to asm-powerpc + $(Q)rm -rf arch/$(ARCH)/include + +archprepare: checkbin + +# Temporary hack until we have migrated to asm-powerpc +include/asm: arch/$(ARCH)/include/asm +arch/$(ARCH)/include/asm: + $(Q)if [ ! -d arch/$(ARCH)/include ]; then mkdir -p arch/$(ARCH)/include; fi + $(Q)ln -fsn $(srctree)/include/asm-$(OLDARCH) arch/$(ARCH)/include/asm + +# Use the file '.tmp_gas_check' for binutils tests, as gas won't output +# to stdout and these checks are run even on install targets. +TOUT := .tmp_gas_check +# Ensure this is binutils 2.12.1 (or 2.12.90.0.7) or later for altivec +# instructions. +# gcc-3.4 and binutils-2.14 are a fatal combination. +GCC_VERSION := $(call cc-version) + +checkbin: + @if test "$(GCC_VERSION)" = "0304" ; then \ + if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \ + echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \ + echo 'correctly with gcc-3.4 and your version of binutils.'; \ + echo '*** Please upgrade your binutils or downgrade your gcc'; \ + false; \ + fi ; \ + fi + @if ! /bin/echo dssall | $(AS) -many -o $(TOUT) >/dev/null 2>&1 ; then \ + echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build ' ; \ + echo 'correctly with old versions of binutils.' ; \ + echo '*** Please upgrade your binutils to 2.12.1 or newer' ; \ + false ; \ + fi + +CLEAN_FILES += $(TOUT) + diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile new file mode 100644 index 00000000000..93179538097 --- /dev/null +++ b/arch/powerpc/kernel/Makefile @@ -0,0 +1,50 @@ +# +# Makefile for the linux kernel. +# + +ifeq ($(CONFIG_PPC64),y) +EXTRA_CFLAGS += -mno-minimal-toc +endif +ifeq ($(CONFIG_PPC32),y) +CFLAGS_prom_init.o += -fPIC +CFLAGS_btext.o += -fPIC +endif + +obj-y := semaphore.o +obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o +obj-$(CONFIG_POWER4) += idle_power4.o + +ifeq ($(CONFIG_PPC_MERGE),y) + +extra-$(CONFIG_PPC_STD_MMU) := head_32.o +extra-$(CONFIG_PPC64) := head_64.o +extra-$(CONFIG_40x) := head_4xx.o +extra-$(CONFIG_44x) := head_44x.o +extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o +extra-$(CONFIG_8xx) := head_8xx.o +extra-$(CONFIG_6xx) += idle_6xx.o +extra-$(CONFIG_PPC64) += entry_64.o +extra-$(CONFIG_PPC_FPU) += fpu.o +extra-y += vmlinux.lds + +obj-y += process.o init_task.o \ + prom.o systbl.o traps.o +obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o +obj-$(CONFIG_PPC64) += setup_64.o misc_64.o +obj-$(CONFIG_PPC_OF) += prom_init.o of_device.o +obj-$(CONFIG_MODULES) += ppc_ksyms.o +obj-$(CONFIG_BOOTX_TEXT) += btext.o + +ifeq ($(CONFIG_PPC_ISERIES),y) +$(obj)/head_64.o: $(obj)/lparmap.s +AFLAGS_head_64.o += -I$(obj) +endif + +else +# stuff used from here for ARCH=ppc or ARCH=ppc64 +obj-$(CONFIG_PPC64) += traps.o process.o init_task.o + +fpux-$(CONFIG_PPC32) += fpu.o +extra-$(CONFIG_PPC_FPU) += $(fpux-y) + +endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c new file mode 100644 index 00000000000..b0d6a7cd85e --- /dev/null +++ b/arch/powerpc/kernel/asm-offsets.c @@ -0,0 +1,278 @@ +/* + * This program is used to generate definitions needed by + * assembly language modules. + * + * We use the technique used in the OSF Mach kernel code: + * generate asm statements containing #defines, + * compile this file to assembler, and then extract the + * #defines from the assembly-language output. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mman.h> +#include <linux/mm.h> +#ifdef CONFIG_PPC64 +#include <linux/time.h> +#include <linux/hardirq.h> +#else +#include <linux/ptrace.h> +#include <linux/suspend.h> +#endif + +#include <asm/io.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#ifdef CONFIG_PPC64 +#include <asm/paca.h> +#include <asm/lppaca.h> +#include <asm/iSeries/HvLpEvent.h> +#include <asm/rtas.h> +#include <asm/cache.h> +#include <asm/systemcfg.h> +#include <asm/compat.h> +#endif + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +int main(void) +{ + DEFINE(THREAD, offsetof(struct task_struct, thread)); + DEFINE(MM, offsetof(struct task_struct, mm)); +#ifdef CONFIG_PPC64 + DEFINE(THREAD_SHIFT, THREAD_SHIFT); + DEFINE(THREAD_SIZE, THREAD_SIZE); + DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); +#else + DEFINE(THREAD_INFO, offsetof(struct task_struct, thread_info)); + DEFINE(PTRACE, offsetof(struct task_struct, ptrace)); +#endif /* CONFIG_PPC64 */ + + DEFINE(KSP, offsetof(struct thread_struct, ksp)); + DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); + DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode)); + DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0])); + DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr)); +#ifdef CONFIG_ALTIVEC + DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0])); + DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); + DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr)); + DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_PPC64 + DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); +#else /* CONFIG_PPC64 */ + DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); + DEFINE(LAST_SYSCALL, offsetof(struct thread_struct, last_syscall)); +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); + DEFINE(PT_PTRACED, PT_PTRACED); +#endif +#ifdef CONFIG_SPE + DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0])); + DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc)); + DEFINE(THREAD_SPEFSCR, offsetof(struct thread_struct, spefscr)); + DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); +#endif /* CONFIG_SPE */ +#endif /* CONFIG_PPC64 */ + + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); +#ifdef CONFIG_PPC64 + DEFINE(TI_SC_NOERR, offsetof(struct thread_info, syscall_noerror)); +#else + DEFINE(TI_TASK, offsetof(struct thread_info, task)); + DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain)); + DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); + DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); +#endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_PPC64 + DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); + DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size)); + DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page)); + DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); + DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); + DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); + DEFINE(PLATFORM, offsetof(struct systemcfg, platform)); + DEFINE(PLATFORM_LPAR, PLATFORM_LPAR); + + /* paca */ + DEFINE(PACA_SIZE, sizeof(struct paca_struct)); + DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); + DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); + DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); + DEFINE(PACACURRENT, offsetof(struct paca_struct, __current)); + DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr)); + DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real)); + DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr)); + DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr)); + DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1)); + DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc)); + DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, proc_enabled)); + DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); + DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); + DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); +#ifdef CONFIG_HUGETLB_PAGE + DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); + DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); +#endif /* CONFIG_HUGETLB_PAGE */ + DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); + DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); + DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); + DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); + DEFINE(PACA_EXDSI, offsetof(struct paca_struct, exdsi)); + DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); + DEFINE(PACALPPACA, offsetof(struct paca_struct, lppaca)); + DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); + + DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); + DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); + DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); + DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); + + /* RTAS */ + DEFINE(RTASBASE, offsetof(struct rtas_t, base)); + DEFINE(RTASENTRY, offsetof(struct rtas_t, entry)); +#endif /* CONFIG_PPC64 */ + + /* Interrupt register frame */ + DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); +#ifndef CONFIG_PPC64 + DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); +#else /* CONFIG_PPC64 */ + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + /* 288 = # of volatile regs, int & fp, for leaf routines */ + /* which do not stack a frame. See the PPC64 ABI. */ + DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288); + /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ + DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); + DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); +#endif /* CONFIG_PPC64 */ + DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); + DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); + DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2])); + DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3])); + DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4])); + DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5])); + DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6])); + DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7])); + DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8])); + DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9])); + DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10])); + DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11])); + DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12])); + DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13])); +#ifndef CONFIG_PPC64 + DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14])); + DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15])); + DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16])); + DEFINE(GPR17, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[17])); + DEFINE(GPR18, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[18])); + DEFINE(GPR19, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[19])); + DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20])); + DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21])); + DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22])); + DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23])); + DEFINE(GPR24, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[24])); + DEFINE(GPR25, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[25])); + DEFINE(GPR26, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[26])); + DEFINE(GPR27, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[27])); + DEFINE(GPR28, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[28])); + DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29])); + DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30])); + DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31])); +#endif /* CONFIG_PPC64 */ + /* + * Note: these symbols include _ because they overlap with special + * register names + */ + DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip)); + DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr)); + DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr)); + DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link)); + DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr)); + DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer)); + DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); + DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); + DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3)); + DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result)); +#ifndef CONFIG_PPC64 + DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq)); + /* + * The PowerPC 400-class & Book-E processors have neither the DAR + * nor the DSISR SPRs. Hence, we overload them to hold the similar + * DEAR and ESR SPRs for such processors. For critical interrupts + * we use them to hold SRR0 and SRR1. + */ + DEFINE(_DEAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); + DEFINE(_ESR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); + DEFINE(TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); +#else /* CONFIG_PPC64 */ + DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); + DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe)); + + /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */ + DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)); + DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8); +#endif /* CONFIG_PPC64 */ + + DEFINE(CLONE_VM, CLONE_VM); + DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + +#ifndef CONFIG_PPC64 + DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); +#endif /* ! CONFIG_PPC64 */ + + /* About the CPU features table */ + DEFINE(CPU_SPEC_ENTRY_SIZE, sizeof(struct cpu_spec)); + DEFINE(CPU_SPEC_PVR_MASK, offsetof(struct cpu_spec, pvr_mask)); + DEFINE(CPU_SPEC_PVR_VALUE, offsetof(struct cpu_spec, pvr_value)); + DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); + DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); + +#ifndef CONFIG_PPC64 + DEFINE(pbe_address, offsetof(struct pbe, address)); + DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); + DEFINE(pbe_next, offsetof(struct pbe, next)); + + DEFINE(TASK_SIZE, TASK_SIZE); + DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28); +#else /* CONFIG_PPC64 */ + /* systemcfg offsets for use by vdso */ + DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp)); + DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec)); + DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs)); + DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec)); + DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count)); + DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest)); + DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime)); + DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32)); + DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64)); + + /* timeval/timezone offsets for use by vdso */ + DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec)); + DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec)); + DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec)); + DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec)); + DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); + DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); +#endif /* CONFIG_PPC64 */ + return 0; +} diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c new file mode 100644 index 00000000000..bdfba92b2b3 --- /dev/null +++ b/arch/powerpc/kernel/btext.c @@ -0,0 +1,853 @@ +/* + * Procedures for drawing on the screen early on in the boot process. + * + * Benjamin Herrenschmidt <benh@kernel.crashing.org> + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/module.h> + +#include <asm/sections.h> +#include <asm/prom.h> +#include <asm/btext.h> +#include <asm/prom.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/lmb.h> +#include <asm/processor.h> + +#define NO_SCROLL + +#ifndef NO_SCROLL +static void scrollscreen(void); +#endif + +static void draw_byte(unsigned char c, long locX, long locY); +static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb); +static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb); +static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb); + +static int g_loc_X; +static int g_loc_Y; +static int g_max_loc_X; +static int g_max_loc_Y; + +static int dispDeviceRowBytes; +static int dispDeviceDepth; +static int dispDeviceRect[4]; +static unsigned char *dispDeviceBase, *logicalDisplayBase; + +unsigned long disp_BAT[2] __initdata = {0, 0}; + +#define cmapsz (16*256) + +static unsigned char vga_font[cmapsz]; + +int boot_text_mapped; +int force_printk_to_btext = 0; + +#ifdef CONFIG_PPC32 +/* Calc BAT values for mapping the display and store them + * in disp_BAT. Those values are then used from head.S to map + * the display during identify_machine() and MMU_Init() + * + * The display is mapped to virtual address 0xD0000000, rather + * than 1:1, because some some CHRP machines put the frame buffer + * in the region starting at 0xC0000000 (KERNELBASE). + * This mapping is temporary and will disappear as soon as the + * setup done by MMU_Init() is applied. + * + * For now, we align the BAT and then map 8Mb on 601 and 16Mb + * on other PPCs. This may cause trouble if the framebuffer + * is really badly aligned, but I didn't encounter this case + * yet. + */ +void __init +btext_prepare_BAT(void) +{ + unsigned long vaddr = KERNELBASE + 0x10000000; + unsigned long addr; + unsigned long lowbits; + + addr = (unsigned long)dispDeviceBase; + if (!addr) { + boot_text_mapped = 0; + return; + } + if (PVR_VER(mfspr(SPRN_PVR)) != 1) { + /* 603, 604, G3, G4, ... */ + lowbits = addr & ~0xFF000000UL; + addr &= 0xFF000000UL; + disp_BAT[0] = vaddr | (BL_16M<<2) | 2; + disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW); + } else { + /* 601 */ + lowbits = addr & ~0xFF800000UL; + addr &= 0xFF800000UL; + disp_BAT[0] = vaddr | (_PAGE_NO_CACHE | PP_RWXX) | 4; + disp_BAT[1] = addr | BL_8M | 0x40; + } + logicalDisplayBase = (void *) (vaddr + lowbits); +} +#endif + +/* This function will enable the early boot text when doing OF booting. This + * way, xmon output should work too + */ +void __init +btext_setup_display(int width, int height, int depth, int pitch, + unsigned long address) +{ + g_loc_X = 0; + g_loc_Y = 0; + g_max_loc_X = width / 8; + g_max_loc_Y = height / 16; + logicalDisplayBase = (unsigned char *)address; + dispDeviceBase = (unsigned char *)address; + dispDeviceRowBytes = pitch; + dispDeviceDepth = depth; + dispDeviceRect[0] = dispDeviceRect[1] = 0; + dispDeviceRect[2] = width; + dispDeviceRect[3] = height; + boot_text_mapped = 1; +} + +/* Here's a small text engine to use during early boot + * or for debugging purposes + * + * todo: + * + * - build some kind of vgacon with it to enable early printk + * - move to a separate file + * - add a few video driver hooks to keep in sync with display + * changes. + */ + +void map_boot_text(void) +{ + unsigned long base, offset, size; + unsigned char *vbase; + + /* By default, we are no longer mapped */ + boot_text_mapped = 0; + if (dispDeviceBase == 0) + return; + base = ((unsigned long) dispDeviceBase) & 0xFFFFF000UL; + offset = ((unsigned long) dispDeviceBase) - base; + size = dispDeviceRowBytes * dispDeviceRect[3] + offset + + dispDeviceRect[0]; + vbase = __ioremap(base, size, _PAGE_NO_CACHE); + if (vbase == 0) + return; + logicalDisplayBase = vbase + offset; + boot_text_mapped = 1; +} + +int btext_initialize(struct device_node *np) +{ + unsigned int width, height, depth, pitch; + unsigned long address = 0; + u32 *prop; + + prop = (u32 *)get_property(np, "width", NULL); + if (prop == NULL) + return -EINVAL; + width = *prop; + prop = (u32 *)get_property(np, "height", NULL); + if (prop == NULL) + return -EINVAL; + height = *prop; + prop = (u32 *)get_property(np, "depth", NULL); + if (prop == NULL) + return -EINVAL; + depth = *prop; + pitch = width * ((depth + 7) / 8); + prop = (u32 *)get_property(np, "linebytes", NULL); + if (prop) + pitch = *prop; + if (pitch == 1) + pitch = 0x1000; + prop = (u32 *)get_property(np, "address", NULL); + if (prop) + address = *prop; + + /* FIXME: Add support for PCI reg properties */ + + if (address == 0) + return -EINVAL; + + g_loc_X = 0; + g_loc_Y = 0; + g_max_loc_X = width / 8; + g_max_loc_Y = height / 16; + logicalDisplayBase = (unsigned char *)address; + dispDeviceBase = (unsigned char *)address; + dispDeviceRowBytes = pitch; + dispDeviceDepth = depth; + dispDeviceRect[0] = dispDeviceRect[1] = 0; + dispDeviceRect[2] = width; + dispDeviceRect[3] = height; + + map_boot_text(); + + return 0; +} + +void __init init_boot_display(void) +{ + char *name; + struct device_node *np = NULL; + int rc = -ENODEV; + + printk("trying to initialize btext ...\n"); + + name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + if (name != NULL) { + np = of_find_node_by_path(name); + if (np != NULL) { + if (strcmp(np->type, "display") != 0) { + printk("boot stdout isn't a display !\n"); + of_node_put(np); + np = NULL; + } + } + } + if (np) + rc = btext_initialize(np); + if (rc == 0) + return; + + for (np = NULL; (np = of_find_node_by_type(np, "display"));) { + if (get_property(np, "linux,opened", NULL)) { + printk("trying %s ...\n", np->full_name); + rc = btext_initialize(np); + printk("result: %d\n", rc); + } + if (rc == 0) + return; + } +} + +/* Calc the base address of a given point (x,y) */ +static unsigned char * calc_base(int x, int y) +{ + unsigned char *base; + + base = logicalDisplayBase; + if (base == 0) + base = dispDeviceBase; + base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3); + base += (y + dispDeviceRect[1]) * dispDeviceRowBytes; + return base; +} + +/* Adjust the display to a new resolution */ +void btext_update_display(unsigned long phys, int width, int height, + int depth, int pitch) +{ + if (dispDeviceBase == 0) + return; + + /* check it's the same frame buffer (within 256MB) */ + if ((phys ^ (unsigned long)dispDeviceBase) & 0xf0000000) + return; + + dispDeviceBase = (__u8 *) phys; + dispDeviceRect[0] = 0; + dispDeviceRect[1] = 0; + dispDeviceRect[2] = width; + dispDeviceRect[3] = height; + dispDeviceDepth = depth; + dispDeviceRowBytes = pitch; + if (boot_text_mapped) { + iounmap(logicalDisplayBase); + boot_text_mapped = 0; + } + map_boot_text(); + g_loc_X = 0; + g_loc_Y = 0; + g_max_loc_X = width / 8; + g_max_loc_Y = height / 16; +} +EXPORT_SYMBOL(btext_update_display); + +void btext_clearscreen(void) +{ + unsigned long *base = (unsigned long *)calc_base(0, 0); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 3; + int i,j; + + for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++) + { + unsigned long *ptr = base; + for(j=width; j; --j) + *(ptr++) = 0; + base += (dispDeviceRowBytes >> 3); + } +} + +#ifndef NO_SCROLL +static void scrollscreen(void) +{ + unsigned long *src = (unsigned long *)calc_base(0,16); + unsigned long *dst = (unsigned long *)calc_base(0,0); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 3; + int i,j; + + for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++) + { + unsigned long *src_ptr = src; + unsigned long *dst_ptr = dst; + for(j=width; j; --j) + *(dst_ptr++) = *(src_ptr++); + src += (dispDeviceRowBytes >> 3); + dst += (dispDeviceRowBytes >> 3); + } + for (i=0; i<16; i++) + { + unsigned long *dst_ptr = dst; + for(j=width; j; --j) + *(dst_ptr++) = 0; + dst += (dispDeviceRowBytes >> 3); + } +} +#endif /* ndef NO_SCROLL */ + +void btext_drawchar(char c) +{ + int cline = 0; +#ifdef NO_SCROLL + int x; +#endif + if (!boot_text_mapped) + return; + + switch (c) { + case '\b': + if (g_loc_X > 0) + --g_loc_X; + break; + case '\t': + g_loc_X = (g_loc_X & -8) + 8; + break; + case '\r': + g_loc_X = 0; + break; + case '\n': + g_loc_X = 0; + g_loc_Y++; + cline = 1; + break; + default: + draw_byte(c, g_loc_X++, g_loc_Y); + } + if (g_loc_X >= g_max_loc_X) { + g_loc_X = 0; + g_loc_Y++; + cline = 1; + } +#ifndef NO_SCROLL + while (g_loc_Y >= g_max_loc_Y) { + scrollscreen(); + g_loc_Y--; + } +#else + /* wrap around from bottom to top of screen so we don't + waste time scrolling each line. -- paulus. */ + if (g_loc_Y >= g_max_loc_Y) + g_loc_Y = 0; + if (cline) { + for (x = 0; x < g_max_loc_X; ++x) + draw_byte(' ', x, g_loc_Y); + } +#endif +} + +void btext_drawstring(const char *c) +{ + if (!boot_text_mapped) + return; + while (*c) + btext_drawchar(*c++); +} + +void btext_drawhex(unsigned long v) +{ + char *hex_table = "0123456789abcdef"; + + if (!boot_text_mapped) + return; +#ifdef CONFIG_PPC64 + btext_drawchar(hex_table[(v >> 60) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 56) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 52) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 48) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 44) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 40) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 36) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 32) & 0x0000000FUL]); +#endif + btext_drawchar(hex_table[(v >> 28) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 24) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 20) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 16) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 12) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 8) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 4) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 0) & 0x0000000FUL]); + btext_drawchar(' '); +} + +static void draw_byte(unsigned char c, long locX, long locY) +{ + unsigned char *base = calc_base(locX << 3, locY << 4); + unsigned char *font = &vga_font[((unsigned int)c) * 16]; + int rb = dispDeviceRowBytes; + + switch(dispDeviceDepth) { + case 24: + case 32: + draw_byte_32(font, (unsigned int *)base, rb); + break; + case 15: + case 16: + draw_byte_16(font, (unsigned int *)base, rb); + break; + case 8: + draw_byte_8(font, (unsigned int *)base, rb); + break; + } +} + +static unsigned int expand_bits_8[16] = { + 0x00000000, + 0x000000ff, + 0x0000ff00, + 0x0000ffff, + 0x00ff0000, + 0x00ff00ff, + 0x00ffff00, + 0x00ffffff, + 0xff000000, + 0xff0000ff, + 0xff00ff00, + 0xff00ffff, + 0xffff0000, + 0xffff00ff, + 0xffffff00, + 0xffffffff +}; + +static unsigned int expand_bits_16[4] = { + 0x00000000, + 0x0000ffff, + 0xffff0000, + 0xffffffff +}; + + +static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (-(bits >> 7) & fg) ^ bg; + base[1] = (-((bits >> 6) & 1) & fg) ^ bg; + base[2] = (-((bits >> 5) & 1) & fg) ^ bg; + base[3] = (-((bits >> 4) & 1) & fg) ^ bg; + base[4] = (-((bits >> 3) & 1) & fg) ^ bg; + base[5] = (-((bits >> 2) & 1) & fg) ^ bg; + base[6] = (-((bits >> 1) & 1) & fg) ^ bg; + base[7] = (-(bits & 1) & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static void draw_byte_16(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + unsigned int *eb = (int *)expand_bits_16; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 6] & fg) ^ bg; + base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg; + base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg; + base[3] = (eb[bits & 3] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static void draw_byte_8(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0x0F0F0F0FUL; + int bg = 0x00000000UL; + unsigned int *eb = (int *)expand_bits_8; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 4] & fg) ^ bg; + base[1] = (eb[bits & 0xf] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static unsigned char vga_font[cmapsz] = { +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, +0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff, +0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, +0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, +0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, +0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, +0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, +0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, +0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e, +0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, +0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63, +0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, +0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e, +0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, +0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb, +0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, +0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, +0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, +0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, +0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, +0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, +0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, +0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, +0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, +0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, +0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18, +0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, +0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, +0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, +0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, +0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, +0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, +0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, +0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde, +0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, +0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, +0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c, +0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, +0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, +0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c, +0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, +0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7, +0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, +0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, +0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, +0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, +0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, +0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, +0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, +0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, +0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, +0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, +0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, +0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, +0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, +0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30, +0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, +0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18, +0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, +0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, +0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00, +0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, +0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, +0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, +0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, +0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, +0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, +0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, +0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66, +0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, +0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00, +0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, +0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c, +0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00, +0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, +0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, +0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00, +0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, +0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, +0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, +0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00, +0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, +0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, +0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, +0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, +0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, +0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, +0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, +0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, +0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, +0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, +0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44, +0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, +0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, +0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, +0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, +0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0, +0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, +0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, +0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, +0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, +0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, +0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, +0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, +0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, +0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, +0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, +0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, +0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, +0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, +0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c, +0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00, +0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, +}; diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S new file mode 100644 index 00000000000..094eea6fbd6 --- /dev/null +++ b/arch/powerpc/kernel/entry_32.S @@ -0,0 +1,1002 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@fsmlabs.com) for PReP + * Copyright (C) 1996 Cort Dougan <cort@fsmlabs.com> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This file contains the system call entry code, context switch + * code, and exception/interrupt return code for PowerPC. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/sys.h> +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + +#undef SHOW_SYSCALLS +#undef SHOW_SYSCALLS_TASK + +/* + * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. + */ +#if MSR_KERNEL >= 0x10000 +#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l +#else +#define LOAD_MSR_KERNEL(r, x) li r,(x) +#endif + +#ifdef CONFIG_BOOKE +#include "head_booke.h" +#define TRANSFER_TO_HANDLER_EXC_LEVEL(exc_level) \ + mtspr exc_level##_SPRG,r8; \ + BOOKE_LOAD_EXC_LEVEL_STACK(exc_level); \ + lwz r0,GPR10-INT_FRAME_SIZE(r8); \ + stw r0,GPR10(r11); \ + lwz r0,GPR11-INT_FRAME_SIZE(r8); \ + stw r0,GPR11(r11); \ + mfspr r8,exc_level##_SPRG + + .globl mcheck_transfer_to_handler +mcheck_transfer_to_handler: + TRANSFER_TO_HANDLER_EXC_LEVEL(MCHECK) + b transfer_to_handler_full + + .globl debug_transfer_to_handler +debug_transfer_to_handler: + TRANSFER_TO_HANDLER_EXC_LEVEL(DEBUG) + b transfer_to_handler_full + + .globl crit_transfer_to_handler +crit_transfer_to_handler: + TRANSFER_TO_HANDLER_EXC_LEVEL(CRIT) + /* fall through */ +#endif + +#ifdef CONFIG_40x + .globl crit_transfer_to_handler +crit_transfer_to_handler: + lwz r0,crit_r10@l(0) + stw r0,GPR10(r11) + lwz r0,crit_r11@l(0) + stw r0,GPR11(r11) + /* fall through */ +#endif + +/* + * This code finishes saving the registers to the exception frame + * and jumps to the appropriate handler for the exception, turning + * on address translation. + * Note that we rely on the caller having set cr0.eq iff the exception + * occurred in kernel mode (i.e. MSR:PR = 0). + */ + .globl transfer_to_handler_full +transfer_to_handler_full: + SAVE_NVGPRS(r11) + /* fall through */ + + .globl transfer_to_handler +transfer_to_handler: + stw r2,GPR2(r11) + stw r12,_NIP(r11) + stw r9,_MSR(r11) + andi. r2,r9,MSR_PR + mfctr r12 + mfspr r2,SPRN_XER + stw r12,_CTR(r11) + stw r2,_XER(r11) + mfspr r12,SPRN_SPRG3 + addi r2,r12,-THREAD + tovirt(r2,r2) /* set r2 to current */ + beq 2f /* if from user, fix up THREAD.regs */ + addi r11,r1,STACK_FRAME_OVERHEAD + stw r11,PT_REGS(r12) +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + /* Check to see if the dbcr0 register is set up to debug. Use the + single-step bit to do this. */ + lwz r12,THREAD_DBCR0(r12) + andis. r12,r12,DBCR0_IC@h + beq+ 3f + /* From user and task is ptraced - load up global dbcr0 */ + li r12,-1 /* clear all pending debug events */ + mtspr SPRN_DBSR,r12 + lis r11,global_dbcr0@ha + tophys(r11,r11) + addi r11,r11,global_dbcr0@l + lwz r12,0(r11) + mtspr SPRN_DBCR0,r12 + lwz r12,4(r11) + addi r12,r12,-1 + stw r12,4(r11) +#endif + b 3f +2: /* if from kernel, check interrupted DOZE/NAP mode and + * check for stack overflow + */ +#ifdef CONFIG_6xx + mfspr r11,SPRN_HID0 + mtcr r11 +BEGIN_FTR_SECTION + bt- 8,power_save_6xx_restore /* Check DOZE */ +END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) +BEGIN_FTR_SECTION + bt- 9,power_save_6xx_restore /* Check NAP */ +END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) +#endif /* CONFIG_6xx */ + .globl transfer_to_handler_cont +transfer_to_handler_cont: + lwz r11,THREAD_INFO-THREAD(r12) + cmplw r1,r11 /* if r1 <= current->thread_info */ + ble- stack_ovf /* then the kernel stack overflowed */ +3: + mflr r9 + lwz r11,0(r9) /* virtual address of handler */ + lwz r9,4(r9) /* where to go when done */ + FIX_SRR1(r10,r12) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r10 + mtlr r9 + SYNC + RFI /* jump to handler, enable MMU */ + +/* + * On kernel stack overflow, load up an initial stack pointer + * and call StackOverflow(regs), which should not return. + */ +stack_ovf: + /* sometimes we use a statically-allocated stack, which is OK. */ + lis r11,_end@h + ori r11,r11,_end@l + cmplw r1,r11 + ble 3b /* r1 <= &_end is OK */ + SAVE_NVGPRS(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + lis r9,StackOverflow@ha + addi r9,r9,StackOverflow@l + LOAD_MSR_KERNEL(r10,MSR_KERNEL) + FIX_SRR1(r10,r12) + mtspr SPRN_SRR0,r9 + mtspr SPRN_SRR1,r10 + SYNC + RFI + +/* + * Handle a system call. + */ + .stabs "arch/powerpc/kernel/",N_SO,0,0,0f + .stabs "entry_32.S",N_SO,0,0,0f +0: + +_GLOBAL(DoSyscall) + stw r0,THREAD+LAST_SYSCALL(r2) + stw r3,ORIG_GPR3(r1) + li r12,0 + stw r12,RESULT(r1) + lwz r11,_CCR(r1) /* Clear SO bit in CR */ + rlwinm r11,r11,0,4,2 + stw r11,_CCR(r1) +#ifdef SHOW_SYSCALLS + bl do_show_syscall +#endif /* SHOW_SYSCALLS */ + rlwinm r10,r1,0,0,18 /* current_thread_info() */ + lwz r11,TI_LOCAL_FLAGS(r10) + rlwinm r11,r11,0,~_TIFL_FORCE_NOERROR + stw r11,TI_LOCAL_FLAGS(r10) + lwz r11,TI_FLAGS(r10) + andi. r11,r11,_TIF_SYSCALL_T_OR_A + bne- syscall_dotrace +syscall_dotrace_cont: + cmplwi 0,r0,NR_syscalls + lis r10,sys_call_table@h + ori r10,r10,sys_call_table@l + slwi r0,r0,2 + bge- 66f + lwzx r10,r10,r0 /* Fetch system call handler [ptr] */ + mtlr r10 + addi r9,r1,STACK_FRAME_OVERHEAD + PPC440EP_ERR42 + blrl /* Call handler */ + .globl ret_from_syscall +ret_from_syscall: +#ifdef SHOW_SYSCALLS + bl do_show_syscall_exit +#endif + mr r6,r3 + li r11,-_LAST_ERRNO + cmplw 0,r3,r11 + rlwinm r12,r1,0,0,18 /* current_thread_info() */ + blt+ 30f + lwz r11,TI_LOCAL_FLAGS(r12) + andi. r11,r11,_TIFL_FORCE_NOERROR + bne 30f + neg r3,r3 + lwz r10,_CCR(r1) /* Set SO bit in CR */ + oris r10,r10,0x1000 + stw r10,_CCR(r1) + + /* disable interrupts so current_thread_info()->flags can't change */ +30: LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ + SYNC + MTMSRD(r10) + lwz r9,TI_FLAGS(r12) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + bne- syscall_exit_work +syscall_exit_cont: +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + /* If the process has its own DBCR0 value, load it up. The single + step bit tells us that dbcr0 should be loaded. */ + lwz r0,THREAD+THREAD_DBCR0(r2) + andis. r10,r0,DBCR0_IC@h + bnel- load_dbcr0 +#endif + stwcx. r0,0,r1 /* to clear the reservation */ + lwz r4,_LINK(r1) + lwz r5,_CCR(r1) + mtlr r4 + mtcr r5 + lwz r7,_NIP(r1) + lwz r8,_MSR(r1) + FIX_SRR1(r8, r0) + lwz r2,GPR2(r1) + lwz r1,GPR1(r1) + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r8 + SYNC + RFI + +66: li r3,-ENOSYS + b ret_from_syscall + + .globl ret_from_fork +ret_from_fork: + REST_NVGPRS(r1) + bl schedule_tail + li r3,0 + b ret_from_syscall + +/* Traced system call support */ +syscall_dotrace: + SAVE_NVGPRS(r1) + li r0,0xc00 + stw r0,TRAP(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_syscall_trace_enter + lwz r0,GPR0(r1) /* Restore original registers */ + lwz r3,GPR3(r1) + lwz r4,GPR4(r1) + lwz r5,GPR5(r1) + lwz r6,GPR6(r1) + lwz r7,GPR7(r1) + lwz r8,GPR8(r1) + REST_NVGPRS(r1) + b syscall_dotrace_cont + +syscall_exit_work: + stw r6,RESULT(r1) /* Save result */ + stw r3,GPR3(r1) /* Update return value */ + andi. r0,r9,_TIF_SYSCALL_T_OR_A + beq 5f + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) /* re-enable interrupts */ + lwz r4,TRAP(r1) + andi. r4,r4,1 + beq 4f + SAVE_NVGPRS(r1) + li r4,0xc00 + stw r4,TRAP(r1) +4: + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_syscall_trace_leave + REST_NVGPRS(r1) +2: + lwz r3,GPR3(r1) + LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ + SYNC + MTMSRD(r10) /* disable interrupts again */ + rlwinm r12,r1,0,0,18 /* current_thread_info() */ + lwz r9,TI_FLAGS(r12) +5: + andi. r0,r9,_TIF_NEED_RESCHED + bne 1f + lwz r5,_MSR(r1) + andi. r5,r5,MSR_PR + beq syscall_exit_cont + andi. r0,r9,_TIF_SIGPENDING + beq syscall_exit_cont + b do_user_signal +1: + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) /* re-enable interrupts */ + bl schedule + b 2b + +#ifdef SHOW_SYSCALLS +do_show_syscall: +#ifdef SHOW_SYSCALLS_TASK + lis r11,show_syscalls_task@ha + lwz r11,show_syscalls_task@l(r11) + cmp 0,r2,r11 + bnelr +#endif + stw r31,GPR31(r1) + mflr r31 + lis r3,7f@ha + addi r3,r3,7f@l + lwz r4,GPR0(r1) + lwz r5,GPR3(r1) + lwz r6,GPR4(r1) + lwz r7,GPR5(r1) + lwz r8,GPR6(r1) + lwz r9,GPR7(r1) + bl printk + lis r3,77f@ha + addi r3,r3,77f@l + lwz r4,GPR8(r1) + mr r5,r2 + bl printk + lwz r0,GPR0(r1) + lwz r3,GPR3(r1) + lwz r4,GPR4(r1) + lwz r5,GPR5(r1) + lwz r6,GPR6(r1) + lwz r7,GPR7(r1) + lwz r8,GPR8(r1) + mtlr r31 + lwz r31,GPR31(r1) + blr + +do_show_syscall_exit: +#ifdef SHOW_SYSCALLS_TASK + lis r11,show_syscalls_task@ha + lwz r11,show_syscalls_task@l(r11) + cmp 0,r2,r11 + bnelr +#endif + stw r31,GPR31(r1) + mflr r31 + stw r3,RESULT(r1) /* Save result */ + mr r4,r3 + lis r3,79f@ha + addi r3,r3,79f@l + bl printk + lwz r3,RESULT(r1) + mtlr r31 + lwz r31,GPR31(r1) + blr + +7: .string "syscall %d(%x, %x, %x, %x, %x, " +77: .string "%x), current=%p\n" +79: .string " -> %x\n" + .align 2,0 + +#ifdef SHOW_SYSCALLS_TASK + .data + .globl show_syscalls_task +show_syscalls_task: + .long -1 + .text +#endif +#endif /* SHOW_SYSCALLS */ + +/* + * The sigsuspend and rt_sigsuspend system calls can call do_signal + * and thus put the process into the stopped state where we might + * want to examine its user state with ptrace. Therefore we need + * to save all the nonvolatile registers (r13 - r31) before calling + * the C code. + */ + .globl ppc_sigsuspend +ppc_sigsuspend: + SAVE_NVGPRS(r1) + lwz r0,TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,TRAP(r1) /* register set saved */ + b sys_sigsuspend + + .globl ppc_rt_sigsuspend +ppc_rt_sigsuspend: + SAVE_NVGPRS(r1) + lwz r0,TRAP(r1) + rlwinm r0,r0,0,0,30 + stw r0,TRAP(r1) + b sys_rt_sigsuspend + + .globl ppc_fork +ppc_fork: + SAVE_NVGPRS(r1) + lwz r0,TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,TRAP(r1) /* register set saved */ + b sys_fork + + .globl ppc_vfork +ppc_vfork: + SAVE_NVGPRS(r1) + lwz r0,TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,TRAP(r1) /* register set saved */ + b sys_vfork + + .globl ppc_clone +ppc_clone: + SAVE_NVGPRS(r1) + lwz r0,TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,TRAP(r1) /* register set saved */ + b sys_clone + + .globl ppc_swapcontext +ppc_swapcontext: + SAVE_NVGPRS(r1) + lwz r0,TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,TRAP(r1) /* register set saved */ + b sys_swapcontext + +/* + * Top-level page fault handling. + * This is in assembler because if do_page_fault tells us that + * it is a bad kernel page fault, we want to save the non-volatile + * registers before calling bad_page_fault. + */ + .globl handle_page_fault +handle_page_fault: + stw r4,_DAR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_page_fault + cmpwi r3,0 + beq+ ret_from_except + SAVE_NVGPRS(r1) + lwz r0,TRAP(r1) + clrrwi r0,r0,1 + stw r0,TRAP(r1) + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + lwz r4,_DAR(r1) + bl bad_page_fault + b ret_from_except_full + +/* + * This routine switches between two different tasks. The process + * state of one is saved on its kernel stack. Then the state + * of the other is restored from its kernel stack. The memory + * management hardware is updated to the second process's state. + * Finally, we can return to the second process. + * On entry, r3 points to the THREAD for the current task, r4 + * points to the THREAD for the new task. + * + * This routine is always called with interrupts disabled. + * + * Note: there are two ways to get to the "going out" portion + * of this code; either by coming in via the entry (_switch) + * or via "fork" which must set up an environment equivalent + * to the "_switch" path. If you change this , you'll have to + * change the fork code also. + * + * The code which creates the new task context is in 'copy_thread' + * in arch/ppc/kernel/process.c + */ +_GLOBAL(_switch) + stwu r1,-INT_FRAME_SIZE(r1) + mflr r0 + stw r0,INT_FRAME_SIZE+4(r1) + /* r3-r12 are caller saved -- Cort */ + SAVE_NVGPRS(r1) + stw r0,_NIP(r1) /* Return to switch caller */ + mfmsr r11 + li r0,MSR_FP /* Disable floating-point */ +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + oris r0,r0,MSR_VEC@h /* Disable altivec */ + mfspr r12,SPRN_VRSAVE /* save vrsave register value */ + stw r12,THREAD+THREAD_VRSAVE(r2) +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + oris r0,r0,MSR_SPE@h /* Disable SPE */ + mfspr r12,SPRN_SPEFSCR /* save spefscr register value */ + stw r12,THREAD+THREAD_SPEFSCR(r2) +#endif /* CONFIG_SPE */ + and. r0,r0,r11 /* FP or altivec or SPE enabled? */ + beq+ 1f + andc r11,r11,r0 + MTMSRD(r11) + isync +1: stw r11,_MSR(r1) + mfcr r10 + stw r10,_CCR(r1) + stw r1,KSP(r3) /* Set old stack pointer */ + +#ifdef CONFIG_SMP + /* We need a sync somewhere here to make sure that if the + * previous task gets rescheduled on another CPU, it sees all + * stores it has performed on this one. + */ + sync +#endif /* CONFIG_SMP */ + + tophys(r0,r4) + CLR_TOP32(r0) + mtspr SPRN_SPRG3,r0 /* Update current THREAD phys addr */ + lwz r1,KSP(r4) /* Load new stack pointer */ + + /* save the old current 'last' for return value */ + mr r3,r2 + addi r2,r4,-THREAD /* Update current */ + +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + lwz r0,THREAD+THREAD_VRSAVE(r2) + mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + lwz r0,THREAD+THREAD_SPEFSCR(r2) + mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */ +#endif /* CONFIG_SPE */ + + lwz r0,_CCR(r1) + mtcrf 0xFF,r0 + /* r3-r12 are destroyed -- Cort */ + REST_NVGPRS(r1) + + lwz r4,_NIP(r1) /* Return to _switch caller in new task */ + mtlr r4 + addi r1,r1,INT_FRAME_SIZE + blr + + .globl fast_exception_return +fast_exception_return: +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) + andi. r10,r9,MSR_RI /* check for recoverable interrupt */ + beq 1f /* if not, we've got problems */ +#endif + +2: REST_4GPRS(3, r11) + lwz r10,_CCR(r11) + REST_GPR(1, r11) + mtcr r10 + lwz r10,_LINK(r11) + mtlr r10 + REST_GPR(10, r11) + mtspr SPRN_SRR1,r9 + mtspr SPRN_SRR0,r12 + REST_GPR(9, r11) + REST_GPR(12, r11) + lwz r11,GPR11(r11) + SYNC + RFI + +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) +/* check if the exception happened in a restartable section */ +1: lis r3,exc_exit_restart_end@ha + addi r3,r3,exc_exit_restart_end@l + cmplw r12,r3 + bge 3f + lis r4,exc_exit_restart@ha + addi r4,r4,exc_exit_restart@l + cmplw r12,r4 + blt 3f + lis r3,fee_restarts@ha + tophys(r3,r3) + lwz r5,fee_restarts@l(r3) + addi r5,r5,1 + stw r5,fee_restarts@l(r3) + mr r12,r4 /* restart at exc_exit_restart */ + b 2b + + .comm fee_restarts,4 + +/* aargh, a nonrecoverable interrupt, panic */ +/* aargh, we don't know which trap this is */ +/* but the 601 doesn't implement the RI bit, so assume it's OK */ +3: +BEGIN_FTR_SECTION + b 2b +END_FTR_SECTION_IFSET(CPU_FTR_601) + li r10,-1 + stw r10,TRAP(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + lis r10,MSR_KERNEL@h + ori r10,r10,MSR_KERNEL@l + bl transfer_to_handler_full + .long nonrecoverable_exception + .long ret_from_except +#endif + + .globl sigreturn_exit +sigreturn_exit: + subi r1,r3,STACK_FRAME_OVERHEAD + rlwinm r12,r1,0,0,18 /* current_thread_info() */ + lwz r9,TI_FLAGS(r12) + andi. r0,r9,_TIF_SYSCALL_T_OR_A + bnel- do_syscall_trace_leave + /* fall through */ + + .globl ret_from_except_full +ret_from_except_full: + REST_NVGPRS(r1) + /* fall through */ + + .globl ret_from_except +ret_from_except: + /* Hard-disable interrupts so that current_thread_info()->flags + * can't change between when we test it and when we return + * from the interrupt. */ + LOAD_MSR_KERNEL(r10,MSR_KERNEL) + SYNC /* Some chip revs have problems here... */ + MTMSRD(r10) /* disable interrupts */ + + lwz r3,_MSR(r1) /* Returning to user mode? */ + andi. r0,r3,MSR_PR + beq resume_kernel + +user_exc_return: /* r10 contains MSR_KERNEL here */ + /* Check current_thread_info()->flags */ + rlwinm r9,r1,0,0,18 + lwz r9,TI_FLAGS(r9) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED) + bne do_work + +restore_user: +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + /* Check whether this process has its own DBCR0 value. The single + step bit tells us that dbcr0 should be loaded. */ + lwz r0,THREAD+THREAD_DBCR0(r2) + andis. r10,r0,DBCR0_IC@h + bnel- load_dbcr0 +#endif + +#ifdef CONFIG_PREEMPT + b restore + +/* N.B. the only way to get here is from the beq following ret_from_except. */ +resume_kernel: + /* check current_thread_info->preempt_count */ + rlwinm r9,r1,0,0,18 + lwz r0,TI_PREEMPT(r9) + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ + bne restore + lwz r0,TI_FLAGS(r9) + andi. r0,r0,_TIF_NEED_RESCHED + beq+ restore + andi. r0,r3,MSR_EE /* interrupts off? */ + beq restore /* don't schedule if so */ +1: bl preempt_schedule_irq + rlwinm r9,r1,0,0,18 + lwz r3,TI_FLAGS(r9) + andi. r0,r3,_TIF_NEED_RESCHED + bne- 1b +#else +resume_kernel: +#endif /* CONFIG_PREEMPT */ + + /* interrupts are hard-disabled at this point */ +restore: + lwz r0,GPR0(r1) + lwz r2,GPR2(r1) + REST_4GPRS(3, r1) + REST_2GPRS(7, r1) + + lwz r10,_XER(r1) + lwz r11,_CTR(r1) + mtspr SPRN_XER,r10 + mtctr r11 + + PPC405_ERR77(0,r1) + stwcx. r0,0,r1 /* to clear the reservation */ + +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) + lwz r9,_MSR(r1) + andi. r10,r9,MSR_RI /* check if this exception occurred */ + beql nonrecoverable /* at a bad place (MSR:RI = 0) */ + + lwz r10,_CCR(r1) + lwz r11,_LINK(r1) + mtcrf 0xFF,r10 + mtlr r11 + + /* + * Once we put values in SRR0 and SRR1, we are in a state + * where exceptions are not recoverable, since taking an + * exception will trash SRR0 and SRR1. Therefore we clear the + * MSR:RI bit to indicate this. If we do take an exception, + * we can't return to the point of the exception but we + * can restart the exception exit path at the label + * exc_exit_restart below. -- paulus + */ + LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI) + SYNC + MTMSRD(r10) /* clear the RI bit */ + .globl exc_exit_restart +exc_exit_restart: + lwz r9,_MSR(r1) + lwz r12,_NIP(r1) + FIX_SRR1(r9,r10) + mtspr SPRN_SRR0,r12 + mtspr SPRN_SRR1,r9 + REST_4GPRS(9, r1) + lwz r1,GPR1(r1) + .globl exc_exit_restart_end +exc_exit_restart_end: + SYNC + RFI + +#else /* !(CONFIG_4xx || CONFIG_BOOKE) */ + /* + * This is a bit different on 4xx/Book-E because it doesn't have + * the RI bit in the MSR. + * The TLB miss handler checks if we have interrupted + * the exception exit path and restarts it if so + * (well maybe one day it will... :). + */ + lwz r11,_LINK(r1) + mtlr r11 + lwz r10,_CCR(r1) + mtcrf 0xff,r10 + REST_2GPRS(9, r1) + .globl exc_exit_restart +exc_exit_restart: + lwz r11,_NIP(r1) + lwz r12,_MSR(r1) +exc_exit_start: + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 + REST_2GPRS(11, r1) + lwz r1,GPR1(r1) + .globl exc_exit_restart_end +exc_exit_restart_end: + PPC405_ERR77_SYNC + rfi + b . /* prevent prefetch past rfi */ + +/* + * Returning from a critical interrupt in user mode doesn't need + * to be any different from a normal exception. For a critical + * interrupt in the kernel, we just return (without checking for + * preemption) since the interrupt may have happened at some crucial + * place (e.g. inside the TLB miss handler), and because we will be + * running with r1 pointing into critical_stack, not the current + * process's kernel stack (and therefore current_thread_info() will + * give the wrong answer). + * We have to restore various SPRs that may have been in use at the + * time of the critical interrupt. + * + */ +#ifdef CONFIG_40x +#define PPC_40x_TURN_OFF_MSR_DR \ + /* avoid any possible TLB misses here by turning off MSR.DR, we \ + * assume the instructions here are mapped by a pinned TLB entry */ \ + li r10,MSR_IR; \ + mtmsr r10; \ + isync; \ + tophys(r1, r1); +#else +#define PPC_40x_TURN_OFF_MSR_DR +#endif + +#define RET_FROM_EXC_LEVEL(exc_lvl_srr0, exc_lvl_srr1, exc_lvl_rfi) \ + REST_NVGPRS(r1); \ + lwz r3,_MSR(r1); \ + andi. r3,r3,MSR_PR; \ + LOAD_MSR_KERNEL(r10,MSR_KERNEL); \ + bne user_exc_return; \ + lwz r0,GPR0(r1); \ + lwz r2,GPR2(r1); \ + REST_4GPRS(3, r1); \ + REST_2GPRS(7, r1); \ + lwz r10,_XER(r1); \ + lwz r11,_CTR(r1); \ + mtspr SPRN_XER,r10; \ + mtctr r11; \ + PPC405_ERR77(0,r1); \ + stwcx. r0,0,r1; /* to clear the reservation */ \ + lwz r11,_LINK(r1); \ + mtlr r11; \ + lwz r10,_CCR(r1); \ + mtcrf 0xff,r10; \ + PPC_40x_TURN_OFF_MSR_DR; \ + lwz r9,_DEAR(r1); \ + lwz r10,_ESR(r1); \ + mtspr SPRN_DEAR,r9; \ + mtspr SPRN_ESR,r10; \ + lwz r11,_NIP(r1); \ + lwz r12,_MSR(r1); \ + mtspr exc_lvl_srr0,r11; \ + mtspr exc_lvl_srr1,r12; \ + lwz r9,GPR9(r1); \ + lwz r12,GPR12(r1); \ + lwz r10,GPR10(r1); \ + lwz r11,GPR11(r1); \ + lwz r1,GPR1(r1); \ + PPC405_ERR77_SYNC; \ + exc_lvl_rfi; \ + b .; /* prevent prefetch past exc_lvl_rfi */ + + .globl ret_from_crit_exc +ret_from_crit_exc: + RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, RFCI) + +#ifdef CONFIG_BOOKE + .globl ret_from_debug_exc +ret_from_debug_exc: + RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, RFDI) + + .globl ret_from_mcheck_exc +ret_from_mcheck_exc: + RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, RFMCI) +#endif /* CONFIG_BOOKE */ + +/* + * Load the DBCR0 value for a task that is being ptraced, + * having first saved away the global DBCR0. Note that r0 + * has the dbcr0 value to set upon entry to this. + */ +load_dbcr0: + mfmsr r10 /* first disable debug exceptions */ + rlwinm r10,r10,0,~MSR_DE + mtmsr r10 + isync + mfspr r10,SPRN_DBCR0 + lis r11,global_dbcr0@ha + addi r11,r11,global_dbcr0@l + stw r10,0(r11) + mtspr SPRN_DBCR0,r0 + lwz r10,4(r11) + addi r10,r10,1 + stw r10,4(r11) + li r11,-1 + mtspr SPRN_DBSR,r11 /* clear all pending debug events */ + blr + + .comm global_dbcr0,8 +#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ + +do_work: /* r10 contains MSR_KERNEL here */ + andi. r0,r9,_TIF_NEED_RESCHED + beq do_user_signal + +do_resched: /* r10 contains MSR_KERNEL here */ + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) /* hard-enable interrupts */ + bl schedule +recheck: + LOAD_MSR_KERNEL(r10,MSR_KERNEL) + SYNC + MTMSRD(r10) /* disable interrupts */ + rlwinm r9,r1,0,0,18 + lwz r9,TI_FLAGS(r9) + andi. r0,r9,_TIF_NEED_RESCHED + bne- do_resched + andi. r0,r9,_TIF_SIGPENDING + beq restore_user +do_user_signal: /* r10 contains MSR_KERNEL here */ + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) /* hard-enable interrupts */ + /* save r13-r31 in the exception frame, if not already done */ + lwz r3,TRAP(r1) + andi. r0,r3,1 + beq 2f + SAVE_NVGPRS(r1) + rlwinm r3,r3,0,0,30 + stw r3,TRAP(r1) +2: li r3,0 + addi r4,r1,STACK_FRAME_OVERHEAD + bl do_signal + REST_NVGPRS(r1) + b recheck + +/* + * We come here when we are at the end of handling an exception + * that occurred at a place where taking an exception will lose + * state information, such as the contents of SRR0 and SRR1. + */ +nonrecoverable: + lis r10,exc_exit_restart_end@ha + addi r10,r10,exc_exit_restart_end@l + cmplw r12,r10 + bge 3f + lis r11,exc_exit_restart@ha + addi r11,r11,exc_exit_restart@l + cmplw r12,r11 + blt 3f + lis r10,ee_restarts@ha + lwz r12,ee_restarts@l(r10) + addi r12,r12,1 + stw r12,ee_restarts@l(r10) + mr r12,r11 /* restart at exc_exit_restart */ + blr +3: /* OK, we can't recover, kill this process */ + /* but the 601 doesn't implement the RI bit, so assume it's OK */ +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFSET(CPU_FTR_601) + lwz r3,TRAP(r1) + andi. r0,r3,1 + beq 4f + SAVE_NVGPRS(r1) + rlwinm r3,r3,0,0,30 + stw r3,TRAP(r1) +4: addi r3,r1,STACK_FRAME_OVERHEAD + bl nonrecoverable_exception + /* shouldn't return */ + b 4b + + .comm ee_restarts,4 + +/* + * PROM code for specific machines follows. Put it + * here so it's easy to add arch-specific sections later. + * -- Cort + */ +#ifdef CONFIG_PPC_OF +/* + * On CHRP, the Run-Time Abstraction Services (RTAS) have to be + * called with the MMU off. + */ +_GLOBAL(enter_rtas) + stwu r1,-INT_FRAME_SIZE(r1) + mflr r0 + stw r0,INT_FRAME_SIZE+4(r1) + lis r4,rtas_data@ha + lwz r4,rtas_data@l(r4) + lis r6,1f@ha /* physical return address for rtas */ + addi r6,r6,1f@l + tophys(r6,r6) + tophys(r7,r1) + lis r8,rtas_entry@ha + lwz r8,rtas_entry@l(r8) + mfmsr r9 + stw r9,8(r1) + LOAD_MSR_KERNEL(r0,MSR_KERNEL) + SYNC /* disable interrupts so SRR0/1 */ + MTMSRD(r0) /* don't get trashed */ + li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) + mtlr r6 + CLR_TOP32(r7) + mtspr SPRN_SPRG2,r7 + mtspr SPRN_SRR0,r8 + mtspr SPRN_SRR1,r9 + RFI +1: tophys(r9,r1) + lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */ + lwz r9,8(r9) /* original msr value */ + FIX_SRR1(r9,r0) + addi r1,r1,INT_FRAME_SIZE + li r0,0 + mtspr SPRN_SPRG2,r0 + mtspr SPRN_SRR0,r8 + mtspr SPRN_SRR1,r9 + RFI /* return to caller */ + + .globl machine_check_in_rtas +machine_check_in_rtas: + twi 31,0,0 + /* XXX load up BATs and panic */ + +#endif /* CONFIG_PPC_OF */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S new file mode 100644 index 00000000000..22796e28881 --- /dev/null +++ b/arch/powerpc/kernel/entry_64.S @@ -0,0 +1,842 @@ +/* + * arch/ppc64/kernel/entry.S + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This file contains the system call entry code, context switch + * code, and exception/interrupt return code for PowerPC. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <asm/unistd.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cputable.h> + +#ifdef CONFIG_PPC_ISERIES +#define DO_SOFT_DISABLE +#endif + +/* + * System calls. + */ + .section ".toc","aw" +.SYS_CALL_TABLE: + .tc .sys_call_table[TC],.sys_call_table + +/* This value is used to mark exception frames on the stack. */ +exception_marker: + .tc ID_72656773_68657265[TC],0x7265677368657265 + + .section ".text" + .align 7 + +#undef SHOW_SYSCALLS + + .globl system_call_common +system_call_common: + andi. r10,r12,MSR_PR + mr r10,r1 + addi r1,r1,-INT_FRAME_SIZE + beq- 1f + ld r1,PACAKSAVE(r13) +1: std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + li r11,0 + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + crclr so + mfcr r9 + mflr r10 + li r11,0xc01 + std r9,_CCR(r1) + std r10,_LINK(r1) + std r11,_TRAP(r1) + mfxer r9 + mfctr r10 + std r9,_XER(r1) + std r10,_CTR(r1) + std r3,ORIG_GPR3(r1) + ld r2,PACATOC(r13) + addi r9,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r9) /* "regshere" marker */ +#ifdef CONFIG_PPC_ISERIES + /* Hack for handling interrupts when soft-enabling on iSeries */ + cmpdi cr1,r0,0x5555 /* syscall 0x5555 */ + andi. r10,r12,MSR_PR /* from kernel */ + crand 4*cr0+eq,4*cr1+eq,4*cr0+eq + beq hardware_interrupt_entry + lbz r10,PACAPROCENABLED(r13) + std r10,SOFTE(r1) +#endif + mfmsr r11 + ori r11,r11,MSR_EE + mtmsrd r11,1 + +#ifdef SHOW_SYSCALLS + bl .do_show_syscall + REST_GPR(0,r1) + REST_4GPRS(3,r1) + REST_2GPRS(7,r1) + addi r9,r1,STACK_FRAME_OVERHEAD +#endif + clrrdi r11,r1,THREAD_SHIFT + li r12,0 + ld r10,TI_FLAGS(r11) + stb r12,TI_SC_NOERR(r11) + andi. r11,r10,_TIF_SYSCALL_T_OR_A + bne- syscall_dotrace +syscall_dotrace_cont: + cmpldi 0,r0,NR_syscalls + bge- syscall_enosys + +system_call: /* label this so stack traces look sane */ +/* + * Need to vector to 32 Bit or default sys_call_table here, + * based on caller's run-mode / personality. + */ + ld r11,.SYS_CALL_TABLE@toc(2) + andi. r10,r10,_TIF_32BIT + beq 15f + addi r11,r11,8 /* use 32-bit syscall entries */ + clrldi r3,r3,32 + clrldi r4,r4,32 + clrldi r5,r5,32 + clrldi r6,r6,32 + clrldi r7,r7,32 + clrldi r8,r8,32 +15: + slwi r0,r0,4 + ldx r10,r11,r0 /* Fetch system call handler [ptr] */ + mtctr r10 + bctrl /* Call handler */ + +syscall_exit: +#ifdef SHOW_SYSCALLS + std r3,GPR3(r1) + bl .do_show_syscall_exit + ld r3,GPR3(r1) +#endif + std r3,RESULT(r1) + ld r5,_CCR(r1) + li r10,-_LAST_ERRNO + cmpld r3,r10 + clrrdi r12,r1,THREAD_SHIFT + bge- syscall_error +syscall_error_cont: + + /* check for syscall tracing or audit */ + ld r9,TI_FLAGS(r12) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + bne- syscall_exit_trace +syscall_exit_trace_cont: + + /* disable interrupts so current_thread_info()->flags can't change, + and so that we don't get interrupted after loading SRR0/1. */ + ld r8,_MSR(r1) + andi. r10,r8,MSR_RI + beq- unrecov_restore + mfmsr r10 + rldicl r10,r10,48,1 + rotldi r10,r10,16 + mtmsrd r10,1 + ld r9,TI_FLAGS(r12) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + bne- syscall_exit_work + ld r7,_NIP(r1) + stdcx. r0,0,r1 /* to clear the reservation */ + andi. r6,r8,MSR_PR + ld r4,_LINK(r1) + beq- 1f /* only restore r13 if */ + ld r13,GPR13(r1) /* returning to usermode */ +1: ld r2,GPR2(r1) + li r12,MSR_RI + andc r10,r10,r12 + mtmsrd r10,1 /* clear MSR.RI */ + ld r1,GPR1(r1) + mtlr r4 + mtcr r5 + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r8 + rfid + b . /* prevent speculative execution */ + +syscall_enosys: + li r3,-ENOSYS + std r3,RESULT(r1) + clrrdi r12,r1,THREAD_SHIFT + ld r5,_CCR(r1) + +syscall_error: + lbz r11,TI_SC_NOERR(r12) + cmpwi 0,r11,0 + bne- syscall_error_cont + neg r3,r3 + oris r5,r5,0x1000 /* Set SO bit in CR */ + std r5,_CCR(r1) + b syscall_error_cont + +/* Traced system call support */ +syscall_dotrace: + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_syscall_trace_enter + ld r0,GPR0(r1) /* Restore original registers */ + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) + addi r9,r1,STACK_FRAME_OVERHEAD + clrrdi r10,r1,THREAD_SHIFT + ld r10,TI_FLAGS(r10) + b syscall_dotrace_cont + +syscall_exit_trace: + std r3,GPR3(r1) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_syscall_trace_leave + REST_NVGPRS(r1) + ld r3,GPR3(r1) + ld r5,_CCR(r1) + clrrdi r12,r1,THREAD_SHIFT + b syscall_exit_trace_cont + +/* Stuff to do on exit from a system call. */ +syscall_exit_work: + std r3,GPR3(r1) + std r5,_CCR(r1) + b .ret_from_except_lite + +/* Save non-volatile GPRs, if not already saved. */ +_GLOBAL(save_nvgprs) + ld r11,_TRAP(r1) + andi. r0,r11,1 + beqlr- + SAVE_NVGPRS(r1) + clrrdi r0,r11,1 + std r0,_TRAP(r1) + blr + +/* + * The sigsuspend and rt_sigsuspend system calls can call do_signal + * and thus put the process into the stopped state where we might + * want to examine its user state with ptrace. Therefore we need + * to save all the nonvolatile registers (r14 - r31) before calling + * the C code. Similarly, fork, vfork and clone need the full + * register state on the stack so that it can be copied to the child. + */ +_GLOBAL(ppc32_sigsuspend) + bl .save_nvgprs + bl .sys32_sigsuspend + b 70f + +_GLOBAL(ppc64_rt_sigsuspend) + bl .save_nvgprs + bl .sys_rt_sigsuspend + b 70f + +_GLOBAL(ppc32_rt_sigsuspend) + bl .save_nvgprs + bl .sys32_rt_sigsuspend +70: cmpdi 0,r3,0 + /* If it returned an error, we need to return via syscall_exit to set + the SO bit in cr0 and potentially stop for ptrace. */ + bne syscall_exit + /* If sigsuspend() returns zero, we are going into a signal handler. We + may need to call audit_syscall_exit() to mark the exit from sigsuspend() */ +#ifdef CONFIG_AUDIT + ld r3,PACACURRENT(r13) + ld r4,AUDITCONTEXT(r3) + cmpdi 0,r4,0 + beq .ret_from_except /* No audit_context: Leave immediately. */ + li r4, 2 /* AUDITSC_FAILURE */ + li r5,-4 /* It's always -EINTR */ + bl .audit_syscall_exit +#endif + b .ret_from_except + +_GLOBAL(ppc_fork) + bl .save_nvgprs + bl .sys_fork + b syscall_exit + +_GLOBAL(ppc_vfork) + bl .save_nvgprs + bl .sys_vfork + b syscall_exit + +_GLOBAL(ppc_clone) + bl .save_nvgprs + bl .sys_clone + b syscall_exit + +_GLOBAL(ppc32_swapcontext) + bl .save_nvgprs + bl .sys32_swapcontext + b 80f + +_GLOBAL(ppc64_swapcontext) + bl .save_nvgprs + bl .sys_swapcontext + b 80f + +_GLOBAL(ppc32_sigreturn) + bl .sys32_sigreturn + b 80f + +_GLOBAL(ppc32_rt_sigreturn) + bl .sys32_rt_sigreturn + b 80f + +_GLOBAL(ppc64_rt_sigreturn) + bl .sys_rt_sigreturn + +80: cmpdi 0,r3,0 + blt syscall_exit + clrrdi r4,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r4) + andi. r4,r4,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + beq+ 81f + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_syscall_trace_leave +81: b .ret_from_except + +_GLOBAL(ret_from_fork) + bl .schedule_tail + REST_NVGPRS(r1) + li r3,0 + b syscall_exit + +/* + * This routine switches between two different tasks. The process + * state of one is saved on its kernel stack. Then the state + * of the other is restored from its kernel stack. The memory + * management hardware is updated to the second process's state. + * Finally, we can return to the second process, via ret_from_except. + * On entry, r3 points to the THREAD for the current task, r4 + * points to the THREAD for the new task. + * + * Note: there are two ways to get to the "going out" portion + * of this code; either by coming in via the entry (_switch) + * or via "fork" which must set up an environment equivalent + * to the "_switch" path. If you change this you'll have to change + * the fork code also. + * + * The code which creates the new task context is in 'copy_thread' + * in arch/ppc64/kernel/process.c + */ + .align 7 +_GLOBAL(_switch) + mflr r0 + std r0,16(r1) + stdu r1,-SWITCH_FRAME_SIZE(r1) + /* r3-r13 are caller saved -- Cort */ + SAVE_8GPRS(14, r1) + SAVE_10GPRS(22, r1) + mflr r20 /* Return to switch caller */ + mfmsr r22 + li r0, MSR_FP +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + oris r0,r0,MSR_VEC@h /* Disable altivec */ + mfspr r24,SPRN_VRSAVE /* save vrsave register value */ + std r24,THREAD_VRSAVE(r3) +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif /* CONFIG_ALTIVEC */ + and. r0,r0,r22 + beq+ 1f + andc r22,r22,r0 + mtmsrd r22 + isync +1: std r20,_NIP(r1) + mfcr r23 + std r23,_CCR(r1) + std r1,KSP(r3) /* Set old stack pointer */ + +#ifdef CONFIG_SMP + /* We need a sync somewhere here to make sure that if the + * previous task gets rescheduled on another CPU, it sees all + * stores it has performed on this one. + */ + sync +#endif /* CONFIG_SMP */ + + addi r6,r4,-THREAD /* Convert THREAD to 'current' */ + std r6,PACACURRENT(r13) /* Set new 'current' */ + + ld r8,KSP(r4) /* new stack pointer */ +BEGIN_FTR_SECTION + clrrdi r6,r8,28 /* get its ESID */ + clrrdi r9,r1,28 /* get current sp ESID */ + clrldi. r0,r6,2 /* is new ESID c00000000? */ + cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */ + cror eq,4*cr1+eq,eq + beq 2f /* if yes, don't slbie it */ + + /* Bolt in the new stack SLB entry */ + ld r7,KSP_VSID(r4) /* Get new stack's VSID */ + oris r0,r6,(SLB_ESID_V)@h + ori r0,r0,(SLB_NUM_BOLTED-1)@l + slbie r6 + slbie r6 /* Workaround POWER5 < DD2.1 issue */ + slbmte r7,r0 + isync + +2: +END_FTR_SECTION_IFSET(CPU_FTR_SLB) + clrrdi r7,r8,THREAD_SHIFT /* base of new stack */ + /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE + because we don't need to leave the 288-byte ABI gap at the + top of the kernel stack. */ + addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE + + mr r1,r8 /* start using new stack pointer */ + std r7,PACAKSAVE(r13) + + ld r6,_CCR(r1) + mtcrf 0xFF,r6 + +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + ld r0,THREAD_VRSAVE(r4) + mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif /* CONFIG_ALTIVEC */ + + /* r3-r13 are destroyed -- Cort */ + REST_8GPRS(14, r1) + REST_10GPRS(22, r1) + + /* convert old thread to its task_struct for return value */ + addi r3,r3,-THREAD + ld r7,_NIP(r1) /* Return to _switch caller in new task */ + mtlr r7 + addi r1,r1,SWITCH_FRAME_SIZE + blr + + .align 7 +_GLOBAL(ret_from_except) + ld r11,_TRAP(r1) + andi. r0,r11,1 + bne .ret_from_except_lite + REST_NVGPRS(r1) + +_GLOBAL(ret_from_except_lite) + /* + * Disable interrupts so that current_thread_info()->flags + * can't change between when we test it and when we return + * from the interrupt. + */ + mfmsr r10 /* Get current interrupt state */ + rldicl r9,r10,48,1 /* clear MSR_EE */ + rotldi r9,r9,16 + mtmsrd r9,1 /* Update machine state */ + +#ifdef CONFIG_PREEMPT + clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ + li r0,_TIF_NEED_RESCHED /* bits to check */ + ld r3,_MSR(r1) + ld r4,TI_FLAGS(r9) + /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */ + rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING + and. r0,r4,r0 /* check NEED_RESCHED and maybe SIGPENDING */ + bne do_work + +#else /* !CONFIG_PREEMPT */ + ld r3,_MSR(r1) /* Returning to user mode? */ + andi. r3,r3,MSR_PR + beq restore /* if not, just restore regs and return */ + + /* Check current_thread_info()->flags */ + clrrdi r9,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r9) + andi. r0,r4,_TIF_USER_WORK_MASK + bne do_work +#endif + +restore: +#ifdef CONFIG_PPC_ISERIES + ld r5,SOFTE(r1) + cmpdi 0,r5,0 + beq 4f + /* Check for pending interrupts (iSeries) */ + ld r3,PACALPPACA+LPPACAANYINT(r13) + cmpdi r3,0 + beq+ 4f /* skip do_IRQ if no interrupts */ + + li r3,0 + stb r3,PACAPROCENABLED(r13) /* ensure we are soft-disabled */ + ori r10,r10,MSR_EE + mtmsrd r10 /* hard-enable again */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_IRQ + b .ret_from_except_lite /* loop back and handle more */ + +4: stb r5,PACAPROCENABLED(r13) +#endif + + ld r3,_MSR(r1) + andi. r0,r3,MSR_RI + beq- unrecov_restore + + andi. r0,r3,MSR_PR + + /* + * r13 is our per cpu area, only restore it if we are returning to + * userspace + */ + beq 1f + REST_GPR(13, r1) +1: + ld r3,_CTR(r1) + ld r0,_LINK(r1) + mtctr r3 + mtlr r0 + ld r3,_XER(r1) + mtspr SPRN_XER,r3 + + REST_8GPRS(5, r1) + + stdcx. r0,0,r1 /* to clear the reservation */ + + mfmsr r0 + li r2, MSR_RI + andc r0,r0,r2 + mtmsrd r0,1 + + ld r0,_MSR(r1) + mtspr SPRN_SRR1,r0 + + ld r2,_CCR(r1) + mtcrf 0xFF,r2 + ld r2,_NIP(r1) + mtspr SPRN_SRR0,r2 + + ld r0,GPR0(r1) + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r1,GPR1(r1) + + rfid + b . /* prevent speculative execution */ + +/* Note: this must change if we start using the TIF_NOTIFY_RESUME bit */ +do_work: +#ifdef CONFIG_PREEMPT + andi. r0,r3,MSR_PR /* Returning to user mode? */ + bne user_work + /* Check that preempt_count() == 0 and interrupts are enabled */ + lwz r8,TI_PREEMPT(r9) + cmpwi cr1,r8,0 +#ifdef CONFIG_PPC_ISERIES + ld r0,SOFTE(r1) + cmpdi r0,0 +#else + andi. r0,r3,MSR_EE +#endif + crandc eq,cr1*4+eq,eq + bne restore + /* here we are preempting the current task */ +1: +#ifdef CONFIG_PPC_ISERIES + li r0,1 + stb r0,PACAPROCENABLED(r13) +#endif + ori r10,r10,MSR_EE + mtmsrd r10,1 /* reenable interrupts */ + bl .preempt_schedule + mfmsr r10 + clrrdi r9,r1,THREAD_SHIFT + rldicl r10,r10,48,1 /* disable interrupts again */ + rotldi r10,r10,16 + mtmsrd r10,1 + ld r4,TI_FLAGS(r9) + andi. r0,r4,_TIF_NEED_RESCHED + bne 1b + b restore + +user_work: +#endif + /* Enable interrupts */ + ori r10,r10,MSR_EE + mtmsrd r10,1 + + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + bl .schedule + b .ret_from_except_lite + +1: bl .save_nvgprs + li r3,0 + addi r4,r1,STACK_FRAME_OVERHEAD + bl .do_signal + b .ret_from_except + +unrecov_restore: + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b unrecov_restore + +#ifdef CONFIG_PPC_RTAS +/* + * On CHRP, the Run-Time Abstraction Services (RTAS) have to be + * called with the MMU off. + * + * In addition, we need to be in 32b mode, at least for now. + * + * Note: r3 is an input parameter to rtas, so don't trash it... + */ +_GLOBAL(enter_rtas) + mflr r0 + std r0,16(r1) + stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */ + + /* Because RTAS is running in 32b mode, it clobbers the high order half + * of all registers that it saves. We therefore save those registers + * RTAS might touch to the stack. (r0, r3-r13 are caller saved) + */ + SAVE_GPR(2, r1) /* Save the TOC */ + SAVE_GPR(13, r1) /* Save paca */ + SAVE_8GPRS(14, r1) /* Save the non-volatiles */ + SAVE_10GPRS(22, r1) /* ditto */ + + mfcr r4 + std r4,_CCR(r1) + mfctr r5 + std r5,_CTR(r1) + mfspr r6,SPRN_XER + std r6,_XER(r1) + mfdar r7 + std r7,_DAR(r1) + mfdsisr r8 + std r8,_DSISR(r1) + mfsrr0 r9 + std r9,_SRR0(r1) + mfsrr1 r10 + std r10,_SRR1(r1) + + /* There is no way it is acceptable to get here with interrupts enabled, + * check it with the asm equivalent of WARN_ON + */ + mfmsr r6 + andi. r0,r6,MSR_EE +1: tdnei r0,0 +.section __bug_table,"a" + .llong 1b,__LINE__ + 0x1000000, 1f, 2f +.previous +.section .rodata,"a" +1: .asciz __FILE__ +2: .asciz "enter_rtas" +.previous + + /* Unfortunately, the stack pointer and the MSR are also clobbered, + * so they are saved in the PACA which allows us to restore + * our original state after RTAS returns. + */ + std r1,PACAR1(r13) + std r6,PACASAVEDMSR(r13) + + /* Setup our real return addr */ + SET_REG_TO_LABEL(r4,.rtas_return_loc) + SET_REG_TO_CONST(r9,KERNELBASE) + sub r4,r4,r9 + mtlr r4 + + li r0,0 + ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI + andc r0,r6,r0 + + li r9,1 + rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) + ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP + andc r6,r0,r9 + ori r6,r6,MSR_RI + sync /* disable interrupts so SRR0/1 */ + mtmsrd r0 /* don't get trashed */ + + SET_REG_TO_LABEL(r4,rtas) + ld r5,RTASENTRY(r4) /* get the rtas->entry value */ + ld r4,RTASBASE(r4) /* get the rtas->base value */ + + mtspr SPRN_SRR0,r5 + mtspr SPRN_SRR1,r6 + rfid + b . /* prevent speculative execution */ + +_STATIC(rtas_return_loc) + /* relocation is off at this point */ + mfspr r4,SPRN_SPRG3 /* Get PACA */ + SET_REG_TO_CONST(r5, KERNELBASE) + sub r4,r4,r5 /* RELOC the PACA base pointer */ + + mfmsr r6 + li r0,MSR_RI + andc r6,r6,r0 + sync + mtmsrd r6 + + ld r1,PACAR1(r4) /* Restore our SP */ + LOADADDR(r3,.rtas_restore_regs) + ld r4,PACASAVEDMSR(r4) /* Restore our MSR */ + + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + rfid + b . /* prevent speculative execution */ + +_STATIC(rtas_restore_regs) + /* relocation is on at this point */ + REST_GPR(2, r1) /* Restore the TOC */ + REST_GPR(13, r1) /* Restore paca */ + REST_8GPRS(14, r1) /* Restore the non-volatiles */ + REST_10GPRS(22, r1) /* ditto */ + + mfspr r13,SPRN_SPRG3 + + ld r4,_CCR(r1) + mtcr r4 + ld r5,_CTR(r1) + mtctr r5 + ld r6,_XER(r1) + mtspr SPRN_XER,r6 + ld r7,_DAR(r1) + mtdar r7 + ld r8,_DSISR(r1) + mtdsisr r8 + ld r9,_SRR0(r1) + mtsrr0 r9 + ld r10,_SRR1(r1) + mtsrr1 r10 + + addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */ + ld r0,16(r1) /* get return address */ + + mtlr r0 + blr /* return to caller */ + +#endif /* CONFIG_PPC_RTAS */ + +#ifdef CONFIG_PPC_MULTIPLATFORM + +_GLOBAL(enter_prom) + mflr r0 + std r0,16(r1) + stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ + + /* Because PROM is running in 32b mode, it clobbers the high order half + * of all registers that it saves. We therefore save those registers + * PROM might touch to the stack. (r0, r3-r13 are caller saved) + */ + SAVE_8GPRS(2, r1) + SAVE_GPR(13, r1) + SAVE_8GPRS(14, r1) + SAVE_10GPRS(22, r1) + mfcr r4 + std r4,_CCR(r1) + mfctr r5 + std r5,_CTR(r1) + mfspr r6,SPRN_XER + std r6,_XER(r1) + mfdar r7 + std r7,_DAR(r1) + mfdsisr r8 + std r8,_DSISR(r1) + mfsrr0 r9 + std r9,_SRR0(r1) + mfsrr1 r10 + std r10,_SRR1(r1) + mfmsr r11 + std r11,_MSR(r1) + + /* Get the PROM entrypoint */ + ld r0,GPR4(r1) + mtlr r0 + + /* Switch MSR to 32 bits mode + */ + mfmsr r11 + li r12,1 + rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) + andc r11,r11,r12 + li r12,1 + rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) + andc r11,r11,r12 + mtmsrd r11 + isync + + /* Restore arguments & enter PROM here... */ + ld r3,GPR3(r1) + blrl + + /* Just make sure that r1 top 32 bits didn't get + * corrupt by OF + */ + rldicl r1,r1,0,32 + + /* Restore the MSR (back to 64 bits) */ + ld r0,_MSR(r1) + mtmsrd r0 + isync + + /* Restore other registers */ + REST_GPR(2, r1) + REST_GPR(13, r1) + REST_8GPRS(14, r1) + REST_10GPRS(22, r1) + ld r4,_CCR(r1) + mtcr r4 + ld r5,_CTR(r1) + mtctr r5 + ld r6,_XER(r1) + mtspr SPRN_XER,r6 + ld r7,_DAR(r1) + mtdar r7 + ld r8,_DSISR(r1) + mtdsisr r8 + ld r9,_SRR0(r1) + mtsrr0 r9 + ld r10,_SRR1(r1) + mtsrr1 r10 + + addi r1,r1,PROM_FRAME_SIZE + ld r0,16(r1) + mtlr r0 + blr + +#endif /* CONFIG_PPC_MULTIPLATFORM */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S new file mode 100644 index 00000000000..563d445ff58 --- /dev/null +++ b/arch/powerpc/kernel/fpu.S @@ -0,0 +1,119 @@ +/* + * FPU support code, moved here from head.S so that it can be used + * by chips which use other head-whatever.S files. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* + * This task wants to use the FPU now. + * On UP, disable FP for the task which had the FPU previously, + * and save its floating-point registers in its thread_struct. + * Load up this task's FP registers from its thread_struct, + * enable the FPU for the current task and return to the task. + */ +_GLOBAL(load_up_fpu) + mfmsr r5 + ori r5,r5,MSR_FP + SYNC + MTMSRD(r5) /* enable use of fpu now */ + isync +/* + * For SMP, we don't do lazy FPU switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_fpu in switch_to. + */ +#ifndef CONFIG_SMP + LOADBASE(r3, last_task_used_math) + tophys(r3,r3) + LDL r4,OFF(last_task_used_math)(r3) + CMPI 0,r4,0 + beq 1f + tophys(r4,r4) + addi r4,r4,THREAD /* want last_task_used_math->thread */ + SAVE_32FPRS(0, r4) + mffs fr0 + stfd fr0,THREAD_FPSCR-4(r4) + LDL r5,PT_REGS(r4) + tophys(r5,r5) + LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r10,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r10 /* disable FP for previous task */ + STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of FP after return */ +#ifdef CONFIG_PPC32 + mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ + lwz r4,THREAD_FPEXC_MODE(r5) + ori r9,r9,MSR_FP /* enable FP for current */ + or r9,r9,r4 +#else + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + ld r4,THREAD_FPEXC_MODE(r5) + ori r12,r12,MSR_FP + or r12,r12,r4 + std r12,_MSR(r1) +#endif + lfd fr0,THREAD_FPSCR-4(r5) + mtfsf 0xff,fr0 + REST_32FPRS(0, r5) +#ifndef CONFIG_SMP + subi r4,r5,THREAD + tovirt(r4,r4) + STL r4,OFF(last_task_used_math)(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + /* we haven't used ctr or xer or lr */ + b fast_exception_return + +/* + * giveup_fpu(tsk) + * Disable FP for the task given as the argument, + * and save the floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + */ +_GLOBAL(giveup_fpu) + mfmsr r5 + ori r5,r5,MSR_FP + SYNC_601 + ISYNC_601 + MTMSRD(r5) /* enable use of fpu now */ + SYNC_601 + isync + CMPI 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + LDL r5,PT_REGS(r3) + CMPI 0,r5,0 + SAVE_32FPRS(0, r3) + mffs fr0 + stfd fr0,THREAD_FPSCR-4(r3) + beq 1f + LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r3,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r3 /* disable FP for previous task */ + STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + LOADBASE(r4,last_task_used_math) + STL r5,OFF(last_task_used_math)(r4) +#endif /* CONFIG_SMP */ + blr diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S new file mode 100644 index 00000000000..108e78ef387 --- /dev/null +++ b/arch/powerpc/kernel/head_32.S @@ -0,0 +1,1399 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * This file contains the low-level support and setup for the + * PowerPC platform, including trap and interrupt dispatch. + * (The PPC 8xx embedded CPUs use head_8xx.S instead.) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +#ifdef CONFIG_APUS +#include <asm/amigappc.h> +#endif + +/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ +#define LOAD_BAT(n, reg, RA, RB) \ + /* see the comment for clear_bats() -- Cort */ \ + li RA,0; \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##U,RA; \ + lwz RA,(n*16)+0(reg); \ + lwz RB,(n*16)+4(reg); \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_IBAT##n##L,RB; \ + beq 1f; \ + lwz RA,(n*16)+8(reg); \ + lwz RB,(n*16)+12(reg); \ + mtspr SPRN_DBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##L,RB; \ +1: + + .text + .stabs "arch/powerpc/kernel/",N_SO,0,0,0f + .stabs "head_32.S",N_SO,0,0,0f +0: + .globl _stext +_stext: + +/* + * _start is defined this way because the XCOFF loader in the OpenFirmware + * on the powermac expects the entry point to be a procedure descriptor. + */ + .text + .globl _start +_start: + /* + * These are here for legacy reasons, the kernel used to + * need to look like a coff function entry for the pmac + * but we're always started by some kind of bootloader now. + * -- Cort + */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop + +/* PMAC + * Enter here with the kernel text, data and bss loaded starting at + * 0, running with virtual == physical mapping. + * r5 points to the prom entry point (the client interface handler + * address). Address translation is turned on, with the prom + * managing the hash table. Interrupts are disabled. The stack + * pointer (r1) points to just below the end of the half-meg region + * from 0x380000 - 0x400000, which is mapped in already. + * + * If we are booted from MacOS via BootX, we enter with the kernel + * image loaded somewhere, and the following values in registers: + * r3: 'BooX' (0x426f6f58) + * r4: virtual address of boot_infos_t + * r5: 0 + * + * APUS + * r3: 'APUS' + * r4: physical address of memory base + * Linux/m68k style BootInfo structure at &_end. + * + * PREP + * This is jumped to on prep systems right after the kernel is relocated + * to its proper place in memory by the boot loader. The expected layout + * of the regs is: + * r3: ptr to residual data + * r4: initrd_start or if no initrd then 0 + * r5: initrd_end - unused if r4 is 0 + * r6: Start of command line string + * r7: End of command line string + * + * This just gets a minimal mmu environment setup so we can call + * start_here() to do the real work. + * -- Cort + */ + + .globl __start +__start: +/* + * We have to do any OF calls before we map ourselves to KERNELBASE, + * because OF may have I/O devices mapped into that area + * (particularly on CHRP). + */ + cmpwi 0,r5,0 + beq 1f + bl prom_init + trap + +1: mr r31,r3 /* save parameters */ + mr r30,r4 + li r24,0 /* cpu # */ + +/* + * early_init() does the early machine identification and does + * the necessary low-level setup and clears the BSS + * -- Cort <cort@fsmlabs.com> + */ + bl early_init + +#ifdef CONFIG_APUS +/* On APUS the __va/__pa constants need to be set to the correct + * values before continuing. + */ + mr r4,r30 + bl fix_mem_constants +#endif /* CONFIG_APUS */ + +/* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains + * the physical address we are running at, returned by early_init() + */ + bl mmu_off +__after_mmu_off: + bl clear_bats + bl flush_tlbs + + bl initial_bats +#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) + bl setup_disp_bat +#endif + +/* + * Call setup_cpu for CPU 0 and initialize 6xx Idle + */ + bl reloc_offset + li r24,0 /* cpu# */ + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_6xx + bl reloc_offset + bl init_idle_6xx +#endif /* CONFIG_6xx */ + + +#ifndef CONFIG_APUS +/* + * We need to run with _start at physical address 0. + * On CHRP, we are loaded at 0x10000 since OF on CHRP uses + * the exception vectors at 0 (and therefore this copy + * overwrites OF's exception vectors with our own). + * The MMU is off at this point. + */ + bl reloc_offset + mr r26,r3 + addis r4,r3,KERNELBASE@h /* current address of _start */ + cmpwi 0,r4,0 /* are we already running at 0? */ + bne relocate_kernel +#endif /* CONFIG_APUS */ +/* + * we now have the 1st 16M of ram mapped with the bats. + * prep needs the mmu to be turned on here, but pmac already has it on. + * this shouldn't bother the pmac since it just gets turned on again + * as we jump to our code at KERNELBASE. -- Cort + * Actually no, pmac doesn't have it on any more. BootX enters with MMU + * off, and in other cases, we now turn it off before changing BATs above. + */ +turn_on_mmu: + mfmsr r0 + ori r0,r0,MSR_DR|MSR_IR + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + SYNC + RFI /* enables MMU */ + +/* + * We need __secondary_hold as a place to hold the other cpus on + * an SMP machine, even when we are running a UP kernel. + */ + . = 0xc0 /* for prep bootloader */ + li r3,1 /* MTX only has 1 cpu */ + .globl __secondary_hold +__secondary_hold: + /* tell the master we're here */ + stw r3,4(0) +#ifdef CONFIG_SMP +100: lwz r4,0(0) + /* wait until we're told to start */ + cmpw 0,r4,r3 + bne 100b + /* our cpu # was at addr 0 - go */ + mr r24,r3 /* cpu # */ + b __secondary_start +#else + b . +#endif /* CONFIG_SMP */ + +/* + * Exception entry code. This code runs with address translation + * turned off, i.e. using physical addresses. + * We assume sprg3 has the physical address of the current + * task's thread_struct. + */ +#define EXCEPTION_PROLOG \ + mtspr SPRN_SPRG0,r10; \ + mtspr SPRN_SPRG1,r11; \ + mfcr r10; \ + EXCEPTION_PROLOG_1; \ + EXCEPTION_PROLOG_2 + +#define EXCEPTION_PROLOG_1 \ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ + andi. r11,r11,MSR_PR; \ + tophys(r11,r1); /* use tophys(r1) if kernel */ \ + beq 1f; \ + mfspr r11,SPRN_SPRG3; \ + lwz r11,THREAD_INFO-THREAD(r11); \ + addi r11,r11,THREAD_SIZE; \ + tophys(r11,r11); \ +1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ + + +#define EXCEPTION_PROLOG_2 \ + CLR_TOP32(r11); \ + stw r10,_CCR(r11); /* save registers */ \ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r10,SPRN_SPRG0; \ + stw r10,GPR10(r11); \ + mfspr r12,SPRN_SPRG1; \ + stw r12,GPR11(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_SRR0; \ + mfspr r9,SPRN_SRR1; \ + stw r1,GPR1(r11); \ + stw r1,0(r11); \ + tovirt(r1,r11); /* set new kernel sp */ \ + li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ + MTMSRD(r10); /* (except for mach check in rtas) */ \ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* + * Note: code which follows this uses cr0.eq (set if from kernel), + * r11, r12 (SRR0), and r9 (SRR1). + * + * Note2: once we have set r1 we are in a position to take exceptions + * again, and we could thus set MSR:RI at that point. + */ + +/* + * Exception vectors. + */ +#define EXCEPTION(n, label, hdlr, xfer) \ + . = n; \ +label: \ + EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ + li r10,trap; \ + stw r10,TRAP(r11); \ + li r10,MSR_KERNEL; \ + copyee(r10, r9); \ + bl tfer; \ +i##n: \ + .long hdlr; \ + .long ret + +#define COPY_EE(d, s) rlwimi d,s,0,16,16 +#define NOCOPY(d, s) + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ + ret_from_except) + +#define EXC_XFER_EE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_EE_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ + ret_from_except) + +/* System reset */ +/* core99 pmac starts the seconary here by changing the vector, and + putting it back to what it was (unknown_exception) when done. */ +#if defined(CONFIG_GEMINI) && defined(CONFIG_SMP) + . = 0x100 + b __secondary_start_gemini +#else + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) +#endif + +/* Machine check */ +/* + * On CHRP, this is complicated by the fact that we could get a + * machine check inside RTAS, and we have no guarantee that certain + * critical registers will have the values we expect. The set of + * registers that might have bad values includes all the GPRs + * and all the BATs. We indicate that we are in RTAS by putting + * a non-zero value, the address of the exception frame to use, + * in SPRG2. The machine check handler checks SPRG2 and uses its + * value if it is non-zero. If we ever needed to free up SPRG2, + * we could use a field in the thread_info or thread_struct instead. + * (Other exception handlers assume that r1 is a valid kernel stack + * pointer when we take an exception from supervisor mode.) + * -- paulus. + */ + . = 0x200 + mtspr SPRN_SPRG0,r10 + mtspr SPRN_SPRG1,r11 + mfcr r10 +#ifdef CONFIG_PPC_CHRP + mfspr r11,SPRN_SPRG2 + cmpwi 0,r11,0 + bne 7f +#endif /* CONFIG_PPC_CHRP */ + EXCEPTION_PROLOG_1 +7: EXCEPTION_PROLOG_2 + addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_CHRP + mfspr r4,SPRN_SPRG2 + cmpwi cr1,r4,0 + bne cr1,1f +#endif + EXC_XFER_STD(0x200, machine_check_exception) +#ifdef CONFIG_PPC_CHRP +1: b machine_check_in_rtas +#endif + +/* Data access exception. */ + . = 0x300 +DataAccess: + EXCEPTION_PROLOG + mfspr r10,SPRN_DSISR + andis. r0,r10,0xa470 /* weird error? */ + bne 1f /* if not, try to put a PTE */ + mfspr r4,SPRN_DAR /* into the hash table */ + rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ + bl hash_page +1: stw r10,_DSISR(r11) + mr r5,r10 + mfspr r4,SPRN_DAR + EXC_XFER_EE_LITE(0x300, handle_page_fault) + + +/* Instruction access exception. */ + . = 0x400 +InstructionAccess: + EXCEPTION_PROLOG + andis. r0,r9,0x4000 /* no pte found? */ + beq 1f /* if so, try to put a PTE */ + li r3,0 /* into the hash table */ + mr r4,r12 /* SRR0 is fault address */ + bl hash_page +1: mr r4,r12 + mr r5,r9 + EXC_XFER_EE_LITE(0x400, handle_page_fault) + +/* External interrupt */ + EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + +/* Alignment exception */ + . = 0x600 +Alignment: + EXCEPTION_PROLOG + mfspr r4,SPRN_DAR + stw r4,_DAR(r11) + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0x600, alignment_exception) + +/* Program check exception */ + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + +/* Floating-point unavailable */ + . = 0x800 +FPUnavailable: + EXCEPTION_PROLOG + bne load_up_fpu /* if from user, just load it up */ + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + +/* Decrementer */ + EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + +/* System call */ + . = 0xc00 +SystemCall: + EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0xc00, DoSyscall) + +/* Single step - not used on 601 */ + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + +/* + * The Altivec unavailable trap is at 0x0f20. Foo. + * We effectively remap it to 0x3000. + * We include an altivec unavailable exception vector even if + * not configured for Altivec, so that you can't panic a + * non-altivec kernel running on a machine with altivec just + * by executing an altivec instruction. + */ + . = 0xf00 + b Trap_0f + + . = 0xf20 + b AltiVecUnavailable + +Trap_0f: + EXCEPTION_PROLOG + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0xf00, unknown_exception) + +/* + * Handle TLB miss for instruction on 603/603e. + * Note: we get an alternate set of r0 - r3 to use automatically. + */ + . = 0x1000 +InstructionTLBMiss: +/* + * r0: stored ctr + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + mfctr r0 + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_IMISS + lis r1,KERNELBASE@h /* check if kernel address */ + cmplw 0,r3,r1 + mfspr r2,SPRN_SPRG3 + li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ + lwz r2,PGDIR(r2) + blt+ 112f + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ + mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- InstructionAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r3,0(r2) /* get linux-style pte */ + andc. r1,r1,r3 /* check access & ~permission */ + bne- InstructionAddressInvalid /* return if access not permitted */ + ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + stw r3,0(r2) /* update PTE (accessed bit) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r1,r1,r2 /* writable if _RW and _DIRTY */ + rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */ + ori r1,r1,0xe14 /* clear out reserved bits and M */ + andc r1,r3,r1 /* PP = user? (rw&dirty? 2: 3): 0 */ + mtspr SPRN_RPA,r1 + mfspr r3,SPRN_IMISS + tlbli r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi +InstructionAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */ + mtctr r0 /* Restore CTR */ + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + or r2,r2,r1 + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_IMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + rlwimi r2,r2,1,30,30 /* change 1 -> 3 */ + xor r1,r1,r2 + mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b InstructionAccess + +/* + * Handle TLB miss for DATA Load operation on 603/603e + */ + . = 0x1100 +DataLoadTLBMiss: +/* + * r0: stored ctr + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + mfctr r0 + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1,KERNELBASE@h /* check if kernel address */ + cmplw 0,r3,r1 + mfspr r2,SPRN_SPRG3 + li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ + lwz r2,PGDIR(r2) + blt+ 112f + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ + mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r3,0(r2) /* get linux-style pte */ + andc. r1,r1,r3 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + stw r3,0(r2) /* update PTE (accessed bit) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r1,r1,r2 /* writable if _RW and _DIRTY */ + rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */ + ori r1,r1,0xe14 /* clear out reserved bits and M */ + andc r1,r3,r1 /* PP = user? (rw&dirty? 2: 3): 0 */ + mtspr SPRN_RPA,r1 + mfspr r3,SPRN_DMISS + tlbld r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi +DataAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 + mtctr r0 /* Restore CTR */ + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_DMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + beq 20f /* Jump if big endian */ + xori r1,r1,3 +20: mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b DataAccess + +/* + * Handle TLB miss for DATA Store on 603/603e + */ + . = 0x1200 +DataStoreTLBMiss: +/* + * r0: stored ctr + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + mfctr r0 + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1,KERNELBASE@h /* check if kernel address */ + cmplw 0,r3,r1 + mfspr r2,SPRN_SPRG3 + li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */ + lwz r2,PGDIR(r2) + blt+ 112f + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ + mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r3,0(r2) /* get linux-style pte */ + andc. r1,r1,r3 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + ori r3,r3,_PAGE_ACCESSED|_PAGE_DIRTY + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + stw r3,0(r2) /* update PTE (accessed/dirty bits) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ + li r1,0xe15 /* clear out reserved bits and M */ + andc r1,r3,r1 /* PP = user? 2: 0 */ + mtspr SPRN_RPA,r1 + mfspr r3,SPRN_DMISS + tlbld r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi + +#ifndef CONFIG_ALTIVEC +#define altivec_assist_exception unknown_exception +#endif + + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) + EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE) + + .globl mol_trampoline + .set mol_trampoline, i0x2f00 + + . = 0x3000 + +AltiVecUnavailable: + EXCEPTION_PROLOG +#ifdef CONFIG_ALTIVEC + bne load_up_altivec /* if from user, just load it up */ +#endif /* CONFIG_ALTIVEC */ + EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) + +#ifdef CONFIG_ALTIVEC +/* Note that the AltiVec support is closely modeled after the FP + * support. Changes to one are likely to be applicable to the + * other! */ +load_up_altivec: +/* + * Disable AltiVec for the task which had AltiVec previously, + * and save its AltiVec registers in its thread_struct. + * Enables AltiVec for use in the kernel on return. + * On SMP we know the AltiVec units are free, since we give it up every + * switch. -- Kumar + */ + mfmsr r5 + oris r5,r5,MSR_VEC@h + MTMSRD(r5) /* enable use of AltiVec now */ + isync +/* + * For SMP, we don't do lazy AltiVec switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_altivec in switch_to. + */ +#ifndef CONFIG_SMP + tophys(r6,0) + addis r3,r6,last_task_used_altivec@ha + lwz r4,last_task_used_altivec@l(r3) + cmpwi 0,r4,0 + beq 1f + add r4,r4,r6 + addi r4,r4,THREAD /* want THREAD of last_task_used_altivec */ + SAVE_32VRS(0,r10,r4) + mfvscr vr0 + li r10,THREAD_VSCR + stvx vr0,r10,r4 + lwz r5,PT_REGS(r4) + add r5,r5,r6 + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r10,MSR_VEC@h + andc r4,r4,r10 /* disable altivec for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of AltiVec after return */ + oris r9,r9,MSR_VEC@h + mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ + li r4,1 + li r10,THREAD_VSCR + stw r4,THREAD_USED_VR(r5) + lvx vr0,r10,r5 + mtvscr vr0 + REST_32VRS(0,r10,r5) +#ifndef CONFIG_SMP + subi r4,r5,THREAD + sub r4,r4,r6 + stw r4,last_task_used_altivec@l(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + /* we haven't used ctr or xer or lr */ + b fast_exception_return + +/* + * AltiVec unavailable trap from kernel - print a message, but let + * the task use AltiVec in the kernel until it returns to user mode. + */ +KernelAltiVec: + lwz r3,_MSR(r1) + oris r3,r3,MSR_VEC@h + stw r3,_MSR(r1) /* enable use of AltiVec after return */ + lis r3,87f@h + ori r3,r3,87f@l + mr r4,r2 /* current */ + lwz r5,_NIP(r1) + bl printk + b ret_from_except +87: .string "AltiVec used in kernel (task=%p, pc=%x) \n" + .align 4,0 + +/* + * giveup_altivec(tsk) + * Disable AltiVec for the task given as the argument, + * and save the AltiVec registers in its thread_struct. + * Enables AltiVec for use in the kernel on return. + */ + + .globl giveup_altivec +giveup_altivec: + mfmsr r5 + oris r5,r5,MSR_VEC@h + SYNC + MTMSRD(r5) /* enable use of AltiVec now */ + isync + cmpwi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + lwz r5,PT_REGS(r3) + cmpwi 0,r5,0 + SAVE_32VRS(0, r4, r3) + mfvscr vr0 + li r4,THREAD_VSCR + stvx vr0,r4,r3 + beq 1f + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_VEC@h + andc r4,r4,r3 /* disable AltiVec for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + lis r4,last_task_used_altivec@ha + stw r5,last_task_used_altivec@l(r4) +#endif /* CONFIG_SMP */ + blr +#endif /* CONFIG_ALTIVEC */ + +/* + * This code is jumped to from the startup code to copy + * the kernel image to physical address 0. + */ +relocate_kernel: + addis r9,r26,klimit@ha /* fetch klimit */ + lwz r25,klimit@l(r9) + addis r25,r25,-KERNELBASE@h + li r3,0 /* Destination base address */ + li r6,0 /* Destination offset */ + li r5,0x4000 /* # bytes of memory to copy */ + bl copy_and_flush /* copy the first 0x4000 bytes */ + addi r0,r3,4f@l /* jump to the address of 4f */ + mtctr r0 /* in copy and do the rest. */ + bctr /* jump to the copy */ +4: mr r5,r25 + bl copy_and_flush /* copy the rest */ + b turn_on_mmu + +/* + * Copy routine used to copy the kernel to start at physical address 0 + * and flush and invalidate the caches as needed. + * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset + * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. + */ +copy_and_flush: + addi r5,r5,-4 + addi r6,r6,-4 +4: li r0,L1_CACHE_LINE_SIZE/4 + mtctr r0 +3: addi r6,r6,4 /* copy a cache line */ + lwzx r0,r6,r4 + stwx r0,r6,r3 + bdnz 3b + dcbst r6,r3 /* write it to memory */ + sync + icbi r6,r3 /* flush the icache line */ + cmplw 0,r6,r5 + blt 4b + sync /* additional sync needed on g4 */ + isync + addi r5,r5,4 + addi r6,r6,4 + blr + +#ifdef CONFIG_APUS +/* + * On APUS the physical base address of the kernel is not known at compile + * time, which means the __pa/__va constants used are incorrect. In the + * __init section is recorded the virtual addresses of instructions using + * these constants, so all that has to be done is fix these before + * continuing the kernel boot. + * + * r4 = The physical address of the kernel base. + */ +fix_mem_constants: + mr r10,r4 + addis r10,r10,-KERNELBASE@h /* virt_to_phys constant */ + neg r11,r10 /* phys_to_virt constant */ + + lis r12,__vtop_table_begin@h + ori r12,r12,__vtop_table_begin@l + add r12,r12,r10 /* table begin phys address */ + lis r13,__vtop_table_end@h + ori r13,r13,__vtop_table_end@l + add r13,r13,r10 /* table end phys address */ + subi r12,r12,4 + subi r13,r13,4 +1: lwzu r14,4(r12) /* virt address of instruction */ + add r14,r14,r10 /* phys address of instruction */ + lwz r15,0(r14) /* instruction, now insert top */ + rlwimi r15,r10,16,16,31 /* half of vp const in low half */ + stw r15,0(r14) /* of instruction and restore. */ + dcbst r0,r14 /* write it to memory */ + sync + icbi r0,r14 /* flush the icache line */ + cmpw r12,r13 + bne 1b + sync /* additional sync needed on g4 */ + isync + +/* + * Map the memory where the exception handlers will + * be copied to when hash constants have been patched. + */ +#ifdef CONFIG_APUS_FAST_EXCEPT + lis r8,0xfff0 +#else + lis r8,0 +#endif + ori r8,r8,0x2 /* 128KB, supervisor */ + mtspr SPRN_DBAT3U,r8 + mtspr SPRN_DBAT3L,r8 + + lis r12,__ptov_table_begin@h + ori r12,r12,__ptov_table_begin@l + add r12,r12,r10 /* table begin phys address */ + lis r13,__ptov_table_end@h + ori r13,r13,__ptov_table_end@l + add r13,r13,r10 /* table end phys address */ + subi r12,r12,4 + subi r13,r13,4 +1: lwzu r14,4(r12) /* virt address of instruction */ + add r14,r14,r10 /* phys address of instruction */ + lwz r15,0(r14) /* instruction, now insert top */ + rlwimi r15,r11,16,16,31 /* half of pv const in low half*/ + stw r15,0(r14) /* of instruction and restore. */ + dcbst r0,r14 /* write it to memory */ + sync + icbi r0,r14 /* flush the icache line */ + cmpw r12,r13 + bne 1b + + sync /* additional sync needed on g4 */ + isync /* No speculative loading until now */ + blr + +/*********************************************************************** + * Please note that on APUS the exception handlers are located at the + * physical address 0xfff0000. For this reason, the exception handlers + * cannot use relative branches to access the code below. + ***********************************************************************/ +#endif /* CONFIG_APUS */ + +#ifdef CONFIG_SMP +#ifdef CONFIG_GEMINI + .globl __secondary_start_gemini +__secondary_start_gemini: + mfspr r4,SPRN_HID0 + ori r4,r4,HID0_ICFI + li r3,0 + ori r3,r3,HID0_ICE + andc r4,r4,r3 + mtspr SPRN_HID0,r4 + sync + b __secondary_start +#endif /* CONFIG_GEMINI */ + + .globl __secondary_start_pmac_0 +__secondary_start_pmac_0: + /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */ + li r24,0 + b 1f + li r24,1 + b 1f + li r24,2 + b 1f + li r24,3 +1: + /* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0 + set to map the 0xf0000000 - 0xffffffff region */ + mfmsr r0 + rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ + SYNC + mtmsr r0 + isync + + .globl __secondary_start +__secondary_start: + /* Copy some CPU settings from CPU 0 */ + bl __restore_cpu_setup + + lis r3,-KERNELBASE@h + mr r4,r24 + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_6xx + lis r3,-KERNELBASE@h + bl init_idle_6xx +#endif /* CONFIG_6xx */ + + /* get current_thread_info and current */ + lis r1,secondary_ti@ha + tophys(r1,r1) + lwz r1,secondary_ti@l(r1) + tophys(r2,r1) + lwz r2,TI_TASK(r2) + + /* stack */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r0,0 + tophys(r3,r1) + stw r0,0(r3) + + /* load up the MMU */ + bl load_up_mmu + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* phys address of our thread_struct */ + CLR_TOP32(r4) + mtspr SPRN_SPRG3,r4 + li r3,0 + mtspr SPRN_SPRG2,r3 /* 0 => not in RTAS */ + + /* enable MMU and jump to start_secondary */ + li r4,MSR_KERNEL + FIX_SRR1(r4,r5) + lis r3,start_secondary@h + ori r3,r3,start_secondary@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + SYNC + RFI +#endif /* CONFIG_SMP */ + +/* + * Those generic dummy functions are kept for CPUs not + * included in CONFIG_6xx + */ +#if !defined(CONFIG_6xx) +_GLOBAL(__save_cpu_setup) + blr +_GLOBAL(__restore_cpu_setup) + blr +#endif /* !defined(CONFIG_6xx) */ + + +/* + * Load stuff into the MMU. Intended to be called with + * IR=0 and DR=0. + */ +load_up_mmu: + sync /* Force all PTE updates to finish */ + isync + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + /* Load the SDR1 register (hash table base & size) */ + lis r6,_SDR1@ha + tophys(r6,r6) + lwz r6,_SDR1@l(r6) + mtspr SPRN_SDR1,r6 + li r0,16 /* load up segment register values */ + mtctr r0 /* for context 0 */ + lis r3,0x2000 /* Ku = 1, VSID = 0 */ + li r4,0 +3: mtsrin r3,r4 + addi r3,r3,0x111 /* increment VSID */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b + +/* Load the BAT registers with the values set up by MMU_init. + MMU_init takes care of whether we're on a 601 or not. */ + mfpvr r3 + srwi r3,r3,16 + cmpwi r3,1 + lis r3,BATS@ha + addi r3,r3,BATS@l + tophys(r3,r3) + LOAD_BAT(0,r3,r4,r5) + LOAD_BAT(1,r3,r4,r5) + LOAD_BAT(2,r3,r4,r5) + LOAD_BAT(3,r3,r4,r5) + + blr + +/* + * This is where the main kernel code starts. + */ +start_here: + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + /* Set up for using our exception vectors */ + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* init task's THREAD */ + CLR_TOP32(r4) + mtspr SPRN_SPRG3,r4 + li r3,0 + mtspr SPRN_SPRG2,r3 /* 0 => not in RTAS */ + + /* stack */ + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) +/* + * Do early platform-specific initialization, + * and set up the MMU. + */ + mr r3,r31 + mr r4,r30 + bl machine_init + bl MMU_init + +#ifdef CONFIG_APUS + /* Copy exception code to exception vector base on APUS. */ + lis r4,KERNELBASE@h +#ifdef CONFIG_APUS_FAST_EXCEPT + lis r3,0xfff0 /* Copy to 0xfff00000 */ +#else + lis r3,0 /* Copy to 0x00000000 */ +#endif + li r5,0x4000 /* # bytes of memory to copy */ + li r6,0 + bl copy_and_flush /* copy the first 0x4000 bytes */ +#endif /* CONFIG_APUS */ + +/* + * Go back to running unmapped so we can load up new values + * for SDR1 (hash table pointer) and the segment registers + * and change to using our exception vectors. + */ + lis r4,2f@h + ori r4,r4,2f@l + tophys(r4,r4) + li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) + FIX_SRR1(r3,r5) + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + SYNC + RFI +/* Load up the kernel context */ +2: bl load_up_mmu + +#ifdef CONFIG_BDI_SWITCH + /* Add helper information for the Abatron bdiGDB debugger. + * We do this here because we know the mmu is disabled, and + * will be enabled for real in just a few instructions. + */ + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r5, 0xf0(r0) /* This much match your Abatron config */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + tophys(r5, r5) + stw r6, 0(r5) +#endif /* CONFIG_BDI_SWITCH */ + +/* Now turn on the MMU for real! */ + li r4,MSR_KERNEL + FIX_SRR1(r4,r5) + lis r3,start_kernel@h + ori r3,r3,start_kernel@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + SYNC + RFI + +/* + * Set up the segment registers for a new context. + */ +_GLOBAL(set_context) + mulli r3,r3,897 /* multiply context by skew factor */ + rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ + addis r3,r3,0x6000 /* Set Ks, Ku bits */ + li r0,NUM_USER_SEGMENTS + mtctr r0 + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is passed as second argument. + */ + lis r5, KERNELBASE@h + lwz r5, 0xf0(r5) + stw r4, 0x4(r5) +#endif + li r4,0 + isync +3: + mtsrin r3,r4 + addi r3,r3,0x111 /* next VSID */ + rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b + sync + isync + blr + +/* + * An undocumented "feature" of 604e requires that the v bit + * be cleared before changing BAT values. + * + * Also, newer IBM firmware does not clear bat3 and 4 so + * this makes sure it's done. + * -- Cort + */ +clear_bats: + li r10,0 + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi r9, 1 + beq 1f + + mtspr SPRN_DBAT0U,r10 + mtspr SPRN_DBAT0L,r10 + mtspr SPRN_DBAT1U,r10 + mtspr SPRN_DBAT1L,r10 + mtspr SPRN_DBAT2U,r10 + mtspr SPRN_DBAT2L,r10 + mtspr SPRN_DBAT3U,r10 + mtspr SPRN_DBAT3L,r10 +1: + mtspr SPRN_IBAT0U,r10 + mtspr SPRN_IBAT0L,r10 + mtspr SPRN_IBAT1U,r10 + mtspr SPRN_IBAT1L,r10 + mtspr SPRN_IBAT2U,r10 + mtspr SPRN_IBAT2L,r10 + mtspr SPRN_IBAT3U,r10 + mtspr SPRN_IBAT3L,r10 +BEGIN_FTR_SECTION + /* Here's a tweak: at this point, CPU setup have + * not been called yet, so HIGH_BAT_EN may not be + * set in HID0 for the 745x processors. However, it + * seems that doesn't affect our ability to actually + * write to these SPRs. + */ + mtspr SPRN_DBAT4U,r10 + mtspr SPRN_DBAT4L,r10 + mtspr SPRN_DBAT5U,r10 + mtspr SPRN_DBAT5L,r10 + mtspr SPRN_DBAT6U,r10 + mtspr SPRN_DBAT6L,r10 + mtspr SPRN_DBAT7U,r10 + mtspr SPRN_DBAT7L,r10 + mtspr SPRN_IBAT4U,r10 + mtspr SPRN_IBAT4L,r10 + mtspr SPRN_IBAT5U,r10 + mtspr SPRN_IBAT5L,r10 + mtspr SPRN_IBAT6U,r10 + mtspr SPRN_IBAT6L,r10 + mtspr SPRN_IBAT7U,r10 + mtspr SPRN_IBAT7L,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS) + blr + +flush_tlbs: + lis r10, 0x40 +1: addic. r10, r10, -0x1000 + tlbie r10 + blt 1b + sync + blr + +mmu_off: + addi r4, r3, __after_mmu_off - _start + mfmsr r3 + andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ + beqlr + andc r3,r3,r0 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + sync + RFI + +/* + * Use the first pair of BAT registers to map the 1st 16MB + * of RAM to KERNELBASE. From this point on we can't safely + * call OF any more. + */ +initial_bats: + lis r11,KERNELBASE@h + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi 0,r9,1 + bne 4f + ori r11,r11,4 /* set up BAT registers for 601 */ + li r8,0x7f /* valid, block length = 8MB */ + oris r9,r11,0x800000@h /* set up BAT reg for 2nd 8M */ + oris r10,r8,0x800000@h /* set up BAT reg for 2nd 8M */ + mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */ + mtspr SPRN_IBAT0L,r8 /* lower BAT register */ + mtspr SPRN_IBAT1U,r9 + mtspr SPRN_IBAT1L,r10 + isync + blr + +4: tophys(r8,r11) +#ifdef CONFIG_SMP + ori r8,r8,0x12 /* R/W access, M=1 */ +#else + ori r8,r8,2 /* R/W access */ +#endif /* CONFIG_SMP */ +#ifdef CONFIG_APUS + ori r11,r11,BL_8M<<2|0x2 /* set up 8MB BAT registers for 604 */ +#else + ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ +#endif /* CONFIG_APUS */ + + mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */ + mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ + mtspr SPRN_IBAT0L,r8 + mtspr SPRN_IBAT0U,r11 + isync + blr + +#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) +setup_disp_bat: + /* + * setup the display bat prepared for us in prom.c + */ + mflr r8 + bl reloc_offset + mtlr r8 + addis r8,r3,disp_BAT@ha + addi r8,r8,disp_BAT@l + lwz r11,0(r8) + lwz r8,4(r8) + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi 0,r9,1 + beq 1f + mtspr SPRN_DBAT3L,r8 + mtspr SPRN_DBAT3U,r11 + blr +1: mtspr SPRN_IBAT3L,r8 + mtspr SPRN_IBAT3U,r11 + blr + +#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */ + + +#ifdef CONFIG_8260 +/* Jump into the system reset for the rom. + * We first disable the MMU, and then jump to the ROM reset address. + * + * r3 is the board info structure, r4 is the location for starting. + * I use this for building a small kernel that can load other kernels, + * rather than trying to write or rely on a rom monitor that can tftp load. + */ + .globl m8260_gorom +m8260_gorom: + mfmsr r0 + rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ + sync + mtmsr r0 + sync + mfspr r11, SPRN_HID0 + lis r10, 0 + ori r10,r10,HID0_ICE|HID0_DCE + andc r11, r11, r10 + mtspr SPRN_HID0, r11 + isync + li r5, MSR_ME|MSR_RI + lis r6,2f@h + addis r6,r6,-KERNELBASE@h + ori r6,r6,2f@l + mtspr SPRN_SRR0,r6 + mtspr SPRN_SRR1,r5 + isync + sync + rfi +2: + mtlr r4 + blr +#endif + + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the data segment, + * which is page-aligned. + */ + .data + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 + + .globl swapper_pg_dir +swapper_pg_dir: + .space 4096 + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * Used to pass parameters into the kernel like root=/dev/sda1, etc. + */ + .globl cmd_line +cmd_line: + .space 512 + + .globl intercept_table +intercept_table: + .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700 + .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0 + .long 0, 0, 0, i0x1300, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + +/* Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S new file mode 100644 index 00000000000..b1b9dc08abc --- /dev/null +++ b/arch/powerpc/kernel/head_44x.S @@ -0,0 +1,778 @@ +/* + * arch/ppc/kernel/head_44x.S + * + * Kernel execution entry point code. + * + * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> + * Initial PowerPC version. + * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu> + * Rewritten for PReP + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Low-level exception handers, MMU support, and rewrite. + * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> + * PowerPC 8xx modifications. + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> + * PowerPC 403GCX/405GP modifications. + * Copyright 2000 MontaVista Software Inc. + * PPC405 modifications + * PowerPC 403GCX/405GP modifications. + * Author: MontaVista Software, Inc. + * frank_rowand@mvista.com or source@mvista.com + * debbie_chu@mvista.com + * Copyright 2002-2005 MontaVista Software, Inc. + * PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/ibm4xx.h> +#include <asm/ibm44x.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include "head_booke.h" + + +/* As with the other PowerPC ports, it is expected that when code + * execution begins here, the following registers contain valid, yet + * optional, information: + * + * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.) + * r4 - Starting address of the init RAM disk + * r5 - Ending address of the init RAM disk + * r6 - Start of kernel command line string (e.g. "mem=128") + * r7 - End of kernel command line string + * + */ + .text +_GLOBAL(_stext) +_GLOBAL(_start) + /* + * Reserve a word at a fixed location to store the address + * of abatron_pteptrs + */ + nop +/* + * Save parameters we are passed + */ + mr r31,r3 + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + li r24,0 /* CPU number */ + +/* + * Set up the initial MMU state + * + * We are still executing code at the virtual address + * mappings set by the firmware for the base of RAM. + * + * We first invalidate all TLB entries but the one + * we are running from. We then load the KERNELBASE + * mappings so we can begin to use kernel addresses + * natively and so the interrupt vector locations are + * permanently pinned (necessary since Book E + * implementations always have translation enabled). + * + * TODO: Use the known TLB entry we are running from to + * determine which physical region we are located + * in. This can be used to determine where in RAM + * (on a shared CPU system) or PCI memory space + * (on a DRAMless system) we are located. + * For now, we assume a perfect world which means + * we are located at the base of DRAM (physical 0). + */ + +/* + * Search TLB for entry that we are currently using. + * Invalidate all entries but the one we are using. + */ + /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ + mfspr r3,SPRN_PID /* Get PID */ + mfmsr r4 /* Get MSR */ + andi. r4,r4,MSR_IS@l /* TS=1? */ + beq wmmucr /* If not, leave STS=0 */ + oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */ +wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ + sync + + bl invstr /* Find our address */ +invstr: mflr r5 /* Make it accessible */ + tlbsx r23,0,r5 /* Find entry we are in */ + li r4,0 /* Start at TLB entry 0 */ + li r3,0 /* Set PAGEID inval value */ +1: cmpw r23,r4 /* Is this our entry? */ + beq skpinv /* If so, skip the inval */ + tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ +skpinv: addi r4,r4,1 /* Increment */ + cmpwi r4,64 /* Are we done? */ + bne 1b /* If not, repeat */ + isync /* If so, context change */ + +/* + * Configure and load pinned entry into TLB slot 63. + */ + + lis r3,KERNELBASE@h /* Load the kernel virtual address */ + ori r3,r3,KERNELBASE@l + + /* Kernel is at the base of RAM */ + li r4, 0 /* Load the kernel physical address */ + + /* Load the kernel PID = 0 */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Initialize MMUCR */ + li r5,0 + mtspr SPRN_MMUCR,r5 + sync + + /* pageid fields */ + clrrwi r3,r3,10 /* Mask off the effective page number */ + ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M + + /* xlat fields */ + clrrwi r4,r4,10 /* Mask off the real page number */ + /* ERPN is 0 for first 4GB page */ + + /* attrib fields */ + /* Added guarded bit to protect against speculative loads/stores */ + li r5,0 + ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) + + li r0,63 /* TLB slot 63 */ + + tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */ + tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */ + tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ + + /* Force context change */ + mfmsr r0 + mtspr SPRN_SRR1, r0 + lis r0,3f@h + ori r0,r0,3f@l + mtspr SPRN_SRR0,r0 + sync + rfi + + /* If necessary, invalidate original entry we used */ +3: cmpwi r23,63 + beq 4f + li r6,0 + tlbwe r6,r23,PPC44x_TLB_PAGEID + isync + +4: +#ifdef CONFIG_SERIAL_TEXT_DEBUG + /* + * Add temporary UART mapping for early debug. + * We can map UART registers wherever we want as long as they don't + * interfere with other system mappings (e.g. with pinned entries). + * For an example of how we handle this - see ocotea.h. --ebs + */ + /* pageid fields */ + lis r3,UART0_IO_BASE@h + ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_4K + + /* xlat fields */ + lis r4,UART0_PHYS_IO_BASE@h /* RPN depends on SoC */ +#ifndef CONFIG_440EP + ori r4,r4,0x0001 /* ERPN is 1 for second 4GB page */ +#endif + + /* attrib fields */ + li r5,0 + ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_I | PPC44x_TLB_G) + + li r0,0 /* TLB slot 0 */ + + tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */ + tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */ + tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ + + /* Force context change */ + isync +#endif /* CONFIG_SERIAL_TEXT_DEBUG */ + + /* Establish the interrupt vector offsets */ + SET_IVOR(0, CriticalInput); + SET_IVOR(1, MachineCheck); + SET_IVOR(2, DataStorage); + SET_IVOR(3, InstructionStorage); + SET_IVOR(4, ExternalInput); + SET_IVOR(5, Alignment); + SET_IVOR(6, Program); + SET_IVOR(7, FloatingPointUnavailable); + SET_IVOR(8, SystemCall); + SET_IVOR(9, AuxillaryProcessorUnavailable); + SET_IVOR(10, Decrementer); + SET_IVOR(11, FixedIntervalTimer); + SET_IVOR(12, WatchdogTimer); + SET_IVOR(13, DataTLBError); + SET_IVOR(14, InstructionTLBError); + SET_IVOR(15, Debug); + + /* Establish the interrupt vector base */ + lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ + mtspr SPRN_IVPR,r4 + +#ifdef CONFIG_440EP + /* Clear DAPUIB flag in CCR0 (enable APU between CPU and FPU) */ + mfspr r2,SPRN_CCR0 + lis r3,0xffef + ori r3,r3,0xffff + and r2,r2,r3 + mtspr SPRN_CCR0,r2 + isync +#endif + + /* + * This is where the main kernel code starts. + */ + + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + + /* ptr to current thread */ + addi r4,r2,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r4 + + /* stack */ + lis r1,init_thread_union@h + ori r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + bl early_init + +/* + * Decide what sort of machine this is and initialize the MMU. + */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + bl machine_init + bl MMU_init + + /* Setup PTE pointers for the Abatron bdiGDB */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + lis r4, KERNELBASE@h + ori r4, r4, KERNELBASE@l + stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */ + stw r6, 0(r5) + + /* Let's move on */ + lis r4,start_kernel@h + ori r4,r4,start_kernel@l + lis r3,MSR_KERNEL@h + ori r3,r3,MSR_KERNEL@l + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi /* change context and jump to start_kernel */ + +/* + * Interrupt vector entry code + * + * The Book E MMUs are always on so we don't need to handle + * interrupts in real mode as with previous PPC processors. In + * this case we handle interrupts in the kernel virtual address + * space. + * + * Interrupt vectors are dynamically placed relative to the + * interrupt prefix as determined by the address of interrupt_base. + * The interrupt vectors offsets are programmed using the labels + * for each interrupt vector entry. + * + * Interrupt vectors must be aligned on a 16 byte boundary. + * We align on a 32 byte cache line boundary for good measure. + */ + +interrupt_base: + /* Critical Input Interrupt */ + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) + + /* Machine Check Interrupt */ +#ifdef CONFIG_440A + MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) +#else + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) +#endif + + /* Data Storage Interrupt */ + START_EXCEPTION(DataStorage) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + + /* + * Check if it was a store fault, if not then bail + * because a user tried to access a kernel or + * read-protected page. Otherwise, get the + * offending address and handle it. + */ + mfspr r10, SPRN_ESR + andis. r10, r10, ESR_ST@h + beq 2f + + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MMUCR + rlwinm r12,r12,0,0,23 /* Clear TID */ + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + + /* Load PID into MMUCR TID */ + mfspr r12,SPRN_MMUCR /* Get MMUCR */ + mfspr r13,SPRN_PID /* Get PID */ + rlwimi r12,r13,0,24,31 /* Set TID */ + +4: + mtspr SPRN_MMUCR,r12 + + rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ + lwzx r11, r12, r11 /* Get pgd/pmd entry */ + rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ + lwz r11, 4(r12) /* Get pte entry */ + + andi. r13, r11, _PAGE_RW /* Is it writeable? */ + beq 2f /* Bail if not */ + + /* Update 'changed'. + */ + ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE + stw r11, 4(r12) /* Update Linux page table */ + + li r13, PPC44x_TLB_SR@l /* Set SR */ + rlwimi r13, r11, 29, 29, 29 /* SX = _PAGE_HWEXEC */ + rlwimi r13, r11, 0, 30, 30 /* SW = _PAGE_RW */ + rlwimi r13, r11, 29, 28, 28 /* UR = _PAGE_USER */ + rlwimi r12, r11, 31, 26, 26 /* (_PAGE_USER>>1)->r12 */ + rlwimi r12, r11, 29, 30, 30 /* (_PAGE_USER>>3)->r12 */ + and r12, r12, r11 /* HWEXEC/RW & USER */ + rlwimi r13, r12, 0, 26, 26 /* UX = HWEXEC & USER */ + rlwimi r13, r12, 3, 27, 27 /* UW = RW & USER */ + + rlwimi r11,r13,0,26,31 /* Insert static perms */ + + rlwinm r11,r11,0,20,15 /* Clear U0-U3 */ + + /* find the TLB index that caused the fault. It has to be here. */ + tlbsx r10, 0, r10 + + tlbwe r11, r10, PPC44x_TLB_ATTRIB /* Write ATTRIB */ + + /* Done...restore registers and get out of here. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + rfi /* Force context change */ + +2: + /* + * The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b data_access + + /* Instruction Storage Interrupt */ + INSTRUCTION_STORAGE_EXCEPTION + + /* External Input Interrupt */ + EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) + + /* Alignment Interrupt */ + ALIGNMENT_EXCEPTION + + /* Program Interrupt */ + PROGRAM_EXCEPTION + + /* Floating Point Unavailable Interrupt */ +#ifdef CONFIG_PPC_FPU + FP_UNAVAILABLE_EXCEPTION +#else + EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) +#endif + + /* System Call Interrupt */ + START_EXCEPTION(SystemCall) + NORMAL_EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0x0c00, DoSyscall) + + /* Auxillary Processor Unavailable Interrupt */ + EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + + /* Decrementer Interrupt */ + DECREMENTER_EXCEPTION + + /* Fixed Internal Timer Interrupt */ + /* TODO: Add FIT support */ + EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) + + /* Watchdog Timer Interrupt */ + /* TODO: Add watchdog support */ +#ifdef CONFIG_BOOKE_WDT + CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException) +#else + CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception) +#endif + + /* Data TLB Error Interrupt */ + START_EXCEPTION(DataTLBError) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MMUCR + rlwinm r12,r12,0,0,23 /* Clear TID */ + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + + /* Load PID into MMUCR TID */ + mfspr r12,SPRN_MMUCR + mfspr r13,SPRN_PID /* Get PID */ + rlwimi r12,r13,0,24,31 /* Set TID */ + +4: + mtspr SPRN_MMUCR,r12 + + rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ + lwzx r11, r12, r11 /* Get pgd/pmd entry */ + rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ + lwz r11, 4(r12) /* Get pte entry */ + andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ + beq 2f /* Bail if not present */ + + ori r11, r11, _PAGE_ACCESSED + stw r11, 4(r12) + + /* Jump to common tlb load */ + b finish_tlb_load + +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b data_access + + /* Instruction TLB Error Interrupt */ + /* + * Nearly the same as above, except we get our + * information from different registers and bailout + * to a different point. + */ + START_EXCEPTION(InstructionTLBError) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + mfspr r10, SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MMUCR + rlwinm r12,r12,0,0,23 /* Clear TID */ + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + + /* Load PID into MMUCR TID */ + mfspr r12,SPRN_MMUCR + mfspr r13,SPRN_PID /* Get PID */ + rlwimi r12,r13,0,24,31 /* Set TID */ + +4: + mtspr SPRN_MMUCR,r12 + + rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ + lwzx r11, r12, r11 /* Get pgd/pmd entry */ + rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ + lwz r11, 4(r12) /* Get pte entry */ + andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ + beq 2f /* Bail if not present */ + + ori r11, r11, _PAGE_ACCESSED + stw r11, 4(r12) + + /* Jump to common TLB load point */ + b finish_tlb_load + +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b InstructionStorage + + /* Debug Interrupt */ + DEBUG_EXCEPTION + +/* + * Local functions + */ + /* + * Data TLB exceptions will bail out to this point + * if they can't resolve the lightweight TLB fault. + */ +data_access: + NORMAL_EXCEPTION_PROLOG + mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5,_ESR(r11) + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + EXC_XFER_EE_LITE(0x0300, handle_page_fault) + +/* + + * Both the instruction and data TLB miss get to this + * point to load the TLB. + * r10 - EA of fault + * r11 - available to use + * r12 - Pointer to the 64-bit PTE + * r13 - available to use + * MMUCR - loaded with proper value when we get here + * Upon exit, we reload everything and RFI. + */ +finish_tlb_load: + /* + * We set execute, because we don't have the granularity to + * properly set this at the page level (Linux problem). + * If shared is set, we cause a zero PID->TID load. + * Many of these bits are software only. Bits we don't set + * here we (properly should) assume have the appropriate value. + */ + + /* Load the next available TLB index */ + lis r13, tlb_44x_index@ha + lwz r13, tlb_44x_index@l(r13) + /* Load the TLB high watermark */ + lis r11, tlb_44x_hwater@ha + lwz r11, tlb_44x_hwater@l(r11) + + /* Increment, rollover, and store TLB index */ + addi r13, r13, 1 + cmpw 0, r13, r11 /* reserve entries */ + ble 7f + li r13, 0 +7: + /* Store the next available TLB index */ + lis r11, tlb_44x_index@ha + stw r13, tlb_44x_index@l(r11) + + lwz r11, 0(r12) /* Get MS word of PTE */ + lwz r12, 4(r12) /* Get LS word of PTE */ + rlwimi r11, r12, 0, 0 , 19 /* Insert RPN */ + tlbwe r11, r13, PPC44x_TLB_XLAT /* Write XLAT */ + + /* + * Create PAGEID. This is the faulting address, + * page size, and valid flag. + */ + li r11, PPC44x_TLB_VALID | PPC44x_TLB_4K + rlwimi r10, r11, 0, 20, 31 /* Insert valid and page size */ + tlbwe r10, r13, PPC44x_TLB_PAGEID /* Write PAGEID */ + + li r10, PPC44x_TLB_SR@l /* Set SR */ + rlwimi r10, r12, 0, 30, 30 /* Set SW = _PAGE_RW */ + rlwimi r10, r12, 29, 29, 29 /* SX = _PAGE_HWEXEC */ + rlwimi r10, r12, 29, 28, 28 /* UR = _PAGE_USER */ + rlwimi r11, r12, 31, 26, 26 /* (_PAGE_USER>>1)->r12 */ + and r11, r12, r11 /* HWEXEC & USER */ + rlwimi r10, r11, 0, 26, 26 /* UX = HWEXEC & USER */ + + rlwimi r12, r10, 0, 26, 31 /* Insert static perms */ + rlwinm r12, r12, 0, 20, 15 /* Clear U0-U3 */ + tlbwe r12, r13, PPC44x_TLB_ATTRIB /* Write ATTRIB */ + + /* Done...restore registers and get out of here. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + rfi /* Force context change */ + +/* + * Global functions + */ + +/* + * extern void giveup_altivec(struct task_struct *prev) + * + * The 44x core does not have an AltiVec unit. + */ +_GLOBAL(giveup_altivec) + blr + +/* + * extern void giveup_fpu(struct task_struct *prev) + * + * The 44x core does not have an FPU. + */ +#ifndef CONFIG_PPC_FPU +_GLOBAL(giveup_fpu) + blr +#endif + +/* + * extern void abort(void) + * + * At present, this routine just applies a system reset. + */ +_GLOBAL(abort) + mfspr r13,SPRN_DBCR0 + oris r13,r13,DBCR0_RST_SYSTEM@h + mtspr SPRN_DBCR0,r13 + +_GLOBAL(set_context) + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is the second parameter. + */ + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r4, 0x4(r5) +#endif + mtspr SPRN_PID,r3 + isync /* Force context change */ + blr + +/* + * We put a few things here that have to be page-aligned. This stuff + * goes at the beginning of the data segment, which is page-aligned. + */ + .data +_GLOBAL(sdata) +_GLOBAL(empty_zero_page) + .space 4096 + +/* + * To support >32-bit physical addresses, we use an 8KB pgdir. + */ +_GLOBAL(swapper_pg_dir) + .space 8192 + +/* Reserved 4k for the critical exception stack & 4k for the machine + * check stack per CPU for kernel mode exceptions */ + .section .bss + .align 12 +exception_stack_bottom: + .space BOOKE_EXCEPTION_STACK_SIZE +_GLOBAL(exception_stack_top) + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * which is used to pass parameters into the kernel like root=/dev/sda1, etc. + */ +_GLOBAL(cmd_line) + .space 512 + +/* + * Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 + + diff --git a/arch/powerpc/kernel/head_4xx.S b/arch/powerpc/kernel/head_4xx.S new file mode 100644 index 00000000000..5772ce97e24 --- /dev/null +++ b/arch/powerpc/kernel/head_4xx.S @@ -0,0 +1,1016 @@ +/* + * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> + * Initial PowerPC version. + * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu> + * Rewritten for PReP + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Low-level exception handers, MMU support, and rewrite. + * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> + * PowerPC 8xx modifications. + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> + * PowerPC 403GCX/405GP modifications. + * Copyright 2000 MontaVista Software Inc. + * PPC405 modifications + * PowerPC 403GCX/405GP modifications. + * Author: MontaVista Software, Inc. + * frank_rowand@mvista.com or source@mvista.com + * debbie_chu@mvista.com + * + * + * Module name: head_4xx.S + * + * Description: + * Kernel execution entry point code. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/ibm4xx.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* As with the other PowerPC ports, it is expected that when code + * execution begins here, the following registers contain valid, yet + * optional, information: + * + * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.) + * r4 - Starting address of the init RAM disk + * r5 - Ending address of the init RAM disk + * r6 - Start of kernel command line string (e.g. "mem=96m") + * r7 - End of kernel command line string + * + * This is all going to change RSN when we add bi_recs....... -- Dan + */ + .text +_GLOBAL(_stext) +_GLOBAL(_start) + + /* Save parameters we are passed. + */ + mr r31,r3 + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + + /* We have to turn on the MMU right away so we get cache modes + * set correctly. + */ + bl initial_mmu + +/* We now have the lower 16 Meg mapped into TLB entries, and the caches + * ready to work. + */ +turn_on_mmu: + lis r0,MSR_KERNEL@h + ori r0,r0,MSR_KERNEL@l + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + SYNC + rfi /* enables MMU */ + b . /* prevent prefetch past rfi */ + +/* + * This area is used for temporarily saving registers during the + * critical exception prolog. + */ + . = 0xc0 +crit_save: +_GLOBAL(crit_r10) + .space 4 +_GLOBAL(crit_r11) + .space 4 + +/* + * Exception vector entry code. This code runs with address translation + * turned off (i.e. using physical addresses). We assume SPRG3 has the + * physical address of the current task thread_struct. + * Note that we have to have decremented r1 before we write to any fields + * of the exception frame, since a critical interrupt could occur at any + * time, and it will write to the area immediately below the current r1. + */ +#define NORMAL_EXCEPTION_PROLOG \ + mtspr SPRN_SPRG0,r10; /* save two registers to work with */\ + mtspr SPRN_SPRG1,r11; \ + mtspr SPRN_SPRG2,r1; \ + mfcr r10; /* save CR in r10 for now */\ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ + andi. r11,r11,MSR_PR; \ + beq 1f; \ + mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\ + lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\ + addi r1,r1,THREAD_SIZE; \ +1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\ + tophys(r11,r1); \ + stw r10,_CCR(r11); /* save various registers */\ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r10,SPRN_SPRG0; \ + stw r10,GPR10(r11); \ + mfspr r12,SPRN_SPRG1; \ + stw r12,GPR11(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r10,SPRN_SPRG2; \ + mfspr r12,SPRN_SRR0; \ + stw r10,GPR1(r11); \ + mfspr r9,SPRN_SRR1; \ + stw r10,0(r11); \ + rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* + * Exception prolog for critical exceptions. This is a little different + * from the normal exception prolog above since a critical exception + * can potentially occur at any point during normal exception processing. + * Thus we cannot use the same SPRG registers as the normal prolog above. + * Instead we use a couple of words of memory at low physical addresses. + * This is OK since we don't support SMP on these processors. + */ +#define CRITICAL_EXCEPTION_PROLOG \ + stw r10,crit_r10@l(0); /* save two registers to work with */\ + stw r11,crit_r11@l(0); \ + mfcr r10; /* save CR in r10 for now */\ + mfspr r11,SPRN_SRR3; /* check whether user or kernel */\ + andi. r11,r11,MSR_PR; \ + lis r11,critical_stack_top@h; \ + ori r11,r11,critical_stack_top@l; \ + beq 1f; \ + /* COMING FROM USER MODE */ \ + mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\ + lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ + addi r11,r11,THREAD_SIZE; \ +1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\ + tophys(r11,r11); \ + stw r10,_CCR(r11); /* save various registers */\ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\ + stw r12,_DEAR(r11); /* since they may have had stuff */\ + mfspr r9,SPRN_ESR; /* in them at the point where the */\ + stw r9,_ESR(r11); /* exception was taken */\ + mfspr r12,SPRN_SRR2; \ + stw r1,GPR1(r11); \ + mfspr r9,SPRN_SRR3; \ + stw r1,0(r11); \ + tovirt(r1,r11); \ + rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + + /* + * State at this point: + * r9 saved in stack frame, now saved SRR3 & ~MSR_WE + * r10 saved in crit_r10 and in stack frame, trashed + * r11 saved in crit_r11 and in stack frame, + * now phys stack/exception frame pointer + * r12 saved in stack frame, now saved SRR2 + * CR saved in stack frame, CR0.EQ = !SRR3.PR + * LR, DEAR, ESR in stack frame + * r1 saved in stack frame, now virt stack/excframe pointer + * r0, r3-r8 saved in stack frame + */ + +/* + * Exception vectors. + */ +#define START_EXCEPTION(n, label) \ + . = n; \ +label: + +#define EXCEPTION(n, label, hdlr, xfer) \ + START_EXCEPTION(n, label); \ + NORMAL_EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define CRITICAL_EXCEPTION(n, label, hdlr) \ + START_EXCEPTION(n, label); \ + CRITICAL_EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ + NOCOPY, crit_transfer_to_handler, \ + ret_from_crit_exc) + +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ + li r10,trap; \ + stw r10,TRAP(r11); \ + lis r10,msr@h; \ + ori r10,r10,msr@l; \ + copyee(r10, r9); \ + bl tfer; \ + .long hdlr; \ + .long ret + +#define COPY_EE(d, s) rlwimi d,s,0,16,16 +#define NOCOPY(d, s) + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ + ret_from_except) + +#define EXC_XFER_EE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_EE_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ + ret_from_except) + + +/* + * 0x0100 - Critical Interrupt Exception + */ + CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception) + +/* + * 0x0200 - Machine Check Exception + */ + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) + +/* + * 0x0300 - Data Storage Exception + * This happens for just a few reasons. U0 set (but we don't do that), + * or zone protection fault (user violation, write to protected page). + * If this is just an update of modified status, we do that quickly + * and exit. Otherwise, we call heavywight functions to do the work. + */ + START_EXCEPTION(0x0300, DataStorage) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 +#ifdef CONFIG_403GCX + stw r12, 0(r0) + stw r9, 4(r0) + mfcr r11 + mfspr r12, SPRN_PID + stw r11, 8(r0) + stw r12, 12(r0) +#else + mtspr SPRN_SPRG4, r12 + mtspr SPRN_SPRG5, r9 + mfcr r11 + mfspr r12, SPRN_PID + mtspr SPRN_SPRG7, r11 + mtspr SPRN_SPRG6, r12 +#endif + + /* First, check if it was a zone fault (which means a user + * tried to access a kernel or read-protected page - always + * a SEGV). All other faults here must be stores, so no + * need to check ESR_DST as well. */ + mfspr r10, SPRN_ESR + andis. r10, r10, ESR_DIZ@h + bne 2f + + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + li r9, 0 + mtspr SPRN_PID, r9 /* TLB will have 0 TID */ + b 4f + + /* Get the PGD for the current thread. + */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) +4: + tophys(r11, r11) + rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ + lwz r11, 0(r11) /* Get L1 entry */ + rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r12) /* Get Linux PTE */ + + andi. r9, r11, _PAGE_RW /* Is it writeable? */ + beq 2f /* Bail if not */ + + /* Update 'changed'. + */ + ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE + stw r11, 0(r12) /* Update Linux page table */ + + /* Most of the Linux PTE is ready to load into the TLB LO. + * We set ZSEL, where only the LS-bit determines user access. + * We set execute, because we don't have the granularity to + * properly set this at the page level (Linux problem). + * If shared is set, we cause a zero PID->TID load. + * Many of these bits are software only. Bits we don't set + * here we (properly should) assume have the appropriate value. + */ + li r12, 0x0ce2 + andc r11, r11, r12 /* Make sure 20, 21 are zero */ + + /* find the TLB index that caused the fault. It has to be here. + */ + tlbsx r9, 0, r10 + + tlbwe r11, r9, TLB_DATA /* Load TLB LO */ + + /* Done...restore registers and get out of here. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG6 + mfspr r11, SPRN_SPRG7 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG5 + mfspr r12, SPRN_SPRG4 +#endif + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + PPC405_ERR77_SYNC + rfi /* Should sync shadow TLBs */ + b . /* prevent prefetch past rfi */ + +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG6 + mfspr r11, SPRN_SPRG7 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG5 + mfspr r12, SPRN_SPRG4 +#endif + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b DataAccess + +/* + * 0x0400 - Instruction Storage Exception + * This is caused by a fetch from non-execute or guarded pages. + */ + START_EXCEPTION(0x0400, InstructionAccess) + NORMAL_EXCEPTION_PROLOG + mr r4,r12 /* Pass SRR0 as arg2 */ + li r5,0 /* Pass zero as arg3 */ + EXC_XFER_EE_LITE(0x400, handle_page_fault) + +/* 0x0500 - External Interrupt Exception */ + EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + +/* 0x0600 - Alignment Exception */ + START_EXCEPTION(0x0600, Alignment) + NORMAL_EXCEPTION_PROLOG + mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ + stw r4,_DEAR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0x600, alignment_exception) + +/* 0x0700 - Program Exception */ + START_EXCEPTION(0x0700, ProgramCheck) + NORMAL_EXCEPTION_PROLOG + mfspr r4,SPRN_ESR /* Grab the ESR and save it */ + stw r4,_ESR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_STD(0x700, program_check_exception) + + EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE) + +/* 0x0C00 - System Call Exception */ + START_EXCEPTION(0x0C00, SystemCall) + NORMAL_EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0xc00, DoSyscall) + + EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE) + +/* 0x1000 - Programmable Interval Timer (PIT) Exception */ + START_EXCEPTION(0x1000, Decrementer) + NORMAL_EXCEPTION_PROLOG + lis r0,TSR_PIS@h + mtspr SPRN_TSR,r0 /* Clear the PIT exception */ + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_LITE(0x1000, timer_interrupt) + +#if 0 +/* NOTE: + * FIT and WDT handlers are not implemented yet. + */ + +/* 0x1010 - Fixed Interval Timer (FIT) Exception +*/ + STND_EXCEPTION(0x1010, FITException, unknown_exception) + +/* 0x1020 - Watchdog Timer (WDT) Exception +*/ +#ifdef CONFIG_BOOKE_WDT + CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException) +#else + CRITICAL_EXCEPTION(0x1020, WDTException, unknown_exception) +#endif +#endif + +/* 0x1100 - Data TLB Miss Exception + * As the name implies, translation is not in the MMU, so search the + * page tables and fix it. The only purpose of this function is to + * load TLB entries from the page table if they exist. + */ + START_EXCEPTION(0x1100, DTLBMiss) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 +#ifdef CONFIG_403GCX + stw r12, 0(r0) + stw r9, 4(r0) + mfcr r11 + mfspr r12, SPRN_PID + stw r11, 8(r0) + stw r12, 12(r0) +#else + mtspr SPRN_SPRG4, r12 + mtspr SPRN_SPRG5, r9 + mfcr r11 + mfspr r12, SPRN_PID + mtspr SPRN_SPRG7, r11 + mtspr SPRN_SPRG6, r12 +#endif + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + li r9, 0 + mtspr SPRN_PID, r9 /* TLB will have 0 TID */ + b 4f + + /* Get the PGD for the current thread. + */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) +4: + tophys(r11, r11) + rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ + lwz r12, 0(r11) /* Get L1 entry */ + andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r12) /* Get Linux PTE */ + andi. r9, r11, _PAGE_PRESENT + beq 5f + + ori r11, r11, _PAGE_ACCESSED + stw r11, 0(r12) + + /* Create TLB tag. This is the faulting address plus a static + * set of bits. These are size, valid, E, U0. + */ + li r12, 0x00c0 + rlwimi r10, r12, 0, 20, 31 + + b finish_tlb_load + +2: /* Check for possible large-page pmd entry */ + rlwinm. r9, r12, 2, 22, 24 + beq 5f + + /* Create TLB tag. This is the faulting address, plus a static + * set of bits (valid, E, U0) plus the size from the PMD. + */ + ori r9, r9, 0x40 + rlwimi r10, r9, 0, 20, 31 + mr r11, r12 + + b finish_tlb_load + +5: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG6 + mfspr r11, SPRN_SPRG7 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG5 + mfspr r12, SPRN_SPRG4 +#endif + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b DataAccess + +/* 0x1200 - Instruction TLB Miss Exception + * Nearly the same as above, except we get our information from different + * registers and bailout to a different point. + */ + START_EXCEPTION(0x1200, ITLBMiss) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 +#ifdef CONFIG_403GCX + stw r12, 0(r0) + stw r9, 4(r0) + mfcr r11 + mfspr r12, SPRN_PID + stw r11, 8(r0) + stw r12, 12(r0) +#else + mtspr SPRN_SPRG4, r12 + mtspr SPRN_SPRG5, r9 + mfcr r11 + mfspr r12, SPRN_PID + mtspr SPRN_SPRG7, r11 + mtspr SPRN_SPRG6, r12 +#endif + mfspr r10, SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + li r9, 0 + mtspr SPRN_PID, r9 /* TLB will have 0 TID */ + b 4f + + /* Get the PGD for the current thread. + */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) +4: + tophys(r11, r11) + rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ + lwz r12, 0(r11) /* Get L1 entry */ + andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r12) /* Get Linux PTE */ + andi. r9, r11, _PAGE_PRESENT + beq 5f + + ori r11, r11, _PAGE_ACCESSED + stw r11, 0(r12) + + /* Create TLB tag. This is the faulting address plus a static + * set of bits. These are size, valid, E, U0. + */ + li r12, 0x00c0 + rlwimi r10, r12, 0, 20, 31 + + b finish_tlb_load + +2: /* Check for possible large-page pmd entry */ + rlwinm. r9, r12, 2, 22, 24 + beq 5f + + /* Create TLB tag. This is the faulting address, plus a static + * set of bits (valid, E, U0) plus the size from the PMD. + */ + ori r9, r9, 0x40 + rlwimi r10, r9, 0, 20, 31 + mr r11, r12 + + b finish_tlb_load + +5: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG6 + mfspr r11, SPRN_SPRG7 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG5 + mfspr r12, SPRN_SPRG4 +#endif + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b InstructionAccess + + EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) +#ifdef CONFIG_IBM405_ERR51 + /* 405GP errata 51 */ + START_EXCEPTION(0x1700, Trap_17) + b DTLBMiss +#else + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) +#endif + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE) + +/* Check for a single step debug exception while in an exception + * handler before state has been saved. This is to catch the case + * where an instruction that we are trying to single step causes + * an exception (eg ITLB/DTLB miss) and thus the first instruction of + * the exception handler generates a single step debug exception. + * + * If we get a debug trap on the first instruction of an exception handler, + * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is + * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR). + * The exception handler was handling a non-critical interrupt, so it will + * save (and later restore) the MSR via SPRN_SRR1, which will still have + * the MSR_DE bit set. + */ + /* 0x2000 - Debug Exception */ + START_EXCEPTION(0x2000, DebugTrap) + CRITICAL_EXCEPTION_PROLOG + + /* + * If this is a single step or branch-taken exception in an + * exception entry sequence, it was probably meant to apply to + * the code where the exception occurred (since exception entry + * doesn't turn off DE automatically). We simulate the effect + * of turning off DE on entry to an exception handler by turning + * off DE in the SRR3 value and clearing the debug status. + */ + mfspr r10,SPRN_DBSR /* check single-step/branch taken */ + andis. r10,r10,DBSR_IC@h + beq+ 2f + + andi. r10,r9,MSR_IR|MSR_PR /* check supervisor + MMU off */ + beq 1f /* branch and fix it up */ + + mfspr r10,SPRN_SRR2 /* Faulting instruction address */ + cmplwi r10,0x2100 + bgt+ 2f /* address above exception vectors */ + + /* here it looks like we got an inappropriate debug exception. */ +1: rlwinm r9,r9,0,~MSR_DE /* clear DE in the SRR3 value */ + lis r10,DBSR_IC@h /* clear the IC event */ + mtspr SPRN_DBSR,r10 + /* restore state and get out */ + lwz r10,_CCR(r11) + lwz r0,GPR0(r11) + lwz r1,GPR1(r11) + mtcrf 0x80,r10 + mtspr SPRN_SRR2,r12 + mtspr SPRN_SRR3,r9 + lwz r9,GPR9(r11) + lwz r12,GPR12(r11) + lwz r10,crit_r10@l(0) + lwz r11,crit_r11@l(0) + PPC405_ERR77_SYNC + rfci + b . + + /* continue normal handling for a critical exception... */ +2: mfspr r4,SPRN_DBSR + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_TEMPLATE(DebugException, 0x2002, \ + (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ + NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) + +/* + * The other Data TLB exceptions bail out to this point + * if they can't resolve the lightweight TLB fault. + */ +DataAccess: + NORMAL_EXCEPTION_PROLOG + mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5,_ESR(r11) + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + EXC_XFER_EE_LITE(0x300, handle_page_fault) + +/* Other PowerPC processors, namely those derived from the 6xx-series + * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. + * However, for the 4xx-series processors these are neither defined nor + * reserved. + */ + + /* Damn, I came up one instruction too many to fit into the + * exception space :-). Both the instruction and data TLB + * miss get to this point to load the TLB. + * r10 - TLB_TAG value + * r11 - Linux PTE + * r12, r9 - avilable to use + * PID - loaded with proper value when we get here + * Upon exit, we reload everything and RFI. + * Actually, it will fit now, but oh well.....a common place + * to load the TLB. + */ +tlb_4xx_index: + .long 0 +finish_tlb_load: + /* load the next available TLB index. + */ + lwz r9, tlb_4xx_index@l(0) + addi r9, r9, 1 + andi. r9, r9, (PPC4XX_TLB_SIZE-1) + stw r9, tlb_4xx_index@l(0) + +6: + /* + * Clear out the software-only bits in the PTE to generate the + * TLB_DATA value. These are the bottom 2 bits of the RPM, the + * top 3 bits of the zone field, and M. + */ + li r12, 0x0ce2 + andc r11, r11, r12 + + tlbwe r11, r9, TLB_DATA /* Load TLB LO */ + tlbwe r10, r9, TLB_TAG /* Load TLB HI */ + + /* Done...restore registers and get out of here. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG6 + mfspr r11, SPRN_SPRG7 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG5 + mfspr r12, SPRN_SPRG4 +#endif + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + PPC405_ERR77_SYNC + rfi /* Should sync shadow TLBs */ + b . /* prevent prefetch past rfi */ + +/* extern void giveup_fpu(struct task_struct *prev) + * + * The PowerPC 4xx family of processors do not have an FPU, so this just + * returns. + */ +_GLOBAL(giveup_fpu) + blr + +/* This is where the main kernel code starts. + */ +start_here: + + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r4 + + /* stack */ + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + bl early_init /* We have to do this with MMU on */ + +/* + * Decide what sort of machine this is and initialize the MMU. + */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + bl machine_init + bl MMU_init + +/* Go back to running unmapped so we can load up new values + * and change to using our exception vectors. + * On the 4xx, all we have to do is invalidate the TLB to clear + * the old 16M byte TLB mappings. + */ + lis r4,2f@h + ori r4,r4,2f@l + tophys(r4,r4) + lis r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h + ori r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi + b . /* prevent prefetch past rfi */ + +/* Load up the kernel context */ +2: + sync /* Flush to memory before changing TLB */ + tlbia + isync /* Flush shadow TLBs */ + + /* set up the PTE pointers for the Abatron bdiGDB. + */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r5, 0xf0(r0) /* Must match your Abatron config file */ + tophys(r5,r5) + stw r6, 0(r5) + +/* Now turn on the MMU for real! */ + lis r4,MSR_KERNEL@h + ori r4,r4,MSR_KERNEL@l + lis r3,start_kernel@h + ori r3,r3,start_kernel@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + rfi /* enable MMU and jump to start_kernel */ + b . /* prevent prefetch past rfi */ + +/* Set up the initial MMU state so we can do the first level of + * kernel initialization. This maps the first 16 MBytes of memory 1:1 + * virtual to physical and more importantly sets the cache mode. + */ +initial_mmu: + tlbia /* Invalidate all TLB entries */ + isync + + /* We should still be executing code at physical address 0x0000xxxx + * at this point. However, start_here is at virtual address + * 0xC000xxxx. So, set up a TLB mapping to cover this once + * translation is enabled. + */ + + lis r3,KERNELBASE@h /* Load the kernel virtual address */ + ori r3,r3,KERNELBASE@l + tophys(r4,r3) /* Load the kernel physical address */ + + iccci r0,r3 /* Invalidate the i-cache before use */ + + /* Load the kernel PID. + */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Configure and load two entries into TLB slots 62 and 63. + * In case we are pinning TLBs, these are reserved in by the + * other TLB functions. If not reserving, then it doesn't + * matter where they are loaded. + */ + clrrwi r4,r4,10 /* Mask off the real page number */ + ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ + + clrrwi r3,r3,10 /* Mask off the effective page number */ + ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M)) + + li r0,63 /* TLB slot 63 */ + + tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ + tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ + +#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE) + + /* Load a TLB entry for the UART, so that ppc4xx_progress() can use + * the UARTs nice and early. We use a 4k real==virtual mapping. */ + + lis r3,SERIAL_DEBUG_IO_BASE@h + ori r3,r3,SERIAL_DEBUG_IO_BASE@l + mr r4,r3 + clrrwi r4,r4,12 + ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G) + + clrrwi r3,r3,12 + ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K)) + + li r0,0 /* TLB slot 0 */ + tlbwe r4,r0,TLB_DATA + tlbwe r3,r0,TLB_TAG +#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */ + + isync + + /* Establish the exception vector base + */ + lis r4,KERNELBASE@h /* EVPR only uses the high 16-bits */ + tophys(r0,r4) /* Use the physical address */ + mtspr SPRN_EVPR,r0 + + blr + +_GLOBAL(abort) + mfspr r13,SPRN_DBCR0 + oris r13,r13,DBCR0_RST_SYSTEM@h + mtspr SPRN_DBCR0,r13 + +_GLOBAL(set_context) + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is the second parameter. + */ + lis r5, KERNELBASE@h + lwz r5, 0xf0(r5) + stw r4, 0x4(r5) +#endif + sync + mtspr SPRN_PID,r3 + isync /* Need an isync to flush shadow */ + /* TLBs after changing PID */ + blr + +/* We put a few things here that have to be page-aligned. This stuff + * goes at the beginning of the data segment, which is page-aligned. + */ + .data +_GLOBAL(sdata) +_GLOBAL(empty_zero_page) + .space 4096 +_GLOBAL(swapper_pg_dir) + .space 4096 + + +/* Stack for handling critical exceptions from kernel mode */ + .section .bss + .align 12 +exception_stack_bottom: + .space 4096 +critical_stack_top: +_GLOBAL(exception_stack_top) + +/* This space gets a copy of optional info passed to us by the bootstrap + * which is used to pass parameters into the kernel like root=/dev/sda1, etc. + */ +_GLOBAL(cmd_line) + .space 512 + +/* Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S new file mode 100644 index 00000000000..a4ceb9ae20f --- /dev/null +++ b/arch/powerpc/kernel/head_64.S @@ -0,0 +1,1959 @@ +/* + * arch/ppc64/kernel/head.S + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com + * + * This file contains the low-level support and setup for the + * PowerPC-64 platform, including trap and interrupt dispatch. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/systemcfg.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/bug.h> +#include <asm/cputable.h> +#include <asm/setup.h> +#include <asm/hvcall.h> +#include <asm/iSeries/LparMap.h> + +#ifdef CONFIG_PPC_ISERIES +#define DO_SOFT_DISABLE +#endif + +/* + * We layout physical memory as follows: + * 0x0000 - 0x00ff : Secondary processor spin code + * 0x0100 - 0x2fff : pSeries Interrupt prologs + * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs + * 0x6000 - 0x6fff : Initial (CPU0) segment table + * 0x7000 - 0x7fff : FWNMI data area + * 0x8000 - : Early init and support code + */ + +/* + * SPRG Usage + * + * Register Definition + * + * SPRG0 reserved for hypervisor + * SPRG1 temp - used to save gpr + * SPRG2 temp - used to save gpr + * SPRG3 virt addr of paca + */ + +/* + * Entering into this code we make the following assumptions: + * For pSeries: + * 1. The MMU is off & open firmware is running in real mode. + * 2. The kernel is entered at __start + * + * For iSeries: + * 1. The MMU is on (as it always is for iSeries) + * 2. The kernel is entered at system_reset_iSeries + */ + + .text + .globl _stext +_stext: +#ifdef CONFIG_PPC_MULTIPLATFORM +_GLOBAL(__start) + /* NOP this out unconditionally */ +BEGIN_FTR_SECTION + b .__start_initialization_multiplatform +END_FTR_SECTION(0, 1) +#endif /* CONFIG_PPC_MULTIPLATFORM */ + + /* Catch branch to 0 in real mode */ + trap + +#ifdef CONFIG_PPC_ISERIES + /* + * At offset 0x20, there is a pointer to iSeries LPAR data. + * This is required by the hypervisor + */ + . = 0x20 + .llong hvReleaseData-KERNELBASE + + /* + * At offset 0x28 and 0x30 are offsets to the mschunks_map + * array (used by the iSeries LPAR debugger to do translation + * between physical addresses and absolute addresses) and + * to the pidhash table (also used by the debugger) + */ + .llong mschunks_map-KERNELBASE + .llong 0 /* pidhash-KERNELBASE SFRXXX */ + + /* Offset 0x38 - Pointer to start of embedded System.map */ + .globl embedded_sysmap_start +embedded_sysmap_start: + .llong 0 + /* Offset 0x40 - Pointer to end of embedded System.map */ + .globl embedded_sysmap_end +embedded_sysmap_end: + .llong 0 + +#endif /* CONFIG_PPC_ISERIES */ + + /* Secondary processors spin on this value until it goes to 1. */ + .globl __secondary_hold_spinloop +__secondary_hold_spinloop: + .llong 0x0 + + /* Secondary processors write this value with their cpu # */ + /* after they enter the spin loop immediately below. */ + .globl __secondary_hold_acknowledge +__secondary_hold_acknowledge: + .llong 0x0 + + . = 0x60 +/* + * The following code is used on pSeries to hold secondary processors + * in a spin loop after they have been freed from OpenFirmware, but + * before the bulk of the kernel has been relocated. This code + * is relocated to physical address 0x60 before prom_init is run. + * All of it must fit below the first exception vector at 0x100. + */ +_GLOBAL(__secondary_hold) + mfmsr r24 + ori r24,r24,MSR_RI + mtmsrd r24 /* RI on */ + + /* Grab our linux cpu number */ + mr r24,r3 + + /* Tell the master cpu we're here */ + /* Relocation is off & we are located at an address less */ + /* than 0x100, so only need to grab low order offset. */ + std r24,__secondary_hold_acknowledge@l(0) + sync + + /* All secondary cpus wait here until told to start. */ +100: ld r4,__secondary_hold_spinloop@l(0) + cmpdi 0,r4,1 + bne 100b + +#ifdef CONFIG_HMT + b .hmt_init +#else +#ifdef CONFIG_SMP + mr r3,r24 + b .pSeries_secondary_smp_init +#else + BUG_OPCODE +#endif +#endif + +/* This value is used to mark exception frames on the stack. */ + .section ".toc","aw" +exception_marker: + .tc ID_72656773_68657265[TC],0x7265677368657265 + .text + +/* + * The following macros define the code that appears as + * the prologue to each of the exception handlers. They + * are split into two parts to allow a single kernel binary + * to be used for pSeries and iSeries. + * LOL. One day... - paulus + */ + +/* + * We make as much of the exception code common between native + * exception handlers (including pSeries LPAR) and iSeries LPAR + * implementations as possible. + */ + +/* + * This is the start of the interrupt handlers for pSeries + * This code runs with relocation off. + */ +#define EX_R9 0 +#define EX_R10 8 +#define EX_R11 16 +#define EX_R12 24 +#define EX_R13 32 +#define EX_SRR0 40 +#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */ +#define EX_DAR 48 +#define EX_LR 48 /* SLB miss saves LR, but not DAR */ +#define EX_DSISR 56 +#define EX_CCR 60 + +#define EXCEPTION_PROLOG_PSERIES(area, label) \ + mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ + std r9,area+EX_R9(r13); /* save r9 - r12 */ \ + std r10,area+EX_R10(r13); \ + std r11,area+EX_R11(r13); \ + std r12,area+EX_R12(r13); \ + mfspr r9,SPRN_SPRG1; \ + std r9,area+EX_R13(r13); \ + mfcr r9; \ + clrrdi r12,r13,32; /* get high part of &label */ \ + mfmsr r10; \ + mfspr r11,SPRN_SRR0; /* save SRR0 */ \ + ori r12,r12,(label)@l; /* virt addr of handler */ \ + ori r10,r10,MSR_IR|MSR_DR|MSR_RI; \ + mtspr SPRN_SRR0,r12; \ + mfspr r12,SPRN_SRR1; /* and SRR1 */ \ + mtspr SPRN_SRR1,r10; \ + rfid; \ + b . /* prevent speculative execution */ + +/* + * This is the start of the interrupt handlers for iSeries + * This code runs with relocation on. + */ +#define EXCEPTION_PROLOG_ISERIES_1(area) \ + mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ + std r9,area+EX_R9(r13); /* save r9 - r12 */ \ + std r10,area+EX_R10(r13); \ + std r11,area+EX_R11(r13); \ + std r12,area+EX_R12(r13); \ + mfspr r9,SPRN_SPRG1; \ + std r9,area+EX_R13(r13); \ + mfcr r9 + +#define EXCEPTION_PROLOG_ISERIES_2 \ + mfmsr r10; \ + ld r11,PACALPPACA+LPPACASRR0(r13); \ + ld r12,PACALPPACA+LPPACASRR1(r13); \ + ori r10,r10,MSR_RI; \ + mtmsrd r10,1 + +/* + * The common exception prolog is used for all except a few exceptions + * such as a segment miss on a kernel address. We have to be prepared + * to take another exception from the point where we first touch the + * kernel stack onwards. + * + * On entry r13 points to the paca, r9-r13 are saved in the paca, + * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and + * SRR1, and relocation is on. + */ +#define EXCEPTION_PROLOG_COMMON(n, area) \ + andi. r10,r12,MSR_PR; /* See if coming from user */ \ + mr r10,r1; /* Save r1 */ \ + subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ + beq- 1f; \ + ld r1,PACAKSAVE(r13); /* kernel stack to use */ \ +1: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \ + bge- cr1,bad_stack; /* abort if it is */ \ + std r9,_CCR(r1); /* save CR in stackframe */ \ + std r11,_NIP(r1); /* save SRR0 in stackframe */ \ + std r12,_MSR(r1); /* save SRR1 in stackframe */ \ + std r10,0(r1); /* make stack chain pointer */ \ + std r0,GPR0(r1); /* save r0 in stackframe */ \ + std r10,GPR1(r1); /* save r1 in stackframe */ \ + std r2,GPR2(r1); /* save r2 in stackframe */ \ + SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ + SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ + ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \ + ld r10,area+EX_R10(r13); \ + std r9,GPR9(r1); \ + std r10,GPR10(r1); \ + ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \ + ld r10,area+EX_R12(r13); \ + ld r11,area+EX_R13(r13); \ + std r9,GPR11(r1); \ + std r10,GPR12(r1); \ + std r11,GPR13(r1); \ + ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ + mflr r9; /* save LR in stackframe */ \ + std r9,_LINK(r1); \ + mfctr r10; /* save CTR in stackframe */ \ + std r10,_CTR(r1); \ + mfspr r11,SPRN_XER; /* save XER in stackframe */ \ + std r11,_XER(r1); \ + li r9,(n)+1; \ + std r9,_TRAP(r1); /* set trap number */ \ + li r10,0; \ + ld r11,exception_marker@toc(r2); \ + std r10,RESULT(r1); /* clear regs->result */ \ + std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ + +/* + * Exception vectors. + */ +#define STD_EXCEPTION_PSERIES(n, label) \ + . = n; \ + .globl label##_pSeries; \ +label##_pSeries: \ + HMT_MEDIUM; \ + mtspr SPRN_SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) + +#define STD_EXCEPTION_ISERIES(n, label, area) \ + .globl label##_iSeries; \ +label##_iSeries: \ + HMT_MEDIUM; \ + mtspr SPRN_SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ + EXCEPTION_PROLOG_ISERIES_1(area); \ + EXCEPTION_PROLOG_ISERIES_2; \ + b label##_common + +#define MASKABLE_EXCEPTION_ISERIES(n, label) \ + .globl label##_iSeries; \ +label##_iSeries: \ + HMT_MEDIUM; \ + mtspr SPRN_SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ + EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \ + lbz r10,PACAPROCENABLED(r13); \ + cmpwi 0,r10,0; \ + beq- label##_iSeries_masked; \ + EXCEPTION_PROLOG_ISERIES_2; \ + b label##_common; \ + +#ifdef DO_SOFT_DISABLE +#define DISABLE_INTS \ + lbz r10,PACAPROCENABLED(r13); \ + li r11,0; \ + std r10,SOFTE(r1); \ + mfmsr r10; \ + stb r11,PACAPROCENABLED(r13); \ + ori r10,r10,MSR_EE; \ + mtmsrd r10,1 + +#define ENABLE_INTS \ + lbz r10,PACAPROCENABLED(r13); \ + mfmsr r11; \ + std r10,SOFTE(r1); \ + ori r11,r11,MSR_EE; \ + mtmsrd r11,1 + +#else /* hard enable/disable interrupts */ +#define DISABLE_INTS + +#define ENABLE_INTS \ + ld r12,_MSR(r1); \ + mfmsr r11; \ + rlwimi r11,r12,0,MSR_EE; \ + mtmsrd r11,1 + +#endif + +#define STD_EXCEPTION_COMMON(trap, label, hdlr) \ + .align 7; \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + DISABLE_INTS; \ + bl .save_nvgprs; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except + +#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr) \ + .align 7; \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + DISABLE_INTS; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except_lite + +/* + * Start of pSeries system interrupt routines + */ + . = 0x100 + .globl __start_interrupts +__start_interrupts: + + STD_EXCEPTION_PSERIES(0x100, system_reset) + + . = 0x200 +_machine_check_pSeries: + HMT_MEDIUM + mtspr SPRN_SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + + . = 0x300 + .globl data_access_pSeries +data_access_pSeries: + HMT_MEDIUM + mtspr SPRN_SPRG1,r13 +BEGIN_FTR_SECTION + mtspr SPRN_SPRG2,r12 + mfspr r13,SPRN_DAR + mfspr r12,SPRN_DSISR + srdi r13,r13,60 + rlwimi r13,r12,16,0x20 + mfcr r12 + cmpwi r13,0x2c + beq .do_stab_bolted_pSeries + mtcrf 0x80,r12 + mfspr r12,SPRN_SPRG2 +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common) + + . = 0x380 + .globl data_access_slb_pSeries +data_access_slb_pSeries: + HMT_MEDIUM + mtspr SPRN_SPRG1,r13 + RUNLATCH_ON(r13) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r9,SPRN_SPRG1 + std r9,PACA_EXSLB+EX_R13(r13) + mfcr r9 + mfspr r12,SPRN_SRR1 /* and SRR1 */ + mfspr r3,SPRN_DAR + b .do_slb_miss /* Rel. branch works in real mode */ + + STD_EXCEPTION_PSERIES(0x400, instruction_access) + + . = 0x480 + .globl instruction_access_slb_pSeries +instruction_access_slb_pSeries: + HMT_MEDIUM + mtspr SPRN_SPRG1,r13 + RUNLATCH_ON(r13) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r9,SPRN_SPRG1 + std r9,PACA_EXSLB+EX_R13(r13) + mfcr r9 + mfspr r12,SPRN_SRR1 /* and SRR1 */ + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ + b .do_slb_miss /* Rel. branch works in real mode */ + + STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) + STD_EXCEPTION_PSERIES(0x600, alignment) + STD_EXCEPTION_PSERIES(0x700, program_check) + STD_EXCEPTION_PSERIES(0x800, fp_unavailable) + STD_EXCEPTION_PSERIES(0x900, decrementer) + STD_EXCEPTION_PSERIES(0xa00, trap_0a) + STD_EXCEPTION_PSERIES(0xb00, trap_0b) + + . = 0xc00 + .globl system_call_pSeries +system_call_pSeries: + HMT_MEDIUM + RUNLATCH_ON(r9) + mr r9,r13 + mfmsr r10 + mfspr r13,SPRN_SPRG3 + mfspr r11,SPRN_SRR0 + clrrdi r12,r13,32 + oris r12,r12,system_call_common@h + ori r12,r12,system_call_common@l + mtspr SPRN_SRR0,r12 + ori r10,r10,MSR_IR|MSR_DR|MSR_RI + mfspr r12,SPRN_SRR1 + mtspr SPRN_SRR1,r10 + rfid + b . /* prevent speculative execution */ + + STD_EXCEPTION_PSERIES(0xd00, single_step) + STD_EXCEPTION_PSERIES(0xe00, trap_0e) + + /* We need to deal with the Altivec unavailable exception + * here which is at 0xf20, thus in the middle of the + * prolog code of the PerformanceMonitor one. A little + * trickery is thus necessary + */ + . = 0xf00 + b performance_monitor_pSeries + + STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable) + + STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) + STD_EXCEPTION_PSERIES(0x1700, altivec_assist) + + . = 0x3000 + +/*** pSeries interrupt support ***/ + + /* moved from 0xf00 */ + STD_EXCEPTION_PSERIES(., performance_monitor) + + .align 7 +_GLOBAL(do_stab_bolted_pSeries) + mtcrf 0x80,r12 + mfspr r12,SPRN_SPRG2 + EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) + +/* + * Vectors for the FWNMI option. Share common code. + */ + .globl system_reset_fwnmi +system_reset_fwnmi: + HMT_MEDIUM + mtspr SPRN_SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) + + .globl machine_check_fwnmi +machine_check_fwnmi: + HMT_MEDIUM + mtspr SPRN_SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + +#ifdef CONFIG_PPC_ISERIES +/*** ISeries-LPAR interrupt handlers ***/ + + STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC) + + .globl data_access_iSeries +data_access_iSeries: + mtspr SPRN_SPRG1,r13 +BEGIN_FTR_SECTION + mtspr SPRN_SPRG2,r12 + mfspr r13,SPRN_DAR + mfspr r12,SPRN_DSISR + srdi r13,r13,60 + rlwimi r13,r12,16,0x20 + mfcr r12 + cmpwi r13,0x2c + beq .do_stab_bolted_iSeries + mtcrf 0x80,r12 + mfspr r12,SPRN_SPRG2 +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN) + EXCEPTION_PROLOG_ISERIES_2 + b data_access_common + +.do_stab_bolted_iSeries: + mtcrf 0x80,r12 + mfspr r12,SPRN_SPRG2 + EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + EXCEPTION_PROLOG_ISERIES_2 + b .do_stab_bolted + + .globl data_access_slb_iSeries +data_access_slb_iSeries: + mtspr SPRN_SPRG1,r13 /* save r13 */ + EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + std r3,PACA_EXSLB+EX_R3(r13) + ld r12,PACALPPACA+LPPACASRR1(r13) + mfspr r3,SPRN_DAR + b .do_slb_miss + + STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) + + .globl instruction_access_slb_iSeries +instruction_access_slb_iSeries: + mtspr SPRN_SPRG1,r13 /* save r13 */ + EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + std r3,PACA_EXSLB+EX_R3(r13) + ld r12,PACALPPACA+LPPACASRR1(r13) + ld r3,PACALPPACA+LPPACASRR0(r13) + b .do_slb_miss + + MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) + STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) + STD_EXCEPTION_ISERIES(0x700, program_check, PACA_EXGEN) + STD_EXCEPTION_ISERIES(0x800, fp_unavailable, PACA_EXGEN) + MASKABLE_EXCEPTION_ISERIES(0x900, decrementer) + STD_EXCEPTION_ISERIES(0xa00, trap_0a, PACA_EXGEN) + STD_EXCEPTION_ISERIES(0xb00, trap_0b, PACA_EXGEN) + + .globl system_call_iSeries +system_call_iSeries: + mr r9,r13 + mfspr r13,SPRN_SPRG3 + EXCEPTION_PROLOG_ISERIES_2 + b system_call_common + + STD_EXCEPTION_ISERIES( 0xd00, single_step, PACA_EXGEN) + STD_EXCEPTION_ISERIES( 0xe00, trap_0e, PACA_EXGEN) + STD_EXCEPTION_ISERIES( 0xf00, performance_monitor, PACA_EXGEN) + + .globl system_reset_iSeries +system_reset_iSeries: + mfspr r13,SPRN_SPRG3 /* Get paca address */ + mfmsr r24 + ori r24,r24,MSR_RI + mtmsrd r24 /* RI on */ + lhz r24,PACAPACAINDEX(r13) /* Get processor # */ + cmpwi 0,r24,0 /* Are we processor 0? */ + beq .__start_initialization_iSeries /* Start up the first processor */ + mfspr r4,SPRN_CTRLF + li r5,CTRL_RUNLATCH /* Turn off the run light */ + andc r4,r4,r5 + mtspr SPRN_CTRLT,r4 + +1: + HMT_LOW +#ifdef CONFIG_SMP + lbz r23,PACAPROCSTART(r13) /* Test if this processor + * should start */ + sync + LOADADDR(r3,current_set) + sldi r28,r24,3 /* get current_set[cpu#] */ + ldx r3,r3,r28 + addi r1,r3,THREAD_SIZE + subi r1,r1,STACK_FRAME_OVERHEAD + + cmpwi 0,r23,0 + beq iSeries_secondary_smp_loop /* Loop until told to go */ + bne .__secondary_start /* Loop until told to go */ +iSeries_secondary_smp_loop: + /* Let the Hypervisor know we are alive */ + /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ + lis r3,0x8002 + rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ +#else /* CONFIG_SMP */ + /* Yield the processor. This is required for non-SMP kernels + which are running on multi-threaded machines. */ + lis r3,0x8000 + rldicr r3,r3,32,15 /* r3 = (r3 << 32) & 0xffff000000000000 */ + addi r3,r3,18 /* r3 = 0x8000000000000012 which is "yield" */ + li r4,0 /* "yield timed" */ + li r5,-1 /* "yield forever" */ +#endif /* CONFIG_SMP */ + li r0,-1 /* r0=-1 indicates a Hypervisor call */ + sc /* Invoke the hypervisor via a system call */ + mfspr r13,SPRN_SPRG3 /* Put r13 back ???? */ + b 1b /* If SMP not configured, secondaries + * loop forever */ + + .globl decrementer_iSeries_masked +decrementer_iSeries_masked: + li r11,1 + stb r11,PACALPPACA+LPPACADECRINT(r13) + lwz r12,PACADEFAULTDECR(r13) + mtspr SPRN_DEC,r12 + /* fall through */ + + .globl hardware_interrupt_iSeries_masked +hardware_interrupt_iSeries_masked: + mtcrf 0x80,r9 /* Restore regs */ + ld r11,PACALPPACA+LPPACASRR0(r13) + ld r12,PACALPPACA+LPPACASRR1(r13) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + rfid + b . /* prevent speculative execution */ +#endif /* CONFIG_PPC_ISERIES */ + +/*** Common interrupt handlers ***/ + + STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) + + /* + * Machine check is different because we use a different + * save area: PACA_EXMC instead of PACA_EXGEN. + */ + .align 7 + .globl machine_check_common +machine_check_common: + EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) + DISABLE_INTS + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .machine_check_exception + b .ret_from_except + + STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt) + STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) + STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) + STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) + STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) + STD_EXCEPTION_COMMON(0xf00, performance_monitor, .performance_monitor_exception) + STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) +#ifdef CONFIG_ALTIVEC + STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) +#else + STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception) +#endif + +/* + * Here we have detected that the kernel stack pointer is bad. + * R9 contains the saved CR, r13 points to the paca, + * r10 contains the (bad) kernel stack pointer, + * r11 and r12 contain the saved SRR0 and SRR1. + * We switch to using an emergency stack, save the registers there, + * and call kernel_bad_stack(), which panics. + */ +bad_stack: + ld r1,PACAEMERGSP(r13) + subi r1,r1,64+INT_FRAME_SIZE + std r9,_CCR(r1) + std r10,GPR1(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + mfspr r11,SPRN_DAR + mfspr r12,SPRN_DSISR + std r11,_DAR(r1) + std r12,_DSISR(r1) + mflr r10 + mfctr r11 + mfxer r12 + std r10,_LINK(r1) + std r11,_CTR(r1) + std r12,_XER(r1) + SAVE_GPR(0,r1) + SAVE_GPR(2,r1) + SAVE_4GPRS(3,r1) + SAVE_2GPRS(7,r1) + SAVE_10GPRS(12,r1) + SAVE_10GPRS(22,r1) + addi r11,r1,INT_FRAME_SIZE + std r11,0(r1) + li r12,0 + std r12,0(r11) + ld r2,PACATOC(r13) +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .kernel_bad_stack + b 1b + +/* + * Return from an exception with minimal checks. + * The caller is assumed to have done EXCEPTION_PROLOG_COMMON. + * If interrupts have been enabled, or anything has been + * done that might have changed the scheduling status of + * any task or sent any task a signal, you should use + * ret_from_except or ret_from_except_lite instead of this. + */ + .globl fast_exception_return +fast_exception_return: + ld r12,_MSR(r1) + ld r11,_NIP(r1) + andi. r3,r12,MSR_RI /* check if RI is set */ + beq- unrecov_fer + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + mtcr r3 + mtlr r4 + mtctr r5 + mtxer r6 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + + mfmsr r10 + clrrdi r10,r10,2 /* clear RI (LE is 0 already) */ + mtmsrd r10,1 + + mtspr SPRN_SRR1,r12 + mtspr SPRN_SRR0,r11 + REST_4GPRS(10, r1) + ld r1,GPR1(r1) + rfid + b . /* prevent speculative execution */ + +unrecov_fer: + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + +/* + * Here r13 points to the paca, r9 contains the saved CR, + * SRR0 and SRR1 are saved in r11 and r12, + * r9 - r13 are saved in paca->exgen. + */ + .align 7 + .globl data_access_common +data_access_common: + RUNLATCH_ON(r10) /* It wont fit in the 0x300 handler */ + mfspr r10,SPRN_DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + li r5,0x300 + b .do_hash_page /* Try to handle as hpte fault */ + + .align 7 + .globl instruction_access_common +instruction_access_common: + EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) + ld r3,_NIP(r1) + andis. r4,r12,0x5820 + li r5,0x400 + b .do_hash_page /* Try to handle as hpte fault */ + + .align 7 + .globl hardware_interrupt_common + .globl hardware_interrupt_entry +hardware_interrupt_common: + EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN) +hardware_interrupt_entry: + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_IRQ + b .ret_from_except_lite + + .align 7 + .globl alignment_common +alignment_common: + mfspr r10,SPRN_DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN) + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + std r3,_DAR(r1) + std r4,_DSISR(r1) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .alignment_exception + b .ret_from_except + + .align 7 + .globl program_check_common +program_check_common: + EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .program_check_exception + b .ret_from_except + + .align 7 + .globl fp_unavailable_common +fp_unavailable_common: + EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) + bne .load_up_fpu /* if from user, just load it up */ + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .kernel_fp_unavailable_exception + BUG_OPCODE + + .align 7 + .globl altivec_unavailable_common +altivec_unavailable_common: + EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + bne .load_up_altivec /* if from user, just load it up */ +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .altivec_unavailable_exception + b .ret_from_except + +#ifdef CONFIG_ALTIVEC +/* + * load_up_altivec(unused, unused, tsk) + * Disable VMX for the task which had it previously, + * and save its vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. + * On SMP we know the VMX is free, since we give it up every + * switch (ie, no lazy save of the vector registers). + * On entry: r13 == 'current' && last_task_used_altivec != 'current' + */ +_STATIC(load_up_altivec) + mfmsr r5 /* grab the current MSR */ + oris r5,r5,MSR_VEC@h + mtmsrd r5 /* enable use of VMX now */ + isync + +/* + * For SMP, we don't do lazy VMX switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_altvec in switch_to. + * VRSAVE isn't dealt with here, that is done in the normal context + * switch code. Note that we could rely on vrsave value to eventually + * avoid saving all of the VREGs here... + */ +#ifndef CONFIG_SMP + ld r3,last_task_used_altivec@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Save VMX state to last_task_used_altivec's THREAD struct */ + addi r4,r4,THREAD + SAVE_32VRS(0,r5,r4) + mfvscr vr0 + li r10,THREAD_VSCR + stvx vr0,r10,r4 + /* Disable VMX for last_task_used_altivec */ + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r6,MSR_VEC@h + andc r4,r4,r6 + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* Hack: if we get an altivec unavailable trap with VRSAVE + * set to all zeros, we assume this is a broken application + * that fails to set it properly, and thus we switch it to + * all 1's + */ + mfspr r4,SPRN_VRSAVE + cmpdi 0,r4,0 + bne+ 1f + li r4,-1 + mtspr SPRN_VRSAVE,r4 +1: + /* enable use of VMX after return */ + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + oris r12,r12,MSR_VEC@h + std r12,_MSR(r1) + li r4,1 + li r10,THREAD_VSCR + stw r4,THREAD_USED_VR(r5) + lvx vr0,r10,r5 + mtvscr vr0 + REST_32VRS(0,r4,r5) +#ifndef CONFIG_SMP + /* Update last_task_used_math to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + std r4,0(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return +#endif /* CONFIG_ALTIVEC */ + +/* + * Hash table stuff + */ + .align 7 +_GLOBAL(do_hash_page) + std r3,_DAR(r1) + std r4,_DSISR(r1) + + andis. r0,r4,0xa450 /* weird error? */ + bne- .handle_page_fault /* if not, try to insert a HPTE */ +BEGIN_FTR_SECTION + andis. r0,r4,0x0020 /* Is it a segment table fault? */ + bne- .do_ste_alloc /* If so handle it */ +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + + /* + * We need to set the _PAGE_USER bit if MSR_PR is set or if we are + * accessing a userspace segment (even from the kernel). We assume + * kernel addresses always have the high bit set. + */ + rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */ + rotldi r0,r3,15 /* Move high bit into MSR_PR posn */ + orc r0,r12,r0 /* MSR_PR | ~high_bit */ + rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */ + ori r4,r4,1 /* add _PAGE_PRESENT */ + rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */ + + /* + * On iSeries, we soft-disable interrupts here, then + * hard-enable interrupts so that the hash_page code can spin on + * the hash_table_lock without problems on a shared processor. + */ + DISABLE_INTS + + /* + * r3 contains the faulting address + * r4 contains the required access permissions + * r5 contains the trap number + * + * at return r3 = 0 for success + */ + bl .hash_page /* build HPTE if possible */ + cmpdi r3,0 /* see if hash_page succeeded */ + +#ifdef DO_SOFT_DISABLE + /* + * If we had interrupts soft-enabled at the point where the + * DSI/ISI occurred, and an interrupt came in during hash_page, + * handle it now. + * We jump to ret_from_except_lite rather than fast_exception_return + * because ret_from_except_lite will check for and handle pending + * interrupts if necessary. + */ + beq .ret_from_except_lite + /* For a hash failure, we don't bother re-enabling interrupts */ + ble- 12f + + /* + * hash_page couldn't handle it, set soft interrupt enable back + * to what it was before the trap. Note that .local_irq_restore + * handles any interrupts pending at this point. + */ + ld r3,SOFTE(r1) + bl .local_irq_restore + b 11f +#else + beq fast_exception_return /* Return from exception on success */ + ble- 12f /* Failure return from hash_page */ + + /* fall through */ +#endif + +/* Here we have a page fault that hash_page can't handle. */ +_GLOBAL(handle_page_fault) + ENABLE_INTS +11: ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_page_fault + cmpdi r3,0 + beq+ .ret_from_except_lite + bl .save_nvgprs + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + lwz r4,_DAR(r1) + bl .bad_page_fault + b .ret_from_except + +/* We have a page fault that hash_page could handle but HV refused + * the PTE insertion + */ +12: bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + lwz r4,_DAR(r1) + bl .low_hash_fault + b .ret_from_except + + /* here we have a segment miss */ +_GLOBAL(do_ste_alloc) + bl .ste_allocate /* try to insert stab entry */ + cmpdi r3,0 + beq+ fast_exception_return + b .handle_page_fault + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r11 and r12 contain the saved SRR0 and SRR1. + * r9 - r13 are saved in paca->exslb. + * We assume we aren't going to take any exceptions during this procedure. + * We assume (DAR >> 60) == 0xc. + */ + .align 7 +_GLOBAL(do_stab_bolted) + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ + + /* Hash to the primary group */ + ld r10,PACASTABVIRT(r13) + mfspr r11,SPRN_DAR + srdi r11,r11,28 + rldimi r10,r11,7,52 /* r10 = first ste of the group */ + + /* Calculate VSID */ + /* This is a kernel address, so protovsid = ESID */ + ASM_VSID_SCRAMBLE(r11, r9) + rldic r9,r11,12,16 /* r9 = vsid << 12 */ + + /* Search the primary group for a free entry */ +1: ld r11,0(r10) /* Test valid bit of the current ste */ + andi. r11,r11,0x80 + beq 2f + addi r10,r10,16 + andi. r11,r10,0x70 + bne 1b + + /* Stick for only searching the primary group for now. */ + /* At least for now, we use a very simple random castout scheme */ + /* Use the TB as a random number ; OR in 1 to avoid entry 0 */ + mftb r11 + rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */ + ori r11,r11,0x10 + + /* r10 currently points to an ste one past the group of interest */ + /* make it point to the randomly selected entry */ + subi r10,r10,128 + or r10,r10,r11 /* r10 is the entry to invalidate */ + + isync /* mark the entry invalid */ + ld r11,0(r10) + rldicl r11,r11,56,1 /* clear the valid bit */ + rotldi r11,r11,8 + std r11,0(r10) + sync + + clrrdi r11,r11,28 /* Get the esid part of the ste */ + slbie r11 + +2: std r9,8(r10) /* Store the vsid part of the ste */ + eieio + + mfspr r11,SPRN_DAR /* Get the new esid */ + clrrdi r11,r11,28 /* Permits a full 32b of ESID */ + ori r11,r11,0x90 /* Turn on valid and kp */ + std r11,0(r10) /* Put new entry back into the stab */ + + sync + + /* All done -- return from exception. */ + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ + ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */ + + andi. r10,r12,MSR_RI + beq- unrecov_slb + + mtcrf 0x80,r9 /* restore CR */ + + mfmsr r10 + clrrdi r10,r10,2 + mtmsrd r10,1 + + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r11 and r12 contain the saved SRR0 and SRR1. + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(do_slb_miss) + mflr r10 + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate /* handle it */ + + /* All done -- return from exception. */ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ +#ifdef CONFIG_PPC_ISERIES + ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ +#endif /* CONFIG_PPC_ISERIES */ + + mtlr r10 + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_slb + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + +#ifdef CONFIG_PPC_ISERIES + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +#endif /* CONFIG_PPC_ISERIES */ + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + +/* + * Space for CPU0's segment table. + * + * On iSeries, the hypervisor must fill in at least one entry before + * we get control (with relocate on). The address is give to the hv + * as a page number (see xLparMap in lpardata.c), so this must be at a + * fixed address (the linker can't compute (u64)&initial_stab >> + * PAGE_SHIFT). + */ + . = STAB0_PHYS_ADDR /* 0x6000 */ + .globl initial_stab +initial_stab: + .space 4096 + +/* + * Data area reserved for FWNMI option. + * This address (0x7000) is fixed by the RPA. + */ + .= 0x7000 + .globl fwnmi_data_area +fwnmi_data_area: + + /* iSeries does not use the FWNMI stuff, so it is safe to put + * this here, even if we later allow kernels that will boot on + * both pSeries and iSeries */ +#ifdef CONFIG_PPC_ISERIES + . = LPARMAP_PHYS +#include "lparmap.s" +/* + * This ".text" is here for old compilers that generate a trailing + * .note section when compiling .c files to .s + */ + .text +#endif /* CONFIG_PPC_ISERIES */ + + . = 0x8000 + +/* + * On pSeries, secondary processors spin in the following code. + * At entry, r3 = this processor's number (physical cpu id) + */ +_GLOBAL(pSeries_secondary_smp_init) + mr r24,r3 + + /* turn on 64-bit mode */ + bl .enable_64b_mode + isync + + /* Copy some CPU settings from CPU 0 */ + bl .__restore_cpu_setup + + /* Set up a paca value for this processor. Since we have the + * physical cpu id in r24, we need to search the pacas to find + * which logical id maps to our physical one. + */ + LOADADDR(r13, paca) /* Get base vaddr of paca array */ + li r5,0 /* logical cpu id */ +1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ + cmpw r6,r24 /* Compare to our id */ + beq 2f + addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */ + addi r5,r5,1 + cmpwi r5,NR_CPUS + blt 1b + + mr r3,r24 /* not found, copy phys to r3 */ + b .kexec_wait /* next kernel might do better */ + +2: mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */ + /* From now on, r24 is expected to be logical cpuid */ + mr r24,r5 +3: HMT_LOW + lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ + /* start. */ + sync + + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + + cmpwi 0,r23,0 +#ifdef CONFIG_SMP + bne .__secondary_start +#endif + b 3b /* Loop until told to go */ + +#ifdef CONFIG_PPC_ISERIES +_STATIC(__start_initialization_iSeries) + /* Clear out the BSS */ + LOADADDR(r11,__bss_stop) + LOADADDR(r8,__bss_start) + sub r11,r11,r8 /* bss size */ + addi r11,r11,7 /* round up to an even double word */ + rldicl. r11,r11,61,3 /* shift right by 3 */ + beq 4f + addi r8,r8,-8 + li r0,0 + mtctr r11 /* zero this many doublewords */ +3: stdu r0,8(r8) + bdnz 3b +4: + LOADADDR(r1,init_thread_union) + addi r1,r1,THREAD_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + LOADADDR(r3,cpu_specs) + LOADADDR(r4,cur_cpu_spec) + li r5,0 + bl .identify_cpu + + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + + bl .iSeries_early_setup + bl .early_setup + + /* relocation is on at this point */ + + b .start_here_common +#endif /* CONFIG_PPC_ISERIES */ + +#ifdef CONFIG_PPC_MULTIPLATFORM + +_STATIC(__mmu_off) + mfmsr r3 + andi. r0,r3,MSR_IR|MSR_DR + beqlr + andc r3,r3,r0 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + sync + rfid + b . /* prevent speculative execution */ + + +/* + * Here is our main kernel entry point. We support currently 2 kind of entries + * depending on the value of r5. + * + * r5 != NULL -> OF entry, we go to prom_init, "legacy" parameter content + * in r3...r7 + * + * r5 == NULL -> kexec style entry. r3 is a physical pointer to the + * DT block, r4 is a physical pointer to the kernel itself + * + */ +_GLOBAL(__start_initialization_multiplatform) + /* + * Are we booted from a PROM Of-type client-interface ? + */ + cmpldi cr0,r5,0 + bne .__boot_from_prom /* yes -> prom */ + + /* Save parameters */ + mr r31,r3 + mr r30,r4 + + /* Make sure we are running in 64 bits mode */ + bl .enable_64b_mode + + /* Setup some critical 970 SPRs before switching MMU off */ + bl .__970_cpu_preinit + + /* cpu # */ + li r24,0 + + /* Switch off MMU if not already */ + LOADADDR(r4, .__after_prom_start - KERNELBASE) + add r4,r4,r30 + bl .__mmu_off + b .__after_prom_start + +_STATIC(__boot_from_prom) + /* Save parameters */ + mr r31,r3 + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + + /* Make sure we are running in 64 bits mode */ + bl .enable_64b_mode + + /* put a relocation offset into r3 */ + bl .reloc_offset + + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + + /* Relocate the TOC from a virt addr to a real addr */ + add r2,r2,r3 + + /* Restore parameters */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + + /* Do all of the interaction with OF client interface */ + bl .prom_init + /* We never return */ + trap + +/* + * At this point, r3 contains the physical address we are running at, + * returned by prom_init() + */ +_STATIC(__after_prom_start) + +/* + * We need to run with __start at physical address 0. + * This will leave some code in the first 256B of + * real memory, which are reserved for software use. + * The remainder of the first page is loaded with the fixed + * interrupt vectors. The next two pages are filled with + * unknown exception placeholders. + * + * Note: This process overwrites the OF exception vectors. + * r26 == relocation offset + * r27 == KERNELBASE + */ + bl .reloc_offset + mr r26,r3 + SET_REG_TO_CONST(r27,KERNELBASE) + + li r3,0 /* target addr */ + + // XXX FIXME: Use phys returned by OF (r30) + add r4,r27,r26 /* source addr */ + /* current address of _start */ + /* i.e. where we are running */ + /* the source addr */ + + LOADADDR(r5,copy_to_here) /* # bytes of memory to copy */ + sub r5,r5,r27 + + li r6,0x100 /* Start offset, the first 0x100 */ + /* bytes were copied earlier. */ + + bl .copy_and_flush /* copy the first n bytes */ + /* this includes the code being */ + /* executed here. */ + + LOADADDR(r0, 4f) /* Jump to the copy of this code */ + mtctr r0 /* that we just made/relocated */ + bctr + +4: LOADADDR(r5,klimit) + add r5,r5,r26 + ld r5,0(r5) /* get the value of klimit */ + sub r5,r5,r27 + bl .copy_and_flush /* copy the rest */ + b .start_here_multiplatform + +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +/* + * Copy routine used to copy the kernel to start at physical address 0 + * and flush and invalidate the caches as needed. + * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset + * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. + * + * Note: this routine *only* clobbers r0, r6 and lr + */ +_GLOBAL(copy_and_flush) + addi r5,r5,-8 + addi r6,r6,-8 +4: li r0,16 /* Use the least common */ + /* denominator cache line */ + /* size. This results in */ + /* extra cache line flushes */ + /* but operation is correct. */ + /* Can't get cache line size */ + /* from NACA as it is being */ + /* moved too. */ + + mtctr r0 /* put # words/line in ctr */ +3: addi r6,r6,8 /* copy a cache line */ + ldx r0,r6,r4 + stdx r0,r6,r3 + bdnz 3b + dcbst r6,r3 /* write it to memory */ + sync + icbi r6,r3 /* flush the icache line */ + cmpld 0,r6,r5 + blt 4b + sync + addi r5,r5,8 + addi r6,r6,8 + blr + +.align 8 +copy_to_here: + +#ifdef CONFIG_SMP +#ifdef CONFIG_PPC_PMAC +/* + * On PowerMac, secondary processors starts from the reset vector, which + * is temporarily turned into a call to one of the functions below. + */ + .section ".text"; + .align 2 ; + + .globl pmac_secondary_start_1 +pmac_secondary_start_1: + li r24, 1 + b .pmac_secondary_start + + .globl pmac_secondary_start_2 +pmac_secondary_start_2: + li r24, 2 + b .pmac_secondary_start + + .globl pmac_secondary_start_3 +pmac_secondary_start_3: + li r24, 3 + b .pmac_secondary_start + +_GLOBAL(pmac_secondary_start) + /* turn on 64-bit mode */ + bl .enable_64b_mode + isync + + /* Copy some CPU settings from CPU 0 */ + bl .__restore_cpu_setup + + /* pSeries do that early though I don't think we really need it */ + mfmsr r3 + ori r3,r3,MSR_RI + mtmsrd r3 /* RI on */ + + /* Set up a paca value for this processor. */ + LOADADDR(r4, paca) /* Get base vaddr of paca array */ + mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r13,r4 /* for this processor. */ + mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */ + + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + + b .__secondary_start + +#endif /* CONFIG_PPC_PMAC */ + +/* + * This function is called after the master CPU has released the + * secondary processors. The execution environment is relocation off. + * The paca for this processor has the following fields initialized at + * this point: + * 1. Processor number + * 2. Segment table pointer (virtual address) + * On entry the following are set: + * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries + * r24 = cpu# (in Linux terms) + * r13 = paca virtual address + * SPRG3 = paca virtual address + */ +_GLOBAL(__secondary_start) + + HMT_MEDIUM /* Set thread priority to MEDIUM */ + + ld r2,PACATOC(r13) + li r6,0 + stb r6,PACAPROCENABLED(r13) + +#ifndef CONFIG_PPC_ISERIES + /* Initialize the page table pointer register. */ + LOADADDR(r6,_SDR1) + ld r6,0(r6) /* get the value of _SDR1 */ + mtspr SPRN_SDR1,r6 /* set the htab location */ +#endif + /* Initialize the first segment table (or SLB) entry */ + ld r3,PACASTABVIRT(r13) /* get addr of segment table */ + bl .stab_initialize + + /* Initialize the kernel stack. Just a repeat for iSeries. */ + LOADADDR(r3,current_set) + sldi r28,r24,3 /* get current_set[cpu#] */ + ldx r1,r3,r28 + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + std r1,PACAKSAVE(r13) + + ld r3,PACASTABREAL(r13) /* get raddr of segment table */ + ori r4,r3,1 /* turn on valid bit */ + +#ifdef CONFIG_PPC_ISERIES + li r0,-1 /* hypervisor call */ + li r3,1 + sldi r3,r3,63 /* 0x8000000000000000 */ + ori r3,r3,4 /* 0x8000000000000004 */ + sc /* HvCall_setASR */ +#else + /* set the ASR */ + ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ + ld r3,0(r3) + lwz r3,PLATFORM(r3) /* r3 = platform flags */ + andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ + beq 98f /* branch if result is 0 */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmpwi r3,0x37 /* SStar */ + beq 97f + cmpwi r3,0x36 /* IStar */ + beq 97f + cmpwi r3,0x34 /* Pulsar */ + bne 98f +97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ + HVSC /* Invoking hcall */ + b 99f +98: /* !(rpa hypervisor) || !(star) */ + mtasr r4 /* set the stab location */ +99: +#endif + li r7,0 + mtlr r7 + + /* enable MMU and jump to start_secondary */ + LOADADDR(r3,.start_secondary_prolog) + SET_REG_TO_CONST(r4, MSR_KERNEL) +#ifdef DO_SOFT_DISABLE + ori r4,r4,MSR_EE +#endif + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + rfid + b . /* prevent speculative execution */ + +/* + * Running with relocation on at this point. All we want to do is + * zero the stack back-chain pointer before going into C code. + */ +_GLOBAL(start_secondary_prolog) + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl .start_secondary +#endif + +/* + * This subroutine clobbers r11 and r12 + */ +_GLOBAL(enable_64b_mode) + mfmsr r11 /* grab the current MSR */ + li r12,1 + rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) + or r11,r11,r12 + li r12,1 + rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) + or r11,r11,r12 + mtmsrd r11 + isync + blr + +#ifdef CONFIG_PPC_MULTIPLATFORM +/* + * This is where the main kernel code starts. + */ +_STATIC(start_here_multiplatform) + /* get a new offset, now that the kernel has moved. */ + bl .reloc_offset + mr r26,r3 + + /* Clear out the BSS. It may have been done in prom_init, + * already but that's irrelevant since prom_init will soon + * be detached from the kernel completely. Besides, we need + * to clear it now for kexec-style entry. + */ + LOADADDR(r11,__bss_stop) + LOADADDR(r8,__bss_start) + sub r11,r11,r8 /* bss size */ + addi r11,r11,7 /* round up to an even double word */ + rldicl. r11,r11,61,3 /* shift right by 3 */ + beq 4f + addi r8,r8,-8 + li r0,0 + mtctr r11 /* zero this many doublewords */ +3: stdu r0,8(r8) + bdnz 3b +4: + + mfmsr r6 + ori r6,r6,MSR_RI + mtmsrd r6 /* RI on */ + +#ifdef CONFIG_HMT + /* Start up the second thread on cpu 0 */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmpwi r3,0x34 /* Pulsar */ + beq 90f + cmpwi r3,0x36 /* Icestar */ + beq 90f + cmpwi r3,0x37 /* SStar */ + beq 90f + b 91f /* HMT not supported */ +90: li r3,0 + bl .hmt_start_secondary +91: +#endif + + /* The following gets the stack and TOC set up with the regs */ + /* pointing to the real addr of the kernel stack. This is */ + /* all done to support the C function call below which sets */ + /* up the htab. This is done because we have relocated the */ + /* kernel but are still running in real mode. */ + + LOADADDR(r3,init_thread_union) + add r3,r3,r26 + + /* set up a stack pointer (physical address) */ + addi r1,r3,THREAD_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + /* set up the TOC (physical address) */ + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + add r2,r2,r26 + + LOADADDR(r3,cpu_specs) + add r3,r3,r26 + LOADADDR(r4,cur_cpu_spec) + add r4,r4,r26 + mr r5,r26 + bl .identify_cpu + + /* Save some low level config HIDs of CPU0 to be copied to + * other CPUs later on, or used for suspend/resume + */ + bl .__save_cpu_setup + sync + + /* Setup a valid physical PACA pointer in SPRG3 for early_setup + * note that boot_cpuid can always be 0 nowadays since there is + * nowhere it can be initialized differently before we reach this + * code + */ + LOADADDR(r27, boot_cpuid) + add r27,r27,r26 + lwz r27,0(r27) + + LOADADDR(r24, paca) /* Get base vaddr of paca array */ + mulli r13,r27,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r13,r24 /* for this processor. */ + add r13,r13,r26 /* convert to physical addr */ + mtspr SPRN_SPRG3,r13 /* PPPBBB: Temp... -Peter */ + + /* Do very early kernel initializations, including initial hash table, + * stab and slb setup before we turn on relocation. */ + + /* Restore parameters passed from prom_init/kexec */ + mr r3,r31 + bl .early_setup + + /* set the ASR */ + ld r3,PACASTABREAL(r13) + ori r4,r3,1 /* turn on valid bit */ + ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ + ld r3,0(r3) + lwz r3,PLATFORM(r3) /* r3 = platform flags */ + andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ + beq 98f /* branch if result is 0 */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmpwi r3,0x37 /* SStar */ + beq 97f + cmpwi r3,0x36 /* IStar */ + beq 97f + cmpwi r3,0x34 /* Pulsar */ + bne 98f +97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ + HVSC /* Invoking hcall */ + b 99f +98: /* !(rpa hypervisor) || !(star) */ + mtasr r4 /* set the stab location */ +99: + /* Set SDR1 (hash table pointer) */ + ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ + ld r3,0(r3) + lwz r3,PLATFORM(r3) /* r3 = platform flags */ + /* Test if bit 0 is set (LPAR bit) */ + andi. r3,r3,PLATFORM_LPAR + bne 98f /* branch if result is !0 */ + LOADADDR(r6,_SDR1) /* Only if NOT LPAR */ + add r6,r6,r26 + ld r6,0(r6) /* get the value of _SDR1 */ + mtspr SPRN_SDR1,r6 /* set the htab location */ +98: + LOADADDR(r3,.start_here_common) + SET_REG_TO_CONST(r4, MSR_KERNEL) + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + rfid + b . /* prevent speculative execution */ +#endif /* CONFIG_PPC_MULTIPLATFORM */ + + /* This is where all platforms converge execution */ +_STATIC(start_here_common) + /* relocation is on at this point */ + + /* The following code sets up the SP and TOC now that we are */ + /* running with translation enabled. */ + + LOADADDR(r3,init_thread_union) + + /* set up the stack */ + addi r1,r3,THREAD_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + /* Apply the CPUs-specific fixups (nop out sections not relevant + * to this CPU + */ + li r3,0 + bl .do_cpu_ftr_fixups + + LOADADDR(r26, boot_cpuid) + lwz r26,0(r26) + + LOADADDR(r24, paca) /* Get base vaddr of paca array */ + mulli r13,r26,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r13,r24 /* for this processor. */ + mtspr SPRN_SPRG3,r13 + + /* ptr to current */ + LOADADDR(r4,init_task) + std r4,PACACURRENT(r13) + + /* Load the TOC */ + ld r2,PACATOC(r13) + std r1,PACAKSAVE(r13) + + bl .setup_system + + /* Load up the kernel context */ +5: +#ifdef DO_SOFT_DISABLE + li r5,0 + stb r5,PACAPROCENABLED(r13) /* Soft Disabled */ + mfmsr r5 + ori r5,r5,MSR_EE /* Hard Enabled */ + mtmsrd r5 +#endif + + bl .start_kernel + +_GLOBAL(hmt_init) +#ifdef CONFIG_HMT + LOADADDR(r5, hmt_thread_data) + mfspr r7,SPRN_PVR + srwi r7,r7,16 + cmpwi r7,0x34 /* Pulsar */ + beq 90f + cmpwi r7,0x36 /* Icestar */ + beq 91f + cmpwi r7,0x37 /* SStar */ + beq 91f + b 101f +90: mfspr r6,SPRN_PIR + andi. r6,r6,0x1f + b 92f +91: mfspr r6,SPRN_PIR + andi. r6,r6,0x3ff +92: sldi r4,r24,3 + stwx r6,r5,r4 + bl .hmt_start_secondary + b 101f + +__hmt_secondary_hold: + LOADADDR(r5, hmt_thread_data) + clrldi r5,r5,4 + li r7,0 + mfspr r6,SPRN_PIR + mfspr r8,SPRN_PVR + srwi r8,r8,16 + cmpwi r8,0x34 + bne 93f + andi. r6,r6,0x1f + b 103f +93: andi. r6,r6,0x3f + +103: lwzx r8,r5,r7 + cmpw r8,r6 + beq 104f + addi r7,r7,8 + b 103b + +104: addi r7,r7,4 + lwzx r9,r5,r7 + mr r24,r9 +101: +#endif + mr r3,r24 + b .pSeries_secondary_smp_init + +#ifdef CONFIG_HMT +_GLOBAL(hmt_start_secondary) + LOADADDR(r4,__hmt_secondary_hold) + clrldi r4,r4,4 + mtspr SPRN_NIADORM, r4 + mfspr r4, SPRN_MSRDORM + li r5, -65 + and r4, r4, r5 + mtspr SPRN_MSRDORM, r4 + lis r4,0xffef + ori r4,r4,0x7403 + mtspr SPRN_TSC, r4 + li r4,0x1f4 + mtspr SPRN_TST, r4 + mfspr r4, SPRN_HID0 + ori r4, r4, 0x1 + mtspr SPRN_HID0, r4 + mfspr r4, SPRN_CTRLF + oris r4, r4, 0x40 + mtspr SPRN_CTRLT, r4 + blr +#endif + +#if defined(CONFIG_KEXEC) || defined(CONFIG_SMP) +_GLOBAL(smp_release_cpus) + /* All secondary cpus are spinning on a common + * spinloop, release them all now so they can start + * to spin on their individual paca spinloops. + * For non SMP kernels, the secondary cpus never + * get out of the common spinloop. + * XXX This does nothing useful on iSeries, secondaries are + * already waiting on their paca. + */ + li r3,1 + LOADADDR(r5,__secondary_hold_spinloop) + std r3,0(r5) + sync + blr +#endif /* CONFIG_SMP */ + + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the bss, which is page-aligned. + */ + .section ".bss" + + .align PAGE_SHIFT + + .globl empty_zero_page +empty_zero_page: + .space PAGE_SIZE + + .globl swapper_pg_dir +swapper_pg_dir: + .space PAGE_SIZE + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * Used to pass parameters into the kernel like root=/dev/sda1, etc. + */ + .globl cmd_line +cmd_line: + .space COMMAND_LINE_SIZE diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S new file mode 100644 index 00000000000..de097874222 --- /dev/null +++ b/arch/powerpc/kernel/head_8xx.S @@ -0,0 +1,860 @@ +/* + * arch/ppc/kernel/except_8xx.S + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications by Dan Malek + * Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This file contains low-level support and setup for PowerPC 8xx + * embedded processors, including trap and interrupt dispatch. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/cache.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* Macro to make the code more readable. */ +#ifdef CONFIG_8xx_CPU6 +#define DO_8xx_CPU6(val, reg) \ + li reg, val; \ + stw reg, 12(r0); \ + lwz reg, 12(r0); +#else +#define DO_8xx_CPU6(val, reg) +#endif + .text + .globl _stext +_stext: + .text + .globl _start +_start: + +/* MPC8xx + * This port was done on an MBX board with an 860. Right now I only + * support an ELF compressed (zImage) boot from EPPC-Bug because the + * code there loads up some registers before calling us: + * r3: ptr to board info data + * r4: initrd_start or if no initrd then 0 + * r5: initrd_end - unused if r4 is 0 + * r6: Start of command line string + * r7: End of command line string + * + * I decided to use conditional compilation instead of checking PVR and + * adding more processor specific branches around code I don't need. + * Since this is an embedded processor, I also appreciate any memory + * savings I can get. + * + * The MPC8xx does not have any BATs, but it supports large page sizes. + * We first initialize the MMU to support 8M byte pages, then load one + * entry into each of the instruction and data TLBs to map the first + * 8M 1:1. I also mapped an additional I/O space 1:1 so we can get to + * the "internal" processor registers before MMU_init is called. + * + * The TLB code currently contains a major hack. Since I use the condition + * code register, I have to save and restore it. I am out of registers, so + * I just store it in memory location 0 (the TLB handlers are not reentrant). + * To avoid making any decisions, I need to use the "segment" valid bit + * in the first level table, but that would require many changes to the + * Linux page directory/table functions that I don't want to do right now. + * + * I used to use SPRG2 for a temporary register in the TLB handler, but it + * has since been put to other uses. I now use a hack to save a register + * and the CCR at memory location 0.....Someday I'll fix this..... + * -- Dan + */ + .globl __start +__start: + mr r31,r3 /* save parameters */ + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + + /* We have to turn on the MMU right away so we get cache modes + * set correctly. + */ + bl initial_mmu + +/* We now have the lower 8 Meg mapped into TLB entries, and the caches + * ready to work. + */ + +turn_on_mmu: + mfmsr r0 + ori r0,r0,MSR_DR|MSR_IR + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + SYNC + rfi /* enables MMU */ + +/* + * Exception entry code. This code runs with address translation + * turned off, i.e. using physical addresses. + * We assume sprg3 has the physical address of the current + * task's thread_struct. + */ +#define EXCEPTION_PROLOG \ + mtspr SPRN_SPRG0,r10; \ + mtspr SPRN_SPRG1,r11; \ + mfcr r10; \ + EXCEPTION_PROLOG_1; \ + EXCEPTION_PROLOG_2 + +#define EXCEPTION_PROLOG_1 \ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ + andi. r11,r11,MSR_PR; \ + tophys(r11,r1); /* use tophys(r1) if kernel */ \ + beq 1f; \ + mfspr r11,SPRN_SPRG3; \ + lwz r11,THREAD_INFO-THREAD(r11); \ + addi r11,r11,THREAD_SIZE; \ + tophys(r11,r11); \ +1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ + + +#define EXCEPTION_PROLOG_2 \ + CLR_TOP32(r11); \ + stw r10,_CCR(r11); /* save registers */ \ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r10,SPRN_SPRG0; \ + stw r10,GPR10(r11); \ + mfspr r12,SPRN_SPRG1; \ + stw r12,GPR11(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_SRR0; \ + mfspr r9,SPRN_SRR1; \ + stw r1,GPR1(r11); \ + stw r1,0(r11); \ + tovirt(r1,r11); /* set new kernel sp */ \ + li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ + MTMSRD(r10); /* (except for mach check in rtas) */ \ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* + * Note: code which follows this uses cr0.eq (set if from kernel), + * r11, r12 (SRR0), and r9 (SRR1). + * + * Note2: once we have set r1 we are in a position to take exceptions + * again, and we could thus set MSR:RI at that point. + */ + +/* + * Exception vectors. + */ +#define EXCEPTION(n, label, hdlr, xfer) \ + . = n; \ +label: \ + EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ + li r10,trap; \ + stw r10,TRAP(r11); \ + li r10,MSR_KERNEL; \ + copyee(r10, r9); \ + bl tfer; \ +i##n: \ + .long hdlr; \ + .long ret + +#define COPY_EE(d, s) rlwimi d,s,0,16,16 +#define NOCOPY(d, s) + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ + ret_from_except) + +#define EXC_XFER_EE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_EE_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ + ret_from_except) + +/* System reset */ + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) + +/* Machine check */ + . = 0x200 +MachineCheck: + EXCEPTION_PROLOG + mfspr r4,SPRN_DAR + stw r4,_DAR(r11) + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_STD(0x200, machine_check_exception) + +/* Data access exception. + * This is "never generated" by the MPC8xx. We jump to it for other + * translation errors. + */ + . = 0x300 +DataAccess: + EXCEPTION_PROLOG + mfspr r10,SPRN_DSISR + stw r10,_DSISR(r11) + mr r5,r10 + mfspr r4,SPRN_DAR + EXC_XFER_EE_LITE(0x300, handle_page_fault) + +/* Instruction access exception. + * This is "never generated" by the MPC8xx. We jump to it for other + * translation errors. + */ + . = 0x400 +InstructionAccess: + EXCEPTION_PROLOG + mr r4,r12 + mr r5,r9 + EXC_XFER_EE_LITE(0x400, handle_page_fault) + +/* External interrupt */ + EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + +/* Alignment exception */ + . = 0x600 +Alignment: + EXCEPTION_PROLOG + mfspr r4,SPRN_DAR + stw r4,_DAR(r11) + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0x600, alignment_exception) + +/* Program check exception */ + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + +/* No FPU on MPC8xx. This exception is not supposed to happen. +*/ + EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD) + +/* Decrementer */ + EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + +/* System call */ + . = 0xc00 +SystemCall: + EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0xc00, DoSyscall) + +/* Single step - not used on 601 */ + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE) + +/* On the MPC8xx, this is a software emulation interrupt. It occurs + * for all unimplemented and illegal instructions. + */ + EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD) + + . = 0x1100 +/* + * For the MPC8xx, this is a software tablewalk to load the instruction + * TLB. It is modelled after the example in the Motorola manual. The task + * switch loads the M_TWB register with the pointer to the first level table. + * If we discover there is no second level table (value is zero) or if there + * is an invalid pte, we load that into the TLB, which causes another fault + * into the TLB Error interrupt where we can handle such problems. + * We have to use the MD_xxx registers for the tablewalk because the + * equivalent MI_xxx registers only perform the attribute functions. + */ +InstructionTLBMiss: +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + DO_8xx_CPU6(0x3f80, r3) + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ + mfcr r10 + stw r10, 0(r0) + stw r11, 4(r0) + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ + DO_8xx_CPU6(0x3780, r3) + mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ + mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ + beq 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + rlwimi r10, r11, 0, 2, 19 +3: + lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ + beq 2f /* If zero, don't try to find a pte */ + + /* We have a pte table, so load the MI_TWC with the attributes + * for this "segment." + */ + ori r11,r11,1 /* Set valid bit */ + DO_8xx_CPU6(0x2b80, r3) + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ + mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ + lwz r10, 0(r11) /* Get the pte */ + + ori r10, r10, _PAGE_ACCESSED + stw r10, 0(r11) + + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bits 24, 25, 26, and 27 must be + * set. All other Linux PTE bits control the behavior + * of the MMU. + */ +2: li r11, 0x00f0 + rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + DO_8xx_CPU6(0x2d80, r3) + mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ + + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + rfi + + . = 0x1200 +DataStoreTLBMiss: +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + DO_8xx_CPU6(0x3f80, r3) + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ + mfcr r10 + stw r10, 0(r0) + stw r11, 4(r0) + mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + andi. r11, r10, 0x0800 + beq 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + rlwimi r10, r11, 0, 2, 19 +3: + lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ + beq 2f /* If zero, don't try to find a pte */ + + /* We have a pte table, so load fetch the pte from the table. + */ + ori r11, r11, 1 /* Set valid bit in physical L2 page */ + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ + mfspr r10, SPRN_MD_TWC /* ....and get the pte address */ + lwz r10, 0(r10) /* Get the pte */ + + /* Insert the Guarded flag into the TWC from the Linux PTE. + * It is bit 27 of both the Linux PTE and the TWC (at least + * I got that right :-). It will be better when we can put + * this into the Linux pgd/pmd and load it in the operation + * above. + */ + rlwimi r11, r10, 0, 27, 27 + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 + + mfspr r11, SPRN_MD_TWC /* get the pte address again */ + ori r10, r10, _PAGE_ACCESSED + stw r10, 0(r11) + + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bits 24, 25, 26, and 27 must be + * set. All other Linux PTE bits control the behavior + * of the MMU. + */ +2: li r11, 0x00f0 + rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + DO_8xx_CPU6(0x3d80, r3) + mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + rfi + +/* This is an instruction TLB error on the MPC8xx. This could be due + * to many reasons, such as executing guarded memory or illegal instruction + * addresses. There is nothing to do but handle a big time error fault. + */ + . = 0x1300 +InstructionTLBError: + b InstructionAccess + +/* This is the data TLB error on the MPC8xx. This could be due to + * many reasons, including a dirty update to a pte. We can catch that + * one here, but anything else is an error. First, we track down the + * Linux pte. If it is valid, write access is allowed, but the + * page dirty bit is not set, we will set it and reload the TLB. For + * any other case, we bail out to a higher level function that can + * handle it. + */ + . = 0x1400 +DataTLBError: +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + DO_8xx_CPU6(0x3f80, r3) + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ + mfcr r10 + stw r10, 0(r0) + stw r11, 4(r0) + + /* First, make sure this was a store operation. + */ + mfspr r10, SPRN_DSISR + andis. r11, r10, 0x0200 /* If set, indicates store op */ + beq 2f + + /* The EA of a data TLB miss is automatically stored in the MD_EPN + * register. The EA of a data TLB error is automatically stored in + * the DAR, but not the MD_EPN register. We must copy the 20 most + * significant bits of the EA from the DAR to MD_EPN before we + * start walking the page tables. We also need to copy the CASID + * value from the M_CASID register. + * Addendum: The EA of a data TLB error is _supposed_ to be stored + * in DAR, but it seems that this doesn't happen in some cases, such + * as when the error is due to a dcbi instruction to a page with a + * TLB that doesn't have the changed bit set. In such cases, there + * does not appear to be any way to recover the EA of the error + * since it is neither in DAR nor MD_EPN. As a workaround, the + * _PAGE_HWWRITE bit is set for all kernel data pages when the PTEs + * are initialized in mapin_ram(). This will avoid the problem, + * assuming we only use the dcbi instruction on kernel addresses. + */ + mfspr r10, SPRN_DAR + rlwinm r11, r10, 0, 0, 19 + ori r11, r11, MD_EVALID + mfspr r10, SPRN_M_CASID + rlwimi r11, r10, 0, 28, 31 + DO_8xx_CPU6(0x3780, r3) + mtspr SPRN_MD_EPN, r11 + + mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + andi. r11, r10, 0x0800 + beq 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + rlwimi r10, r11, 0, 2, 19 +3: + lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ + beq 2f /* If zero, bail */ + + /* We have a pte table, so fetch the pte from the table. + */ + ori r11, r11, 1 /* Set valid bit in physical L2 page */ + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ + mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ + lwz r10, 0(r11) /* Get the pte */ + + andi. r11, r10, _PAGE_RW /* Is it writeable? */ + beq 2f /* Bail out if not */ + + /* Update 'changed', among others. + */ + ori r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE + mfspr r11, SPRN_MD_TWC /* Get pte address again */ + stw r10, 0(r11) /* and update pte in table */ + + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bits 24, 25, 26, and 27 must be + * set. All other Linux PTE bits control the behavior + * of the MMU. + */ + li r11, 0x00f0 + rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + DO_8xx_CPU6(0x3d80, r3) + mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + rfi +2: + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + b DataAccess + + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + +/* On the MPC8xx, these next four traps are used for development + * support of breakpoints and such. Someday I will get around to + * using them. + */ + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) + + . = 0x2000 + + .globl giveup_fpu +giveup_fpu: + blr + +/* + * This is where the main kernel code starts. + */ +start_here: + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r4 + li r3,0 + mtspr SPRN_SPRG2,r3 /* 0 => r1 has kernel sp */ + + /* stack */ + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + bl early_init /* We have to do this with MMU on */ + +/* + * Decide what sort of machine this is and initialize the MMU. + */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + bl machine_init + bl MMU_init + +/* + * Go back to running unmapped so we can load up new values + * and change to using our exception vectors. + * On the 8xx, all we have to do is invalidate the TLB to clear + * the old 8M byte TLB mappings and load the page table base register. + */ + /* The right way to do this would be to track it down through + * init's THREAD like the context switch code does, but this is + * easier......until someone changes init's static structures. + */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + tophys(r6,r6) +#ifdef CONFIG_8xx_CPU6 + lis r4, cpu6_errata_word@h + ori r4, r4, cpu6_errata_word@l + li r3, 0x3980 + stw r3, 12(r4) + lwz r3, 12(r4) +#endif + mtspr SPRN_M_TWB, r6 + lis r4,2f@h + ori r4,r4,2f@l + tophys(r4,r4) + li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi +/* Load up the kernel context */ +2: + SYNC /* Force all PTE updates to finish */ + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + + /* set up the PTE pointers for the Abatron bdiGDB. + */ + tovirt(r6,r6) + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r5, 0xf0(r0) /* Must match your Abatron config file */ + tophys(r5,r5) + stw r6, 0(r5) + +/* Now turn on the MMU for real! */ + li r4,MSR_KERNEL + lis r3,start_kernel@h + ori r3,r3,start_kernel@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + rfi /* enable MMU and jump to start_kernel */ + +/* Set up the initial MMU state so we can do the first level of + * kernel initialization. This maps the first 8 MBytes of memory 1:1 + * virtual to physical. Also, set the cache mode since that is defined + * by TLB entries and perform any additional mapping (like of the IMMR). + * If configured to pin some TLBs, we pin the first 8 Mbytes of kernel, + * 24 Mbytes of data, and the 8M IMMR space. Anything not covered by + * these mappings is mapped by page tables. + */ +initial_mmu: + tlbia /* Invalidate all TLB entries */ +#ifdef CONFIG_PIN_TLB + lis r8, MI_RSV4I@h + ori r8, r8, 0x1c00 +#else + li r8, 0 +#endif + mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ + +#ifdef CONFIG_PIN_TLB + lis r10, (MD_RSV4I | MD_RESETVAL)@h + ori r10, r10, 0x1c00 + mr r8, r10 +#else + lis r10, MD_RESETVAL@h +#endif +#ifndef CONFIG_8xx_COPYBACK + oris r10, r10, MD_WTDEF@h +#endif + mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ + + /* Now map the lower 8 Meg into the TLBs. For this quick hack, + * we can load the instruction and data TLB registers with the + * same values. + */ + lis r8, KERNELBASE@h /* Create vaddr for TLB */ + ori r8, r8, MI_EVALID /* Mark it valid */ + mtspr SPRN_MI_EPN, r8 + mtspr SPRN_MD_EPN, r8 + li r8, MI_PS8MEG /* Set 8M byte page */ + ori r8, r8, MI_SVALID /* Make it valid */ + mtspr SPRN_MI_TWC, r8 + mtspr SPRN_MD_TWC, r8 + li r8, MI_BOOTINIT /* Create RPN for address 0 */ + mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ + mtspr SPRN_MD_RPN, r8 + lis r8, MI_Kp@h /* Set the protection mode */ + mtspr SPRN_MI_AP, r8 + mtspr SPRN_MD_AP, r8 + + /* Map another 8 MByte at the IMMR to get the processor + * internal registers (among other things). + */ +#ifdef CONFIG_PIN_TLB + addi r10, r10, 0x0100 + mtspr SPRN_MD_CTR, r10 +#endif + mfspr r9, 638 /* Get current IMMR */ + andis. r9, r9, 0xff80 /* Get 8Mbyte boundary */ + + mr r8, r9 /* Create vaddr for TLB */ + ori r8, r8, MD_EVALID /* Mark it valid */ + mtspr SPRN_MD_EPN, r8 + li r8, MD_PS8MEG /* Set 8M byte page */ + ori r8, r8, MD_SVALID /* Make it valid */ + mtspr SPRN_MD_TWC, r8 + mr r8, r9 /* Create paddr for TLB */ + ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ + mtspr SPRN_MD_RPN, r8 + +#ifdef CONFIG_PIN_TLB + /* Map two more 8M kernel data pages. + */ + addi r10, r10, 0x0100 + mtspr SPRN_MD_CTR, r10 + + lis r8, KERNELBASE@h /* Create vaddr for TLB */ + addis r8, r8, 0x0080 /* Add 8M */ + ori r8, r8, MI_EVALID /* Mark it valid */ + mtspr SPRN_MD_EPN, r8 + li r9, MI_PS8MEG /* Set 8M byte page */ + ori r9, r9, MI_SVALID /* Make it valid */ + mtspr SPRN_MD_TWC, r9 + li r11, MI_BOOTINIT /* Create RPN for address 0 */ + addis r11, r11, 0x0080 /* Add 8M */ + mtspr SPRN_MD_RPN, r8 + + addis r8, r8, 0x0080 /* Add 8M */ + mtspr SPRN_MD_EPN, r8 + mtspr SPRN_MD_TWC, r9 + addis r11, r11, 0x0080 /* Add 8M */ + mtspr SPRN_MD_RPN, r8 +#endif + + /* Since the cache is enabled according to the information we + * just loaded into the TLB, invalidate and enable the caches here. + * We should probably check/set other modes....later. + */ + lis r8, IDC_INVALL@h + mtspr SPRN_IC_CST, r8 + mtspr SPRN_DC_CST, r8 + lis r8, IDC_ENABLE@h + mtspr SPRN_IC_CST, r8 +#ifdef CONFIG_8xx_COPYBACK + mtspr SPRN_DC_CST, r8 +#else + /* For a debug option, I left this here to easily enable + * the write through cache mode + */ + lis r8, DC_SFWT@h + mtspr SPRN_DC_CST, r8 + lis r8, IDC_ENABLE@h + mtspr SPRN_DC_CST, r8 +#endif + blr + + +/* + * Set up to use a given MMU context. + * r3 is context number, r4 is PGD pointer. + * + * We place the physical address of the new task page directory loaded + * into the MMU base register, and set the ASID compare register with + * the new "context." + */ +_GLOBAL(set_context) + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is passed as second argument. + */ + lis r5, KERNELBASE@h + lwz r5, 0xf0(r5) + stw r4, 0x4(r5) +#endif + +#ifdef CONFIG_8xx_CPU6 + lis r6, cpu6_errata_word@h + ori r6, r6, cpu6_errata_word@l + tophys (r4, r4) + li r7, 0x3980 + stw r7, 12(r6) + lwz r7, 12(r6) + mtspr SPRN_M_TWB, r4 /* Update MMU base address */ + li r7, 0x3380 + stw r7, 12(r6) + lwz r7, 12(r6) + mtspr SPRN_M_CASID, r3 /* Update context */ +#else + mtspr SPRN_M_CASID,r3 /* Update context */ + tophys (r4, r4) + mtspr SPRN_M_TWB, r4 /* and pgd */ +#endif + SYNC + blr + +#ifdef CONFIG_8xx_CPU6 +/* It's here because it is unique to the 8xx. + * It is important we get called with interrupts disabled. I used to + * do that, but it appears that all code that calls this already had + * interrupt disabled. + */ + .globl set_dec_cpu6 +set_dec_cpu6: + lis r7, cpu6_errata_word@h + ori r7, r7, cpu6_errata_word@l + li r4, 0x2c00 + stw r4, 8(r7) + lwz r4, 8(r7) + mtspr 22, r3 /* Update Decrementer */ + SYNC + blr +#endif + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the data segment, + * which is page-aligned. + */ + .data + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 + + .globl swapper_pg_dir +swapper_pg_dir: + .space 4096 + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * Used to pass parameters into the kernel like root=/dev/sda1, etc. + */ + .globl cmd_line +cmd_line: + .space 512 + +/* Room for two PTE table poiners, usually the kernel and current user + * pointer to their respective root page table (pgdir). + */ +abatron_pteptrs: + .space 8 + +#ifdef CONFIG_8xx_CPU6 + .globl cpu6_errata_word +cpu6_errata_word: + .space 16 +#endif + diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S new file mode 100644 index 00000000000..53949811efd --- /dev/null +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -0,0 +1,1058 @@ +/* + * arch/ppc/kernel/head_fsl_booke.S + * + * Kernel execution entry point code. + * + * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> + * Initial PowerPC version. + * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu> + * Rewritten for PReP + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Low-level exception handers, MMU support, and rewrite. + * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> + * PowerPC 8xx modifications. + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> + * PowerPC 403GCX/405GP modifications. + * Copyright 2000 MontaVista Software Inc. + * PPC405 modifications + * PowerPC 403GCX/405GP modifications. + * Author: MontaVista Software, Inc. + * frank_rowand@mvista.com or source@mvista.com + * debbie_chu@mvista.com + * Copyright 2002-2004 MontaVista Software, Inc. + * PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org> + * Copyright 2004 Freescale Semiconductor, Inc + * PowerPC e500 modifications, Kumar Gala <kumar.gala@freescale.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/config.h> +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include "head_booke.h" + +/* As with the other PowerPC ports, it is expected that when code + * execution begins here, the following registers contain valid, yet + * optional, information: + * + * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.) + * r4 - Starting address of the init RAM disk + * r5 - Ending address of the init RAM disk + * r6 - Start of kernel command line string (e.g. "mem=128") + * r7 - End of kernel command line string + * + */ + .text +_GLOBAL(_stext) +_GLOBAL(_start) + /* + * Reserve a word at a fixed location to store the address + * of abatron_pteptrs + */ + nop +/* + * Save parameters we are passed + */ + mr r31,r3 + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + li r24,0 /* CPU number */ + +/* We try to not make any assumptions about how the boot loader + * setup or used the TLBs. We invalidate all mappings from the + * boot loader and load a single entry in TLB1[0] to map the + * first 16M of kernel memory. Any boot info passed from the + * bootloader needs to live in this first 16M. + * + * Requirement on bootloader: + * - The page we're executing in needs to reside in TLB1 and + * have IPROT=1. If not an invalidate broadcast could + * evict the entry we're currently executing in. + * + * r3 = Index of TLB1 were executing in + * r4 = Current MSR[IS] + * r5 = Index of TLB1 temp mapping + * + * Later in mapin_ram we will correctly map lowmem, and resize TLB1[0] + * if needed + */ + +/* 1. Find the index of the entry we're executing in */ + bl invstr /* Find our address */ +invstr: mflr r6 /* Make it accessible */ + mfmsr r7 + rlwinm r4,r7,27,31,31 /* extract MSR[IS] */ + mfspr r7, SPRN_PID0 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* search MSR[IS], SPID=PID0 */ +#ifndef CONFIG_E200 + mfspr r7,SPRN_MAS1 + andis. r7,r7,MAS1_VALID@h + bne match_TLB + mfspr r7,SPRN_PID1 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* search MSR[IS], SPID=PID1 */ + mfspr r7,SPRN_MAS1 + andis. r7,r7,MAS1_VALID@h + bne match_TLB + mfspr r7, SPRN_PID2 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* Fall through, we had to match */ +#endif +match_TLB: + mfspr r7,SPRN_MAS0 + rlwinm r3,r7,16,20,31 /* Extract MAS0(Entry) */ + + mfspr r7,SPRN_MAS1 /* Insure IPROT set */ + oris r7,r7,MAS1_IPROT@h + mtspr SPRN_MAS1,r7 + tlbwe + +/* 2. Invalidate all entries except the entry we're executing in */ + mfspr r9,SPRN_TLB1CFG + andi. r9,r9,0xfff + li r6,0 /* Set Entry counter to 0 */ +1: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */ + mtspr SPRN_MAS0,r7 + tlbre + mfspr r7,SPRN_MAS1 + rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */ + cmpw r3,r6 + beq skpinv /* Dont update the current execution TLB */ + mtspr SPRN_MAS1,r7 + tlbwe + isync +skpinv: addi r6,r6,1 /* Increment */ + cmpw r6,r9 /* Are we done? */ + bne 1b /* If not, repeat */ + + /* Invalidate TLB0 */ + li r6,0x04 + tlbivax 0,r6 +#ifdef CONFIG_SMP + tlbsync +#endif + /* Invalidate TLB1 */ + li r6,0x0c + tlbivax 0,r6 +#ifdef CONFIG_SMP + tlbsync +#endif + msync + +/* 3. Setup a temp mapping and jump to it */ + andi. r5, r3, 0x1 /* Find an entry not used and is non-zero */ + addi r5, r5, 0x1 + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r7 + tlbre + + /* Just modify the entry ID and EPN for the temp mapping */ + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */ + mtspr SPRN_MAS0,r7 + xori r6,r4,1 /* Setup TMP mapping in the other Address space */ + slwi r6,r6,12 + oris r6,r6,(MAS1_VALID|MAS1_IPROT)@h + ori r6,r6,(MAS1_TSIZE(BOOKE_PAGESZ_4K))@l + mtspr SPRN_MAS1,r6 + mfspr r6,SPRN_MAS2 + li r7,0 /* temp EPN = 0 */ + rlwimi r7,r6,0,20,31 + mtspr SPRN_MAS2,r7 + tlbwe + + xori r6,r4,1 + slwi r6,r6,5 /* setup new context with other address space */ + bl 1f /* Find our address */ +1: mflr r9 + rlwimi r7,r9,0,20,31 + addi r7,r7,24 + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r6 + rfi + +/* 4. Clear out PIDs & Search info */ + li r6,0 + mtspr SPRN_PID0,r6 +#ifndef CONFIG_E200 + mtspr SPRN_PID1,r6 + mtspr SPRN_PID2,r6 +#endif + mtspr SPRN_MAS6,r6 + +/* 5. Invalidate mapping we started in */ + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r7 + tlbre + li r6,0 + mtspr SPRN_MAS1,r6 + tlbwe + /* Invalidate TLB1 */ + li r9,0x0c + tlbivax 0,r9 +#ifdef CONFIG_SMP + tlbsync +#endif + msync + +/* 6. Setup KERNELBASE mapping in TLB1[0] */ + lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */ + mtspr SPRN_MAS0,r6 + lis r6,(MAS1_VALID|MAS1_IPROT)@h + ori r6,r6,(MAS1_TSIZE(BOOKE_PAGESZ_16M))@l + mtspr SPRN_MAS1,r6 + li r7,0 + lis r6,KERNELBASE@h + ori r6,r6,KERNELBASE@l + rlwimi r6,r7,0,20,31 + mtspr SPRN_MAS2,r6 + li r7,(MAS3_SX|MAS3_SW|MAS3_SR) + mtspr SPRN_MAS3,r7 + tlbwe + +/* 7. Jump to KERNELBASE mapping */ + lis r7,MSR_KERNEL@h + ori r7,r7,MSR_KERNEL@l + bl 1f /* Find our address */ +1: mflr r9 + rlwimi r6,r9,0,20,31 + addi r6,r6,24 + mtspr SPRN_SRR0,r6 + mtspr SPRN_SRR1,r7 + rfi /* start execution out of TLB1[0] entry */ + +/* 8. Clear out the temp mapping */ + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */ + mtspr SPRN_MAS0,r7 + tlbre + mtspr SPRN_MAS1,r8 + tlbwe + /* Invalidate TLB1 */ + li r9,0x0c + tlbivax 0,r9 +#ifdef CONFIG_SMP + tlbsync +#endif + msync + + /* Establish the interrupt vector offsets */ + SET_IVOR(0, CriticalInput); + SET_IVOR(1, MachineCheck); + SET_IVOR(2, DataStorage); + SET_IVOR(3, InstructionStorage); + SET_IVOR(4, ExternalInput); + SET_IVOR(5, Alignment); + SET_IVOR(6, Program); + SET_IVOR(7, FloatingPointUnavailable); + SET_IVOR(8, SystemCall); + SET_IVOR(9, AuxillaryProcessorUnavailable); + SET_IVOR(10, Decrementer); + SET_IVOR(11, FixedIntervalTimer); + SET_IVOR(12, WatchdogTimer); + SET_IVOR(13, DataTLBError); + SET_IVOR(14, InstructionTLBError); + SET_IVOR(15, Debug); + SET_IVOR(32, SPEUnavailable); + SET_IVOR(33, SPEFloatingPointData); + SET_IVOR(34, SPEFloatingPointRound); +#ifndef CONFIG_E200 + SET_IVOR(35, PerformanceMonitor); +#endif + + /* Establish the interrupt vector base */ + lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ + mtspr SPRN_IVPR,r4 + + /* Setup the defaults for TLB entries */ + li r2,(MAS4_TSIZED(BOOKE_PAGESZ_4K))@l +#ifdef CONFIG_E200 + oris r2,r2,MAS4_TLBSELD(1)@h +#endif + mtspr SPRN_MAS4, r2 + +#if 0 + /* Enable DOZE */ + mfspr r2,SPRN_HID0 + oris r2,r2,HID0_DOZE@h + mtspr SPRN_HID0, r2 +#endif +#ifdef CONFIG_E200 + /* enable dedicated debug exception handling resources (Debug APU) */ + mfspr r2,SPRN_HID0 + ori r2,r2,HID0_DAPUEN@l + mtspr SPRN_HID0,r2 +#endif + +#if !defined(CONFIG_BDI_SWITCH) + /* + * The Abatron BDI JTAG debugger does not tolerate others + * mucking with the debug registers. + */ + lis r2,DBCR0_IDM@h + mtspr SPRN_DBCR0,r2 + /* clear any residual debug events */ + li r2,-1 + mtspr SPRN_DBSR,r2 +#endif + + /* + * This is where the main kernel code starts. + */ + + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + + /* ptr to current thread */ + addi r4,r2,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r4 + + /* stack */ + lis r1,init_thread_union@h + ori r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + bl early_init + + mfspr r3,SPRN_TLB1CFG + andi. r3,r3,0xfff + lis r4,num_tlbcam_entries@ha + stw r3,num_tlbcam_entries@l(r4) +/* + * Decide what sort of machine this is and initialize the MMU. + */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + bl machine_init + bl MMU_init + + /* Setup PTE pointers for the Abatron bdiGDB */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + lis r4, KERNELBASE@h + ori r4, r4, KERNELBASE@l + stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */ + stw r6, 0(r5) + + /* Let's move on */ + lis r4,start_kernel@h + ori r4,r4,start_kernel@l + lis r3,MSR_KERNEL@h + ori r3,r3,MSR_KERNEL@l + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi /* change context and jump to start_kernel */ + +/* Macros to hide the PTE size differences + * + * FIND_PTE -- walks the page tables given EA & pgdir pointer + * r10 -- EA of fault + * r11 -- PGDIR pointer + * r12 -- free + * label 2: is the bailout case + * + * if we find the pte (fall through): + * r11 is low pte word + * r12 is pointer to the pte + */ +#ifdef CONFIG_PTE_64BIT +#define PTE_FLAGS_OFFSET 4 +#define FIND_PTE \ + rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ + lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ + beq 2f; /* Bail if no table */ \ + rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + lwz r11, 4(r12); /* Get pte entry */ +#else +#define PTE_FLAGS_OFFSET 0 +#define FIND_PTE \ + rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ + lwz r11, 0(r11); /* Get L1 entry */ \ + rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \ + beq 2f; /* Bail if no table */ \ + rlwimi r12, r10, 22, 20, 29; /* Compute PTE address */ \ + lwz r11, 0(r12); /* Get Linux PTE */ +#endif + +/* + * Interrupt vector entry code + * + * The Book E MMUs are always on so we don't need to handle + * interrupts in real mode as with previous PPC processors. In + * this case we handle interrupts in the kernel virtual address + * space. + * + * Interrupt vectors are dynamically placed relative to the + * interrupt prefix as determined by the address of interrupt_base. + * The interrupt vectors offsets are programmed using the labels + * for each interrupt vector entry. + * + * Interrupt vectors must be aligned on a 16 byte boundary. + * We align on a 32 byte cache line boundary for good measure. + */ + +interrupt_base: + /* Critical Input Interrupt */ + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) + + /* Machine Check Interrupt */ +#ifdef CONFIG_E200 + /* no RFMCI, MCSRRs on E200 */ + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) +#else + MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) +#endif + + /* Data Storage Interrupt */ + START_EXCEPTION(DataStorage) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + + /* + * Check if it was a store fault, if not then bail + * because a user tried to access a kernel or + * read-protected page. Otherwise, get the + * offending address and handle it. + */ + mfspr r10, SPRN_ESR + andis. r10, r10, ESR_ST@h + beq 2f + + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + ori r11, r11, TASK_SIZE@l + cmplw 0, r10, r11 + bge 2f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) +4: + FIND_PTE + + /* Are _PAGE_USER & _PAGE_RW set & _PAGE_HWWRITE not? */ + andi. r13, r11, _PAGE_RW|_PAGE_USER|_PAGE_HWWRITE + cmpwi 0, r13, _PAGE_RW|_PAGE_USER + bne 2f /* Bail if not */ + + /* Update 'changed'. */ + ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE + stw r11, PTE_FLAGS_OFFSET(r12) /* Update Linux page table */ + + /* MAS2 not updated as the entry does exist in the tlb, this + fault taken to detect state transition (eg: COW -> DIRTY) + */ + andi. r11, r11, _PAGE_HWEXEC + rlwimi r11, r11, 31, 27, 27 /* SX <- _PAGE_HWEXEC */ + ori r11, r11, (MAS3_UW|MAS3_SW|MAS3_UR|MAS3_SR)@l /* set static perms */ + + /* update search PID in MAS6, AS = 0 */ + mfspr r12, SPRN_PID0 + slwi r12, r12, 16 + mtspr SPRN_MAS6, r12 + + /* find the TLB index that caused the fault. It has to be here. */ + tlbsx 0, r10 + + /* only update the perm bits, assume the RPN is fine */ + mfspr r12, SPRN_MAS3 + rlwimi r12, r11, 0, 20, 31 + mtspr SPRN_MAS3,r12 + tlbwe + + /* Done...restore registers and get out of here. */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + rfi /* Force context change */ + +2: + /* + * The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b data_access + + /* Instruction Storage Interrupt */ + INSTRUCTION_STORAGE_EXCEPTION + + /* External Input Interrupt */ + EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) + + /* Alignment Interrupt */ + ALIGNMENT_EXCEPTION + + /* Program Interrupt */ + PROGRAM_EXCEPTION + + /* Floating Point Unavailable Interrupt */ +#ifdef CONFIG_PPC_FPU + FP_UNAVAILABLE_EXCEPTION +#else +#ifdef CONFIG_E200 + /* E200 treats 'normal' floating point instructions as FP Unavail exception */ + EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE) +#else + EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) +#endif +#endif + + /* System Call Interrupt */ + START_EXCEPTION(SystemCall) + NORMAL_EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0x0c00, DoSyscall) + + /* Auxillary Processor Unavailable Interrupt */ + EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + + /* Decrementer Interrupt */ + DECREMENTER_EXCEPTION + + /* Fixed Internal Timer Interrupt */ + /* TODO: Add FIT support */ + EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) + + /* Watchdog Timer Interrupt */ +#ifdef CONFIG_BOOKE_WDT + CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException) +#else + CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception) +#endif + + /* Data TLB Error Interrupt */ + START_EXCEPTION(DataTLBError) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + ori r11, r11, TASK_SIZE@l + cmplw 5, r10, r11 + blt 5, 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MAS1 /* Set TID to 0 */ + rlwinm r12,r12,0,16,1 + mtspr SPRN_MAS1,r12 + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + +4: + FIND_PTE + andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ + beq 2f /* Bail if not present */ + +#ifdef CONFIG_PTE_64BIT + lwz r13, 0(r12) +#endif + ori r11, r11, _PAGE_ACCESSED + stw r11, PTE_FLAGS_OFFSET(r12) + + /* Jump to common tlb load */ + b finish_tlb_load +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b data_access + + /* Instruction TLB Error Interrupt */ + /* + * Nearly the same as above, except we get our + * information from different registers and bailout + * to a different point. + */ + START_EXCEPTION(InstructionTLBError) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + mfspr r10, SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + ori r11, r11, TASK_SIZE@l + cmplw 5, r10, r11 + blt 5, 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MAS1 /* Set TID to 0 */ + rlwinm r12,r12,0,16,1 + mtspr SPRN_MAS1,r12 + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + +4: + FIND_PTE + andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ + beq 2f /* Bail if not present */ + +#ifdef CONFIG_PTE_64BIT + lwz r13, 0(r12) +#endif + ori r11, r11, _PAGE_ACCESSED + stw r11, PTE_FLAGS_OFFSET(r12) + + /* Jump to common TLB load point */ + b finish_tlb_load + +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b InstructionStorage + +#ifdef CONFIG_SPE + /* SPE Unavailable */ + START_EXCEPTION(SPEUnavailable) + NORMAL_EXCEPTION_PROLOG + bne load_up_spe + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE_LITE(0x2010, KernelSPE) +#else + EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE) +#endif /* CONFIG_SPE */ + + /* SPE Floating Point Data */ +#ifdef CONFIG_SPE + EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE); +#else + EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) +#endif /* CONFIG_SPE */ + + /* SPE Floating Point Round */ + EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE) + + /* Performance Monitor */ + EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD) + + + /* Debug Interrupt */ + DEBUG_EXCEPTION + +/* + * Local functions + */ + + /* + * Data TLB exceptions will bail out to this point + * if they can't resolve the lightweight TLB fault. + */ +data_access: + NORMAL_EXCEPTION_PROLOG + mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5,_ESR(r11) + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + andis. r10,r5,(ESR_ILK|ESR_DLK)@h + bne 1f + EXC_XFER_EE_LITE(0x0300, handle_page_fault) +1: + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE_LITE(0x0300, CacheLockingException) + +/* + + * Both the instruction and data TLB miss get to this + * point to load the TLB. + * r10 - EA of fault + * r11 - TLB (info from Linux PTE) + * r12, r13 - available to use + * CR5 - results of addr < TASK_SIZE + * MAS0, MAS1 - loaded with proper value when we get here + * MAS2, MAS3 - will need additional info from Linux PTE + * Upon exit, we reload everything and RFI. + */ +finish_tlb_load: + /* + * We set execute, because we don't have the granularity to + * properly set this at the page level (Linux problem). + * Many of these bits are software only. Bits we don't set + * here we (properly should) assume have the appropriate value. + */ + + mfspr r12, SPRN_MAS2 +#ifdef CONFIG_PTE_64BIT + rlwimi r12, r11, 26, 24, 31 /* extract ...WIMGE from pte */ +#else + rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ +#endif + mtspr SPRN_MAS2, r12 + + bge 5, 1f + + /* is user addr */ + andi. r12, r11, (_PAGE_USER | _PAGE_HWWRITE | _PAGE_HWEXEC) + andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ + srwi r10, r12, 1 + or r12, r12, r10 /* Copy user perms into supervisor */ + iseleq r12, 0, r12 + b 2f + + /* is kernel addr */ +1: rlwinm r12, r11, 31, 29, 29 /* Extract _PAGE_HWWRITE into SW */ + ori r12, r12, (MAS3_SX | MAS3_SR) + +#ifdef CONFIG_PTE_64BIT +2: rlwimi r12, r13, 24, 0, 7 /* grab RPN[32:39] */ + rlwimi r12, r11, 24, 8, 19 /* grab RPN[40:51] */ + mtspr SPRN_MAS3, r12 +BEGIN_FTR_SECTION + srwi r10, r13, 8 /* grab RPN[8:31] */ + mtspr SPRN_MAS7, r10 +END_FTR_SECTION_IFSET(CPU_FTR_BIG_PHYS) +#else +2: rlwimi r11, r12, 0, 20, 31 /* Extract RPN from PTE and merge with perms */ + mtspr SPRN_MAS3, r11 +#endif +#ifdef CONFIG_E200 + /* Round robin TLB1 entries assignment */ + mfspr r12, SPRN_MAS0 + + /* Extract TLB1CFG(NENTRY) */ + mfspr r11, SPRN_TLB1CFG + andi. r11, r11, 0xfff + + /* Extract MAS0(NV) */ + andi. r13, r12, 0xfff + addi r13, r13, 1 + cmpw 0, r13, r11 + addi r12, r12, 1 + + /* check if we need to wrap */ + blt 7f + + /* wrap back to first free tlbcam entry */ + lis r13, tlbcam_index@ha + lwz r13, tlbcam_index@l(r13) + rlwimi r12, r13, 0, 20, 31 +7: + mtspr SPRN_MAS0,r12 +#endif /* CONFIG_E200 */ + + tlbwe + + /* Done...restore registers and get out of here. */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + rfi /* Force context change */ + +#ifdef CONFIG_SPE +/* Note that the SPE support is closely modeled after the AltiVec + * support. Changes to one are likely to be applicable to the + * other! */ +load_up_spe: +/* + * Disable SPE for the task which had SPE previously, + * and save its SPE registers in its thread_struct. + * Enables SPE for use in the kernel on return. + * On SMP we know the SPE units are free, since we give it up every + * switch. -- Kumar + */ + mfmsr r5 + oris r5,r5,MSR_SPE@h + mtmsr r5 /* enable use of SPE now */ + isync +/* + * For SMP, we don't do lazy SPE switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_spe in switch_to. + */ +#ifndef CONFIG_SMP + lis r3,last_task_used_spe@ha + lwz r4,last_task_used_spe@l(r3) + cmpi 0,r4,0 + beq 1f + addi r4,r4,THREAD /* want THREAD of last_task_used_spe */ + SAVE_32EVRS(0,r10,r4) + evxor evr10, evr10, evr10 /* clear out evr10 */ + evmwumiaa evr10, evr10, evr10 /* evr10 <- ACC = 0 * 0 + ACC */ + li r5,THREAD_ACC + evstddx evr10, r4, r5 /* save off accumulator */ + lwz r5,PT_REGS(r4) + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r10,MSR_SPE@h + andc r4,r4,r10 /* disable SPE for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of SPE after return */ + oris r9,r9,MSR_SPE@h + mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ + li r4,1 + li r10,THREAD_ACC + stw r4,THREAD_USED_SPE(r5) + evlddx evr4,r10,r5 + evmra evr4,evr4 + REST_32EVRS(0,r10,r5) +#ifndef CONFIG_SMP + subi r4,r5,THREAD + stw r4,last_task_used_spe@l(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ +2: REST_4GPRS(3, r11) + lwz r10,_CCR(r11) + REST_GPR(1, r11) + mtcr r10 + lwz r10,_LINK(r11) + mtlr r10 + REST_GPR(10, r11) + mtspr SPRN_SRR1,r9 + mtspr SPRN_SRR0,r12 + REST_GPR(9, r11) + REST_GPR(12, r11) + lwz r11,GPR11(r11) + SYNC + rfi + +/* + * SPE unavailable trap from kernel - print a message, but let + * the task use SPE in the kernel until it returns to user mode. + */ +KernelSPE: + lwz r3,_MSR(r1) + oris r3,r3,MSR_SPE@h + stw r3,_MSR(r1) /* enable use of SPE after return */ + lis r3,87f@h + ori r3,r3,87f@l + mr r4,r2 /* current */ + lwz r5,_NIP(r1) + bl printk + b ret_from_except +87: .string "SPE used in kernel (task=%p, pc=%x) \n" + .align 4,0 + +#endif /* CONFIG_SPE */ + +/* + * Global functions + */ + +/* + * extern void loadcam_entry(unsigned int index) + * + * Load TLBCAM[index] entry in to the L2 CAM MMU + */ +_GLOBAL(loadcam_entry) + lis r4,TLBCAM@ha + addi r4,r4,TLBCAM@l + mulli r5,r3,20 + add r3,r5,r4 + lwz r4,0(r3) + mtspr SPRN_MAS0,r4 + lwz r4,4(r3) + mtspr SPRN_MAS1,r4 + lwz r4,8(r3) + mtspr SPRN_MAS2,r4 + lwz r4,12(r3) + mtspr SPRN_MAS3,r4 + tlbwe + isync + blr + +/* + * extern void giveup_altivec(struct task_struct *prev) + * + * The e500 core does not have an AltiVec unit. + */ +_GLOBAL(giveup_altivec) + blr + +#ifdef CONFIG_SPE +/* + * extern void giveup_spe(struct task_struct *prev) + * + */ +_GLOBAL(giveup_spe) + mfmsr r5 + oris r5,r5,MSR_SPE@h + SYNC + mtmsr r5 /* enable use of SPE now */ + isync + cmpi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + lwz r5,PT_REGS(r3) + cmpi 0,r5,0 + SAVE_32EVRS(0, r4, r3) + evxor evr6, evr6, evr6 /* clear out evr6 */ + evmwumiaa evr6, evr6, evr6 /* evr6 <- ACC = 0 * 0 + ACC */ + li r4,THREAD_ACC + evstddx evr6, r4, r3 /* save off accumulator */ + mfspr r6,SPRN_SPEFSCR + stw r6,THREAD_SPEFSCR(r3) /* save spefscr register value */ + beq 1f + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_SPE@h + andc r4,r4,r3 /* disable SPE for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + lis r4,last_task_used_spe@ha + stw r5,last_task_used_spe@l(r4) +#endif /* CONFIG_SMP */ + blr +#endif /* CONFIG_SPE */ + +/* + * extern void giveup_fpu(struct task_struct *prev) + * + * Not all FSL Book-E cores have an FPU + */ +#ifndef CONFIG_PPC_FPU +_GLOBAL(giveup_fpu) + blr +#endif + +/* + * extern void abort(void) + * + * At present, this routine just applies a system reset. + */ +_GLOBAL(abort) + li r13,0 + mtspr SPRN_DBCR0,r13 /* disable all debug events */ + mfmsr r13 + ori r13,r13,MSR_DE@l /* Enable Debug Events */ + mtmsr r13 + mfspr r13,SPRN_DBCR0 + lis r13,(DBCR0_IDM|DBCR0_RST_CHIP)@h + mtspr SPRN_DBCR0,r13 + +_GLOBAL(set_context) + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is the second parameter. + */ + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r4, 0x4(r5) +#endif + mtspr SPRN_PID,r3 + isync /* Force context change */ + blr + +/* + * We put a few things here that have to be page-aligned. This stuff + * goes at the beginning of the data segment, which is page-aligned. + */ + .data +_GLOBAL(sdata) +_GLOBAL(empty_zero_page) + .space 4096 +_GLOBAL(swapper_pg_dir) + .space 4096 + +/* Reserved 4k for the critical exception stack & 4k for the machine + * check stack per CPU for kernel mode exceptions */ + .section .bss + .align 12 +exception_stack_bottom: + .space BOOKE_EXCEPTION_STACK_SIZE * NR_CPUS +_GLOBAL(exception_stack_top) + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * which is used to pass parameters into the kernel like root=/dev/sda1, etc. + */ +_GLOBAL(cmd_line) + .space 512 + +/* + * Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 + diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S new file mode 100644 index 00000000000..444fdcc769f --- /dev/null +++ b/arch/powerpc/kernel/idle_6xx.S @@ -0,0 +1,233 @@ +/* + * This file contains the power_save function for 6xx & 7xxx CPUs + * rewritten in assembler + * + * Warning ! This code assumes that if your machine has a 750fx + * it will have PLL 1 set to low speed mode (used during NAP/DOZE). + * if this is not the case some additional changes will have to + * be done to check a runtime var (a bit like powersave-nap) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +#undef DEBUG + + .text + +/* + * Init idle, called at early CPU setup time from head.S for each CPU + * Make sure no rest of NAP mode remains in HID0, save default + * values for some CPU specific registers. Called with r24 + * containing CPU number and r3 reloc offset + */ +_GLOBAL(init_idle_6xx) +BEGIN_FTR_SECTION + mfspr r4,SPRN_HID0 + rlwinm r4,r4,0,10,8 /* Clear NAP */ + mtspr SPRN_HID0, r4 + b 1f +END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) + blr +1: + slwi r5,r24,2 + add r5,r5,r3 +BEGIN_FTR_SECTION + mfspr r4,SPRN_MSSCR0 + addis r6,r5, nap_save_msscr0@ha + stw r4,nap_save_msscr0@l(r6) +END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) +BEGIN_FTR_SECTION + mfspr r4,SPRN_HID1 + addis r6,r5,nap_save_hid1@ha + stw r4,nap_save_hid1@l(r6) +END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) + blr + +/* + * Here is the power_save_6xx function. This could eventually be + * split into several functions & changing the function pointer + * depending on the various features. + */ +_GLOBAL(ppc6xx_idle) + /* Check if we can nap or doze, put HID0 mask in r3 + */ + lis r3, 0 +BEGIN_FTR_SECTION + lis r3,HID0_DOZE@h +END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) +BEGIN_FTR_SECTION + /* We must dynamically check for the NAP feature as it + * can be cleared by CPU init after the fixups are done + */ + lis r4,cur_cpu_spec@ha + lwz r4,cur_cpu_spec@l(r4) + lwz r4,CPU_SPEC_FEATURES(r4) + andi. r0,r4,CPU_FTR_CAN_NAP + beq 1f + /* Now check if user or arch enabled NAP mode */ + lis r4,powersave_nap@ha + lwz r4,powersave_nap@l(r4) + cmpwi 0,r4,0 + beq 1f + lis r3,HID0_NAP@h +1: +END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) + cmpwi 0,r3,0 + beqlr + + /* Clear MSR:EE */ + mfmsr r7 + rlwinm r0,r7,0,17,15 + mtmsr r0 + + /* Check current_thread_info()->flags */ + rlwinm r4,r1,0,0,18 + lwz r4,TI_FLAGS(r4) + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + mtmsr r7 /* out of line this ? */ + blr +1: + /* Some pre-nap cleanups needed on some CPUs */ + andis. r0,r3,HID0_NAP@h + beq 2f +BEGIN_FTR_SECTION + /* Disable L2 prefetch on some 745x and try to ensure + * L2 prefetch engines are idle. As explained by errata + * text, we can't be sure they are, we just hope very hard + * that well be enough (sic !). At least I noticed Apple + * doesn't even bother doing the dcbf's here... + */ + mfspr r4,SPRN_MSSCR0 + rlwinm r4,r4,0,0,29 + sync + mtspr SPRN_MSSCR0,r4 + sync + isync + lis r4,KERNELBASE@h + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 +END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) +#ifdef DEBUG + lis r6,nap_enter_count@ha + lwz r4,nap_enter_count@l(r6) + addi r4,r4,1 + stw r4,nap_enter_count@l(r6) +#endif +2: +BEGIN_FTR_SECTION + /* Go to low speed mode on some 750FX */ + lis r4,powersave_lowspeed@ha + lwz r4,powersave_lowspeed@l(r4) + cmpwi 0,r4,0 + beq 1f + mfspr r4,SPRN_HID1 + oris r4,r4,0x0001 + mtspr SPRN_HID1,r4 +1: +END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) + + /* Go to NAP or DOZE now */ + mfspr r4,SPRN_HID0 + lis r5,(HID0_NAP|HID0_SLEEP)@h +BEGIN_FTR_SECTION + oris r5,r5,HID0_DOZE@h +END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) + andc r4,r4,r5 + or r4,r4,r3 +BEGIN_FTR_SECTION + oris r4,r4,HID0_DPM@h /* that should be done once for all */ +END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) + mtspr SPRN_HID0,r4 +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + ori r7,r7,MSR_EE /* Could be ommited (already set) */ + oris r7,r7,MSR_POW@h + sync + isync + mtmsr r7 + isync + sync + blr + +/* + * Return from NAP/DOZE mode, restore some CPU specific registers, + * we are called with DR/IR still off and r2 containing physical + * address of current. + */ +_GLOBAL(power_save_6xx_restore) + mfspr r11,SPRN_HID0 + rlwinm. r11,r11,0,10,8 /* Clear NAP & copy NAP bit !state to cr1 EQ */ + cror 4*cr1+eq,4*cr0+eq,4*cr0+eq +BEGIN_FTR_SECTION + rlwinm r11,r11,0,9,7 /* Clear DOZE */ +END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) + mtspr SPRN_HID0, r11 + +#ifdef DEBUG + beq cr1,1f + lis r11,(nap_return_count-KERNELBASE)@ha + lwz r9,nap_return_count@l(r11) + addi r9,r9,1 + stw r9,nap_return_count@l(r11) +1: +#endif + + rlwinm r9,r1,0,0,18 + tophys(r9,r9) + lwz r11,TI_CPU(r9) + slwi r11,r11,2 + /* Todo make sure all these are in the same page + * and load r22 (@ha part + CPU offset) only once + */ +BEGIN_FTR_SECTION + beq cr1,1f + addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha + lwz r9,nap_save_msscr0@l(r9) + mtspr SPRN_MSSCR0, r9 + sync + isync +1: +END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) +BEGIN_FTR_SECTION + addis r9,r11,(nap_save_hid1-KERNELBASE)@ha + lwz r9,nap_save_hid1@l(r9) + mtspr SPRN_HID1, r9 +END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) + b transfer_to_handler_cont + + .data + +_GLOBAL(nap_save_msscr0) + .space 4*NR_CPUS + +_GLOBAL(nap_save_hid1) + .space 4*NR_CPUS + +_GLOBAL(powersave_nap) + .long 0 +_GLOBAL(powersave_lowspeed) + .long 0 + +#ifdef DEBUG +_GLOBAL(nap_enter_count) + .space 4 +_GLOBAL(nap_return_count) + .space 4 +#endif diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S new file mode 100644 index 00000000000..5596fad6c87 --- /dev/null +++ b/arch/powerpc/kernel/idle_power4.S @@ -0,0 +1,78 @@ +/* + * This file contains the power_save function for 6xx & 7xxx CPUs + * rewritten in assembler + * + * Warning ! This code assumes that if your machine has a 750fx + * it will have PLL 1 set to low speed mode (used during NAP/DOZE). + * if this is not the case some additional changes will have to + * be done to check a runtime var (a bit like powersave-nap) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +#undef DEBUG + + .text + +/* + * Here is the power_save_6xx function. This could eventually be + * split into several functions & changing the function pointer + * depending on the various features. + */ +_GLOBAL(power4_idle) +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) + /* We must dynamically check for the NAP feature as it + * can be cleared by CPU init after the fixups are done + */ + LOADBASE(r3,cur_cpu_spec) + ld r4,cur_cpu_spec@l(r3) + ld r4,CPU_SPEC_FEATURES(r4) + andi. r0,r4,CPU_FTR_CAN_NAP + beqlr + /* Now check if user or arch enabled NAP mode */ + LOADBASE(r3,powersave_nap) + lwz r4,powersave_nap@l(r3) + cmpwi 0,r4,0 + beqlr + + /* Clear MSR:EE */ + mfmsr r7 + li r4,0 + ori r4,r4,MSR_EE + andc r0,r7,r4 + mtmsrd r0 + + /* Check current_thread_info()->flags */ + clrrdi r4,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r4) + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + mtmsrd r7 /* out of line this ? */ + blr +1: + /* Go to NAP now */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + oris r7,r7,MSR_POW@h + sync + isync + mtmsrd r7 + isync + sync + blr diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c new file mode 100644 index 00000000000..941043ae040 --- /dev/null +++ b/arch/powerpc/kernel/init_task.c @@ -0,0 +1,36 @@ +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/init_task.h> +#include <linux/fs.h> +#include <linux/mqueue.h> +#include <asm/uaccess.h> + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct mm_struct init_mm = INIT_MM(init_mm); + +EXPORT_SYMBOL(init_mm); + +/* + * Initial thread structure. + * + * We need to make sure that this is 16384-byte aligned due to the + * way process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = + { INIT_THREAD_INFO(init_task) }; + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); + +EXPORT_SYMBOL(init_task); diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c new file mode 100644 index 00000000000..b81de286df5 --- /dev/null +++ b/arch/powerpc/kernel/lparmap.c @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2005 Stephen Rothwell IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/mmu.h> +#include <asm/page.h> +#include <asm/iSeries/LparMap.h> + +const struct LparMap __attribute__((__section__(".text"))) xLparMap = { + .xNumberEsids = HvEsidsToMap, + .xNumberRanges = HvRangesToMap, + .xSegmentTableOffs = STAB0_PAGE, + + .xEsids = { + { .xKernelEsid = GET_ESID(KERNELBASE), + .xKernelVsid = KERNEL_VSID(KERNELBASE), }, + { .xKernelEsid = GET_ESID(VMALLOCBASE), + .xKernelVsid = KERNEL_VSID(VMALLOCBASE), }, + }, + + .xRanges = { + { .xPages = HvPagesToMap, + .xOffset = 0, + .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT), + }, + }, +}; diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S new file mode 100644 index 00000000000..fa8c20ffec7 --- /dev/null +++ b/arch/powerpc/kernel/misc_32.S @@ -0,0 +1,1039 @@ +/* + * This file contains miscellaneous low-level functions. + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/sys.h> +#include <asm/unistd.h> +#include <asm/errno.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/cache.h> +#include <asm/cputable.h> +#include <asm/mmu.h> +#include <asm/ppc_asm.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> + + .text + + .align 5 +_GLOBAL(__delay) + cmpwi 0,r3,0 + mtctr r3 + beqlr +1: bdnz 1b + blr + +/* + * Returns (address we're running at) - (address we were linked at) + * for use before the text and data are mapped to KERNELBASE. + */ +_GLOBAL(reloc_offset) + mflr r0 + bl 1f +1: mflr r3 + lis r4,1b@ha + addi r4,r4,1b@l + subf r3,r4,r3 + mtlr r0 + blr + +/* + * add_reloc_offset(x) returns x + reloc_offset(). + */ +_GLOBAL(add_reloc_offset) + mflr r0 + bl 1f +1: mflr r5 + lis r4,1b@ha + addi r4,r4,1b@l + subf r5,r4,r5 + add r3,r3,r5 + mtlr r0 + blr + +/* + * sub_reloc_offset(x) returns x - reloc_offset(). + */ +_GLOBAL(sub_reloc_offset) + mflr r0 + bl 1f +1: mflr r5 + lis r4,1b@ha + addi r4,r4,1b@l + subf r5,r4,r5 + subf r3,r5,r3 + mtlr r0 + blr + +/* + * reloc_got2 runs through the .got2 section adding an offset + * to each entry. + */ +_GLOBAL(reloc_got2) + mflr r11 + lis r7,__got2_start@ha + addi r7,r7,__got2_start@l + lis r8,__got2_end@ha + addi r8,r8,__got2_end@l + subf r8,r7,r8 + srwi. r8,r8,2 + beqlr + mtctr r8 + bl 1f +1: mflr r0 + lis r4,1b@ha + addi r4,r4,1b@l + subf r0,r4,r0 + add r7,r0,r7 +2: lwz r0,0(r7) + add r0,r0,r3 + stw r0,0(r7) + addi r7,r7,4 + bdnz 2b + mtlr r11 + blr + +/* + * identify_cpu, + * called with r3 = data offset and r4 = CPU number + * doesn't change r3 + */ +_GLOBAL(identify_cpu) + addis r8,r3,cpu_specs@ha + addi r8,r8,cpu_specs@l + mfpvr r7 +1: + lwz r5,CPU_SPEC_PVR_MASK(r8) + and r5,r5,r7 + lwz r6,CPU_SPEC_PVR_VALUE(r8) + cmplw 0,r6,r5 + beq 1f + addi r8,r8,CPU_SPEC_ENTRY_SIZE + b 1b +1: + addis r6,r3,cur_cpu_spec@ha + addi r6,r6,cur_cpu_spec@l + sub r8,r8,r3 + stw r8,0(r6) + blr + +/* + * do_cpu_ftr_fixups - goes through the list of CPU feature fixups + * and writes nop's over sections of code that don't apply for this cpu. + * r3 = data offset (not changed) + */ +_GLOBAL(do_cpu_ftr_fixups) + /* Get CPU 0 features */ + addis r6,r3,cur_cpu_spec@ha + addi r6,r6,cur_cpu_spec@l + lwz r4,0(r6) + add r4,r4,r3 + lwz r4,CPU_SPEC_FEATURES(r4) + + /* Get the fixup table */ + addis r6,r3,__start___ftr_fixup@ha + addi r6,r6,__start___ftr_fixup@l + addis r7,r3,__stop___ftr_fixup@ha + addi r7,r7,__stop___ftr_fixup@l + + /* Do the fixup */ +1: cmplw 0,r6,r7 + bgelr + addi r6,r6,16 + lwz r8,-16(r6) /* mask */ + and r8,r8,r4 + lwz r9,-12(r6) /* value */ + cmplw 0,r8,r9 + beq 1b + lwz r8,-8(r6) /* section begin */ + lwz r9,-4(r6) /* section end */ + subf. r9,r8,r9 + beq 1b + /* write nops over the section of code */ + /* todo: if large section, add a branch at the start of it */ + srwi r9,r9,2 + mtctr r9 + add r8,r8,r3 + lis r0,0x60000000@h /* nop */ +3: stw r0,0(r8) + andi. r10,r4,CPU_FTR_SPLIT_ID_CACHE@l + beq 2f + dcbst 0,r8 /* suboptimal, but simpler */ + sync + icbi 0,r8 +2: addi r8,r8,4 + bdnz 3b + sync /* additional sync needed on g4 */ + isync + b 1b + +/* + * call_setup_cpu - call the setup_cpu function for this cpu + * r3 = data offset, r24 = cpu number + * + * Setup function is called with: + * r3 = data offset + * r4 = ptr to CPU spec (relocated) + */ +_GLOBAL(call_setup_cpu) + addis r4,r3,cur_cpu_spec@ha + addi r4,r4,cur_cpu_spec@l + lwz r4,0(r4) + add r4,r4,r3 + lwz r5,CPU_SPEC_SETUP(r4) + cmpi 0,r5,0 + add r5,r5,r3 + beqlr + mtctr r5 + bctr + +#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_6xx) + +/* This gets called by via-pmu.c to switch the PLL selection + * on 750fx CPU. This function should really be moved to some + * other place (as most of the cpufreq code in via-pmu + */ +_GLOBAL(low_choose_750fx_pll) + /* Clear MSR:EE */ + mfmsr r7 + rlwinm r0,r7,0,17,15 + mtmsr r0 + + /* If switching to PLL1, disable HID0:BTIC */ + cmplwi cr0,r3,0 + beq 1f + mfspr r5,SPRN_HID0 + rlwinm r5,r5,0,27,25 + sync + mtspr SPRN_HID0,r5 + isync + sync + +1: + /* Calc new HID1 value */ + mfspr r4,SPRN_HID1 /* Build a HID1:PS bit from parameter */ + rlwinm r5,r3,16,15,15 /* Clear out HID1:PS from value read */ + rlwinm r4,r4,0,16,14 /* Could have I used rlwimi here ? */ + or r4,r4,r5 + mtspr SPRN_HID1,r4 + + /* Store new HID1 image */ + rlwinm r6,r1,0,0,18 + lwz r6,TI_CPU(r6) + slwi r6,r6,2 + addis r6,r6,nap_save_hid1@ha + stw r4,nap_save_hid1@l(r6) + + /* If switching to PLL0, enable HID0:BTIC */ + cmplwi cr0,r3,0 + bne 1f + mfspr r5,SPRN_HID0 + ori r5,r5,HID0_BTIC + sync + mtspr SPRN_HID0,r5 + isync + sync + +1: + /* Return */ + mtmsr r7 + blr + +_GLOBAL(low_choose_7447a_dfs) + /* Clear MSR:EE */ + mfmsr r7 + rlwinm r0,r7,0,17,15 + mtmsr r0 + + /* Calc new HID1 value */ + mfspr r4,SPRN_HID1 + insrwi r4,r3,1,9 /* insert parameter into bit 9 */ + sync + mtspr SPRN_HID1,r4 + sync + isync + + /* Return */ + mtmsr r7 + blr + +#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_6xx */ + +/* + * complement mask on the msr then "or" some values on. + * _nmask_and_or_msr(nmask, value_to_or) + */ +_GLOBAL(_nmask_and_or_msr) + mfmsr r0 /* Get current msr */ + andc r0,r0,r3 /* And off the bits set in r3 (first parm) */ + or r0,r0,r4 /* Or on the bits in r4 (second parm) */ + SYNC /* Some chip revs have problems here... */ + mtmsr r0 /* Update machine state */ + isync + blr /* Done */ + + +/* + * Flush MMU TLB + */ +_GLOBAL(_tlbia) +#if defined(CONFIG_40x) + sync /* Flush to memory before changing mapping */ + tlbia + isync /* Flush shadow TLB */ +#elif defined(CONFIG_44x) + li r3,0 + sync + + /* Load high watermark */ + lis r4,tlb_44x_hwater@ha + lwz r5,tlb_44x_hwater@l(r4) + +1: tlbwe r3,r3,PPC44x_TLB_PAGEID + addi r3,r3,1 + cmpw 0,r3,r5 + ble 1b + + isync +#elif defined(CONFIG_FSL_BOOKE) + /* Invalidate all entries in TLB0 */ + li r3, 0x04 + tlbivax 0,3 + /* Invalidate all entries in TLB1 */ + li r3, 0x0c + tlbivax 0,3 + /* Invalidate all entries in TLB2 */ + li r3, 0x14 + tlbivax 0,3 + /* Invalidate all entries in TLB3 */ + li r3, 0x1c + tlbivax 0,3 + msync +#ifdef CONFIG_SMP + tlbsync +#endif /* CONFIG_SMP */ +#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */ +#if defined(CONFIG_SMP) + rlwinm r8,r1,0,0,18 + lwz r8,TI_CPU(r8) + oris r8,r8,10 + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + SYNC_601 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b + sync + tlbia + sync + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + SYNC_601 + isync +#else /* CONFIG_SMP */ + sync + tlbia + sync +#endif /* CONFIG_SMP */ +#endif /* ! defined(CONFIG_40x) */ + blr + +/* + * Flush MMU TLB for a particular address + */ +_GLOBAL(_tlbie) +#if defined(CONFIG_40x) + tlbsx. r3, 0, r3 + bne 10f + sync + /* There are only 64 TLB entries, so r3 < 64, which means bit 25 is clear. + * Since 25 is the V bit in the TLB_TAG, loading this value will invalidate + * the TLB entry. */ + tlbwe r3, r3, TLB_TAG + isync +10: +#elif defined(CONFIG_44x) + mfspr r4,SPRN_MMUCR + mfspr r5,SPRN_PID /* Get PID */ + rlwimi r4,r5,0,24,31 /* Set TID */ + mtspr SPRN_MMUCR,r4 + + tlbsx. r3, 0, r3 + bne 10f + sync + /* There are only 64 TLB entries, so r3 < 64, + * which means bit 22, is clear. Since 22 is + * the V bit in the TLB_PAGEID, loading this + * value will invalidate the TLB entry. + */ + tlbwe r3, r3, PPC44x_TLB_PAGEID + isync +10: +#elif defined(CONFIG_FSL_BOOKE) + rlwinm r4, r3, 0, 0, 19 + ori r5, r4, 0x08 /* TLBSEL = 1 */ + ori r6, r4, 0x10 /* TLBSEL = 2 */ + ori r7, r4, 0x18 /* TLBSEL = 3 */ + tlbivax 0, r4 + tlbivax 0, r5 + tlbivax 0, r6 + tlbivax 0, r7 + msync +#if defined(CONFIG_SMP) + tlbsync +#endif /* CONFIG_SMP */ +#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */ +#if defined(CONFIG_SMP) + rlwinm r8,r1,0,0,18 + lwz r8,TI_CPU(r8) + oris r8,r8,11 + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + SYNC_601 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b + eieio + tlbie r3 + sync + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + SYNC_601 + isync +#else /* CONFIG_SMP */ + tlbie r3 + sync +#endif /* CONFIG_SMP */ +#endif /* ! CONFIG_40x */ + blr + +/* + * Flush instruction cache. + * This is a no-op on the 601. + */ +_GLOBAL(flush_instruction_cache) +#if defined(CONFIG_8xx) + isync + lis r5, IDC_INVALL@h + mtspr SPRN_IC_CST, r5 +#elif defined(CONFIG_4xx) +#ifdef CONFIG_403GCX + li r3, 512 + mtctr r3 + lis r4, KERNELBASE@h +1: iccci 0, r4 + addi r4, r4, 16 + bdnz 1b +#else + lis r3, KERNELBASE@h + iccci 0,r3 +#endif +#elif CONFIG_FSL_BOOKE +BEGIN_FTR_SECTION + mfspr r3,SPRN_L1CSR0 + ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC + /* msync; isync recommended here */ + mtspr SPRN_L1CSR0,r3 + isync + blr +END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) + mfspr r3,SPRN_L1CSR1 + ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR + mtspr SPRN_L1CSR1,r3 +#else + mfspr r3,SPRN_PVR + rlwinm r3,r3,16,16,31 + cmpwi 0,r3,1 + beqlr /* for 601, do nothing */ + /* 603/604 processor - use invalidate-all bit in HID0 */ + mfspr r3,SPRN_HID0 + ori r3,r3,HID0_ICFI + mtspr SPRN_HID0,r3 +#endif /* CONFIG_8xx/4xx */ + isync + blr + +/* + * Write any modified data cache blocks out to memory + * and invalidate the corresponding instruction cache blocks. + * This is a no-op on the 601. + * + * flush_icache_range(unsigned long start, unsigned long stop) + */ +_GLOBAL(flush_icache_range) +BEGIN_FTR_SECTION + blr /* for 601, do nothing */ +END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) + li r5,L1_CACHE_LINE_SIZE-1 + andc r3,r3,r5 + subf r4,r3,r4 + add r4,r4,r5 + srwi. r4,r4,LG_L1_CACHE_LINE_SIZE + beqlr + mtctr r4 + mr r6,r3 +1: dcbst 0,r3 + addi r3,r3,L1_CACHE_LINE_SIZE + bdnz 1b + sync /* wait for dcbst's to get to ram */ + mtctr r4 +2: icbi 0,r6 + addi r6,r6,L1_CACHE_LINE_SIZE + bdnz 2b + sync /* additional sync needed on g4 */ + isync + blr +/* + * Write any modified data cache blocks out to memory. + * Does not invalidate the corresponding cache lines (especially for + * any corresponding instruction cache). + * + * clean_dcache_range(unsigned long start, unsigned long stop) + */ +_GLOBAL(clean_dcache_range) + li r5,L1_CACHE_LINE_SIZE-1 + andc r3,r3,r5 + subf r4,r3,r4 + add r4,r4,r5 + srwi. r4,r4,LG_L1_CACHE_LINE_SIZE + beqlr + mtctr r4 + +1: dcbst 0,r3 + addi r3,r3,L1_CACHE_LINE_SIZE + bdnz 1b + sync /* wait for dcbst's to get to ram */ + blr + +/* + * Write any modified data cache blocks out to memory and invalidate them. + * Does not invalidate the corresponding instruction cache blocks. + * + * flush_dcache_range(unsigned long start, unsigned long stop) + */ +_GLOBAL(flush_dcache_range) + li r5,L1_CACHE_LINE_SIZE-1 + andc r3,r3,r5 + subf r4,r3,r4 + add r4,r4,r5 + srwi. r4,r4,LG_L1_CACHE_LINE_SIZE + beqlr + mtctr r4 + +1: dcbf 0,r3 + addi r3,r3,L1_CACHE_LINE_SIZE + bdnz 1b + sync /* wait for dcbst's to get to ram */ + blr + +/* + * Like above, but invalidate the D-cache. This is used by the 8xx + * to invalidate the cache so the PPC core doesn't get stale data + * from the CPM (no cache snooping here :-). + * + * invalidate_dcache_range(unsigned long start, unsigned long stop) + */ +_GLOBAL(invalidate_dcache_range) + li r5,L1_CACHE_LINE_SIZE-1 + andc r3,r3,r5 + subf r4,r3,r4 + add r4,r4,r5 + srwi. r4,r4,LG_L1_CACHE_LINE_SIZE + beqlr + mtctr r4 + +1: dcbi 0,r3 + addi r3,r3,L1_CACHE_LINE_SIZE + bdnz 1b + sync /* wait for dcbi's to get to ram */ + blr + +#ifdef CONFIG_NOT_COHERENT_CACHE +/* + * 40x cores have 8K or 16K dcache and 32 byte line size. + * 44x has a 32K dcache and 32 byte line size. + * 8xx has 1, 2, 4, 8K variants. + * For now, cover the worst case of the 44x. + * Must be called with external interrupts disabled. + */ +#define CACHE_NWAYS 64 +#define CACHE_NLINES 16 + +_GLOBAL(flush_dcache_all) + li r4, (2 * CACHE_NWAYS * CACHE_NLINES) + mtctr r4 + lis r5, KERNELBASE@h +1: lwz r3, 0(r5) /* Load one word from every line */ + addi r5, r5, L1_CACHE_LINE_SIZE + bdnz 1b + blr +#endif /* CONFIG_NOT_COHERENT_CACHE */ + +/* + * Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * This is a no-op on the 601 which has a unified cache. + * + * void __flush_dcache_icache(void *page) + */ +_GLOBAL(__flush_dcache_icache) +BEGIN_FTR_SECTION + blr /* for 601, do nothing */ +END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) + rlwinm r3,r3,0,0,19 /* Get page base address */ + li r4,4096/L1_CACHE_LINE_SIZE /* Number of lines in a page */ + mtctr r4 + mr r6,r3 +0: dcbst 0,r3 /* Write line to ram */ + addi r3,r3,L1_CACHE_LINE_SIZE + bdnz 0b + sync + mtctr r4 +1: icbi 0,r6 + addi r6,r6,L1_CACHE_LINE_SIZE + bdnz 1b + sync + isync + blr + +/* + * Flush a particular page from the data cache to RAM, identified + * by its physical address. We turn off the MMU so we can just use + * the physical address (this may be a highmem page without a kernel + * mapping). + * + * void __flush_dcache_icache_phys(unsigned long physaddr) + */ +_GLOBAL(__flush_dcache_icache_phys) +BEGIN_FTR_SECTION + blr /* for 601, do nothing */ +END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) + mfmsr r10 + rlwinm r0,r10,0,28,26 /* clear DR */ + mtmsr r0 + isync + rlwinm r3,r3,0,0,19 /* Get page base address */ + li r4,4096/L1_CACHE_LINE_SIZE /* Number of lines in a page */ + mtctr r4 + mr r6,r3 +0: dcbst 0,r3 /* Write line to ram */ + addi r3,r3,L1_CACHE_LINE_SIZE + bdnz 0b + sync + mtctr r4 +1: icbi 0,r6 + addi r6,r6,L1_CACHE_LINE_SIZE + bdnz 1b + sync + mtmsr r10 /* restore DR */ + isync + blr + +/* + * Clear pages using the dcbz instruction, which doesn't cause any + * memory traffic (except to write out any cache lines which get + * displaced). This only works on cacheable memory. + * + * void clear_pages(void *page, int order) ; + */ +_GLOBAL(clear_pages) + li r0,4096/L1_CACHE_LINE_SIZE + slw r0,r0,r4 + mtctr r0 +#ifdef CONFIG_8xx + li r4, 0 +1: stw r4, 0(r3) + stw r4, 4(r3) + stw r4, 8(r3) + stw r4, 12(r3) +#else +1: dcbz 0,r3 +#endif + addi r3,r3,L1_CACHE_LINE_SIZE + bdnz 1b + blr + +/* + * Copy a whole page. We use the dcbz instruction on the destination + * to reduce memory traffic (it eliminates the unnecessary reads of + * the destination into cache). This requires that the destination + * is cacheable. + */ +#define COPY_16_BYTES \ + lwz r6,4(r4); \ + lwz r7,8(r4); \ + lwz r8,12(r4); \ + lwzu r9,16(r4); \ + stw r6,4(r3); \ + stw r7,8(r3); \ + stw r8,12(r3); \ + stwu r9,16(r3) + +_GLOBAL(copy_page) + addi r3,r3,-4 + addi r4,r4,-4 + +#ifdef CONFIG_8xx + /* don't use prefetch on 8xx */ + li r0,4096/L1_CACHE_LINE_SIZE + mtctr r0 +1: COPY_16_BYTES + bdnz 1b + blr + +#else /* not 8xx, we can prefetch */ + li r5,4 + +#if MAX_COPY_PREFETCH > 1 + li r0,MAX_COPY_PREFETCH + li r11,4 + mtctr r0 +11: dcbt r11,r4 + addi r11,r11,L1_CACHE_LINE_SIZE + bdnz 11b +#else /* MAX_COPY_PREFETCH == 1 */ + dcbt r5,r4 + li r11,L1_CACHE_LINE_SIZE+4 +#endif /* MAX_COPY_PREFETCH */ + li r0,4096/L1_CACHE_LINE_SIZE - MAX_COPY_PREFETCH + crclr 4*cr0+eq +2: + mtctr r0 +1: + dcbt r11,r4 + dcbz r5,r3 + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES +#endif +#endif +#endif + bdnz 1b + beqlr + crnot 4*cr0+eq,4*cr0+eq + li r0,MAX_COPY_PREFETCH + li r11,4 + b 2b +#endif /* CONFIG_8xx */ + +/* + * void atomic_clear_mask(atomic_t mask, atomic_t *addr) + * void atomic_set_mask(atomic_t mask, atomic_t *addr); + */ +_GLOBAL(atomic_clear_mask) +10: lwarx r5,0,r4 + andc r5,r5,r3 + PPC405_ERR77(0,r4) + stwcx. r5,0,r4 + bne- 10b + blr +_GLOBAL(atomic_set_mask) +10: lwarx r5,0,r4 + or r5,r5,r3 + PPC405_ERR77(0,r4) + stwcx. r5,0,r4 + bne- 10b + blr + +/* + * I/O string operations + * + * insb(port, buf, len) + * outsb(port, buf, len) + * insw(port, buf, len) + * outsw(port, buf, len) + * insl(port, buf, len) + * outsl(port, buf, len) + * insw_ns(port, buf, len) + * outsw_ns(port, buf, len) + * insl_ns(port, buf, len) + * outsl_ns(port, buf, len) + * + * The *_ns versions don't do byte-swapping. + */ +_GLOBAL(_insb) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,1 + blelr- +00: lbz r5,0(r3) + eieio + stbu r5,1(r4) + bdnz 00b + blr + +_GLOBAL(_outsb) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,1 + blelr- +00: lbzu r5,1(r4) + stb r5,0(r3) + eieio + bdnz 00b + blr + +_GLOBAL(_insw) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhbrx r5,0,r3 + eieio + sthu r5,2(r4) + bdnz 00b + blr + +_GLOBAL(_outsw) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhzu r5,2(r4) + eieio + sthbrx r5,0,r3 + bdnz 00b + blr + +_GLOBAL(_insl) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwbrx r5,0,r3 + eieio + stwu r5,4(r4) + bdnz 00b + blr + +_GLOBAL(_outsl) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwzu r5,4(r4) + stwbrx r5,0,r3 + eieio + bdnz 00b + blr + +_GLOBAL(__ide_mm_insw) +_GLOBAL(_insw_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhz r5,0(r3) + eieio + sthu r5,2(r4) + bdnz 00b + blr + +_GLOBAL(__ide_mm_outsw) +_GLOBAL(_outsw_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhzu r5,2(r4) + sth r5,0(r3) + eieio + bdnz 00b + blr + +_GLOBAL(__ide_mm_insl) +_GLOBAL(_insl_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwz r5,0(r3) + eieio + stwu r5,4(r4) + bdnz 00b + blr + +_GLOBAL(__ide_mm_outsl) +_GLOBAL(_outsl_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwzu r5,4(r4) + stw r5,0(r3) + eieio + bdnz 00b + blr + +/* + * Extended precision shifts. + * + * Updated to be valid for shift counts from 0 to 63 inclusive. + * -- Gabriel + * + * R3/R4 has 64 bit value + * R5 has shift count + * result in R3/R4 + * + * ashrdi3: arithmetic right shift (sign propagation) + * lshrdi3: logical right shift + * ashldi3: left shift + */ +_GLOBAL(__ashrdi3) + subfic r6,r5,32 + srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count + addi r7,r5,32 # could be xori, or addi with -32 + slw r6,r3,r6 # t1 = count > 31 ? 0 : MSW << (32-count) + rlwinm r8,r7,0,32 # t3 = (count < 32) ? 32 : 0 + sraw r7,r3,r7 # t2 = MSW >> (count-32) + or r4,r4,r6 # LSW |= t1 + slw r7,r7,r8 # t2 = (count < 32) ? 0 : t2 + sraw r3,r3,r5 # MSW = MSW >> count + or r4,r4,r7 # LSW |= t2 + blr + +_GLOBAL(__ashldi3) + subfic r6,r5,32 + slw r3,r3,r5 # MSW = count > 31 ? 0 : MSW << count + addi r7,r5,32 # could be xori, or addi with -32 + srw r6,r4,r6 # t1 = count > 31 ? 0 : LSW >> (32-count) + slw r7,r4,r7 # t2 = count < 32 ? 0 : LSW << (count-32) + or r3,r3,r6 # MSW |= t1 + slw r4,r4,r5 # LSW = LSW << count + or r3,r3,r7 # MSW |= t2 + blr + +_GLOBAL(__lshrdi3) + subfic r6,r5,32 + srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count + addi r7,r5,32 # could be xori, or addi with -32 + slw r6,r3,r6 # t1 = count > 31 ? 0 : MSW << (32-count) + srw r7,r3,r7 # t2 = count < 32 ? 0 : MSW >> (count-32) + or r4,r4,r6 # LSW |= t1 + srw r3,r3,r5 # MSW = MSW >> count + or r4,r4,r7 # LSW |= t2 + blr + +_GLOBAL(abs) + srawi r4,r3,31 + xor r3,r3,r4 + sub r3,r3,r4 + blr + +_GLOBAL(_get_SP) + mr r3,r1 /* Close enough */ + blr + +/* + * These are used in the alignment trap handler when emulating + * single-precision loads and stores. + * We restore and save the fpscr so the task gets the same result + * and exceptions as if the cpu had performed the load or store. + */ + +#ifdef CONFIG_PPC_FPU +_GLOBAL(cvt_fd) + lfd 0,-4(r5) /* load up fpscr value */ + mtfsf 0xff,0 + lfs 0,0(r3) + stfd 0,0(r4) + mffs 0 /* save new fpscr value */ + stfd 0,-4(r5) + blr + +_GLOBAL(cvt_df) + lfd 0,-4(r5) /* load up fpscr value */ + mtfsf 0xff,0 + lfd 0,0(r3) + stfs 0,0(r4) + mffs 0 /* save new fpscr value */ + stfd 0,-4(r5) + blr +#endif + +/* + * Create a kernel thread + * kernel_thread(fn, arg, flags) + */ +_GLOBAL(kernel_thread) + stwu r1,-16(r1) + stw r30,8(r1) + stw r31,12(r1) + mr r30,r3 /* function */ + mr r31,r4 /* argument */ + ori r3,r5,CLONE_VM /* flags */ + oris r3,r3,CLONE_UNTRACED>>16 + li r4,0 /* new sp (unused) */ + li r0,__NR_clone + sc + cmpwi 0,r3,0 /* parent or child? */ + bne 1f /* return if parent */ + li r0,0 /* make top-level stack frame */ + stwu r0,-16(r1) + mtlr r30 /* fn addr in lr */ + mr r3,r31 /* load arg and call fn */ + PPC440EP_ERR42 + blrl + li r0,__NR_exit /* exit if function returns */ + li r3,0 + sc +1: lwz r30,8(r1) + lwz r31,12(r1) + addi r1,r1,16 + blr + +_GLOBAL(execve) + li r0,__NR_execve + sc + bnslr + neg r3,r3 + blr + +/* + * This routine is just here to keep GCC happy - sigh... + */ +_GLOBAL(__main) + blr diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S new file mode 100644 index 00000000000..8fe295693c0 --- /dev/null +++ b/arch/powerpc/kernel/misc_64.S @@ -0,0 +1,898 @@ +/* + * arch/powerpc/kernel/misc64.S + * + * This file contains miscellaneous low-level functions. + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) + * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/sys.h> +#include <asm/unistd.h> +#include <asm/errno.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cache.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cputable.h> + + .text + +/* + * Returns (address we are running at) - (address we were linked at) + * for use before the text and data are mapped to KERNELBASE. + */ + +_GLOBAL(reloc_offset) + mflr r0 + bl 1f +1: mflr r3 + LOADADDR(r4,1b) + subf r3,r4,r3 + mtlr r0 + blr + +/* + * add_reloc_offset(x) returns x + reloc_offset(). + */ +_GLOBAL(add_reloc_offset) + mflr r0 + bl 1f +1: mflr r5 + LOADADDR(r4,1b) + subf r5,r4,r5 + add r3,r3,r5 + mtlr r0 + blr + +_GLOBAL(get_msr) + mfmsr r3 + blr + +_GLOBAL(get_dar) + mfdar r3 + blr + +_GLOBAL(get_srr0) + mfsrr0 r3 + blr + +_GLOBAL(get_srr1) + mfsrr1 r3 + blr + +_GLOBAL(get_sp) + mr r3,r1 + blr + +#ifdef CONFIG_IRQSTACKS +_GLOBAL(call_do_softirq) + mflr r0 + std r0,16(r1) + stdu r1,THREAD_SIZE-112(r3) + mr r1,r3 + bl .__do_softirq + ld r1,0(r1) + ld r0,16(r1) + mtlr r0 + blr + +_GLOBAL(call_handle_IRQ_event) + mflr r0 + std r0,16(r1) + stdu r1,THREAD_SIZE-112(r6) + mr r1,r6 + bl .handle_IRQ_event + ld r1,0(r1) + ld r0,16(r1) + mtlr r0 + blr +#endif /* CONFIG_IRQSTACKS */ + + /* + * To be called by C code which needs to do some operations with MMU + * disabled. Note that interrupts have to be disabled by the caller + * prior to calling us. The code called _MUST_ be in the RMO of course + * and part of the linear mapping as we don't attempt to translate the + * stack pointer at all. The function is called with the stack switched + * to this CPU emergency stack + * + * prototype is void *call_with_mmu_off(void *func, void *data); + * + * the called function is expected to be of the form + * + * void *called(void *data); + */ +_GLOBAL(call_with_mmu_off) + mflr r0 /* get link, save it on stackframe */ + std r0,16(r1) + mr r1,r5 /* save old stack ptr */ + ld r1,PACAEMERGSP(r13) /* get emerg. stack */ + subi r1,r1,STACK_FRAME_OVERHEAD + std r0,16(r1) /* save link on emerg. stack */ + std r5,0(r1) /* save old stack ptr in backchain */ + ld r3,0(r3) /* get to real function ptr (assume same TOC) */ + bl 2f /* we need LR to return, continue at label 2 */ + + ld r0,16(r1) /* we return here from the call, get LR and */ + ld r1,0(r1) /* .. old stack ptr */ + mtspr SPRN_SRR0,r0 /* and get back to virtual mode with these */ + mfmsr r4 + ori r4,r4,MSR_IR|MSR_DR + mtspr SPRN_SRR1,r4 + rfid + +2: mtspr SPRN_SRR0,r3 /* coming from above, enter real mode */ + mr r3,r4 /* get parameter */ + mfmsr r0 + ori r0,r0,MSR_IR|MSR_DR + xori r0,r0,MSR_IR|MSR_DR + mtspr SPRN_SRR1,r0 + rfid + + + .section ".toc","aw" +PPC64_CACHES: + .tc ppc64_caches[TC],ppc64_caches + .section ".text" + +/* + * Write any modified data cache blocks out to memory + * and invalidate the corresponding instruction cache blocks. + * + * flush_icache_range(unsigned long start, unsigned long stop) + * + * flush all bytes from start through stop-1 inclusive + */ + +_KPROBE(__flush_icache_range) + +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + * and in some cases i-cache and d-cache line sizes differ from + * each other. + */ + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +1: dcbst 0,r6 + add r6,r6,r7 + bdnz 1b + sync + +/* Now invalidate the instruction cache */ + + lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 + lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +2: icbi 0,r6 + add r6,r6,r7 + bdnz 2b + isync + blr + .previous .text +/* + * Like above, but only do the D-cache. + * + * flush_dcache_range(unsigned long start, unsigned long stop) + * + * flush all bytes from start to stop-1 inclusive + */ +_GLOBAL(flush_dcache_range) + +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + */ + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +0: dcbst 0,r6 + add r6,r6,r7 + bdnz 0b + sync + blr + +/* + * Like above, but works on non-mapped physical addresses. + * Use only for non-LPAR setups ! It also assumes real mode + * is cacheable. Used for flushing out the DART before using + * it as uncacheable memory + * + * flush_dcache_phys_range(unsigned long start, unsigned long stop) + * + * flush all bytes from start to stop-1 inclusive + */ +_GLOBAL(flush_dcache_phys_range) + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mfmsr r5 /* Disable MMU Data Relocation */ + ori r0,r5,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsr r0 + sync + isync + mtctr r8 +0: dcbst 0,r6 + add r6,r6,r7 + bdnz 0b + sync + isync + mtmsr r5 /* Re-enable MMU Data Relocation */ + sync + isync + blr + +_GLOBAL(flush_inval_dcache_range) + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + sync + isync + mtctr r8 +0: dcbf 0,r6 + add r6,r6,r7 + bdnz 0b + sync + isync + blr + + +/* + * Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * void __flush_dcache_icache(void *page) + */ +_GLOBAL(__flush_dcache_icache) +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + */ + +/* Flush the dcache */ + ld r7,PPC64_CACHES@toc(r2) + clrrdi r3,r3,PAGE_SHIFT /* Page align */ + lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */ + lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */ + mr r6,r3 + mtctr r4 +0: dcbst 0,r6 + add r6,r6,r5 + bdnz 0b + sync + +/* Now invalidate the icache */ + + lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */ + lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */ + mtctr r4 +1: icbi 0,r3 + add r3,r3,r5 + bdnz 1b + isync + blr + +/* + * I/O string operations + * + * insb(port, buf, len) + * outsb(port, buf, len) + * insw(port, buf, len) + * outsw(port, buf, len) + * insl(port, buf, len) + * outsl(port, buf, len) + * insw_ns(port, buf, len) + * outsw_ns(port, buf, len) + * insl_ns(port, buf, len) + * outsl_ns(port, buf, len) + * + * The *_ns versions don't do byte-swapping. + */ +_GLOBAL(_insb) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,1 + blelr- +00: lbz r5,0(r3) + eieio + stbu r5,1(r4) + bdnz 00b + twi 0,r5,0 + isync + blr + +_GLOBAL(_outsb) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,1 + blelr- +00: lbzu r5,1(r4) + stb r5,0(r3) + bdnz 00b + sync + blr + +_GLOBAL(_insw) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhbrx r5,0,r3 + eieio + sthu r5,2(r4) + bdnz 00b + twi 0,r5,0 + isync + blr + +_GLOBAL(_outsw) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhzu r5,2(r4) + sthbrx r5,0,r3 + bdnz 00b + sync + blr + +_GLOBAL(_insl) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwbrx r5,0,r3 + eieio + stwu r5,4(r4) + bdnz 00b + twi 0,r5,0 + isync + blr + +_GLOBAL(_outsl) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwzu r5,4(r4) + stwbrx r5,0,r3 + bdnz 00b + sync + blr + +/* _GLOBAL(ide_insw) now in drivers/ide/ide-iops.c */ +_GLOBAL(_insw_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhz r5,0(r3) + eieio + sthu r5,2(r4) + bdnz 00b + twi 0,r5,0 + isync + blr + +/* _GLOBAL(ide_outsw) now in drivers/ide/ide-iops.c */ +_GLOBAL(_outsw_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhzu r5,2(r4) + sth r5,0(r3) + bdnz 00b + sync + blr + +_GLOBAL(_insl_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwz r5,0(r3) + eieio + stwu r5,4(r4) + bdnz 00b + twi 0,r5,0 + isync + blr + +_GLOBAL(_outsl_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwzu r5,4(r4) + stw r5,0(r3) + bdnz 00b + sync + blr + + +_GLOBAL(cvt_fd) + lfd 0,0(r5) /* load up fpscr value */ + mtfsf 0xff,0 + lfs 0,0(r3) + stfd 0,0(r4) + mffs 0 /* save new fpscr value */ + stfd 0,0(r5) + blr + +_GLOBAL(cvt_df) + lfd 0,0(r5) /* load up fpscr value */ + mtfsf 0xff,0 + lfd 0,0(r3) + stfs 0,0(r4) + mffs 0 /* save new fpscr value */ + stfd 0,0(r5) + blr + +/* + * identify_cpu and calls setup_cpu + * In: r3 = base of the cpu_specs array + * r4 = address of cur_cpu_spec + * r5 = relocation offset + */ +_GLOBAL(identify_cpu) + mfpvr r7 +1: + lwz r8,CPU_SPEC_PVR_MASK(r3) + and r8,r8,r7 + lwz r9,CPU_SPEC_PVR_VALUE(r3) + cmplw 0,r9,r8 + beq 1f + addi r3,r3,CPU_SPEC_ENTRY_SIZE + b 1b +1: + sub r0,r3,r5 + std r0,0(r4) + ld r4,CPU_SPEC_SETUP(r3) + add r4,r4,r5 + ld r4,0(r4) + add r4,r4,r5 + mtctr r4 + /* Calling convention for cpu setup is r3=offset, r4=cur_cpu_spec */ + mr r4,r3 + mr r3,r5 + bctr + +/* + * do_cpu_ftr_fixups - goes through the list of CPU feature fixups + * and writes nop's over sections of code that don't apply for this cpu. + * r3 = data offset (not changed) + */ +_GLOBAL(do_cpu_ftr_fixups) + /* Get CPU 0 features */ + LOADADDR(r6,cur_cpu_spec) + sub r6,r6,r3 + ld r4,0(r6) + sub r4,r4,r3 + ld r4,CPU_SPEC_FEATURES(r4) + /* Get the fixup table */ + LOADADDR(r6,__start___ftr_fixup) + sub r6,r6,r3 + LOADADDR(r7,__stop___ftr_fixup) + sub r7,r7,r3 + /* Do the fixup */ +1: cmpld r6,r7 + bgelr + addi r6,r6,32 + ld r8,-32(r6) /* mask */ + and r8,r8,r4 + ld r9,-24(r6) /* value */ + cmpld r8,r9 + beq 1b + ld r8,-16(r6) /* section begin */ + ld r9,-8(r6) /* section end */ + subf. r9,r8,r9 + beq 1b + /* write nops over the section of code */ + /* todo: if large section, add a branch at the start of it */ + srwi r9,r9,2 + mtctr r9 + sub r8,r8,r3 + lis r0,0x60000000@h /* nop */ +3: stw r0,0(r8) + andi. r10,r4,CPU_FTR_SPLIT_ID_CACHE@l + beq 2f + dcbst 0,r8 /* suboptimal, but simpler */ + sync + icbi 0,r8 +2: addi r8,r8,4 + bdnz 3b + sync /* additional sync needed on g4 */ + isync + b 1b + +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) +/* + * Do an IO access in real mode + */ +_GLOBAL(real_readb) + mfmsr r7 + ori r0,r7,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsrd r0 + sync + isync + mfspr r6,SPRN_HID4 + rldicl r5,r6,32,0 + ori r5,r5,0x100 + rldicl r5,r5,32,0 + sync + mtspr SPRN_HID4,r5 + isync + slbia + isync + lbz r3,0(r3) + sync + mtspr SPRN_HID4,r6 + isync + slbia + isync + mtmsrd r7 + sync + isync + blr + + /* + * Do an IO access in real mode + */ +_GLOBAL(real_writeb) + mfmsr r7 + ori r0,r7,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsrd r0 + sync + isync + mfspr r6,SPRN_HID4 + rldicl r5,r6,32,0 + ori r5,r5,0x100 + rldicl r5,r5,32,0 + sync + mtspr SPRN_HID4,r5 + isync + slbia + isync + stb r3,0(r4) + sync + mtspr SPRN_HID4,r6 + isync + slbia + isync + mtmsrd r7 + sync + isync + blr +#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ + +/* + * Create a kernel thread + * kernel_thread(fn, arg, flags) + */ +_GLOBAL(kernel_thread) + std r29,-24(r1) + std r30,-16(r1) + stdu r1,-STACK_FRAME_OVERHEAD(r1) + mr r29,r3 + mr r30,r4 + ori r3,r5,CLONE_VM /* flags */ + oris r3,r3,(CLONE_UNTRACED>>16) + li r4,0 /* new sp (unused) */ + li r0,__NR_clone + sc + cmpdi 0,r3,0 /* parent or child? */ + bne 1f /* return if parent */ + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + ld r2,8(r29) + ld r29,0(r29) + mtlr r29 /* fn addr in lr */ + mr r3,r30 /* load arg and call fn */ + blrl + li r0,__NR_exit /* exit after child exits */ + li r3,0 + sc +1: addi r1,r1,STACK_FRAME_OVERHEAD + ld r29,-24(r1) + ld r30,-16(r1) + blr + +/* + * disable_kernel_fp() + * Disable the FPU. + */ +_GLOBAL(disable_kernel_fp) + mfmsr r3 + rldicl r0,r3,(63-MSR_FP_LG),1 + rldicl r3,r0,(MSR_FP_LG+1),0 + mtmsrd r3 /* disable use of fpu now */ + isync + blr + +#ifdef CONFIG_ALTIVEC + +#if 0 /* this has no callers for now */ +/* + * disable_kernel_altivec() + * Disable the VMX. + */ +_GLOBAL(disable_kernel_altivec) + mfmsr r3 + rldicl r0,r3,(63-MSR_VEC_LG),1 + rldicl r3,r0,(MSR_VEC_LG+1),0 + mtmsrd r3 /* disable use of VMX now */ + isync + blr +#endif /* 0 */ + +/* + * giveup_altivec(tsk) + * Disable VMX for the task given as the argument, + * and save the vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. + */ +_GLOBAL(giveup_altivec) + mfmsr r5 + oris r5,r5,MSR_VEC@h + mtmsrd r5 /* enable use of VMX now */ + isync + cmpdi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpdi 0,r5,0 + SAVE_32VRS(0,r4,r3) + mfvscr vr0 + li r4,THREAD_VSCR + stvx vr0,r4,r3 + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_VEC@h + andc r4,r4,r3 /* disable FP for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + ld r4,last_task_used_altivec@got(r2) + std r5,0(r4) +#endif /* CONFIG_SMP */ + blr + +#endif /* CONFIG_ALTIVEC */ + +_GLOBAL(__setup_cpu_power3) + blr + +_GLOBAL(execve) + li r0,__NR_execve + sc + bnslr + neg r3,r3 + blr + +/* kexec_wait(phys_cpu) + * + * wait for the flag to change, indicating this kernel is going away but + * the slave code for the next one is at addresses 0 to 100. + * + * This is used by all slaves. + * + * Physical (hardware) cpu id should be in r3. + */ +_GLOBAL(kexec_wait) + bl 1f +1: mflr r5 + addi r5,r5,kexec_flag-1b + +99: HMT_LOW +#ifdef CONFIG_KEXEC /* use no memory without kexec */ + lwz r4,0(r5) + cmpwi 0,r4,0 + bnea 0x60 +#endif + b 99b + +/* this can be in text because we won't change it until we are + * running in real anyways + */ +kexec_flag: + .long 0 + + +#ifdef CONFIG_KEXEC + +/* kexec_smp_wait(void) + * + * call with interrupts off + * note: this is a terminal routine, it does not save lr + * + * get phys id from paca + * set paca id to -1 to say we got here + * switch to real mode + * join other cpus in kexec_wait(phys_id) + */ +_GLOBAL(kexec_smp_wait) + lhz r3,PACAHWCPUID(r13) + li r4,-1 + sth r4,PACAHWCPUID(r13) /* let others know we left */ + bl real_mode + b .kexec_wait + +/* + * switch to real mode (turn mmu off) + * we use the early kernel trick that the hardware ignores bits + * 0 and 1 (big endian) of the effective address in real mode + * + * don't overwrite r3 here, it is live for kexec_wait above. + */ +real_mode: /* assume normal blr return */ +1: li r9,MSR_RI + li r10,MSR_DR|MSR_IR + mflr r11 /* return address to SRR0 */ + mfmsr r12 + andc r9,r12,r9 + andc r10,r12,r10 + + mtmsrd r9,1 + mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r11 + rfid + + +/* + * kexec_sequence(newstack, start, image, control, clear_all()) + * + * does the grungy work with stack switching and real mode switches + * also does simple calls to other code + */ + +_GLOBAL(kexec_sequence) + mflr r0 + std r0,16(r1) + + /* switch stacks to newstack -- &kexec_stack.stack */ + stdu r1,THREAD_SIZE-112(r3) + mr r1,r3 + + li r0,0 + std r0,16(r1) + + /* save regs for local vars on new stack. + * yes, we won't go back, but ... + */ + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + stdu r1,-112-64(r1) + + /* save args into preserved regs */ + mr r31,r3 /* newstack (both) */ + mr r30,r4 /* start (real) */ + mr r29,r5 /* image (virt) */ + mr r28,r6 /* control, unused */ + mr r27,r7 /* clear_all() fn desc */ + mr r26,r8 /* spare */ + lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */ + + /* disable interrupts, we are overwriting kernel data next */ + mfmsr r3 + rlwinm r3,r3,0,17,15 + mtmsrd r3,1 + + /* copy dest pages, flush whole dest image */ + mr r3,r29 + bl .kexec_copy_flush /* (image) */ + + /* turn off mmu */ + bl real_mode + + /* clear out hardware hash page table and tlb */ + ld r5,0(r27) /* deref function descriptor */ + mtctr r5 + bctrl /* ppc_md.hash_clear_all(void); */ + +/* + * kexec image calling is: + * the first 0x100 bytes of the entry point are copied to 0 + * + * all slaves branch to slave = 0x60 (absolute) + * slave(phys_cpu_id); + * + * master goes to start = entry point + * start(phys_cpu_id, start, 0); + * + * + * a wrapper is needed to call existing kernels, here is an approximate + * description of one method: + * + * v2: (2.6.10) + * start will be near the boot_block (maybe 0x100 bytes before it?) + * it will have a 0x60, which will b to boot_block, where it will wait + * and 0 will store phys into struct boot-block and load r3 from there, + * copy kernel 0-0x100 and tell slaves to back down to 0x60 again + * + * v1: (2.6.9) + * boot block will have all cpus scanning device tree to see if they + * are the boot cpu ????? + * other device tree differences (prop sizes, va vs pa, etc)... + */ + + /* copy 0x100 bytes starting at start to 0 */ + li r3,0 + mr r4,r30 + li r5,0x100 + li r6,0 + bl .copy_and_flush /* (dest, src, copy limit, start offset) */ +1: /* assume normal blr return */ + + /* release other cpus to the new kernel secondary start at 0x60 */ + mflr r5 + li r6,1 + stw r6,kexec_flag-1b(5) + mr r3,r25 # my phys cpu + mr r4,r30 # start, aka phys mem offset + mtlr 4 + li r5,0 + blr /* image->start(physid, image->start, 0); */ +#endif /* CONFIG_KEXEC */ diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c new file mode 100644 index 00000000000..76671881451 --- /dev/null +++ b/arch/powerpc/kernel/of_device.c @@ -0,0 +1,274 @@ +#include <linux/config.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <asm/errno.h> +#include <asm/of_device.h> + +/** + * of_match_device - Tell if an of_device structure has a matching + * of_match structure + * @ids: array of of device match structures to search in + * @dev: the of device structure to match against + * + * Used by a driver to check whether an of_device present in the + * system is in its list of supported devices. + */ +const struct of_device_id *of_match_device(const struct of_device_id *matches, + const struct of_device *dev) +{ + if (!dev->node) + return NULL; + while (matches->name[0] || matches->type[0] || matches->compatible[0]) { + int match = 1; + if (matches->name[0]) + match &= dev->node->name + && !strcmp(matches->name, dev->node->name); + if (matches->type[0]) + match &= dev->node->type + && !strcmp(matches->type, dev->node->type); + if (matches->compatible[0]) + match &= device_is_compatible(dev->node, + matches->compatible); + if (match) + return matches; + matches++; + } + return NULL; +} + +static int of_platform_bus_match(struct device *dev, struct device_driver *drv) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * of_drv = to_of_platform_driver(drv); + const struct of_device_id * matches = of_drv->match_table; + + if (!matches) + return 0; + + return of_match_device(matches, of_dev) != NULL; +} + +struct of_device *of_dev_get(struct of_device *dev) +{ + struct device *tmp; + + if (!dev) + return NULL; + tmp = get_device(&dev->dev); + if (tmp) + return to_of_device(tmp); + else + return NULL; +} + +void of_dev_put(struct of_device *dev) +{ + if (dev) + put_device(&dev->dev); +} + + +static int of_device_probe(struct device *dev) +{ + int error = -ENODEV; + struct of_platform_driver *drv; + struct of_device *of_dev; + const struct of_device_id *match; + + drv = to_of_platform_driver(dev->driver); + of_dev = to_of_device(dev); + + if (!drv->probe) + return error; + + of_dev_get(of_dev); + + match = of_match_device(drv->match_table, of_dev); + if (match) + error = drv->probe(of_dev, match); + if (error) + of_dev_put(of_dev); + + return error; +} + +static int of_device_remove(struct device *dev) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * drv = to_of_platform_driver(dev->driver); + + if (dev->driver && drv->remove) + drv->remove(of_dev); + return 0; +} + +static int of_device_suspend(struct device *dev, pm_message_t state) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * drv = to_of_platform_driver(dev->driver); + int error = 0; + + if (dev->driver && drv->suspend) + error = drv->suspend(of_dev, state); + return error; +} + +static int of_device_resume(struct device * dev) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * drv = to_of_platform_driver(dev->driver); + int error = 0; + + if (dev->driver && drv->resume) + error = drv->resume(of_dev); + return error; +} + +struct bus_type of_platform_bus_type = { + .name = "of_platform", + .match = of_platform_bus_match, + .suspend = of_device_suspend, + .resume = of_device_resume, +}; + +static int __init of_bus_driver_init(void) +{ + return bus_register(&of_platform_bus_type); +} + +postcore_initcall(of_bus_driver_init); + +int of_register_driver(struct of_platform_driver *drv) +{ + int count = 0; + + /* initialize common driver fields */ + drv->driver.name = drv->name; + drv->driver.bus = &of_platform_bus_type; + drv->driver.probe = of_device_probe; + drv->driver.remove = of_device_remove; + + /* register with core */ + count = driver_register(&drv->driver); + return count ? count : 1; +} + +void of_unregister_driver(struct of_platform_driver *drv) +{ + driver_unregister(&drv->driver); +} + + +static ssize_t dev_show_devspec(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct of_device *ofdev; + + ofdev = to_of_device(dev); + return sprintf(buf, "%s", ofdev->node->full_name); +} + +static DEVICE_ATTR(devspec, S_IRUGO, dev_show_devspec, NULL); + +/** + * of_release_dev - free an of device structure when all users of it are finished. + * @dev: device that's been disconnected + * + * Will be called only by the device core when all users of this of device are + * done. + */ +void of_release_dev(struct device *dev) +{ + struct of_device *ofdev; + + ofdev = to_of_device(dev); + of_node_put(ofdev->node); + kfree(ofdev); +} + +int of_device_register(struct of_device *ofdev) +{ + int rc; + struct of_device **odprop; + + BUG_ON(ofdev->node == NULL); + + odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL); + if (!odprop) { + struct property *new_prop; + + new_prop = kmalloc(sizeof(struct property) + sizeof(struct of_device *), + GFP_KERNEL); + if (new_prop == NULL) + return -ENOMEM; + new_prop->name = "linux,device"; + new_prop->length = sizeof(sizeof(struct of_device *)); + new_prop->value = (unsigned char *)&new_prop[1]; + odprop = (struct of_device **)new_prop->value; + *odprop = NULL; + prom_add_property(ofdev->node, new_prop); + } + *odprop = ofdev; + + rc = device_register(&ofdev->dev); + if (rc) + return rc; + + device_create_file(&ofdev->dev, &dev_attr_devspec); + + return 0; +} + +void of_device_unregister(struct of_device *ofdev) +{ + struct of_device **odprop; + + device_remove_file(&ofdev->dev, &dev_attr_devspec); + + odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL); + if (odprop) + *odprop = NULL; + + device_unregister(&ofdev->dev); +} + +struct of_device* of_platform_device_create(struct device_node *np, + const char *bus_id, + struct device *parent) +{ + struct of_device *dev; + + dev = kmalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return NULL; + memset(dev, 0, sizeof(*dev)); + + dev->node = of_node_get(np); + dev->dma_mask = 0xffffffffUL; + dev->dev.dma_mask = &dev->dma_mask; + dev->dev.parent = parent; + dev->dev.bus = &of_platform_bus_type; + dev->dev.release = of_release_dev; + + strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE); + + if (of_device_register(dev) != 0) { + kfree(dev); + return NULL; + } + + return dev; +} + +EXPORT_SYMBOL(of_match_device); +EXPORT_SYMBOL(of_platform_bus_type); +EXPORT_SYMBOL(of_register_driver); +EXPORT_SYMBOL(of_unregister_driver); +EXPORT_SYMBOL(of_device_register); +EXPORT_SYMBOL(of_device_unregister); +EXPORT_SYMBOL(of_dev_get); +EXPORT_SYMBOL(of_dev_put); +EXPORT_SYMBOL(of_platform_device_create); +EXPORT_SYMBOL(of_release_dev); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c new file mode 100644 index 00000000000..010554e5fe4 --- /dev/null +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -0,0 +1,285 @@ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/threads.h> +#include <linux/smp.h> +#include <linux/sched.h> +#include <linux/elfcore.h> +#include <linux/string.h> +#include <linux/interrupt.h> +#include <linux/tty.h> +#include <linux/vt_kern.h> +#include <linux/nvram.h> +#include <linux/console.h> +#include <linux/irq.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/ide.h> +#include <linux/pm.h> +#include <linux/bitops.h> + +#include <asm/page.h> +#include <asm/semaphore.h> +#include <asm/processor.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/ide.h> +#include <asm/atomic.h> +#include <asm/checksum.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <linux/adb.h> +#include <linux/cuda.h> +#include <linux/pmu.h> +#include <asm/prom.h> +#include <asm/system.h> +#include <asm/pci-bridge.h> +#include <asm/irq.h> +#include <asm/pmac_feature.h> +#include <asm/dma.h> +#include <asm/machdep.h> +#include <asm/hw_irq.h> +#include <asm/nvram.h> +#include <asm/mmu_context.h> +#include <asm/backlight.h> +#include <asm/time.h> +#include <asm/cputable.h> +#include <asm/btext.h> +#include <asm/div64.h> +#include <asm/xmon.h> + +#ifdef CONFIG_8xx +#include <asm/commproc.h> +#endif + +#ifdef CONFIG_PPC32 +extern void transfer_to_handler(void); +extern void do_IRQ(struct pt_regs *regs); +extern void machine_check_exception(struct pt_regs *regs); +extern void alignment_exception(struct pt_regs *regs); +extern void program_check_exception(struct pt_regs *regs); +extern void single_step_exception(struct pt_regs *regs); +extern int do_signal(sigset_t *, struct pt_regs *); +extern int pmac_newworld; +extern int sys_sigreturn(struct pt_regs *regs); + +EXPORT_SYMBOL(clear_pages); +EXPORT_SYMBOL(ISA_DMA_THRESHOLD); +EXPORT_SYMBOL(DMA_MODE_READ); +EXPORT_SYMBOL(DMA_MODE_WRITE); +EXPORT_SYMBOL(__div64_32); + +EXPORT_SYMBOL(do_signal); +EXPORT_SYMBOL(transfer_to_handler); +EXPORT_SYMBOL(do_IRQ); +EXPORT_SYMBOL(machine_check_exception); +EXPORT_SYMBOL(alignment_exception); +EXPORT_SYMBOL(program_check_exception); +EXPORT_SYMBOL(single_step_exception); +EXPORT_SYMBOL(sys_sigreturn); +#endif + +#if defined(CONFIG_PPC_PREP) +EXPORT_SYMBOL(_prep_type); +EXPORT_SYMBOL(ucSystemType); +#endif + +#if !defined(__INLINE_BITOPS) +EXPORT_SYMBOL(set_bit); +EXPORT_SYMBOL(clear_bit); +EXPORT_SYMBOL(change_bit); +EXPORT_SYMBOL(test_and_set_bit); +EXPORT_SYMBOL(test_and_clear_bit); +EXPORT_SYMBOL(test_and_change_bit); +#endif /* __INLINE_BITOPS */ + +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strncat); +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strpbrk); +EXPORT_SYMBOL(strstr); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strnlen); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); +EXPORT_SYMBOL(strcasecmp); + +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(ip_fast_csum); +EXPORT_SYMBOL(csum_tcpudp_magic); + +EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); + +EXPORT_SYMBOL(_insb); +EXPORT_SYMBOL(_outsb); +EXPORT_SYMBOL(_insw); +EXPORT_SYMBOL(_outsw); +EXPORT_SYMBOL(_insl); +EXPORT_SYMBOL(_outsl); +EXPORT_SYMBOL(_insw_ns); +EXPORT_SYMBOL(_outsw_ns); +EXPORT_SYMBOL(_insl_ns); +EXPORT_SYMBOL(_outsl_ns); +EXPORT_SYMBOL(ioremap); +#ifdef CONFIG_44x +EXPORT_SYMBOL(ioremap64); +#endif +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ +#endif + +#if defined(CONFIG_PPC32) && (defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)) +EXPORT_SYMBOL(ppc_ide_md); +#endif + +#if defined(CONFIG_PCI) && defined(CONFIG_PPC32) +EXPORT_SYMBOL(isa_io_base); +EXPORT_SYMBOL(isa_mem_base); +EXPORT_SYMBOL(pci_dram_offset); +EXPORT_SYMBOL(pci_alloc_consistent); +EXPORT_SYMBOL(pci_free_consistent); +EXPORT_SYMBOL(pci_bus_io_base); +EXPORT_SYMBOL(pci_bus_io_base_phys); +EXPORT_SYMBOL(pci_bus_mem_base_phys); +EXPORT_SYMBOL(pci_bus_to_hose); +EXPORT_SYMBOL(pci_resource_to_bus); +EXPORT_SYMBOL(pci_phys_to_bus); +EXPORT_SYMBOL(pci_bus_to_phys); +#endif /* CONFIG_PCI */ + +#ifdef CONFIG_NOT_COHERENT_CACHE +EXPORT_SYMBOL(flush_dcache_all); +#endif + +EXPORT_SYMBOL(start_thread); +EXPORT_SYMBOL(kernel_thread); + +EXPORT_SYMBOL(giveup_fpu); +#ifdef CONFIG_ALTIVEC +EXPORT_SYMBOL(giveup_altivec); +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE +EXPORT_SYMBOL(giveup_spe); +#endif /* CONFIG_SPE */ + +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(__flush_icache_range); +#else +EXPORT_SYMBOL(flush_instruction_cache); +EXPORT_SYMBOL(flush_icache_range); +EXPORT_SYMBOL(flush_tlb_kernel_range); +EXPORT_SYMBOL(flush_tlb_page); +EXPORT_SYMBOL(_tlbie); +#endif +EXPORT_SYMBOL(flush_dcache_range); + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(smp_call_function); +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(smp_hw_index); +#endif +#endif + +#ifdef CONFIG_ADB +EXPORT_SYMBOL(adb_request); +EXPORT_SYMBOL(adb_register); +EXPORT_SYMBOL(adb_unregister); +EXPORT_SYMBOL(adb_poll); +EXPORT_SYMBOL(adb_try_handler_change); +#endif /* CONFIG_ADB */ +#ifdef CONFIG_ADB_CUDA +EXPORT_SYMBOL(cuda_request); +EXPORT_SYMBOL(cuda_poll); +#endif /* CONFIG_ADB_CUDA */ +#if defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_PPC32) +EXPORT_SYMBOL(_machine); +#endif +#ifdef CONFIG_PPC_PMAC +EXPORT_SYMBOL(sys_ctrler); +#endif +#ifdef CONFIG_VT +EXPORT_SYMBOL(kd_mksound); +#endif +EXPORT_SYMBOL(to_tm); + +#ifdef CONFIG_PPC32 +long long __ashrdi3(long long, int); +long long __ashldi3(long long, int); +long long __lshrdi3(long long, int); +EXPORT_SYMBOL(__ashrdi3); +EXPORT_SYMBOL(__ashldi3); +EXPORT_SYMBOL(__lshrdi3); +#endif + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memscan); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memchr); + +#if defined(CONFIG_FB_VGA16_MODULE) +EXPORT_SYMBOL(screen_info); +#endif + +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(pm_power_off); +EXPORT_SYMBOL(__delay); +EXPORT_SYMBOL(timer_interrupt); +EXPORT_SYMBOL(irq_desc); +EXPORT_SYMBOL(tb_ticks_per_jiffy); +EXPORT_SYMBOL(console_drivers); +EXPORT_SYMBOL(cacheable_memcpy); +#endif + +#ifdef CONFIG_XMON +EXPORT_SYMBOL(xmon); +EXPORT_SYMBOL(xmon_printf); +#endif +EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__down_interruptible); + +#ifdef CONFIG_8xx +EXPORT_SYMBOL(cpm_install_handler); +EXPORT_SYMBOL(cpm_free_handler); +#endif /* CONFIG_8xx */ +#if defined(CONFIG_8xx) || defined(CONFIG_40x) || defined(CONFIG_85xx) ||\ + defined(CONFIG_83xx) +EXPORT_SYMBOL(__res); +#endif + +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(next_mmu_context); +EXPORT_SYMBOL(set_context); +EXPORT_SYMBOL(disarm_decr); +#endif + +#ifdef CONFIG_PPC_STD_MMU_32 +extern long mol_trampoline; +EXPORT_SYMBOL(mol_trampoline); /* For MOL */ +EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ +EXPORT_SYMBOL_GPL(__handle_mm_fault); /* For MOL */ +#ifdef CONFIG_SMP +extern int mmu_hash_lock; +EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ +#endif /* CONFIG_SMP */ +extern long *intercept_table; +EXPORT_SYMBOL(intercept_table); +#endif /* CONFIG_PPC_STD_MMU_32 */ +#ifdef CONFIG_PPC_PMAC +extern unsigned long agp_special_page; +EXPORT_SYMBOL(agp_special_page); +#endif +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +EXPORT_SYMBOL(__mtdcr); +EXPORT_SYMBOL(__mfdcr); +#endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c new file mode 100644 index 00000000000..f09908a0bee --- /dev/null +++ b/arch/powerpc/kernel/process.c @@ -0,0 +1,915 @@ +/* + * arch/ppc/kernel/process.c + * + * Derived from "arch/i386/kernel/process.c" + * Copyright (C) 1995 Linus Torvalds + * + * Updated and modified by Cort Dougan (cort@cs.nmt.edu) and + * Paul Mackerras (paulus@cs.anu.edu.au) + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/elf.h> +#include <linux/init.h> +#include <linux/prctl.h> +#include <linux/init_task.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/mqueue.h> +#include <linux/hardirq.h> +#include <linux/utsname.h> +#include <linux/kprobes.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/prom.h> +#ifdef CONFIG_PPC64 +#include <asm/firmware.h> +#include <asm/plpar_wrappers.h> +#include <asm/time.h> +#endif + +extern unsigned long _get_SP(void); + +#ifndef CONFIG_SMP +struct task_struct *last_task_used_math = NULL; +struct task_struct *last_task_used_altivec = NULL; +struct task_struct *last_task_used_spe = NULL; +#endif + +/* + * Make sure the floating-point register state in the + * the thread_struct is up to date for task tsk. + */ +void flush_fp_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + /* + * We need to disable preemption here because if we didn't, + * another process could get scheduled after the regs->msr + * test but before we have finished saving the FP registers + * to the thread_struct. That process could take over the + * FPU, and then when we get scheduled again we would store + * bogus values for the remaining FP registers. + */ + preempt_disable(); + if (tsk->thread.regs->msr & MSR_FP) { +#ifdef CONFIG_SMP + /* + * This should only ever be called for current or + * for a stopped child process. Since we save away + * the FP register state on context switch on SMP, + * there is something wrong if a stopped child appears + * to still have its FP state in the CPU registers. + */ + BUG_ON(tsk != current); +#endif + giveup_fpu(current); + } + preempt_enable(); + } +} + +void enable_kernel_fp(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) + giveup_fpu(current); + else + giveup_fpu(NULL); /* just enables FP for kernel */ +#else + giveup_fpu(last_task_used_math); +#endif /* CONFIG_SMP */ +} +EXPORT_SYMBOL(enable_kernel_fp); + +int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs) +{ + if (!tsk->thread.regs) + return 0; + flush_fp_to_thread(current); + + memcpy(fpregs, &tsk->thread.fpr[0], sizeof(*fpregs)); + + return 1; +} + +#ifdef CONFIG_ALTIVEC +void enable_kernel_altivec(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) + giveup_altivec(current); + else + giveup_altivec(NULL); /* just enable AltiVec for kernel - force */ +#else + giveup_altivec(last_task_used_altivec); +#endif /* CONFIG_SMP */ +} +EXPORT_SYMBOL(enable_kernel_altivec); + +/* + * Make sure the VMX/Altivec register state in the + * the thread_struct is up to date for task tsk. + */ +void flush_altivec_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + preempt_disable(); + if (tsk->thread.regs->msr & MSR_VEC) { +#ifdef CONFIG_SMP + BUG_ON(tsk != current); +#endif + giveup_altivec(current); + } + preempt_enable(); + } +} + +int dump_task_altivec(struct pt_regs *regs, elf_vrregset_t *vrregs) +{ + flush_altivec_to_thread(current); + memcpy(vrregs, ¤t->thread.vr[0], sizeof(*vrregs)); + return 1; +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_SPE + +void enable_kernel_spe(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) + giveup_spe(current); + else + giveup_spe(NULL); /* just enable SPE for kernel - force */ +#else + giveup_spe(last_task_used_spe); +#endif /* __SMP __ */ +} +EXPORT_SYMBOL(enable_kernel_spe); + +void flush_spe_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + preempt_disable(); + if (tsk->thread.regs->msr & MSR_SPE) { +#ifdef CONFIG_SMP + BUG_ON(tsk != current); +#endif + giveup_spe(current); + } + preempt_enable(); + } +} + +int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs) +{ + flush_spe_to_thread(current); + /* We copy u32 evr[32] + u64 acc + u32 spefscr -> 35 */ + memcpy(evrregs, ¤t->thread.evr[0], sizeof(u32) * 35); + return 1; +} +#endif /* CONFIG_SPE */ + +static void set_dabr_spr(unsigned long val) +{ + mtspr(SPRN_DABR, val); +} + +int set_dabr(unsigned long dabr) +{ + int ret = 0; + +#ifdef CONFIG_PPC64 + if (firmware_has_feature(FW_FEATURE_XDABR)) { + /* We want to catch accesses from kernel and userspace */ + unsigned long flags = H_DABRX_KERNEL|H_DABRX_USER; + ret = plpar_set_xdabr(dabr, flags); + } else if (firmware_has_feature(FW_FEATURE_DABR)) { + ret = plpar_set_dabr(dabr); + } else +#endif + set_dabr_spr(dabr); + + return ret; +} + +#ifdef CONFIG_PPC64 +DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); +static DEFINE_PER_CPU(unsigned long, current_dabr); +#endif + +struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *new) +{ + struct thread_struct *new_thread, *old_thread; + unsigned long flags; + struct task_struct *last; + +#ifdef CONFIG_SMP + /* avoid complexity of lazy save/restore of fpu + * by just saving it every time we switch out if + * this task used the fpu during the last quantum. + * + * If it tries to use the fpu again, it'll trap and + * reload its fp regs. So we don't have to do a restore + * every switch, just a save. + * -- Cort + */ + if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP)) + giveup_fpu(prev); +#ifdef CONFIG_ALTIVEC + /* + * If the previous thread used altivec in the last quantum + * (thus changing altivec regs) then save them. + * We used to check the VRSAVE register but not all apps + * set it, so we don't rely on it now (and in fact we need + * to save & restore VSCR even if VRSAVE == 0). -- paulus + * + * On SMP we always save/restore altivec regs just to avoid the + * complexity of changing processors. + * -- Cort + */ + if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC)) + giveup_altivec(prev); +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + /* + * If the previous thread used spe in the last quantum + * (thus changing spe regs) then save them. + * + * On SMP we always save/restore spe regs just to avoid the + * complexity of changing processors. + */ + if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE))) + giveup_spe(prev); +#endif /* CONFIG_SPE */ + +#else /* CONFIG_SMP */ +#ifdef CONFIG_ALTIVEC + /* Avoid the trap. On smp this this never happens since + * we don't set last_task_used_altivec -- Cort + */ + if (new->thread.regs && last_task_used_altivec == new) + new->thread.regs->msr |= MSR_VEC; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + /* Avoid the trap. On smp this this never happens since + * we don't set last_task_used_spe + */ + if (new->thread.regs && last_task_used_spe == new) + new->thread.regs->msr |= MSR_SPE; +#endif /* CONFIG_SPE */ + +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_PPC64 /* for now */ + if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) { + set_dabr(new->thread.dabr); + __get_cpu_var(current_dabr) = new->thread.dabr; + } + + flush_tlb_pending(); +#endif + + new_thread = &new->thread; + old_thread = ¤t->thread; + +#ifdef CONFIG_PPC64 + /* + * Collect processor utilization data per process + */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + long unsigned start_tb, current_tb; + start_tb = old_thread->start_tb; + cu->current_tb = current_tb = mfspr(SPRN_PURR); + old_thread->accum_tb += (current_tb - start_tb); + new_thread->start_tb = current_tb; + } +#endif + + local_irq_save(flags); + last = _switch(old_thread, new_thread); + + local_irq_restore(flags); + + return last; +} + +static int instructions_to_print = 16; + +#ifdef CONFIG_PPC64 +#define BAD_PC(pc) ((REGION_ID(pc) != KERNEL_REGION_ID) && \ + (REGION_ID(pc) != VMALLOC_REGION_ID)) +#else +#define BAD_PC(pc) ((pc) < KERNELBASE) +#endif + +static void show_instructions(struct pt_regs *regs) +{ + int i; + unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 * + sizeof(int)); + + printk("Instruction dump:"); + + for (i = 0; i < instructions_to_print; i++) { + int instr; + + if (!(i % 8)) + printk("\n"); + + if (BAD_PC(pc) || __get_user(instr, (unsigned int *)pc)) { + printk("XXXXXXXX "); + } else { + if (regs->nip == pc) + printk("<%08x> ", instr); + else + printk("%08x ", instr); + } + + pc += sizeof(int); + } + + printk("\n"); +} + +static struct regbit { + unsigned long bit; + const char *name; +} msr_bits[] = { + {MSR_EE, "EE"}, + {MSR_PR, "PR"}, + {MSR_FP, "FP"}, + {MSR_ME, "ME"}, + {MSR_IR, "IR"}, + {MSR_DR, "DR"}, + {0, NULL} +}; + +static void printbits(unsigned long val, struct regbit *bits) +{ + const char *sep = ""; + + printk("<"); + for (; bits->bit; ++bits) + if (val & bits->bit) { + printk("%s%s", sep, bits->name); + sep = ","; + } + printk(">"); +} + +#ifdef CONFIG_PPC64 +#define REG "%016lX" +#define REGS_PER_LINE 4 +#define LAST_VOLATILE 13 +#else +#define REG "%08lX" +#define REGS_PER_LINE 8 +#define LAST_VOLATILE 12 +#endif + +void show_regs(struct pt_regs * regs) +{ + int i, trap; + + printk("NIP: "REG" LR: "REG" CTR: "REG"\n", + regs->nip, regs->link, regs->ctr); + printk("REGS: %p TRAP: %04lx %s (%s)\n", + regs, regs->trap, print_tainted(), system_utsname.release); + printk("MSR: "REG" ", regs->msr); + printbits(regs->msr, msr_bits); + printk(" CR: %08lX XER: %08lX\n", regs->ccr, regs->xer); + trap = TRAP(regs); + if (trap == 0x300 || trap == 0x600) + printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); + printk("TASK = %p[%d] '%s' THREAD: %p", + current, current->pid, current->comm, current->thread_info); + +#ifdef CONFIG_SMP + printk(" CPU: %d", smp_processor_id()); +#endif /* CONFIG_SMP */ + + for (i = 0; i < 32; i++) { + if ((i % REGS_PER_LINE) == 0) + printk("\n" KERN_INFO "GPR%02d: ", i); + printk(REG " ", regs->gpr[i]); + if (i == LAST_VOLATILE && !FULL_REGS(regs)) + break; + } + printk("\n"); +#ifdef CONFIG_KALLSYMS + /* + * Lookup NIP late so we have the best change of getting the + * above info out without failing + */ + printk("NIP ["REG"] ", regs->nip); + print_symbol("%s\n", regs->nip); + printk("LR ["REG"] ", regs->link); + print_symbol("%s\n", regs->link); +#endif + show_stack(current, (unsigned long *) regs->gpr[1]); + if (!user_mode(regs)) + show_instructions(regs); +} + +void exit_thread(void) +{ + kprobe_flush_task(current); + +#ifndef CONFIG_SMP + if (last_task_used_math == current) + last_task_used_math = NULL; +#ifdef CONFIG_ALTIVEC + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + if (last_task_used_spe == current) + last_task_used_spe = NULL; +#endif +#endif /* CONFIG_SMP */ +} + +void flush_thread(void) +{ +#ifdef CONFIG_PPC64 + struct thread_info *t = current_thread_info(); + + if (t->flags & _TIF_ABI_PENDING) + t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT); +#endif + kprobe_flush_task(current); + +#ifndef CONFIG_SMP + if (last_task_used_math == current) + last_task_used_math = NULL; +#ifdef CONFIG_ALTIVEC + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + if (last_task_used_spe == current) + last_task_used_spe = NULL; +#endif +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_PPC64 /* for now */ + if (current->thread.dabr) { + current->thread.dabr = 0; + set_dabr(0); + } +#endif +} + +void +release_thread(struct task_struct *t) +{ +} + +/* + * This gets called before we allocate a new thread and copy + * the current task into it. + */ +void prepare_to_copy(struct task_struct *tsk) +{ + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + flush_spe_to_thread(current); +} + +/* + * Copy a thread.. + */ +int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, + unsigned long unused, struct task_struct *p, + struct pt_regs *regs) +{ + struct pt_regs *childregs, *kregs; + extern void ret_from_fork(void); + unsigned long sp = (unsigned long)p->thread_info + THREAD_SIZE; + + CHECK_FULL_REGS(regs); + /* Copy registers */ + sp -= sizeof(struct pt_regs); + childregs = (struct pt_regs *) sp; + *childregs = *regs; + if ((childregs->msr & MSR_PR) == 0) { + /* for kernel thread, set `current' and stackptr in new task */ + childregs->gpr[1] = sp + sizeof(struct pt_regs); +#ifdef CONFIG_PPC32 + childregs->gpr[2] = (unsigned long) p; +#else + clear_ti_thread_flag(p->thread_info, TIF_32BIT); +#endif + p->thread.regs = NULL; /* no user register state */ + } else { + childregs->gpr[1] = usp; + p->thread.regs = childregs; + if (clone_flags & CLONE_SETTLS) { +#ifdef CONFIG_PPC64 + if (!test_thread_flag(TIF_32BIT)) + childregs->gpr[13] = childregs->gpr[6]; + else +#endif + childregs->gpr[2] = childregs->gpr[6]; + } + } + childregs->gpr[3] = 0; /* Result from fork() */ + sp -= STACK_FRAME_OVERHEAD; + + /* + * The way this works is that at some point in the future + * some task will call _switch to switch to the new task. + * That will pop off the stack frame created below and start + * the new task running at ret_from_fork. The new task will + * do some house keeping and then return from the fork or clone + * system call, using the stack frame created above. + */ + sp -= sizeof(struct pt_regs); + kregs = (struct pt_regs *) sp; + sp -= STACK_FRAME_OVERHEAD; + p->thread.ksp = sp; + +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_SLB)) { + unsigned long sp_vsid = get_kernel_vsid(sp); + + sp_vsid <<= SLB_VSID_SHIFT; + sp_vsid |= SLB_VSID_KERNEL; + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + sp_vsid |= SLB_VSID_L; + + p->thread.ksp_vsid = sp_vsid; + } + + /* + * The PPC64 ABI makes use of a TOC to contain function + * pointers. The function (ret_from_except) is actually a pointer + * to the TOC entry. The first entry is a pointer to the actual + * function. + */ + kregs->nip = *((unsigned long *)ret_from_fork); +#else + kregs->nip = (unsigned long)ret_from_fork; + p->thread.last_syscall = -1; +#endif + + return 0; +} + +/* + * Set up a thread for executing a new program + */ +void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) +{ + set_fs(USER_DS); + + /* + * If we exec out of a kernel thread then thread.regs will not be + * set. Do it now. + */ + if (!current->thread.regs) { + unsigned long childregs = (unsigned long)current->thread_info + + THREAD_SIZE; + childregs -= sizeof(struct pt_regs); + current->thread.regs = (struct pt_regs *)childregs; + } + + memset(regs->gpr, 0, sizeof(regs->gpr)); + regs->ctr = 0; + regs->link = 0; + regs->xer = 0; + regs->ccr = 0; + regs->gpr[1] = sp; + +#ifdef CONFIG_PPC32 + regs->mq = 0; + regs->nip = start; + regs->msr = MSR_USER; +#else + if (test_thread_flag(TIF_32BIT)) { + unsigned long entry, toc, load_addr = regs->gpr[2]; + + /* start is a relocated pointer to the function descriptor for + * the elf _start routine. The first entry in the function + * descriptor is the entry address of _start and the second + * entry is the TOC value we need to use. + */ + __get_user(entry, (unsigned long __user *)start); + __get_user(toc, (unsigned long __user *)start+1); + + /* Check whether the e_entry function descriptor entries + * need to be relocated before we can use them. + */ + if (load_addr != 0) { + entry += load_addr; + toc += load_addr; + } + regs->nip = entry; + regs->gpr[2] = toc; + regs->msr = MSR_USER64; + } else { + regs->nip = start; + regs->gpr[2] = 0; + regs->msr = MSR_USER32; + } +#endif + +#ifndef CONFIG_SMP + if (last_task_used_math == current) + last_task_used_math = NULL; +#ifdef CONFIG_ALTIVEC + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; +#endif +#ifdef CONFIG_SPE + if (last_task_used_spe == current) + last_task_used_spe = NULL; +#endif +#endif /* CONFIG_SMP */ + memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); + current->thread.fpscr = 0; +#ifdef CONFIG_ALTIVEC + memset(current->thread.vr, 0, sizeof(current->thread.vr)); + memset(¤t->thread.vscr, 0, sizeof(current->thread.vscr)); + current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */ + current->thread.vrsave = 0; + current->thread.used_vr = 0; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + memset(current->thread.evr, 0, sizeof(current->thread.evr)); + current->thread.acc = 0; + current->thread.spefscr = 0; + current->thread.used_spe = 0; +#endif /* CONFIG_SPE */ +} + +#define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \ + | PR_FP_EXC_RES | PR_FP_EXC_INV) + +int set_fpexc_mode(struct task_struct *tsk, unsigned int val) +{ + struct pt_regs *regs = tsk->thread.regs; + + /* This is a bit hairy. If we are an SPE enabled processor + * (have embedded fp) we store the IEEE exception enable flags in + * fpexc_mode. fpexc_mode is also used for setting FP exception + * mode (asyn, precise, disabled) for 'Classic' FP. */ + if (val & PR_FP_EXC_SW_ENABLE) { +#ifdef CONFIG_SPE + tsk->thread.fpexc_mode = val & + (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); + return 0; +#else + return -EINVAL; +#endif + } + + /* on a CONFIG_SPE this does not hurt us. The bits that + * __pack_fe01 use do not overlap with bits used for + * PR_FP_EXC_SW_ENABLE. Additionally, the MSR[FE0,FE1] bits + * on CONFIG_SPE implementations are reserved so writing to + * them does not change anything */ + if (val > PR_FP_EXC_PRECISE) + return -EINVAL; + tsk->thread.fpexc_mode = __pack_fe01(val); + if (regs != NULL && (regs->msr & MSR_FP) != 0) + regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1)) + | tsk->thread.fpexc_mode; + return 0; +} + +int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) +{ + unsigned int val; + + if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) +#ifdef CONFIG_SPE + val = tsk->thread.fpexc_mode; +#else + return -EINVAL; +#endif + else + val = __unpack_fe01(tsk->thread.fpexc_mode); + return put_user(val, (unsigned int __user *) adr); +} + +#define TRUNC_PTR(x) ((typeof(x))(((unsigned long)(x)) & 0xffffffff)) + +int sys_clone(unsigned long clone_flags, unsigned long usp, + int __user *parent_tidp, void __user *child_threadptr, + int __user *child_tidp, int p6, + struct pt_regs *regs) +{ + CHECK_FULL_REGS(regs); + if (usp == 0) + usp = regs->gpr[1]; /* stack pointer for child */ +#ifdef CONFIG_PPC64 + if (test_thread_flag(TIF_32BIT)) { + parent_tidp = TRUNC_PTR(parent_tidp); + child_tidp = TRUNC_PTR(child_tidp); + } +#endif + return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp); +} + +int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, unsigned long p6, + struct pt_regs *regs) +{ + CHECK_FULL_REGS(regs); + return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL); +} + +int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, unsigned long p6, + struct pt_regs *regs) +{ + CHECK_FULL_REGS(regs); + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1], + regs, 0, NULL, NULL); +} + +int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, unsigned long a5, + struct pt_regs *regs) +{ + int error; + char *filename; + + filename = getname((char __user *) a0); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + flush_spe_to_thread(current); + error = do_execve(filename, (char __user * __user *) a1, + (char __user * __user *) a2, regs); + if (error == 0) { + task_lock(current); + current->ptrace &= ~PT_DTRACE; + task_unlock(current); + } + putname(filename); +out: + return error; +} + +static int validate_sp(unsigned long sp, struct task_struct *p, + unsigned long nbytes) +{ + unsigned long stack_page = (unsigned long)p->thread_info; + + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + +#ifdef CONFIG_IRQSTACKS + stack_page = (unsigned long) hardirq_ctx[task_cpu(p)]; + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + + stack_page = (unsigned long) softirq_ctx[task_cpu(p)]; + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; +#endif + + return 0; +} + +#ifdef CONFIG_PPC64 +#define MIN_STACK_FRAME 112 /* same as STACK_FRAME_OVERHEAD, in fact */ +#define FRAME_LR_SAVE 2 +#define INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD + 288) +#define REGS_MARKER 0x7265677368657265ul +#define FRAME_MARKER 12 +#else +#define MIN_STACK_FRAME 16 +#define FRAME_LR_SAVE 1 +#define INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) +#define REGS_MARKER 0x72656773ul +#define FRAME_MARKER 2 +#endif + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long ip, sp; + int count = 0; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + sp = p->thread.ksp; + if (!validate_sp(sp, p, MIN_STACK_FRAME)) + return 0; + + do { + sp = *(unsigned long *)sp; + if (!validate_sp(sp, p, MIN_STACK_FRAME)) + return 0; + if (count > 0) { + ip = ((unsigned long *)sp)[FRAME_LR_SAVE]; + if (!in_sched_functions(ip)) + return ip; + } + } while (count++ < 16); + return 0; +} +EXPORT_SYMBOL(get_wchan); + +static int kstack_depth_to_print = 64; + +void show_stack(struct task_struct *tsk, unsigned long *stack) +{ + unsigned long sp, ip, lr, newsp; + int count = 0; + int firstframe = 1; + + sp = (unsigned long) stack; + if (tsk == NULL) + tsk = current; + if (sp == 0) { + if (tsk == current) + asm("mr %0,1" : "=r" (sp)); + else + sp = tsk->thread.ksp; + } + + lr = 0; + printk("Call Trace:\n"); + do { + if (!validate_sp(sp, tsk, MIN_STACK_FRAME)) + return; + + stack = (unsigned long *) sp; + newsp = stack[0]; + ip = stack[FRAME_LR_SAVE]; + if (!firstframe || ip != lr) { + printk("["REG"] ["REG"] ", sp, ip); + print_symbol("%s", ip); + if (firstframe) + printk(" (unreliable)"); + printk("\n"); + } + firstframe = 0; + + /* + * See if this is an exception frame. + * We look for the "regshere" marker in the current frame. + */ + if (validate_sp(sp, tsk, INT_FRAME_SIZE) + && stack[FRAME_MARKER] == REGS_MARKER) { + struct pt_regs *regs = (struct pt_regs *) + (sp + STACK_FRAME_OVERHEAD); + printk("--- Exception: %lx", regs->trap); + print_symbol(" at %s\n", regs->nip); + lr = regs->link; + print_symbol(" LR = %s\n", lr); + firstframe = 1; + } + + sp = newsp; + } while (count++ < kstack_depth_to_print); +} + +void dump_stack(void) +{ + show_stack(current, NULL); +} +EXPORT_SYMBOL(dump_stack); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c new file mode 100644 index 00000000000..ce0dff1caa8 --- /dev/null +++ b/arch/powerpc/kernel/prom.c @@ -0,0 +1,2145 @@ +/* + * Procedures for creating, accessing and interpreting the device tree. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996-2005 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. + * {engebret|bergner}@us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <stdarg.h> +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/threads.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/stringify.h> +#include <linux/delay.h> +#include <linux/initrd.h> +#include <linux/bitops.h> +#include <linux/module.h> + +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/lmb.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/system.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/pci.h> +#include <asm/iommu.h> +#include <asm/btext.h> +#include <asm/sections.h> +#include <asm/machdep.h> +#include <asm/pSeries_reconfig.h> +#include <asm/pci-bridge.h> +#ifdef CONFIG_PPC64 +#include <asm/systemcfg.h> +#endif + +#ifdef DEBUG +#define DBG(fmt...) printk(KERN_ERR fmt) +#else +#define DBG(fmt...) +#endif + +struct pci_reg_property { + struct pci_address addr; + u32 size_hi; + u32 size_lo; +}; + +struct isa_reg_property { + u32 space; + u32 address; + u32 size; +}; + + +typedef int interpret_func(struct device_node *, unsigned long *, + int, int, int); + +extern struct rtas_t rtas; +extern struct lmb lmb; +extern unsigned long klimit; + +static unsigned long memory_limit; + +static int __initdata dt_root_addr_cells; +static int __initdata dt_root_size_cells; + +#ifdef CONFIG_PPC64 +static int __initdata iommu_is_off; +int __initdata iommu_force_on; +extern unsigned long tce_alloc_start, tce_alloc_end; +#endif + +typedef u32 cell_t; + +#if 0 +static struct boot_param_header *initial_boot_params __initdata; +#else +struct boot_param_header *initial_boot_params; +#endif + +static struct device_node *allnodes = NULL; + +/* use when traversing tree through the allnext, child, sibling, + * or parent members of struct device_node. + */ +static DEFINE_RWLOCK(devtree_lock); + +/* export that to outside world */ +struct device_node *of_chosen; + +struct device_node *dflt_interrupt_controller; +int num_interrupt_controllers; + +u32 rtas_data; +u32 rtas_entry; + +/* + * Wrapper for allocating memory for various data that needs to be + * attached to device nodes as they are processed at boot or when + * added to the device tree later (e.g. DLPAR). At boot there is + * already a region reserved so we just increment *mem_start by size; + * otherwise we call kmalloc. + */ +static void * prom_alloc(unsigned long size, unsigned long *mem_start) +{ + unsigned long tmp; + + if (!mem_start) + return kmalloc(size, GFP_KERNEL); + + tmp = *mem_start; + *mem_start += size; + return (void *)tmp; +} + +/* + * Find the device_node with a given phandle. + */ +static struct device_node * find_phandle(phandle ph) +{ + struct device_node *np; + + for (np = allnodes; np != 0; np = np->allnext) + if (np->linux_phandle == ph) + return np; + return NULL; +} + +/* + * Find the interrupt parent of a node. + */ +static struct device_node * __devinit intr_parent(struct device_node *p) +{ + phandle *parp; + + parp = (phandle *) get_property(p, "interrupt-parent", NULL); + if (parp == NULL) + return p->parent; + p = find_phandle(*parp); + if (p != NULL) + return p; + /* + * On a powermac booted with BootX, we don't get to know the + * phandles for any nodes, so find_phandle will return NULL. + * Fortunately these machines only have one interrupt controller + * so there isn't in fact any ambiguity. -- paulus + */ + if (num_interrupt_controllers == 1) + p = dflt_interrupt_controller; + return p; +} + +/* + * Find out the size of each entry of the interrupts property + * for a node. + */ +int __devinit prom_n_intr_cells(struct device_node *np) +{ + struct device_node *p; + unsigned int *icp; + + for (p = np; (p = intr_parent(p)) != NULL; ) { + icp = (unsigned int *) + get_property(p, "#interrupt-cells", NULL); + if (icp != NULL) + return *icp; + if (get_property(p, "interrupt-controller", NULL) != NULL + || get_property(p, "interrupt-map", NULL) != NULL) { + printk("oops, node %s doesn't have #interrupt-cells\n", + p->full_name); + return 1; + } + } +#ifdef DEBUG_IRQ + printk("prom_n_intr_cells failed for %s\n", np->full_name); +#endif + return 1; +} + +/* + * Map an interrupt from a device up to the platform interrupt + * descriptor. + */ +static int __devinit map_interrupt(unsigned int **irq, struct device_node **ictrler, + struct device_node *np, unsigned int *ints, + int nintrc) +{ + struct device_node *p, *ipar; + unsigned int *imap, *imask, *ip; + int i, imaplen, match; + int newintrc = 0, newaddrc = 0; + unsigned int *reg; + int naddrc; + + reg = (unsigned int *) get_property(np, "reg", NULL); + naddrc = prom_n_addr_cells(np); + p = intr_parent(np); + while (p != NULL) { + if (get_property(p, "interrupt-controller", NULL) != NULL) + /* this node is an interrupt controller, stop here */ + break; + imap = (unsigned int *) + get_property(p, "interrupt-map", &imaplen); + if (imap == NULL) { + p = intr_parent(p); + continue; + } + imask = (unsigned int *) + get_property(p, "interrupt-map-mask", NULL); + if (imask == NULL) { + printk("oops, %s has interrupt-map but no mask\n", + p->full_name); + return 0; + } + imaplen /= sizeof(unsigned int); + match = 0; + ipar = NULL; + while (imaplen > 0 && !match) { + /* check the child-interrupt field */ + match = 1; + for (i = 0; i < naddrc && match; ++i) + match = ((reg[i] ^ imap[i]) & imask[i]) == 0; + for (; i < naddrc + nintrc && match; ++i) + match = ((ints[i-naddrc] ^ imap[i]) & imask[i]) == 0; + imap += naddrc + nintrc; + imaplen -= naddrc + nintrc; + /* grab the interrupt parent */ + ipar = find_phandle((phandle) *imap++); + --imaplen; + if (ipar == NULL && num_interrupt_controllers == 1) + /* cope with BootX not giving us phandles */ + ipar = dflt_interrupt_controller; + if (ipar == NULL) { + printk("oops, no int parent %x in map of %s\n", + imap[-1], p->full_name); + return 0; + } + /* find the parent's # addr and intr cells */ + ip = (unsigned int *) + get_property(ipar, "#interrupt-cells", NULL); + if (ip == NULL) { + printk("oops, no #interrupt-cells on %s\n", + ipar->full_name); + return 0; + } + newintrc = *ip; + ip = (unsigned int *) + get_property(ipar, "#address-cells", NULL); + newaddrc = (ip == NULL)? 0: *ip; + imap += newaddrc + newintrc; + imaplen -= newaddrc + newintrc; + } + if (imaplen < 0) { + printk("oops, error decoding int-map on %s, len=%d\n", + p->full_name, imaplen); + return 0; + } + if (!match) { +#ifdef DEBUG_IRQ + printk("oops, no match in %s int-map for %s\n", + p->full_name, np->full_name); +#endif + return 0; + } + p = ipar; + naddrc = newaddrc; + nintrc = newintrc; + ints = imap - nintrc; + reg = ints - naddrc; + } + if (p == NULL) { +#ifdef DEBUG_IRQ + printk("hmmm, int tree for %s doesn't have ctrler\n", + np->full_name); +#endif + return 0; + } + *irq = ints; + *ictrler = p; + return nintrc; +} + +static int __devinit finish_node_interrupts(struct device_node *np, + unsigned long *mem_start, + int measure_only) +{ + unsigned int *ints; + int intlen, intrcells, intrcount; + int i, j, n; + unsigned int *irq, virq; + struct device_node *ic; + + ints = (unsigned int *) get_property(np, "interrupts", &intlen); + if (ints == NULL) + return 0; + intrcells = prom_n_intr_cells(np); + intlen /= intrcells * sizeof(unsigned int); + + np->intrs = prom_alloc(intlen * sizeof(*(np->intrs)), mem_start); + if (!np->intrs) + return -ENOMEM; + + if (measure_only) + return 0; + + intrcount = 0; + for (i = 0; i < intlen; ++i, ints += intrcells) { + n = map_interrupt(&irq, &ic, np, ints, intrcells); + if (n <= 0) + continue; + + /* don't map IRQ numbers under a cascaded 8259 controller */ + if (ic && device_is_compatible(ic, "chrp,iic")) { + np->intrs[intrcount].line = irq[0]; + } else { +#ifdef CONFIG_PPC64 + virq = virt_irq_create_mapping(irq[0]); + if (virq == NO_IRQ) { + printk(KERN_CRIT "Could not allocate interrupt" + " number for %s\n", np->full_name); + continue; + } + virq = irq_offset_up(virq); +#else + virq = irq[0]; +#endif + np->intrs[intrcount].line = virq; + } + +#ifdef CONFIG_PPC64 + /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ + if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) { + char *name = get_property(ic->parent, "name", NULL); + if (name && !strcmp(name, "u3")) + np->intrs[intrcount].line += 128; + else if (!(name && !strcmp(name, "mac-io"))) + /* ignore other cascaded controllers, such as + the k2-sata-root */ + break; + } +#endif + np->intrs[intrcount].sense = 1; + if (n > 1) + np->intrs[intrcount].sense = irq[1]; + if (n > 2) { + printk("hmmm, got %d intr cells for %s:", n, + np->full_name); + for (j = 0; j < n; ++j) + printk(" %d", irq[j]); + printk("\n"); + } + ++intrcount; + } + np->n_intrs = intrcount; + + return 0; +} + +static int __devinit interpret_pci_props(struct device_node *np, + unsigned long *mem_start, + int naddrc, int nsizec, + int measure_only) +{ + struct address_range *adr; + struct pci_reg_property *pci_addrs; + int i, l, n_addrs; + + pci_addrs = (struct pci_reg_property *) + get_property(np, "assigned-addresses", &l); + if (!pci_addrs) + return 0; + + n_addrs = l / sizeof(*pci_addrs); + + adr = prom_alloc(n_addrs * sizeof(*adr), mem_start); + if (!adr) + return -ENOMEM; + + if (measure_only) + return 0; + + np->addrs = adr; + np->n_addrs = n_addrs; + + for (i = 0; i < n_addrs; i++) { + adr[i].space = pci_addrs[i].addr.a_hi; + adr[i].address = pci_addrs[i].addr.a_lo | + ((u64)pci_addrs[i].addr.a_mid << 32); + adr[i].size = pci_addrs[i].size_lo; + } + + return 0; +} + +static int __init interpret_dbdma_props(struct device_node *np, + unsigned long *mem_start, + int naddrc, int nsizec, + int measure_only) +{ + struct reg_property32 *rp; + struct address_range *adr; + unsigned long base_address; + int i, l; + struct device_node *db; + + base_address = 0; + if (!measure_only) { + for (db = np->parent; db != NULL; db = db->parent) { + if (!strcmp(db->type, "dbdma") && db->n_addrs != 0) { + base_address = db->addrs[0].address; + break; + } + } + } + + rp = (struct reg_property32 *) get_property(np, "reg", &l); + if (rp != 0 && l >= sizeof(struct reg_property32)) { + i = 0; + adr = (struct address_range *) (*mem_start); + while ((l -= sizeof(struct reg_property32)) >= 0) { + if (!measure_only) { + adr[i].space = 2; + adr[i].address = rp[i].address + base_address; + adr[i].size = rp[i].size; + } + ++i; + } + np->addrs = adr; + np->n_addrs = i; + (*mem_start) += i * sizeof(struct address_range); + } + + return 0; +} + +static int __init interpret_macio_props(struct device_node *np, + unsigned long *mem_start, + int naddrc, int nsizec, + int measure_only) +{ + struct reg_property32 *rp; + struct address_range *adr; + unsigned long base_address; + int i, l; + struct device_node *db; + + base_address = 0; + if (!measure_only) { + for (db = np->parent; db != NULL; db = db->parent) { + if (!strcmp(db->type, "mac-io") && db->n_addrs != 0) { + base_address = db->addrs[0].address; + break; + } + } + } + + rp = (struct reg_property32 *) get_property(np, "reg", &l); + if (rp != 0 && l >= sizeof(struct reg_property32)) { + i = 0; + adr = (struct address_range *) (*mem_start); + while ((l -= sizeof(struct reg_property32)) >= 0) { + if (!measure_only) { + adr[i].space = 2; + adr[i].address = rp[i].address + base_address; + adr[i].size = rp[i].size; + } + ++i; + } + np->addrs = adr; + np->n_addrs = i; + (*mem_start) += i * sizeof(struct address_range); + } + + return 0; +} + +static int __init interpret_isa_props(struct device_node *np, + unsigned long *mem_start, + int naddrc, int nsizec, + int measure_only) +{ + struct isa_reg_property *rp; + struct address_range *adr; + int i, l; + + rp = (struct isa_reg_property *) get_property(np, "reg", &l); + if (rp != 0 && l >= sizeof(struct isa_reg_property)) { + i = 0; + adr = (struct address_range *) (*mem_start); + while ((l -= sizeof(struct isa_reg_property)) >= 0) { + if (!measure_only) { + adr[i].space = rp[i].space; + adr[i].address = rp[i].address; + adr[i].size = rp[i].size; + } + ++i; + } + np->addrs = adr; + np->n_addrs = i; + (*mem_start) += i * sizeof(struct address_range); + } + + return 0; +} + +static int __init interpret_root_props(struct device_node *np, + unsigned long *mem_start, + int naddrc, int nsizec, + int measure_only) +{ + struct address_range *adr; + int i, l; + unsigned int *rp; + int rpsize = (naddrc + nsizec) * sizeof(unsigned int); + + rp = (unsigned int *) get_property(np, "reg", &l); + if (rp != 0 && l >= rpsize) { + i = 0; + adr = (struct address_range *) (*mem_start); + while ((l -= rpsize) >= 0) { + if (!measure_only) { + adr[i].space = 0; + adr[i].address = rp[naddrc - 1]; + adr[i].size = rp[naddrc + nsizec - 1]; + } + ++i; + rp += naddrc + nsizec; + } + np->addrs = adr; + np->n_addrs = i; + (*mem_start) += i * sizeof(struct address_range); + } + + return 0; +} + +static int __devinit finish_node(struct device_node *np, + unsigned long *mem_start, + interpret_func *ifunc, + int naddrc, int nsizec, + int measure_only) +{ + struct device_node *child; + int *ip, rc = 0; + + /* get the device addresses and interrupts */ + if (ifunc != NULL) + rc = ifunc(np, mem_start, naddrc, nsizec, measure_only); + if (rc) + goto out; + + rc = finish_node_interrupts(np, mem_start, measure_only); + if (rc) + goto out; + + /* Look for #address-cells and #size-cells properties. */ + ip = (int *) get_property(np, "#address-cells", NULL); + if (ip != NULL) + naddrc = *ip; + ip = (int *) get_property(np, "#size-cells", NULL); + if (ip != NULL) + nsizec = *ip; + + if (!strcmp(np->name, "device-tree") || np->parent == NULL) + ifunc = interpret_root_props; + else if (np->type == 0) + ifunc = NULL; + else if (!strcmp(np->type, "pci") || !strcmp(np->type, "vci")) + ifunc = interpret_pci_props; + else if (!strcmp(np->type, "dbdma")) + ifunc = interpret_dbdma_props; + else if (!strcmp(np->type, "mac-io") || ifunc == interpret_macio_props) + ifunc = interpret_macio_props; + else if (!strcmp(np->type, "isa")) + ifunc = interpret_isa_props; + else if (!strcmp(np->name, "uni-n") || !strcmp(np->name, "u3")) + ifunc = interpret_root_props; + else if (!((ifunc == interpret_dbdma_props + || ifunc == interpret_macio_props) + && (!strcmp(np->type, "escc") + || !strcmp(np->type, "media-bay")))) + ifunc = NULL; + + for (child = np->child; child != NULL; child = child->sibling) { + rc = finish_node(child, mem_start, ifunc, + naddrc, nsizec, measure_only); + if (rc) + goto out; + } +out: + return rc; +} + +static void __init scan_interrupt_controllers(void) +{ + struct device_node *np; + int n = 0; + char *name, *ic; + int iclen; + + for (np = allnodes; np != NULL; np = np->allnext) { + ic = get_property(np, "interrupt-controller", &iclen); + name = get_property(np, "name", NULL); + /* checking iclen makes sure we don't get a false + match on /chosen.interrupt_controller */ + if ((name != NULL + && strcmp(name, "interrupt-controller") == 0) + || (ic != NULL && iclen == 0 + && strcmp(name, "AppleKiwi"))) { + if (n == 0) + dflt_interrupt_controller = np; + ++n; + } + } + num_interrupt_controllers = n; +} + +/** + * finish_device_tree is called once things are running normally + * (i.e. with text and data mapped to the address they were linked at). + * It traverses the device tree and fills in some of the additional, + * fields in each node like {n_}addrs and {n_}intrs, the virt interrupt + * mapping is also initialized at this point. + */ +void __init finish_device_tree(void) +{ + unsigned long start, end, size = 0; + + DBG(" -> finish_device_tree\n"); + +#ifdef CONFIG_PPC64 + /* Initialize virtual IRQ map */ + virt_irq_init(); +#endif + scan_interrupt_controllers(); + + /* + * Finish device-tree (pre-parsing some properties etc...) + * We do this in 2 passes. One with "measure_only" set, which + * will only measure the amount of memory needed, then we can + * allocate that memory, and call finish_node again. However, + * we must be careful as most routines will fail nowadays when + * prom_alloc() returns 0, so we must make sure our first pass + * doesn't start at 0. We pre-initialize size to 16 for that + * reason and then remove those additional 16 bytes + */ + size = 16; + finish_node(allnodes, &size, NULL, 0, 0, 1); + size -= 16; + end = start = (unsigned long) __va(lmb_alloc(size, 128)); + finish_node(allnodes, &end, NULL, 0, 0, 0); + BUG_ON(end != start + size); + + DBG(" <- finish_device_tree\n"); +} + +static inline char *find_flat_dt_string(u32 offset) +{ + return ((char *)initial_boot_params) + + initial_boot_params->off_dt_strings + offset; +} + +/** + * This function is used to scan the flattened device-tree, it is + * used to extract the memory informations at boot before we can + * unflatten the tree + */ +static int __init scan_flat_dt(int (*it)(unsigned long node, + const char *uname, int depth, + void *data), + void *data) +{ + unsigned long p = ((unsigned long)initial_boot_params) + + initial_boot_params->off_dt_struct; + int rc = 0; + int depth = -1; + + do { + u32 tag = *((u32 *)p); + char *pathp; + + p += 4; + if (tag == OF_DT_END_NODE) { + depth --; + continue; + } + if (tag == OF_DT_NOP) + continue; + if (tag == OF_DT_END) + break; + if (tag == OF_DT_PROP) { + u32 sz = *((u32 *)p); + p += 8; + if (initial_boot_params->version < 0x10) + p = _ALIGN(p, sz >= 8 ? 8 : 4); + p += sz; + p = _ALIGN(p, 4); + continue; + } + if (tag != OF_DT_BEGIN_NODE) { + printk(KERN_WARNING "Invalid tag %x scanning flattened" + " device tree !\n", tag); + return -EINVAL; + } + depth++; + pathp = (char *)p; + p = _ALIGN(p + strlen(pathp) + 1, 4); + if ((*pathp) == '/') { + char *lp, *np; + for (lp = NULL, np = pathp; *np; np++) + if ((*np) == '/') + lp = np+1; + if (lp != NULL) + pathp = lp; + } + rc = it(p, pathp, depth, data); + if (rc != 0) + break; + } while(1); + + return rc; +} + +/** + * This function can be used within scan_flattened_dt callback to get + * access to properties + */ +static void* __init get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size) +{ + unsigned long p = node; + + do { + u32 tag = *((u32 *)p); + u32 sz, noff; + const char *nstr; + + p += 4; + if (tag == OF_DT_NOP) + continue; + if (tag != OF_DT_PROP) + return NULL; + + sz = *((u32 *)p); + noff = *((u32 *)(p + 4)); + p += 8; + if (initial_boot_params->version < 0x10) + p = _ALIGN(p, sz >= 8 ? 8 : 4); + + nstr = find_flat_dt_string(noff); + if (nstr == NULL) { + printk(KERN_WARNING "Can't find property index" + " name !\n"); + return NULL; + } + if (strcmp(name, nstr) == 0) { + if (size) + *size = sz; + return (void *)p; + } + p += sz; + p = _ALIGN(p, 4); + } while(1); +} + +static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, + unsigned long align) +{ + void *res; + + *mem = _ALIGN(*mem, align); + res = (void *)*mem; + *mem += size; + + return res; +} + +static unsigned long __init unflatten_dt_node(unsigned long mem, + unsigned long *p, + struct device_node *dad, + struct device_node ***allnextpp, + unsigned long fpsize) +{ + struct device_node *np; + struct property *pp, **prev_pp = NULL; + char *pathp; + u32 tag; + unsigned int l, allocl; + int has_name = 0; + int new_format = 0; + + tag = *((u32 *)(*p)); + if (tag != OF_DT_BEGIN_NODE) { + printk("Weird tag at start of node: %x\n", tag); + return mem; + } + *p += 4; + pathp = (char *)*p; + l = allocl = strlen(pathp) + 1; + *p = _ALIGN(*p + l, 4); + + /* version 0x10 has a more compact unit name here instead of the full + * path. we accumulate the full path size using "fpsize", we'll rebuild + * it later. We detect this because the first character of the name is + * not '/'. + */ + if ((*pathp) != '/') { + new_format = 1; + if (fpsize == 0) { + /* root node: special case. fpsize accounts for path + * plus terminating zero. root node only has '/', so + * fpsize should be 2, but we want to avoid the first + * level nodes to have two '/' so we use fpsize 1 here + */ + fpsize = 1; + allocl = 2; + } else { + /* account for '/' and path size minus terminal 0 + * already in 'l' + */ + fpsize += l; + allocl = fpsize; + } + } + + + np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, + __alignof__(struct device_node)); + if (allnextpp) { + memset(np, 0, sizeof(*np)); + np->full_name = ((char*)np) + sizeof(struct device_node); + if (new_format) { + char *p = np->full_name; + /* rebuild full path for new format */ + if (dad && dad->parent) { + strcpy(p, dad->full_name); +#ifdef DEBUG + if ((strlen(p) + l + 1) != allocl) { + DBG("%s: p: %d, l: %d, a: %d\n", + pathp, strlen(p), l, allocl); + } +#endif + p += strlen(p); + } + *(p++) = '/'; + memcpy(p, pathp, l); + } else + memcpy(np->full_name, pathp, l); + prev_pp = &np->properties; + **allnextpp = np; + *allnextpp = &np->allnext; + if (dad != NULL) { + np->parent = dad; + /* we temporarily use the next field as `last_child'*/ + if (dad->next == 0) + dad->child = np; + else + dad->next->sibling = np; + dad->next = np; + } + kref_init(&np->kref); + } + while(1) { + u32 sz, noff; + char *pname; + + tag = *((u32 *)(*p)); + if (tag == OF_DT_NOP) { + *p += 4; + continue; + } + if (tag != OF_DT_PROP) + break; + *p += 4; + sz = *((u32 *)(*p)); + noff = *((u32 *)((*p) + 4)); + *p += 8; + if (initial_boot_params->version < 0x10) + *p = _ALIGN(*p, sz >= 8 ? 8 : 4); + + pname = find_flat_dt_string(noff); + if (pname == NULL) { + printk("Can't find property name in list !\n"); + break; + } + if (strcmp(pname, "name") == 0) + has_name = 1; + l = strlen(pname) + 1; + pp = unflatten_dt_alloc(&mem, sizeof(struct property), + __alignof__(struct property)); + if (allnextpp) { + if (strcmp(pname, "linux,phandle") == 0) { + np->node = *((u32 *)*p); + if (np->linux_phandle == 0) + np->linux_phandle = np->node; + } + if (strcmp(pname, "ibm,phandle") == 0) + np->linux_phandle = *((u32 *)*p); + pp->name = pname; + pp->length = sz; + pp->value = (void *)*p; + *prev_pp = pp; + prev_pp = &pp->next; + } + *p = _ALIGN((*p) + sz, 4); + } + /* with version 0x10 we may not have the name property, recreate + * it here from the unit name if absent + */ + if (!has_name) { + char *p = pathp, *ps = pathp, *pa = NULL; + int sz; + + while (*p) { + if ((*p) == '@') + pa = p; + if ((*p) == '/') + ps = p + 1; + p++; + } + if (pa < ps) + pa = p; + sz = (pa - ps) + 1; + pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, + __alignof__(struct property)); + if (allnextpp) { + pp->name = "name"; + pp->length = sz; + pp->value = (unsigned char *)(pp + 1); + *prev_pp = pp; + prev_pp = &pp->next; + memcpy(pp->value, ps, sz - 1); + ((char *)pp->value)[sz - 1] = 0; + DBG("fixed up name for %s -> %s\n", pathp, pp->value); + } + } + if (allnextpp) { + *prev_pp = NULL; + np->name = get_property(np, "name", NULL); + np->type = get_property(np, "device_type", NULL); + + if (!np->name) + np->name = "<NULL>"; + if (!np->type) + np->type = "<NULL>"; + } + while (tag == OF_DT_BEGIN_NODE) { + mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); + tag = *((u32 *)(*p)); + } + if (tag != OF_DT_END_NODE) { + printk("Weird tag at end of node: %x\n", tag); + return mem; + } + *p += 4; + return mem; +} + + +/** + * unflattens the device-tree passed by the firmware, creating the + * tree of struct device_node. It also fills the "name" and "type" + * pointers of the nodes so the normal device-tree walking functions + * can be used (this used to be done by finish_device_tree) + */ +void __init unflatten_device_tree(void) +{ + unsigned long start, mem, size; + struct device_node **allnextp = &allnodes; + char *p = NULL; + int l = 0; + + DBG(" -> unflatten_device_tree()\n"); + + /* First pass, scan for size */ + start = ((unsigned long)initial_boot_params) + + initial_boot_params->off_dt_struct; + size = unflatten_dt_node(0, &start, NULL, NULL, 0); + size = (size | 3) + 1; + + DBG(" size is %lx, allocating...\n", size); + + /* Allocate memory for the expanded device tree */ + mem = lmb_alloc(size + 4, __alignof__(struct device_node)); + if (!mem) { + DBG("Couldn't allocate memory with lmb_alloc()!\n"); + panic("Couldn't allocate memory with lmb_alloc()!\n"); + } + mem = (unsigned long) __va(mem); + + ((u32 *)mem)[size / 4] = 0xdeadbeef; + + DBG(" unflattening %lx...\n", mem); + + /* Second pass, do actual unflattening */ + start = ((unsigned long)initial_boot_params) + + initial_boot_params->off_dt_struct; + unflatten_dt_node(mem, &start, NULL, &allnextp, 0); + if (*((u32 *)start) != OF_DT_END) + printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start)); + if (((u32 *)mem)[size / 4] != 0xdeadbeef) + printk(KERN_WARNING "End of tree marker overwritten: %08x\n", + ((u32 *)mem)[size / 4] ); + *allnextp = NULL; + + /* Get pointer to OF "/chosen" node for use everywhere */ + of_chosen = of_find_node_by_path("/chosen"); + + /* Retreive command line */ + if (of_chosen != NULL) { + p = (char *)get_property(of_chosen, "bootargs", &l); + if (p != NULL && l > 0) + strlcpy(cmd_line, p, min(l, COMMAND_LINE_SIZE)); + } +#ifdef CONFIG_CMDLINE + if (l == 0 || (l == 1 && (*p) == 0)) + strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#endif /* CONFIG_CMDLINE */ + + DBG("Command line is: %s\n", cmd_line); + + DBG(" <- unflatten_device_tree()\n"); +} + + +static int __init early_init_dt_scan_cpus(unsigned long node, + const char *uname, int depth, void *data) +{ + char *type = get_flat_dt_prop(node, "device_type", NULL); + u32 *prop; + unsigned long size = 0; + + /* We are scanning "cpu" nodes only */ + if (type == NULL || strcmp(type, "cpu") != 0) + return 0; + +#ifdef CONFIG_PPC_PSERIES + /* On LPAR, look for the first ibm,pft-size property for the hash table size + */ + if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) { + u32 *pft_size; + pft_size = get_flat_dt_prop(node, "ibm,pft-size", NULL); + if (pft_size != NULL) { + /* pft_size[0] is the NUMA CEC cookie */ + ppc64_pft_size = pft_size[1]; + } + } +#endif + +#ifdef CONFIG_PPC64 + if (initial_boot_params && initial_boot_params->version >= 2) { + /* version 2 of the kexec param format adds the phys cpuid + * of booted proc. + */ + boot_cpuid_phys = initial_boot_params->boot_cpuid_phys; + boot_cpuid = 0; + } else { + /* Check if it's the boot-cpu, set it's hw index in paca now */ + if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { + prop = get_flat_dt_prop(node, "reg", NULL); + set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop); + boot_cpuid_phys = get_hard_smp_processor_id(0); + } + } +#endif + +#ifdef CONFIG_ALTIVEC + /* Check if we have a VMX and eventually update CPU features */ + prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", &size); + if (prop && (*prop) > 0) { + cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; + } + + /* Same goes for Apple's "altivec" property */ + prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL); + if (prop) { + cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; + } +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_PPC_PSERIES + /* + * Check for an SMT capable CPU and set the CPU feature. We do + * this by looking at the size of the ibm,ppc-interrupt-server#s + * property + */ + prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", + &size); + cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; + if (prop && ((size / sizeof(u32)) > 1)) + cur_cpu_spec->cpu_features |= CPU_FTR_SMT; +#endif + + return 0; +} + +static int __init early_init_dt_scan_chosen(unsigned long node, + const char *uname, int depth, void *data) +{ + u32 *prop; + unsigned long *lprop; + + DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); + + if (depth != 1 || strcmp(uname, "chosen") != 0) + return 0; + + /* get platform type */ + prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL); + if (prop == NULL) + return 0; +#ifdef CONFIG_PPC64 + systemcfg->platform = *prop; +#else + _machine = *prop; +#endif + +#ifdef CONFIG_PPC64 + /* check if iommu is forced on or off */ + if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) + iommu_is_off = 1; + if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) + iommu_force_on = 1; +#endif + + lprop = get_flat_dt_prop(node, "linux,memory-limit", NULL); + if (lprop) + memory_limit = *lprop; + +#ifdef CONFIG_PPC64 + lprop = get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); + if (lprop) + tce_alloc_start = *lprop; + lprop = get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); + if (lprop) + tce_alloc_end = *lprop; +#endif + +#ifdef CONFIG_PPC_RTAS + /* To help early debugging via the front panel, we retreive a minimal + * set of RTAS infos now if available + */ + { + u64 *basep, *entryp; + + basep = get_flat_dt_prop(node, "linux,rtas-base", NULL); + entryp = get_flat_dt_prop(node, "linux,rtas-entry", NULL); + prop = get_flat_dt_prop(node, "linux,rtas-size", NULL); + if (basep && entryp && prop) { + rtas.base = *basep; + rtas.entry = *entryp; + rtas.size = *prop; + } + } +#endif /* CONFIG_PPC_RTAS */ + + /* break now */ + return 1; +} + +static int __init early_init_dt_scan_root(unsigned long node, + const char *uname, int depth, void *data) +{ + u32 *prop; + + if (depth != 0) + return 0; + + prop = get_flat_dt_prop(node, "#size-cells", NULL); + dt_root_size_cells = (prop == NULL) ? 1 : *prop; + DBG("dt_root_size_cells = %x\n", dt_root_size_cells); + + prop = get_flat_dt_prop(node, "#address-cells", NULL); + dt_root_addr_cells = (prop == NULL) ? 2 : *prop; + DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); + + /* break now */ + return 1; +} + +static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) +{ + cell_t *p = *cellp; + unsigned long r; + + /* Ignore more than 2 cells */ + while (s > sizeof(unsigned long) / 4) { + p++; + s--; + } + r = *p++; +#ifdef CONFIG_PPC64 + if (s > 1) { + r <<= 32; + r |= *(p++); + s--; + } +#endif + + *cellp = p; + return r; +} + + +static int __init early_init_dt_scan_memory(unsigned long node, + const char *uname, int depth, void *data) +{ + char *type = get_flat_dt_prop(node, "device_type", NULL); + cell_t *reg, *endp; + unsigned long l; + + /* We are scanning "memory" nodes only */ + if (type == NULL || strcmp(type, "memory") != 0) + return 0; + + reg = (cell_t *)get_flat_dt_prop(node, "reg", &l); + if (reg == NULL) + return 0; + + endp = reg + (l / sizeof(cell_t)); + + DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n", + uname, l, reg[0], reg[1], reg[2], reg[3]); + + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { + unsigned long base, size; + + base = dt_mem_next_cell(dt_root_addr_cells, ®); + size = dt_mem_next_cell(dt_root_size_cells, ®); + + if (size == 0) + continue; + DBG(" - %lx , %lx\n", base, size); +#ifdef CONFIG_PPC64 + if (iommu_is_off) { + if (base >= 0x80000000ul) + continue; + if ((base + size) > 0x80000000ul) + size = 0x80000000ul - base; + } +#endif + lmb_add(base, size); + } + return 0; +} + +static void __init early_reserve_mem(void) +{ + unsigned long base, size; + unsigned long *reserve_map; + + reserve_map = (unsigned long *)(((unsigned long)initial_boot_params) + + initial_boot_params->off_mem_rsvmap); + while (1) { + base = *(reserve_map++); + size = *(reserve_map++); + if (size == 0) + break; + DBG("reserving: %lx -> %lx\n", base, size); + lmb_reserve(base, size); + } + +#if 0 + DBG("memory reserved, lmbs :\n"); + lmb_dump_all(); +#endif +} + +void __init early_init_devtree(void *params) +{ + DBG(" -> early_init_devtree()\n"); + + /* Setup flat device-tree pointer */ + initial_boot_params = params; + + /* Retrieve various informations from the /chosen node of the + * device-tree, including the platform type, initrd location and + * size, TCE reserve, and more ... + */ + scan_flat_dt(early_init_dt_scan_chosen, NULL); + + /* Scan memory nodes and rebuild LMBs */ + lmb_init(); + scan_flat_dt(early_init_dt_scan_root, NULL); + scan_flat_dt(early_init_dt_scan_memory, NULL); + lmb_enforce_memory_limit(memory_limit); + lmb_analyze(); +#ifdef CONFIG_PPC64 + systemcfg->physicalMemorySize = lmb_phys_mem_size(); +#endif + lmb_reserve(0, __pa(klimit)); + + DBG("Phys. mem: %lx\n", lmb_phys_mem_size()); + + /* Reserve LMB regions used by kernel, initrd, dt, etc... */ + early_reserve_mem(); + + DBG("Scanning CPUs ...\n"); + + /* Retreive hash table size from flattened tree plus other + * CPU related informations (altivec support, boot CPU ID, ...) + */ + scan_flat_dt(early_init_dt_scan_cpus, NULL); + +#ifdef CONFIG_PPC_PSERIES + /* If hash size wasn't obtained above, we calculate it now based on + * the total RAM size + */ + if (ppc64_pft_size == 0) { + unsigned long rnd_mem_size, pteg_count; + + /* round mem_size up to next power of 2 */ + rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize); + if (rnd_mem_size < systemcfg->physicalMemorySize) + rnd_mem_size <<= 1; + + /* # pages / 2 */ + pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11); + + ppc64_pft_size = __ilog2(pteg_count << 7); + } + + DBG("Hash pftSize: %x\n", (int)ppc64_pft_size); +#endif + DBG(" <- early_init_devtree()\n"); +} + +#undef printk + +int +prom_n_addr_cells(struct device_node* np) +{ + int* ip; + do { + if (np->parent) + np = np->parent; + ip = (int *) get_property(np, "#address-cells", NULL); + if (ip != NULL) + return *ip; + } while (np->parent); + /* No #address-cells property for the root node, default to 1 */ + return 1; +} + +int +prom_n_size_cells(struct device_node* np) +{ + int* ip; + do { + if (np->parent) + np = np->parent; + ip = (int *) get_property(np, "#size-cells", NULL); + if (ip != NULL) + return *ip; + } while (np->parent); + /* No #size-cells property for the root node, default to 1 */ + return 1; +} + +/** + * Work out the sense (active-low level / active-high edge) + * of each interrupt from the device tree. + */ +void __init prom_get_irq_senses(unsigned char *senses, int off, int max) +{ + struct device_node *np; + int i, j; + + /* default to level-triggered */ + memset(senses, 1, max - off); + + for (np = allnodes; np != 0; np = np->allnext) { + for (j = 0; j < np->n_intrs; j++) { + i = np->intrs[j].line; + if (i >= off && i < max) + senses[i-off] = np->intrs[j].sense ? + IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE : + IRQ_SENSE_EDGE | IRQ_POLARITY_POSITIVE; + } + } +} + +/** + * Construct and return a list of the device_nodes with a given name. + */ +struct device_node *find_devices(const char *name) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + if (np->name != 0 && strcasecmp(np->name, name) == 0) { + *prevp = np; + prevp = &np->next; + } + } + *prevp = NULL; + return head; +} +EXPORT_SYMBOL(find_devices); + +/** + * Construct and return a list of the device_nodes with a given type. + */ +struct device_node *find_type_devices(const char *type) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + if (np->type != 0 && strcasecmp(np->type, type) == 0) { + *prevp = np; + prevp = &np->next; + } + } + *prevp = NULL; + return head; +} +EXPORT_SYMBOL(find_type_devices); + +/** + * Returns all nodes linked together + */ +struct device_node *find_all_nodes(void) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + *prevp = np; + prevp = &np->next; + } + *prevp = NULL; + return head; +} +EXPORT_SYMBOL(find_all_nodes); + +/** Checks if the given "compat" string matches one of the strings in + * the device's "compatible" property + */ +int device_is_compatible(struct device_node *device, const char *compat) +{ + const char* cp; + int cplen, l; + + cp = (char *) get_property(device, "compatible", &cplen); + if (cp == NULL) + return 0; + while (cplen > 0) { + if (strncasecmp(cp, compat, strlen(compat)) == 0) + return 1; + l = strlen(cp) + 1; + cp += l; + cplen -= l; + } + + return 0; +} +EXPORT_SYMBOL(device_is_compatible); + + +/** + * Indicates whether the root node has a given value in its + * compatible property. + */ +int machine_is_compatible(const char *compat) +{ + struct device_node *root; + int rc = 0; + + root = of_find_node_by_path("/"); + if (root) { + rc = device_is_compatible(root, compat); + of_node_put(root); + } + return rc; +} +EXPORT_SYMBOL(machine_is_compatible); + +/** + * Construct and return a list of the device_nodes with a given type + * and compatible property. + */ +struct device_node *find_compatible_devices(const char *type, + const char *compat) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + if (type != NULL + && !(np->type != 0 && strcasecmp(np->type, type) == 0)) + continue; + if (device_is_compatible(np, compat)) { + *prevp = np; + prevp = &np->next; + } + } + *prevp = NULL; + return head; +} +EXPORT_SYMBOL(find_compatible_devices); + +/** + * Find the device_node with a given full_name. + */ +struct device_node *find_path_device(const char *path) +{ + struct device_node *np; + + for (np = allnodes; np != 0; np = np->allnext) + if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0) + return np; + return NULL; +} +EXPORT_SYMBOL(find_path_device); + +/******* + * + * New implementation of the OF "find" APIs, return a refcounted + * object, call of_node_put() when done. The device tree and list + * are protected by a rw_lock. + * + * Note that property management will need some locking as well, + * this isn't dealt with yet. + * + *******/ + +/** + * of_find_node_by_name - Find a node by its "name" property + * @from: The node to start searching from or NULL, the node + * you pass will not be searched, only the next one + * will; typically, you pass what the previous call + * returned. of_node_put() will be called on it + * @name: The name string to match against + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_node_by_name(struct device_node *from, + const char *name) +{ + struct device_node *np; + + read_lock(&devtree_lock); + np = from ? from->allnext : allnodes; + for (; np != 0; np = np->allnext) + if (np->name != 0 && strcasecmp(np->name, name) == 0 + && of_node_get(np)) + break; + if (from) + of_node_put(from); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_node_by_name); + +/** + * of_find_node_by_type - Find a node by its "device_type" property + * @from: The node to start searching from or NULL, the node + * you pass will not be searched, only the next one + * will; typically, you pass what the previous call + * returned. of_node_put() will be called on it + * @name: The type string to match against + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_node_by_type(struct device_node *from, + const char *type) +{ + struct device_node *np; + + read_lock(&devtree_lock); + np = from ? from->allnext : allnodes; + for (; np != 0; np = np->allnext) + if (np->type != 0 && strcasecmp(np->type, type) == 0 + && of_node_get(np)) + break; + if (from) + of_node_put(from); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_node_by_type); + +/** + * of_find_compatible_node - Find a node based on type and one of the + * tokens in its "compatible" property + * @from: The node to start searching from or NULL, the node + * you pass will not be searched, only the next one + * will; typically, you pass what the previous call + * returned. of_node_put() will be called on it + * @type: The type string to match "device_type" or NULL to ignore + * @compatible: The string to match to one of the tokens in the device + * "compatible" list. + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_compatible_node(struct device_node *from, + const char *type, const char *compatible) +{ + struct device_node *np; + + read_lock(&devtree_lock); + np = from ? from->allnext : allnodes; + for (; np != 0; np = np->allnext) { + if (type != NULL + && !(np->type != 0 && strcasecmp(np->type, type) == 0)) + continue; + if (device_is_compatible(np, compatible) && of_node_get(np)) + break; + } + if (from) + of_node_put(from); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_compatible_node); + +/** + * of_find_node_by_path - Find a node matching a full OF path + * @path: The full path to match + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_node_by_path(const char *path) +{ + struct device_node *np = allnodes; + + read_lock(&devtree_lock); + for (; np != 0; np = np->allnext) { + if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0 + && of_node_get(np)) + break; + } + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_node_by_path); + +/** + * of_find_node_by_phandle - Find a node given a phandle + * @handle: phandle of the node to find + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_node_by_phandle(phandle handle) +{ + struct device_node *np; + + read_lock(&devtree_lock); + for (np = allnodes; np != 0; np = np->allnext) + if (np->linux_phandle == handle) + break; + if (np) + of_node_get(np); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_node_by_phandle); + +/** + * of_find_all_nodes - Get next node in global list + * @prev: Previous node or NULL to start iteration + * of_node_put() will be called on it + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_all_nodes(struct device_node *prev) +{ + struct device_node *np; + + read_lock(&devtree_lock); + np = prev ? prev->allnext : allnodes; + for (; np != 0; np = np->allnext) + if (of_node_get(np)) + break; + if (prev) + of_node_put(prev); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_all_nodes); + +/** + * of_get_parent - Get a node's parent if any + * @node: Node to get parent + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_get_parent(const struct device_node *node) +{ + struct device_node *np; + + if (!node) + return NULL; + + read_lock(&devtree_lock); + np = of_node_get(node->parent); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_get_parent); + +/** + * of_get_next_child - Iterate a node childs + * @node: parent node + * @prev: previous child of the parent node, or NULL to get first + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_get_next_child(const struct device_node *node, + struct device_node *prev) +{ + struct device_node *next; + + read_lock(&devtree_lock); + next = prev ? prev->sibling : node->child; + for (; next != 0; next = next->sibling) + if (of_node_get(next)) + break; + if (prev) + of_node_put(prev); + read_unlock(&devtree_lock); + return next; +} +EXPORT_SYMBOL(of_get_next_child); + +/** + * of_node_get - Increment refcount of a node + * @node: Node to inc refcount, NULL is supported to + * simplify writing of callers + * + * Returns node. + */ +struct device_node *of_node_get(struct device_node *node) +{ + if (node) + kref_get(&node->kref); + return node; +} +EXPORT_SYMBOL(of_node_get); + +static inline struct device_node * kref_to_device_node(struct kref *kref) +{ + return container_of(kref, struct device_node, kref); +} + +/** + * of_node_release - release a dynamically allocated node + * @kref: kref element of the node to be released + * + * In of_node_put() this function is passed to kref_put() + * as the destructor. + */ +static void of_node_release(struct kref *kref) +{ + struct device_node *node = kref_to_device_node(kref); + struct property *prop = node->properties; + + if (!OF_IS_DYNAMIC(node)) + return; + while (prop) { + struct property *next = prop->next; + kfree(prop->name); + kfree(prop->value); + kfree(prop); + prop = next; + } + kfree(node->intrs); + kfree(node->addrs); + kfree(node->full_name); + kfree(node->data); + kfree(node); +} + +/** + * of_node_put - Decrement refcount of a node + * @node: Node to dec refcount, NULL is supported to + * simplify writing of callers + * + */ +void of_node_put(struct device_node *node) +{ + if (node) + kref_put(&node->kref, of_node_release); +} +EXPORT_SYMBOL(of_node_put); + +/* + * Plug a device node into the tree and global list. + */ +void of_attach_node(struct device_node *np) +{ + write_lock(&devtree_lock); + np->sibling = np->parent->child; + np->allnext = allnodes; + np->parent->child = np; + allnodes = np; + write_unlock(&devtree_lock); +} + +/* + * "Unplug" a node from the device tree. The caller must hold + * a reference to the node. The memory associated with the node + * is not freed until its refcount goes to zero. + */ +void of_detach_node(const struct device_node *np) +{ + struct device_node *parent; + + write_lock(&devtree_lock); + + parent = np->parent; + + if (allnodes == np) + allnodes = np->allnext; + else { + struct device_node *prev; + for (prev = allnodes; + prev->allnext != np; + prev = prev->allnext) + ; + prev->allnext = np->allnext; + } + + if (parent->child == np) + parent->child = np->sibling; + else { + struct device_node *prevsib; + for (prevsib = np->parent->child; + prevsib->sibling != np; + prevsib = prevsib->sibling) + ; + prevsib->sibling = np->sibling; + } + + write_unlock(&devtree_lock); +} + +#ifdef CONFIG_PPC_PSERIES +/* + * Fix up the uninitialized fields in a new device node: + * name, type, n_addrs, addrs, n_intrs, intrs, and pci-specific fields + * + * A lot of boot-time code is duplicated here, because functions such + * as finish_node_interrupts, interpret_pci_props, etc. cannot use the + * slab allocator. + * + * This should probably be split up into smaller chunks. + */ + +static int of_finish_dynamic_node(struct device_node *node, + unsigned long *unused1, int unused2, + int unused3, int unused4) +{ + struct device_node *parent = of_get_parent(node); + int err = 0; + phandle *ibm_phandle; + + node->name = get_property(node, "name", NULL); + node->type = get_property(node, "device_type", NULL); + + if (!parent) { + err = -ENODEV; + goto out; + } + + /* We don't support that function on PowerMac, at least + * not yet + */ + if (systemcfg->platform == PLATFORM_POWERMAC) + return -ENODEV; + + /* fix up new node's linux_phandle field */ + if ((ibm_phandle = (unsigned int *)get_property(node, "ibm,phandle", NULL))) + node->linux_phandle = *ibm_phandle; + +out: + of_node_put(parent); + return err; +} + +static int prom_reconfig_notifier(struct notifier_block *nb, + unsigned long action, void *node) +{ + int err; + + switch (action) { + case PSERIES_RECONFIG_ADD: + err = finish_node(node, NULL, of_finish_dynamic_node, 0, 0, 0); + if (err < 0) { + printk(KERN_ERR "finish_node returned %d\n", err); + err = NOTIFY_BAD; + } + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block prom_reconfig_nb = { + .notifier_call = prom_reconfig_notifier, + .priority = 10, /* This one needs to run first */ +}; + +static int __init prom_reconfig_setup(void) +{ + return pSeries_reconfig_notifier_register(&prom_reconfig_nb); +} +__initcall(prom_reconfig_setup); +#endif + +/* + * Find a property with a given name for a given node + * and return the value. + */ +unsigned char *get_property(struct device_node *np, const char *name, + int *lenp) +{ + struct property *pp; + + for (pp = np->properties; pp != 0; pp = pp->next) + if (strcmp(pp->name, name) == 0) { + if (lenp != 0) + *lenp = pp->length; + return pp->value; + } + return NULL; +} +EXPORT_SYMBOL(get_property); + +/* + * Add a property to a node + */ +void prom_add_property(struct device_node* np, struct property* prop) +{ + struct property **next = &np->properties; + + prop->next = NULL; + while (*next) + next = &(*next)->next; + *next = prop; +} + +/* I quickly hacked that one, check against spec ! */ +static inline unsigned long +bus_space_to_resource_flags(unsigned int bus_space) +{ + u8 space = (bus_space >> 24) & 0xf; + if (space == 0) + space = 0x02; + if (space == 0x02) + return IORESOURCE_MEM; + else if (space == 0x01) + return IORESOURCE_IO; + else { + printk(KERN_WARNING "prom.c: bus_space_to_resource_flags(), space: %x\n", + bus_space); + return 0; + } +} + +static struct resource *find_parent_pci_resource(struct pci_dev* pdev, + struct address_range *range) +{ + unsigned long mask; + int i; + + /* Check this one */ + mask = bus_space_to_resource_flags(range->space); + for (i=0; i<DEVICE_COUNT_RESOURCE; i++) { + if ((pdev->resource[i].flags & mask) == mask && + pdev->resource[i].start <= range->address && + pdev->resource[i].end > range->address) { + if ((range->address + range->size - 1) > pdev->resource[i].end) { + /* Add better message */ + printk(KERN_WARNING "PCI/OF resource overlap !\n"); + return NULL; + } + break; + } + } + if (i == DEVICE_COUNT_RESOURCE) + return NULL; + return &pdev->resource[i]; +} + +/* + * Request an OF device resource. Currently handles child of PCI devices, + * or other nodes attached to the root node. Ultimately, put some + * link to resources in the OF node. + */ +struct resource *request_OF_resource(struct device_node* node, int index, + const char* name_postfix) +{ + struct pci_dev* pcidev; + u8 pci_bus, pci_devfn; + unsigned long iomask; + struct device_node* nd; + struct resource* parent; + struct resource *res = NULL; + int nlen, plen; + + if (index >= node->n_addrs) + goto fail; + + /* Sanity check on bus space */ + iomask = bus_space_to_resource_flags(node->addrs[index].space); + if (iomask & IORESOURCE_MEM) + parent = &iomem_resource; + else if (iomask & IORESOURCE_IO) + parent = &ioport_resource; + else + goto fail; + + /* Find a PCI parent if any */ + nd = node; + pcidev = NULL; + while (nd) { + if (!pci_device_from_OF_node(nd, &pci_bus, &pci_devfn)) + pcidev = pci_find_slot(pci_bus, pci_devfn); + if (pcidev) break; + nd = nd->parent; + } + if (pcidev) + parent = find_parent_pci_resource(pcidev, &node->addrs[index]); + if (!parent) { + printk(KERN_WARNING "request_OF_resource(%s), parent not found\n", + node->name); + goto fail; + } + + res = __request_region(parent, node->addrs[index].address, + node->addrs[index].size, NULL); + if (!res) + goto fail; + nlen = strlen(node->name); + plen = name_postfix ? strlen(name_postfix) : 0; + res->name = (const char *)kmalloc(nlen+plen+1, GFP_KERNEL); + if (res->name) { + strcpy((char *)res->name, node->name); + if (plen) + strcpy((char *)res->name+nlen, name_postfix); + } + return res; +fail: + return NULL; +} +EXPORT_SYMBOL(request_OF_resource); + +int release_OF_resource(struct device_node *node, int index) +{ + struct pci_dev* pcidev; + u8 pci_bus, pci_devfn; + unsigned long iomask, start, end; + struct device_node* nd; + struct resource* parent; + struct resource *res = NULL; + + if (index >= node->n_addrs) + return -EINVAL; + + /* Sanity check on bus space */ + iomask = bus_space_to_resource_flags(node->addrs[index].space); + if (iomask & IORESOURCE_MEM) + parent = &iomem_resource; + else if (iomask & IORESOURCE_IO) + parent = &ioport_resource; + else + return -EINVAL; + + /* Find a PCI parent if any */ + nd = node; + pcidev = NULL; + while(nd) { + if (!pci_device_from_OF_node(nd, &pci_bus, &pci_devfn)) + pcidev = pci_find_slot(pci_bus, pci_devfn); + if (pcidev) break; + nd = nd->parent; + } + if (pcidev) + parent = find_parent_pci_resource(pcidev, &node->addrs[index]); + if (!parent) { + printk(KERN_WARNING "release_OF_resource(%s), parent not found\n", + node->name); + return -ENODEV; + } + + /* Find us in the parent and its childs */ + res = parent->child; + start = node->addrs[index].address; + end = start + node->addrs[index].size - 1; + while (res) { + if (res->start == start && res->end == end && + (res->flags & IORESOURCE_BUSY)) + break; + if (res->start <= start && res->end >= end) + res = res->child; + else + res = res->sibling; + } + if (!res) + return -ENODEV; + + if (res->name) { + kfree(res->name); + res->name = NULL; + } + release_resource(res); + kfree(res); + + return 0; +} +EXPORT_SYMBOL(release_OF_resource); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c new file mode 100644 index 00000000000..911a803f27d --- /dev/null +++ b/arch/powerpc/kernel/prom_init.c @@ -0,0 +1,2126 @@ +/* + * Procedures for interfacing to Open Firmware. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996-2005 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. + * {engebret|bergner}@us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG_PROM + +#include <stdarg.h> +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/threads.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/stringify.h> +#include <linux/delay.h> +#include <linux/initrd.h> +#include <linux/bitops.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/system.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/pci.h> +#include <asm/iommu.h> +#include <asm/bootinfo.h> +#include <asm/btext.h> +#include <asm/sections.h> +#include <asm/machdep.h> + +#ifdef CONFIG_LOGO_LINUX_CLUT224 +#include <linux/linux_logo.h> +extern const struct linux_logo logo_linux_clut224; +#endif + +/* + * Properties whose value is longer than this get excluded from our + * copy of the device tree. This value does need to be big enough to + * ensure that we don't lose things like the interrupt-map property + * on a PCI-PCI bridge. + */ +#define MAX_PROPERTY_LENGTH (1UL * 1024 * 1024) + +/* + * Eventually bump that one up + */ +#define DEVTREE_CHUNK_SIZE 0x100000 + +/* + * This is the size of the local memory reserve map that gets copied + * into the boot params passed to the kernel. That size is totally + * flexible as the kernel just reads the list until it encounters an + * entry with size 0, so it can be changed without breaking binary + * compatibility + */ +#define MEM_RESERVE_MAP_SIZE 8 + +/* + * prom_init() is called very early on, before the kernel text + * and data have been mapped to KERNELBASE. At this point the code + * is running at whatever address it has been loaded at. + * On ppc32 we compile with -mrelocatable, which means that references + * to extern and static variables get relocated automatically. + * On ppc64 we have to relocate the references explicitly with + * RELOC. (Note that strings count as static variables.) + * + * Because OF may have mapped I/O devices into the area starting at + * KERNELBASE, particularly on CHRP machines, we can't safely call + * OF once the kernel has been mapped to KERNELBASE. Therefore all + * OF calls must be done within prom_init(). + * + * ADDR is used in calls to call_prom. The 4th and following + * arguments to call_prom should be 32-bit values. + * On ppc64, 64 bit values are truncated to 32 bits (and + * fortunately don't get interpreted as two arguments). + */ +#ifdef CONFIG_PPC64 +#define RELOC(x) (*PTRRELOC(&(x))) +#define ADDR(x) (u32) add_reloc_offset((unsigned long)(x)) +#else +#define RELOC(x) (x) +#define ADDR(x) (u32) (x) +#endif + +#define PROM_BUG() do { \ + prom_printf("kernel BUG at %s line 0x%x!\n", \ + RELOC(__FILE__), __LINE__); \ + __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \ +} while (0) + +#ifdef DEBUG_PROM +#define prom_debug(x...) prom_printf(x) +#else +#define prom_debug(x...) +#endif + +#ifdef CONFIG_PPC32 +#define PLATFORM_POWERMAC _MACH_Pmac +#define PLATFORM_CHRP _MACH_chrp +#endif + + +typedef u32 prom_arg_t; + +struct prom_args { + u32 service; + u32 nargs; + u32 nret; + prom_arg_t args[10]; +}; + +struct prom_t { + ihandle root; + ihandle chosen; + int cpu; + ihandle stdout; +}; + +struct mem_map_entry { + unsigned long base; + unsigned long size; +}; + +typedef u32 cell_t; + +extern void __start(unsigned long r3, unsigned long r4, unsigned long r5); + +#ifdef CONFIG_PPC64 +extern void enter_prom(struct prom_args *args, unsigned long entry); +#else +static inline void enter_prom(struct prom_args *args, unsigned long entry) +{ + ((void (*)(struct prom_args *))entry)(args); +} +#endif + +extern void copy_and_flush(unsigned long dest, unsigned long src, + unsigned long size, unsigned long offset); + +/* prom structure */ +static struct prom_t __initdata prom; + +static unsigned long prom_entry __initdata; + +#define PROM_SCRATCH_SIZE 256 + +static char __initdata of_stdout_device[256]; +static char __initdata prom_scratch[PROM_SCRATCH_SIZE]; + +static unsigned long __initdata dt_header_start; +static unsigned long __initdata dt_struct_start, dt_struct_end; +static unsigned long __initdata dt_string_start, dt_string_end; + +static unsigned long __initdata prom_initrd_start, prom_initrd_end; + +#ifdef CONFIG_PPC64 +static int __initdata iommu_force_on; +static int __initdata ppc64_iommu_off; +static unsigned long __initdata prom_tce_alloc_start; +static unsigned long __initdata prom_tce_alloc_end; +#endif + +static int __initdata of_platform; + +static char __initdata prom_cmd_line[COMMAND_LINE_SIZE]; + +static unsigned long __initdata prom_memory_limit; + +static unsigned long __initdata alloc_top; +static unsigned long __initdata alloc_top_high; +static unsigned long __initdata alloc_bottom; +static unsigned long __initdata rmo_top; +static unsigned long __initdata ram_top; + +static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE]; +static int __initdata mem_reserve_cnt; + +static cell_t __initdata regbuf[1024]; + + +#define MAX_CPU_THREADS 2 + +/* TO GO */ +#ifdef CONFIG_HMT +struct { + unsigned int pir; + unsigned int threadid; +} hmt_thread_data[NR_CPUS]; +#endif /* CONFIG_HMT */ + +/* + * Error results ... some OF calls will return "-1" on error, some + * will return 0, some will return either. To simplify, here are + * macros to use with any ihandle or phandle return value to check if + * it is valid + */ + +#define PROM_ERROR (-1u) +#define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR) +#define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR) + + +/* This is the one and *ONLY* place where we actually call open + * firmware. + */ + +static int __init call_prom(const char *service, int nargs, int nret, ...) +{ + int i; + struct prom_args args; + va_list list; + + args.service = ADDR(service); + args.nargs = nargs; + args.nret = nret; + + va_start(list, nret); + for (i = 0; i < nargs; i++) + args.args[i] = va_arg(list, prom_arg_t); + va_end(list); + + for (i = 0; i < nret; i++) + args.args[nargs+i] = 0; + + enter_prom(&args, RELOC(prom_entry)); + + return (nret > 0) ? args.args[nargs] : 0; +} + +static int __init call_prom_ret(const char *service, int nargs, int nret, + prom_arg_t *rets, ...) +{ + int i; + struct prom_args args; + va_list list; + + args.service = ADDR(service); + args.nargs = nargs; + args.nret = nret; + + va_start(list, rets); + for (i = 0; i < nargs; i++) + args.args[i] = va_arg(list, prom_arg_t); + va_end(list); + + for (i = 0; i < nret; i++) + rets[nargs+i] = 0; + + enter_prom(&args, RELOC(prom_entry)); + + if (rets != NULL) + for (i = 1; i < nret; ++i) + rets[i-1] = args.args[nargs+i]; + + return (nret > 0) ? args.args[nargs] : 0; +} + + +static unsigned int __init prom_claim(unsigned long virt, unsigned long size, + unsigned long align) +{ + return (unsigned int)call_prom("claim", 3, 1, + (prom_arg_t)virt, (prom_arg_t)size, + (prom_arg_t)align); +} + +static void __init prom_print(const char *msg) +{ + const char *p, *q; + struct prom_t *_prom = &RELOC(prom); + + if (_prom->stdout == 0) + return; + + for (p = msg; *p != 0; p = q) { + for (q = p; *q != 0 && *q != '\n'; ++q) + ; + if (q > p) + call_prom("write", 3, 1, _prom->stdout, p, q - p); + if (*q == 0) + break; + ++q; + call_prom("write", 3, 1, _prom->stdout, ADDR("\r\n"), 2); + } +} + + +static void __init prom_print_hex(unsigned long val) +{ + int i, nibbles = sizeof(val)*2; + char buf[sizeof(val)*2+1]; + struct prom_t *_prom = &RELOC(prom); + + for (i = nibbles-1; i >= 0; i--) { + buf[i] = (val & 0xf) + '0'; + if (buf[i] > '9') + buf[i] += ('a'-'0'-10); + val >>= 4; + } + buf[nibbles] = '\0'; + call_prom("write", 3, 1, _prom->stdout, buf, nibbles); +} + + +static void __init prom_printf(const char *format, ...) +{ + const char *p, *q, *s; + va_list args; + unsigned long v; + struct prom_t *_prom = &RELOC(prom); + + va_start(args, format); +#ifdef CONFIG_PPC64 + format = PTRRELOC(format); +#endif + for (p = format; *p != 0; p = q) { + for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q) + ; + if (q > p) + call_prom("write", 3, 1, _prom->stdout, p, q - p); + if (*q == 0) + break; + if (*q == '\n') { + ++q; + call_prom("write", 3, 1, _prom->stdout, + ADDR("\r\n"), 2); + continue; + } + ++q; + if (*q == 0) + break; + switch (*q) { + case 's': + ++q; + s = va_arg(args, const char *); + prom_print(s); + break; + case 'x': + ++q; + v = va_arg(args, unsigned long); + prom_print_hex(v); + break; + } + } +} + + +static void __init __attribute__((noreturn)) prom_panic(const char *reason) +{ +#ifdef CONFIG_PPC64 + reason = PTRRELOC(reason); +#endif + prom_print(reason); + /* ToDo: should put up an SRC here on p/iSeries */ + call_prom("exit", 0, 0); + + for (;;) /* should never get here */ + ; +} + + +static int __init prom_next_node(phandle *nodep) +{ + phandle node; + + if ((node = *nodep) != 0 + && (*nodep = call_prom("child", 1, 1, node)) != 0) + return 1; + if ((*nodep = call_prom("peer", 1, 1, node)) != 0) + return 1; + for (;;) { + if ((node = call_prom("parent", 1, 1, node)) == 0) + return 0; + if ((*nodep = call_prom("peer", 1, 1, node)) != 0) + return 1; + } +} + +static int __init prom_getprop(phandle node, const char *pname, + void *value, size_t valuelen) +{ + return call_prom("getprop", 4, 1, node, ADDR(pname), + (u32)(unsigned long) value, (u32) valuelen); +} + +static int __init prom_getproplen(phandle node, const char *pname) +{ + return call_prom("getproplen", 2, 1, node, ADDR(pname)); +} + +static int __init prom_setprop(phandle node, const char *pname, + void *value, size_t valuelen) +{ + return call_prom("setprop", 4, 1, node, ADDR(pname), + (u32)(unsigned long) value, (u32) valuelen); +} + +/* We can't use the standard versions because of RELOC headaches. */ +#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ + || ('a' <= (c) && (c) <= 'f') \ + || ('A' <= (c) && (c) <= 'F')) + +#define isdigit(c) ('0' <= (c) && (c) <= '9') +#define islower(c) ('a' <= (c) && (c) <= 'z') +#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c)) + +unsigned long prom_strtoul(const char *cp, const char **endp) +{ + unsigned long result = 0, base = 10, value; + + if (*cp == '0') { + base = 8; + cp++; + if (toupper(*cp) == 'X') { + cp++; + base = 16; + } + } + + while (isxdigit(*cp) && + (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) { + result = result * base + value; + cp++; + } + + if (endp) + *endp = cp; + + return result; +} + +unsigned long prom_memparse(const char *ptr, const char **retptr) +{ + unsigned long ret = prom_strtoul(ptr, retptr); + int shift = 0; + + /* + * We can't use a switch here because GCC *may* generate a + * jump table which won't work, because we're not running at + * the address we're linked at. + */ + if ('G' == **retptr || 'g' == **retptr) + shift = 30; + + if ('M' == **retptr || 'm' == **retptr) + shift = 20; + + if ('K' == **retptr || 'k' == **retptr) + shift = 10; + + if (shift) { + ret <<= shift; + (*retptr)++; + } + + return ret; +} + +/* + * Early parsing of the command line passed to the kernel, used for + * "mem=x" and the options that affect the iommu + */ +static void __init early_cmdline_parse(void) +{ + struct prom_t *_prom = &RELOC(prom); + char *opt, *p; + int l = 0; + + RELOC(prom_cmd_line[0]) = 0; + p = RELOC(prom_cmd_line); + if ((long)_prom->chosen > 0) + l = prom_getprop(_prom->chosen, "bootargs", p, COMMAND_LINE_SIZE-1); +#ifdef CONFIG_CMDLINE + if (l == 0) /* dbl check */ + strlcpy(RELOC(prom_cmd_line), + RELOC(CONFIG_CMDLINE), sizeof(prom_cmd_line)); +#endif /* CONFIG_CMDLINE */ + prom_printf("command line: %s\n", RELOC(prom_cmd_line)); + +#ifdef CONFIG_PPC64 + opt = strstr(RELOC(prom_cmd_line), RELOC("iommu=")); + if (opt) { + prom_printf("iommu opt is: %s\n", opt); + opt += 6; + while (*opt && *opt == ' ') + opt++; + if (!strncmp(opt, RELOC("off"), 3)) + RELOC(ppc64_iommu_off) = 1; + else if (!strncmp(opt, RELOC("force"), 5)) + RELOC(iommu_force_on) = 1; + } +#endif + + opt = strstr(RELOC(prom_cmd_line), RELOC("mem=")); + if (opt) { + opt += 4; + RELOC(prom_memory_limit) = prom_memparse(opt, (const char **)&opt); +#ifdef CONFIG_PPC64 + /* Align to 16 MB == size of ppc64 large page */ + RELOC(prom_memory_limit) = ALIGN(RELOC(prom_memory_limit), 0x1000000); +#endif + } +} + +#ifdef CONFIG_PPC_PSERIES +/* + * To tell the firmware what our capabilities are, we have to pass + * it a fake 32-bit ELF header containing a couple of PT_NOTE sections + * that contain structures that contain the actual values. + */ +static struct fake_elf { + Elf32_Ehdr elfhdr; + Elf32_Phdr phdr[2]; + struct chrpnote { + u32 namesz; + u32 descsz; + u32 type; + char name[8]; /* "PowerPC" */ + struct chrpdesc { + u32 real_mode; + u32 real_base; + u32 real_size; + u32 virt_base; + u32 virt_size; + u32 load_base; + } chrpdesc; + } chrpnote; + struct rpanote { + u32 namesz; + u32 descsz; + u32 type; + char name[24]; /* "IBM,RPA-Client-Config" */ + struct rpadesc { + u32 lpar_affinity; + u32 min_rmo_size; + u32 min_rmo_percent; + u32 max_pft_size; + u32 splpar; + u32 min_load; + u32 new_mem_def; + u32 ignore_me; + } rpadesc; + } rpanote; +} fake_elf = { + .elfhdr = { + .e_ident = { 0x7f, 'E', 'L', 'F', + ELFCLASS32, ELFDATA2MSB, EV_CURRENT }, + .e_type = ET_EXEC, /* yeah right */ + .e_machine = EM_PPC, + .e_version = EV_CURRENT, + .e_phoff = offsetof(struct fake_elf, phdr), + .e_phentsize = sizeof(Elf32_Phdr), + .e_phnum = 2 + }, + .phdr = { + [0] = { + .p_type = PT_NOTE, + .p_offset = offsetof(struct fake_elf, chrpnote), + .p_filesz = sizeof(struct chrpnote) + }, [1] = { + .p_type = PT_NOTE, + .p_offset = offsetof(struct fake_elf, rpanote), + .p_filesz = sizeof(struct rpanote) + } + }, + .chrpnote = { + .namesz = sizeof("PowerPC"), + .descsz = sizeof(struct chrpdesc), + .type = 0x1275, + .name = "PowerPC", + .chrpdesc = { + .real_mode = ~0U, /* ~0 means "don't care" */ + .real_base = ~0U, + .real_size = ~0U, + .virt_base = ~0U, + .virt_size = ~0U, + .load_base = ~0U + }, + }, + .rpanote = { + .namesz = sizeof("IBM,RPA-Client-Config"), + .descsz = sizeof(struct rpadesc), + .type = 0x12759999, + .name = "IBM,RPA-Client-Config", + .rpadesc = { + .lpar_affinity = 0, + .min_rmo_size = 64, /* in megabytes */ + .min_rmo_percent = 0, + .max_pft_size = 48, /* 2^48 bytes max PFT size */ + .splpar = 1, + .min_load = ~0U, + .new_mem_def = 0 + } + } +}; + +static void __init prom_send_capabilities(void) +{ + ihandle elfloader; + + elfloader = call_prom("open", 1, 1, ADDR("/packages/elf-loader")); + if (elfloader == 0) { + prom_printf("couldn't open /packages/elf-loader\n"); + return; + } + call_prom("call-method", 3, 1, ADDR("process-elf-header"), + elfloader, ADDR(&fake_elf)); + call_prom("close", 1, 0, elfloader); +} +#endif + +/* + * Memory allocation strategy... our layout is normally: + * + * at 14Mb or more we have vmlinux, then a gap and initrd. In some + * rare cases, initrd might end up being before the kernel though. + * We assume this won't override the final kernel at 0, we have no + * provision to handle that in this version, but it should hopefully + * never happen. + * + * alloc_top is set to the top of RMO, eventually shrink down if the + * TCEs overlap + * + * alloc_bottom is set to the top of kernel/initrd + * + * from there, allocations are done this way : rtas is allocated + * topmost, and the device-tree is allocated from the bottom. We try + * to grow the device-tree allocation as we progress. If we can't, + * then we fail, we don't currently have a facility to restart + * elsewhere, but that shouldn't be necessary. + * + * Note that calls to reserve_mem have to be done explicitly, memory + * allocated with either alloc_up or alloc_down isn't automatically + * reserved. + */ + + +/* + * Allocates memory in the RMO upward from the kernel/initrd + * + * When align is 0, this is a special case, it means to allocate in place + * at the current location of alloc_bottom or fail (that is basically + * extending the previous allocation). Used for the device-tree flattening + */ +static unsigned long __init alloc_up(unsigned long size, unsigned long align) +{ + unsigned long base = _ALIGN_UP(RELOC(alloc_bottom), align); + unsigned long addr = 0; + + prom_debug("alloc_up(%x, %x)\n", size, align); + if (RELOC(ram_top) == 0) + prom_panic("alloc_up() called with mem not initialized\n"); + + if (align) + base = _ALIGN_UP(RELOC(alloc_bottom), align); + else + base = RELOC(alloc_bottom); + + for(; (base + size) <= RELOC(alloc_top); + base = _ALIGN_UP(base + 0x100000, align)) { + prom_debug(" trying: 0x%x\n\r", base); + addr = (unsigned long)prom_claim(base, size, 0); + if (addr != PROM_ERROR) + break; + addr = 0; + if (align == 0) + break; + } + if (addr == 0) + return 0; + RELOC(alloc_bottom) = addr; + + prom_debug(" -> %x\n", addr); + prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom)); + prom_debug(" alloc_top : %x\n", RELOC(alloc_top)); + prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); + prom_debug(" rmo_top : %x\n", RELOC(rmo_top)); + prom_debug(" ram_top : %x\n", RELOC(ram_top)); + + return addr; +} + +/* + * Allocates memory downward, either from top of RMO, or if highmem + * is set, from the top of RAM. Note that this one doesn't handle + * failures. It does claim memory if highmem is not set. + */ +static unsigned long __init alloc_down(unsigned long size, unsigned long align, + int highmem) +{ + unsigned long base, addr = 0; + + prom_debug("alloc_down(%x, %x, %s)\n", size, align, + highmem ? RELOC("(high)") : RELOC("(low)")); + if (RELOC(ram_top) == 0) + prom_panic("alloc_down() called with mem not initialized\n"); + + if (highmem) { + /* Carve out storage for the TCE table. */ + addr = _ALIGN_DOWN(RELOC(alloc_top_high) - size, align); + if (addr <= RELOC(alloc_bottom)) + return 0; + /* Will we bump into the RMO ? If yes, check out that we + * didn't overlap existing allocations there, if we did, + * we are dead, we must be the first in town ! + */ + if (addr < RELOC(rmo_top)) { + /* Good, we are first */ + if (RELOC(alloc_top) == RELOC(rmo_top)) + RELOC(alloc_top) = RELOC(rmo_top) = addr; + else + return 0; + } + RELOC(alloc_top_high) = addr; + goto bail; + } + + base = _ALIGN_DOWN(RELOC(alloc_top) - size, align); + for (; base > RELOC(alloc_bottom); + base = _ALIGN_DOWN(base - 0x100000, align)) { + prom_debug(" trying: 0x%x\n\r", base); + addr = (unsigned long)prom_claim(base, size, 0); + if (addr != PROM_ERROR) + break; + addr = 0; + } + if (addr == 0) + return 0; + RELOC(alloc_top) = addr; + + bail: + prom_debug(" -> %x\n", addr); + prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom)); + prom_debug(" alloc_top : %x\n", RELOC(alloc_top)); + prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); + prom_debug(" rmo_top : %x\n", RELOC(rmo_top)); + prom_debug(" ram_top : %x\n", RELOC(ram_top)); + + return addr; +} + +/* + * Parse a "reg" cell + */ +static unsigned long __init prom_next_cell(int s, cell_t **cellp) +{ + cell_t *p = *cellp; + unsigned long r = 0; + + /* Ignore more than 2 cells */ + while (s > sizeof(unsigned long) / 4) { + p++; + s--; + } + r = *p++; +#ifdef CONFIG_PPC64 + if (s) { + r <<= 32; + r |= *(p++); + } +#endif + *cellp = p; + return r; +} + +/* + * Very dumb function for adding to the memory reserve list, but + * we don't need anything smarter at this point + * + * XXX Eventually check for collisions. They should NEVER happen. + * If problems seem to show up, it would be a good start to track + * them down. + */ +static void reserve_mem(unsigned long base, unsigned long size) +{ + unsigned long top = base + size; + unsigned long cnt = RELOC(mem_reserve_cnt); + + if (size == 0) + return; + + /* We need to always keep one empty entry so that we + * have our terminator with "size" set to 0 since we are + * dumb and just copy this entire array to the boot params + */ + base = _ALIGN_DOWN(base, PAGE_SIZE); + top = _ALIGN_UP(top, PAGE_SIZE); + size = top - base; + + if (cnt >= (MEM_RESERVE_MAP_SIZE - 1)) + prom_panic("Memory reserve map exhausted !\n"); + RELOC(mem_reserve_map)[cnt].base = base; + RELOC(mem_reserve_map)[cnt].size = size; + RELOC(mem_reserve_cnt) = cnt + 1; +} + +/* + * Initialize memory allocation mecanism, parse "memory" nodes and + * obtain that way the top of memory and RMO to setup out local allocator + */ +static void __init prom_init_mem(void) +{ + phandle node; + char *path, type[64]; + unsigned int plen; + cell_t *p, *endp; + struct prom_t *_prom = &RELOC(prom); + u32 rac, rsc; + + /* + * We iterate the memory nodes to find + * 1) top of RMO (first node) + * 2) top of memory + */ + rac = 2; + prom_getprop(_prom->root, "#address-cells", &rac, sizeof(rac)); + rsc = 1; + prom_getprop(_prom->root, "#size-cells", &rsc, sizeof(rsc)); + prom_debug("root_addr_cells: %x\n", (unsigned long) rac); + prom_debug("root_size_cells: %x\n", (unsigned long) rsc); + + prom_debug("scanning memory:\n"); + path = RELOC(prom_scratch); + + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + prom_getprop(node, "device_type", type, sizeof(type)); + + if (strcmp(type, RELOC("memory"))) + continue; + + plen = prom_getprop(node, "reg", RELOC(regbuf), sizeof(regbuf)); + if (plen > sizeof(regbuf)) { + prom_printf("memory node too large for buffer !\n"); + plen = sizeof(regbuf); + } + p = RELOC(regbuf); + endp = p + (plen / sizeof(cell_t)); + +#ifdef DEBUG_PROM + memset(path, 0, PROM_SCRATCH_SIZE); + call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + prom_debug(" node %s :\n", path); +#endif /* DEBUG_PROM */ + + while ((endp - p) >= (rac + rsc)) { + unsigned long base, size; + + base = prom_next_cell(rac, &p); + size = prom_next_cell(rsc, &p); + + if (size == 0) + continue; + prom_debug(" %x %x\n", base, size); + if (base == 0) + RELOC(rmo_top) = size; + if ((base + size) > RELOC(ram_top)) + RELOC(ram_top) = base + size; + } + } + + RELOC(alloc_bottom) = PAGE_ALIGN((unsigned long)&RELOC(_end) + 0x4000); + + /* Check if we have an initrd after the kernel, if we do move our bottom + * point to after it + */ + if (RELOC(prom_initrd_start)) { + if (RELOC(prom_initrd_end) > RELOC(alloc_bottom)) + RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); + } + + /* + * If prom_memory_limit is set we reduce the upper limits *except* for + * alloc_top_high. This must be the real top of RAM so we can put + * TCE's up there. + */ + + RELOC(alloc_top_high) = RELOC(ram_top); + + if (RELOC(prom_memory_limit)) { + if (RELOC(prom_memory_limit) <= RELOC(alloc_bottom)) { + prom_printf("Ignoring mem=%x <= alloc_bottom.\n", + RELOC(prom_memory_limit)); + RELOC(prom_memory_limit) = 0; + } else if (RELOC(prom_memory_limit) >= RELOC(ram_top)) { + prom_printf("Ignoring mem=%x >= ram_top.\n", + RELOC(prom_memory_limit)); + RELOC(prom_memory_limit) = 0; + } else { + RELOC(ram_top) = RELOC(prom_memory_limit); + RELOC(rmo_top) = min(RELOC(rmo_top), RELOC(prom_memory_limit)); + } + } + + /* + * Setup our top alloc point, that is top of RMO or top of + * segment 0 when running non-LPAR. + * Some RS64 machines have buggy firmware where claims up at + * 1GB fail. Cap at 768MB as a workaround. + * Since 768MB is plenty of room, and we need to cap to something + * reasonable on 32-bit, cap at 768MB on all machines. + */ + if (!RELOC(rmo_top)) + RELOC(rmo_top) = RELOC(ram_top); + RELOC(rmo_top) = min(0x30000000ul, RELOC(rmo_top)); + RELOC(alloc_top) = RELOC(rmo_top); + + prom_printf("memory layout at init:\n"); + prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); + prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom)); + prom_printf(" alloc_top : %x\n", RELOC(alloc_top)); + prom_printf(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); + prom_printf(" rmo_top : %x\n", RELOC(rmo_top)); + prom_printf(" ram_top : %x\n", RELOC(ram_top)); +} + + +/* + * Allocate room for and instantiate RTAS + */ +static void __init prom_instantiate_rtas(void) +{ + phandle rtas_node; + ihandle rtas_inst; + u32 base, entry = 0; + u32 size = 0; + + prom_debug("prom_instantiate_rtas: start...\n"); + + rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas")); + prom_debug("rtas_node: %x\n", rtas_node); + if (!PHANDLE_VALID(rtas_node)) + return; + + prom_getprop(rtas_node, "rtas-size", &size, sizeof(size)); + if (size == 0) + return; + + base = alloc_down(size, PAGE_SIZE, 0); + if (base == 0) { + prom_printf("RTAS allocation failed !\n"); + return; + } + + rtas_inst = call_prom("open", 1, 1, ADDR("/rtas")); + if (!IHANDLE_VALID(rtas_inst)) { + prom_printf("opening rtas package failed"); + return; + } + + prom_printf("instantiating rtas at 0x%x ...", base); + + if (call_prom_ret("call-method", 3, 2, &entry, + ADDR("instantiate-rtas"), + rtas_inst, base) == PROM_ERROR + || entry == 0) { + prom_printf(" failed\n"); + return; + } + prom_printf(" done\n"); + + reserve_mem(base, size); + + prom_setprop(rtas_node, "linux,rtas-base", &base, sizeof(base)); + prom_setprop(rtas_node, "linux,rtas-entry", &entry, sizeof(entry)); + + prom_debug("rtas base = 0x%x\n", base); + prom_debug("rtas entry = 0x%x\n", entry); + prom_debug("rtas size = 0x%x\n", (long)size); + + prom_debug("prom_instantiate_rtas: end...\n"); +} + +#ifdef CONFIG_PPC64 +/* + * Allocate room for and initialize TCE tables + */ +static void __init prom_initialize_tce_table(void) +{ + phandle node; + ihandle phb_node; + char compatible[64], type[64], model[64]; + char *path = RELOC(prom_scratch); + u64 base, align; + u32 minalign, minsize; + u64 tce_entry, *tce_entryp; + u64 local_alloc_top, local_alloc_bottom; + u64 i; + + if (RELOC(ppc64_iommu_off)) + return; + + prom_debug("starting prom_initialize_tce_table\n"); + + /* Cache current top of allocs so we reserve a single block */ + local_alloc_top = RELOC(alloc_top_high); + local_alloc_bottom = local_alloc_top; + + /* Search all nodes looking for PHBs. */ + for (node = 0; prom_next_node(&node); ) { + compatible[0] = 0; + type[0] = 0; + model[0] = 0; + prom_getprop(node, "compatible", + compatible, sizeof(compatible)); + prom_getprop(node, "device_type", type, sizeof(type)); + prom_getprop(node, "model", model, sizeof(model)); + + if ((type[0] == 0) || (strstr(type, RELOC("pci")) == NULL)) + continue; + + /* Keep the old logic in tack to avoid regression. */ + if (compatible[0] != 0) { + if ((strstr(compatible, RELOC("python")) == NULL) && + (strstr(compatible, RELOC("Speedwagon")) == NULL) && + (strstr(compatible, RELOC("Winnipeg")) == NULL)) + continue; + } else if (model[0] != 0) { + if ((strstr(model, RELOC("ython")) == NULL) && + (strstr(model, RELOC("peedwagon")) == NULL) && + (strstr(model, RELOC("innipeg")) == NULL)) + continue; + } + + if (prom_getprop(node, "tce-table-minalign", &minalign, + sizeof(minalign)) == PROM_ERROR) + minalign = 0; + if (prom_getprop(node, "tce-table-minsize", &minsize, + sizeof(minsize)) == PROM_ERROR) + minsize = 4UL << 20; + + /* + * Even though we read what OF wants, we just set the table + * size to 4 MB. This is enough to map 2GB of PCI DMA space. + * By doing this, we avoid the pitfalls of trying to DMA to + * MMIO space and the DMA alias hole. + * + * On POWER4, firmware sets the TCE region by assuming + * each TCE table is 8MB. Using this memory for anything + * else will impact performance, so we always allocate 8MB. + * Anton + */ + if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p)) + minsize = 8UL << 20; + else + minsize = 4UL << 20; + + /* Align to the greater of the align or size */ + align = max(minalign, minsize); + base = alloc_down(minsize, align, 1); + if (base == 0) + prom_panic("ERROR, cannot find space for TCE table.\n"); + if (base < local_alloc_bottom) + local_alloc_bottom = base; + + /* Save away the TCE table attributes for later use. */ + prom_setprop(node, "linux,tce-base", &base, sizeof(base)); + prom_setprop(node, "linux,tce-size", &minsize, sizeof(minsize)); + + /* It seems OF doesn't null-terminate the path :-( */ + memset(path, 0, sizeof(path)); + /* Call OF to setup the TCE hardware */ + if (call_prom("package-to-path", 3, 1, node, + path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) { + prom_printf("package-to-path failed\n"); + } + + prom_debug("TCE table: %s\n", path); + prom_debug("\tnode = 0x%x\n", node); + prom_debug("\tbase = 0x%x\n", base); + prom_debug("\tsize = 0x%x\n", minsize); + + /* Initialize the table to have a one-to-one mapping + * over the allocated size. + */ + tce_entryp = (unsigned long *)base; + for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) { + tce_entry = (i << PAGE_SHIFT); + tce_entry |= 0x3; + *tce_entryp = tce_entry; + } + + prom_printf("opening PHB %s", path); + phb_node = call_prom("open", 1, 1, path); + if (phb_node == 0) + prom_printf("... failed\n"); + else + prom_printf("... done\n"); + + call_prom("call-method", 6, 0, ADDR("set-64-bit-addressing"), + phb_node, -1, minsize, + (u32) base, (u32) (base >> 32)); + call_prom("close", 1, 0, phb_node); + } + + reserve_mem(local_alloc_bottom, local_alloc_top - local_alloc_bottom); + + if (RELOC(prom_memory_limit)) { + /* + * We align the start to a 16MB boundary so we can map + * the TCE area using large pages if possible. + * The end should be the top of RAM so no need to align it. + */ + RELOC(prom_tce_alloc_start) = _ALIGN_DOWN(local_alloc_bottom, + 0x1000000); + RELOC(prom_tce_alloc_end) = local_alloc_top; + } + + /* Flag the first invalid entry */ + prom_debug("ending prom_initialize_tce_table\n"); +} +#endif + +/* + * With CHRP SMP we need to use the OF to start the other processors. + * We can't wait until smp_boot_cpus (the OF is trashed by then) + * so we have to put the processors into a holding pattern controlled + * by the kernel (not OF) before we destroy the OF. + * + * This uses a chunk of low memory, puts some holding pattern + * code there and sends the other processors off to there until + * smp_boot_cpus tells them to do something. The holding pattern + * checks that address until its cpu # is there, when it is that + * cpu jumps to __secondary_start(). smp_boot_cpus() takes care + * of setting those values. + * + * We also use physical address 0x4 here to tell when a cpu + * is in its holding pattern code. + * + * -- Cort + */ +static void __init prom_hold_cpus(void) +{ +#ifdef CONFIG_PPC64 + unsigned long i; + unsigned int reg; + phandle node; + char type[64]; + int cpuid = 0; + unsigned int interrupt_server[MAX_CPU_THREADS]; + unsigned int cpu_threads, hw_cpu_num; + int propsize; + extern void __secondary_hold(void); + extern unsigned long __secondary_hold_spinloop; + extern unsigned long __secondary_hold_acknowledge; + unsigned long *spinloop + = (void *) __pa(&__secondary_hold_spinloop); + unsigned long *acknowledge + = (void *) __pa(&__secondary_hold_acknowledge); +#ifdef CONFIG_PPC64 + unsigned long secondary_hold + = __pa(*PTRRELOC((unsigned long *)__secondary_hold)); +#else + unsigned long secondary_hold = __pa(&__secondary_hold); +#endif + struct prom_t *_prom = &RELOC(prom); + + prom_debug("prom_hold_cpus: start...\n"); + prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); + prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); + prom_debug(" 1) acknowledge = 0x%x\n", + (unsigned long)acknowledge); + prom_debug(" 1) *acknowledge = 0x%x\n", *acknowledge); + prom_debug(" 1) secondary_hold = 0x%x\n", secondary_hold); + + /* Set the common spinloop variable, so all of the secondary cpus + * will block when they are awakened from their OF spinloop. + * This must occur for both SMP and non SMP kernels, since OF will + * be trashed when we move the kernel. + */ + *spinloop = 0; + +#ifdef CONFIG_HMT + for (i = 0; i < NR_CPUS; i++) { + RELOC(hmt_thread_data)[i].pir = 0xdeadbeef; + } +#endif + /* look for cpus */ + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + prom_getprop(node, "device_type", type, sizeof(type)); + if (strcmp(type, RELOC("cpu")) != 0) + continue; + + /* Skip non-configured cpus. */ + if (prom_getprop(node, "status", type, sizeof(type)) > 0) + if (strcmp(type, RELOC("okay")) != 0) + continue; + + reg = -1; + prom_getprop(node, "reg", ®, sizeof(reg)); + + prom_debug("\ncpuid = 0x%x\n", cpuid); + prom_debug("cpu hw idx = 0x%x\n", reg); + + /* Init the acknowledge var which will be reset by + * the secondary cpu when it awakens from its OF + * spinloop. + */ + *acknowledge = (unsigned long)-1; + + propsize = prom_getprop(node, "ibm,ppc-interrupt-server#s", + &interrupt_server, + sizeof(interrupt_server)); + if (propsize < 0) { + /* no property. old hardware has no SMT */ + cpu_threads = 1; + interrupt_server[0] = reg; /* fake it with phys id */ + } else { + /* We have a threaded processor */ + cpu_threads = propsize / sizeof(u32); + if (cpu_threads > MAX_CPU_THREADS) { + prom_printf("SMT: too many threads!\n" + "SMT: found %x, max is %x\n", + cpu_threads, MAX_CPU_THREADS); + cpu_threads = 1; /* ToDo: panic? */ + } + } + + hw_cpu_num = interrupt_server[0]; + if (hw_cpu_num != _prom->cpu) { + /* Primary Thread of non-boot cpu */ + prom_printf("%x : starting cpu hw idx %x... ", cpuid, reg); + call_prom("start-cpu", 3, 0, node, + secondary_hold, reg); + + for ( i = 0 ; (i < 100000000) && + (*acknowledge == ((unsigned long)-1)); i++ ) + mb(); + + if (*acknowledge == reg) { + prom_printf("done\n"); + /* We have to get every CPU out of OF, + * even if we never start it. */ + if (cpuid >= NR_CPUS) + goto next; + } else { + prom_printf("failed: %x\n", *acknowledge); + } + } +#ifdef CONFIG_SMP + else + prom_printf("%x : boot cpu %x\n", cpuid, reg); +#endif +next: +#ifdef CONFIG_SMP + /* Init paca for secondary threads. They start later. */ + for (i=1; i < cpu_threads; i++) { + cpuid++; + if (cpuid >= NR_CPUS) + continue; + } +#endif /* CONFIG_SMP */ + cpuid++; + } +#ifdef CONFIG_HMT + /* Only enable HMT on processors that provide support. */ + if (__is_processor(PV_PULSAR) || + __is_processor(PV_ICESTAR) || + __is_processor(PV_SSTAR)) { + prom_printf(" starting secondary threads\n"); + + for (i = 0; i < NR_CPUS; i += 2) { + if (!cpu_online(i)) + continue; + + if (i == 0) { + unsigned long pir = mfspr(SPRN_PIR); + if (__is_processor(PV_PULSAR)) { + RELOC(hmt_thread_data)[i].pir = + pir & 0x1f; + } else { + RELOC(hmt_thread_data)[i].pir = + pir & 0x3ff; + } + } + } + } else { + prom_printf("Processor is not HMT capable\n"); + } +#endif + + if (cpuid > NR_CPUS) + prom_printf("WARNING: maximum CPUs (" __stringify(NR_CPUS) + ") exceeded: ignoring extras\n"); + + prom_debug("prom_hold_cpus: end...\n"); +#endif +} + + +static void __init prom_init_client_services(unsigned long pp) +{ + struct prom_t *_prom = &RELOC(prom); + + /* Get a handle to the prom entry point before anything else */ + RELOC(prom_entry) = pp; + + /* get a handle for the stdout device */ + _prom->chosen = call_prom("finddevice", 1, 1, ADDR("/chosen")); + if (!PHANDLE_VALID(_prom->chosen)) + prom_panic("cannot find chosen"); /* msg won't be printed :( */ + + /* get device tree root */ + _prom->root = call_prom("finddevice", 1, 1, ADDR("/")); + if (!PHANDLE_VALID(_prom->root)) + prom_panic("cannot find device tree root"); /* msg won't be printed :( */ +} + +static void __init prom_init_stdout(void) +{ + struct prom_t *_prom = &RELOC(prom); + char *path = RELOC(of_stdout_device); + char type[16]; + u32 val; + + if (prom_getprop(_prom->chosen, "stdout", &val, sizeof(val)) <= 0) + prom_panic("cannot find stdout"); + + _prom->stdout = val; + + /* Get the full OF pathname of the stdout device */ + memset(path, 0, 256); + call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255); + val = call_prom("instance-to-package", 1, 1, _prom->stdout); + prom_setprop(_prom->chosen, "linux,stdout-package", &val, sizeof(val)); + prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device)); + prom_setprop(_prom->chosen, "linux,stdout-path", + RELOC(of_stdout_device), strlen(RELOC(of_stdout_device))+1); + + /* If it's a display, note it */ + memset(type, 0, sizeof(type)); + prom_getprop(val, "device_type", type, sizeof(type)); + if (strcmp(type, RELOC("display")) == 0) + prom_setprop(val, "linux,boot-display", NULL, 0); +} + +static void __init prom_close_stdin(void) +{ + struct prom_t *_prom = &RELOC(prom); + ihandle val; + + if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0) + call_prom("close", 1, 0, val); +} + +static int __init prom_find_machine_type(void) +{ + struct prom_t *_prom = &RELOC(prom); + char compat[256]; + int len, i = 0; + phandle rtas; + + len = prom_getprop(_prom->root, "compatible", + compat, sizeof(compat)-1); + if (len > 0) { + compat[len] = 0; + while (i < len) { + char *p = &compat[i]; + int sl = strlen(p); + if (sl == 0) + break; + if (strstr(p, RELOC("Power Macintosh")) || + strstr(p, RELOC("MacRISC4"))) + return PLATFORM_POWERMAC; +#ifdef CONFIG_PPC64 + if (strstr(p, RELOC("Momentum,Maple"))) + return PLATFORM_MAPLE; +#endif + i += sl + 1; + } + } +#ifdef CONFIG_PPC64 + /* Default to pSeries. We need to know if we are running LPAR */ + rtas = call_prom("finddevice", 1, 1, ADDR("/rtas")); + if (PHANDLE_VALID(rtas)) { + int x = prom_getproplen(rtas, "ibm,hypertas-functions"); + if (x != PROM_ERROR) { + prom_printf("Hypertas detected, assuming LPAR !\n"); + return PLATFORM_PSERIES_LPAR; + } + } + return PLATFORM_PSERIES; +#else + return PLATFORM_CHRP; +#endif +} + +static int __init setup_disp(phandle dp) +{ +#if defined(CONFIG_BOOTX_TEXT) && defined(CONFIG_PPC32) + int width = 640, height = 480, depth = 8, pitch; + unsigned address; + u32 addrs[8][5]; + int i, naddrs; + char name[32]; + char *getprop = "getprop"; + + prom_printf("Initializing screen: "); + + memset(name, 0, sizeof(name)); + call_prom(getprop, 4, 1, dp, "name", name, sizeof(name)); + name[sizeof(name)-1] = 0; + prom_print(name); + prom_print("\n"); + call_prom(getprop, 4, 1, dp, "width", &width, sizeof(width)); + call_prom(getprop, 4, 1, dp, "height", &height, sizeof(height)); + call_prom(getprop, 4, 1, dp, "depth", &depth, sizeof(depth)); + pitch = width * ((depth + 7) / 8); + call_prom(getprop, 4, 1, dp, "linebytes", + &pitch, sizeof(pitch)); + if (pitch == 1) + pitch = 0x1000; /* for strange IBM display */ + address = 0; + call_prom(getprop, 4, 1, dp, "address", &address, sizeof(address)); + if (address == 0) { + /* look for an assigned address with a size of >= 1MB */ + naddrs = call_prom(getprop, 4, 1, dp, "assigned-addresses", + addrs, sizeof(addrs)); + naddrs /= 20; + for (i = 0; i < naddrs; ++i) { + if (addrs[i][4] >= (1 << 20)) { + address = addrs[i][2]; + /* use the BE aperture if possible */ + if (addrs[i][4] >= (16 << 20)) + address += (8 << 20); + break; + } + } + if (address == 0) { + prom_print("Failed to get address\n"); + return 0; + } + } + /* kludge for valkyrie */ + if (strcmp(name, "valkyrie") == 0) + address += 0x1000; + + prom_printf("\n\n\n\naddress = %x\n", address); + btext_setup_display(width, height, depth, pitch, address); +#endif /* CONFIG_BOOTX_TEXT && CONFIG_PPC32 */ + return 1; +} + +static int __init prom_set_color(ihandle ih, int i, int r, int g, int b) +{ + return call_prom("call-method", 6, 1, ADDR("color!"), ih, i, b, g, r); +} + +/* + * If we have a display that we don't know how to drive, + * we will want to try to execute OF's open method for it + * later. However, OF will probably fall over if we do that + * we've taken over the MMU. + * So we check whether we will need to open the display, + * and if so, open it now. + */ +static void __init prom_check_displays(void) +{ + char type[16], *path; + phandle node; + ihandle ih; + int i; + int got_display = 0; + + static unsigned char default_colors[] = { + 0x00, 0x00, 0x00, + 0x00, 0x00, 0xaa, + 0x00, 0xaa, 0x00, + 0x00, 0xaa, 0xaa, + 0xaa, 0x00, 0x00, + 0xaa, 0x00, 0xaa, + 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, + 0x55, 0x55, 0x55, + 0x55, 0x55, 0xff, + 0x55, 0xff, 0x55, + 0x55, 0xff, 0xff, + 0xff, 0x55, 0x55, + 0xff, 0x55, 0xff, + 0xff, 0xff, 0x55, + 0xff, 0xff, 0xff + }; + const unsigned char *clut; + + prom_printf("Looking for displays\n"); + for (node = 0; prom_next_node(&node); ) { + memset(type, 0, sizeof(type)); + prom_getprop(node, "device_type", type, sizeof(type)); + if (strcmp(type, RELOC("display")) != 0) + continue; + + /* It seems OF doesn't null-terminate the path :-( */ + path = RELOC(prom_scratch); + memset(path, 0, PROM_SCRATCH_SIZE); + + /* + * leave some room at the end of the path for appending extra + * arguments + */ + if (call_prom("package-to-path", 3, 1, node, path, + PROM_SCRATCH_SIZE-10) == PROM_ERROR) + continue; + prom_printf("found display : %s, opening ... ", path); + + ih = call_prom("open", 1, 1, path); + if (ih == 0) { + prom_printf("failed\n"); + continue; + } + + /* Success */ + prom_printf("done\n"); + prom_setprop(node, "linux,opened", NULL, 0); + + /* Setup a usable color table when the appropriate + * method is available. Should update this to set-colors */ + clut = RELOC(default_colors); + for (i = 0; i < 32; i++, clut += 3) + if (prom_set_color(ih, i, clut[0], clut[1], + clut[2]) != 0) + break; + +#ifdef CONFIG_LOGO_LINUX_CLUT224 + clut = PTRRELOC(RELOC(logo_linux_clut224.clut)); + for (i = 0; i < RELOC(logo_linux_clut224.clutsize); i++, clut += 3) + if (prom_set_color(ih, i + 32, clut[0], clut[1], + clut[2]) != 0) + break; +#endif /* CONFIG_LOGO_LINUX_CLUT224 */ + if (!got_display) + got_display = setup_disp(node); + } +} + + +/* Return (relocated) pointer to this much memory: moves initrd if reqd. */ +static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, + unsigned long needed, unsigned long align) +{ + void *ret; + + *mem_start = _ALIGN(*mem_start, align); + while ((*mem_start + needed) > *mem_end) { + unsigned long room, chunk; + + prom_debug("Chunk exhausted, claiming more at %x...\n", + RELOC(alloc_bottom)); + room = RELOC(alloc_top) - RELOC(alloc_bottom); + if (room > DEVTREE_CHUNK_SIZE) + room = DEVTREE_CHUNK_SIZE; + if (room < PAGE_SIZE) + prom_panic("No memory for flatten_device_tree (no room)"); + chunk = alloc_up(room, 0); + if (chunk == 0) + prom_panic("No memory for flatten_device_tree (claim failed)"); + *mem_end = RELOC(alloc_top); + } + + ret = (void *)*mem_start; + *mem_start += needed; + + return ret; +} + +#define dt_push_token(token, mem_start, mem_end) \ + do { *((u32 *)make_room(mem_start, mem_end, 4, 4)) = token; } while(0) + +static unsigned long __init dt_find_string(char *str) +{ + char *s, *os; + + s = os = (char *)RELOC(dt_string_start); + s += 4; + while (s < (char *)RELOC(dt_string_end)) { + if (strcmp(s, str) == 0) + return s - os; + s += strlen(s) + 1; + } + return 0; +} + +/* + * The Open Firmware 1275 specification states properties must be 31 bytes or + * less, however not all firmwares obey this. Make it 64 bytes to be safe. + */ +#define MAX_PROPERTY_NAME 64 + +static void __init scan_dt_build_strings(phandle node, + unsigned long *mem_start, + unsigned long *mem_end) +{ + char *prev_name, *namep, *sstart; + unsigned long soff; + phandle child; + + sstart = (char *)RELOC(dt_string_start); + + /* get and store all property names */ + prev_name = RELOC(""); + for (;;) { + /* 64 is max len of name including nul. */ + namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1); + if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) { + /* No more nodes: unwind alloc */ + *mem_start = (unsigned long)namep; + break; + } + + /* skip "name" */ + if (strcmp(namep, RELOC("name")) == 0) { + *mem_start = (unsigned long)namep; + prev_name = RELOC("name"); + continue; + } + /* get/create string entry */ + soff = dt_find_string(namep); + if (soff != 0) { + *mem_start = (unsigned long)namep; + namep = sstart + soff; + } else { + /* Trim off some if we can */ + *mem_start = (unsigned long)namep + strlen(namep) + 1; + RELOC(dt_string_end) = *mem_start; + } + prev_name = namep; + } + + /* do all our children */ + child = call_prom("child", 1, 1, node); + while (child != 0) { + scan_dt_build_strings(child, mem_start, mem_end); + child = call_prom("peer", 1, 1, child); + } +} + +static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, + unsigned long *mem_end) +{ + phandle child; + char *namep, *prev_name, *sstart, *p, *ep, *lp, *path; + unsigned long soff; + unsigned char *valp; + static char pname[MAX_PROPERTY_NAME]; + int l; + + dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); + + /* get the node's full name */ + namep = (char *)*mem_start; + l = call_prom("package-to-path", 3, 1, node, + namep, *mem_end - *mem_start); + if (l >= 0) { + /* Didn't fit? Get more room. */ + if ((l+1) > (*mem_end - *mem_start)) { + namep = make_room(mem_start, mem_end, l+1, 1); + call_prom("package-to-path", 3, 1, node, namep, l); + } + namep[l] = '\0'; + + /* Fixup an Apple bug where they have bogus \0 chars in the + * middle of the path in some properties + */ + for (p = namep, ep = namep + l; p < ep; p++) + if (*p == '\0') { + memmove(p, p+1, ep - p); + ep--; l--; p--; + } + + /* now try to extract the unit name in that mess */ + for (p = namep, lp = NULL; *p; p++) + if (*p == '/') + lp = p + 1; + if (lp != NULL) + memmove(namep, lp, strlen(lp) + 1); + *mem_start = _ALIGN(((unsigned long) namep) + + strlen(namep) + 1, 4); + } + + /* get it again for debugging */ + path = RELOC(prom_scratch); + memset(path, 0, PROM_SCRATCH_SIZE); + call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + + /* get and store all properties */ + prev_name = RELOC(""); + sstart = (char *)RELOC(dt_string_start); + for (;;) { + if (call_prom("nextprop", 3, 1, node, prev_name, + RELOC(pname)) != 1) + break; + + /* skip "name" */ + if (strcmp(RELOC(pname), RELOC("name")) == 0) { + prev_name = RELOC("name"); + continue; + } + + /* find string offset */ + soff = dt_find_string(RELOC(pname)); + if (soff == 0) { + prom_printf("WARNING: Can't find string index for" + " <%s>, node %s\n", RELOC(pname), path); + break; + } + prev_name = sstart + soff; + + /* get length */ + l = call_prom("getproplen", 2, 1, node, RELOC(pname)); + + /* sanity checks */ + if (l == PROM_ERROR) + continue; + if (l > MAX_PROPERTY_LENGTH) { + prom_printf("WARNING: ignoring large property "); + /* It seems OF doesn't null-terminate the path :-( */ + prom_printf("[%s] ", path); + prom_printf("%s length 0x%x\n", RELOC(pname), l); + continue; + } + + /* push property head */ + dt_push_token(OF_DT_PROP, mem_start, mem_end); + dt_push_token(l, mem_start, mem_end); + dt_push_token(soff, mem_start, mem_end); + + /* push property content */ + valp = make_room(mem_start, mem_end, l, 4); + call_prom("getprop", 4, 1, node, RELOC(pname), valp, l); + *mem_start = _ALIGN(*mem_start, 4); + } + + /* Add a "linux,phandle" property. */ + soff = dt_find_string(RELOC("linux,phandle")); + if (soff == 0) + prom_printf("WARNING: Can't find string index for" + " <linux-phandle> node %s\n", path); + else { + dt_push_token(OF_DT_PROP, mem_start, mem_end); + dt_push_token(4, mem_start, mem_end); + dt_push_token(soff, mem_start, mem_end); + valp = make_room(mem_start, mem_end, 4, 4); + *(u32 *)valp = node; + } + + /* do all our children */ + child = call_prom("child", 1, 1, node); + while (child != 0) { + scan_dt_build_struct(child, mem_start, mem_end); + child = call_prom("peer", 1, 1, child); + } + + dt_push_token(OF_DT_END_NODE, mem_start, mem_end); +} + +static void __init flatten_device_tree(void) +{ + phandle root; + unsigned long mem_start, mem_end, room; + struct boot_param_header *hdr; + struct prom_t *_prom = &RELOC(prom); + char *namep; + u64 *rsvmap; + + /* + * Check how much room we have between alloc top & bottom (+/- a + * few pages), crop to 4Mb, as this is our "chuck" size + */ + room = RELOC(alloc_top) - RELOC(alloc_bottom) - 0x4000; + if (room > DEVTREE_CHUNK_SIZE) + room = DEVTREE_CHUNK_SIZE; + prom_debug("starting device tree allocs at %x\n", RELOC(alloc_bottom)); + + /* Now try to claim that */ + mem_start = (unsigned long)alloc_up(room, PAGE_SIZE); + if (mem_start == 0) + prom_panic("Can't allocate initial device-tree chunk\n"); + mem_end = RELOC(alloc_top); + + /* Get root of tree */ + root = call_prom("peer", 1, 1, (phandle)0); + if (root == (phandle)0) + prom_panic ("couldn't get device tree root\n"); + + /* Build header and make room for mem rsv map */ + mem_start = _ALIGN(mem_start, 4); + hdr = make_room(&mem_start, &mem_end, + sizeof(struct boot_param_header), 4); + RELOC(dt_header_start) = (unsigned long)hdr; + rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8); + + /* Start of strings */ + mem_start = PAGE_ALIGN(mem_start); + RELOC(dt_string_start) = mem_start; + mem_start += 4; /* hole */ + + /* Add "linux,phandle" in there, we'll need it */ + namep = make_room(&mem_start, &mem_end, 16, 1); + strcpy(namep, RELOC("linux,phandle")); + mem_start = (unsigned long)namep + strlen(namep) + 1; + + /* Build string array */ + prom_printf("Building dt strings...\n"); + scan_dt_build_strings(root, &mem_start, &mem_end); + RELOC(dt_string_end) = mem_start; + + /* Build structure */ + mem_start = PAGE_ALIGN(mem_start); + RELOC(dt_struct_start) = mem_start; + prom_printf("Building dt structure...\n"); + scan_dt_build_struct(root, &mem_start, &mem_end); + dt_push_token(OF_DT_END, &mem_start, &mem_end); + RELOC(dt_struct_end) = PAGE_ALIGN(mem_start); + + /* Finish header */ + hdr->boot_cpuid_phys = _prom->cpu; + hdr->magic = OF_DT_HEADER; + hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); + hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); + hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start); + hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start); + hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start); + hdr->version = OF_DT_VERSION; + /* Version 16 is not backward compatible */ + hdr->last_comp_version = 0x10; + + /* Reserve the whole thing and copy the reserve map in, we + * also bump mem_reserve_cnt to cause further reservations to + * fail since it's too late. + */ + reserve_mem(RELOC(dt_header_start), hdr->totalsize); + memcpy(rsvmap, RELOC(mem_reserve_map), sizeof(mem_reserve_map)); + +#ifdef DEBUG_PROM + { + int i; + prom_printf("reserved memory map:\n"); + for (i = 0; i < RELOC(mem_reserve_cnt); i++) + prom_printf(" %x - %x\n", + RELOC(mem_reserve_map)[i].base, + RELOC(mem_reserve_map)[i].size); + } +#endif + RELOC(mem_reserve_cnt) = MEM_RESERVE_MAP_SIZE; + + prom_printf("Device tree strings 0x%x -> 0x%x\n", + RELOC(dt_string_start), RELOC(dt_string_end)); + prom_printf("Device tree struct 0x%x -> 0x%x\n", + RELOC(dt_struct_start), RELOC(dt_struct_end)); + +} + + +static void __init fixup_device_tree(void) +{ +#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC) + phandle u3, i2c, mpic; + u32 u3_rev; + u32 interrupts[2]; + u32 parent; + + /* Some G5s have a missing interrupt definition, fix it up here */ + u3 = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000")); + if (!PHANDLE_VALID(u3)) + return; + i2c = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/i2c@f8001000")); + if (!PHANDLE_VALID(i2c)) + return; + mpic = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/mpic@f8040000")); + if (!PHANDLE_VALID(mpic)) + return; + + /* check if proper rev of u3 */ + if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev)) + == PROM_ERROR) + return; + if (u3_rev != 0x35 && u3_rev != 0x37) + return; + /* does it need fixup ? */ + if (prom_getproplen(i2c, "interrupts") > 0) + return; + + prom_printf("fixing up bogus interrupts for u3 i2c...\n"); + + /* interrupt on this revision of u3 is number 0 and level */ + interrupts[0] = 0; + interrupts[1] = 1; + prom_setprop(i2c, "interrupts", &interrupts, sizeof(interrupts)); + parent = (u32)mpic; + prom_setprop(i2c, "interrupt-parent", &parent, sizeof(parent)); +#endif +} + + +static void __init prom_find_boot_cpu(void) +{ + struct prom_t *_prom = &RELOC(prom); + u32 getprop_rval; + ihandle prom_cpu; + phandle cpu_pkg; + + if (prom_getprop(_prom->chosen, "cpu", &prom_cpu, sizeof(prom_cpu)) <= 0) + prom_panic("cannot find boot cpu"); + + cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); + + prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval)); + _prom->cpu = getprop_rval; + + prom_debug("Booting CPU hw index = 0x%x\n", _prom->cpu); +} + +static void __init prom_check_initrd(unsigned long r3, unsigned long r4) +{ +#ifdef CONFIG_BLK_DEV_INITRD + struct prom_t *_prom = &RELOC(prom); + + if (r3 && r4 && r4 != 0xdeadbeef) { + unsigned long val; + + RELOC(prom_initrd_start) = (r3 >= KERNELBASE) ? __pa(r3) : r3; + RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4; + + val = RELOC(prom_initrd_start); + prom_setprop(_prom->chosen, "linux,initrd-start", &val, + sizeof(val)); + val = RELOC(prom_initrd_end); + prom_setprop(_prom->chosen, "linux,initrd-end", &val, + sizeof(val)); + + reserve_mem(RELOC(prom_initrd_start), + RELOC(prom_initrd_end) - RELOC(prom_initrd_start)); + + prom_debug("initrd_start=0x%x\n", RELOC(prom_initrd_start)); + prom_debug("initrd_end=0x%x\n", RELOC(prom_initrd_end)); + } +#endif /* CONFIG_BLK_DEV_INITRD */ +} + +/* + * We enter here early on, when the Open Firmware prom is still + * handling exceptions and the MMU hash table for us. + */ + +unsigned long __init prom_init(unsigned long r3, unsigned long r4, + unsigned long pp, + unsigned long r6, unsigned long r7) +{ + struct prom_t *_prom; + extern char _stext[]; + unsigned long hdr; + u32 getprop_rval; + unsigned long offset = reloc_offset(); + +#ifdef CONFIG_PPC32 + reloc_got2(offset); +#endif + + _prom = &RELOC(prom); + + /* + * First zero the BSS + */ + memset(&RELOC(__bss_start), 0, __bss_stop - __bss_start); + + /* + * Init interface to Open Firmware, get some node references, + * like /chosen + */ + prom_init_client_services(pp); + + /* + * Init prom stdout device + */ + prom_init_stdout(); + + /* + * Check for an initrd + */ + prom_check_initrd(r3, r4); + + /* + * Get default machine type. At this point, we do not differentiate + * between pSeries SMP and pSeries LPAR + */ + RELOC(of_platform) = prom_find_machine_type(); + getprop_rval = RELOC(of_platform); + prom_setprop(_prom->chosen, "linux,platform", + &getprop_rval, sizeof(getprop_rval)); + +#ifdef CONFIG_PPC_PSERIES + /* + * On pSeries, inform the firmware about our capabilities + */ + if (RELOC(of_platform) & PLATFORM_PSERIES) + prom_send_capabilities(); +#endif + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_BPA) + /* + * On pSeries and BPA, copy the CPU hold code + */ + if (RELOC(of_platform) & (PLATFORM_PSERIES | PLATFORM_BPA)) + copy_and_flush(0, KERNELBASE + offset, 0x100, 0); +#endif + + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + + /* + * Initialize memory management within prom_init + */ + prom_init_mem(); + + /* + * Determine which cpu is actually running right _now_ + */ + prom_find_boot_cpu(); + + /* + * Initialize display devices + */ + prom_check_displays(); + +#ifdef CONFIG_PPC64 + /* + * Initialize IOMMU (TCE tables) on pSeries. Do that before anything else + * that uses the allocator, we need to make sure we get the top of memory + * available for us here... + */ + if (RELOC(of_platform) == PLATFORM_PSERIES) + prom_initialize_tce_table(); +#endif + + /* + * On non-powermacs, try to instantiate RTAS and puts all CPUs + * in spin-loops. PowerMacs don't have a working RTAS and use + * a different way to spin CPUs + */ + if (RELOC(of_platform) != PLATFORM_POWERMAC) { + prom_instantiate_rtas(); + prom_hold_cpus(); + } + + /* + * Fill in some infos for use by the kernel later on + */ + if (RELOC(prom_memory_limit)) + prom_setprop(_prom->chosen, "linux,memory-limit", + &RELOC(prom_memory_limit), + sizeof(prom_memory_limit)); +#ifdef CONFIG_PPC64 + if (RELOC(ppc64_iommu_off)) + prom_setprop(_prom->chosen, "linux,iommu-off", NULL, 0); + + if (RELOC(iommu_force_on)) + prom_setprop(_prom->chosen, "linux,iommu-force-on", NULL, 0); + + if (RELOC(prom_tce_alloc_start)) { + prom_setprop(_prom->chosen, "linux,tce-alloc-start", + &RELOC(prom_tce_alloc_start), + sizeof(prom_tce_alloc_start)); + prom_setprop(_prom->chosen, "linux,tce-alloc-end", + &RELOC(prom_tce_alloc_end), + sizeof(prom_tce_alloc_end)); + } +#endif + + /* + * Fixup any known bugs in the device-tree + */ + fixup_device_tree(); + + /* + * Now finally create the flattened device-tree + */ + prom_printf("copying OF device tree ...\n"); + flatten_device_tree(); + + /* in case stdin is USB and still active on IBM machines... */ + prom_close_stdin(); + + /* + * Call OF "quiesce" method to shut down pending DMA's from + * devices etc... + */ + prom_printf("Calling quiesce ...\n"); + call_prom("quiesce", 0, 0); + + /* + * And finally, call the kernel passing it the flattened device + * tree and NULL as r5, thus triggering the new entry point which + * is common to us and kexec + */ + hdr = RELOC(dt_header_start); + prom_printf("returning from prom_init\n"); + prom_debug("->dt_header_start=0x%x\n", hdr); + +#ifdef CONFIG_PPC32 + reloc_got2(-offset); +#endif + + __start(hdr, 0, 0); + + return 0; +} diff --git a/arch/powerpc/kernel/semaphore.c b/arch/powerpc/kernel/semaphore.c new file mode 100644 index 00000000000..2f8c3c95139 --- /dev/null +++ b/arch/powerpc/kernel/semaphore.c @@ -0,0 +1,135 @@ +/* + * PowerPC-specific semaphore code. + * + * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * April 2001 - Reworked by Paul Mackerras <paulus@samba.org> + * to eliminate the SMP races in the old version between the updates + * of `count' and `waking'. Now we use negative `count' values to + * indicate that some process(es) are waiting for the semaphore. + */ + +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/module.h> + +#include <asm/atomic.h> +#include <asm/semaphore.h> +#include <asm/errno.h> + +/* + * Atomically update sem->count. + * This does the equivalent of the following: + * + * old_count = sem->count; + * tmp = MAX(old_count, 0) + incr; + * sem->count = tmp; + * return old_count; + */ +static inline int __sem_update_count(struct semaphore *sem, int incr) +{ + int old_count, tmp; + + __asm__ __volatile__("\n" +"1: lwarx %0,0,%3\n" +" srawi %1,%0,31\n" +" andc %1,%0,%1\n" +" add %1,%1,%4\n" + PPC405_ERR77(0,%3) +" stwcx. %1,0,%3\n" +" bne 1b" + : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) + : "r" (&sem->count), "r" (incr), "m" (sem->count) + : "cc"); + + return old_count; +} + +void __up(struct semaphore *sem) +{ + /* + * Note that we incremented count in up() before we came here, + * but that was ineffective since the result was <= 0, and + * any negative value of count is equivalent to 0. + * This ends up setting count to 1, unless count is now > 0 + * (i.e. because some other cpu has called up() in the meantime), + * in which case we just increment count. + */ + __sem_update_count(sem, 1); + wake_up(&sem->wait); +} +EXPORT_SYMBOL(__up); + +/* + * Note that when we come in to __down or __down_interruptible, + * we have already decremented count, but that decrement was + * ineffective since the result was < 0, and any negative value + * of count is equivalent to 0. + * Thus it is only when we decrement count from some value > 0 + * that we have actually got the semaphore. + */ +void __sched __down(struct semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + __set_task_state(tsk, TASK_UNINTERRUPTIBLE); + add_wait_queue_exclusive(&sem->wait, &wait); + + /* + * Try to get the semaphore. If the count is > 0, then we've + * got the semaphore; we decrement count and exit the loop. + * If the count is 0 or negative, we set it to -1, indicating + * that we are asleep, and then sleep. + */ + while (__sem_update_count(sem, -1) <= 0) { + schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + } + remove_wait_queue(&sem->wait, &wait); + __set_task_state(tsk, TASK_RUNNING); + + /* + * If there are any more sleepers, wake one of them up so + * that it can either get the semaphore, or set count to -1 + * indicating that there are still processes sleeping. + */ + wake_up(&sem->wait); +} +EXPORT_SYMBOL(__down); + +int __sched __down_interruptible(struct semaphore * sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + __set_task_state(tsk, TASK_INTERRUPTIBLE); + add_wait_queue_exclusive(&sem->wait, &wait); + + while (__sem_update_count(sem, -1) <= 0) { + if (signal_pending(current)) { + /* + * A signal is pending - give up trying. + * Set sem->count to 0 if it is negative, + * since we are no longer sleeping. + */ + __sem_update_count(sem, 0); + retval = -EINTR; + break; + } + schedule(); + set_task_state(tsk, TASK_INTERRUPTIBLE); + } + remove_wait_queue(&sem->wait, &wait); + __set_task_state(tsk, TASK_RUNNING); + + wake_up(&sem->wait); + return retval; +} +EXPORT_SYMBOL(__down_interruptible); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c new file mode 100644 index 00000000000..bfa155c00ea --- /dev/null +++ b/arch/powerpc/kernel/setup_32.c @@ -0,0 +1,663 @@ +/* + * Common prep/pmac/chrp boot and setup code. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/delay.h> +#include <linux/initrd.h> +#include <linux/ide.h> +#include <linux/tty.h> +#include <linux/bootmem.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/cpu.h> +#include <linux/console.h> + +#include <asm/residual.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/bootinfo.h> +#include <asm/setup.h> +#include <asm/amigappc.h> +#include <asm/smp.h> +#include <asm/elf.h> +#include <asm/cputable.h> +#include <asm/bootx.h> +#include <asm/btext.h> +#include <asm/machdep.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/pmac_feature.h> +#include <asm/sections.h> +#include <asm/nvram.h> +#include <asm/xmon.h> +#include <asm/ocp.h> + +#define USES_PPC_SYS (defined(CONFIG_85xx) || defined(CONFIG_83xx) || \ + defined(CONFIG_MPC10X_BRIDGE) || defined(CONFIG_8260) || \ + defined(CONFIG_PPC_MPC52xx)) + +#if USES_PPC_SYS +#include <asm/ppc_sys.h> +#endif + +#if defined CONFIG_KGDB +#include <asm/kgdb.h> +#endif + +extern void platform_init(void); +extern void bootx_init(unsigned long r4, unsigned long phys); + +extern void ppc6xx_idle(void); +extern void power4_idle(void); + +boot_infos_t *boot_infos; +struct ide_machdep_calls ppc_ide_md; + +/* Used with the BI_MEMSIZE bootinfo parameter to store the memory + size value reported by the boot loader. */ +unsigned long boot_mem_size; + +unsigned long ISA_DMA_THRESHOLD; +unsigned int DMA_MODE_READ; +unsigned int DMA_MODE_WRITE; + +int have_of = 1; + +#ifdef CONFIG_PPC_MULTIPLATFORM +int _machine = 0; + +extern void prep_init(void); +extern void pmac_init(void); +extern void chrp_init(void); + +dev_t boot_dev; +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +#ifdef CONFIG_MAGIC_SYSRQ +unsigned long SYSRQ_KEY = 0x54; +#endif /* CONFIG_MAGIC_SYSRQ */ + +#ifdef CONFIG_VGA_CONSOLE +unsigned long vgacon_remap_base; +#endif + +struct machdep_calls ppc_md; +EXPORT_SYMBOL(ppc_md); + +/* + * These are used in binfmt_elf.c to put aux entries on the stack + * for each elf executable being started. + */ +int dcache_bsize; +int icache_bsize; +int ucache_bsize; + +#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_FB_VGA16) || \ + defined(CONFIG_FB_VGA16_MODULE) || defined(CONFIG_FB_VESA) +struct screen_info screen_info = { + 0, 25, /* orig-x, orig-y */ + 0, /* unused */ + 0, /* orig-video-page */ + 0, /* orig-video-mode */ + 80, /* orig-video-cols */ + 0,0,0, /* ega_ax, ega_bx, ega_cx */ + 25, /* orig-video-lines */ + 1, /* orig-video-isVGA */ + 16 /* orig-video-points */ +}; +#endif /* CONFIG_VGA_CONSOLE || CONFIG_FB_VGA16 || CONFIG_FB_VESA */ + +void machine_restart(char *cmd) +{ +#ifdef CONFIG_NVRAM + nvram_sync(); +#endif + ppc_md.restart(cmd); +} + +void machine_power_off(void) +{ +#ifdef CONFIG_NVRAM + nvram_sync(); +#endif + ppc_md.power_off(); +} + +void machine_halt(void) +{ +#ifdef CONFIG_NVRAM + nvram_sync(); +#endif + ppc_md.halt(); +} + +void (*pm_power_off)(void) = machine_power_off; + +#ifdef CONFIG_TAU +extern u32 cpu_temp(unsigned long cpu); +extern u32 cpu_temp_both(unsigned long cpu); +#endif /* CONFIG_TAU */ + +int show_cpuinfo(struct seq_file *m, void *v) +{ + int i = (int) v - 1; + int err = 0; + unsigned int pvr; + unsigned short maj, min; + unsigned long lpj; + + if (i >= NR_CPUS) { + /* Show summary information */ +#ifdef CONFIG_SMP + unsigned long bogosum = 0; + for (i = 0; i < NR_CPUS; ++i) + if (cpu_online(i)) + bogosum += cpu_data[i].loops_per_jiffy; + seq_printf(m, "total bogomips\t: %lu.%02lu\n", + bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); +#endif /* CONFIG_SMP */ + + if (ppc_md.show_cpuinfo != NULL) + err = ppc_md.show_cpuinfo(m); + return err; + } + +#ifdef CONFIG_SMP + if (!cpu_online(i)) + return 0; + pvr = cpu_data[i].pvr; + lpj = cpu_data[i].loops_per_jiffy; +#else + pvr = mfspr(SPRN_PVR); + lpj = loops_per_jiffy; +#endif + + seq_printf(m, "processor\t: %d\n", i); + seq_printf(m, "cpu\t\t: "); + + if (cur_cpu_spec->pvr_mask) + seq_printf(m, "%s", cur_cpu_spec->cpu_name); + else + seq_printf(m, "unknown (%08x)", pvr); +#ifdef CONFIG_ALTIVEC + if (cur_cpu_spec->cpu_features & CPU_FTR_ALTIVEC) + seq_printf(m, ", altivec supported"); +#endif + seq_printf(m, "\n"); + +#ifdef CONFIG_TAU + if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) { +#ifdef CONFIG_TAU_AVERAGE + /* more straightforward, but potentially misleading */ + seq_printf(m, "temperature \t: %u C (uncalibrated)\n", + cpu_temp(i)); +#else + /* show the actual temp sensor range */ + u32 temp; + temp = cpu_temp_both(i); + seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n", + temp & 0xff, temp >> 16); +#endif + } +#endif /* CONFIG_TAU */ + + if (ppc_md.show_percpuinfo != NULL) { + err = ppc_md.show_percpuinfo(m, i); + if (err) + return err; + } + + /* If we are a Freescale core do a simple check so + * we dont have to keep adding cases in the future */ + if ((PVR_VER(pvr) & 0x8000) == 0x8000) { + maj = PVR_MAJ(pvr); + min = PVR_MIN(pvr); + } else { + switch (PVR_VER(pvr)) { + case 0x0020: /* 403 family */ + maj = PVR_MAJ(pvr) + 1; + min = PVR_MIN(pvr); + break; + case 0x1008: /* 740P/750P ?? */ + maj = ((pvr >> 8) & 0xFF) - 1; + min = pvr & 0xFF; + break; + default: + maj = (pvr >> 8) & 0xFF; + min = pvr & 0xFF; + break; + } + } + + seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n", + maj, min, PVR_VER(pvr), PVR_REV(pvr)); + + seq_printf(m, "bogomips\t: %lu.%02lu\n", + lpj / (500000/HZ), (lpj / (5000/HZ)) % 100); + +#if USES_PPC_SYS + if (cur_ppc_sys_spec->ppc_sys_name) + seq_printf(m, "chipset\t\t: %s\n", + cur_ppc_sys_spec->ppc_sys_name); +#endif + +#ifdef CONFIG_SMP + seq_printf(m, "\n"); +#endif + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + int i = *pos; + + return i <= NR_CPUS? (void *) (i + 1): NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +struct seq_operations cpuinfo_op = { + .start =c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +/* + * We're called here very early in the boot. We determine the machine + * type and call the appropriate low-level setup functions. + * -- Cort <cort@fsmlabs.com> + * + * Note that the kernel may be running at an address which is different + * from the address that it was linked at, so we must use RELOC/PTRRELOC + * to access static data (including strings). -- paulus + */ +unsigned long __init early_init(unsigned long dt_ptr) +{ + unsigned long offset = reloc_offset(); + + reloc_got2(offset); + + /* + * Identify the CPU type and fix up code sections + * that depend on which cpu we have. + */ + identify_cpu(offset, 0); + do_cpu_ftr_fixups(offset); + +#ifdef CONFIG_BOOTX_TEXT + btext_prepare_BAT(); +#endif + + reloc_got2(-offset); + + return KERNELBASE + offset; +} + +#ifdef CONFIG_PPC_OF +/* + * Assume here that all clock rates are the same in a + * smp system. -- Cort + */ +int +of_show_percpuinfo(struct seq_file *m, int i) +{ + struct device_node *cpu_node; + u32 *fp; + int s; + + cpu_node = find_type_devices("cpu"); + if (!cpu_node) + return 0; + for (s = 0; s < i && cpu_node->next; s++) + cpu_node = cpu_node->next; + fp = (u32 *)get_property(cpu_node, "clock-frequency", NULL); + if (fp) + seq_printf(m, "clock\t\t: %dMHz\n", *fp / 1000000); + return 0; +} + +void __init +intuit_machine_type(void) +{ + char *model; + struct device_node *root; + + /* ask the OF info if we're a chrp or pmac */ + root = find_path_device("/"); + if (root != 0) { + /* assume pmac unless proven to be chrp -- Cort */ + _machine = _MACH_Pmac; + model = get_property(root, "device_type", NULL); + if (model && !strncmp("chrp", model, 4)) + _machine = _MACH_chrp; + else { + model = get_property(root, "model", NULL); + if (model && !strncmp(model, "IBM", 3)) + _machine = _MACH_chrp; + } + } +} +#endif + +#ifdef CONFIG_PPC_MULTIPLATFORM +/* + * The PPC_MULTIPLATFORM version of platform_init... + */ +void __init platform_init(void) +{ + /* if we didn't get any bootinfo telling us what we are... */ + if (_machine == 0) { + /* prep boot loader tells us if we're prep or not */ + if ( *(unsigned long *)(KERNELBASE) == (0xdeadc0de) ) + _machine = _MACH_prep; + } + +#ifdef CONFIG_PPC_PREP + /* not much more to do here, if prep */ + if (_machine == _MACH_prep) { + prep_init(); + return; + } +#endif + +#ifdef CONFIG_ADB + if (strstr(cmd_line, "adb_sync")) { + extern int __adb_probe_sync; + __adb_probe_sync = 1; + } +#endif /* CONFIG_ADB */ + + switch (_machine) { +#ifdef CONFIG_PPC_PMAC + case _MACH_Pmac: + pmac_init(); + break; +#endif +#ifdef CONFIG_PPC_CHRP + case _MACH_chrp: + chrp_init(); + break; +#endif + } +} + +#ifdef CONFIG_SERIAL_CORE_CONSOLE +extern char *of_stdout_device; + +static int __init set_preferred_console(void) +{ + struct device_node *prom_stdout; + char *name; + int offset = 0; + + if (of_stdout_device == NULL) + return -ENODEV; + + /* The user has requested a console so this is already set up. */ + if (strstr(saved_command_line, "console=")) + return -EBUSY; + + prom_stdout = find_path_device(of_stdout_device); + if (!prom_stdout) + return -ENODEV; + + name = (char *)get_property(prom_stdout, "name", NULL); + if (!name) + return -ENODEV; + + if (strcmp(name, "serial") == 0) { + int i; + u32 *reg = (u32 *)get_property(prom_stdout, "reg", &i); + if (i > 8) { + switch (reg[1]) { + case 0x3f8: + offset = 0; + break; + case 0x2f8: + offset = 1; + break; + case 0x898: + offset = 2; + break; + case 0x890: + offset = 3; + break; + default: + /* We dont recognise the serial port */ + return -ENODEV; + } + } + } else if (strcmp(name, "ch-a") == 0) + offset = 0; + else if (strcmp(name, "ch-b") == 0) + offset = 1; + else + return -ENODEV; + return add_preferred_console("ttyS", offset, NULL); +} +console_initcall(set_preferred_console); +#endif /* CONFIG_SERIAL_CORE_CONSOLE */ +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +/* + * Find out what kind of machine we're on and save any data we need + * from the early boot process (devtree is copied on pmac by prom_init()). + * This is called very early on the boot process, after a minimal + * MMU environment has been set up but before MMU_init is called. + */ +void __init machine_init(unsigned long dt_ptr, unsigned long phys) +{ + early_init_devtree(__va(dt_ptr)); + +#ifdef CONFIG_CMDLINE + strlcpy(cmd_line, CONFIG_CMDLINE, sizeof(cmd_line)); +#endif /* CONFIG_CMDLINE */ + +#ifdef CONFIG_6xx + ppc_md.power_save = ppc6xx_idle; +#endif +#ifdef CONFIG_POWER4 + ppc_md.power_save = power4_idle; +#endif + + platform_init(); + + if (ppc_md.progress) + ppc_md.progress("id mach(): done", 0x200); +} + +#ifdef CONFIG_BOOKE_WDT +/* Checks wdt=x and wdt_period=xx command-line option */ +int __init early_parse_wdt(char *p) +{ + if (p && strncmp(p, "0", 1) != 0) + booke_wdt_enabled = 1; + + return 0; +} +early_param("wdt", early_parse_wdt); + +int __init early_parse_wdt_period (char *p) +{ + if (p) + booke_wdt_period = simple_strtoul(p, NULL, 0); + + return 0; +} +early_param("wdt_period", early_parse_wdt_period); +#endif /* CONFIG_BOOKE_WDT */ + +/* Checks "l2cr=xxxx" command-line option */ +int __init ppc_setup_l2cr(char *str) +{ + if (cpu_has_feature(CPU_FTR_L2CR)) { + unsigned long val = simple_strtoul(str, NULL, 0); + printk(KERN_INFO "l2cr set to %lx\n", val); + _set_L2CR(0); /* force invalidate by disable cache */ + _set_L2CR(val); /* and enable it */ + } + return 1; +} +__setup("l2cr=", ppc_setup_l2cr); + +#ifdef CONFIG_GENERIC_NVRAM + +/* Generic nvram hooks used by drivers/char/gen_nvram.c */ +unsigned char nvram_read_byte(int addr) +{ + if (ppc_md.nvram_read_val) + return ppc_md.nvram_read_val(addr); + return 0xff; +} +EXPORT_SYMBOL(nvram_read_byte); + +void nvram_write_byte(unsigned char val, int addr) +{ + if (ppc_md.nvram_write_val) + ppc_md.nvram_write_val(addr, val); +} +EXPORT_SYMBOL(nvram_write_byte); + +void nvram_sync(void) +{ + if (ppc_md.nvram_sync) + ppc_md.nvram_sync(); +} +EXPORT_SYMBOL(nvram_sync); + +#endif /* CONFIG_NVRAM */ + +static struct cpu cpu_devices[NR_CPUS]; + +int __init ppc_init(void) +{ + int i; + + /* clear the progress line */ + if ( ppc_md.progress ) ppc_md.progress(" ", 0xffff); + + /* register CPU devices */ + for (i = 0; i < NR_CPUS; i++) + if (cpu_possible(i)) + register_cpu(&cpu_devices[i], i, NULL); + + /* call platform init */ + if (ppc_md.init != NULL) { + ppc_md.init(); + } + return 0; +} + +arch_initcall(ppc_init); + +/* Warning, IO base is not yet inited */ +void __init setup_arch(char **cmdline_p) +{ + extern char *klimit; + extern void do_init_bootmem(void); + + /* so udelay does something sensible, assume <= 1000 bogomips */ + loops_per_jiffy = 500000000 / HZ; + +#ifdef CONFIG_BOOTX_TEXT + map_boot_text(); +#endif + + unflatten_device_tree(); + finish_device_tree(); + +#ifdef CONFIG_PPC_MULTIPLATFORM + /* This could be called "early setup arch", it must be done + * now because xmon need it + */ + if (_machine == _MACH_Pmac) + pmac_feature_init(); /* New cool way */ +#endif + +#ifdef CONFIG_XMON + xmon_map_scc(); + if (strstr(cmd_line, "xmon")) + xmon(NULL); +#endif /* CONFIG_XMON */ + if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab); + +#if defined(CONFIG_KGDB) + if (ppc_md.kgdb_map_scc) + ppc_md.kgdb_map_scc(); + set_debug_traps(); + if (strstr(cmd_line, "gdb")) { + if (ppc_md.progress) + ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000); + printk("kgdb breakpoint activated\n"); + breakpoint(); + } +#endif + + /* + * Set cache line size based on type of cpu as a default. + * Systems with OF can look in the properties on the cpu node(s) + * for a possibly more accurate value. + */ + if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) { + dcache_bsize = cur_cpu_spec->dcache_bsize; + icache_bsize = cur_cpu_spec->icache_bsize; + ucache_bsize = 0; + } else + ucache_bsize = dcache_bsize = icache_bsize + = cur_cpu_spec->dcache_bsize; + + /* reboot on panic */ + panic_timeout = 180; + + init_mm.start_code = PAGE_OFFSET; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = (unsigned long) klimit; + + /* Save unparsed command line copy for /proc/cmdline */ + strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); + *cmdline_p = cmd_line; + + parse_early_param(); + + /* set up the bootmem stuff with available memory */ + do_init_bootmem(); + if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); + +#ifdef CONFIG_PPC_OCP + /* Initialize OCP device list */ + ocp_early_init(); + if ( ppc_md.progress ) ppc_md.progress("ocp: exit", 0x3eab); +#endif + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + ppc_md.setup_arch(); + if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); + + paging_init(); + + /* this is for modules since _machine can be a define -- Cort */ + ppc_md.ppc_machine = _machine; +} diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c new file mode 100644 index 00000000000..4fcf67575cb --- /dev/null +++ b/arch/powerpc/kernel/setup_64.c @@ -0,0 +1,1308 @@ +/* + * + * Common boot and setup code. + * + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/delay.h> +#include <linux/initrd.h> +#include <linux/ide.h> +#include <linux/seq_file.h> +#include <linux/ioport.h> +#include <linux/console.h> +#include <linux/utsname.h> +#include <linux/tty.h> +#include <linux/root_dev.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/unistd.h> +#include <linux/serial.h> +#include <linux/serial_8250.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/bootinfo.h> +#include <asm/smp.h> +#include <asm/elf.h> +#include <asm/machdep.h> +#include <asm/paca.h> +#include <asm/ppcdebug.h> +#include <asm/time.h> +#include <asm/cputable.h> +#include <asm/sections.h> +#include <asm/btext.h> +#include <asm/nvram.h> +#include <asm/setup.h> +#include <asm/system.h> +#include <asm/rtas.h> +#include <asm/iommu.h> +#include <asm/serial.h> +#include <asm/cache.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/lmb.h> +#include <asm/iSeries/ItLpNaca.h> +#include <asm/firmware.h> +#include <asm/systemcfg.h> + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * Here are some early debugging facilities. You can enable one + * but your kernel will not boot on anything else if you do so + */ + +/* This one is for use on LPAR machines that support an HVC console + * on vterm 0 + */ +extern void udbg_init_debug_lpar(void); +/* This one is for use on Apple G5 machines + */ +extern void udbg_init_pmac_realmode(void); +/* That's RTAS panel debug */ +extern void call_rtas_display_status_delay(unsigned char c); +/* Here's maple real mode debug */ +extern void udbg_init_maple_realmode(void); + +#define EARLY_DEBUG_INIT() do {} while(0) + +#if 0 +#define EARLY_DEBUG_INIT() udbg_init_debug_lpar() +#define EARLY_DEBUG_INIT() udbg_init_maple_realmode() +#define EARLY_DEBUG_INIT() udbg_init_pmac_realmode() +#define EARLY_DEBUG_INIT() \ + do { udbg_putc = call_rtas_display_status_delay; } while(0) +#endif + +/* extern void *stab; */ +extern unsigned long klimit; + +extern void mm_init_ppc64(void); +extern void stab_initialize(unsigned long stab); +extern void htab_initialize(void); +extern void early_init_devtree(void *flat_dt); +extern void unflatten_device_tree(void); + +extern void smp_release_cpus(void); + +int have_of = 1; +int boot_cpuid = 0; +int boot_cpuid_phys = 0; +dev_t boot_dev; +u64 ppc64_pft_size; + +struct ppc64_caches ppc64_caches; +EXPORT_SYMBOL_GPL(ppc64_caches); + +/* + * These are used in binfmt_elf.c to put aux entries on the stack + * for each elf executable being started. + */ +int dcache_bsize; +int icache_bsize; +int ucache_bsize; + +/* The main machine-dep calls structure + */ +struct machdep_calls ppc_md; +EXPORT_SYMBOL(ppc_md); + +#ifdef CONFIG_MAGIC_SYSRQ +unsigned long SYSRQ_KEY; +#endif /* CONFIG_MAGIC_SYSRQ */ + + +static int ppc64_panic_event(struct notifier_block *, unsigned long, void *); +static struct notifier_block ppc64_panic_block = { + .notifier_call = ppc64_panic_event, + .priority = INT_MIN /* may not return; must be done last */ +}; + +/* + * Perhaps we can put the pmac screen_info[] here + * on pmac as well so we don't need the ifdef's. + * Until we get multiple-console support in here + * that is. -- Cort + * Maybe tie it to serial consoles, since this is really what + * these processors use on existing boards. -- Dan + */ +struct screen_info screen_info = { + .orig_x = 0, + .orig_y = 25, + .orig_video_cols = 80, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 +}; + +#ifdef CONFIG_SMP + +static int smt_enabled_cmdline; + +/* Look for ibm,smt-enabled OF option */ +static void check_smt_enabled(void) +{ + struct device_node *dn; + char *smt_option; + + /* Allow the command line to overrule the OF option */ + if (smt_enabled_cmdline) + return; + + dn = of_find_node_by_path("/options"); + + if (dn) { + smt_option = (char *)get_property(dn, "ibm,smt-enabled", NULL); + + if (smt_option) { + if (!strcmp(smt_option, "on")) + smt_enabled_at_boot = 1; + else if (!strcmp(smt_option, "off")) + smt_enabled_at_boot = 0; + } + } +} + +/* Look for smt-enabled= cmdline option */ +static int __init early_smt_enabled(char *p) +{ + smt_enabled_cmdline = 1; + + if (!p) + return 0; + + if (!strcmp(p, "on") || !strcmp(p, "1")) + smt_enabled_at_boot = 1; + else if (!strcmp(p, "off") || !strcmp(p, "0")) + smt_enabled_at_boot = 0; + + return 0; +} +early_param("smt-enabled", early_smt_enabled); + +/** + * setup_cpu_maps - initialize the following cpu maps: + * cpu_possible_map + * cpu_present_map + * cpu_sibling_map + * + * Having the possible map set up early allows us to restrict allocations + * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. + * + * We do not initialize the online map here; cpus set their own bits in + * cpu_online_map as they come up. + * + * This function is valid only for Open Firmware systems. finish_device_tree + * must be called before using this. + * + * While we're here, we may as well set the "physical" cpu ids in the paca. + */ +static void __init setup_cpu_maps(void) +{ + struct device_node *dn = NULL; + int cpu = 0; + int swap_cpuid = 0; + + check_smt_enabled(); + + while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { + u32 *intserv; + int j, len = sizeof(u32), nthreads; + + intserv = (u32 *)get_property(dn, "ibm,ppc-interrupt-server#s", + &len); + if (!intserv) + intserv = (u32 *)get_property(dn, "reg", NULL); + + nthreads = len / sizeof(u32); + + for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { + cpu_set(cpu, cpu_present_map); + set_hard_smp_processor_id(cpu, intserv[j]); + + if (intserv[j] == boot_cpuid_phys) + swap_cpuid = cpu; + cpu_set(cpu, cpu_possible_map); + cpu++; + } + } + + /* Swap CPU id 0 with boot_cpuid_phys, so we can always assume that + * boot cpu is logical 0. + */ + if (boot_cpuid_phys != get_hard_smp_processor_id(0)) { + u32 tmp; + tmp = get_hard_smp_processor_id(0); + set_hard_smp_processor_id(0, boot_cpuid_phys); + set_hard_smp_processor_id(swap_cpuid, tmp); + } + + /* + * On pSeries LPAR, we need to know how many cpus + * could possibly be added to this partition. + */ + if (systemcfg->platform == PLATFORM_PSERIES_LPAR && + (dn = of_find_node_by_path("/rtas"))) { + int num_addr_cell, num_size_cell, maxcpus; + unsigned int *ireg; + + num_addr_cell = prom_n_addr_cells(dn); + num_size_cell = prom_n_size_cells(dn); + + ireg = (unsigned int *) + get_property(dn, "ibm,lrdr-capacity", NULL); + + if (!ireg) + goto out; + + maxcpus = ireg[num_addr_cell + num_size_cell]; + + /* Double maxcpus for processors which have SMT capability */ + if (cpu_has_feature(CPU_FTR_SMT)) + maxcpus *= 2; + + if (maxcpus > NR_CPUS) { + printk(KERN_WARNING + "Partition configured for %d cpus, " + "operating system maximum is %d.\n", + maxcpus, NR_CPUS); + maxcpus = NR_CPUS; + } else + printk(KERN_INFO "Partition configured for %d cpus.\n", + maxcpus); + + for (cpu = 0; cpu < maxcpus; cpu++) + cpu_set(cpu, cpu_possible_map); + out: + of_node_put(dn); + } + + /* + * Do the sibling map; assume only two threads per processor. + */ + for_each_cpu(cpu) { + cpu_set(cpu, cpu_sibling_map[cpu]); + if (cpu_has_feature(CPU_FTR_SMT)) + cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); + } + + systemcfg->processorCount = num_present_cpus(); +} +#endif /* CONFIG_SMP */ + +extern struct machdep_calls pSeries_md; +extern struct machdep_calls pmac_md; +extern struct machdep_calls maple_md; +extern struct machdep_calls bpa_md; +extern struct machdep_calls iseries_md; + +/* Ultimately, stuff them in an elf section like initcalls... */ +static struct machdep_calls __initdata *machines[] = { +#ifdef CONFIG_PPC_PSERIES + &pSeries_md, +#endif /* CONFIG_PPC_PSERIES */ +#ifdef CONFIG_PPC_PMAC + &pmac_md, +#endif /* CONFIG_PPC_PMAC */ +#ifdef CONFIG_PPC_MAPLE + &maple_md, +#endif /* CONFIG_PPC_MAPLE */ +#ifdef CONFIG_PPC_BPA + &bpa_md, +#endif +#ifdef CONFIG_PPC_ISERIES + &iseries_md, +#endif + NULL +}; + +/* + * Early initialization entry point. This is called by head.S + * with MMU translation disabled. We rely on the "feature" of + * the CPU that ignores the top 2 bits of the address in real + * mode so we can access kernel globals normally provided we + * only toy with things in the RMO region. From here, we do + * some early parsing of the device-tree to setup out LMB + * data structures, and allocate & initialize the hash table + * and segment tables so we can start running with translation + * enabled. + * + * It is this function which will call the probe() callback of + * the various platform types and copy the matching one to the + * global ppc_md structure. Your platform can eventually do + * some very early initializations from the probe() routine, but + * this is not recommended, be very careful as, for example, the + * device-tree is not accessible via normal means at this point. + */ + +void __init early_setup(unsigned long dt_ptr) +{ + struct paca_struct *lpaca = get_paca(); + static struct machdep_calls **mach; + + /* + * Enable early debugging if any specified (see top of + * this file) + */ + EARLY_DEBUG_INIT(); + + DBG(" -> early_setup()\n"); + + /* + * Fill the default DBG level (do we want to keep + * that old mecanism around forever ?) + */ + ppcdbg_initialize(); + + /* + * Do early initializations using the flattened device + * tree, like retreiving the physical memory map or + * calculating/retreiving the hash table size + */ + early_init_devtree(__va(dt_ptr)); + + /* + * Iterate all ppc_md structures until we find the proper + * one for the current machine type + */ + DBG("Probing machine type for platform %x...\n", + systemcfg->platform); + + for (mach = machines; *mach; mach++) { + if ((*mach)->probe(systemcfg->platform)) + break; + } + /* What can we do if we didn't find ? */ + if (*mach == NULL) { + DBG("No suitable machine found !\n"); + for (;;); + } + ppc_md = **mach; + + DBG("Found, Initializing memory management...\n"); + + /* + * Initialize stab / SLB management + */ + if (!firmware_has_feature(FW_FEATURE_ISERIES)) + stab_initialize(lpaca->stab_real); + + /* + * Initialize the MMU Hash table and create the linear mapping + * of memory + */ + htab_initialize(); + + DBG(" <- early_setup()\n"); +} + + +/* + * Initialize some remaining members of the ppc64_caches and systemcfg structures + * (at least until we get rid of them completely). This is mostly some + * cache informations about the CPU that will be used by cache flush + * routines and/or provided to userland + */ +static void __init initialize_cache_info(void) +{ + struct device_node *np; + unsigned long num_cpus = 0; + + DBG(" -> initialize_cache_info()\n"); + + for (np = NULL; (np = of_find_node_by_type(np, "cpu"));) { + num_cpus += 1; + + /* We're assuming *all* of the CPUs have the same + * d-cache and i-cache sizes... -Peter + */ + + if ( num_cpus == 1 ) { + u32 *sizep, *lsizep; + u32 size, lsize; + const char *dc, *ic; + + /* Then read cache informations */ + if (systemcfg->platform == PLATFORM_POWERMAC) { + dc = "d-cache-block-size"; + ic = "i-cache-block-size"; + } else { + dc = "d-cache-line-size"; + ic = "i-cache-line-size"; + } + + size = 0; + lsize = cur_cpu_spec->dcache_bsize; + sizep = (u32 *)get_property(np, "d-cache-size", NULL); + if (sizep != NULL) + size = *sizep; + lsizep = (u32 *) get_property(np, dc, NULL); + if (lsizep != NULL) + lsize = *lsizep; + if (sizep == 0 || lsizep == 0) + DBG("Argh, can't find dcache properties ! " + "sizep: %p, lsizep: %p\n", sizep, lsizep); + + systemcfg->dcache_size = ppc64_caches.dsize = size; + systemcfg->dcache_line_size = + ppc64_caches.dline_size = lsize; + ppc64_caches.log_dline_size = __ilog2(lsize); + ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; + + size = 0; + lsize = cur_cpu_spec->icache_bsize; + sizep = (u32 *)get_property(np, "i-cache-size", NULL); + if (sizep != NULL) + size = *sizep; + lsizep = (u32 *)get_property(np, ic, NULL); + if (lsizep != NULL) + lsize = *lsizep; + if (sizep == 0 || lsizep == 0) + DBG("Argh, can't find icache properties ! " + "sizep: %p, lsizep: %p\n", sizep, lsizep); + + systemcfg->icache_size = ppc64_caches.isize = size; + systemcfg->icache_line_size = + ppc64_caches.iline_size = lsize; + ppc64_caches.log_iline_size = __ilog2(lsize); + ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; + } + } + + /* Add an eye catcher and the systemcfg layout version number */ + strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); + systemcfg->version.major = SYSTEMCFG_MAJOR; + systemcfg->version.minor = SYSTEMCFG_MINOR; + systemcfg->processor = mfspr(SPRN_PVR); + + DBG(" <- initialize_cache_info()\n"); +} + +static void __init check_for_initrd(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + u64 *prop; + + DBG(" -> check_for_initrd()\n"); + + if (of_chosen) { + prop = (u64 *)get_property(of_chosen, + "linux,initrd-start", NULL); + if (prop != NULL) { + initrd_start = (unsigned long)__va(*prop); + prop = (u64 *)get_property(of_chosen, + "linux,initrd-end", NULL); + if (prop != NULL) { + initrd_end = (unsigned long)__va(*prop); + initrd_below_start_ok = 1; + } else + initrd_start = 0; + } + } + + /* If we were passed an initrd, set the ROOT_DEV properly if the values + * look sensible. If not, clear initrd reference. + */ + if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && + initrd_end > initrd_start) + ROOT_DEV = Root_RAM0; + else + initrd_start = initrd_end = 0; + + if (initrd_start) + printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); + + DBG(" <- check_for_initrd()\n"); +#endif /* CONFIG_BLK_DEV_INITRD */ +} + +/* + * Do some initial setup of the system. The parameters are those which + * were passed in from the bootloader. + */ +void __init setup_system(void) +{ + DBG(" -> setup_system()\n"); + + /* + * Unflatten the device-tree passed by prom_init or kexec + */ + unflatten_device_tree(); + + /* + * Fill the ppc64_caches & systemcfg structures with informations + * retreived from the device-tree. Need to be called before + * finish_device_tree() since the later requires some of the + * informations filled up here to properly parse the interrupt + * tree. + * It also sets up the cache line sizes which allows to call + * routines like flush_icache_range (used by the hash init + * later on). + */ + initialize_cache_info(); + +#ifdef CONFIG_PPC_RTAS + /* + * Initialize RTAS if available + */ + rtas_initialize(); +#endif /* CONFIG_PPC_RTAS */ + + /* + * Check if we have an initrd provided via the device-tree + */ + check_for_initrd(); + + /* + * Do some platform specific early initializations, that includes + * setting up the hash table pointers. It also sets up some interrupt-mapping + * related options that will be used by finish_device_tree() + */ + ppc_md.init_early(); + + /* + * "Finish" the device-tree, that is do the actual parsing of + * some of the properties like the interrupt map + */ + finish_device_tree(); + + /* + * Initialize xmon + */ +#ifdef CONFIG_XMON_DEFAULT + xmon_init(1); +#endif + /* + * Register early console + */ + register_early_udbg_console(); + + /* Save unparsed command line copy for /proc/cmdline */ + strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); + + parse_early_param(); + +#ifdef CONFIG_SMP + /* + * iSeries has already initialized the cpu maps at this point. + */ + setup_cpu_maps(); + + /* Release secondary cpus out of their spinloops at 0x60 now that + * we can map physical -> logical CPU ids + */ + smp_release_cpus(); +#endif + + printk("Starting Linux PPC64 %s\n", system_utsname.version); + + printk("-----------------------------------------------------\n"); + printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); + printk("ppc64_debug_switch = 0x%lx\n", ppc64_debug_switch); + printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller); + printk("systemcfg = 0x%p\n", systemcfg); + printk("systemcfg->platform = 0x%x\n", systemcfg->platform); + printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount); + printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize); + printk("ppc64_caches.dcache_line_size = 0x%x\n", + ppc64_caches.dline_size); + printk("ppc64_caches.icache_line_size = 0x%x\n", + ppc64_caches.iline_size); + printk("htab_address = 0x%p\n", htab_address); + printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); + printk("-----------------------------------------------------\n"); + + mm_init_ppc64(); + + DBG(" <- setup_system()\n"); +} + +/* also used by kexec */ +void machine_shutdown(void) +{ + if (ppc_md.nvram_sync) + ppc_md.nvram_sync(); +} + +void machine_restart(char *cmd) +{ + machine_shutdown(); + ppc_md.restart(cmd); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + printk(KERN_EMERG "System Halted, OK to turn off power\n"); + local_irq_disable(); + while (1) ; +} + +void machine_power_off(void) +{ + machine_shutdown(); + ppc_md.power_off(); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + printk(KERN_EMERG "System Halted, OK to turn off power\n"); + local_irq_disable(); + while (1) ; +} +/* Used by the G5 thermal driver */ +EXPORT_SYMBOL_GPL(machine_power_off); + +void machine_halt(void) +{ + machine_shutdown(); + ppc_md.halt(); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + printk(KERN_EMERG "System Halted, OK to turn off power\n"); + local_irq_disable(); + while (1) ; +} + +static int ppc64_panic_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + ppc_md.panic((char *)ptr); /* May not return */ + return NOTIFY_DONE; +} + + +#ifdef CONFIG_SMP +DEFINE_PER_CPU(unsigned int, pvr); +#endif + +static int show_cpuinfo(struct seq_file *m, void *v) +{ + unsigned long cpu_id = (unsigned long)v - 1; + unsigned int pvr; + unsigned short maj; + unsigned short min; + + if (cpu_id == NR_CPUS) { + seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); + + if (ppc_md.get_cpuinfo != NULL) + ppc_md.get_cpuinfo(m); + + return 0; + } + + /* We only show online cpus: disable preempt (overzealous, I + * knew) to prevent cpu going down. */ + preempt_disable(); + if (!cpu_online(cpu_id)) { + preempt_enable(); + return 0; + } + +#ifdef CONFIG_SMP + pvr = per_cpu(pvr, cpu_id); +#else + pvr = mfspr(SPRN_PVR); +#endif + maj = (pvr >> 8) & 0xFF; + min = pvr & 0xFF; + + seq_printf(m, "processor\t: %lu\n", cpu_id); + seq_printf(m, "cpu\t\t: "); + + if (cur_cpu_spec->pvr_mask) + seq_printf(m, "%s", cur_cpu_spec->cpu_name); + else + seq_printf(m, "unknown (%08x)", pvr); + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + seq_printf(m, ", altivec supported"); +#endif /* CONFIG_ALTIVEC */ + + seq_printf(m, "\n"); + + /* + * Assume here that all clock rates are the same in a + * smp system. -- Cort + */ + seq_printf(m, "clock\t\t: %lu.%06luMHz\n", ppc_proc_freq / 1000000, + ppc_proc_freq % 1000000); + + seq_printf(m, "revision\t: %hd.%hd\n\n", maj, min); + + preempt_enable(); + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos <= NR_CPUS ? (void *)((*pos)+1) : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +struct seq_operations cpuinfo_op = { + .start =c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +/* + * These three variables are used to save values passed to us by prom_init() + * via the device tree. The TCE variables are needed because with a memory_limit + * in force we may need to explicitly map the TCE are at the top of RAM. + */ +unsigned long memory_limit; +unsigned long tce_alloc_start; +unsigned long tce_alloc_end; + +#ifdef CONFIG_PPC_ISERIES +/* + * On iSeries we just parse the mem=X option from the command line. + * On pSeries it's a bit more complicated, see prom_init_mem() + */ +static int __init early_parsemem(char *p) +{ + if (!p) + return 0; + + memory_limit = ALIGN(memparse(p, &p), PAGE_SIZE); + + return 0; +} +early_param("mem", early_parsemem); +#endif /* CONFIG_PPC_ISERIES */ + +#ifdef CONFIG_PPC_MULTIPLATFORM +static int __init set_preferred_console(void) +{ + struct device_node *prom_stdout = NULL; + char *name; + u32 *spd; + int offset = 0; + + DBG(" -> set_preferred_console()\n"); + + /* The user has requested a console so this is already set up. */ + if (strstr(saved_command_line, "console=")) { + DBG(" console was specified !\n"); + return -EBUSY; + } + + if (!of_chosen) { + DBG(" of_chosen is NULL !\n"); + return -ENODEV; + } + /* We are getting a weird phandle from OF ... */ + /* ... So use the full path instead */ + name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + if (name == NULL) { + DBG(" no linux,stdout-path !\n"); + return -ENODEV; + } + prom_stdout = of_find_node_by_path(name); + if (!prom_stdout) { + DBG(" can't find stdout package %s !\n", name); + return -ENODEV; + } + DBG("stdout is %s\n", prom_stdout->full_name); + + name = (char *)get_property(prom_stdout, "name", NULL); + if (!name) { + DBG(" stdout package has no name !\n"); + goto not_found; + } + spd = (u32 *)get_property(prom_stdout, "current-speed", NULL); + + if (0) + ; +#ifdef CONFIG_SERIAL_8250_CONSOLE + else if (strcmp(name, "serial") == 0) { + int i; + u32 *reg = (u32 *)get_property(prom_stdout, "reg", &i); + if (i > 8) { + switch (reg[1]) { + case 0x3f8: + offset = 0; + break; + case 0x2f8: + offset = 1; + break; + case 0x898: + offset = 2; + break; + case 0x890: + offset = 3; + break; + default: + /* We dont recognise the serial port */ + goto not_found; + } + } + } +#endif /* CONFIG_SERIAL_8250_CONSOLE */ +#ifdef CONFIG_PPC_PSERIES + else if (strcmp(name, "vty") == 0) { + u32 *reg = (u32 *)get_property(prom_stdout, "reg", NULL); + char *compat = (char *)get_property(prom_stdout, "compatible", NULL); + + if (reg && compat && (strcmp(compat, "hvterm-protocol") == 0)) { + /* Host Virtual Serial Interface */ + int offset; + switch (reg[0]) { + case 0x30000000: + offset = 0; + break; + case 0x30000001: + offset = 1; + break; + default: + goto not_found; + } + of_node_put(prom_stdout); + DBG("Found hvsi console at offset %d\n", offset); + return add_preferred_console("hvsi", offset, NULL); + } else { + /* pSeries LPAR virtual console */ + of_node_put(prom_stdout); + DBG("Found hvc console\n"); + return add_preferred_console("hvc", 0, NULL); + } + } +#endif /* CONFIG_PPC_PSERIES */ +#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE + else if (strcmp(name, "ch-a") == 0) + offset = 0; + else if (strcmp(name, "ch-b") == 0) + offset = 1; +#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */ + else + goto not_found; + of_node_put(prom_stdout); + + DBG("Found serial console at ttyS%d\n", offset); + + if (spd) { + static char __initdata opt[16]; + sprintf(opt, "%d", *spd); + return add_preferred_console("ttyS", offset, opt); + } else + return add_preferred_console("ttyS", offset, NULL); + + not_found: + DBG("No preferred console found !\n"); + of_node_put(prom_stdout); + return -ENODEV; +} +console_initcall(set_preferred_console); +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +#ifdef CONFIG_IRQSTACKS +static void __init irqstack_early_init(void) +{ + unsigned int i; + + /* + * interrupt stacks must be under 256MB, we cannot afford to take + * SLB misses on them. + */ + for_each_cpu(i) { + softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, + THREAD_SIZE, 0x10000000)); + hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, + THREAD_SIZE, 0x10000000)); + } +} +#else +#define irqstack_early_init() +#endif + +/* + * Stack space used when we detect a bad kernel stack pointer, and + * early in SMP boots before relocation is enabled. + */ +static void __init emergency_stack_init(void) +{ + unsigned long limit; + unsigned int i; + + /* + * Emergency stacks must be under 256MB, we cannot afford to take + * SLB misses on them. The ABI also requires them to be 128-byte + * aligned. + * + * Since we use these as temporary stacks during secondary CPU + * bringup, we need to get at them in real mode. This means they + * must also be within the RMO region. + */ + limit = min(0x10000000UL, lmb.rmo_size); + + for_each_cpu(i) + paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128, + limit)) + PAGE_SIZE; +} + +/* + * Called from setup_arch to initialize the bitmap of available + * syscalls in the systemcfg page + */ +void __init setup_syscall_map(void) +{ + unsigned int i, count64 = 0, count32 = 0; + extern unsigned long *sys_call_table; + extern unsigned long sys_ni_syscall; + + + for (i = 0; i < __NR_syscalls; i++) { + if (sys_call_table[i*2] != sys_ni_syscall) { + count64++; + systemcfg->syscall_map_64[i >> 5] |= + 0x80000000UL >> (i & 0x1f); + } + if (sys_call_table[i*2+1] != sys_ni_syscall) { + count32++; + systemcfg->syscall_map_32[i >> 5] |= + 0x80000000UL >> (i & 0x1f); + } + } + printk(KERN_INFO "Syscall map setup, %d 32-bit and %d 64-bit syscalls\n", + count32, count64); +} + +/* + * Called into from start_kernel, after lock_kernel has been called. + * Initializes bootmem, which is unsed to manage page allocation until + * mem_init is called. + */ +void __init setup_arch(char **cmdline_p) +{ + extern void do_init_bootmem(void); + + ppc64_boot_msg(0x12, "Setup Arch"); + + *cmdline_p = cmd_line; + + /* + * Set cache line size based on type of cpu as a default. + * Systems with OF can look in the properties on the cpu node(s) + * for a possibly more accurate value. + */ + dcache_bsize = ppc64_caches.dline_size; + icache_bsize = ppc64_caches.iline_size; + + /* reboot on panic */ + panic_timeout = 180; + + if (ppc_md.panic) + notifier_chain_register(&panic_notifier_list, &ppc64_panic_block); + + init_mm.start_code = PAGE_OFFSET; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = klimit; + + irqstack_early_init(); + emergency_stack_init(); + + stabs_alloc(); + + /* set up the bootmem stuff with available memory */ + do_init_bootmem(); + sparse_init(); + + /* initialize the syscall map in systemcfg */ + setup_syscall_map(); + + ppc_md.setup_arch(); + + /* Use the default idle loop if the platform hasn't provided one. */ + if (NULL == ppc_md.idle_loop) { + ppc_md.idle_loop = default_idle; + printk(KERN_INFO "Using default idle loop\n"); + } + + paging_init(); + ppc64_boot_msg(0x15, "Setup Done"); +} + + +/* ToDo: do something useful if ppc_md is not yet setup. */ +#define PPC64_LINUX_FUNCTION 0x0f000000 +#define PPC64_IPL_MESSAGE 0xc0000000 +#define PPC64_TERM_MESSAGE 0xb0000000 + +static void ppc64_do_msg(unsigned int src, const char *msg) +{ + if (ppc_md.progress) { + char buf[128]; + + sprintf(buf, "%08X\n", src); + ppc_md.progress(buf, 0); + snprintf(buf, 128, "%s", msg); + ppc_md.progress(buf, 0); + } +} + +/* Print a boot progress message. */ +void ppc64_boot_msg(unsigned int src, const char *msg) +{ + ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg); + printk("[boot]%04x %s\n", src, msg); +} + +/* Print a termination message (print only -- does not stop the kernel) */ +void ppc64_terminate_msg(unsigned int src, const char *msg) +{ + ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_TERM_MESSAGE|src, msg); + printk("[terminate]%04x %s\n", src, msg); +} + +/* This should only be called on processor 0 during calibrate decr */ +void __init setup_default_decr(void) +{ + struct paca_struct *lpaca = get_paca(); + + lpaca->default_decr = tb_ticks_per_jiffy; + lpaca->next_jiffy_update_tb = get_tb() + tb_ticks_per_jiffy; +} + +#ifndef CONFIG_PPC_ISERIES +/* + * This function can be used by platforms to "find" legacy serial ports. + * It works for "serial" nodes under an "isa" node, and will try to + * respect the "ibm,aix-loc" property if any. It works with up to 8 + * ports. + */ + +#define MAX_LEGACY_SERIAL_PORTS 8 +static struct plat_serial8250_port serial_ports[MAX_LEGACY_SERIAL_PORTS+1]; +static unsigned int old_serial_count; + +void __init generic_find_legacy_serial_ports(u64 *physport, + unsigned int *default_speed) +{ + struct device_node *np; + u32 *sizeprop; + + struct isa_reg_property { + u32 space; + u32 address; + u32 size; + }; + struct pci_reg_property { + struct pci_address addr; + u32 size_hi; + u32 size_lo; + }; + + DBG(" -> generic_find_legacy_serial_port()\n"); + + *physport = 0; + if (default_speed) + *default_speed = 0; + + np = of_find_node_by_path("/"); + if (!np) + return; + + /* First fill our array */ + for (np = NULL; (np = of_find_node_by_type(np, "serial"));) { + struct device_node *isa, *pci; + struct isa_reg_property *reg; + unsigned long phys_size, addr_size, io_base; + u32 *rangesp; + u32 *interrupts, *clk, *spd; + char *typep; + int index, rlen, rentsize; + + /* Ok, first check if it's under an "isa" parent */ + isa = of_get_parent(np); + if (!isa || strcmp(isa->name, "isa")) { + DBG("%s: no isa parent found\n", np->full_name); + continue; + } + + /* Now look for an "ibm,aix-loc" property that gives us ordering + * if any... + */ + typep = (char *)get_property(np, "ibm,aix-loc", NULL); + + /* Get the ISA port number */ + reg = (struct isa_reg_property *)get_property(np, "reg", NULL); + if (reg == NULL) + goto next_port; + /* We assume the interrupt number isn't translated ... */ + interrupts = (u32 *)get_property(np, "interrupts", NULL); + /* get clock freq. if present */ + clk = (u32 *)get_property(np, "clock-frequency", NULL); + /* get default speed if present */ + spd = (u32 *)get_property(np, "current-speed", NULL); + /* Default to locate at end of array */ + index = old_serial_count; /* end of the array by default */ + + /* If we have a location index, then use it */ + if (typep && *typep == 'S') { + index = simple_strtol(typep+1, NULL, 0) - 1; + /* if index is out of range, use end of array instead */ + if (index >= MAX_LEGACY_SERIAL_PORTS) + index = old_serial_count; + /* if our index is still out of range, that mean that + * array is full, we could scan for a free slot but that + * make little sense to bother, just skip the port + */ + if (index >= MAX_LEGACY_SERIAL_PORTS) + goto next_port; + if (index >= old_serial_count) + old_serial_count = index + 1; + /* Check if there is a port who already claimed our slot */ + if (serial_ports[index].iobase != 0) { + /* if we still have some room, move it, else override */ + if (old_serial_count < MAX_LEGACY_SERIAL_PORTS) { + DBG("Moved legacy port %d -> %d\n", index, + old_serial_count); + serial_ports[old_serial_count++] = + serial_ports[index]; + } else { + DBG("Replacing legacy port %d\n", index); + } + } + } + if (index >= MAX_LEGACY_SERIAL_PORTS) + goto next_port; + if (index >= old_serial_count) + old_serial_count = index + 1; + + /* Now fill the entry */ + memset(&serial_ports[index], 0, sizeof(struct plat_serial8250_port)); + serial_ports[index].uartclk = clk ? *clk : BASE_BAUD * 16; + serial_ports[index].iobase = reg->address; + serial_ports[index].irq = interrupts ? interrupts[0] : 0; + serial_ports[index].flags = ASYNC_BOOT_AUTOCONF; + + DBG("Added legacy port, index: %d, port: %x, irq: %d, clk: %d\n", + index, + serial_ports[index].iobase, + serial_ports[index].irq, + serial_ports[index].uartclk); + + /* Get phys address of IO reg for port 1 */ + if (index != 0) + goto next_port; + + pci = of_get_parent(isa); + if (!pci) { + DBG("%s: no pci parent found\n", np->full_name); + goto next_port; + } + + rangesp = (u32 *)get_property(pci, "ranges", &rlen); + if (rangesp == NULL) { + of_node_put(pci); + goto next_port; + } + rlen /= 4; + + /* we need the #size-cells of the PCI bridge node itself */ + phys_size = 1; + sizeprop = (u32 *)get_property(pci, "#size-cells", NULL); + if (sizeprop != NULL) + phys_size = *sizeprop; + /* we need the parent #addr-cells */ + addr_size = prom_n_addr_cells(pci); + rentsize = 3 + addr_size + phys_size; + io_base = 0; + for (;rlen >= rentsize; rlen -= rentsize,rangesp += rentsize) { + if (((rangesp[0] >> 24) & 0x3) != 1) + continue; /* not IO space */ + io_base = rangesp[3]; + if (addr_size == 2) + io_base = (io_base << 32) | rangesp[4]; + } + if (io_base != 0) { + *physport = io_base + reg->address; + if (default_speed && spd) + *default_speed = *spd; + } + of_node_put(pci); + next_port: + of_node_put(isa); + } + + DBG(" <- generic_find_legacy_serial_port()\n"); +} + +static struct platform_device serial_device = { + .name = "serial8250", + .id = PLAT8250_DEV_PLATFORM, + .dev = { + .platform_data = serial_ports, + }, +}; + +static int __init serial_dev_init(void) +{ + return platform_device_register(&serial_device); +} +arch_initcall(serial_dev_init); + +#endif /* CONFIG_PPC_ISERIES */ + +int check_legacy_ioport(unsigned long base_port) +{ + if (ppc_md.check_legacy_ioport == NULL) + return 0; + return ppc_md.check_legacy_ioport(base_port); +} +EXPORT_SYMBOL(check_legacy_ioport); + +#ifdef CONFIG_XMON +static int __init early_xmon(char *p) +{ + /* ensure xmon is enabled */ + if (p) { + if (strncmp(p, "on", 2) == 0) + xmon_init(1); + if (strncmp(p, "off", 3) == 0) + xmon_init(0); + if (strncmp(p, "early", 5) != 0) + return 0; + } + xmon_init(1); + debugger(NULL); + + return 0; +} +early_param("xmon", early_xmon); +#endif + +void cpu_die(void) +{ + if (ppc_md.cpu_die) + ppc_md.cpu_die(); +} diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S new file mode 100644 index 00000000000..82d1fedb441 --- /dev/null +++ b/arch/powerpc/kernel/systbl.S @@ -0,0 +1,323 @@ +/* + * This file contains the table of syscall-handling functions. + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * + * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) + * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/ppc_asm.h> + +#ifdef CONFIG_PPC64 +#define SYSCALL(func) .llong .sys_##func,.sys_##func +#define SYSCALL32(func) .llong .sys_##func,.sys32_##func +#define COMPAT_SYS(func) .llong .sys_##func,.compat_sys_##func +#define PPC_SYS(func) .llong .ppc_##func,.ppc_##func +#define OLDSYS(func) .llong .sys_ni_syscall,.sys_ni_syscall +#define SYS32ONLY(func) .llong .sys_ni_syscall,.sys32_##func +#define SYSX(f, f3264, f32) .llong .f,.f3264 +#else +#define SYSCALL(func) .long sys_##func +#define SYSCALL32(func) .long sys_##func +#define COMPAT_SYS(func) .long sys_##func +#define PPC_SYS(func) .long ppc_##func +#define OLDSYS(func) .long sys_##func +#define SYS32ONLY(func) .long sys_##func +#define SYSX(f, f3264, f32) .long f32 +#endif + +#ifdef CONFIG_PPC64 +#define sys_sigpending sys_ni_syscall +#define sys_old_getrlimit sys_ni_syscall +#else +#define ppc_rtas sys_ni_syscall +#endif + +_GLOBAL(sys_call_table) +SYSCALL(restart_syscall) +SYSCALL(exit) +PPC_SYS(fork) +SYSCALL(read) +SYSCALL(write) +COMPAT_SYS(open) +SYSCALL(close) +SYSCALL32(waitpid) +SYSCALL32(creat) +SYSCALL(link) +SYSCALL(unlink) +SYSCALL32(execve) +SYSCALL(chdir) +SYSX(sys64_time,compat_sys_time,sys_time) +SYSCALL(mknod) +SYSCALL(chmod) +SYSCALL(lchown) +SYSCALL(ni_syscall) +OLDSYS(stat) +SYSX(sys_lseek,ppc32_lseek,sys_lseek) +SYSCALL(getpid) +COMPAT_SYS(mount) +SYSX(sys_ni_syscall,sys_oldumount,sys_oldumount) +SYSCALL(setuid) +SYSCALL(getuid) +COMPAT_SYS(stime) +SYSCALL32(ptrace) +SYSCALL(alarm) +OLDSYS(fstat) +SYSCALL32(pause) +COMPAT_SYS(utime) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL32(access) +SYSCALL32(nice) +SYSCALL(ni_syscall) +SYSCALL(sync) +SYSCALL32(kill) +SYSCALL(rename) +SYSCALL32(mkdir) +SYSCALL(rmdir) +SYSCALL(dup) +SYSCALL(pipe) +COMPAT_SYS(times) +SYSCALL(ni_syscall) +SYSCALL(brk) +SYSCALL(setgid) +SYSCALL(getgid) +SYSCALL(signal) +SYSCALL(geteuid) +SYSCALL(getegid) +SYSCALL(acct) +SYSCALL(umount) +SYSCALL(ni_syscall) +COMPAT_SYS(ioctl) +COMPAT_SYS(fcntl) +SYSCALL(ni_syscall) +SYSCALL32(setpgid) +SYSCALL(ni_syscall) +SYS32ONLY(olduname) +SYSCALL32(umask) +SYSCALL(chroot) +SYSCALL(ustat) +SYSCALL(dup2) +SYSCALL(getppid) +SYSCALL(getpgrp) +SYSCALL(setsid) +SYS32ONLY(sigaction) +SYSCALL(sgetmask) +SYSCALL32(ssetmask) +SYSCALL(setreuid) +SYSCALL(setregid) +SYSX(sys_ni_syscall,ppc32_sigsuspend,ppc_sigsuspend) +COMPAT_SYS(sigpending) +SYSCALL32(sethostname) +COMPAT_SYS(setrlimit) +COMPAT_SYS(old_getrlimit) +COMPAT_SYS(getrusage) +SYSCALL32(gettimeofday) +SYSCALL32(settimeofday) +SYSCALL32(getgroups) +SYSCALL32(setgroups) +SYSX(sys_ni_syscall,sys_ni_syscall,ppc_select) +SYSCALL(symlink) +OLDSYS(lstat) +SYSCALL32(readlink) +SYSCALL(uselib) +SYSCALL(swapon) +SYSCALL(reboot) +SYSX(sys_ni_syscall,old32_readdir,old_readdir) +SYSCALL(mmap) +SYSCALL(munmap) +SYSCALL(truncate) +SYSCALL(ftruncate) +SYSCALL(fchmod) +SYSCALL(fchown) +SYSCALL32(getpriority) +SYSCALL32(setpriority) +SYSCALL(ni_syscall) +COMPAT_SYS(statfs) +COMPAT_SYS(fstatfs) +SYSCALL(ni_syscall) +COMPAT_SYS(socketcall) +SYSCALL32(syslog) +COMPAT_SYS(setitimer) +COMPAT_SYS(getitimer) +COMPAT_SYS(newstat) +COMPAT_SYS(newlstat) +COMPAT_SYS(newfstat) +SYSX(sys_ni_syscall,sys32_uname,sys_uname) +SYSCALL(ni_syscall) +SYSCALL(vhangup) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +COMPAT_SYS(wait4) +SYSCALL(swapoff) +SYSCALL32(sysinfo) +SYSCALL32(ipc) +SYSCALL(fsync) +SYSX(sys_ni_syscall,ppc32_sigreturn,sys_sigreturn) +PPC_SYS(clone) +SYSCALL32(setdomainname) +SYSX(ppc64_newuname,ppc64_newuname,sys_newuname) +SYSCALL(ni_syscall) +SYSCALL32(adjtimex) +SYSCALL(mprotect) +SYSX(sys_ni_syscall,compat_sys_sigprocmask,sys_sigprocmask) +SYSCALL(ni_syscall) +SYSCALL(init_module) +SYSCALL(delete_module) +SYSCALL(ni_syscall) +SYSCALL(quotactl) +SYSCALL32(getpgid) +SYSCALL(fchdir) +SYSCALL(bdflush) +SYSCALL32(sysfs) +SYSX(ppc64_personality,ppc64_personality,sys_personality) +SYSCALL(ni_syscall) +SYSCALL(setfsuid) +SYSCALL(setfsgid) +SYSCALL(llseek) +SYSCALL32(getdents) +SYSX(sys_select,ppc32_select,ppc_select) +SYSCALL(flock) +SYSCALL(msync) +COMPAT_SYS(readv) +COMPAT_SYS(writev) +SYSCALL32(getsid) +SYSCALL(fdatasync) +SYSCALL32(sysctl) +SYSCALL(mlock) +SYSCALL(munlock) +SYSCALL(mlockall) +SYSCALL(munlockall) +SYSCALL32(sched_setparam) +SYSCALL32(sched_getparam) +SYSCALL32(sched_setscheduler) +SYSCALL32(sched_getscheduler) +SYSCALL(sched_yield) +SYSCALL32(sched_get_priority_max) +SYSCALL32(sched_get_priority_min) +SYSCALL32(sched_rr_get_interval) +COMPAT_SYS(nanosleep) +SYSCALL(mremap) +SYSCALL(setresuid) +SYSCALL(getresuid) +SYSCALL(ni_syscall) +SYSCALL(poll) +COMPAT_SYS(nfsservctl) +SYSCALL(setresgid) +SYSCALL(getresgid) +SYSCALL32(prctl) +SYSX(ppc64_rt_sigreturn,ppc32_rt_sigreturn,sys_rt_sigreturn) +SYSCALL32(rt_sigaction) +SYSCALL32(rt_sigprocmask) +SYSCALL32(rt_sigpending) +COMPAT_SYS(rt_sigtimedwait) +SYSCALL32(rt_sigqueueinfo) +SYSX(ppc64_rt_sigsuspend,ppc32_rt_sigsuspend,ppc_rt_sigsuspend) +SYSCALL32(pread64) +SYSCALL32(pwrite64) +SYSCALL(chown) +SYSCALL(getcwd) +SYSCALL(capget) +SYSCALL(capset) +SYSCALL32(sigaltstack) +SYSX(sys_sendfile64,sys32_sendfile,sys_sendfile) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +PPC_SYS(vfork) +COMPAT_SYS(getrlimit) +SYSCALL32(readahead) +SYS32ONLY(mmap2) +SYS32ONLY(truncate64) +SYS32ONLY(ftruncate64) +SYSX(sys_ni_syscall,sys_stat64,sys_stat64) +SYSX(sys_ni_syscall,sys_lstat64,sys_lstat64) +SYSX(sys_ni_syscall,sys_fstat64,sys_fstat64) +SYSCALL32(pciconfig_read) +SYSCALL32(pciconfig_write) +SYSCALL32(pciconfig_iobase) +SYSCALL(ni_syscall) +SYSCALL(getdents64) +SYSCALL(pivot_root) +SYSX(sys_ni_syscall,compat_sys_fcntl64,sys_fcntl64) +SYSCALL(madvise) +SYSCALL(mincore) +SYSCALL(gettid) +SYSCALL(tkill) +SYSCALL(setxattr) +SYSCALL(lsetxattr) +SYSCALL(fsetxattr) +SYSCALL(getxattr) +SYSCALL(lgetxattr) +SYSCALL(fgetxattr) +SYSCALL(listxattr) +SYSCALL(llistxattr) +SYSCALL(flistxattr) +SYSCALL(removexattr) +SYSCALL(lremovexattr) +SYSCALL(fremovexattr) +COMPAT_SYS(futex) +COMPAT_SYS(sched_setaffinity) +COMPAT_SYS(sched_getaffinity) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYS32ONLY(sendfile64) +COMPAT_SYS(io_setup) +SYSCALL(io_destroy) +COMPAT_SYS(io_getevents) +COMPAT_SYS(io_submit) +SYSCALL(io_cancel) +SYSCALL(set_tid_address) +SYSX(sys_fadvise64,ppc32_fadvise64,sys_fadvise64) +SYSCALL(exit_group) +SYSX(sys_lookup_dcookie,ppc32_lookup_dcookie,sys_lookup_dcookie) +SYSCALL(epoll_create) +SYSCALL(epoll_ctl) +SYSCALL(epoll_wait) +SYSCALL(remap_file_pages) +SYSX(sys_timer_create,ppc32_timer_create,sys_timer_create) +COMPAT_SYS(timer_settime) +COMPAT_SYS(timer_gettime) +SYSCALL(timer_getoverrun) +SYSCALL(timer_delete) +COMPAT_SYS(clock_settime) +COMPAT_SYS(clock_gettime) +COMPAT_SYS(clock_getres) +COMPAT_SYS(clock_nanosleep) +SYSX(ppc64_swapcontext,ppc32_swapcontext,ppc_swapcontext) +SYSCALL32(tgkill) +SYSCALL32(utimes) +COMPAT_SYS(statfs64) +COMPAT_SYS(fstatfs64) +SYSX(sys_ni_syscall, ppc32_fadvise64_64, sys_fadvise64_64) +PPC_SYS(rtas) +OLDSYS(debug_setcontext) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +COMPAT_SYS(mbind) +COMPAT_SYS(get_mempolicy) +COMPAT_SYS(set_mempolicy) +COMPAT_SYS(mq_open) +SYSCALL(mq_unlink) +COMPAT_SYS(mq_timedsend) +COMPAT_SYS(mq_timedreceive) +COMPAT_SYS(mq_notify) +COMPAT_SYS(mq_getsetattr) +COMPAT_SYS(kexec_load) +SYSCALL32(add_key) +SYSCALL32(request_key) +COMPAT_SYS(keyctl) +COMPAT_SYS(waitid) +SYSCALL32(ioprio_set) +SYSCALL32(ioprio_get) +SYSCALL(inotify_init) +SYSCALL(inotify_add_watch) +SYSCALL(inotify_rm_watch) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c new file mode 100644 index 00000000000..6a881e3ea01 --- /dev/null +++ b/arch/powerpc/kernel/traps.c @@ -0,0 +1,1102 @@ +/* + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Modified by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras (paulus@samba.org) + */ + +/* + * This file handles the architecture-dependent parts of hardware exceptions + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/prctl.h> +#include <linux/delay.h> +#include <linux/kprobes.h> + +#include <asm/kdebug.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/rtas.h> +#include <asm/xmon.h> +#ifdef CONFIG_PPC32 +#include <asm/reg.h> +#include <asm/perfmon.h> +#endif +#ifdef CONFIG_PMAC_BACKLIGHT +#include <asm/backlight.h> +#endif +#ifdef CONFIG_PPC64 +#include <asm/firmware.h> +#include <asm/processor.h> +#include <asm/systemcfg.h> +#include <asm/pmc.h> +#endif + +#ifdef CONFIG_PPC64 /* XXX */ +#define _IO_BASE pci_io_base +#endif + +#ifdef CONFIG_DEBUGGER +int (*__debugger)(struct pt_regs *regs); +int (*__debugger_ipi)(struct pt_regs *regs); +int (*__debugger_bpt)(struct pt_regs *regs); +int (*__debugger_sstep)(struct pt_regs *regs); +int (*__debugger_iabr_match)(struct pt_regs *regs); +int (*__debugger_dabr_match)(struct pt_regs *regs); +int (*__debugger_fault_handler)(struct pt_regs *regs); + +EXPORT_SYMBOL(__debugger); +EXPORT_SYMBOL(__debugger_ipi); +EXPORT_SYMBOL(__debugger_bpt); +EXPORT_SYMBOL(__debugger_sstep); +EXPORT_SYMBOL(__debugger_iabr_match); +EXPORT_SYMBOL(__debugger_dabr_match); +EXPORT_SYMBOL(__debugger_fault_handler); +#endif + +struct notifier_block *powerpc_die_chain; +static DEFINE_SPINLOCK(die_notifier_lock); + +int register_die_notifier(struct notifier_block *nb) +{ + int err = 0; + unsigned long flags; + + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_register(&powerpc_die_chain, nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + return err; +} + +/* + * Trap & Exception support + */ + +static DEFINE_SPINLOCK(die_lock); + +int die(const char *str, struct pt_regs *regs, long err) +{ + static int die_counter; + int nl = 0; + + if (debugger(regs)) + return 1; + + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); +#ifdef CONFIG_PMAC_BACKLIGHT + if (_machine == _MACH_Pmac) { + set_backlight_enable(1); + set_backlight_level(BACKLIGHT_MAX); + } +#endif + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); + nl = 1; +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); + nl = 1; +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); + nl = 1; +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); + nl = 1; +#endif +#ifdef CONFIG_PPC64 + switch (systemcfg->platform) { + case PLATFORM_PSERIES: + printk("PSERIES "); + nl = 1; + break; + case PLATFORM_PSERIES_LPAR: + printk("PSERIES LPAR "); + nl = 1; + break; + case PLATFORM_ISERIES_LPAR: + printk("ISERIES LPAR "); + nl = 1; + break; + case PLATFORM_POWERMAC: + printk("POWERMAC "); + nl = 1; + break; + case PLATFORM_BPA: + printk("BPA "); + nl = 1; + break; + } +#endif + if (nl) + printk("\n"); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) { +#ifdef CONFIG_PPC64 + printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); + ssleep(5); +#endif + panic("Fatal exception"); + } + do_exit(err); + + return 0; +} + +void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) +{ + siginfo_t info; + + if (!user_mode(regs)) { + if (die("Exception in kernel mode", regs, signr)) + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = signr; + info.si_code = code; + info.si_addr = (void __user *) addr; + force_sig_info(signr, &info, current); + + /* + * Init gets no signals that it doesn't have a handler for. + * That's all very well, but if it has caused a synchronous + * exception and we ignore the resulting signal, it will just + * generate the same exception over and over again and we get + * nowhere. Better to kill it and let the kernel panic. + */ + if (current->pid == 1) { + __sighandler_t handler; + + spin_lock_irq(¤t->sighand->siglock); + handler = current->sighand->action[signr-1].sa.sa_handler; + spin_unlock_irq(¤t->sighand->siglock); + if (handler == SIG_DFL) { + /* init has generated a synchronous exception + and it doesn't have a handler for the signal */ + printk(KERN_CRIT "init has generated signal %d " + "but has no handler for it\n", signr); + do_exit(signr); + } + } +} + +#ifdef CONFIG_PPC64 +void system_reset_exception(struct pt_regs *regs) +{ + /* See if any machine dependent calls */ + if (ppc_md.system_reset_exception) + ppc_md.system_reset_exception(regs); + + die("System Reset", regs, SIGABRT); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable System Reset"); + + /* What should we do here? We could issue a shutdown or hard reset. */ +} +#endif + +/* + * I/O accesses can cause machine checks on powermacs. + * Check if the NIP corresponds to the address of a sync + * instruction for which there is an entry in the exception + * table. + * Note that the 601 only takes a machine check on TEA + * (transfer error ack) signal assertion, and does not + * set any of the top 16 bits of SRR1. + * -- paulus. + */ +static inline int check_io_access(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_PMAC + unsigned long msr = regs->msr; + const struct exception_table_entry *entry; + unsigned int *nip = (unsigned int *)regs->nip; + + if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000))) + && (entry = search_exception_tables(regs->nip)) != NULL) { + /* + * Check that it's a sync instruction, or somewhere + * in the twi; isync; nop sequence that inb/inw/inl uses. + * As the address is in the exception table + * we should be able to read the instr there. + * For the debug message, we look at the preceding + * load or store. + */ + if (*nip == 0x60000000) /* nop */ + nip -= 2; + else if (*nip == 0x4c00012c) /* isync */ + --nip; + if (*nip == 0x7c0004ac || (*nip >> 26) == 3) { + /* sync or twi */ + unsigned int rb; + + --nip; + rb = (*nip >> 11) & 0x1f; + printk(KERN_DEBUG "%s bad port %lx at %p\n", + (*nip & 0x100)? "OUT to": "IN from", + regs->gpr[rb] - _IO_BASE, nip); + regs->msr |= MSR_RI; + regs->nip = entry->fixup; + return 1; + } + } +#endif /* CONFIG_PPC_PMAC */ + return 0; +} + +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +/* On 4xx, the reason for the machine check or program exception + is in the ESR. */ +#define get_reason(regs) ((regs)->dsisr) +#ifndef CONFIG_FSL_BOOKE +#define get_mc_reason(regs) ((regs)->dsisr) +#else +#define get_mc_reason(regs) (mfspr(SPRN_MCSR)) +#endif +#define REASON_FP ESR_FP +#define REASON_ILLEGAL (ESR_PIL | ESR_PUO) +#define REASON_PRIVILEGED ESR_PPR +#define REASON_TRAP ESR_PTR + +/* single-step stuff */ +#define single_stepping(regs) (current->thread.dbcr0 & DBCR0_IC) +#define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC) + +#else +/* On non-4xx, the reason for the machine check or program + exception is in the MSR. */ +#define get_reason(regs) ((regs)->msr) +#define get_mc_reason(regs) ((regs)->msr) +#define REASON_FP 0x100000 +#define REASON_ILLEGAL 0x80000 +#define REASON_PRIVILEGED 0x40000 +#define REASON_TRAP 0x20000 + +#define single_stepping(regs) ((regs)->msr & MSR_SE) +#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) +#endif + +/* + * This is "fall-back" implementation for configurations + * which don't provide platform-specific machine check info + */ +void __attribute__ ((weak)) +platform_machine_check(struct pt_regs *regs) +{ +} + +void machine_check_exception(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC64 + int recover = 0; + + /* See if any machine dependent calls */ + if (ppc_md.machine_check_exception) + recover = ppc_md.machine_check_exception(regs); + + if (recover) + return; +#else + unsigned long reason = get_mc_reason(regs); + + if (user_mode(regs)) { + regs->msr |= MSR_RI; + _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); + return; + } + +#if defined(CONFIG_8xx) && defined(CONFIG_PCI) + /* the qspan pci read routines can cause machine checks -- Cort */ + bad_page_fault(regs, regs->dar, SIGBUS); + return; +#endif + + if (debugger_fault_handler(regs)) { + regs->msr |= MSR_RI; + return; + } + + if (check_io_access(regs)) + return; + +#if defined(CONFIG_4xx) && !defined(CONFIG_440A) + if (reason & ESR_IMCP) { + printk("Instruction"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } else + printk("Data"); + printk(" machine check in kernel mode.\n"); +#elif defined(CONFIG_440A) + printk("Machine check in kernel mode.\n"); + if (reason & ESR_IMCP){ + printk("Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } + else { + u32 mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk("Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk("Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk("Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk("TLB Parity Error\n"); + if (mcsr & MCSR_ICP){ + flush_instruction_cache(); + printk("I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk("D-Cache Search Parity Error\n"); + if (mcsr & MCSR_DCFP) + printk("D-Cache Flush Parity Error\n"); + if (mcsr & MCSR_IMPE) + printk("Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + } +#elif defined (CONFIG_E500) + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + if (reason & MCSR_ICPERR) + printk("Instruction Cache Parity Error\n"); + if (reason & MCSR_DCP_PERR) + printk("Data Cache Push Parity Error\n"); + if (reason & MCSR_DCPERR) + printk("Data Cache Parity Error\n"); + if (reason & MCSR_GL_CI) + printk("Guarded Load or Cache-Inhibited stwcx.\n"); + if (reason & MCSR_BUS_IAERR) + printk("Bus - Instruction Address Error\n"); + if (reason & MCSR_BUS_RAERR) + printk("Bus - Read Address Error\n"); + if (reason & MCSR_BUS_WAERR) + printk("Bus - Write Address Error\n"); + if (reason & MCSR_BUS_IBERR) + printk("Bus - Instruction Data Error\n"); + if (reason & MCSR_BUS_RBERR) + printk("Bus - Read Data Bus Error\n"); + if (reason & MCSR_BUS_WBERR) + printk("Bus - Read Data Bus Error\n"); + if (reason & MCSR_BUS_IPERR) + printk("Bus - Instruction Parity Error\n"); + if (reason & MCSR_BUS_RPERR) + printk("Bus - Read Parity Error\n"); +#elif defined (CONFIG_E200) + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + if (reason & MCSR_CP_PERR) + printk("Cache Push Parity Error\n"); + if (reason & MCSR_CPERR) + printk("Cache Parity Error\n"); + if (reason & MCSR_EXCP_ERR) + printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); + if (reason & MCSR_BUS_IRERR) + printk("Bus - Read Bus Error on instruction fetch\n"); + if (reason & MCSR_BUS_DRERR) + printk("Bus - Read Bus Error on data load\n"); + if (reason & MCSR_BUS_WRERR) + printk("Bus - Write Bus Error on buffered store or cache line push\n"); +#else /* !CONFIG_4xx && !CONFIG_E500 && !CONFIG_E200 */ + printk("Machine check in kernel mode.\n"); + printk("Caused by (from SRR1=%lx): ", reason); + switch (reason & 0x601F0000) { + case 0x80000: + printk("Machine check signal\n"); + break; + case 0: /* for 601 */ + case 0x40000: + case 0x140000: /* 7450 MSS error and TEA */ + printk("Transfer error ack signal\n"); + break; + case 0x20000: + printk("Data parity error signal\n"); + break; + case 0x10000: + printk("Address parity error signal\n"); + break; + case 0x20000000: + printk("L1 Data Cache error\n"); + break; + case 0x40000000: + printk("L1 Instruction Cache error\n"); + break; + case 0x00100000: + printk("L2 data cache parity error\n"); + break; + default: + printk("Unknown values in msr\n"); + } +#endif /* CONFIG_4xx */ + + /* + * Optional platform-provided routine to print out + * additional info, e.g. bus error registers. + */ + platform_machine_check(regs); +#endif /* CONFIG_PPC64 */ + + if (debugger_fault_handler(regs)) + return; + die("Machine check", regs, SIGBUS); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable Machine check"); +} + +void SMIException(struct pt_regs *regs) +{ + die("System Management Interrupt", regs, SIGABRT); +} + +void unknown_exception(struct pt_regs *regs) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, 0, 0); +} + +void instruction_breakpoint_exception(struct pt_regs *regs) +{ + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_iabr_match(regs)) + return; + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); +} + +void RunModeException(struct pt_regs *regs) +{ + _exception(SIGTRAP, regs, 0, 0); +} + +void __kprobes single_step_exception(struct pt_regs *regs) +{ + regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ + + if (notify_die(DIE_SSTEP, "single_step", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_sstep(regs)) + return; + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); +} + +/* + * After we have successfully emulated an instruction, we have to + * check if the instruction was being single-stepped, and if so, + * pretend we got a single-step exception. This was pointed out + * by Kumar Gala. -- paulus + */ +static void emulate_single_step(struct pt_regs *regs) +{ + if (single_stepping(regs)) { + clear_single_step(regs); + _exception(SIGTRAP, regs, TRAP_TRACE, 0); + } +} + +static void parse_fpe(struct pt_regs *regs) +{ + int code = 0; + unsigned long fpscr; + + flush_fp_to_thread(current); + + fpscr = current->thread.fpscr; + + /* Invalid operation */ + if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) + code = FPE_FLTINV; + + /* Overflow */ + else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX)) + code = FPE_FLTOVF; + + /* Underflow */ + else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX)) + code = FPE_FLTUND; + + /* Divide by zero */ + else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX)) + code = FPE_FLTDIV; + + /* Inexact result */ + else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX)) + code = FPE_FLTRES; + + _exception(SIGFPE, regs, code, regs->nip); +} + +/* + * Illegal instruction emulation support. Originally written to + * provide the PVR to user applications using the mfspr rd, PVR. + * Return non-zero if we can't emulate, or -EFAULT if the associated + * memory access caused an access fault. Return zero on success. + * + * There are a couple of ways to do this, either "decode" the instruction + * or directly match lots of bits. In this case, matching lots of + * bits is faster and easier. + * + */ +#define INST_MFSPR_PVR 0x7c1f42a6 +#define INST_MFSPR_PVR_MASK 0xfc1fffff + +#define INST_DCBA 0x7c0005ec +#define INST_DCBA_MASK 0x7c0007fe + +#define INST_MCRXR 0x7c000400 +#define INST_MCRXR_MASK 0x7c0007fe + +#define INST_STRING 0x7c00042a +#define INST_STRING_MASK 0x7c0007fe +#define INST_STRING_GEN_MASK 0x7c00067e +#define INST_LSWI 0x7c0004aa +#define INST_LSWX 0x7c00042a +#define INST_STSWI 0x7c0005aa +#define INST_STSWX 0x7c00052a + +static int emulate_string_inst(struct pt_regs *regs, u32 instword) +{ + u8 rT = (instword >> 21) & 0x1f; + u8 rA = (instword >> 16) & 0x1f; + u8 NB_RB = (instword >> 11) & 0x1f; + u32 num_bytes; + unsigned long EA; + int pos = 0; + + /* Early out if we are an invalid form of lswx */ + if ((instword & INST_STRING_MASK) == INST_LSWX) + if ((rT == rA) || (rT == NB_RB)) + return -EINVAL; + + EA = (rA == 0) ? 0 : regs->gpr[rA]; + + switch (instword & INST_STRING_MASK) { + case INST_LSWX: + case INST_STSWX: + EA += NB_RB; + num_bytes = regs->xer & 0x7f; + break; + case INST_LSWI: + case INST_STSWI: + num_bytes = (NB_RB == 0) ? 32 : NB_RB; + break; + default: + return -EINVAL; + } + + while (num_bytes != 0) + { + u8 val; + u32 shift = 8 * (3 - (pos & 0x3)); + + switch ((instword & INST_STRING_MASK)) { + case INST_LSWX: + case INST_LSWI: + if (get_user(val, (u8 __user *)EA)) + return -EFAULT; + /* first time updating this reg, + * zero it out */ + if (pos == 0) + regs->gpr[rT] = 0; + regs->gpr[rT] |= val << shift; + break; + case INST_STSWI: + case INST_STSWX: + val = regs->gpr[rT] >> shift; + if (put_user(val, (u8 __user *)EA)) + return -EFAULT; + break; + } + /* move EA to next address */ + EA += 1; + num_bytes--; + + /* manage our position within the register */ + if (++pos == 4) { + pos = 0; + if (++rT == 32) + rT = 0; + } + } + + return 0; +} + +static int emulate_instruction(struct pt_regs *regs) +{ + u32 instword; + u32 rd; + + if (!user_mode(regs)) + return -EINVAL; + CHECK_FULL_REGS(regs); + + if (get_user(instword, (u32 __user *)(regs->nip))) + return -EFAULT; + + /* Emulate the mfspr rD, PVR. */ + if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_PVR); + return 0; + } + + /* Emulating the dcba insn is just a no-op. */ + if ((instword & INST_DCBA_MASK) == INST_DCBA) + return 0; + + /* Emulate the mcrxr insn. */ + if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { + int shift = (instword >> 21) & 0x1c; + unsigned long msk = 0xf0000000UL >> shift; + + regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); + regs->xer &= ~0xf0000000UL; + return 0; + } + + /* Emulate load/store string insn. */ + if ((instword & INST_STRING_GEN_MASK) == INST_STRING) + return emulate_string_inst(regs, instword); + + return -EINVAL; +} + +/* + * Look through the list of trap instructions that are used for BUG(), + * BUG_ON() and WARN_ON() and see if we hit one. At this point we know + * that the exception was caused by a trap instruction of some kind. + * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0 + * otherwise. + */ +extern struct bug_entry __start___bug_table[], __stop___bug_table[]; + +#ifndef CONFIG_MODULES +#define module_find_bug(x) NULL +#endif + +struct bug_entry *find_bug(unsigned long bugaddr) +{ + struct bug_entry *bug; + + for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) + if (bugaddr == bug->bug_addr) + return bug; + return module_find_bug(bugaddr); +} + +static int check_bug_trap(struct pt_regs *regs) +{ + struct bug_entry *bug; + unsigned long addr; + + if (regs->msr & MSR_PR) + return 0; /* not in kernel */ + addr = regs->nip; /* address of trap instruction */ + if (addr < PAGE_OFFSET) + return 0; + bug = find_bug(regs->nip); + if (bug == NULL) + return 0; + if (bug->line & BUG_WARNING_TRAP) { + /* this is a WARN_ON rather than BUG/BUG_ON */ +#ifdef CONFIG_XMON + xmon_printf(KERN_ERR "Badness in %s at %s:%d\n", + bug->function, bug->file, + bug->line & ~BUG_WARNING_TRAP); +#endif /* CONFIG_XMON */ + printk(KERN_ERR "Badness in %s at %s:%d\n", + bug->function, bug->file, + bug->line & ~BUG_WARNING_TRAP); + dump_stack(); + return 1; + } +#ifdef CONFIG_XMON + xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, bug->line); + xmon(regs); +#endif /* CONFIG_XMON */ + printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, bug->line); + + return 0; +} + +void __kprobes program_check_exception(struct pt_regs *regs) +{ + unsigned int reason = get_reason(regs); + extern int do_mathemu(struct pt_regs *regs); + +#ifdef CONFIG_MATH_EMULATION + /* (reason & REASON_ILLEGAL) would be the obvious thing here, + * but there seems to be a hardware bug on the 405GP (RevD) + * that means ESR is sometimes set incorrectly - either to + * ESR_DST (!?) or 0. In the process of chasing this with the + * hardware people - not sure if it can happen on any illegal + * instruction or only on FP instructions, whether there is a + * pattern to occurences etc. -dgibson 31/Mar/2003 */ + if (!(reason & REASON_TRAP) && do_mathemu(regs) == 0) { + emulate_single_step(regs); + return; + } +#endif /* CONFIG_MATH_EMULATION */ + + if (reason & REASON_FP) { + /* IEEE FP exception */ + parse_fpe(regs); + return; + } + if (reason & REASON_TRAP) { + /* trap exception */ + if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) + == NOTIFY_STOP) + return; + if (debugger_bpt(regs)) + return; + if (check_bug_trap(regs)) { + regs->nip += 4; + return; + } + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; + } + + /* Try to emulate it if we should. */ + if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { + switch (emulate_instruction(regs)) { + case 0: + regs->nip += 4; + emulate_single_step(regs); + return; + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } + } + + if (reason & REASON_PRIVILEGED) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); +} + +void alignment_exception(struct pt_regs *regs) +{ + int fixed; + + fixed = fix_alignment(regs); + + if (fixed == 1) { + regs->nip += 4; /* skip over emulated instruction */ + emulate_single_step(regs); + return; + } + + /* Operand address was bad */ + if (fixed == -EFAULT) { + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_ACCERR, regs->dar); + else + /* Search exception table */ + bad_page_fault(regs, regs->dar, SIGSEGV); + return; + } + _exception(SIGBUS, regs, BUS_ADRALN, regs->dar); +} + +void StackOverflow(struct pt_regs *regs) +{ + printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", + current, regs->gpr[1]); + debugger(regs); + show_regs(regs); + panic("kernel stack overflow"); +} + +void nonrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n", + regs->nip, regs->msr); + debugger(regs); + die("nonrecoverable exception", regs, SIGKILL); +} + +void trace_syscall(struct pt_regs *regs) +{ + printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", + current, current->pid, regs->nip, regs->link, regs->gpr[0], + regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); +} + +void kernel_fp_unavailable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); +} + +void altivec_unavailable_exception(struct pt_regs *regs) +{ +#if !defined(CONFIG_ALTIVEC) + if (user_mode(regs)) { + /* A user program has executed an altivec instruction, + but this kernel doesn't support altivec. */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } +#endif + printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); +} + +#ifdef CONFIG_PPC64 +extern perf_irq_t perf_irq; +#endif + +#if defined(CONFIG_PPC64) || defined(CONFIG_E500) +void performance_monitor_exception(struct pt_regs *regs) +{ + perf_irq(regs); +} +#endif + +#ifdef CONFIG_8xx +void SoftwareEmulation(struct pt_regs *regs) +{ + extern int do_mathemu(struct pt_regs *); + extern int Soft_emulate_8xx(struct pt_regs *); + int errcode; + + CHECK_FULL_REGS(regs); + + if (!user_mode(regs)) { + debugger(regs); + die("Kernel Mode Software FPU Emulation", regs, SIGFPE); + } + +#ifdef CONFIG_MATH_EMULATION + errcode = do_mathemu(regs); +#else + errcode = Soft_emulate_8xx(regs); +#endif + if (errcode) { + if (errcode > 0) + _exception(SIGFPE, regs, 0, 0); + else if (errcode == -EFAULT) + _exception(SIGSEGV, regs, 0, 0); + else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + } else + emulate_single_step(regs); +} +#endif /* CONFIG_8xx */ + +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + +void DebugException(struct pt_regs *regs, unsigned long debug_status) +{ + if (debug_status & DBSR_IC) { /* instruction completion */ + regs->msr &= ~MSR_DE; + if (user_mode(regs)) { + current->thread.dbcr0 &= ~DBCR0_IC; + } else { + /* Disable instruction completion */ + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC); + /* Clear the instruction completion event */ + mtspr(SPRN_DBSR, DBSR_IC); + if (debugger_sstep(regs)) + return; + } + _exception(SIGTRAP, regs, TRAP_TRACE, 0); + } +} +#endif /* CONFIG_4xx || CONFIG_BOOKE */ + +#if !defined(CONFIG_TAU_INT) +void TAUException(struct pt_regs *regs) +{ + printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n", + regs->nip, regs->msr, regs->trap, print_tainted()); +} +#endif /* CONFIG_INT_TAU */ + +#ifdef CONFIG_ALTIVEC +void altivec_assist_exception(struct pt_regs *regs) +{ + int err; + + if (!user_mode(regs)) { + printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" + " at %lx\n", regs->nip); + die("Kernel VMX/Altivec assist exception", regs, SIGILL); + } + + flush_altivec_to_thread(current); + + err = emulate_altivec(regs); + if (err == 0) { + regs->nip += 4; /* skip emulated instruction */ + emulate_single_step(regs); + return; + } + + if (err == -EFAULT) { + /* got an error reading the instruction */ + _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip); + } else { + /* didn't recognize the instruction */ + /* XXX quick hack for now: set the non-Java bit in the VSCR */ + if (printk_ratelimit()) + printk(KERN_ERR "Unrecognized altivec instruction " + "in %s at %lx\n", current->comm, regs->nip); + current->thread.vscr.u[3] |= 0x10000; + } +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_FSL_BOOKE +void CacheLockingException(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + /* We treat cache locking instructions from the user + * as priv ops, in the future we could try to do + * something smarter + */ + if (error_code & (ESR_DLK|ESR_ILK)) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + return; +} +#endif /* CONFIG_FSL_BOOKE */ + +#ifdef CONFIG_SPE +void SPEFloatingPointException(struct pt_regs *regs) +{ + unsigned long spefscr; + int fpexc_mode; + int code = 0; + + spefscr = current->thread.spefscr; + fpexc_mode = current->thread.fpexc_mode; + + /* Hardware does not neccessarily set sticky + * underflow/overflow/invalid flags */ + if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF)) { + code = FPE_FLTOVF; + spefscr |= SPEFSCR_FOVFS; + } + else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND)) { + code = FPE_FLTUND; + spefscr |= SPEFSCR_FUNFS; + } + else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV)) + code = FPE_FLTDIV; + else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV)) { + code = FPE_FLTINV; + spefscr |= SPEFSCR_FINVS; + } + else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES)) + code = FPE_FLTRES; + + current->thread.spefscr = spefscr; + + _exception(SIGFPE, regs, code, regs->nip); + return; +} +#endif + +/* + * We enter here if we get an unrecoverable exception, that is, one + * that happened at a point where the RI (recoverable interrupt) bit + * in the MSR is 0. This indicates that SRR0/1 are live, and that + * we therefore lost state by taking this exception. + */ +void unrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", + regs->trap, regs->nip); + die("Unrecoverable exception", regs, SIGABRT); +} + +#ifdef CONFIG_BOOKE_WDT +/* + * Default handler for a Watchdog exception, + * spins until a reboot occurs + */ +void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) +{ + /* Generic WatchdogHandler, implement your own */ + mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE)); + return; +} + +void WatchdogException(struct pt_regs *regs) +{ + printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); + WatchdogHandler(regs); +} +#endif + +/* + * We enter here if we discover during exception entry that we are + * running in supervisor mode with a userspace value in the stack pointer. + */ +void kernel_bad_stack(struct pt_regs *regs) +{ + printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", + regs->gpr[1], regs->nip); + die("Bad kernel stack pointer", regs, SIGABRT); +} + +void __init trap_init(void) +{ +} diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c new file mode 100644 index 00000000000..604d0947cb2 --- /dev/null +++ b/arch/powerpc/kernel/vecemu.c @@ -0,0 +1,345 @@ +/* + * Routines to emulate some Altivec/VMX instructions, specifically + * those that can trap when given denormalized operands in Java mode. + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <asm/ptrace.h> +#include <asm/processor.h> +#include <asm/uaccess.h> + +/* Functions in vector.S */ +extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); +extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); +extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); +extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); +extern void vrefp(vector128 *dst, vector128 *src); +extern void vrsqrtefp(vector128 *dst, vector128 *src); +extern void vexptep(vector128 *dst, vector128 *src); + +static unsigned int exp2s[8] = { + 0x800000, + 0x8b95c2, + 0x9837f0, + 0xa5fed7, + 0xb504f3, + 0xc5672a, + 0xd744fd, + 0xeac0c7 +}; + +/* + * Computes an estimate of 2^x. The `s' argument is the 32-bit + * single-precision floating-point representation of x. + */ +static unsigned int eexp2(unsigned int s) +{ + int exp, pwr; + unsigned int mant, frac; + + /* extract exponent field from input */ + exp = ((s >> 23) & 0xff) - 127; + if (exp > 7) { + /* check for NaN input */ + if (exp == 128 && (s & 0x7fffff) != 0) + return s | 0x400000; /* return QNaN */ + /* 2^-big = 0, 2^+big = +Inf */ + return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */ + } + if (exp < -23) + return 0x3f800000; /* 1.0 */ + + /* convert to fixed point integer in 9.23 representation */ + pwr = (s & 0x7fffff) | 0x800000; + if (exp > 0) + pwr <<= exp; + else + pwr >>= -exp; + if (s & 0x80000000) + pwr = -pwr; + + /* extract integer part, which becomes exponent part of result */ + exp = (pwr >> 23) + 126; + if (exp >= 254) + return 0x7f800000; + if (exp < -23) + return 0; + + /* table lookup on top 3 bits of fraction to get mantissa */ + mant = exp2s[(pwr >> 20) & 7]; + + /* linear interpolation using remaining 20 bits of fraction */ + asm("mulhwu %0,%1,%2" : "=r" (frac) + : "r" (pwr << 12), "r" (0x172b83ff)); + asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant)); + mant += frac; + + if (exp >= 0) + return mant + (exp << 23); + + /* denormalized result */ + exp = -exp; + mant += 1 << (exp - 1); + return mant >> exp; +} + +/* + * Computes an estimate of log_2(x). The `s' argument is the 32-bit + * single-precision floating-point representation of x. + */ +static unsigned int elog2(unsigned int s) +{ + int exp, mant, lz, frac; + + exp = s & 0x7f800000; + mant = s & 0x7fffff; + if (exp == 0x7f800000) { /* Inf or NaN */ + if (mant != 0) + s |= 0x400000; /* turn NaN into QNaN */ + return s; + } + if ((exp | mant) == 0) /* +0 or -0 */ + return 0xff800000; /* return -Inf */ + + if (exp == 0) { + /* denormalized */ + asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant)); + mant <<= lz - 8; + exp = (-118 - lz) << 23; + } else { + mant |= 0x800000; + exp -= 127 << 23; + } + + if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */ + exp |= 0x400000; /* 0.5 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */ + } + if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */ + exp |= 0x200000; /* 0.25 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */ + } + if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */ + exp |= 0x100000; /* 0.125 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */ + } + if (mant > 0x800000) { /* 1.0 * 2^23 */ + /* calculate (mant - 1) * 1.381097463 */ + /* 1.381097463 == 0.125 / (2^0.125 - 1) */ + asm("mulhwu %0,%1,%2" : "=r" (frac) + : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a)); + exp += frac; + } + s = exp & 0x80000000; + if (exp != 0) { + if (s) + exp = -exp; + asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp)); + lz = 8 - lz; + if (lz > 0) + exp >>= lz; + else if (lz < 0) + exp <<= -lz; + s += ((lz + 126) << 23) + exp; + } + return s; +} + +#define VSCR_SAT 1 + +static int ctsxs(unsigned int x, int scale, unsigned int *vscrp) +{ + int exp, mant; + + exp = (x >> 23) & 0xff; + mant = x & 0x7fffff; + if (exp == 255 && mant != 0) + return 0; /* NaN -> 0 */ + exp = exp - 127 + scale; + if (exp < 0) + return 0; /* round towards zero */ + if (exp >= 31) { + /* saturate, unless the result would be -2^31 */ + if (x + (scale << 23) != 0xcf000000) + *vscrp |= VSCR_SAT; + return (x & 0x80000000)? 0x80000000: 0x7fffffff; + } + mant |= 0x800000; + mant = (mant << 7) >> (30 - exp); + return (x & 0x80000000)? -mant: mant; +} + +static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) +{ + int exp; + unsigned int mant; + + exp = (x >> 23) & 0xff; + mant = x & 0x7fffff; + if (exp == 255 && mant != 0) + return 0; /* NaN -> 0 */ + exp = exp - 127 + scale; + if (exp < 0) + return 0; /* round towards zero */ + if (x & 0x80000000) { + /* negative => saturate to 0 */ + *vscrp |= VSCR_SAT; + return 0; + } + if (exp >= 32) { + /* saturate */ + *vscrp |= VSCR_SAT; + return 0xffffffff; + } + mant |= 0x800000; + mant = (mant << 8) >> (31 - exp); + return mant; +} + +/* Round to floating integer, towards 0 */ +static unsigned int rfiz(unsigned int x) +{ + int exp; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if (exp < 0) + return x & 0x80000000; /* |x| < 1.0 rounds to 0 */ + return x & ~(0x7fffff >> exp); +} + +/* Round to floating integer, towards +/- Inf */ +static unsigned int rfii(unsigned int x) +{ + int exp, mask; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if ((x & 0x7fffffff) == 0) + return x; /* +/-0 -> +/-0 */ + if (exp < 0) + /* 0 < |x| < 1.0 rounds to +/- 1.0 */ + return (x & 0x80000000) | 0x3f800000; + mask = 0x7fffff >> exp; + /* mantissa overflows into exponent - that's OK, + it can't overflow into the sign bit */ + return (x + mask) & ~mask; +} + +/* Round to floating integer, to nearest */ +static unsigned int rfin(unsigned int x) +{ + int exp, half; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if (exp < -1) + return x & 0x80000000; /* |x| < 0.5 -> +/-0 */ + if (exp == -1) + /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */ + return (x & 0x80000000) | 0x3f800000; + half = 0x400000 >> exp; + /* add 0.5 to the magnitude and chop off the fraction bits */ + return (x + half) & ~(0x7fffff >> exp); +} + +int emulate_altivec(struct pt_regs *regs) +{ + unsigned int instr, i; + unsigned int va, vb, vc, vd; + vector128 *vrs; + + if (get_user(instr, (unsigned int __user *) regs->nip)) + return -EFAULT; + if ((instr >> 26) != 4) + return -EINVAL; /* not an altivec instruction */ + vd = (instr >> 21) & 0x1f; + va = (instr >> 16) & 0x1f; + vb = (instr >> 11) & 0x1f; + vc = (instr >> 6) & 0x1f; + + vrs = current->thread.vr; + switch (instr & 0x3f) { + case 10: + switch (vc) { + case 0: /* vaddfp */ + vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); + break; + case 1: /* vsubfp */ + vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); + break; + case 4: /* vrefp */ + vrefp(&vrs[vd], &vrs[vb]); + break; + case 5: /* vrsqrtefp */ + vrsqrtefp(&vrs[vd], &vrs[vb]); + break; + case 6: /* vexptefp */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = eexp2(vrs[vb].u[i]); + break; + case 7: /* vlogefp */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = elog2(vrs[vb].u[i]); + break; + case 8: /* vrfin */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = rfin(vrs[vb].u[i]); + break; + case 9: /* vrfiz */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = rfiz(vrs[vb].u[i]); + break; + case 10: /* vrfip */ + for (i = 0; i < 4; ++i) { + u32 x = vrs[vb].u[i]; + x = (x & 0x80000000)? rfiz(x): rfii(x); + vrs[vd].u[i] = x; + } + break; + case 11: /* vrfim */ + for (i = 0; i < 4; ++i) { + u32 x = vrs[vb].u[i]; + x = (x & 0x80000000)? rfii(x): rfiz(x); + vrs[vd].u[i] = x; + } + break; + case 14: /* vctuxs */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, + ¤t->thread.vscr.u[3]); + break; + case 15: /* vctsxs */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, + ¤t->thread.vscr.u[3]); + break; + default: + return -EINVAL; + } + break; + case 46: /* vmaddfp */ + vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); + break; + case 47: /* vnmsubfp */ + vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S new file mode 100644 index 00000000000..66b3d03c5fa --- /dev/null +++ b/arch/powerpc/kernel/vector.S @@ -0,0 +1,197 @@ +#include <linux/config.h> +#include <asm/ppc_asm.h> +#include <asm/reg.h> + +/* + * The routines below are in assembler so we can closely control the + * usage of floating-point registers. These routines must be called + * with preempt disabled. + */ +#ifdef CONFIG_PPC32 + .data +fpzero: + .long 0 +fpone: + .long 0x3f800000 /* 1.0 in single-precision FP */ +fphalf: + .long 0x3f000000 /* 0.5 in single-precision FP */ + +#define LDCONST(fr, name) \ + lis r11,name@ha; \ + lfs fr,name@l(r11) +#else + + .section ".toc","aw" +fpzero: + .tc FD_0_0[TC],0 +fpone: + .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */ +fphalf: + .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */ + +#define LDCONST(fr, name) \ + lfd fr,name@toc(r2) +#endif + + .text +/* + * Internal routine to enable floating point and set FPSCR to 0. + * Don't call it from C; it doesn't use the normal calling convention. + */ +fpenable: +#ifdef CONFIG_PPC32 + stwu r1,-64(r1) +#else + stdu r1,-64(r1) +#endif + mfmsr r10 + ori r11,r10,MSR_FP + mtmsr r11 + isync + stfd fr0,24(r1) + stfd fr1,16(r1) + stfd fr31,8(r1) + LDCONST(fr1, fpzero) + mffs fr31 + mtfsf 0xff,fr1 + blr + +fpdisable: + mtlr r12 + mtfsf 0xff,fr31 + lfd fr31,8(r1) + lfd fr1,16(r1) + lfd fr0,24(r1) + mtmsr r10 + isync + addi r1,r1,64 + blr + +/* + * Vector add, floating point. + */ +_GLOBAL(vaddfp) + mflr r12 + bl fpenable + li r0,4 + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + lfsx fr1,r5,r6 + fadds fr0,fr0,fr1 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector subtract, floating point. + */ +_GLOBAL(vsubfp) + mflr r12 + bl fpenable + li r0,4 + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + lfsx fr1,r5,r6 + fsubs fr0,fr0,fr1 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector multiply and add, floating point. + */ +_GLOBAL(vmaddfp) + mflr r12 + bl fpenable + stfd fr2,32(r1) + li r0,4 + mtctr r0 + li r7,0 +1: lfsx fr0,r4,r7 + lfsx fr1,r5,r7 + lfsx fr2,r6,r7 + fmadds fr0,fr0,fr2,fr1 + stfsx fr0,r3,r7 + addi r7,r7,4 + bdnz 1b + lfd fr2,32(r1) + b fpdisable + +/* + * Vector negative multiply and subtract, floating point. + */ +_GLOBAL(vnmsubfp) + mflr r12 + bl fpenable + stfd fr2,32(r1) + li r0,4 + mtctr r0 + li r7,0 +1: lfsx fr0,r4,r7 + lfsx fr1,r5,r7 + lfsx fr2,r6,r7 + fnmsubs fr0,fr0,fr2,fr1 + stfsx fr0,r3,r7 + addi r7,r7,4 + bdnz 1b + lfd fr2,32(r1) + b fpdisable + +/* + * Vector reciprocal estimate. We just compute 1.0/x. + * r3 -> destination, r4 -> source. + */ +_GLOBAL(vrefp) + mflr r12 + bl fpenable + li r0,4 + LDCONST(fr1, fpone) + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + fdivs fr0,fr1,fr0 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector reciprocal square-root estimate, floating point. + * We use the frsqrte instruction for the initial estimate followed + * by 2 iterations of Newton-Raphson to get sufficient accuracy. + * r3 -> destination, r4 -> source. + */ +_GLOBAL(vrsqrtefp) + mflr r12 + bl fpenable + stfd fr2,32(r1) + stfd fr3,40(r1) + stfd fr4,48(r1) + stfd fr5,56(r1) + li r0,4 + LDCONST(fr4, fpone) + LDCONST(fr5, fphalf) + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + frsqrte fr1,fr0 /* r = frsqrte(s) */ + fmuls fr3,fr1,fr0 /* r * s */ + fmuls fr2,fr1,fr5 /* r * 0.5 */ + fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */ + fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */ + fmuls fr3,fr1,fr0 /* r * s */ + fmuls fr2,fr1,fr5 /* r * 0.5 */ + fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */ + fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */ + stfsx fr1,r3,r6 + addi r6,r6,4 + bdnz 1b + lfd fr5,56(r1) + lfd fr4,48(r1) + lfd fr3,40(r1) + lfd fr2,32(r1) + b fpdisable diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S new file mode 100644 index 00000000000..d4dfcfbce27 --- /dev/null +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -0,0 +1,279 @@ +#include <linux/config.h> +#ifdef CONFIG_PPC64 +#include <asm/page.h> +#else +#define PAGE_SIZE 4096 +#endif +#include <asm-generic/vmlinux.lds.h> + +#ifdef CONFIG_PPC64 +OUTPUT_ARCH(powerpc:common64) +jiffies = jiffies_64; +#else +OUTPUT_ARCH(powerpc:common) +jiffies = jiffies_64 + 4; +#endif +SECTIONS +{ + /* Sections to be discarded. */ + /DISCARD/ : { + *(.exitcall.exit) + *(.exit.data) + } + + + /* Read-only sections, merged into text segment: */ +#ifdef CONFIG_PPC32 + . = + SIZEOF_HEADERS; + .interp : { *(.interp) } + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .rel.text : { *(.rel.text) } + .rela.text : { *(.rela.text) } + .rel.data : { *(.rel.data) } + .rela.data : { *(.rela.data) } + .rel.rodata : { *(.rel.rodata) } + .rela.rodata : { *(.rela.rodata) } + .rel.got : { *(.rel.got) } + .rela.got : { *(.rela.got) } + .rel.ctors : { *(.rel.ctors) } + .rela.ctors : { *(.rela.ctors) } + .rel.dtors : { *(.rel.dtors) } + .rela.dtors : { *(.rela.dtors) } + .rel.bss : { *(.rel.bss) } + .rela.bss : { *(.rela.bss) } + .rel.plt : { *(.rel.plt) } + .rela.plt : { *(.rela.plt) } +/* .init : { *(.init) } =0*/ + .plt : { *(.plt) } +#endif + .text : { + *(.text .text.*) + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + *(.fixup) +#ifdef CONFIG_PPC32 + *(.got1) + __got2_start = .; + *(.got2) + __got2_end = .; +#else + . = ALIGN(PAGE_SIZE); + _etext = .; +#endif + } +#ifdef CONFIG_PPC32 + _etext = .; + PROVIDE (etext = .); + + RODATA + .fini : { *(.fini) } =0 + .ctors : { *(.ctors) } + .dtors : { *(.dtors) } + + .fixup : { *(.fixup) } +#endif + + __ex_table : { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + __bug_table : { + __start___bug_table = .; + *(__bug_table) + __stop___bug_table = .; + } + +#ifdef CONFIG_PPC64 + __ftr_fixup : { + __start___ftr_fixup = .; + *(__ftr_fixup) + __stop___ftr_fixup = .; + } + + RODATA +#endif + +#ifdef CONFIG_PPC32 + /* Read-write section, merged into data segment: */ + . = ALIGN(PAGE_SIZE); + _sdata = .; + .data : + { + *(.data) + *(.data1) + *(.sdata) + *(.sdata2) + *(.got.plt) *(.got) + *(.dynamic) + CONSTRUCTORS + } + + . = ALIGN(PAGE_SIZE); + __nosave_begin = .; + .data_nosave : { *(.data.nosave) } + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + _edata = .; + PROVIDE (edata = .); + + . = ALIGN(8192); + .data.init_task : { *(.data.init_task) } +#endif + + /* will be freed after init */ + . = ALIGN(PAGE_SIZE); + __init_begin = .; + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } +#ifdef CONFIG_PPC32 + /* .exit.text is discarded at runtime, not link time, + to deal with references from __bug_table */ + .exit.text : { *(.exit.text) } +#endif + .init.data : { + *(.init.data); + __vtop_table_begin = .; + *(.vtop_fixup); + __vtop_table_end = .; + __ptov_table_begin = .; + *(.ptov_fixup); + __ptov_table_end = .; + } + + . = ALIGN(16); + .init.setup : { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + + .initcall.init : { + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + } + + .con_initcall.init : { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + + SECURITY_INIT + +#ifdef CONFIG_PPC32 + __start___ftr_fixup = .; + __ftr_fixup : { *(__ftr_fixup) } + __stop___ftr_fixup = .; +#else + . = ALIGN(PAGE_SIZE); + .init.ramfs : { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } +#endif + +#ifdef CONFIG_PPC32 + . = ALIGN(32); +#endif + .data.percpu : { + __per_cpu_start = .; + *(.data.percpu) + __per_cpu_end = .; + } + + . = ALIGN(PAGE_SIZE); +#ifdef CONFIG_PPC64 + . = ALIGN(16384); + __init_end = .; + /* freed after init ends here */ + + /* Read/write sections */ + . = ALIGN(PAGE_SIZE); + . = ALIGN(16384); + _sdata = .; + /* The initial task and kernel stack */ + .data.init_task : { + *(.data.init_task) + } + + . = ALIGN(PAGE_SIZE); + .data.page_aligned : { + *(.data.page_aligned) + } + + .data.cacheline_aligned : { + *(.data.cacheline_aligned) + } + + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) + } + + .opd : { + *(.opd) + } + + .got : { + __toc_start = .; + *(.got) + *(.toc) + . = ALIGN(PAGE_SIZE); + _edata = .; + } + + . = ALIGN(PAGE_SIZE); +#else + __initramfs_start = .; + .init.ramfs : { + *(.init.ramfs) + } + __initramfs_end = .; + + . = ALIGN(4096); + __init_end = .; + + . = ALIGN(4096); + _sextratext = .; + _eextratext = .; + + __bss_start = .; +#endif + + .bss : { + __bss_start = .; + *(.sbss) *(.scommon) + *(.dynbss) + *(.bss) + *(COMMON) + __bss_stop = .; + } + +#ifdef CONFIG_PPC64 + . = ALIGN(PAGE_SIZE); +#endif + _end = . ; +#ifdef CONFIG_PPC32 + PROVIDE (end = .); +#endif +} diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile new file mode 100644 index 00000000000..30367a0237d --- /dev/null +++ b/arch/powerpc/lib/Makefile @@ -0,0 +1,13 @@ +# +# Makefile for ppc-specific library files.. +# + +obj-y := strcase.o string.o +obj-$(CONFIG_PPC32) += div64.o copy_32.o checksum_32.o +obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o memcpy_64.o \ + usercopy_64.o sstep.o checksum_64.o mem_64.o +obj-$(CONFIG_PPC_ISERIES) += e2a.o +ifeq ($(CONFIG_PPC64),y) +obj-$(CONFIG_SMP) += locks.o +endif + diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S new file mode 100644 index 00000000000..7874e8a8045 --- /dev/null +++ b/arch/powerpc/lib/checksum_32.S @@ -0,0 +1,225 @@ +/* + * This file contains assembly-language implementations + * of IP-style 1's complement checksum routines. + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). + */ + +#include <linux/sys.h> +#include <asm/processor.h> +#include <asm/errno.h> +#include <asm/ppc_asm.h> + + .text + +/* + * ip_fast_csum(buf, len) -- Optimized for IP header + * len is in words and is always >= 5. + */ +_GLOBAL(ip_fast_csum) + lwz r0,0(r3) + lwzu r5,4(r3) + addic. r4,r4,-2 + addc r0,r0,r5 + mtctr r4 + blelr- +1: lwzu r4,4(r3) + adde r0,r0,r4 + bdnz 1b + addze r0,r0 /* add in final carry */ + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Compute checksum of TCP or UDP pseudo-header: + * csum_tcpudp_magic(saddr, daddr, len, proto, sum) + */ +_GLOBAL(csum_tcpudp_magic) + rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ + addc r0,r3,r4 /* add 4 32-bit words together */ + adde r0,r0,r5 + adde r0,r0,r7 + addze r0,r0 /* add in final carry */ + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * csum_partial(buff, len, sum) + */ +_GLOBAL(csum_partial) + addic r0,r5,0 + subi r3,r3,4 + srwi. r6,r4,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r5,r3,2 /* Align buffer to longword boundary */ + beq+ 1f + lhz r5,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r4,r4,2 + addc r0,r0,r5 + srwi. r6,r4,2 /* # words to do */ + beq 3f +1: mtctr r6 +2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */ + adde r0,r0,r5 /* be unnecessary to unroll this loop */ + bdnz 2b + andi. r4,r4,3 +3: cmpwi 0,r4,2 + blt+ 4f + lhz r5,4(r3) + addi r3,r3,2 + subi r4,r4,2 + adde r0,r0,r5 +4: cmpwi 0,r4,1 + bne+ 5f + lbz r5,4(r3) + slwi r5,r5,8 /* Upper byte of word */ + adde r0,r0,r5 +5: addze r3,r0 /* add in final carry */ + blr + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit), while copying the block to dst. + * If an access exception occurs on src or dst, it stores -EFAULT + * to *src_err or *dst_err respectively, and (for an error on + * src) zeroes the rest of dst. + * + * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) + */ +_GLOBAL(csum_partial_copy_generic) + addic r0,r6,0 + subi r3,r3,4 + subi r4,r4,4 + srwi. r6,r5,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r9,r4,2 /* Align dst to longword boundary */ + beq+ 1f +81: lhz r6,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r5,r5,2 +91: sth r6,4(r4) + addi r4,r4,2 + addc r0,r0,r6 + srwi. r6,r5,2 /* # words to do */ + beq 3f +1: srwi. r6,r5,4 /* # groups of 4 words to do */ + beq 10f + mtctr r6 +71: lwz r6,4(r3) +72: lwz r9,8(r3) +73: lwz r10,12(r3) +74: lwzu r11,16(r3) + adde r0,r0,r6 +75: stw r6,4(r4) + adde r0,r0,r9 +76: stw r9,8(r4) + adde r0,r0,r10 +77: stw r10,12(r4) + adde r0,r0,r11 +78: stwu r11,16(r4) + bdnz 71b +10: rlwinm. r6,r5,30,30,31 /* # words left to do */ + beq 13f + mtctr r6 +82: lwzu r9,4(r3) +92: stwu r9,4(r4) + adde r0,r0,r9 + bdnz 82b +13: andi. r5,r5,3 +3: cmpwi 0,r5,2 + blt+ 4f +83: lhz r6,4(r3) + addi r3,r3,2 + subi r5,r5,2 +93: sth r6,4(r4) + addi r4,r4,2 + adde r0,r0,r6 +4: cmpwi 0,r5,1 + bne+ 5f +84: lbz r6,4(r3) +94: stb r6,4(r4) + slwi r6,r6,8 /* Upper byte of word */ + adde r0,r0,r6 +5: addze r3,r0 /* add in final carry */ + blr + +/* These shouldn't go in the fixup section, since that would + cause the ex_table addresses to get out of order. */ + +src_error_4: + mfctr r6 /* update # bytes remaining from ctr */ + rlwimi r5,r6,4,0,27 + b 79f +src_error_1: + li r6,0 + subi r5,r5,2 +95: sth r6,4(r4) + addi r4,r4,2 +79: srwi. r6,r5,2 + beq 3f + mtctr r6 +src_error_2: + li r6,0 +96: stwu r6,4(r4) + bdnz 96b +3: andi. r5,r5,3 + beq src_error +src_error_3: + li r6,0 + mtctr r5 + addi r4,r4,3 +97: stbu r6,1(r4) + bdnz 97b +src_error: + cmpwi 0,r7,0 + beq 1f + li r6,-EFAULT + stw r6,0(r7) +1: addze r3,r0 + blr + +dst_error: + cmpwi 0,r8,0 + beq 1f + li r6,-EFAULT + stw r6,0(r8) +1: addze r3,r0 + blr + +.section __ex_table,"a" + .long 81b,src_error_1 + .long 91b,dst_error + .long 71b,src_error_4 + .long 72b,src_error_4 + .long 73b,src_error_4 + .long 74b,src_error_4 + .long 75b,dst_error + .long 76b,dst_error + .long 77b,dst_error + .long 78b,dst_error + .long 82b,src_error_2 + .long 92b,dst_error + .long 83b,src_error_3 + .long 93b,dst_error + .long 84b,src_error_3 + .long 94b,dst_error + .long 95b,dst_error + .long 96b,dst_error + .long 97b,dst_error diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S new file mode 100644 index 00000000000..ef96c6c58ef --- /dev/null +++ b/arch/powerpc/lib/checksum_64.S @@ -0,0 +1,229 @@ +/* + * This file contains assembly-language implementations + * of IP-style 1's complement checksum routines. + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). + */ + +#include <linux/sys.h> +#include <asm/processor.h> +#include <asm/errno.h> +#include <asm/ppc_asm.h> + +/* + * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header + * len is in words and is always >= 5. + * + * In practice len == 5, but this is not guaranteed. So this code does not + * attempt to use doubleword instructions. + */ +_GLOBAL(ip_fast_csum) + lwz r0,0(r3) + lwzu r5,4(r3) + addic. r4,r4,-2 + addc r0,r0,r5 + mtctr r4 + blelr- +1: lwzu r4,4(r3) + adde r0,r0,r4 + bdnz 1b + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32-bit halves together */ + add r0,r0,r4 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Compute checksum of TCP or UDP pseudo-header: + * csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum) + * No real gain trying to do this specially for 64 bit, but + * the 32 bit addition may spill into the upper bits of + * the doubleword so we still must fold it down from 64. + */ +_GLOBAL(csum_tcpudp_magic) + rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ + addc r0,r3,r4 /* add 4 32-bit words together */ + adde r0,r0,r5 + adde r0,r0,r7 + rldicl r4,r0,32,0 /* fold 64 bit value */ + add r0,r4,r0 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit). + * + * This code assumes at least halfword alignment, though the length + * can be any number of bytes. The sum is accumulated in r5. + * + * csum_partial(r3=buff, r4=len, r5=sum) + */ +_GLOBAL(csum_partial) + subi r3,r3,8 /* we'll offset by 8 for the loads */ + srdi. r6,r4,3 /* divide by 8 for doubleword count */ + addic r5,r5,0 /* clear carry */ + beq 3f /* if we're doing < 8 bytes */ + andi. r0,r3,2 /* aligned on a word boundary already? */ + beq+ 1f + lhz r6,8(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r4,r4,2 + addc r5,r5,r6 + srdi. r6,r4,3 /* recompute number of doublewords */ + beq 3f /* any left? */ +1: mtctr r6 +2: ldu r6,8(r3) /* main sum loop */ + adde r5,r5,r6 + bdnz 2b + andi. r4,r4,7 /* compute bytes left to sum after doublewords */ +3: cmpwi 0,r4,4 /* is at least a full word left? */ + blt 4f + lwz r6,8(r3) /* sum this word */ + addi r3,r3,4 + subi r4,r4,4 + adde r5,r5,r6 +4: cmpwi 0,r4,2 /* is at least a halfword left? */ + blt+ 5f + lhz r6,8(r3) /* sum this halfword */ + addi r3,r3,2 + subi r4,r4,2 + adde r5,r5,r6 +5: cmpwi 0,r4,1 /* is at least a byte left? */ + bne+ 6f + lbz r6,8(r3) /* sum this byte */ + slwi r6,r6,8 /* this byte is assumed to be the upper byte of a halfword */ + adde r5,r5,r6 +6: addze r5,r5 /* add in final carry */ + rldicl r4,r5,32,0 /* fold two 32-bit halves together */ + add r3,r4,r5 + srdi r3,r3,32 + blr + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit), while copying the block to dst. + * If an access exception occurs on src or dst, it stores -EFAULT + * to *src_err or *dst_err respectively, and (for an error on + * src) zeroes the rest of dst. + * + * This code needs to be reworked to take advantage of 64 bit sum+copy. + * However, due to tokenring halfword alignment problems this will be very + * tricky. For now we'll leave it until we instrument it somehow. + * + * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) + */ +_GLOBAL(csum_partial_copy_generic) + addic r0,r6,0 + subi r3,r3,4 + subi r4,r4,4 + srwi. r6,r5,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r9,r4,2 /* Align dst to longword boundary */ + beq+ 1f +81: lhz r6,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r5,r5,2 +91: sth r6,4(r4) + addi r4,r4,2 + addc r0,r0,r6 + srwi. r6,r5,2 /* # words to do */ + beq 3f +1: mtctr r6 +82: lwzu r6,4(r3) /* the bdnz has zero overhead, so it should */ +92: stwu r6,4(r4) /* be unnecessary to unroll this loop */ + adde r0,r0,r6 + bdnz 82b + andi. r5,r5,3 +3: cmpwi 0,r5,2 + blt+ 4f +83: lhz r6,4(r3) + addi r3,r3,2 + subi r5,r5,2 +93: sth r6,4(r4) + addi r4,r4,2 + adde r0,r0,r6 +4: cmpwi 0,r5,1 + bne+ 5f +84: lbz r6,4(r3) +94: stb r6,4(r4) + slwi r6,r6,8 /* Upper byte of word */ + adde r0,r0,r6 +5: addze r3,r0 /* add in final carry (unlikely with 64-bit regs) */ + rldicl r4,r3,32,0 /* fold 64 bit value */ + add r3,r4,r3 + srdi r3,r3,32 + blr + +/* These shouldn't go in the fixup section, since that would + cause the ex_table addresses to get out of order. */ + + .globl src_error_1 +src_error_1: + li r6,0 + subi r5,r5,2 +95: sth r6,4(r4) + addi r4,r4,2 + srwi. r6,r5,2 + beq 3f + mtctr r6 + .globl src_error_2 +src_error_2: + li r6,0 +96: stwu r6,4(r4) + bdnz 96b +3: andi. r5,r5,3 + beq src_error + .globl src_error_3 +src_error_3: + li r6,0 + mtctr r5 + addi r4,r4,3 +97: stbu r6,1(r4) + bdnz 97b + .globl src_error +src_error: + cmpdi 0,r7,0 + beq 1f + li r6,-EFAULT + stw r6,0(r7) +1: addze r3,r0 + blr + + .globl dst_error +dst_error: + cmpdi 0,r8,0 + beq 1f + li r6,-EFAULT + stw r6,0(r8) +1: addze r3,r0 + blr + +.section __ex_table,"a" + .align 3 + .llong 81b,src_error_1 + .llong 91b,dst_error + .llong 82b,src_error_2 + .llong 92b,dst_error + .llong 83b,src_error_3 + .llong 93b,dst_error + .llong 84b,src_error_3 + .llong 94b,dst_error + .llong 95b,dst_error + .llong 96b,dst_error + .llong 97b,dst_error diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S new file mode 100644 index 00000000000..420a912198a --- /dev/null +++ b/arch/powerpc/lib/copy_32.S @@ -0,0 +1,543 @@ +/* + * Memory copy functions for 32-bit PowerPC. + * + * Copyright (C) 1996-2005 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/cache.h> +#include <asm/errno.h> +#include <asm/ppc_asm.h> + +#define COPY_16_BYTES \ + lwz r7,4(r4); \ + lwz r8,8(r4); \ + lwz r9,12(r4); \ + lwzu r10,16(r4); \ + stw r7,4(r6); \ + stw r8,8(r6); \ + stw r9,12(r6); \ + stwu r10,16(r6) + +#define COPY_16_BYTES_WITHEX(n) \ +8 ## n ## 0: \ + lwz r7,4(r4); \ +8 ## n ## 1: \ + lwz r8,8(r4); \ +8 ## n ## 2: \ + lwz r9,12(r4); \ +8 ## n ## 3: \ + lwzu r10,16(r4); \ +8 ## n ## 4: \ + stw r7,4(r6); \ +8 ## n ## 5: \ + stw r8,8(r6); \ +8 ## n ## 6: \ + stw r9,12(r6); \ +8 ## n ## 7: \ + stwu r10,16(r6) + +#define COPY_16_BYTES_EXCODE(n) \ +9 ## n ## 0: \ + addi r5,r5,-(16 * n); \ + b 104f; \ +9 ## n ## 1: \ + addi r5,r5,-(16 * n); \ + b 105f; \ +.section __ex_table,"a"; \ + .align 2; \ + .long 8 ## n ## 0b,9 ## n ## 0b; \ + .long 8 ## n ## 1b,9 ## n ## 0b; \ + .long 8 ## n ## 2b,9 ## n ## 0b; \ + .long 8 ## n ## 3b,9 ## n ## 0b; \ + .long 8 ## n ## 4b,9 ## n ## 1b; \ + .long 8 ## n ## 5b,9 ## n ## 1b; \ + .long 8 ## n ## 6b,9 ## n ## 1b; \ + .long 8 ## n ## 7b,9 ## n ## 1b; \ + .text + + .text + .stabs "arch/powerpc/lib/",N_SO,0,0,0f + .stabs "copy32.S",N_SO,0,0,0f +0: + +CACHELINE_BYTES = L1_CACHE_LINE_SIZE +LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE +CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1) + +/* + * Use dcbz on the complete cache lines in the destination + * to set them to zero. This requires that the destination + * area is cacheable. -- paulus + */ +_GLOBAL(cacheable_memzero) + mr r5,r4 + li r4,0 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + clrlwi r7,r6,32-LG_CACHELINE_BYTES + add r8,r7,r5 + srwi r9,r8,LG_CACHELINE_BYTES + addic. r9,r9,-1 /* total number of complete cachelines */ + ble 2f + xori r0,r7,CACHELINE_MASK & ~3 + srwi. r0,r0,2 + beq 3f + mtctr r0 +4: stwu r4,4(r6) + bdnz 4b +3: mtctr r9 + li r7,4 +#if !defined(CONFIG_8xx) +10: dcbz r7,r6 +#else +10: stw r4, 4(r6) + stw r4, 8(r6) + stw r4, 12(r6) + stw r4, 16(r6) +#if CACHE_LINE_SIZE >= 32 + stw r4, 20(r6) + stw r4, 24(r6) + stw r4, 28(r6) + stw r4, 32(r6) +#endif /* CACHE_LINE_SIZE */ +#endif + addi r6,r6,CACHELINE_BYTES + bdnz 10b + clrlwi r5,r8,32-LG_CACHELINE_BYTES + addi r5,r5,4 +2: srwi r0,r5,2 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + +_GLOBAL(memset) + rlwimi r4,r4,8,16,23 + rlwimi r4,r4,16,0,15 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + srwi r0,r5,2 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + +/* + * This version uses dcbz on the complete cache lines in the + * destination area to reduce memory traffic. This requires that + * the destination area is cacheable. + * We only use this version if the source and dest don't overlap. + * -- paulus. + */ +_GLOBAL(cacheable_memcpy) + add r7,r3,r5 /* test if the src & dst overlap */ + add r8,r4,r5 + cmplw 0,r4,r7 + cmplw 1,r3,r8 + crand 0,0,4 /* cr0.lt &= cr1.lt */ + blt memcpy /* if regions overlap */ + + addi r4,r4,-4 + addi r6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + subf r5,r0,r5 + mtctr r8 + beq+ 61f +70: lbz r9,4(r4) /* do some bytes */ + stb r9,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 70b +61: srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ + stwu r9,4(r6) + bdnz 72b + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + mtctr r0 + beq 63f +53: +#if !defined(CONFIG_8xx) + dcbz r11,r6 +#endif + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES +#endif +#endif +#endif + bdnz 53b + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) + stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,3 + mtctr r0 + beq+ 65f +40: lbz r0,4(r4) + stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 40b +65: blr + +_GLOBAL(memmove) + cmplw 0,r3,r4 + bgt backwards_memcpy + /* fall through */ + +_GLOBAL(memcpy) + srwi. r7,r5,3 + addi r6,r3,-4 + addi r4,r4,-4 + beq 2f /* if less than 8 bytes to do */ + andi. r0,r6,3 /* get dest word aligned */ + mtctr r7 + bne 5f +1: lwz r7,4(r4) + lwzu r8,8(r4) + stw r7,4(r6) + stwu r8,8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,4(r4) + addi r5,r5,-4 + stwu r0,4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r4,r4,3 + addi r6,r6,3 +4: lbzu r0,1(r4) + stbu r0,1(r6) + bdnz 4b + blr +5: subfic r0,r0,4 + mtctr r0 +6: lbz r7,4(r4) + addi r4,r4,1 + stb r7,4(r6) + addi r6,r6,1 + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +_GLOBAL(backwards_memcpy) + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +_GLOBAL(__copy_tofrom_user) + addi r4,r4,-4 + addi r6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + mtctr r8 + beq+ 61f +70: lbz r9,4(r4) /* do some bytes */ +71: stb r9,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 70b +61: subf r5,r0,r5 + srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ +73: stwu r9,4(r6) + bdnz 72b + + .section __ex_table,"a" + .align 2 + .long 70b,100f + .long 71b,101f + .long 72b,102f + .long 73b,103f + .text + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + beq 63f + +#ifdef CONFIG_8xx + /* Don't use prefetch on 8xx */ + mtctr r0 + li r0,0 +53: COPY_16_BYTES_WITHEX(0) + bdnz 53b + +#else /* not CONFIG_8xx */ + /* Here we decide how far ahead to prefetch the source */ + li r3,4 + cmpwi r0,1 + li r7,0 + ble 114f + li r7,1 +#if MAX_COPY_PREFETCH > 1 + /* Heuristically, for large transfers we prefetch + MAX_COPY_PREFETCH cachelines ahead. For small transfers + we prefetch 1 cacheline ahead. */ + cmpwi r0,MAX_COPY_PREFETCH + ble 112f + li r7,MAX_COPY_PREFETCH +112: mtctr r7 +111: dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES + bdnz 111b +#else + dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES +#endif /* MAX_COPY_PREFETCH > 1 */ + +114: subf r8,r7,r0 + mr r0,r7 + mtctr r8 + +53: dcbt r3,r4 +54: dcbz r11,r6 + .section __ex_table,"a" + .align 2 + .long 54b,105f + .text +/* the main body of the cacheline loop */ + COPY_16_BYTES_WITHEX(0) +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES_WITHEX(1) +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES_WITHEX(2) + COPY_16_BYTES_WITHEX(3) +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES_WITHEX(4) + COPY_16_BYTES_WITHEX(5) + COPY_16_BYTES_WITHEX(6) + COPY_16_BYTES_WITHEX(7) +#endif +#endif +#endif + bdnz 53b + cmpwi r0,0 + li r3,4 + li r7,0 + bne 114b +#endif /* CONFIG_8xx */ + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) +31: stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,3 + mtctr r0 + beq+ 65f +40: lbz r0,4(r4) +41: stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 40b +65: li r3,0 + blr + +/* read fault, initial single-byte copy */ +100: li r9,0 + b 90f +/* write fault, initial single-byte copy */ +101: li r9,1 +90: subf r5,r8,r5 + li r3,0 + b 99f +/* read fault, initial word copy */ +102: li r9,0 + b 91f +/* write fault, initial word copy */ +103: li r9,1 +91: li r3,2 + b 99f + +/* + * this stuff handles faults in the cacheline loop and branches to either + * 104f (if in read part) or 105f (if in write part), after updating r5 + */ + COPY_16_BYTES_EXCODE(0) +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES_EXCODE(1) +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES_EXCODE(2) + COPY_16_BYTES_EXCODE(3) +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES_EXCODE(4) + COPY_16_BYTES_EXCODE(5) + COPY_16_BYTES_EXCODE(6) + COPY_16_BYTES_EXCODE(7) +#endif +#endif +#endif + +/* read fault in cacheline loop */ +104: li r9,0 + b 92f +/* fault on dcbz (effectively a write fault) */ +/* or write fault in cacheline loop */ +105: li r9,1 +92: li r3,LG_CACHELINE_BYTES + mfctr r8 + add r0,r0,r8 + b 106f +/* read fault in final word loop */ +108: li r9,0 + b 93f +/* write fault in final word loop */ +109: li r9,1 +93: andi. r5,r5,3 + li r3,2 + b 99f +/* read fault in final byte loop */ +110: li r9,0 + b 94f +/* write fault in final byte loop */ +111: li r9,1 +94: li r5,0 + li r3,0 +/* + * At this stage the number of bytes not copied is + * r5 + (ctr << r3), and r9 is 0 for read or 1 for write. + */ +99: mfctr r0 +106: slw r3,r0,r3 + add. r3,r3,r5 + beq 120f /* shouldn't happen */ + cmpwi 0,r9,0 + bne 120f +/* for a read fault, first try to continue the copy one byte at a time */ + mtctr r3 +130: lbz r0,4(r4) +131: stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 130b +/* then clear out the destination: r3 bytes starting at 4(r6) */ +132: mfctr r3 + srwi. r0,r3,2 + li r9,0 + mtctr r0 + beq 113f +112: stwu r9,4(r6) + bdnz 112b +113: andi. r0,r3,3 + mtctr r0 + beq 120f +114: stb r9,4(r6) + addi r6,r6,1 + bdnz 114b +120: blr + + .section __ex_table,"a" + .align 2 + .long 30b,108b + .long 31b,109b + .long 40b,110b + .long 41b,111b + .long 130b,132b + .long 131b,120b + .long 112b,120b + .long 114b,120b + .text diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S new file mode 100644 index 00000000000..733d61618bb --- /dev/null +++ b/arch/powerpc/lib/copypage_64.S @@ -0,0 +1,121 @@ +/* + * arch/ppc64/lib/copypage.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> + +_GLOBAL(copy_page) + std r31,-8(1) + std r30,-16(1) + std r29,-24(1) + std r28,-32(1) + std r27,-40(1) + std r26,-48(1) + std r25,-56(1) + std r24,-64(1) + std r23,-72(1) + std r22,-80(1) + std r21,-88(1) + std r20,-96(1) + li r5,4096/32 - 1 + addi r3,r3,-8 + li r12,5 +0: addi r5,r5,-24 + mtctr r12 + ld r22,640(4) + ld r21,512(4) + ld r20,384(4) + ld r11,256(4) + ld r9,128(4) + ld r7,0(4) + ld r25,648(4) + ld r24,520(4) + ld r23,392(4) + ld r10,264(4) + ld r8,136(4) + ldu r6,8(4) + cmpwi r5,24 +1: std r22,648(3) + std r21,520(3) + std r20,392(3) + std r11,264(3) + std r9,136(3) + std r7,8(3) + ld r28,648(4) + ld r27,520(4) + ld r26,392(4) + ld r31,264(4) + ld r30,136(4) + ld r29,8(4) + std r25,656(3) + std r24,528(3) + std r23,400(3) + std r10,272(3) + std r8,144(3) + std r6,16(3) + ld r22,656(4) + ld r21,528(4) + ld r20,400(4) + ld r11,272(4) + ld r9,144(4) + ld r7,16(4) + std r28,664(3) + std r27,536(3) + std r26,408(3) + std r31,280(3) + std r30,152(3) + stdu r29,24(3) + ld r25,664(4) + ld r24,536(4) + ld r23,408(4) + ld r10,280(4) + ld r8,152(4) + ldu r6,24(4) + bdnz 1b + std r22,648(3) + std r21,520(3) + std r20,392(3) + std r11,264(3) + std r9,136(3) + std r7,8(3) + addi r4,r4,640 + addi r3,r3,648 + bge 0b + mtctr r5 + ld r7,0(4) + ld r8,8(4) + ldu r9,16(4) +3: ld r10,8(4) + std r7,8(3) + ld r7,16(4) + std r8,16(3) + ld r8,24(4) + std r9,24(3) + ldu r9,32(4) + stdu r10,32(3) + bdnz 3b +4: ld r10,8(4) + std r7,8(3) + std r8,16(3) + std r9,24(3) + std r10,32(3) +9: ld r20,-96(1) + ld r21,-88(1) + ld r22,-80(1) + ld r23,-72(1) + ld r24,-64(1) + ld r25,-56(1) + ld r26,-48(1) + ld r27,-40(1) + ld r28,-32(1) + ld r29,-24(1) + ld r30,-16(1) + ld r31,-8(1) + blr diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S new file mode 100644 index 00000000000..a0b3fbbd6fb --- /dev/null +++ b/arch/powerpc/lib/copyuser_64.S @@ -0,0 +1,576 @@ +/* + * arch/ppc64/lib/copyuser.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> + + .align 7 +_GLOBAL(__copy_tofrom_user) + /* first check for a whole page copy on a page boundary */ + cmpldi cr1,r5,16 + cmpdi cr6,r5,4096 + or r0,r3,r4 + neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ + andi. r0,r0,4095 + std r3,-24(r1) + crand cr0*4+2,cr0*4+2,cr6*4+2 + std r4,-16(r1) + std r5,-8(r1) + dcbt 0,r4 + beq .Lcopy_page + andi. r6,r6,7 + mtcrf 0x01,r5 + blt cr1,.Lshort_copy + bne .Ldst_unaligned +.Ldst_aligned: + andi. r0,r4,7 + addi r3,r3,-16 + bne .Lsrc_unaligned + srdi r7,r5,4 +20: ld r9,0(r4) + addi r4,r4,-8 + mtctr r7 + andi. r5,r5,7 + bf cr7*4+0,22f + addi r3,r3,8 + addi r4,r4,8 + mr r8,r9 + blt cr1,72f +21: ld r9,8(r4) +70: std r8,8(r3) +22: ldu r8,16(r4) +71: stdu r9,16(r3) + bdnz 21b +72: std r8,8(r3) + beq+ 3f + addi r3,r3,16 +23: ld r9,8(r4) +.Ldo_tail: + bf cr7*4+1,1f + rotldi r9,r9,32 +73: stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 +74: sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 +75: stb r9,0(r3) +3: li r3,0 + blr + +.Lsrc_unaligned: + srdi r6,r5,3 + addi r5,r5,-16 + subf r4,r0,r4 + srdi r7,r5,4 + sldi r10,r0,3 + cmpldi cr6,r6,3 + andi. r5,r5,7 + mtctr r7 + subfic r11,r10,64 + add r5,r5,r0 + bt cr7*4+0,28f + +24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ +25: ld r0,8(r4) + sld r6,r9,r10 +26: ldu r9,16(r4) + srd r7,r0,r11 + sld r8,r0,r10 + or r7,r7,r6 + blt cr6,79f +27: ld r0,8(r4) + b 2f + +28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ +29: ldu r9,8(r4) + sld r8,r0,r10 + addi r3,r3,-8 + blt cr6,5f +30: ld r0,8(r4) + srd r12,r9,r11 + sld r6,r9,r10 +31: ldu r9,16(r4) + or r12,r8,r12 + srd r7,r0,r11 + sld r8,r0,r10 + addi r3,r3,16 + beq cr6,78f + +1: or r7,r7,r6 +32: ld r0,8(r4) +76: std r12,8(r3) +2: srd r12,r9,r11 + sld r6,r9,r10 +33: ldu r9,16(r4) + or r12,r8,r12 +77: stdu r7,16(r3) + srd r7,r0,r11 + sld r8,r0,r10 + bdnz 1b + +78: std r12,8(r3) + or r7,r7,r6 +79: std r7,16(r3) +5: srd r12,r9,r11 + or r12,r8,r12 +80: std r12,24(r3) + bne 6f + li r3,0 + blr +6: cmpwi cr1,r5,8 + addi r3,r3,32 + sld r9,r9,r10 + ble cr1,.Ldo_tail +34: ld r0,8(r4) + srd r7,r0,r11 + or r9,r7,r9 + b .Ldo_tail + +.Ldst_unaligned: + mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */ + subf r5,r6,r5 + li r7,0 + cmpldi r1,r5,16 + bf cr7*4+3,1f +35: lbz r0,0(r4) +81: stb r0,0(r3) + addi r7,r7,1 +1: bf cr7*4+2,2f +36: lhzx r0,r7,r4 +82: sthx r0,r7,r3 + addi r7,r7,2 +2: bf cr7*4+1,3f +37: lwzx r0,r7,r4 +83: stwx r0,r7,r3 +3: mtcrf 0x01,r5 + add r4,r6,r4 + add r3,r6,r3 + b .Ldst_aligned + +.Lshort_copy: + bf cr7*4+0,1f +38: lwz r0,0(r4) +39: lwz r9,4(r4) + addi r4,r4,8 +84: stw r0,0(r3) +85: stw r9,4(r3) + addi r3,r3,8 +1: bf cr7*4+1,2f +40: lwz r0,0(r4) + addi r4,r4,4 +86: stw r0,0(r3) + addi r3,r3,4 +2: bf cr7*4+2,3f +41: lhz r0,0(r4) + addi r4,r4,2 +87: sth r0,0(r3) + addi r3,r3,2 +3: bf cr7*4+3,4f +42: lbz r0,0(r4) +88: stb r0,0(r3) +4: li r3,0 + blr + +/* + * exception handlers follow + * we have to return the number of bytes not copied + * for an exception on a load, we set the rest of the destination to 0 + */ + +136: +137: + add r3,r3,r7 + b 1f +130: +131: + addi r3,r3,8 +120: +122: +124: +125: +126: +127: +128: +129: +133: + addi r3,r3,8 +121: +132: + addi r3,r3,8 +123: +134: +135: +138: +139: +140: +141: +142: + +/* + * here we have had a fault on a load and r3 points to the first + * unmodified byte of the destination + */ +1: ld r6,-24(r1) + ld r4,-16(r1) + ld r5,-8(r1) + subf r6,r6,r3 + add r4,r4,r6 + subf r5,r6,r5 /* #bytes left to go */ + +/* + * first see if we can copy any more bytes before hitting another exception + */ + mtctr r5 +43: lbz r0,0(r4) + addi r4,r4,1 +89: stb r0,0(r3) + addi r3,r3,1 + bdnz 43b + li r3,0 /* huh? all copied successfully this time? */ + blr + +/* + * here we have trapped again, need to clear ctr bytes starting at r3 + */ +143: mfctr r5 + li r0,0 + mr r4,r3 + mr r3,r5 /* return the number of bytes not copied */ +1: andi. r9,r4,7 + beq 3f +90: stb r0,0(r4) + addic. r5,r5,-1 + addi r4,r4,1 + bne 1b + blr +3: cmpldi cr1,r5,8 + srdi r9,r5,3 + andi. r5,r5,7 + blt cr1,93f + mtctr r9 +91: std r0,0(r4) + addi r4,r4,8 + bdnz 91b +93: beqlr + mtctr r5 +92: stb r0,0(r4) + addi r4,r4,1 + bdnz 92b + blr + +/* + * exception handlers for stores: we just need to work + * out how many bytes weren't copied + */ +182: +183: + add r3,r3,r7 + b 1f +180: + addi r3,r3,8 +171: +177: + addi r3,r3,8 +170: +172: +176: +178: + addi r3,r3,4 +185: + addi r3,r3,4 +173: +174: +175: +179: +181: +184: +186: +187: +188: +189: +1: + ld r6,-24(r1) + ld r5,-8(r1) + add r6,r6,r5 + subf r3,r3,r6 /* #bytes not copied */ +190: +191: +192: + blr /* #bytes not copied in r3 */ + + .section __ex_table,"a" + .align 3 + .llong 20b,120b + .llong 21b,121b + .llong 70b,170b + .llong 22b,122b + .llong 71b,171b + .llong 72b,172b + .llong 23b,123b + .llong 73b,173b + .llong 74b,174b + .llong 75b,175b + .llong 24b,124b + .llong 25b,125b + .llong 26b,126b + .llong 27b,127b + .llong 28b,128b + .llong 29b,129b + .llong 30b,130b + .llong 31b,131b + .llong 32b,132b + .llong 76b,176b + .llong 33b,133b + .llong 77b,177b + .llong 78b,178b + .llong 79b,179b + .llong 80b,180b + .llong 34b,134b + .llong 35b,135b + .llong 81b,181b + .llong 36b,136b + .llong 82b,182b + .llong 37b,137b + .llong 83b,183b + .llong 38b,138b + .llong 39b,139b + .llong 84b,184b + .llong 85b,185b + .llong 40b,140b + .llong 86b,186b + .llong 41b,141b + .llong 87b,187b + .llong 42b,142b + .llong 88b,188b + .llong 43b,143b + .llong 89b,189b + .llong 90b,190b + .llong 91b,191b + .llong 92b,192b + + .text + +/* + * Routine to copy a whole page of data, optimized for POWER4. + * On POWER4 it is more than 50% faster than the simple loop + * above (following the .Ldst_aligned label) but it runs slightly + * slower on POWER3. + */ +.Lcopy_page: + std r31,-32(1) + std r30,-40(1) + std r29,-48(1) + std r28,-56(1) + std r27,-64(1) + std r26,-72(1) + std r25,-80(1) + std r24,-88(1) + std r23,-96(1) + std r22,-104(1) + std r21,-112(1) + std r20,-120(1) + li r5,4096/32 - 1 + addi r3,r3,-8 + li r0,5 +0: addi r5,r5,-24 + mtctr r0 +20: ld r22,640(4) +21: ld r21,512(4) +22: ld r20,384(4) +23: ld r11,256(4) +24: ld r9,128(4) +25: ld r7,0(4) +26: ld r25,648(4) +27: ld r24,520(4) +28: ld r23,392(4) +29: ld r10,264(4) +30: ld r8,136(4) +31: ldu r6,8(4) + cmpwi r5,24 +1: +32: std r22,648(3) +33: std r21,520(3) +34: std r20,392(3) +35: std r11,264(3) +36: std r9,136(3) +37: std r7,8(3) +38: ld r28,648(4) +39: ld r27,520(4) +40: ld r26,392(4) +41: ld r31,264(4) +42: ld r30,136(4) +43: ld r29,8(4) +44: std r25,656(3) +45: std r24,528(3) +46: std r23,400(3) +47: std r10,272(3) +48: std r8,144(3) +49: std r6,16(3) +50: ld r22,656(4) +51: ld r21,528(4) +52: ld r20,400(4) +53: ld r11,272(4) +54: ld r9,144(4) +55: ld r7,16(4) +56: std r28,664(3) +57: std r27,536(3) +58: std r26,408(3) +59: std r31,280(3) +60: std r30,152(3) +61: stdu r29,24(3) +62: ld r25,664(4) +63: ld r24,536(4) +64: ld r23,408(4) +65: ld r10,280(4) +66: ld r8,152(4) +67: ldu r6,24(4) + bdnz 1b +68: std r22,648(3) +69: std r21,520(3) +70: std r20,392(3) +71: std r11,264(3) +72: std r9,136(3) +73: std r7,8(3) +74: addi r4,r4,640 +75: addi r3,r3,648 + bge 0b + mtctr r5 +76: ld r7,0(4) +77: ld r8,8(4) +78: ldu r9,16(4) +3: +79: ld r10,8(4) +80: std r7,8(3) +81: ld r7,16(4) +82: std r8,16(3) +83: ld r8,24(4) +84: std r9,24(3) +85: ldu r9,32(4) +86: stdu r10,32(3) + bdnz 3b +4: +87: ld r10,8(4) +88: std r7,8(3) +89: std r8,16(3) +90: std r9,24(3) +91: std r10,32(3) +9: ld r20,-120(1) + ld r21,-112(1) + ld r22,-104(1) + ld r23,-96(1) + ld r24,-88(1) + ld r25,-80(1) + ld r26,-72(1) + ld r27,-64(1) + ld r28,-56(1) + ld r29,-48(1) + ld r30,-40(1) + ld r31,-32(1) + li r3,0 + blr + +/* + * on an exception, reset to the beginning and jump back into the + * standard __copy_tofrom_user + */ +100: ld r20,-120(1) + ld r21,-112(1) + ld r22,-104(1) + ld r23,-96(1) + ld r24,-88(1) + ld r25,-80(1) + ld r26,-72(1) + ld r27,-64(1) + ld r28,-56(1) + ld r29,-48(1) + ld r30,-40(1) + ld r31,-32(1) + ld r3,-24(r1) + ld r4,-16(r1) + li r5,4096 + b .Ldst_aligned + + .section __ex_table,"a" + .align 3 + .llong 20b,100b + .llong 21b,100b + .llong 22b,100b + .llong 23b,100b + .llong 24b,100b + .llong 25b,100b + .llong 26b,100b + .llong 27b,100b + .llong 28b,100b + .llong 29b,100b + .llong 30b,100b + .llong 31b,100b + .llong 32b,100b + .llong 33b,100b + .llong 34b,100b + .llong 35b,100b + .llong 36b,100b + .llong 37b,100b + .llong 38b,100b + .llong 39b,100b + .llong 40b,100b + .llong 41b,100b + .llong 42b,100b + .llong 43b,100b + .llong 44b,100b + .llong 45b,100b + .llong 46b,100b + .llong 47b,100b + .llong 48b,100b + .llong 49b,100b + .llong 50b,100b + .llong 51b,100b + .llong 52b,100b + .llong 53b,100b + .llong 54b,100b + .llong 55b,100b + .llong 56b,100b + .llong 57b,100b + .llong 58b,100b + .llong 59b,100b + .llong 60b,100b + .llong 61b,100b + .llong 62b,100b + .llong 63b,100b + .llong 64b,100b + .llong 65b,100b + .llong 66b,100b + .llong 67b,100b + .llong 68b,100b + .llong 69b,100b + .llong 70b,100b + .llong 71b,100b + .llong 72b,100b + .llong 73b,100b + .llong 74b,100b + .llong 75b,100b + .llong 76b,100b + .llong 77b,100b + .llong 78b,100b + .llong 79b,100b + .llong 80b,100b + .llong 81b,100b + .llong 82b,100b + .llong 83b,100b + .llong 84b,100b + .llong 85b,100b + .llong 86b,100b + .llong 87b,100b + .llong 88b,100b + .llong 89b,100b + .llong 90b,100b + .llong 91b,100b diff --git a/arch/powerpc/lib/div64.S b/arch/powerpc/lib/div64.S new file mode 100644 index 00000000000..3527569e992 --- /dev/null +++ b/arch/powerpc/lib/div64.S @@ -0,0 +1,58 @@ +/* + * Divide a 64-bit unsigned number by a 32-bit unsigned number. + * This routine assumes that the top 32 bits of the dividend are + * non-zero to start with. + * On entry, r3 points to the dividend, which get overwritten with + * the 64-bit quotient, and r4 contains the divisor. + * On exit, r3 contains the remainder. + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/ppc_asm.h> +#include <asm/processor.h> + +_GLOBAL(__div64_32) + lwz r5,0(r3) # get the dividend into r5/r6 + lwz r6,4(r3) + cmplw r5,r4 + li r7,0 + li r8,0 + blt 1f + divwu r7,r5,r4 # if dividend.hi >= divisor, + mullw r0,r7,r4 # quotient.hi = dividend.hi / divisor + subf. r5,r0,r5 # dividend.hi %= divisor + beq 3f +1: mr r11,r5 # here dividend.hi != 0 + andis. r0,r5,0xc000 + bne 2f + cntlzw r0,r5 # we are shifting the dividend right + li r10,-1 # to make it < 2^32, and shifting + srw r10,r10,r0 # the divisor right the same amount, + add r9,r4,r10 # rounding up (so the estimate cannot + andc r11,r6,r10 # ever be too large, only too small) + andc r9,r9,r10 + or r11,r5,r11 + rotlw r9,r9,r0 + rotlw r11,r11,r0 + divwu r11,r11,r9 # then we divide the shifted quantities +2: mullw r10,r11,r4 # to get an estimate of the quotient, + mulhwu r9,r11,r4 # multiply the estimate by the divisor, + subfc r6,r10,r6 # take the product from the divisor, + add r8,r8,r11 # and add the estimate to the accumulated + subfe. r5,r9,r5 # quotient + bne 1b +3: cmplw r6,r4 + blt 4f + divwu r0,r6,r4 # perform the remaining 32-bit division + mullw r10,r0,r4 # and get the remainder + add r8,r8,r0 + subf r6,r10,r6 +4: stw r7,0(r3) # return the quotient in *r3 + stw r8,4(r3) + mr r3,r6 # return the remainder in r3 + blr diff --git a/arch/powerpc/lib/e2a.c b/arch/powerpc/lib/e2a.c new file mode 100644 index 00000000000..d2b83488792 --- /dev/null +++ b/arch/powerpc/lib/e2a.c @@ -0,0 +1,108 @@ +/* + * arch/ppc64/lib/e2a.c + * + * EBCDIC to ASCII conversion + * + * This function moved here from arch/ppc64/kernel/viopath.c + * + * (C) Copyright 2000-2004 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/module.h> + +unsigned char e2a(unsigned char x) +{ + switch (x) { + case 0xF0: + return '0'; + case 0xF1: + return '1'; + case 0xF2: + return '2'; + case 0xF3: + return '3'; + case 0xF4: + return '4'; + case 0xF5: + return '5'; + case 0xF6: + return '6'; + case 0xF7: + return '7'; + case 0xF8: + return '8'; + case 0xF9: + return '9'; + case 0xC1: + return 'A'; + case 0xC2: + return 'B'; + case 0xC3: + return 'C'; + case 0xC4: + return 'D'; + case 0xC5: + return 'E'; + case 0xC6: + return 'F'; + case 0xC7: + return 'G'; + case 0xC8: + return 'H'; + case 0xC9: + return 'I'; + case 0xD1: + return 'J'; + case 0xD2: + return 'K'; + case 0xD3: + return 'L'; + case 0xD4: + return 'M'; + case 0xD5: + return 'N'; + case 0xD6: + return 'O'; + case 0xD7: + return 'P'; + case 0xD8: + return 'Q'; + case 0xD9: + return 'R'; + case 0xE2: + return 'S'; + case 0xE3: + return 'T'; + case 0xE4: + return 'U'; + case 0xE5: + return 'V'; + case 0xE6: + return 'W'; + case 0xE7: + return 'X'; + case 0xE8: + return 'Y'; + case 0xE9: + return 'Z'; + } + return ' '; +} +EXPORT_SYMBOL(e2a); + + diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c new file mode 100644 index 00000000000..4b8c5ad5e7d --- /dev/null +++ b/arch/powerpc/lib/locks.c @@ -0,0 +1,95 @@ +/* + * Spin and read/write lock operations. + * + * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM + * Rework to support virtual processors + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <linux/stringify.h> + +/* waiting for a spinlock... */ +#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES) +#include <asm/hvcall.h> +#include <asm/iSeries/HvCall.h> + +void __spin_yield(raw_spinlock_t *lock) +{ + unsigned int lock_value, holder_cpu, yield_count; + struct paca_struct *holder_paca; + + lock_value = lock->slock; + if (lock_value == 0) + return; + holder_cpu = lock_value & 0xffff; + BUG_ON(holder_cpu >= NR_CPUS); + holder_paca = &paca[holder_cpu]; + yield_count = holder_paca->lppaca.yield_count; + if ((yield_count & 1) == 0) + return; /* virtual cpu is currently running */ + rmb(); + if (lock->slock != lock_value) + return; /* something has changed */ +#ifdef CONFIG_PPC_ISERIES + HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, + ((u64)holder_cpu << 32) | yield_count); +#else + plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), + yield_count); +#endif +} + +/* + * Waiting for a read lock or a write lock on a rwlock... + * This turns out to be the same for read and write locks, since + * we only know the holder if it is write-locked. + */ +void __rw_yield(raw_rwlock_t *rw) +{ + int lock_value; + unsigned int holder_cpu, yield_count; + struct paca_struct *holder_paca; + + lock_value = rw->lock; + if (lock_value >= 0) + return; /* no write lock at present */ + holder_cpu = lock_value & 0xffff; + BUG_ON(holder_cpu >= NR_CPUS); + holder_paca = &paca[holder_cpu]; + yield_count = holder_paca->lppaca.yield_count; + if ((yield_count & 1) == 0) + return; /* virtual cpu is currently running */ + rmb(); + if (rw->lock != lock_value) + return; /* something has changed */ +#ifdef CONFIG_PPC_ISERIES + HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, + ((u64)holder_cpu << 32) | yield_count); +#else + plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), + yield_count); +#endif +} +#endif + +void __raw_spin_unlock_wait(raw_spinlock_t *lock) +{ + while (lock->slock) { + HMT_low(); + if (SHARED_PROCESSOR) + __spin_yield(lock); + } + HMT_medium(); +} + +EXPORT_SYMBOL(__raw_spin_unlock_wait); diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S new file mode 100644 index 00000000000..68df20283ff --- /dev/null +++ b/arch/powerpc/lib/mem_64.S @@ -0,0 +1,119 @@ +/* + * String handling functions for PowerPC. + * + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/processor.h> +#include <asm/errno.h> +#include <asm/ppc_asm.h> + +_GLOBAL(memset) + neg r0,r3 + rlwimi r4,r4,8,16,23 + andi. r0,r0,7 /* # bytes to be 8-byte aligned */ + rlwimi r4,r4,16,0,15 + cmplw cr1,r5,r0 /* do we get that far? */ + rldimi r4,r4,32,0 + mtcrf 1,r0 + mr r6,r3 + blt cr1,8f + beq+ 3f /* if already 8-byte aligned */ + subf r5,r0,r5 + bf 31,1f + stb r4,0(r6) + addi r6,r6,1 +1: bf 30,2f + sth r4,0(r6) + addi r6,r6,2 +2: bf 29,3f + stw r4,0(r6) + addi r6,r6,4 +3: srdi. r0,r5,6 + clrldi r5,r5,58 + mtctr r0 + beq 5f +4: std r4,0(r6) + std r4,8(r6) + std r4,16(r6) + std r4,24(r6) + std r4,32(r6) + std r4,40(r6) + std r4,48(r6) + std r4,56(r6) + addi r6,r6,64 + bdnz 4b +5: srwi. r0,r5,3 + clrlwi r5,r5,29 + mtcrf 1,r0 + beq 8f + bf 29,6f + std r4,0(r6) + std r4,8(r6) + std r4,16(r6) + std r4,24(r6) + addi r6,r6,32 +6: bf 30,7f + std r4,0(r6) + std r4,8(r6) + addi r6,r6,16 +7: bf 31,8f + std r4,0(r6) + addi r6,r6,8 +8: cmpwi r5,0 + mtcrf 1,r5 + beqlr+ + bf 29,9f + stw r4,0(r6) + addi r6,r6,4 +9: bf 30,10f + sth r4,0(r6) + addi r6,r6,2 +10: bflr 31 + stb r4,0(r6) + blr + +_GLOBAL(memmove) + cmplw 0,r3,r4 + bgt .backwards_memcpy + b .memcpy + +_GLOBAL(backwards_memcpy) + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S new file mode 100644 index 00000000000..9ccacdf5bcb --- /dev/null +++ b/arch/powerpc/lib/memcpy_64.S @@ -0,0 +1,172 @@ +/* + * arch/ppc64/lib/memcpy.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> + + .align 7 +_GLOBAL(memcpy) + mtcrf 0x01,r5 + cmpldi cr1,r5,16 + neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry + andi. r6,r6,7 + dcbt 0,r4 + blt cr1,.Lshort_copy + bne .Ldst_unaligned +.Ldst_aligned: + andi. r0,r4,7 + addi r3,r3,-16 + bne .Lsrc_unaligned + srdi r7,r5,4 + ld r9,0(r4) + addi r4,r4,-8 + mtctr r7 + andi. r5,r5,7 + bf cr7*4+0,2f + addi r3,r3,8 + addi r4,r4,8 + mr r8,r9 + blt cr1,3f +1: ld r9,8(r4) + std r8,8(r3) +2: ldu r8,16(r4) + stdu r9,16(r3) + bdnz 1b +3: std r8,8(r3) + beqlr + addi r3,r3,16 + ld r9,8(r4) +.Ldo_tail: + bf cr7*4+1,1f + rotldi r9,r9,32 + stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 + sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 + stb r9,0(r3) +3: blr + +.Lsrc_unaligned: + srdi r6,r5,3 + addi r5,r5,-16 + subf r4,r0,r4 + srdi r7,r5,4 + sldi r10,r0,3 + cmpdi cr6,r6,3 + andi. r5,r5,7 + mtctr r7 + subfic r11,r10,64 + add r5,r5,r0 + + bt cr7*4+0,0f + + ld r9,0(r4) # 3+2n loads, 2+2n stores + ld r0,8(r4) + sld r6,r9,r10 + ldu r9,16(r4) + srd r7,r0,r11 + sld r8,r0,r10 + or r7,r7,r6 + blt cr6,4f + ld r0,8(r4) + # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12 + b 2f + +0: ld r0,0(r4) # 4+2n loads, 3+2n stores + ldu r9,8(r4) + sld r8,r0,r10 + addi r3,r3,-8 + blt cr6,5f + ld r0,8(r4) + srd r12,r9,r11 + sld r6,r9,r10 + ldu r9,16(r4) + or r12,r8,r12 + srd r7,r0,r11 + sld r8,r0,r10 + addi r3,r3,16 + beq cr6,3f + + # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9 +1: or r7,r7,r6 + ld r0,8(r4) + std r12,8(r3) +2: srd r12,r9,r11 + sld r6,r9,r10 + ldu r9,16(r4) + or r12,r8,r12 + stdu r7,16(r3) + srd r7,r0,r11 + sld r8,r0,r10 + bdnz 1b + +3: std r12,8(r3) + or r7,r7,r6 +4: std r7,16(r3) +5: srd r12,r9,r11 + or r12,r8,r12 + std r12,24(r3) + beqlr + cmpwi cr1,r5,8 + addi r3,r3,32 + sld r9,r9,r10 + ble cr1,.Ldo_tail + ld r0,8(r4) + srd r7,r0,r11 + or r9,r7,r9 + b .Ldo_tail + +.Ldst_unaligned: + mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7 + subf r5,r6,r5 + li r7,0 + cmpldi r1,r5,16 + bf cr7*4+3,1f + lbz r0,0(r4) + stb r0,0(r3) + addi r7,r7,1 +1: bf cr7*4+2,2f + lhzx r0,r7,r4 + sthx r0,r7,r3 + addi r7,r7,2 +2: bf cr7*4+1,3f + lwzx r0,r7,r4 + stwx r0,r7,r3 +3: mtcrf 0x01,r5 + add r4,r6,r4 + add r3,r6,r3 + b .Ldst_aligned + +.Lshort_copy: + bf cr7*4+0,1f + lwz r0,0(r4) + lwz r9,4(r4) + addi r4,r4,8 + stw r0,0(r3) + stw r9,4(r3) + addi r3,r3,8 +1: bf cr7*4+1,2f + lwz r0,0(r4) + addi r4,r4,4 + stw r0,0(r3) + addi r3,r3,4 +2: bf cr7*4+2,3f + lhz r0,0(r4) + addi r4,r4,2 + sth r0,0(r3) + addi r3,r3,2 +3: bf cr7*4+3,4f + lbz r0,0(r4) + stb r0,0(r3) +4: blr diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c new file mode 100644 index 00000000000..42c5de2c898 --- /dev/null +++ b/arch/powerpc/lib/rheap.c @@ -0,0 +1,693 @@ +/* + * arch/ppc/syslib/rheap.c + * + * A Remote Heap. Remote means that we don't touch the memory that the + * heap points to. Normal heap implementations use the memory they manage + * to place their list. We cannot do that because the memory we manage may + * have special properties, for example it is uncachable or of different + * endianess. + * + * Author: Pantelis Antoniou <panto@intracom.gr> + * + * 2004 (c) INTRACOM S.A. Greece. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/slab.h> + +#include <asm/rheap.h> + +/* + * Fixup a list_head, needed when copying lists. If the pointers fall + * between s and e, apply the delta. This assumes that + * sizeof(struct list_head *) == sizeof(unsigned long *). + */ +static inline void fixup(unsigned long s, unsigned long e, int d, + struct list_head *l) +{ + unsigned long *pp; + + pp = (unsigned long *)&l->next; + if (*pp >= s && *pp < e) + *pp += d; + + pp = (unsigned long *)&l->prev; + if (*pp >= s && *pp < e) + *pp += d; +} + +/* Grow the allocated blocks */ +static int grow(rh_info_t * info, int max_blocks) +{ + rh_block_t *block, *blk; + int i, new_blocks; + int delta; + unsigned long blks, blke; + + if (max_blocks <= info->max_blocks) + return -EINVAL; + + new_blocks = max_blocks - info->max_blocks; + + block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_KERNEL); + if (block == NULL) + return -ENOMEM; + + if (info->max_blocks > 0) { + + /* copy old block area */ + memcpy(block, info->block, + sizeof(rh_block_t) * info->max_blocks); + + delta = (char *)block - (char *)info->block; + + /* and fixup list pointers */ + blks = (unsigned long)info->block; + blke = (unsigned long)(info->block + info->max_blocks); + + for (i = 0, blk = block; i < info->max_blocks; i++, blk++) + fixup(blks, blke, delta, &blk->list); + + fixup(blks, blke, delta, &info->empty_list); + fixup(blks, blke, delta, &info->free_list); + fixup(blks, blke, delta, &info->taken_list); + + /* free the old allocated memory */ + if ((info->flags & RHIF_STATIC_BLOCK) == 0) + kfree(info->block); + } + + info->block = block; + info->empty_slots += new_blocks; + info->max_blocks = max_blocks; + info->flags &= ~RHIF_STATIC_BLOCK; + + /* add all new blocks to the free list */ + for (i = 0, blk = block + info->max_blocks; i < new_blocks; i++, blk++) + list_add(&blk->list, &info->empty_list); + + return 0; +} + +/* + * Assure at least the required amount of empty slots. If this function + * causes a grow in the block area then all pointers kept to the block + * area are invalid! + */ +static int assure_empty(rh_info_t * info, int slots) +{ + int max_blocks; + + /* This function is not meant to be used to grow uncontrollably */ + if (slots >= 4) + return -EINVAL; + + /* Enough space */ + if (info->empty_slots >= slots) + return 0; + + /* Next 16 sized block */ + max_blocks = ((info->max_blocks + slots) + 15) & ~15; + + return grow(info, max_blocks); +} + +static rh_block_t *get_slot(rh_info_t * info) +{ + rh_block_t *blk; + + /* If no more free slots, and failure to extend. */ + /* XXX: You should have called assure_empty before */ + if (info->empty_slots == 0) { + printk(KERN_ERR "rh: out of slots; crash is imminent.\n"); + return NULL; + } + + /* Get empty slot to use */ + blk = list_entry(info->empty_list.next, rh_block_t, list); + list_del_init(&blk->list); + info->empty_slots--; + + /* Initialize */ + blk->start = NULL; + blk->size = 0; + blk->owner = NULL; + + return blk; +} + +static inline void release_slot(rh_info_t * info, rh_block_t * blk) +{ + list_add(&blk->list, &info->empty_list); + info->empty_slots++; +} + +static void attach_free_block(rh_info_t * info, rh_block_t * blkn) +{ + rh_block_t *blk; + rh_block_t *before; + rh_block_t *after; + rh_block_t *next; + int size; + unsigned long s, e, bs, be; + struct list_head *l; + + /* We assume that they are aligned properly */ + size = blkn->size; + s = (unsigned long)blkn->start; + e = s + size; + + /* Find the blocks immediately before and after the given one + * (if any) */ + before = NULL; + after = NULL; + next = NULL; + + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + + bs = (unsigned long)blk->start; + be = bs + blk->size; + + if (next == NULL && s >= bs) + next = blk; + + if (be == s) + before = blk; + + if (e == bs) + after = blk; + + /* If both are not null, break now */ + if (before != NULL && after != NULL) + break; + } + + /* Now check if they are really adjacent */ + if (before != NULL && s != (unsigned long)before->start + before->size) + before = NULL; + + if (after != NULL && e != (unsigned long)after->start) + after = NULL; + + /* No coalescing; list insert and return */ + if (before == NULL && after == NULL) { + + if (next != NULL) + list_add(&blkn->list, &next->list); + else + list_add(&blkn->list, &info->free_list); + + return; + } + + /* We don't need it anymore */ + release_slot(info, blkn); + + /* Grow the before block */ + if (before != NULL && after == NULL) { + before->size += size; + return; + } + + /* Grow the after block backwards */ + if (before == NULL && after != NULL) { + after->start = (int8_t *)after->start - size; + after->size += size; + return; + } + + /* Grow the before block, and release the after block */ + before->size += size + after->size; + list_del(&after->list); + release_slot(info, after); +} + +static void attach_taken_block(rh_info_t * info, rh_block_t * blkn) +{ + rh_block_t *blk; + struct list_head *l; + + /* Find the block immediately before the given one (if any) */ + list_for_each(l, &info->taken_list) { + blk = list_entry(l, rh_block_t, list); + if (blk->start > blkn->start) { + list_add_tail(&blkn->list, &blk->list); + return; + } + } + + list_add_tail(&blkn->list, &info->taken_list); +} + +/* + * Create a remote heap dynamically. Note that no memory for the blocks + * are allocated. It will upon the first allocation + */ +rh_info_t *rh_create(unsigned int alignment) +{ + rh_info_t *info; + + /* Alignment must be a power of two */ + if ((alignment & (alignment - 1)) != 0) + return ERR_PTR(-EINVAL); + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) + return ERR_PTR(-ENOMEM); + + info->alignment = alignment; + + /* Initially everything as empty */ + info->block = NULL; + info->max_blocks = 0; + info->empty_slots = 0; + info->flags = 0; + + INIT_LIST_HEAD(&info->empty_list); + INIT_LIST_HEAD(&info->free_list); + INIT_LIST_HEAD(&info->taken_list); + + return info; +} + +/* + * Destroy a dynamically created remote heap. Deallocate only if the areas + * are not static + */ +void rh_destroy(rh_info_t * info) +{ + if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL) + kfree(info->block); + + if ((info->flags & RHIF_STATIC_INFO) == 0) + kfree(info); +} + +/* + * Initialize in place a remote heap info block. This is needed to support + * operation very early in the startup of the kernel, when it is not yet safe + * to call kmalloc. + */ +void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks, + rh_block_t * block) +{ + int i; + rh_block_t *blk; + + /* Alignment must be a power of two */ + if ((alignment & (alignment - 1)) != 0) + return; + + info->alignment = alignment; + + /* Initially everything as empty */ + info->block = block; + info->max_blocks = max_blocks; + info->empty_slots = max_blocks; + info->flags = RHIF_STATIC_INFO | RHIF_STATIC_BLOCK; + + INIT_LIST_HEAD(&info->empty_list); + INIT_LIST_HEAD(&info->free_list); + INIT_LIST_HEAD(&info->taken_list); + + /* Add all new blocks to the free list */ + for (i = 0, blk = block; i < max_blocks; i++, blk++) + list_add(&blk->list, &info->empty_list); +} + +/* Attach a free memory region, coalesces regions if adjuscent */ +int rh_attach_region(rh_info_t * info, void *start, int size) +{ + rh_block_t *blk; + unsigned long s, e, m; + int r; + + /* The region must be aligned */ + s = (unsigned long)start; + e = s + size; + m = info->alignment - 1; + + /* Round start up */ + s = (s + m) & ~m; + + /* Round end down */ + e = e & ~m; + + /* Take final values */ + start = (void *)s; + size = (int)(e - s); + + /* Grow the blocks, if needed */ + r = assure_empty(info, 1); + if (r < 0) + return r; + + blk = get_slot(info); + blk->start = start; + blk->size = size; + blk->owner = NULL; + + attach_free_block(info, blk); + + return 0; +} + +/* Detatch given address range, splits free block if needed. */ +void *rh_detach_region(rh_info_t * info, void *start, int size) +{ + struct list_head *l; + rh_block_t *blk, *newblk; + unsigned long s, e, m, bs, be; + + /* Validate size */ + if (size <= 0) + return ERR_PTR(-EINVAL); + + /* The region must be aligned */ + s = (unsigned long)start; + e = s + size; + m = info->alignment - 1; + + /* Round start up */ + s = (s + m) & ~m; + + /* Round end down */ + e = e & ~m; + + if (assure_empty(info, 1) < 0) + return ERR_PTR(-ENOMEM); + + blk = NULL; + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + /* The range must lie entirely inside one free block */ + bs = (unsigned long)blk->start; + be = (unsigned long)blk->start + blk->size; + if (s >= bs && e <= be) + break; + blk = NULL; + } + + if (blk == NULL) + return ERR_PTR(-ENOMEM); + + /* Perfect fit */ + if (bs == s && be == e) { + /* Delete from free list, release slot */ + list_del(&blk->list); + release_slot(info, blk); + return (void *)s; + } + + /* blk still in free list, with updated start and/or size */ + if (bs == s || be == e) { + if (bs == s) + blk->start = (int8_t *)blk->start + size; + blk->size -= size; + + } else { + /* The front free fragment */ + blk->size = s - bs; + + /* the back free fragment */ + newblk = get_slot(info); + newblk->start = (void *)e; + newblk->size = be - e; + + list_add(&newblk->list, &blk->list); + } + + return (void *)s; +} + +void *rh_alloc(rh_info_t * info, int size, const char *owner) +{ + struct list_head *l; + rh_block_t *blk; + rh_block_t *newblk; + void *start; + + /* Validate size */ + if (size <= 0) + return ERR_PTR(-EINVAL); + + /* Align to configured alignment */ + size = (size + (info->alignment - 1)) & ~(info->alignment - 1); + + if (assure_empty(info, 1) < 0) + return ERR_PTR(-ENOMEM); + + blk = NULL; + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + if (size <= blk->size) + break; + blk = NULL; + } + + if (blk == NULL) + return ERR_PTR(-ENOMEM); + + /* Just fits */ + if (blk->size == size) { + /* Move from free list to taken list */ + list_del(&blk->list); + blk->owner = owner; + start = blk->start; + + attach_taken_block(info, blk); + + return start; + } + + newblk = get_slot(info); + newblk->start = blk->start; + newblk->size = size; + newblk->owner = owner; + + /* blk still in free list, with updated start, size */ + blk->start = (int8_t *)blk->start + size; + blk->size -= size; + + start = newblk->start; + + attach_taken_block(info, newblk); + + return start; +} + +/* allocate at precisely the given address */ +void *rh_alloc_fixed(rh_info_t * info, void *start, int size, const char *owner) +{ + struct list_head *l; + rh_block_t *blk, *newblk1, *newblk2; + unsigned long s, e, m, bs, be; + + /* Validate size */ + if (size <= 0) + return ERR_PTR(-EINVAL); + + /* The region must be aligned */ + s = (unsigned long)start; + e = s + size; + m = info->alignment - 1; + + /* Round start up */ + s = (s + m) & ~m; + + /* Round end down */ + e = e & ~m; + + if (assure_empty(info, 2) < 0) + return ERR_PTR(-ENOMEM); + + blk = NULL; + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + /* The range must lie entirely inside one free block */ + bs = (unsigned long)blk->start; + be = (unsigned long)blk->start + blk->size; + if (s >= bs && e <= be) + break; + } + + if (blk == NULL) + return ERR_PTR(-ENOMEM); + + /* Perfect fit */ + if (bs == s && be == e) { + /* Move from free list to taken list */ + list_del(&blk->list); + blk->owner = owner; + + start = blk->start; + attach_taken_block(info, blk); + + return start; + + } + + /* blk still in free list, with updated start and/or size */ + if (bs == s || be == e) { + if (bs == s) + blk->start = (int8_t *)blk->start + size; + blk->size -= size; + + } else { + /* The front free fragment */ + blk->size = s - bs; + + /* The back free fragment */ + newblk2 = get_slot(info); + newblk2->start = (void *)e; + newblk2->size = be - e; + + list_add(&newblk2->list, &blk->list); + } + + newblk1 = get_slot(info); + newblk1->start = (void *)s; + newblk1->size = e - s; + newblk1->owner = owner; + + start = newblk1->start; + attach_taken_block(info, newblk1); + + return start; +} + +int rh_free(rh_info_t * info, void *start) +{ + rh_block_t *blk, *blk2; + struct list_head *l; + int size; + + /* Linear search for block */ + blk = NULL; + list_for_each(l, &info->taken_list) { + blk2 = list_entry(l, rh_block_t, list); + if (start < blk2->start) + break; + blk = blk2; + } + + if (blk == NULL || start > (blk->start + blk->size)) + return -EINVAL; + + /* Remove from taken list */ + list_del(&blk->list); + + /* Get size of freed block */ + size = blk->size; + attach_free_block(info, blk); + + return size; +} + +int rh_get_stats(rh_info_t * info, int what, int max_stats, rh_stats_t * stats) +{ + rh_block_t *blk; + struct list_head *l; + struct list_head *h; + int nr; + + switch (what) { + + case RHGS_FREE: + h = &info->free_list; + break; + + case RHGS_TAKEN: + h = &info->taken_list; + break; + + default: + return -EINVAL; + } + + /* Linear search for block */ + nr = 0; + list_for_each(l, h) { + blk = list_entry(l, rh_block_t, list); + if (stats != NULL && nr < max_stats) { + stats->start = blk->start; + stats->size = blk->size; + stats->owner = blk->owner; + stats++; + } + nr++; + } + + return nr; +} + +int rh_set_owner(rh_info_t * info, void *start, const char *owner) +{ + rh_block_t *blk, *blk2; + struct list_head *l; + int size; + + /* Linear search for block */ + blk = NULL; + list_for_each(l, &info->taken_list) { + blk2 = list_entry(l, rh_block_t, list); + if (start < blk2->start) + break; + blk = blk2; + } + + if (blk == NULL || start > (blk->start + blk->size)) + return -EINVAL; + + blk->owner = owner; + size = blk->size; + + return size; +} + +void rh_dump(rh_info_t * info) +{ + static rh_stats_t st[32]; /* XXX maximum 32 blocks */ + int maxnr; + int i, nr; + + maxnr = sizeof(st) / sizeof(st[0]); + + printk(KERN_INFO + "info @0x%p (%d slots empty / %d max)\n", + info, info->empty_slots, info->max_blocks); + + printk(KERN_INFO " Free:\n"); + nr = rh_get_stats(info, RHGS_FREE, maxnr, st); + if (nr > maxnr) + nr = maxnr; + for (i = 0; i < nr; i++) + printk(KERN_INFO + " 0x%p-0x%p (%u)\n", + st[i].start, (int8_t *) st[i].start + st[i].size, + st[i].size); + printk(KERN_INFO "\n"); + + printk(KERN_INFO " Taken:\n"); + nr = rh_get_stats(info, RHGS_TAKEN, maxnr, st); + if (nr > maxnr) + nr = maxnr; + for (i = 0; i < nr; i++) + printk(KERN_INFO + " 0x%p-0x%p (%u) %s\n", + st[i].start, (int8_t *) st[i].start + st[i].size, + st[i].size, st[i].owner != NULL ? st[i].owner : ""); + printk(KERN_INFO "\n"); +} + +void rh_dump_blk(rh_info_t * info, rh_block_t * blk) +{ + printk(KERN_INFO + "blk @0x%p: 0x%p-0x%p (%u)\n", + blk, blk->start, (int8_t *) blk->start + blk->size, blk->size); +} diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c new file mode 100644 index 00000000000..e79123d1485 --- /dev/null +++ b/arch/powerpc/lib/sstep.c @@ -0,0 +1,141 @@ +/* + * Single-step support. + * + * Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/ptrace.h> +#include <asm/sstep.h> +#include <asm/processor.h> + +extern char system_call_common[]; + +/* Bits in SRR1 that are copied from MSR */ +#define MSR_MASK 0xffffffff87c0ffff + +/* + * Determine whether a conditional branch instruction would branch. + */ +static int branch_taken(unsigned int instr, struct pt_regs *regs) +{ + unsigned int bo = (instr >> 21) & 0x1f; + unsigned int bi; + + if ((bo & 4) == 0) { + /* decrement counter */ + --regs->ctr; + if (((bo >> 1) & 1) ^ (regs->ctr == 0)) + return 0; + } + if ((bo & 0x10) == 0) { + /* check bit from CR */ + bi = (instr >> 16) & 0x1f; + if (((regs->ccr >> (31 - bi)) & 1) != ((bo >> 3) & 1)) + return 0; + } + return 1; +} + +/* + * Emulate instructions that cause a transfer of control. + * Returns 1 if the step was emulated, 0 if not, + * or -1 if the instruction is one that should not be stepped, + * such as an rfid, or a mtmsrd that would clear MSR_RI. + */ +int emulate_step(struct pt_regs *regs, unsigned int instr) +{ + unsigned int opcode, rd; + unsigned long int imm; + + opcode = instr >> 26; + switch (opcode) { + case 16: /* bc */ + imm = (signed short)(instr & 0xfffc); + if ((instr & 2) == 0) + imm += regs->nip; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + if (instr & 1) + regs->link = regs->nip; + if (branch_taken(instr, regs)) + regs->nip = imm; + return 1; + case 17: /* sc */ + /* + * N.B. this uses knowledge about how the syscall + * entry code works. If that is changed, this will + * need to be changed also. + */ + regs->gpr[9] = regs->gpr[13]; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_common; + regs->msr = MSR_KERNEL; + return 1; + case 18: /* b */ + imm = instr & 0x03fffffc; + if (imm & 0x02000000) + imm -= 0x04000000; + if ((instr & 2) == 0) + imm += regs->nip; + if (instr & 1) { + regs->link = regs->nip + 4; + if ((regs->msr & MSR_SF) == 0) + regs->link &= 0xffffffffUL; + } + if ((regs->msr & MSR_SF) == 0) + imm &= 0xffffffffUL; + regs->nip = imm; + return 1; + case 19: + switch (instr & 0x7fe) { + case 0x20: /* bclr */ + case 0x420: /* bcctr */ + imm = (instr & 0x400)? regs->ctr: regs->link; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) { + regs->nip &= 0xffffffffUL; + imm &= 0xffffffffUL; + } + if (instr & 1) + regs->link = regs->nip; + if (branch_taken(instr, regs)) + regs->nip = imm; + return 1; + case 0x24: /* rfid, scary */ + return -1; + } + case 31: + rd = (instr >> 21) & 0x1f; + switch (instr & 0x7fe) { + case 0xa6: /* mfmsr */ + regs->gpr[rd] = regs->msr & MSR_MASK; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + return 1; + case 0x164: /* mtmsrd */ + /* only MSR_EE and MSR_RI get changed if bit 15 set */ + /* mtmsrd doesn't change MSR_HV and MSR_ME */ + imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL; + imm = (regs->msr & MSR_MASK & ~imm) + | (regs->gpr[rd] & imm); + if ((imm & MSR_RI) == 0) + /* can't step mtmsrd that would clear MSR_RI */ + return -1; + regs->msr = imm; + regs->nip += 4; + if ((imm & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + return 1; + } + } + return 0; +} diff --git a/arch/powerpc/lib/strcase.c b/arch/powerpc/lib/strcase.c new file mode 100644 index 00000000000..36b521091bb --- /dev/null +++ b/arch/powerpc/lib/strcase.c @@ -0,0 +1,23 @@ +#include <linux/ctype.h> + +int strcasecmp(const char *s1, const char *s2) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while (c1 == c2 && c1 != 0); + return c1 - c2; +} + +int strncasecmp(const char *s1, const char *s2, int n) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while ((--n > 0) && c1 == c2 && c1 != 0); + return c1 - c2; +} diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S new file mode 100644 index 00000000000..b9ca84ed892 --- /dev/null +++ b/arch/powerpc/lib/string.S @@ -0,0 +1,198 @@ +/* + * String handling functions for PowerPC. + * + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/errno.h> +#include <asm/ppc_asm.h> + + .section __ex_table,"a" +#ifdef CONFIG_PPC64 + .align 3 +#define EXTBL .llong +#else + .align 2 +#define EXTBL .long +#endif + .text + +_GLOBAL(strcpy) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + +/* This clears out any unused part of the destination buffer, + just as the libc version does. -- paulus */ +_GLOBAL(strncpy) + cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + bnelr /* if we didn't hit a null char, we're done */ + mfctr r5 + cmpwi 0,r5,0 /* any space left in destination buffer? */ + beqlr /* we know r0 == 0 here */ +2: stbu r0,1(r6) /* clear it out if so */ + bdnz 2b + blr + +_GLOBAL(strcat) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r5) + cmpwi 0,r0,0 + bne 1b + addi r5,r5,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + +_GLOBAL(strcmp) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r5) + cmpwi 1,r3,0 + lbzu r0,1(r4) + subf. r3,r0,r3 + beqlr 1 + beq 1b + blr + +_GLOBAL(strlen) + addi r4,r3,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + bne 1b + subf r3,r3,r4 + blr + +_GLOBAL(memcmp) + cmpwi 0,r5,0 + ble- 2f + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r6) + lbzu r0,1(r4) + subf. r3,r0,r3 + bdnzt 2,1b + blr +2: li r3,0 + blr + +_GLOBAL(memchr) + cmpwi 0,r5,0 + ble- 2f + mtctr r5 + addi r3,r3,-1 +1: lbzu r0,1(r3) + cmpw 0,r0,r4 + bdnzf 2,1b + beqlr +2: li r3,0 + blr + +_GLOBAL(__clear_user) + addi r6,r3,-4 + li r3,0 + li r5,0 + cmplwi 0,r4,4 + blt 7f + /* clear a single word */ +11: stwu r5,4(r6) + beqlr + /* clear word sized chunks */ + andi. r0,r6,3 + add r4,r0,r4 + subf r6,r0,r6 + srwi r0,r4,2 + andi. r4,r4,3 + mtctr r0 + bdz 7f +1: stwu r5,4(r6) + bdnz 1b + /* clear byte sized chunks */ +7: cmpwi 0,r4,0 + beqlr + mtctr r4 + addi r6,r6,3 +8: stbu r5,1(r6) + bdnz 8b + blr +90: mr r3,r4 + blr +91: mfctr r3 + slwi r3,r3,2 + add r3,r3,r4 + blr +92: mfctr r3 + blr + + .section __ex_table,"a" + EXTBL 11b,90b + EXTBL 1b,91b + EXTBL 8b,92b + .text + +_GLOBAL(__strncpy_from_user) + addi r6,r3,-1 + addi r4,r4,-1 + cmpwi 0,r5,0 + beq 2f + mtctr r5 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + beq 3f +2: addi r6,r6,1 +3: subf r3,r3,r6 + blr +99: li r3,-EFAULT + blr + + .section __ex_table,"a" + EXTBL 1b,99b + .text + +/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */ +_GLOBAL(__strnlen_user) + addi r7,r3,-1 + subf r6,r7,r5 /* top+1 - str */ + cmplw 0,r4,r6 + bge 0f + mr r6,r4 +0: mtctr r6 /* ctr = min(len, top - str) */ +1: lbzu r0,1(r7) /* get next byte */ + cmpwi 0,r0,0 + bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */ + addi r7,r7,1 + subf r3,r3,r7 /* number of bytes we have looked at */ + beqlr /* return if we found a 0 byte */ + cmpw 0,r3,r4 /* did we look at all len bytes? */ + blt 99f /* if not, must have hit top */ + addi r3,r4,1 /* return len + 1 to indicate no null found */ + blr +99: li r3,0 /* bad address, return 0 */ + blr + + .section __ex_table,"a" + EXTBL 1b,99b diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c new file mode 100644 index 00000000000..5eea6f3c1e0 --- /dev/null +++ b/arch/powerpc/lib/usercopy_64.c @@ -0,0 +1,41 @@ +/* + * Functions which are too large to be inlined. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/module.h> +#include <asm/uaccess.h> + +unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_READ, from, n))) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} + +unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_WRITE, to, n))) + n = __copy_to_user(to, from, n); + return n; +} + +unsigned long copy_in_user(void __user *to, const void __user *from, + unsigned long n) +{ + might_sleep(); + if (likely(access_ok(VERIFY_READ, from, n) && + access_ok(VERIFY_WRITE, to, n))) + n =__copy_tofrom_user(to, from, n); + return n; +} + +EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(copy_in_user); + diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c new file mode 100644 index 00000000000..3d79ce281b6 --- /dev/null +++ b/arch/powerpc/mm/44x_mmu.c @@ -0,0 +1,120 @@ +/* + * Modifications by Matt Porter (mporter@mvista.com) to support + * PPC44x Book E processors. + * + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/highmem.h> + +#include <asm/pgalloc.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/bootx.h> +#include <asm/machdep.h> +#include <asm/setup.h> + +#include "mmu_decl.h" + +extern char etext[], _stext[]; + +/* Used by the 44x TLB replacement exception handler. + * Just needed it declared someplace. + */ +unsigned int tlb_44x_index = 0; +unsigned int tlb_44x_hwater = 62; + +/* + * "Pins" a 256MB TLB entry in AS0 for kernel lowmem + */ +static void __init +ppc44x_pin_tlb(int slot, unsigned int virt, unsigned int phys) +{ + unsigned long attrib = 0; + + __asm__ __volatile__("\ + clrrwi %2,%2,10\n\ + ori %2,%2,%4\n\ + clrrwi %1,%1,10\n\ + li %0,0\n\ + ori %0,%0,%5\n\ + tlbwe %2,%3,%6\n\ + tlbwe %1,%3,%7\n\ + tlbwe %0,%3,%8" + : + : "r" (attrib), "r" (phys), "r" (virt), "r" (slot), + "i" (PPC44x_TLB_VALID | PPC44x_TLB_256M), + "i" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), + "i" (PPC44x_TLB_PAGEID), + "i" (PPC44x_TLB_XLAT), + "i" (PPC44x_TLB_ATTRIB)); +} + +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + flush_instruction_cache(); +} + +unsigned long __init mmu_mapin_ram(void) +{ + unsigned int pinned_tlbs = 1; + int i; + + /* Determine number of entries necessary to cover lowmem */ + pinned_tlbs = (unsigned int) + (_ALIGN(total_lowmem, PPC44x_PIN_SIZE) >> PPC44x_PIN_SHIFT); + + /* Write upper watermark to save location */ + tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs; + + /* If necessary, set additional pinned TLBs */ + if (pinned_tlbs > 1) + for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) { + unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC44x_PIN_SIZE; + ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr); + } + + return total_lowmem; +} diff --git a/arch/powerpc/mm/4xx_mmu.c b/arch/powerpc/mm/4xx_mmu.c new file mode 100644 index 00000000000..b7bcbc232f3 --- /dev/null +++ b/arch/powerpc/mm/4xx_mmu.c @@ -0,0 +1,141 @@ +/* + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/highmem.h> + +#include <asm/pgalloc.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/bootx.h> +#include <asm/machdep.h> +#include <asm/setup.h> +#include "mmu_decl.h" + +extern int __map_without_ltlbs; +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + /* + * The Zone Protection Register (ZPR) defines how protection will + * be applied to every page which is a member of a given zone. At + * present, we utilize only two of the 4xx's zones. + * The zone index bits (of ZSEL) in the PTE are used for software + * indicators, except the LSB. For user access, zone 1 is used, + * for kernel access, zone 0 is used. We set all but zone 1 + * to zero, allowing only kernel access as indicated in the PTE. + * For zone 1, we set a 01 binary (a value of 10 will not work) + * to allow user access as indicated in the PTE. This also allows + * kernel access as indicated in the PTE. + */ + + mtspr(SPRN_ZPR, 0x10000000); + + flush_instruction_cache(); + + /* + * Set up the real-mode cache parameters for the exception vector + * handlers (which are run in real-mode). + */ + + mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */ + + /* + * Cache instruction and data space where the exception + * vectors and the kernel live in real-mode. + */ + + mtspr(SPRN_DCCR, 0xF0000000); /* 512 MB of data space at 0x0. */ + mtspr(SPRN_ICCR, 0xF0000000); /* 512 MB of instr. space at 0x0. */ +} + +#define LARGE_PAGE_SIZE_16M (1<<24) +#define LARGE_PAGE_SIZE_4M (1<<22) + +unsigned long __init mmu_mapin_ram(void) +{ + unsigned long v, s; + phys_addr_t p; + + v = KERNELBASE; + p = PPC_MEMSTART; + s = 0; + + if (__map_without_ltlbs) { + return s; + } + + while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) { + pmd_t *pmdp; + unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE; + + spin_lock(&init_mm.page_table_lock); + pmdp = pmd_offset(pgd_offset_k(v), v); + pmd_val(*pmdp++) = val; + pmd_val(*pmdp++) = val; + pmd_val(*pmdp++) = val; + pmd_val(*pmdp++) = val; + spin_unlock(&init_mm.page_table_lock); + + v += LARGE_PAGE_SIZE_16M; + p += LARGE_PAGE_SIZE_16M; + s += LARGE_PAGE_SIZE_16M; + } + + while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) { + pmd_t *pmdp; + unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE; + + spin_lock(&init_mm.page_table_lock); + pmdp = pmd_offset(pgd_offset_k(v), v); + pmd_val(*pmdp) = val; + spin_unlock(&init_mm.page_table_lock); + + v += LARGE_PAGE_SIZE_4M; + p += LARGE_PAGE_SIZE_4M; + s += LARGE_PAGE_SIZE_4M; + } + + return s; +} diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile new file mode 100644 index 00000000000..14b2ffb6c2d --- /dev/null +++ b/arch/powerpc/mm/Makefile @@ -0,0 +1,22 @@ +# +# Makefile for the linux ppc-specific parts of the memory manager. +# + +ifeq ($(CONFIG_PPC64),y) +EXTRA_CFLAGS += -mno-minimal-toc +endif + +obj-y := fault.o mem.o lmb.o +obj-$(CONFIG_PPC32) += init_32.o pgtable_32.o mmu_context_32.o \ + tlb_32.o +hash-$(CONFIG_PPC_MULTIPLATFORM) := hash_native_64.o +obj-$(CONFIG_PPC64) += init_64.o pgtable_64.o mmu_context_64.o \ + hash_utils_64.o hash_low_64.o tlb_64.o \ + slb_low.o slb.o stab.o mmap.o imalloc.o \ + $(hash-y) +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o +obj-$(CONFIG_40x) += 4xx_mmu.o +obj-$(CONFIG_44x) += 44x_mmu.o +obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o +obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c new file mode 100644 index 00000000000..3df641fa789 --- /dev/null +++ b/arch/powerpc/mm/fault.c @@ -0,0 +1,391 @@ +/* + * arch/ppc/mm/fault.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/mm/fault.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Modified by Cort Dougan and Paul Mackerras. + * + * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/kprobes.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/tlbflush.h> +#include <asm/kdebug.h> +#include <asm/siginfo.h> + +/* + * Check whether the instruction at regs->nip is a store using + * an update addressing form which will update r1. + */ +static int store_updates_sp(struct pt_regs *regs) +{ + unsigned int inst; + + if (get_user(inst, (unsigned int __user *)regs->nip)) + return 0; + /* check for 1 in the rA field */ + if (((inst >> 16) & 0x1f) != 1) + return 0; + /* check major opcode */ + switch (inst >> 26) { + case 37: /* stwu */ + case 39: /* stbu */ + case 45: /* sthu */ + case 53: /* stfsu */ + case 55: /* stfdu */ + return 1; + case 62: /* std or stdu */ + return (inst & 3) == 1; + case 31: + /* check minor opcode */ + switch ((inst >> 1) & 0x3ff) { + case 181: /* stdux */ + case 183: /* stwux */ + case 247: /* stbux */ + case 439: /* sthux */ + case 695: /* stfsux */ + case 759: /* stfdux */ + return 1; + } + } + return 0; +} + +static void do_dabr(struct pt_regs *regs, unsigned long error_code) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + if (debugger_dabr_match(regs)) + return; + + /* Clear the DABR */ + set_dabr(0); + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = (void __user *)regs->nip; + force_sig_info(SIGTRAP, &info, current); +} + +/* + * For 600- and 800-family processors, the error_code parameter is DSISR + * for a data fault, SRR1 for an instruction fault. For 400-family processors + * the error_code parameter is ESR for a data fault, 0 for an instruction + * fault. + * For 64-bit processors, the error_code parameter is + * - DSISR for a non-SLB data access fault, + * - SRR1 & 0x08000000 for a non-SLB instruction access fault + * - 0 any SLB fault. + * + * The return value is 0 if the fault was handled, or the signal + * number if this is a kernel fault that can't be handled here. + */ +int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + struct vm_area_struct * vma; + struct mm_struct *mm = current->mm; + siginfo_t info; + int code = SEGV_MAPERR; + int is_write = 0; + int trap = TRAP(regs); + int is_exec = trap == 0x400; + +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) + /* + * Fortunately the bit assignments in SRR1 for an instruction + * fault and DSISR for a data fault are mostly the same for the + * bits we are interested in. But there are some bits which + * indicate errors in DSISR but can validly be set in SRR1. + */ + if (trap == 0x400) + error_code &= 0x48200000; + else + is_write = error_code & DSISR_ISSTORE; +#else + is_write = error_code & ESR_DST; +#endif /* CONFIG_4xx || CONFIG_BOOKE */ + + if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return 0; + + if (trap == 0x300) { + if (debugger_fault_handler(regs)) + return 0; + } + + /* On a kernel SLB miss we can only check for a valid exception entry */ + if (!user_mode(regs) && (address >= TASK_SIZE)) + return SIGSEGV; + +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) + if (error_code & DSISR_DABRMATCH) { + /* DABR match */ + do_dabr(regs, error_code); + return 0; + } +#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ + + if (in_atomic() || mm == NULL) { + if (!user_mode(regs)) + return SIGSEGV; + /* in_atomic() in user mode is really bad, + as is current->mm == NULL. */ + printk(KERN_EMERG "Page fault in user mode with" + "in_atomic() = %d mm = %p\n", in_atomic(), mm); + printk(KERN_EMERG "NIP = %lx MSR = %lx\n", + regs->nip, regs->msr); + die("Weird page fault", regs, SIGSEGV); + } + + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunatly, in the case of an + * erroneous fault occuring in a code path which already holds mmap_sem + * we will deadlock attempting to validate the fault against the + * address space. Luckily the kernel only validly references user + * space from well defined areas of code, which are listed in the + * exceptions table. + * + * As the vast majority of faults will be valid we will only perform + * the source reference check when there is a possibilty of a deadlock. + * Attempt to lock the address space, if we cannot we then validate the + * source. If this is invalid we can skip the address space check, + * thus avoiding the deadlock. + */ + if (!down_read_trylock(&mm->mmap_sem)) { + if (!user_mode(regs) && !search_exception_tables(regs->nip)) + goto bad_area_nosemaphore; + + down_read(&mm->mmap_sem); + } + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + + /* + * N.B. The POWER/Open ABI allows programs to access up to + * 288 bytes below the stack pointer. + * The kernel signal delivery code writes up to about 1.5kB + * below the stack pointer (r1) before decrementing it. + * The exec code can write slightly over 640kB to the stack + * before setting the user r1. Thus we allow the stack to + * expand to 1MB without further checks. + */ + if (address + 0x100000 < vma->vm_end) { + /* get user regs even if this fault is in kernel mode */ + struct pt_regs *uregs = current->thread.regs; + if (uregs == NULL) + goto bad_area; + + /* + * A user-mode access to an address a long way below + * the stack pointer is only valid if the instruction + * is one which would update the stack pointer to the + * address accessed if the instruction completed, + * i.e. either stwu rs,n(r1) or stwux rs,r1,rb + * (or the byte, halfword, float or double forms). + * + * If we don't check this then any write to the area + * between the last mapped region and the stack will + * expand the stack rather than segfaulting. + */ + if (address + 2048 < uregs->gpr[1] + && (!user_mode(regs) || !store_updates_sp(regs))) + goto bad_area; + } + if (expand_stack(vma, address)) + goto bad_area; + +good_area: + code = SEGV_ACCERR; +#if defined(CONFIG_6xx) + if (error_code & 0x95700000) + /* an error such as lwarx to I/O controller space, + address matching DABR, eciwx, etc. */ + goto bad_area; +#endif /* CONFIG_6xx */ +#if defined(CONFIG_8xx) + /* The MPC8xx seems to always set 0x80000000, which is + * "undefined". Of those that can be set, this is the only + * one which seems bad. + */ + if (error_code & 0x10000000) + /* Guarded storage error. */ + goto bad_area; +#endif /* CONFIG_8xx */ + + if (is_exec) { +#ifdef CONFIG_PPC64 + /* protection fault */ + if (error_code & DSISR_PROTFAULT) + goto bad_area; + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; +#endif +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + pte_t *ptep; + + /* Since 4xx/Book-E supports per-page execute permission, + * we lazily flush dcache to icache. */ + ptep = NULL; + if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) { + struct page *page = pte_page(*ptep); + + if (! test_bit(PG_arch_1, &page->flags)) { + flush_dcache_icache_page(page); + set_bit(PG_arch_1, &page->flags); + } + pte_update(ptep, 0, _PAGE_HWEXEC); + _tlbie(address); + pte_unmap(ptep); + up_read(&mm->mmap_sem); + return 0; + } + if (ptep != NULL) + pte_unmap(ptep); +#endif + /* a write */ + } else if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + /* a read */ + } else { + /* protection fault */ + if (error_code & 0x08000000) + goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + survive: + switch (handle_mm_fault(mm, vma, address, is_write)) { + + case VM_FAULT_MINOR: + current->min_flt++; + break; + case VM_FAULT_MAJOR: + current->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto do_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + + up_read(&mm->mmap_sem); + return 0; + +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + /* User mode accesses cause a SIGSEGV */ + if (user_mode(regs)) { + _exception(SIGSEGV, regs, code, address); + return 0; + } + + if (is_exec && (error_code & DSISR_PROTFAULT) + && printk_ratelimit()) + printk(KERN_CRIT "kernel tried to execute NX-protected" + " page (%lx) - exploit attempt? (uid: %d)\n", + address, current->uid); + + return SIGSEGV; + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + up_read(&mm->mmap_sem); + if (current->pid == 1) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + printk("VM: killing process %s\n", current->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + return SIGKILL; + +do_sigbus: + up_read(&mm->mmap_sem); + if (user_mode(regs)) { + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return 0; + } + return SIGBUS; +} + +/* + * bad_page_fault is called when we have a bad access from the kernel. + * It is called from the DSI and ISI handlers in head.S and from some + * of the procedures in traps.c. + */ +void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault? */ + if ((entry = search_exception_tables(regs->nip)) != NULL) { + regs->nip = entry->fixup; + return; + } + + /* kernel has accessed a bad area */ + die("Kernel access of bad area", regs, sig); +} diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c new file mode 100644 index 00000000000..af9ca0eb6d5 --- /dev/null +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -0,0 +1,237 @@ +/* + * Modifications by Kumar Gala (kumar.gala@freescale.com) to support + * E500 Book E processors. + * + * Copyright 2004 Freescale Semiconductor, Inc + * + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/highmem.h> + +#include <asm/pgalloc.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/bootx.h> +#include <asm/machdep.h> +#include <asm/setup.h> + +extern void loadcam_entry(unsigned int index); +unsigned int tlbcam_index; +unsigned int num_tlbcam_entries; +static unsigned long __cam0, __cam1, __cam2; +extern unsigned long total_lowmem; +extern unsigned long __max_low_memory; +#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE + +#define NUM_TLBCAMS (16) + +struct tlbcam { + u32 MAS0; + u32 MAS1; + u32 MAS2; + u32 MAS3; + u32 MAS7; +} TLBCAM[NUM_TLBCAMS]; + +struct tlbcamrange { + unsigned long start; + unsigned long limit; + phys_addr_t phys; +} tlbcam_addrs[NUM_TLBCAMS]; + +extern unsigned int tlbcam_index; + +/* + * Return PA for this VA if it is mapped by a CAM, or 0 + */ +unsigned long v_mapped_by_tlbcam(unsigned long va) +{ + int b; + for (b = 0; b < tlbcam_index; ++b) + if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit) + return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start); + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_mapped_by_tlbcam(unsigned long pa) +{ + int b; + for (b = 0; b < tlbcam_index; ++b) + if (pa >= tlbcam_addrs[b].phys + && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) + +tlbcam_addrs[b].phys) + return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); + return 0; +} + +/* + * Set up one of the I/D BAT (block address translation) register pairs. + * The parameters are not checked; in particular size must be a power + * of 4 between 4k and 256M. + */ +void settlbcam(int index, unsigned long virt, phys_addr_t phys, + unsigned int size, int flags, unsigned int pid) +{ + unsigned int tsize, lz; + + asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size)); + tsize = (21 - lz) / 2; + +#ifdef CONFIG_SMP + if ((flags & _PAGE_NO_CACHE) == 0) + flags |= _PAGE_COHERENT; +#endif + + TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1); + TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); + TLBCAM[index].MAS2 = virt & PAGE_MASK; + + TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; + + TLBCAM[index].MAS3 = (phys & PAGE_MASK) | MAS3_SX | MAS3_SR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); + +#ifndef CONFIG_KGDB /* want user access for breakpoints */ + if (flags & _PAGE_USER) { + TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); + } +#else + TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); +#endif + + tlbcam_addrs[index].start = virt; + tlbcam_addrs[index].limit = virt + size - 1; + tlbcam_addrs[index].phys = phys; + + loadcam_entry(index); +} + +void invalidate_tlbcam_entry(int index) +{ + TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index); + TLBCAM[index].MAS1 = ~MAS1_VALID; + + loadcam_entry(index); +} + +void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1, + unsigned long cam2) +{ + settlbcam(0, KERNELBASE, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0); + tlbcam_index++; + if (cam1) { + tlbcam_index++; + settlbcam(1, KERNELBASE+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0); + } + if (cam2) { + tlbcam_index++; + settlbcam(2, KERNELBASE+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0); + } +} + +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + flush_instruction_cache(); +} + +unsigned long __init mmu_mapin_ram(void) +{ + cam_mapin_ram(__cam0, __cam1, __cam2); + + return __cam0 + __cam1 + __cam2; +} + + +void __init +adjust_total_lowmem(void) +{ + unsigned long max_low_mem = MAX_LOW_MEM; + unsigned long cam_max = 0x10000000; + unsigned long ram; + + /* adjust CAM size to max_low_mem */ + if (max_low_mem < cam_max) + cam_max = max_low_mem; + + /* adjust lowmem size to max_low_mem */ + if (max_low_mem < total_lowmem) + ram = max_low_mem; + else + ram = total_lowmem; + + /* Calculate CAM values */ + __cam0 = 1UL << 2 * (__ilog2(ram) / 2); + if (__cam0 > cam_max) + __cam0 = cam_max; + ram -= __cam0; + if (ram) { + __cam1 = 1UL << 2 * (__ilog2(ram) / 2); + if (__cam1 > cam_max) + __cam1 = cam_max; + ram -= __cam1; + } + if (ram) { + __cam2 = 1UL << 2 * (__ilog2(ram) / 2); + if (__cam2 > cam_max) + __cam2 = cam_max; + ram -= __cam2; + } + + printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb," + " CAM2=%ldMb residual: %ldMb\n", + __cam0 >> 20, __cam1 >> 20, __cam2 >> 20, + (total_lowmem - __cam0 - __cam1 - __cam2) >> 20); + __max_low_memory = max_low_mem = __cam0 + __cam1 + __cam2; +} diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S new file mode 100644 index 00000000000..12ccd7155ba --- /dev/null +++ b/arch/powerpc/mm/hash_low_32.S @@ -0,0 +1,618 @@ +/* + * arch/ppc/kernel/hashtable.S + * + * $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $ + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * + * This file contains low-level assembler routines for managing + * the PowerPC MMU hash table. (PPC 8xx processors don't use a + * hash table, so this file is not used on them.) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> + +#ifdef CONFIG_SMP + .comm mmu_hash_lock,4 +#endif /* CONFIG_SMP */ + +/* + * Sync CPUs with hash_page taking & releasing the hash + * table lock + */ +#ifdef CONFIG_SMP + .text +_GLOBAL(hash_page_sync) + lis r8,mmu_hash_lock@h + ori r8,r8,mmu_hash_lock@l + lis r0,0x0fff + b 10f +11: lwz r6,0(r8) + cmpwi 0,r6,0 + bne 11b +10: lwarx r6,0,r8 + cmpwi 0,r6,0 + bne- 11b + stwcx. r0,0,r8 + bne- 10b + isync + eieio + li r0,0 + stw r0,0(r8) + blr +#endif + +/* + * Load a PTE into the hash table, if possible. + * The address is in r4, and r3 contains an access flag: + * _PAGE_RW (0x400) if a write. + * r9 contains the SRR1 value, from which we use the MSR_PR bit. + * SPRG3 contains the physical address of the current task's thread. + * + * Returns to the caller if the access is illegal or there is no + * mapping for the address. Otherwise it places an appropriate PTE + * in the hash table and returns from the exception. + * Uses r0, r3 - r8, ctr, lr. + */ + .text +_GLOBAL(hash_page) +#ifdef CONFIG_PPC64BRIDGE + mfmsr r0 + clrldi r0,r0,1 /* make sure it's in 32-bit mode */ + MTMSRD(r0) + isync +#endif + tophys(r7,0) /* gets -KERNELBASE into r7 */ +#ifdef CONFIG_SMP + addis r8,r7,mmu_hash_lock@h + ori r8,r8,mmu_hash_lock@l + lis r0,0x0fff + b 10f +11: lwz r6,0(r8) + cmpwi 0,r6,0 + bne 11b +10: lwarx r6,0,r8 + cmpwi 0,r6,0 + bne- 11b + stwcx. r0,0,r8 + bne- 10b + isync +#endif + /* Get PTE (linux-style) and check access */ + lis r0,KERNELBASE@h /* check if kernel address */ + cmplw 0,r4,r0 + mfspr r8,SPRN_SPRG3 /* current task's THREAD (phys) */ + ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ + lwz r5,PGDIR(r8) /* virt page-table root */ + blt+ 112f /* assume user more likely */ + lis r5,swapper_pg_dir@ha /* if kernel address, use */ + addi r5,r5,swapper_pg_dir@l /* kernel page table */ + rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ +112: add r5,r5,r7 /* convert to phys addr */ + rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ + lwz r8,0(r5) /* get pmd entry */ + rlwinm. r8,r8,0,0,19 /* extract address of pte page */ +#ifdef CONFIG_SMP + beq- hash_page_out /* return if no mapping */ +#else + /* XXX it seems like the 601 will give a machine fault on the + rfi if its alignment is wrong (bottom 4 bits of address are + 8 or 0xc) and we have had a not-taken conditional branch + to the address following the rfi. */ + beqlr- +#endif + rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ + rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE + + /* + * Update the linux PTE atomically. We do the lwarx up-front + * because almost always, there won't be a permission violation + * and there won't already be an HPTE, and thus we will have + * to update the PTE to set _PAGE_HASHPTE. -- paulus. + */ +retry: + lwarx r6,0,r8 /* get linux-style pte */ + andc. r5,r3,r6 /* check access & ~permission */ +#ifdef CONFIG_SMP + bne- hash_page_out /* return if access not permitted */ +#else + bnelr- +#endif + or r5,r0,r6 /* set accessed/dirty bits */ + stwcx. r5,0,r8 /* attempt to update PTE */ + bne- retry /* retry if someone got there first */ + + mfsrin r3,r4 /* get segment reg for segment */ + mfctr r0 + stw r0,_CTR(r11) + bl create_hpte /* add the hash table entry */ + +#ifdef CONFIG_SMP + eieio + addis r8,r7,mmu_hash_lock@ha + li r0,0 + stw r0,mmu_hash_lock@l(r8) +#endif + + /* Return from the exception */ + lwz r5,_CTR(r11) + mtctr r5 + lwz r0,GPR0(r11) + lwz r7,GPR7(r11) + lwz r8,GPR8(r11) + b fast_exception_return + +#ifdef CONFIG_SMP +hash_page_out: + eieio + addis r8,r7,mmu_hash_lock@ha + li r0,0 + stw r0,mmu_hash_lock@l(r8) + blr +#endif /* CONFIG_SMP */ + +/* + * Add an entry for a particular page to the hash table. + * + * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval) + * + * We assume any necessary modifications to the pte (e.g. setting + * the accessed bit) have already been done and that there is actually + * a hash table in use (i.e. we're not on a 603). + */ +_GLOBAL(add_hash_page) + mflr r0 + stw r0,4(r1) + + /* Convert context and va to VSID */ + mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note create_hpte trims to 24 bits */ + +#ifdef CONFIG_SMP + rlwinm r8,r1,0,0,18 /* use cpu number to make tag */ + lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */ + oris r8,r8,12 +#endif /* CONFIG_SMP */ + + /* + * We disable interrupts here, even on UP, because we don't + * want to race with hash_page, and because we want the + * _PAGE_HASHPTE bit to be a reliable indication of whether + * the HPTE exists (or at least whether one did once). + * We also turn off the MMU for data accesses so that we + * we can't take a hash table miss (assuming the code is + * covered by a BAT). -- paulus + */ + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + SYNC_601 + isync + + tophys(r7,0) + +#ifdef CONFIG_SMP + addis r9,r7,mmu_hash_lock@ha + addi r9,r9,mmu_hash_lock@l +10: lwarx r0,0,r9 /* take the mmu_hash_lock */ + cmpi 0,r0,0 + bne- 11f + stwcx. r8,0,r9 + beq+ 12f +11: lwz r0,0(r9) + cmpi 0,r0,0 + beq 10b + b 11b +12: isync +#endif + + /* + * Fetch the linux pte and test and set _PAGE_HASHPTE atomically. + * If _PAGE_HASHPTE was already set, we don't replace the existing + * HPTE, so we just unlock and return. + */ + mr r8,r5 + rlwimi r8,r4,22,20,29 +1: lwarx r6,0,r8 + andi. r0,r6,_PAGE_HASHPTE + bne 9f /* if HASHPTE already set, done */ + ori r5,r6,_PAGE_HASHPTE + stwcx. r5,0,r8 + bne- 1b + + bl create_hpte + +9: +#ifdef CONFIG_SMP + eieio + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ +#endif + + /* reenable interrupts and DR */ + mtmsr r10 + SYNC_601 + isync + + lwz r0,4(r1) + mtlr r0 + blr + +/* + * This routine adds a hardware PTE to the hash table. + * It is designed to be called with the MMU either on or off. + * r3 contains the VSID, r4 contains the virtual address, + * r5 contains the linux PTE, r6 contains the old value of the + * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the + * offset to be added to addresses (0 if the MMU is on, + * -KERNELBASE if it is off). + * On SMP, the caller should have the mmu_hash_lock held. + * We assume that the caller has (or will) set the _PAGE_HASHPTE + * bit in the linux PTE in memory. The value passed in r6 should + * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set + * this routine will skip the search for an existing HPTE. + * This procedure modifies r0, r3 - r6, r8, cr0. + * -- paulus. + * + * For speed, 4 of the instructions get patched once the size and + * physical address of the hash table are known. These definitions + * of Hash_base and Hash_bits below are just an example. + */ +Hash_base = 0xc0180000 +Hash_bits = 12 /* e.g. 256kB hash table */ +Hash_msk = (((1 << Hash_bits) - 1) * 64) + +#ifndef CONFIG_PPC64BRIDGE +/* defines for the PTE format for 32-bit PPCs */ +#define PTE_SIZE 8 +#define PTEG_SIZE 64 +#define LG_PTEG_SIZE 6 +#define LDPTEu lwzu +#define STPTE stw +#define CMPPTE cmpw +#define PTE_H 0x40 +#define PTE_V 0x80000000 +#define TST_V(r) rlwinm. r,r,0,0,0 +#define SET_V(r) oris r,r,PTE_V@h +#define CLR_V(r,t) rlwinm r,r,0,1,31 + +#else +/* defines for the PTE format for 64-bit PPCs */ +#define PTE_SIZE 16 +#define PTEG_SIZE 128 +#define LG_PTEG_SIZE 7 +#define LDPTEu ldu +#define STPTE std +#define CMPPTE cmpd +#define PTE_H 2 +#define PTE_V 1 +#define TST_V(r) andi. r,r,PTE_V +#define SET_V(r) ori r,r,PTE_V +#define CLR_V(r,t) li t,PTE_V; andc r,r,t +#endif /* CONFIG_PPC64BRIDGE */ + +#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) +#define HASH_RIGHT 31-LG_PTEG_SIZE + +_GLOBAL(create_hpte) + /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ + rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r8,r8,r0 /* writable if _RW & _DIRTY */ + rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ + ori r8,r8,0xe14 /* clear out reserved bits and M */ + andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ +BEGIN_FTR_SECTION + ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ +END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) + + /* Construct the high word of the PPC-style PTE (r5) */ +#ifndef CONFIG_PPC64BRIDGE + rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ + rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */ +#else /* CONFIG_PPC64BRIDGE */ + clrlwi r3,r3,8 /* reduce vsid to 24 bits */ + sldi r5,r3,12 /* shift vsid into position */ + rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */ +#endif /* CONFIG_PPC64BRIDGE */ + SET_V(r5) /* set V (valid) bit */ + + /* Get the address of the primary PTE group in the hash table (r3) */ +_GLOBAL(hash_page_patch_A) + addis r0,r7,Hash_base@h /* base address of hash table */ + rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ + rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ + xor r3,r3,r0 /* make primary hash */ + li r0,8 /* PTEs/group */ + + /* + * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search + * if it is clear, meaning that the HPTE isn't there already... + */ + andi. r6,r6,_PAGE_HASHPTE + beq+ 10f /* no PTE: go look for an empty slot */ + tlbie r4 + + addis r4,r7,htab_hash_searches@ha + lwz r6,htab_hash_searches@l(r4) + addi r6,r6,1 /* count how many searches we do */ + stw r6,htab_hash_searches@l(r4) + + /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ + mtctr r0 + addi r4,r3,-PTE_SIZE +1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ + CMPPTE 0,r6,r5 + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ found_slot + + /* Search the secondary PTEG for a matching PTE */ + ori r5,r5,PTE_H /* set H (secondary hash) bit */ +_GLOBAL(hash_page_patch_B) + xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ + xori r4,r4,(-PTEG_SIZE & 0xffff) + addi r4,r4,-PTE_SIZE + mtctr r0 +2: LDPTEu r6,PTE_SIZE(r4) + CMPPTE 0,r6,r5 + bdnzf 2,2b + beq+ found_slot + xori r5,r5,PTE_H /* clear H bit again */ + + /* Search the primary PTEG for an empty slot */ +10: mtctr r0 + addi r4,r3,-PTE_SIZE /* search primary PTEG */ +1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ + TST_V(r6) /* test valid bit */ + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ found_empty + + /* update counter of times that the primary PTEG is full */ + addis r4,r7,primary_pteg_full@ha + lwz r6,primary_pteg_full@l(r4) + addi r6,r6,1 + stw r6,primary_pteg_full@l(r4) + + /* Search the secondary PTEG for an empty slot */ + ori r5,r5,PTE_H /* set H (secondary hash) bit */ +_GLOBAL(hash_page_patch_C) + xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ + xori r4,r4,(-PTEG_SIZE & 0xffff) + addi r4,r4,-PTE_SIZE + mtctr r0 +2: LDPTEu r6,PTE_SIZE(r4) + TST_V(r6) + bdnzf 2,2b + beq+ found_empty + xori r5,r5,PTE_H /* clear H bit again */ + + /* + * Choose an arbitrary slot in the primary PTEG to overwrite. + * Since both the primary and secondary PTEGs are full, and we + * have no information that the PTEs in the primary PTEG are + * more important or useful than those in the secondary PTEG, + * and we know there is a definite (although small) speed + * advantage to putting the PTE in the primary PTEG, we always + * put the PTE in the primary PTEG. + */ + addis r4,r7,next_slot@ha + lwz r6,next_slot@l(r4) + addi r6,r6,PTE_SIZE + andi. r6,r6,7*PTE_SIZE + stw r6,next_slot@l(r4) + add r4,r3,r6 + +#ifndef CONFIG_SMP + /* Store PTE in PTEG */ +found_empty: + STPTE r5,0(r4) +found_slot: + STPTE r8,PTE_SIZE/2(r4) + +#else /* CONFIG_SMP */ +/* + * Between the tlbie above and updating the hash table entry below, + * another CPU could read the hash table entry and put it in its TLB. + * There are 3 cases: + * 1. using an empty slot + * 2. updating an earlier entry to change permissions (i.e. enable write) + * 3. taking over the PTE for an unrelated address + * + * In each case it doesn't really matter if the other CPUs have the old + * PTE in their TLB. So we don't need to bother with another tlbie here, + * which is convenient as we've overwritten the register that had the + * address. :-) The tlbie above is mainly to make sure that this CPU comes + * and gets the new PTE from the hash table. + * + * We do however have to make sure that the PTE is never in an invalid + * state with the V bit set. + */ +found_empty: +found_slot: + CLR_V(r5,r0) /* clear V (valid) bit in PTE */ + STPTE r5,0(r4) + sync + TLBSYNC + STPTE r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */ + sync + SET_V(r5) + STPTE r5,0(r4) /* finally set V bit in PTE */ +#endif /* CONFIG_SMP */ + + sync /* make sure pte updates get to memory */ + blr + + .comm next_slot,4 + .comm primary_pteg_full,4 + .comm htab_hash_searches,4 + +/* + * Flush the entry for a particular page from the hash table. + * + * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval, + * int count) + * + * We assume that there is a hash table in use (Hash != 0). + */ +_GLOBAL(flush_hash_pages) + tophys(r7,0) + + /* + * We disable interrupts here, even on UP, because we want + * the _PAGE_HASHPTE bit to be a reliable indication of + * whether the HPTE exists (or at least whether one did once). + * We also turn off the MMU for data accesses so that we + * we can't take a hash table miss (assuming the code is + * covered by a BAT). -- paulus + */ + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + SYNC_601 + isync + + /* First find a PTE in the range that has _PAGE_HASHPTE set */ + rlwimi r5,r4,22,20,29 +1: lwz r0,0(r5) + cmpwi cr1,r6,1 + andi. r0,r0,_PAGE_HASHPTE + bne 2f + ble cr1,19f + addi r4,r4,0x1000 + addi r5,r5,4 + addi r6,r6,-1 + b 1b + + /* Convert context and va to VSID */ +2: mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note code below trims to 24 bits */ + + /* Construct the high word of the PPC-style PTE (r11) */ +#ifndef CONFIG_PPC64BRIDGE + rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ + rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */ +#else /* CONFIG_PPC64BRIDGE */ + clrlwi r3,r3,8 /* reduce vsid to 24 bits */ + sldi r11,r3,12 /* shift vsid into position */ + rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */ +#endif /* CONFIG_PPC64BRIDGE */ + SET_V(r11) /* set V (valid) bit */ + +#ifdef CONFIG_SMP + addis r9,r7,mmu_hash_lock@ha + addi r9,r9,mmu_hash_lock@l + rlwinm r8,r1,0,0,18 + add r8,r8,r7 + lwz r8,TI_CPU(r8) + oris r8,r8,9 +10: lwarx r0,0,r9 + cmpi 0,r0,0 + bne- 11f + stwcx. r8,0,r9 + beq+ 12f +11: lwz r0,0(r9) + cmpi 0,r0,0 + beq 10b + b 11b +12: isync +#endif + + /* + * Check the _PAGE_HASHPTE bit in the linux PTE. If it is + * already clear, we're done (for this pte). If not, + * clear it (atomically) and proceed. -- paulus. + */ +33: lwarx r8,0,r5 /* fetch the pte */ + andi. r0,r8,_PAGE_HASHPTE + beq 8f /* done if HASHPTE is already clear */ + rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */ + stwcx. r8,0,r5 /* update the pte */ + bne- 33b + + /* Get the address of the primary PTE group in the hash table (r3) */ +_GLOBAL(flush_hash_patch_A) + addis r8,r7,Hash_base@h /* base address of hash table */ + rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ + rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ + xor r8,r0,r8 /* make primary hash */ + + /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ + li r0,8 /* PTEs/group */ + mtctr r0 + addi r12,r8,-PTE_SIZE +1: LDPTEu r0,PTE_SIZE(r12) /* get next PTE */ + CMPPTE 0,r0,r11 + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ 3f + + /* Search the secondary PTEG for a matching PTE */ + ori r11,r11,PTE_H /* set H (secondary hash) bit */ + li r0,8 /* PTEs/group */ +_GLOBAL(flush_hash_patch_B) + xoris r12,r8,Hash_msk>>16 /* compute secondary hash */ + xori r12,r12,(-PTEG_SIZE & 0xffff) + addi r12,r12,-PTE_SIZE + mtctr r0 +2: LDPTEu r0,PTE_SIZE(r12) + CMPPTE 0,r0,r11 + bdnzf 2,2b + xori r11,r11,PTE_H /* clear H again */ + bne- 4f /* should rarely fail to find it */ + +3: li r0,0 + STPTE r0,0(r12) /* invalidate entry */ +4: sync + tlbie r4 /* in hw tlb too */ + sync + +8: ble cr1,9f /* if all ptes checked */ +81: addi r6,r6,-1 + addi r5,r5,4 /* advance to next pte */ + addi r4,r4,0x1000 + lwz r0,0(r5) /* check next pte */ + cmpwi cr1,r6,1 + andi. r0,r0,_PAGE_HASHPTE + bne 33b + bgt cr1,81b + +9: +#ifdef CONFIG_SMP + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ +#endif + +19: mtmsr r10 + SYNC_601 + isync + blr diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S new file mode 100644 index 00000000000..d6ed9102eee --- /dev/null +++ b/arch/powerpc/mm/hash_low_64.S @@ -0,0 +1,288 @@ +/* + * ppc64 MMU hashtable management routines + * + * (c) Copyright IBM Corp. 2003 + * + * Maintained by: Benjamin Herrenschmidt + * <benh@kernel.crashing.org> + * + * This file is covered by the GNU Public Licence v2 as + * described in the kernel's COPYING file. + */ + +#include <asm/reg.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/page.h> +#include <asm/types.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cputable.h> + + .text + +/* + * Stackframe: + * + * +-> Back chain (SP + 256) + * | General register save area (SP + 112) + * | Parameter save area (SP + 48) + * | TOC save area (SP + 40) + * | link editor doubleword (SP + 32) + * | compiler doubleword (SP + 24) + * | LR save area (SP + 16) + * | CR save area (SP + 8) + * SP ---> +-- Back chain (SP + 0) + */ +#define STACKFRAMESIZE 256 + +/* Save parameters offsets */ +#define STK_PARM(i) (STACKFRAMESIZE + 48 + ((i)-3)*8) + +/* Save non-volatile offsets */ +#define STK_REG(i) (112 + ((i)-14)*8) + +/* + * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid, + * pte_t *ptep, unsigned long trap, int local) + * + * Adds a page to the hash table. This is the non-LPAR version for now + */ + +_GLOBAL(__hash_page) + mflr r0 + std r0,16(r1) + stdu r1,-STACKFRAMESIZE(r1) + /* Save all params that we need after a function call */ + std r6,STK_PARM(r6)(r1) + std r8,STK_PARM(r8)(r1) + + /* Add _PAGE_PRESENT to access */ + ori r4,r4,_PAGE_PRESENT + + /* Save non-volatile registers. + * r31 will hold "old PTE" + * r30 is "new PTE" + * r29 is "va" + * r28 is a hash value + * r27 is hashtab mask (maybe dynamic patched instead ?) + */ + std r27,STK_REG(r27)(r1) + std r28,STK_REG(r28)(r1) + std r29,STK_REG(r29)(r1) + std r30,STK_REG(r30)(r1) + std r31,STK_REG(r31)(r1) + + /* Step 1: + * + * Check permissions, atomically mark the linux PTE busy + * and hashed. + */ +1: + ldarx r31,0,r6 + /* Check access rights (access & ~(pte_val(*ptep))) */ + andc. r0,r4,r31 + bne- htab_wrong_access + /* Check if PTE is busy */ + andi. r0,r31,_PAGE_BUSY + /* If so, just bail out and refault if needed. Someone else + * is changing this PTE anyway and might hash it. + */ + bne- bail_ok + /* Prepare new PTE value (turn access RW into DIRTY, then + * add BUSY,HASHPTE and ACCESSED) + */ + rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ + or r30,r30,r31 + ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + /* Write the linux PTE atomically (setting busy) */ + stdcx. r30,0,r6 + bne- 1b + isync + + /* Step 2: + * + * Insert/Update the HPTE in the hash table. At this point, + * r4 (access) is re-useable, we use it for the new HPTE flags + */ + + /* Calc va and put it in r29 */ + rldicr r29,r5,28,63-28 + rldicl r3,r3,0,36 + or r29,r3,r29 + + /* Calculate hash value for primary slot and store it in r28 */ + rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ + rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */ + xor r28,r5,r0 + + /* Convert linux PTE bits into HW equivalents */ + andi. r3,r30,0x1fe /* Get basic set of flags */ + xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */ + rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ + rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ + and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */ + andc r0,r30,r0 /* r0 = pte & ~r0 */ + rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + + /* We eventually do the icache sync here (maybe inline that + * code rather than call a C function...) + */ +BEGIN_FTR_SECTION + mr r4,r30 + mr r5,r7 + bl .hash_page_do_lazy_icache +END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) + + /* At this point, r3 contains new PP bits, save them in + * place of "access" in the param area (sic) + */ + std r3,STK_PARM(r4)(r1) + + /* Get htab_hash_mask */ + ld r4,htab_hash_mask@got(2) + ld r27,0(r4) /* htab_hash_mask -> r27 */ + + /* Check if we may already be in the hashtable, in this case, we + * go to out-of-line code to try to modify the HPTE + */ + andi. r0,r31,_PAGE_HASHPTE + bne htab_modify_pte + +htab_insert_pte: + /* Clear hpte bits in new pte (we also clear BUSY btw) and + * add _PAGE_HASHPTE + */ + lis r0,_PAGE_HPTEFLAGS@h + ori r0,r0,_PAGE_HPTEFLAGS@l + andc r30,r30,r0 + ori r30,r30,_PAGE_HASHPTE + + /* page number in r5 */ + rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT + + /* Calculate primary group hash */ + and r0,r28,r27 + rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + + /* Call ppc_md.hpte_insert */ + ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ + mr r4,r29 /* Retreive va */ + li r6,0 /* no vflags */ +_GLOBAL(htab_call_hpte_insert1) + bl . /* Will be patched by htab_finish_init() */ + cmpdi 0,r3,0 + bge htab_pte_insert_ok /* Insertion successful */ + cmpdi 0,r3,-2 /* Critical failure */ + beq- htab_pte_insert_failure + + /* Now try secondary slot */ + + /* page number in r5 */ + rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT + + /* Calculate secondary group hash */ + andc r0,r27,r28 + rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ + + /* Call ppc_md.hpte_insert */ + ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ + mr r4,r29 /* Retreive va */ + li r6,HPTE_V_SECONDARY@l /* secondary slot */ +_GLOBAL(htab_call_hpte_insert2) + bl . /* Will be patched by htab_finish_init() */ + cmpdi 0,r3,0 + bge+ htab_pte_insert_ok /* Insertion successful */ + cmpdi 0,r3,-2 /* Critical failure */ + beq- htab_pte_insert_failure + + /* Both are full, we need to evict something */ + mftb r0 + /* Pick a random group based on TB */ + andi. r0,r0,1 + mr r5,r28 + bne 2f + not r5,r5 +2: and r0,r5,r27 + rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + /* Call ppc_md.hpte_remove */ +_GLOBAL(htab_call_hpte_remove) + bl . /* Will be patched by htab_finish_init() */ + + /* Try all again */ + b htab_insert_pte + +bail_ok: + li r3,0 + b bail + +htab_pte_insert_ok: + /* Insert slot number & secondary bit in PTE */ + rldimi r30,r3,12,63-15 + + /* Write out the PTE with a normal write + * (maybe add eieio may be good still ?) + */ +htab_write_out_pte: + ld r6,STK_PARM(r6)(r1) + std r30,0(r6) + li r3, 0 +bail: + ld r27,STK_REG(r27)(r1) + ld r28,STK_REG(r28)(r1) + ld r29,STK_REG(r29)(r1) + ld r30,STK_REG(r30)(r1) + ld r31,STK_REG(r31)(r1) + addi r1,r1,STACKFRAMESIZE + ld r0,16(r1) + mtlr r0 + blr + +htab_modify_pte: + /* Keep PP bits in r4 and slot idx from the PTE around in r3 */ + mr r4,r3 + rlwinm r3,r31,32-12,29,31 + + /* Secondary group ? if yes, get a inverted hash value */ + mr r5,r28 + andi. r0,r31,_PAGE_SECONDARY + beq 1f + not r5,r5 +1: + /* Calculate proper slot value for ppc_md.hpte_updatepp */ + and r0,r5,r27 + rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + add r3,r0,r3 /* add slot idx */ + + /* Call ppc_md.hpte_updatepp */ + mr r5,r29 /* va */ + li r6,0 /* large is 0 */ + ld r7,STK_PARM(r8)(r1) /* get "local" param */ +_GLOBAL(htab_call_hpte_updatepp) + bl . /* Will be patched by htab_finish_init() */ + + /* if we failed because typically the HPTE wasn't really here + * we try an insertion. + */ + cmpdi 0,r3,-1 + beq- htab_insert_pte + + /* Clear the BUSY bit and Write out the PTE */ + li r0,_PAGE_BUSY + andc r30,r30,r0 + b htab_write_out_pte + +htab_wrong_access: + /* Bail out clearing reservation */ + stdcx. r31,0,r6 + li r3,1 + b bail + +htab_pte_insert_failure: + /* Bail out restoring old PTE */ + ld r6,STK_PARM(r6)(r1) + std r31,0(r6) + li r3,-1 + b bail + + diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c new file mode 100644 index 00000000000..174d14576c2 --- /dev/null +++ b/arch/powerpc/mm/hash_native_64.c @@ -0,0 +1,446 @@ +/* + * native hashtable management. + * + * SMP scalability work: + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/spinlock.h> +#include <linux/bitops.h> +#include <linux/threads.h> +#include <linux/smp.h> + +#include <asm/abs_addr.h> +#include <asm/machdep.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/tlb.h> +#include <asm/cputable.h> + +#define HPTE_LOCK_BIT 3 + +static DEFINE_SPINLOCK(native_tlbie_lock); + +static inline void native_lock_hpte(hpte_t *hptep) +{ + unsigned long *word = &hptep->v; + + while (1) { + if (!test_and_set_bit(HPTE_LOCK_BIT, word)) + break; + while(test_bit(HPTE_LOCK_BIT, word)) + cpu_relax(); + } +} + +static inline void native_unlock_hpte(hpte_t *hptep) +{ + unsigned long *word = &hptep->v; + + asm volatile("lwsync":::"memory"); + clear_bit(HPTE_LOCK_BIT, word); +} + +long native_hpte_insert(unsigned long hpte_group, unsigned long va, + unsigned long prpn, unsigned long vflags, + unsigned long rflags) +{ + hpte_t *hptep = htab_address + hpte_group; + unsigned long hpte_v, hpte_r; + int i; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + if (! (hptep->v & HPTE_V_VALID)) { + /* retry with lock held */ + native_lock_hpte(hptep); + if (! (hptep->v & HPTE_V_VALID)) + break; + native_unlock_hpte(hptep); + } + + hptep++; + } + + if (i == HPTES_PER_GROUP) + return -1; + + hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; + if (vflags & HPTE_V_LARGE) + va &= ~(1UL << HPTE_V_AVPN_SHIFT); + hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; + + hptep->r = hpte_r; + /* Guarantee the second dword is visible before the valid bit */ + __asm__ __volatile__ ("eieio" : : : "memory"); + /* + * Now set the first dword including the valid bit + * NOTE: this also unlocks the hpte + */ + hptep->v = hpte_v; + + __asm__ __volatile__ ("ptesync" : : : "memory"); + + return i | (!!(vflags & HPTE_V_SECONDARY) << 3); +} + +static long native_hpte_remove(unsigned long hpte_group) +{ + hpte_t *hptep; + int i; + int slot_offset; + unsigned long hpte_v; + + /* pick a random entry to start at */ + slot_offset = mftb() & 0x7; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + hptep = htab_address + hpte_group + slot_offset; + hpte_v = hptep->v; + + if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) { + /* retry with lock held */ + native_lock_hpte(hptep); + hpte_v = hptep->v; + if ((hpte_v & HPTE_V_VALID) + && !(hpte_v & HPTE_V_BOLTED)) + break; + native_unlock_hpte(hptep); + } + + slot_offset++; + slot_offset &= 0x7; + } + + if (i == HPTES_PER_GROUP) + return -1; + + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->v = 0; + + return i; +} + +static inline void set_pp_bit(unsigned long pp, hpte_t *addr) +{ + unsigned long old; + unsigned long *p = &addr->r; + + __asm__ __volatile__( + "1: ldarx %0,0,%3\n\ + rldimi %0,%2,0,61\n\ + stdcx. %0,0,%3\n\ + bne 1b" + : "=&r" (old), "=m" (*p) + : "r" (pp), "r" (p), "m" (*p) + : "cc"); +} + +/* + * Only works on small pages. Yes its ugly to have to check each slot in + * the group but we only use this during bootup. + */ +static long native_hpte_find(unsigned long vpn) +{ + hpte_t *hptep; + unsigned long hash; + unsigned long i, j; + long slot; + unsigned long hpte_v; + + hash = hpt_hash(vpn, 0); + + for (j = 0; j < 2; j++) { + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + for (i = 0; i < HPTES_PER_GROUP; i++) { + hptep = htab_address + slot; + hpte_v = hptep->v; + + if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) + && (hpte_v & HPTE_V_VALID) + && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) { + /* HPTE matches */ + if (j) + slot = -slot; + return slot; + } + ++slot; + } + hash = ~hash; + } + + return -1; +} + +static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int large, int local) +{ + hpte_t *hptep = htab_address + slot; + unsigned long hpte_v; + unsigned long avpn = va >> 23; + int ret = 0; + + if (large) + avpn &= ~1; + + native_lock_hpte(hptep); + + hpte_v = hptep->v; + + /* Even if we miss, we need to invalidate the TLB */ + if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) + || !(hpte_v & HPTE_V_VALID)) { + native_unlock_hpte(hptep); + ret = -1; + } else { + set_pp_bit(newpp, hptep); + native_unlock_hpte(hptep); + } + + /* Ensure it is out of the tlb too */ + if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { + tlbiel(va); + } else { + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock(&native_tlbie_lock); + tlbie(va, large); + if (lock_tlbie) + spin_unlock(&native_tlbie_lock); + } + + return ret; +} + +/* + * Update the page protection bits. Intended to be used to create + * guard pages for kernel data structures on pages which are bolted + * in the HPT. Assumes pages being operated on will not be stolen. + * Does not work on large pages. + * + * No need to lock here because we should be the only user. + */ +static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) +{ + unsigned long vsid, va, vpn, flags = 0; + long slot; + hpte_t *hptep; + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> PAGE_SHIFT; + + slot = native_hpte_find(vpn); + if (slot == -1) + panic("could not find page to bolt\n"); + hptep = htab_address + slot; + + set_pp_bit(newpp, hptep); + + /* Ensure it is out of the tlb too */ + if (lock_tlbie) + spin_lock_irqsave(&native_tlbie_lock, flags); + tlbie(va, 0); + if (lock_tlbie) + spin_unlock_irqrestore(&native_tlbie_lock, flags); +} + +static void native_hpte_invalidate(unsigned long slot, unsigned long va, + int large, int local) +{ + hpte_t *hptep = htab_address + slot; + unsigned long hpte_v; + unsigned long avpn = va >> 23; + unsigned long flags; + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (large) + avpn &= ~1; + + local_irq_save(flags); + native_lock_hpte(hptep); + + hpte_v = hptep->v; + + /* Even if we miss, we need to invalidate the TLB */ + if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) + || !(hpte_v & HPTE_V_VALID)) { + native_unlock_hpte(hptep); + } else { + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->v = 0; + } + + /* Invalidate the tlb */ + if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { + tlbiel(va); + } else { + if (lock_tlbie) + spin_lock(&native_tlbie_lock); + tlbie(va, large); + if (lock_tlbie) + spin_unlock(&native_tlbie_lock); + } + local_irq_restore(flags); +} + +/* + * clear all mappings on kexec. All cpus are in real mode (or they will + * be when they isi), and we are the only one left. We rely on our kernel + * mapping being 0xC0's and the hardware ignoring those two real bits. + * + * TODO: add batching support when enabled. remember, no dynamic memory here, + * athough there is the control page available... + */ +static void native_hpte_clear(void) +{ + unsigned long slot, slots, flags; + hpte_t *hptep = htab_address; + unsigned long hpte_v; + unsigned long pteg_count; + + pteg_count = htab_hash_mask + 1; + + local_irq_save(flags); + + /* we take the tlbie lock and hold it. Some hardware will + * deadlock if we try to tlbie from two processors at once. + */ + spin_lock(&native_tlbie_lock); + + slots = pteg_count * HPTES_PER_GROUP; + + for (slot = 0; slot < slots; slot++, hptep++) { + /* + * we could lock the pte here, but we are the only cpu + * running, right? and for crash dump, we probably + * don't want to wait for a maybe bad cpu. + */ + hpte_v = hptep->v; + + if (hpte_v & HPTE_V_VALID) { + hptep->v = 0; + tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE); + } + } + + spin_unlock(&native_tlbie_lock); + local_irq_restore(flags); +} + +static void native_flush_hash_range(unsigned long number, int local) +{ + unsigned long va, vpn, hash, secondary, slot, flags, avpn; + int i, j; + hpte_t *hptep; + unsigned long hpte_v; + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + unsigned long large = batch->large; + + local_irq_save(flags); + + j = 0; + for (i = 0; i < number; i++) { + va = batch->vaddr[j]; + if (large) + vpn = va >> HPAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + hash = hpt_hash(vpn, large); + secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15; + if (secondary) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12; + + hptep = htab_address + slot; + + avpn = va >> 23; + if (large) + avpn &= ~0x1UL; + + native_lock_hpte(hptep); + + hpte_v = hptep->v; + + /* Even if we miss, we need to invalidate the TLB */ + if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) + || !(hpte_v & HPTE_V_VALID)) { + native_unlock_hpte(hptep); + } else { + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->v = 0; + } + + j++; + } + + if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { + asm volatile("ptesync":::"memory"); + + for (i = 0; i < j; i++) + __tlbiel(batch->vaddr[i]); + + asm volatile("ptesync":::"memory"); + } else { + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock(&native_tlbie_lock); + + asm volatile("ptesync":::"memory"); + + for (i = 0; i < j; i++) + __tlbie(batch->vaddr[i], large); + + asm volatile("eieio; tlbsync; ptesync":::"memory"); + + if (lock_tlbie) + spin_unlock(&native_tlbie_lock); + } + + local_irq_restore(flags); +} + +#ifdef CONFIG_PPC_PSERIES +/* Disable TLB batching on nighthawk */ +static inline int tlb_batching_enabled(void) +{ + struct device_node *root = of_find_node_by_path("/"); + int enabled = 1; + + if (root) { + const char *model = get_property(root, "model", NULL); + if (model && !strcmp(model, "IBM,9076-N81")) + enabled = 0; + of_node_put(root); + } + + return enabled; +} +#else +static inline int tlb_batching_enabled(void) +{ + return 1; +} +#endif + +void hpte_init_native(void) +{ + ppc_md.hpte_invalidate = native_hpte_invalidate; + ppc_md.hpte_updatepp = native_hpte_updatepp; + ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp; + ppc_md.hpte_insert = native_hpte_insert; + ppc_md.hpte_remove = native_hpte_remove; + ppc_md.hpte_clear_all = native_hpte_clear; + if (tlb_batching_enabled()) + ppc_md.flush_hash_range = native_flush_hash_range; + htab_finish_init(); +} diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c new file mode 100644 index 00000000000..35dd93eeaf4 --- /dev/null +++ b/arch/powerpc/mm/hash_utils_64.c @@ -0,0 +1,438 @@ +/* + * PowerPC64 port by Mike Corrigan and Dave Engebretsen + * {mikejc|engebret}@us.ibm.com + * + * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com> + * + * SMP scalability work: + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * + * Module name: htab.c + * + * Description: + * PowerPC Hashed Page Table functions + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/spinlock.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/sysctl.h> +#include <linux/ctype.h> +#include <linux/cache.h> +#include <linux/init.h> +#include <linux/signal.h> + +#include <asm/ppcdebug.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/page.h> +#include <asm/types.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/machdep.h> +#include <asm/lmb.h> +#include <asm/abs_addr.h> +#include <asm/tlbflush.h> +#include <asm/io.h> +#include <asm/eeh.h> +#include <asm/tlb.h> +#include <asm/cacheflush.h> +#include <asm/cputable.h> +#include <asm/abs_addr.h> +#include <asm/sections.h> + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * Note: pte --> Linux PTE + * HPTE --> PowerPC Hashed Page Table Entry + * + * Execution context: + * htab_initialize is called with the MMU off (of course), but + * the kernel has been copied down to zero so it can directly + * reference global data. At this point it is very difficult + * to print debug info. + * + */ + +#ifdef CONFIG_U3_DART +extern unsigned long dart_tablebase; +#endif /* CONFIG_U3_DART */ + +hpte_t *htab_address; +unsigned long htab_hash_mask; + +unsigned long _SDR1; + +#define KB (1024) +#define MB (1024*KB) + +static inline void loop_forever(void) +{ + volatile unsigned long x = 1; + for(;x;x|=1) + ; +} + +static inline void create_pte_mapping(unsigned long start, unsigned long end, + unsigned long mode, int large) +{ + unsigned long addr; + unsigned int step; + unsigned long tmp_mode; + unsigned long vflags; + + if (large) { + step = 16*MB; + vflags = HPTE_V_BOLTED | HPTE_V_LARGE; + } else { + step = 4*KB; + vflags = HPTE_V_BOLTED; + } + + for (addr = start; addr < end; addr += step) { + unsigned long vpn, hash, hpteg; + unsigned long vsid = get_kernel_vsid(addr); + unsigned long va = (vsid << 28) | (addr & 0xfffffff); + int ret = -1; + + if (large) + vpn = va >> HPAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + + + tmp_mode = mode; + + /* Make non-kernel text non-executable */ + if (!in_kernel_text(addr)) + tmp_mode = mode | HW_NO_EXEC; + + hash = hpt_hash(vpn, large); + + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + +#ifdef CONFIG_PPC_ISERIES + if (systemcfg->platform & PLATFORM_ISERIES_LPAR) + ret = iSeries_hpte_bolt_or_insert(hpteg, va, + virt_to_abs(addr) >> PAGE_SHIFT, + vflags, tmp_mode); + else +#endif +#ifdef CONFIG_PPC_PSERIES + if (systemcfg->platform & PLATFORM_LPAR) + ret = pSeries_lpar_hpte_insert(hpteg, va, + virt_to_abs(addr) >> PAGE_SHIFT, + vflags, tmp_mode); + else +#endif +#ifdef CONFIG_PPC_MULTIPLATFORM + ret = native_hpte_insert(hpteg, va, + virt_to_abs(addr) >> PAGE_SHIFT, + vflags, tmp_mode); +#endif + + if (ret == -1) { + ppc64_terminate_msg(0x20, "create_pte_mapping"); + loop_forever(); + } + } +} + +void __init htab_initialize(void) +{ + unsigned long table, htab_size_bytes; + unsigned long pteg_count; + unsigned long mode_rw; + int i, use_largepages = 0; + unsigned long base = 0, size = 0; + extern unsigned long tce_alloc_start, tce_alloc_end; + + DBG(" -> htab_initialize()\n"); + + /* + * Calculate the required size of the htab. We want the number of + * PTEGs to equal one half the number of real pages. + */ + htab_size_bytes = 1UL << ppc64_pft_size; + pteg_count = htab_size_bytes >> 7; + + /* For debug, make the HTAB 1/8 as big as it normally would be. */ + ifppcdebug(PPCDBG_HTABSIZE) { + pteg_count >>= 3; + htab_size_bytes = pteg_count << 7; + } + + htab_hash_mask = pteg_count - 1; + + if (systemcfg->platform & PLATFORM_LPAR) { + /* Using a hypervisor which owns the htab */ + htab_address = NULL; + _SDR1 = 0; + } else { + /* Find storage for the HPT. Must be contiguous in + * the absolute address space. + */ + table = lmb_alloc(htab_size_bytes, htab_size_bytes); + + DBG("Hash table allocated at %lx, size: %lx\n", table, + htab_size_bytes); + + if ( !table ) { + ppc64_terminate_msg(0x20, "hpt space"); + loop_forever(); + } + htab_address = abs_to_virt(table); + + /* htab absolute addr + encoded htabsize */ + _SDR1 = table + __ilog2(pteg_count) - 11; + + /* Initialize the HPT with no entries */ + memset((void *)table, 0, htab_size_bytes); + } + + mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX; + + /* On U3 based machines, we need to reserve the DART area and + * _NOT_ map it to avoid cache paradoxes as it's remapped non + * cacheable later on + */ + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + use_largepages = 1; + + /* create bolted the linear mapping in the hash table */ + for (i=0; i < lmb.memory.cnt; i++) { + base = lmb.memory.region[i].base + KERNELBASE; + size = lmb.memory.region[i].size; + + DBG("creating mapping for region: %lx : %lx\n", base, size); + +#ifdef CONFIG_U3_DART + /* Do not map the DART space. Fortunately, it will be aligned + * in such a way that it will not cross two lmb regions and will + * fit within a single 16Mb page. + * The DART space is assumed to be a full 16Mb region even if we + * only use 2Mb of that space. We will use more of it later for + * AGP GART. We have to use a full 16Mb large page. + */ + DBG("DART base: %lx\n", dart_tablebase); + + if (dart_tablebase != 0 && dart_tablebase >= base + && dart_tablebase < (base + size)) { + if (base != dart_tablebase) + create_pte_mapping(base, dart_tablebase, mode_rw, + use_largepages); + if ((base + size) > (dart_tablebase + 16*MB)) + create_pte_mapping(dart_tablebase + 16*MB, base + size, + mode_rw, use_largepages); + continue; + } +#endif /* CONFIG_U3_DART */ + create_pte_mapping(base, base + size, mode_rw, use_largepages); + } + + /* + * If we have a memory_limit and we've allocated TCEs then we need to + * explicitly map the TCE area at the top of RAM. We also cope with the + * case that the TCEs start below memory_limit. + * tce_alloc_start/end are 16MB aligned so the mapping should work + * for either 4K or 16MB pages. + */ + if (tce_alloc_start) { + tce_alloc_start += KERNELBASE; + tce_alloc_end += KERNELBASE; + + if (base + size >= tce_alloc_start) + tce_alloc_start = base + size + 1; + + create_pte_mapping(tce_alloc_start, tce_alloc_end, + mode_rw, use_largepages); + } + + DBG(" <- htab_initialize()\n"); +} +#undef KB +#undef MB + +/* + * Called by asm hashtable.S for doing lazy icache flush + */ +unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) +{ + struct page *page; + + if (!pfn_valid(pte_pfn(pte))) + return pp; + + page = pte_page(pte); + + /* page is dirty */ + if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { + if (trap == 0x400) { + __flush_dcache_icache(page_address(page)); + set_bit(PG_arch_1, &page->flags); + } else + pp |= HW_NO_EXEC; + } + return pp; +} + +/* Result code is: + * 0 - handled + * 1 - normal page fault + * -1 - critical hash insertion error + */ +int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +{ + void *pgdir; + unsigned long vsid; + struct mm_struct *mm; + pte_t *ptep; + int ret; + int user_region = 0; + int local = 0; + cpumask_t tmp; + + if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) + return 1; + + switch (REGION_ID(ea)) { + case USER_REGION_ID: + user_region = 1; + mm = current->mm; + if (! mm) + return 1; + + vsid = get_vsid(mm->context.id, ea); + break; + case VMALLOC_REGION_ID: + mm = &init_mm; + vsid = get_kernel_vsid(ea); + break; +#if 0 + case KERNEL_REGION_ID: + /* + * Should never get here - entire 0xC0... region is bolted. + * Send the problem up to do_page_fault + */ +#endif + default: + /* Not a valid range + * Send the problem up to do_page_fault + */ + return 1; + break; + } + + pgdir = mm->pgd; + + if (pgdir == NULL) + return 1; + + tmp = cpumask_of_cpu(smp_processor_id()); + if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) + local = 1; + + /* Is this a huge page ? */ + if (unlikely(in_hugepage_area(mm->context, ea))) + ret = hash_huge_page(mm, access, ea, vsid, local); + else { + ptep = find_linux_pte(pgdir, ea); + if (ptep == NULL) + return 1; + ret = __hash_page(ea, access, vsid, ptep, trap, local); + } + + return ret; +} + +void flush_hash_page(unsigned long va, pte_t pte, int local) +{ + unsigned long vpn, hash, secondary, slot; + unsigned long huge = pte_huge(pte); + + if (huge) + vpn = va >> HPAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + hash = hpt_hash(vpn, huge); + secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; + if (secondary) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; + + ppc_md.hpte_invalidate(slot, va, huge, local); +} + +void flush_hash_range(unsigned long number, int local) +{ + if (ppc_md.flush_hash_range) { + ppc_md.flush_hash_range(number, local); + } else { + int i; + struct ppc64_tlb_batch *batch = + &__get_cpu_var(ppc64_tlb_batch); + + for (i = 0; i < number; i++) + flush_hash_page(batch->vaddr[i], batch->pte[i], local); + } +} + +static inline void make_bl(unsigned int *insn_addr, void *func) +{ + unsigned long funcp = *((unsigned long *)func); + int offset = funcp - (unsigned long)insn_addr; + + *insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc)); + flush_icache_range((unsigned long)insn_addr, 4+ + (unsigned long)insn_addr); +} + +/* + * low_hash_fault is called when we the low level hash code failed + * to instert a PTE due to an hypervisor error + */ +void low_hash_fault(struct pt_regs *regs, unsigned long address) +{ + if (user_mode(regs)) { + siginfo_t info; + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return; + } + bad_page_fault(regs, address, SIGBUS); +} + +void __init htab_finish_init(void) +{ + extern unsigned int *htab_call_hpte_insert1; + extern unsigned int *htab_call_hpte_insert2; + extern unsigned int *htab_call_hpte_remove; + extern unsigned int *htab_call_hpte_updatepp; + + make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert); + make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert); + make_bl(htab_call_hpte_remove, ppc_md.hpte_remove); + make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp); +} diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c new file mode 100644 index 00000000000..0ea0994ed97 --- /dev/null +++ b/arch/powerpc/mm/hugetlbpage.c @@ -0,0 +1,745 @@ +/* + * PPC64 (POWER4) Huge TLB Page Support for Kernel. + * + * Copyright (C) 2003 David Gibson, IBM Corporation. + * + * Based on the IA-32 version: + * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/smp_lock.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/tlb.h> + +#include <linux/sysctl.h> + +#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) +#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) + +/* Modelled after find_linux_pte() */ +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pg; + pud_t *pu; + pmd_t *pm; + pte_t *pt; + + BUG_ON(! in_hugepage_area(mm->context, addr)); + + addr &= HPAGE_MASK; + + pg = pgd_offset(mm, addr); + if (!pgd_none(*pg)) { + pu = pud_offset(pg, addr); + if (!pud_none(*pu)) { + pm = pmd_offset(pu, addr); + pt = (pte_t *)pm; + BUG_ON(!pmd_none(*pm) + && !(pte_present(*pt) && pte_huge(*pt))); + return pt; + } + } + + return NULL; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pg; + pud_t *pu; + pmd_t *pm; + pte_t *pt; + + BUG_ON(! in_hugepage_area(mm->context, addr)); + + addr &= HPAGE_MASK; + + pg = pgd_offset(mm, addr); + pu = pud_alloc(mm, pg, addr); + + if (pu) { + pm = pmd_alloc(mm, pu, addr); + if (pm) { + pt = (pte_t *)pm; + BUG_ON(!pmd_none(*pm) + && !(pte_present(*pt) && pte_huge(*pt))); + return pt; + } + } + + return NULL; +} + +#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE) + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + int i; + + if (pte_present(*ptep)) { + pte_clear(mm, addr, ptep); + flush_tlb_pending(); + } + + for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) { + *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); + ptep++; + } +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + unsigned long old = pte_update(ptep, ~0UL); + int i; + + if (old & _PAGE_HASHPTE) + hpte_update(mm, addr, old, 0); + + for (i = 1; i < HUGEPTE_BATCH_SIZE; i++) + ptep[i] = __pte(0); + + return __pte(old); +} + +/* + * This function checks for proper alignment of input addr and len parameters. + */ +int is_aligned_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + if (! (within_hugepage_low_range(addr, len) + || within_hugepage_high_range(addr, len)) ) + return -EINVAL; + return 0; +} + +static void flush_low_segments(void *parm) +{ + u16 areas = (unsigned long) parm; + unsigned long i; + + asm volatile("isync" : : : "memory"); + + BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); + + for (i = 0; i < NUM_LOW_AREAS; i++) { + if (! (areas & (1U << i))) + continue; + asm volatile("slbie %0" + : : "r" ((i << SID_SHIFT) | SLBIE_C)); + } + + asm volatile("isync" : : : "memory"); +} + +static void flush_high_segments(void *parm) +{ + u16 areas = (unsigned long) parm; + unsigned long i, j; + + asm volatile("isync" : : : "memory"); + + BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); + + for (i = 0; i < NUM_HIGH_AREAS; i++) { + if (! (areas & (1U << i))) + continue; + for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) + asm volatile("slbie %0" + :: "r" (((i << HTLB_AREA_SHIFT) + + (j << SID_SHIFT)) | SLBIE_C)); + } + + asm volatile("isync" : : : "memory"); +} + +static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) +{ + unsigned long start = area << SID_SHIFT; + unsigned long end = (area+1) << SID_SHIFT; + struct vm_area_struct *vma; + + BUG_ON(area >= NUM_LOW_AREAS); + + /* Check no VMAs are in the region */ + vma = find_vma(mm, start); + if (vma && (vma->vm_start < end)) + return -EBUSY; + + return 0; +} + +static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) +{ + unsigned long start = area << HTLB_AREA_SHIFT; + unsigned long end = (area+1) << HTLB_AREA_SHIFT; + struct vm_area_struct *vma; + + BUG_ON(area >= NUM_HIGH_AREAS); + + /* Check no VMAs are in the region */ + vma = find_vma(mm, start); + if (vma && (vma->vm_start < end)) + return -EBUSY; + + return 0; +} + +static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) +{ + unsigned long i; + + BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); + BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); + + newareas &= ~(mm->context.low_htlb_areas); + if (! newareas) + return 0; /* The segments we want are already open */ + + for (i = 0; i < NUM_LOW_AREAS; i++) + if ((1 << i) & newareas) + if (prepare_low_area_for_htlb(mm, i) != 0) + return -EBUSY; + + mm->context.low_htlb_areas |= newareas; + + /* update the paca copy of the context struct */ + get_paca()->context = mm->context; + + /* the context change must make it to memory before the flush, + * so that further SLB misses do the right thing. */ + mb(); + on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); + + return 0; +} + +static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) +{ + unsigned long i; + + BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); + BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) + != NUM_HIGH_AREAS); + + newareas &= ~(mm->context.high_htlb_areas); + if (! newareas) + return 0; /* The areas we want are already open */ + + for (i = 0; i < NUM_HIGH_AREAS; i++) + if ((1 << i) & newareas) + if (prepare_high_area_for_htlb(mm, i) != 0) + return -EBUSY; + + mm->context.high_htlb_areas |= newareas; + + /* update the paca copy of the context struct */ + get_paca()->context = mm->context; + + /* the context change must make it to memory before the flush, + * so that further SLB misses do the right thing. */ + mb(); + on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); + + return 0; +} + +int prepare_hugepage_range(unsigned long addr, unsigned long len) +{ + int err; + + if ( (addr+len) < addr ) + return -EINVAL; + + if ((addr + len) < 0x100000000UL) + err = open_low_hpage_areas(current->mm, + LOW_ESID_MASK(addr, len)); + else + err = open_high_hpage_areas(current->mm, + HTLB_AREA_MASK(addr, len)); + if (err) { + printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" + " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", + addr, len, + LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); + return err; + } + + return 0; +} + +struct page * +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) +{ + pte_t *ptep; + struct page *page; + + if (! in_hugepage_area(mm->context, address)) + return ERR_PTR(-EINVAL); + + ptep = huge_pte_offset(mm, address); + page = pte_page(*ptep); + if (page) + page += (address % HPAGE_SIZE) / PAGE_SIZE; + + return page; +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + BUG(); + return NULL; +} + +/* Because we have an exclusive hugepage region which lies within the + * normal user address space, we have to take special measures to make + * non-huge mmap()s evade the hugepage reserved regions. */ +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr; + + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (((TASK_SIZE - len) >= addr) + && (!vma || (addr+len) <= vma->vm_start) + && !is_hugepage_only_range(mm, addr,len)) + return addr; + } + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } + +full_search: + vma = find_vma(mm, addr); + while (TASK_SIZE - len >= addr) { + BUG_ON(vma && (addr >= vma->vm_end)); + + if (touches_hugepage_low_range(mm, addr, len)) { + addr = ALIGN(addr+1, 1<<SID_SHIFT); + vma = find_vma(mm, addr); + continue; + } + if (touches_hugepage_high_range(mm, addr, len)) { + addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); + vma = find_vma(mm, addr); + continue; + } + if (!vma || addr + len <= vma->vm_start) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; + vma = vma->vm_next; + } + + /* Make sure we didn't miss any holes */ + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; +} + +/* + * This mmap-allocator allocates new areas top-down from below the + * stack's low limit (the base): + * + * Because we have an exclusive hugepage region which lies within the + * normal user address space, we have to take special measures to make + * non-huge mmap()s evade the hugepage reserved regions. + */ +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma, *prev_vma; + struct mm_struct *mm = current->mm; + unsigned long base = mm->mmap_base, addr = addr0; + unsigned long largest_hole = mm->cached_hole_size; + int first_time = 1; + + /* requested length too big for entire address space */ + if (len > TASK_SIZE) + return -ENOMEM; + + /* dont allow allocations above current base */ + if (mm->free_area_cache > base) + mm->free_area_cache = base; + + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start) + && !is_hugepage_only_range(mm, addr,len)) + return addr; + } + + if (len <= largest_hole) { + largest_hole = 0; + mm->free_area_cache = base; + } +try_again: + /* make sure it can fit in the remaining address space */ + if (mm->free_area_cache < len) + goto fail; + + /* either no address requested or cant fit in requested address hole */ + addr = (mm->free_area_cache - len) & PAGE_MASK; + do { +hugepage_recheck: + if (touches_hugepage_low_range(mm, addr, len)) { + addr = (addr & ((~0) << SID_SHIFT)) - len; + goto hugepage_recheck; + } else if (touches_hugepage_high_range(mm, addr, len)) { + addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; + goto hugepage_recheck; + } + + /* + * Lookup failure means no vma is above this address, + * i.e. return with success: + */ + if (!(vma = find_vma_prev(mm, addr, &prev_vma))) + return addr; + + /* + * new region fits between prev_vma->vm_end and + * vma->vm_start, use it: + */ + if (addr+len <= vma->vm_start && + (!prev_vma || (addr >= prev_vma->vm_end))) { + /* remember the address as a hint for next time */ + mm->cached_hole_size = largest_hole; + return (mm->free_area_cache = addr); + } else { + /* pull free_area_cache down to the first hole */ + if (mm->free_area_cache == vma->vm_end) { + mm->free_area_cache = vma->vm_start; + mm->cached_hole_size = largest_hole; + } + } + + /* remember the largest hole we saw so far */ + if (addr + largest_hole < vma->vm_start) + largest_hole = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + } while (len <= vma->vm_start); + +fail: + /* + * if hint left us with no space for the requested + * mapping then try again: + */ + if (first_time) { + mm->free_area_cache = base; + largest_hole = 0; + first_time = 0; + goto try_again; + } + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->free_area_cache = TASK_UNMAPPED_BASE; + mm->cached_hole_size = ~0UL; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +} + +static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) +{ + unsigned long addr = 0; + struct vm_area_struct *vma; + + vma = find_vma(current->mm, addr); + while (addr + len <= 0x100000000UL) { + BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ + + if (! __within_hugepage_low_range(addr, len, segmask)) { + addr = ALIGN(addr+1, 1<<SID_SHIFT); + vma = find_vma(current->mm, addr); + continue; + } + + if (!vma || (addr + len) <= vma->vm_start) + return addr; + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + /* Depending on segmask this might not be a confirmed + * hugepage region, so the ALIGN could have skipped + * some VMAs */ + vma = find_vma(current->mm, addr); + } + + return -ENOMEM; +} + +static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) +{ + unsigned long addr = 0x100000000UL; + struct vm_area_struct *vma; + + vma = find_vma(current->mm, addr); + while (addr + len <= TASK_SIZE_USER64) { + BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ + + if (! __within_hugepage_high_range(addr, len, areamask)) { + addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); + vma = find_vma(current->mm, addr); + continue; + } + + if (!vma || (addr + len) <= vma->vm_start) + return addr; + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + /* Depending on segmask this might not be a confirmed + * hugepage region, so the ALIGN could have skipped + * some VMAs */ + vma = find_vma(current->mm, addr); + } + + return -ENOMEM; +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + int lastshift; + u16 areamask, curareas; + + if (len & ~HPAGE_MASK) + return -EINVAL; + + if (!cpu_has_feature(CPU_FTR_16M_PAGE)) + return -EINVAL; + + if (test_thread_flag(TIF_32BIT)) { + curareas = current->mm->context.low_htlb_areas; + + /* First see if we can do the mapping in the existing + * low areas */ + addr = htlb_get_low_area(len, curareas); + if (addr != -ENOMEM) + return addr; + + lastshift = 0; + for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); + ! lastshift; areamask >>=1) { + if (areamask & 1) + lastshift = 1; + + addr = htlb_get_low_area(len, curareas | areamask); + if ((addr != -ENOMEM) + && open_low_hpage_areas(current->mm, areamask) == 0) + return addr; + } + } else { + curareas = current->mm->context.high_htlb_areas; + + /* First see if we can do the mapping in the existing + * high areas */ + addr = htlb_get_high_area(len, curareas); + if (addr != -ENOMEM) + return addr; + + lastshift = 0; + for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); + ! lastshift; areamask >>=1) { + if (areamask & 1) + lastshift = 1; + + addr = htlb_get_high_area(len, curareas | areamask); + if ((addr != -ENOMEM) + && open_high_hpage_areas(current->mm, areamask) == 0) + return addr; + } + } + printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" + " enough areas\n"); + return -ENOMEM; +} + +int hash_huge_page(struct mm_struct *mm, unsigned long access, + unsigned long ea, unsigned long vsid, int local) +{ + pte_t *ptep; + unsigned long va, vpn; + pte_t old_pte, new_pte; + unsigned long rflags, prpn; + long slot; + int err = 1; + + spin_lock(&mm->page_table_lock); + + ptep = huge_pte_offset(mm, ea); + + /* Search the Linux page table for a match with va */ + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> HPAGE_SHIFT; + + /* + * If no pte found or not present, send the problem up to + * do_page_fault + */ + if (unlikely(!ptep || pte_none(*ptep))) + goto out; + +/* BUG_ON(pte_bad(*ptep)); */ + + /* + * Check the user's access rights to the page. If access should be + * prevented then send the problem up to do_page_fault. + */ + if (unlikely(access & ~pte_val(*ptep))) + goto out; + /* + * At this point, we have a pte (old_pte) which can be used to build + * or update an HPTE. There are 2 cases: + * + * 1. There is a valid (present) pte with no associated HPTE (this is + * the most common case) + * 2. There is a valid (present) pte with an associated HPTE. The + * current values of the pp bits in the HPTE prevent access + * because we are doing software DIRTY bit management and the + * page is currently not DIRTY. + */ + + + old_pte = *ptep; + new_pte = old_pte; + + rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); + /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ + rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); + + /* Check if pte already has an hpte (case 2) */ + if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { + /* There MIGHT be an HPTE for this pte */ + unsigned long hash, slot; + + hash = hpt_hash(vpn, 1); + if (pte_val(old_pte) & _PAGE_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; + + if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) + pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; + } + + if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { + unsigned long hash = hpt_hash(vpn, 1); + unsigned long hpte_group; + + prpn = pte_pfn(old_pte); + +repeat: + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + + /* Update the linux pte with the HPTE slot */ + pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; + pte_val(new_pte) |= _PAGE_HASHPTE; + + /* Add in WIMG bits */ + /* XXX We should store these in the pte */ + rflags |= _PAGE_COHERENT; + + slot = ppc_md.hpte_insert(hpte_group, va, prpn, + HPTE_V_LARGE, rflags); + + /* Primary is full, try the secondary */ + if (unlikely(slot == -1)) { + pte_val(new_pte) |= _PAGE_SECONDARY; + hpte_group = ((~hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + slot = ppc_md.hpte_insert(hpte_group, va, prpn, + HPTE_V_LARGE | + HPTE_V_SECONDARY, + rflags); + if (slot == -1) { + if (mftb() & 0x1) + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP)&~0x7UL; + + ppc_md.hpte_remove(hpte_group); + goto repeat; + } + } + + if (unlikely(slot == -2)) + panic("hash_huge_page: pte_insert failed\n"); + + pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; + + /* + * No need to use ldarx/stdcx here because all who + * might be updating the pte will hold the + * page_table_lock + */ + *ptep = new_pte; + } + + err = 0; + + out: + spin_unlock(&mm->page_table_lock); + + return err; +} diff --git a/arch/powerpc/mm/imalloc.c b/arch/powerpc/mm/imalloc.c new file mode 100644 index 00000000000..c65b87b9275 --- /dev/null +++ b/arch/powerpc/mm/imalloc.c @@ -0,0 +1,317 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include <asm/uaccess.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/semaphore.h> +#include <asm/imalloc.h> +#include <asm/cacheflush.h> + +static DECLARE_MUTEX(imlist_sem); +struct vm_struct * imlist = NULL; + +static int get_free_im_addr(unsigned long size, unsigned long *im_addr) +{ + unsigned long addr; + struct vm_struct **p, *tmp; + + addr = ioremap_bot; + for (p = &imlist; (tmp = *p) ; p = &tmp->next) { + if (size + addr < (unsigned long) tmp->addr) + break; + if ((unsigned long)tmp->addr >= ioremap_bot) + addr = tmp->size + (unsigned long) tmp->addr; + if (addr >= IMALLOC_END-size) + return 1; + } + *im_addr = addr; + + return 0; +} + +/* Return whether the region described by v_addr and size is a subset + * of the region described by parent + */ +static inline int im_region_is_subset(unsigned long v_addr, unsigned long size, + struct vm_struct *parent) +{ + return (int) (v_addr >= (unsigned long) parent->addr && + v_addr < (unsigned long) parent->addr + parent->size && + size < parent->size); +} + +/* Return whether the region described by v_addr and size is a superset + * of the region described by child + */ +static int im_region_is_superset(unsigned long v_addr, unsigned long size, + struct vm_struct *child) +{ + struct vm_struct parent; + + parent.addr = (void *) v_addr; + parent.size = size; + + return im_region_is_subset((unsigned long) child->addr, child->size, + &parent); +} + +/* Return whether the region described by v_addr and size overlaps + * the region described by vm. Overlapping regions meet the + * following conditions: + * 1) The regions share some part of the address space + * 2) The regions aren't identical + * 3) Neither region is a subset of the other + */ +static int im_region_overlaps(unsigned long v_addr, unsigned long size, + struct vm_struct *vm) +{ + if (im_region_is_superset(v_addr, size, vm)) + return 0; + + return (v_addr + size > (unsigned long) vm->addr + vm->size && + v_addr < (unsigned long) vm->addr + vm->size) || + (v_addr < (unsigned long) vm->addr && + v_addr + size > (unsigned long) vm->addr); +} + +/* Determine imalloc status of region described by v_addr and size. + * Can return one of the following: + * IM_REGION_UNUSED - Entire region is unallocated in imalloc space. + * IM_REGION_SUBSET - Region is a subset of a region that is already + * allocated in imalloc space. + * vm will be assigned to a ptr to the parent region. + * IM_REGION_EXISTS - Exact region already allocated in imalloc space. + * vm will be assigned to a ptr to the existing imlist + * member. + * IM_REGION_OVERLAPS - Region overlaps an allocated region in imalloc space. + * IM_REGION_SUPERSET - Region is a superset of a region that is already + * allocated in imalloc space. + */ +static int im_region_status(unsigned long v_addr, unsigned long size, + struct vm_struct **vm) +{ + struct vm_struct *tmp; + + for (tmp = imlist; tmp; tmp = tmp->next) + if (v_addr < (unsigned long) tmp->addr + tmp->size) + break; + + if (tmp) { + if (im_region_overlaps(v_addr, size, tmp)) + return IM_REGION_OVERLAP; + + *vm = tmp; + if (im_region_is_subset(v_addr, size, tmp)) { + /* Return with tmp pointing to superset */ + return IM_REGION_SUBSET; + } + if (im_region_is_superset(v_addr, size, tmp)) { + /* Return with tmp pointing to first subset */ + return IM_REGION_SUPERSET; + } + else if (v_addr == (unsigned long) tmp->addr && + size == tmp->size) { + /* Return with tmp pointing to exact region */ + return IM_REGION_EXISTS; + } + } + + *vm = NULL; + return IM_REGION_UNUSED; +} + +static struct vm_struct * split_im_region(unsigned long v_addr, + unsigned long size, struct vm_struct *parent) +{ + struct vm_struct *vm1 = NULL; + struct vm_struct *vm2 = NULL; + struct vm_struct *new_vm = NULL; + + vm1 = (struct vm_struct *) kmalloc(sizeof(*vm1), GFP_KERNEL); + if (vm1 == NULL) { + printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); + return NULL; + } + + if (v_addr == (unsigned long) parent->addr) { + /* Use existing parent vm_struct to represent child, allocate + * new one for the remainder of parent range + */ + vm1->size = parent->size - size; + vm1->addr = (void *) (v_addr + size); + vm1->next = parent->next; + + parent->size = size; + parent->next = vm1; + new_vm = parent; + } else if (v_addr + size == (unsigned long) parent->addr + + parent->size) { + /* Allocate new vm_struct to represent child, use existing + * parent one for remainder of parent range + */ + vm1->size = size; + vm1->addr = (void *) v_addr; + vm1->next = parent->next; + new_vm = vm1; + + parent->size -= size; + parent->next = vm1; + } else { + /* Allocate two new vm_structs for the new child and + * uppermost remainder, and use existing parent one for the + * lower remainder of parent range + */ + vm2 = (struct vm_struct *) kmalloc(sizeof(*vm2), GFP_KERNEL); + if (vm2 == NULL) { + printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); + kfree(vm1); + return NULL; + } + + vm1->size = size; + vm1->addr = (void *) v_addr; + vm1->next = vm2; + new_vm = vm1; + + vm2->size = ((unsigned long) parent->addr + parent->size) - + (v_addr + size); + vm2->addr = (void *) v_addr + size; + vm2->next = parent->next; + + parent->size = v_addr - (unsigned long) parent->addr; + parent->next = vm1; + } + + return new_vm; +} + +static struct vm_struct * __add_new_im_area(unsigned long req_addr, + unsigned long size) +{ + struct vm_struct **p, *tmp, *area; + + for (p = &imlist; (tmp = *p) ; p = &tmp->next) { + if (req_addr + size <= (unsigned long)tmp->addr) + break; + } + + area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL); + if (!area) + return NULL; + area->flags = 0; + area->addr = (void *)req_addr; + area->size = size; + area->next = *p; + *p = area; + + return area; +} + +static struct vm_struct * __im_get_area(unsigned long req_addr, + unsigned long size, + int criteria) +{ + struct vm_struct *tmp; + int status; + + status = im_region_status(req_addr, size, &tmp); + if ((criteria & status) == 0) { + return NULL; + } + + switch (status) { + case IM_REGION_UNUSED: + tmp = __add_new_im_area(req_addr, size); + break; + case IM_REGION_SUBSET: + tmp = split_im_region(req_addr, size, tmp); + break; + case IM_REGION_EXISTS: + /* Return requested region */ + break; + case IM_REGION_SUPERSET: + /* Return first existing subset of requested region */ + break; + default: + printk(KERN_ERR "%s() unexpected imalloc region status\n", + __FUNCTION__); + tmp = NULL; + } + + return tmp; +} + +struct vm_struct * im_get_free_area(unsigned long size) +{ + struct vm_struct *area; + unsigned long addr; + + down(&imlist_sem); + if (get_free_im_addr(size, &addr)) { + printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n", + __FUNCTION__, size); + area = NULL; + goto next_im_done; + } + + area = __im_get_area(addr, size, IM_REGION_UNUSED); + if (area == NULL) { + printk(KERN_ERR + "%s() cannot obtain area for addr 0x%lx size 0x%lx\n", + __FUNCTION__, addr, size); + } +next_im_done: + up(&imlist_sem); + return area; +} + +struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, + int criteria) +{ + struct vm_struct *area; + + down(&imlist_sem); + area = __im_get_area(v_addr, size, criteria); + up(&imlist_sem); + return area; +} + +void im_free(void * addr) +{ + struct vm_struct **p, *tmp; + + if (!addr) + return; + if ((unsigned long) addr & ~PAGE_MASK) { + printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr); + return; + } + down(&imlist_sem); + for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { + if (tmp->addr == addr) { + *p = tmp->next; + + /* XXX: do we need the lock? */ + spin_lock(&init_mm.page_table_lock); + unmap_vm_area(tmp); + spin_unlock(&init_mm.page_table_lock); + + kfree(tmp); + up(&imlist_sem); + return; + } + } + up(&imlist_sem); + printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__, + addr); +} diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c new file mode 100644 index 00000000000..bf13c14e66b --- /dev/null +++ b/arch/powerpc/mm/init_32.c @@ -0,0 +1,258 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/initrd.h> +#include <linux/pagemap.h> + +#include <asm/pgalloc.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/btext.h> +#include <asm/tlb.h> +#include <asm/bootinfo.h> +#include <asm/prom.h> +#include <asm/lmb.h> +#include <asm/sections.h> + +#include "mmu_decl.h" + +#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) +/* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */ +#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE)) +#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" +#endif +#endif +#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +unsigned long total_memory; +unsigned long total_lowmem; + +unsigned long ppc_memstart; +unsigned long ppc_memoffset = PAGE_OFFSET; + +int boot_mapsize; +#ifdef CONFIG_PPC_PMAC +unsigned long agp_special_page; +#endif + +#ifdef CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; + +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); +#endif + +void MMU_init(void); + +/* XXX should be in current.h -- paulus */ +extern struct task_struct *current_set[NR_CPUS]; + +char *klimit = _end; +struct device_node *memory_node; + +extern int init_bootmem_done; + +/* + * this tells the system to map all of ram with the segregs + * (i.e. page tables) instead of the bats. + * -- Cort + */ +int __map_without_bats; +int __map_without_ltlbs; + +/* max amount of low RAM to map in */ +unsigned long __max_low_memory = MAX_LOW_MEM; + +/* + * limit of what is accessible with initial MMU setup - + * 256MB usually, but only 16MB on 601. + */ +unsigned long __initial_memory_limit = 0x10000000; + +/* + * Check for command-line options that affect what MMU_init will do. + */ +void MMU_setup(void) +{ + /* Check for nobats option (used in mapin_ram). */ + if (strstr(cmd_line, "nobats")) { + __map_without_bats = 1; + } + + if (strstr(cmd_line, "noltlbs")) { + __map_without_ltlbs = 1; + } +} + +/* + * MMU_init sets up the basic memory mappings for the kernel, + * including both RAM and possibly some I/O regions, + * and sets up the page tables and the MMU hardware ready to go. + */ +void __init MMU_init(void) +{ + if (ppc_md.progress) + ppc_md.progress("MMU:enter", 0x111); + + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + __initial_memory_limit = 0x01000000; + + /* parse args from command line */ + MMU_setup(); + + if (lmb.memory.cnt > 1) { + lmb.memory.cnt = 1; + lmb_analyze(); + printk(KERN_WARNING "Only using first contiguous memory region"); + } + + total_memory = lmb_end_of_DRAM(); + total_lowmem = total_memory; + +#ifdef CONFIG_FSL_BOOKE + /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB + * entries, so we need to adjust lowmem to match the amount we can map + * in the fixed entries */ + adjust_total_lowmem(); +#endif /* CONFIG_FSL_BOOKE */ + if (total_lowmem > __max_low_memory) { + total_lowmem = __max_low_memory; +#ifndef CONFIG_HIGHMEM + total_memory = total_lowmem; +#endif /* CONFIG_HIGHMEM */ + } + + /* Initialize the MMU hardware */ + if (ppc_md.progress) + ppc_md.progress("MMU:hw init", 0x300); + MMU_init_hw(); + + /* Map in all of RAM starting at KERNELBASE */ + if (ppc_md.progress) + ppc_md.progress("MMU:mapin", 0x301); + mapin_ram(); + +#ifdef CONFIG_HIGHMEM + ioremap_base = PKMAP_BASE; +#else + ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ +#endif /* CONFIG_HIGHMEM */ + ioremap_bot = ioremap_base; + + /* Map in I/O resources */ + if (ppc_md.progress) + ppc_md.progress("MMU:setio", 0x302); + if (ppc_md.setup_io_mappings) + ppc_md.setup_io_mappings(); + + /* Initialize the context management stuff */ + mmu_context_init(); + + if (ppc_md.progress) + ppc_md.progress("MMU:exit", 0x211); + +#ifdef CONFIG_BOOTX_TEXT + /* By default, we are no longer mapped */ + boot_text_mapped = 0; + /* Must be done last, or ppc_md.progress will die. */ + map_boot_text(); +#endif +} + +/* This is only called until mem_init is done. */ +void __init *early_get_page(void) +{ + void *p; + + if (init_bootmem_done) { + p = alloc_bootmem_pages(PAGE_SIZE); + } else { + p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, + __initial_memory_limit)); + } + return p; +} + +/* Free up now-unused memory */ +static void free_sec(unsigned long start, unsigned long end, const char *name) +{ + unsigned long cnt = 0; + + while (start < end) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + cnt++; + start += PAGE_SIZE; + } + if (cnt) { + printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name); + totalram_pages += cnt; + } +} + +void free_initmem(void) +{ +#define FREESEC(TYPE) \ + free_sec((unsigned long)(&__ ## TYPE ## _begin), \ + (unsigned long)(&__ ## TYPE ## _end), \ + #TYPE); + + printk ("Freeing unused kernel memory:"); + FREESEC(init); + printk("\n"); + ppc_md.progress = NULL; +#undef FREESEC +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c new file mode 100644 index 00000000000..b0fc822ec29 --- /dev/null +++ b/arch/powerpc/mm/init_64.c @@ -0,0 +1,223 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen <engebret@us.ibm.com> + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/idr.h> +#include <linux/nodemask.h> +#include <linux/module.h> + +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/prom.h> +#include <asm/lmb.h> +#include <asm/rtas.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/tlb.h> +#include <asm/eeh.h> +#include <asm/processor.h> +#include <asm/mmzone.h> +#include <asm/cputable.h> +#include <asm/ppcdebug.h> +#include <asm/sections.h> +#include <asm/system.h> +#include <asm/iommu.h> +#include <asm/abs_addr.h> +#include <asm/vdso.h> +#include <asm/imalloc.h> + +#if PGTABLE_RANGE > USER_VSID_RANGE +#warning Limited user VSID range means pagetable space is wasted +#endif + +#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#warning TASK_SIZE is smaller than it needs to be. +#endif + +unsigned long klimit = (unsigned long)_end; + +/* max amount of RAM to use */ +unsigned long __max_memory; + +/* info on what we think the IO hole is */ +unsigned long io_hole_start; +unsigned long io_hole_size; + +/* + * Do very early mm setup. + */ +void __init mm_init_ppc64(void) +{ +#ifndef CONFIG_PPC_ISERIES + unsigned long i; +#endif + + ppc64_boot_msg(0x100, "MM Init"); + + /* This is the story of the IO hole... please, keep seated, + * unfortunately, we are out of oxygen masks at the moment. + * So we need some rough way to tell where your big IO hole + * is. On pmac, it's between 2G and 4G, on POWER3, it's around + * that area as well, on POWER4 we don't have one, etc... + * We need that as a "hint" when sizing the TCE table on POWER3 + * So far, the simplest way that seem work well enough for us it + * to just assume that the first discontinuity in our physical + * RAM layout is the IO hole. That may not be correct in the future + * (and isn't on iSeries but then we don't care ;) + */ + +#ifndef CONFIG_PPC_ISERIES + for (i = 1; i < lmb.memory.cnt; i++) { + unsigned long base, prevbase, prevsize; + + prevbase = lmb.memory.region[i-1].base; + prevsize = lmb.memory.region[i-1].size; + base = lmb.memory.region[i].base; + if (base > (prevbase + prevsize)) { + io_hole_start = prevbase + prevsize; + io_hole_size = base - (prevbase + prevsize); + break; + } + } +#endif /* CONFIG_PPC_ISERIES */ + if (io_hole_start) + printk("IO Hole assumed to be %lx -> %lx\n", + io_hole_start, io_hole_start + io_hole_size - 1); + + ppc64_boot_msg(0x100, "MM Init Done"); +} + +void free_initmem(void) +{ + unsigned long addr; + + addr = (unsigned long)__init_begin; + for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { + memset((void *)addr, 0xcc, PAGE_SIZE); + ClearPageReserved(virt_to_page(addr)); + set_page_count(virt_to_page(addr), 1); + free_page(addr); + totalram_pages++; + } + printk ("Freeing unused kernel memory: %luk freed\n", + ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif + +static struct kcore_list kcore_vmem; + +static int __init setup_kcore(void) +{ + int i; + + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long base, size; + struct kcore_list *kcore_mem; + + base = lmb.memory.region[i].base; + size = lmb.memory.region[i].size; + + /* GFP_ATOMIC to avoid might_sleep warnings during boot */ + kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC); + if (!kcore_mem) + panic("mem_init: kmalloc failed\n"); + + kclist_add(kcore_mem, __va(base), size); + } + + kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); + + return 0; +} +module_init(setup_kcore); + +static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +{ + memset(addr, 0, kmem_cache_size(cache)); +} + +static const int pgtable_cache_size[2] = { + PTE_TABLE_SIZE, PMD_TABLE_SIZE +}; +static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { + "pgd_pte_cache", "pud_pmd_cache", +}; + +kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; + +void pgtable_cache_init(void) +{ + int i; + + BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); + BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); + BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); + BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); + + for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { + int size = pgtable_cache_size[i]; + const char *name = pgtable_cache_name[i]; + + pgtable_cache[i] = kmem_cache_create(name, + size, size, + SLAB_HWCACHE_ALIGN + | SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (! pgtable_cache[i]) + panic("pgtable_cache_init(): could not create %s!\n", + name); + } +} diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c new file mode 100644 index 00000000000..9b5aa6808eb --- /dev/null +++ b/arch/powerpc/mm/lmb.c @@ -0,0 +1,296 @@ +/* + * Procedures for maintaining information about logical memory blocks. + * + * Peter Bergner, IBM Corp. June 2001. + * Copyright (C) 2001 Peter Bergner. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/bitops.h> +#include <asm/types.h> +#include <asm/page.h> +#include <asm/prom.h> +#include <asm/lmb.h> +#ifdef CONFIG_PPC32 +#include "mmu_decl.h" /* for __max_low_memory */ +#endif + +struct lmb lmb; + +#undef DEBUG + +void lmb_dump_all(void) +{ +#ifdef DEBUG + unsigned long i; + + udbg_printf("lmb_dump_all:\n"); + udbg_printf(" memory.cnt = 0x%lx\n", + lmb.memory.cnt); + udbg_printf(" memory.size = 0x%lx\n", + lmb.memory.size); + for (i=0; i < lmb.memory.cnt ;i++) { + udbg_printf(" memory.region[0x%x].base = 0x%lx\n", + i, lmb.memory.region[i].base); + udbg_printf(" .size = 0x%lx\n", + lmb.memory.region[i].size); + } + + udbg_printf("\n reserved.cnt = 0x%lx\n", + lmb.reserved.cnt); + udbg_printf(" reserved.size = 0x%lx\n", + lmb.reserved.size); + for (i=0; i < lmb.reserved.cnt ;i++) { + udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", + i, lmb.reserved.region[i].base); + udbg_printf(" .size = 0x%lx\n", + lmb.reserved.region[i].size); + } +#endif /* DEBUG */ +} + +static unsigned long __init lmb_addrs_overlap(unsigned long base1, + unsigned long size1, unsigned long base2, unsigned long size2) +{ + return ((base1 < (base2+size2)) && (base2 < (base1+size1))); +} + +static long __init lmb_addrs_adjacent(unsigned long base1, unsigned long size1, + unsigned long base2, unsigned long size2) +{ + if (base2 == base1 + size1) + return 1; + else if (base1 == base2 + size2) + return -1; + + return 0; +} + +static long __init lmb_regions_adjacent(struct lmb_region *rgn, + unsigned long r1, unsigned long r2) +{ + unsigned long base1 = rgn->region[r1].base; + unsigned long size1 = rgn->region[r1].size; + unsigned long base2 = rgn->region[r2].base; + unsigned long size2 = rgn->region[r2].size; + + return lmb_addrs_adjacent(base1, size1, base2, size2); +} + +/* Assumption: base addr of region 1 < base addr of region 2 */ +static void __init lmb_coalesce_regions(struct lmb_region *rgn, + unsigned long r1, unsigned long r2) +{ + unsigned long i; + + rgn->region[r1].size += rgn->region[r2].size; + for (i=r2; i < rgn->cnt-1; i++) { + rgn->region[i].base = rgn->region[i+1].base; + rgn->region[i].size = rgn->region[i+1].size; + } + rgn->cnt--; +} + +/* This routine called with relocation disabled. */ +void __init lmb_init(void) +{ + /* Create a dummy zero size LMB which will get coalesced away later. + * This simplifies the lmb_add() code below... + */ + lmb.memory.region[0].base = 0; + lmb.memory.region[0].size = 0; + lmb.memory.cnt = 1; + + /* Ditto. */ + lmb.reserved.region[0].base = 0; + lmb.reserved.region[0].size = 0; + lmb.reserved.cnt = 1; +} + +/* This routine may be called with relocation disabled. */ +void __init lmb_analyze(void) +{ + int i; + + lmb.memory.size = 0; + + for (i = 0; i < lmb.memory.cnt; i++) + lmb.memory.size += lmb.memory.region[i].size; +} + +/* This routine called with relocation disabled. */ +static long __init lmb_add_region(struct lmb_region *rgn, unsigned long base, + unsigned long size) +{ + unsigned long i, coalesced = 0; + long adjacent; + + /* First try and coalesce this LMB with another. */ + for (i=0; i < rgn->cnt; i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + + adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); + if ( adjacent > 0 ) { + rgn->region[i].base -= size; + rgn->region[i].size += size; + coalesced++; + break; + } + else if ( adjacent < 0 ) { + rgn->region[i].size += size; + coalesced++; + break; + } + } + + if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { + lmb_coalesce_regions(rgn, i, i+1); + coalesced++; + } + + if (coalesced) + return coalesced; + if (rgn->cnt >= MAX_LMB_REGIONS) + return -1; + + /* Couldn't coalesce the LMB, so add it to the sorted table. */ + for (i = rgn->cnt-1; i >= 0; i--) { + if (base < rgn->region[i].base) { + rgn->region[i+1].base = rgn->region[i].base; + rgn->region[i+1].size = rgn->region[i].size; + } else { + rgn->region[i+1].base = base; + rgn->region[i+1].size = size; + break; + } + } + rgn->cnt++; + + return 0; +} + +/* This routine may be called with relocation disabled. */ +long __init lmb_add(unsigned long base, unsigned long size) +{ + struct lmb_region *_rgn = &(lmb.memory); + + /* On pSeries LPAR systems, the first LMB is our RMO region. */ + if (base == 0) + lmb.rmo_size = size; + + return lmb_add_region(_rgn, base, size); + +} + +long __init lmb_reserve(unsigned long base, unsigned long size) +{ + struct lmb_region *_rgn = &(lmb.reserved); + + return lmb_add_region(_rgn, base, size); +} + +long __init lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, + unsigned long size) +{ + unsigned long i; + + for (i=0; i < rgn->cnt; i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { + break; + } + } + + return (i < rgn->cnt) ? i : -1; +} + +unsigned long __init lmb_alloc(unsigned long size, unsigned long align) +{ + return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); +} + +unsigned long __init lmb_alloc_base(unsigned long size, unsigned long align, + unsigned long max_addr) +{ + long i, j; + unsigned long base = 0; + +#ifdef CONFIG_PPC32 + /* On 32-bit, make sure we allocate lowmem */ + if (max_addr == LMB_ALLOC_ANYWHERE) + max_addr = __max_low_memory; +#endif + for (i = lmb.memory.cnt-1; i >= 0; i--) { + unsigned long lmbbase = lmb.memory.region[i].base; + unsigned long lmbsize = lmb.memory.region[i].size; + + if (max_addr == LMB_ALLOC_ANYWHERE) + base = _ALIGN_DOWN(lmbbase + lmbsize - size, align); + else if (lmbbase < max_addr) { + base = min(lmbbase + lmbsize, max_addr); + base = _ALIGN_DOWN(base - size, align); + } else + continue; + + while ((lmbbase <= base) && + ((j = lmb_overlaps_region(&lmb.reserved, base, size)) >= 0) ) + base = _ALIGN_DOWN(lmb.reserved.region[j].base - size, + align); + + if ((base != 0) && (lmbbase <= base)) + break; + } + + if (i < 0) + return 0; + + lmb_add_region(&lmb.reserved, base, size); + + return base; +} + +/* You must call lmb_analyze() before this. */ +unsigned long __init lmb_phys_mem_size(void) +{ + return lmb.memory.size; +} + +unsigned long __init lmb_end_of_DRAM(void) +{ + int idx = lmb.memory.cnt - 1; + + return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); +} + +/* + * Truncate the lmb list to memory_limit if it's set + * You must call lmb_analyze() after this. + */ +void __init lmb_enforce_memory_limit(unsigned long memory_limit) +{ + unsigned long i, limit; + + if (! memory_limit) + return; + + limit = memory_limit; + for (i = 0; i < lmb.memory.cnt; i++) { + if (limit > lmb.memory.region[i].size) { + limit -= lmb.memory.region[i].size; + continue; + } + + lmb.memory.region[i].size = limit; + lmb.memory.cnt = i + 1; + break; + } +} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c new file mode 100644 index 00000000000..55b5860ed3c --- /dev/null +++ b/arch/powerpc/mm/mem.c @@ -0,0 +1,485 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/initrd.h> +#include <linux/pagemap.h> + +#include <asm/pgalloc.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/btext.h> +#include <asm/tlb.h> +#include <asm/bootinfo.h> +#include <asm/prom.h> +#include <asm/lmb.h> +#include <asm/sections.h> +#ifdef CONFIG_PPC64 +#include <asm/vdso.h> +#endif + +#include "mmu_decl.h" + +#ifndef CPU_FTR_COHERENT_ICACHE +#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */ +#define CPU_FTR_NOEXECUTE 0 +#endif + +int init_bootmem_done; +int mem_init_done; + +/* + * This is called by /dev/mem to know if a given address has to + * be mapped non-cacheable or not + */ +int page_is_ram(unsigned long pfn) +{ + unsigned long paddr = (pfn << PAGE_SHIFT); + +#ifndef CONFIG_PPC64 /* XXX for now */ + return paddr < __pa(high_memory); +#else + int i; + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long base; + + base = lmb.memory.region[i].base; + + if ((paddr >= base) && + (paddr < (base + lmb.memory.region[i].size))) { + return 1; + } + } + + return 0; +#endif +} +EXPORT_SYMBOL(page_is_ram); + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, + unsigned long size, pgprot_t vma_prot) +{ + if (ppc_md.phys_mem_access_prot) + return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + + if (!page_is_ram(addr >> PAGE_SHIFT)) + vma_prot = __pgprot(pgprot_val(vma_prot) + | _PAGE_GUARDED | _PAGE_NO_CACHE); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); + +void show_mem(void) +{ + unsigned long total = 0, reserved = 0; + unsigned long shared = 0, cached = 0; + unsigned long highmem = 0; + struct page *page; + pg_data_t *pgdat; + unsigned long i; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + for_each_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { + page = pgdat_page_nr(pgdat, i); + total++; + if (PageHighMem(page)) + highmem++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (page_count(page)) + shared += page_count(page) - 1; + } + } + printk("%ld pages of RAM\n", total); +#ifdef CONFIG_HIGHMEM + printk("%ld pages of HIGHMEM\n", highmem); +#endif + printk("%ld reserved pages\n", reserved); + printk("%ld pages shared\n", shared); + printk("%ld pages swap cached\n", cached); +} + +/* + * Initialize the bootmem system and give it all the memory we + * have available. If we are using highmem, we only put the + * lowmem into the bootmem system. + */ +#ifndef CONFIG_NEED_MULTIPLE_NODES +void __init do_init_bootmem(void) +{ + unsigned long i; + unsigned long start, bootmap_pages; + unsigned long total_pages; + int boot_mapsize; + + max_pfn = total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT; +#ifdef CONFIG_HIGHMEM + total_pages = total_lowmem >> PAGE_SHIFT; +#endif + + /* + * Find an area to use for the bootmem bitmap. Calculate the size of + * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. + * Add 1 additional page in case the address isn't page-aligned. + */ + bootmap_pages = bootmem_bootmap_pages(total_pages); + + start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); + BUG_ON(!start); + + boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); + + /* Add all physical memory to the bootmem map, mark each area + * present. + */ + for (i = 0; i < lmb.memory.cnt; i++) { + unsigned long base = lmb.memory.region[i].base; + unsigned long size = lmb_size_bytes(&lmb.memory, i); +#ifdef CONFIG_HIGHMEM + if (base >= total_lowmem) + continue; + if (base + size > total_lowmem) + size = total_lowmem - base; +#endif + free_bootmem(base, size); + } + + /* reserve the sections we're already using */ + for (i = 0; i < lmb.reserved.cnt; i++) + reserve_bootmem(lmb.reserved.region[i].base, + lmb_size_bytes(&lmb.reserved, i)); + + /* XXX need to clip this if using highmem? */ + for (i = 0; i < lmb.memory.cnt; i++) + memory_present(0, lmb_start_pfn(&lmb.memory, i), + lmb_end_pfn(&lmb.memory, i)); + init_bootmem_done = 1; +} + +/* + * paging_init() sets up the page tables - in fact we've already done this. + */ +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long zholes_size[MAX_NR_ZONES]; + unsigned long total_ram = lmb_phys_mem_size(); + unsigned long top_of_ram = lmb_end_of_DRAM(); + +#ifdef CONFIG_HIGHMEM + map_page(PKMAP_BASE, 0, 0); /* XXX gross */ + pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k + (PKMAP_BASE), PKMAP_BASE), PKMAP_BASE); + map_page(KMAP_FIX_BEGIN, 0, 0); /* XXX gross */ + kmap_pte = pte_offset_kernel(pmd_offset(pgd_offset_k + (KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN); + kmap_prot = PAGE_KERNEL; +#endif /* CONFIG_HIGHMEM */ + + printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + top_of_ram, total_ram); + printk(KERN_INFO "Memory hole size: %ldMB\n", + (top_of_ram - total_ram) >> 20); + /* + * All pages are DMA-able so we put them all in the DMA zone. + */ + memset(zones_size, 0, sizeof(zones_size)); + memset(zholes_size, 0, sizeof(zholes_size)); + + zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; + +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT; + zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT; + zholes_size[ZONE_HIGHMEM] = (top_of_ram - total_ram) >> PAGE_SHIFT; +#else + zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; +#endif /* CONFIG_HIGHMEM */ + + free_area_init_node(0, NODE_DATA(0), zones_size, + __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); +} +#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ + +void __init mem_init(void) +{ +#ifdef CONFIG_NEED_MULTIPLE_NODES + int nid; +#endif + pg_data_t *pgdat; + unsigned long i; + struct page *page; + unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; + + num_physpages = max_pfn; /* RAM is assumed contiguous */ + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + +#ifdef CONFIG_NEED_MULTIPLE_NODES + for_each_online_node(nid) { + if (NODE_DATA(nid)->node_spanned_pages != 0) { + printk("freeing bootmem node %x\n", nid); + totalram_pages += + free_all_bootmem_node(NODE_DATA(nid)); + } + } +#else + max_mapnr = num_physpages; + totalram_pages += free_all_bootmem(); +#endif + for_each_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { + page = pgdat_page_nr(pgdat, i); + if (PageReserved(page)) + reservedpages++; + } + } + + codesize = (unsigned long)&_sdata - (unsigned long)&_stext; + datasize = (unsigned long)&__init_begin - (unsigned long)&_sdata; + initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; + bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; + +#ifdef CONFIG_HIGHMEM + { + unsigned long pfn, highmem_mapnr; + + highmem_mapnr = total_lowmem >> PAGE_SHIFT; + for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { + struct page *page = pfn_to_page(pfn); + + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); + totalhigh_pages++; + } + totalram_pages += totalhigh_pages; + printk(KERN_INFO "High memory: %luk\n", + totalhigh_pages << (PAGE_SHIFT-10)); + } +#endif /* CONFIG_HIGHMEM */ + + printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " + "%luk reserved, %luk data, %luk bss, %luk init)\n", + (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), + num_physpages << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + bsssize >> 10, + initsize >> 10); + + mem_init_done = 1; + +#ifdef CONFIG_PPC64 + /* Initialize the vDSO */ + vdso_init(); +#endif +} + +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. + */ +void flush_dcache_page(struct page *page) +{ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &page->flags)) + clear_bit(PG_arch_1, &page->flags); +} +EXPORT_SYMBOL(flush_dcache_page); + +void flush_dcache_icache_page(struct page *page) +{ +#ifdef CONFIG_BOOKE + void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE); + __flush_dcache_icache(start); + kunmap_atomic(start, KM_PPC_SYNC_ICACHE); +#elif defined(CONFIG_8xx) || defined(CONFIG_PPC64) + /* On 8xx there is no need to kmap since highmem is not supported */ + __flush_dcache_icache(page_address(page)); +#else + __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT); +#endif + +} +void clear_user_page(void *page, unsigned long vaddr, struct page *pg) +{ + clear_page(page); + + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* + * We shouldnt have to do this, but some versions of glibc + * require it (ld.so assumes zero filled pages are icache clean) + * - Anton + */ + + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &pg->flags)) + clear_bit(PG_arch_1, &pg->flags); +} +EXPORT_SYMBOL(clear_user_page); + +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *pg) +{ + copy_page(vto, vfrom); + + /* + * We should be able to use the following optimisation, however + * there are two problems. + * Firstly a bug in some versions of binutils meant PLT sections + * were not marked executable. + * Secondly the first word in the GOT section is blrl, used + * to establish the GOT address. Until recently the GOT was + * not marked executable. + * - Anton + */ +#if 0 + if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) + return; +#endif + + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &pg->flags)) + clear_bit(PG_arch_1, &pg->flags); +} + +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + unsigned long maddr; + + maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); + flush_icache_range(maddr, maddr + len); + kunmap(page); +} +EXPORT_SYMBOL(flush_icache_user_range); + +/* + * This is called at the end of handling a user page fault, when the + * fault has been handled by updating a PTE in the linux page tables. + * We use it to preload an HPTE into the hash table corresponding to + * the updated linux PTE. + * + * This must always be called with the mm->page_table_lock held + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, + pte_t pte) +{ + /* handle i-cache coherency */ + unsigned long pfn = pte_pfn(pte); +#ifdef CONFIG_PPC32 + pmd_t *pmd; +#else + unsigned long vsid; + void *pgdir; + pte_t *ptep; + int local = 0; + cpumask_t tmp; + unsigned long flags; +#endif + + /* handle i-cache coherency */ + if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && + !cpu_has_feature(CPU_FTR_NOEXECUTE) && + pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + if (!PageReserved(page) + && !test_bit(PG_arch_1, &page->flags)) { + if (vma->vm_mm == current->active_mm) { +#ifdef CONFIG_8xx + /* On 8xx, cache control instructions (particularly + * "dcbst" from flush_dcache_icache) fault as write + * operation if there is an unpopulated TLB entry + * for the address in question. To workaround that, + * we invalidate the TLB here, thus avoiding dcbst + * misbehaviour. + */ + _tlbie(address); +#endif + __flush_dcache_icache((void *) address); + } else + flush_dcache_icache_page(page); + set_bit(PG_arch_1, &page->flags); + } + } + +#ifdef CONFIG_PPC_STD_MMU + /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ + if (!pte_young(pte) || address >= TASK_SIZE) + return; +#ifdef CONFIG_PPC32 + if (Hash == 0) + return; + pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address); + if (!pmd_none(*pmd)) + add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd)); +#else + pgdir = vma->vm_mm->pgd; + if (pgdir == NULL) + return; + + ptep = find_linux_pte(pgdir, address); + if (!ptep) + return; + + vsid = get_vsid(vma->vm_mm->context.id, address); + + local_irq_save(flags); + tmp = cpumask_of_cpu(smp_processor_id()); + if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) + local = 1; + + __hash_page(address, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, + 0x300, local); + local_irq_restore(flags); +#endif +#endif +} diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c new file mode 100644 index 00000000000..fe65f522aff --- /dev/null +++ b/arch/powerpc/mm/mmap.c @@ -0,0 +1,86 @@ +/* + * linux/arch/ppc64/mm/mmap.c + * + * flexible mmap layout support + * + * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * Started by Ingo Molnar <mingo@elte.hu> + */ + +#include <linux/personality.h> +#include <linux/mm.h> + +/* + * Top of mmap area (just below the process stack). + * + * Leave an at least ~128 MB hole. + */ +#define MIN_GAP (128*1024*1024) +#define MAX_GAP (TASK_SIZE/6*5) + +static inline unsigned long mmap_base(void) +{ + unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return TASK_SIZE - (gap & PAGE_MASK); +} + +static inline int mmap_is_legacy(void) +{ + /* + * Force standard allocation for 64 bit programs. + */ + if (!test_thread_flag(TIF_32BIT)) + return 1; + + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +/* + * This function, called very early during the creation of a new + * process VM image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} diff --git a/arch/powerpc/mm/mmu_context_32.c b/arch/powerpc/mm/mmu_context_32.c new file mode 100644 index 00000000000..a8816e0f6a8 --- /dev/null +++ b/arch/powerpc/mm/mmu_context_32.c @@ -0,0 +1,86 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU substantially follows the + * architecture specification. This includes the 6xx, 7xx, 7xxx, + * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/init.h> + +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> + +mm_context_t next_mmu_context; +unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; +#ifdef FEW_CONTEXTS +atomic_t nr_free_contexts; +struct mm_struct *context_mm[LAST_CONTEXT+1]; +void steal_context(void); +#endif /* FEW_CONTEXTS */ + +/* + * Initialize the context management stuff. + */ +void __init +mmu_context_init(void) +{ + /* + * Some processors have too few contexts to reserve one for + * init_mm, and require using context 0 for a normal task. + * Other processors reserve the use of context zero for the kernel. + * This code assumes FIRST_CONTEXT < 32. + */ + context_map[0] = (1 << FIRST_CONTEXT) - 1; + next_mmu_context = FIRST_CONTEXT; +#ifdef FEW_CONTEXTS + atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); +#endif /* FEW_CONTEXTS */ +} + +#ifdef FEW_CONTEXTS +/* + * Steal a context from a task that has one at the moment. + * This is only used on 8xx and 4xx and we presently assume that + * they don't do SMP. If they do then this will have to check + * whether the MM we steal is in use. + * We also assume that this is only used on systems that don't + * use an MMU hash table - this is true for 8xx and 4xx. + * This isn't an LRU system, it just frees up each context in + * turn (sort-of pseudo-random replacement :). This would be the + * place to implement an LRU scheme if anyone was motivated to do it. + * -- paulus + */ +void +steal_context(void) +{ + struct mm_struct *mm; + + /* free up context `next_mmu_context' */ + /* if we shouldn't free context 0, don't... */ + if (next_mmu_context < FIRST_CONTEXT) + next_mmu_context = FIRST_CONTEXT; + mm = context_mm[next_mmu_context]; + flush_tlb_mm(mm); + destroy_context(mm); +} +#endif /* FEW_CONTEXTS */ diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c new file mode 100644 index 00000000000..714a84dd8d5 --- /dev/null +++ b/arch/powerpc/mm/mmu_context_64.c @@ -0,0 +1,63 @@ +/* + * MMU context allocation for 64-bit kernels. + * + * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/idr.h> + +#include <asm/mmu_context.h> + +static DEFINE_SPINLOCK(mmu_context_lock); +static DEFINE_IDR(mmu_context_idr); + +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + int index; + int err; + +again: + if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&mmu_context_lock); + err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); + spin_unlock(&mmu_context_lock); + + if (err == -EAGAIN) + goto again; + else if (err) + return err; + + if (index > MAX_CONTEXT) { + idr_remove(&mmu_context_idr, index); + return -ENOMEM; + } + + mm->context.id = index; + + return 0; +} + +void destroy_context(struct mm_struct *mm) +{ + spin_lock(&mmu_context_lock); + idr_remove(&mmu_context_idr, mm->context.id); + spin_unlock(&mmu_context_lock); + + mm->context.id = NO_CONTEXT; +} diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h new file mode 100644 index 00000000000..a4d7a327c0e --- /dev/null +++ b/arch/powerpc/mm/mmu_decl.h @@ -0,0 +1,87 @@ +/* + * Declarations of procedures and variables shared between files + * in arch/ppc/mm/. + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include <asm/tlbflush.h> +#include <asm/mmu.h> + +#ifdef CONFIG_PPC32 +extern void mapin_ram(void); +extern int map_page(unsigned long va, phys_addr_t pa, int flags); +extern void setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags); +extern void settlbcam(int index, unsigned long virt, phys_addr_t phys, + unsigned int size, int flags, unsigned int pid); +extern void invalidate_tlbcam_entry(int index); + +extern int __map_without_bats; +extern unsigned long ioremap_base; +extern unsigned long ioremap_bot; +extern unsigned int rtas_data, rtas_size; + +extern PTE *Hash, *Hash_end; +extern unsigned long Hash_size, Hash_mask; + +extern unsigned int num_tlbcam_entries; +#endif + +extern unsigned long __max_low_memory; +extern unsigned long __initial_memory_limit; +extern unsigned long total_memory; +extern unsigned long total_lowmem; + +/* ...and now those things that may be slightly different between processor + * architectures. -- Dan + */ +#if defined(CONFIG_8xx) +#define flush_HPTE(X, va, pg) _tlbie(va) +#define MMU_init_hw() do { } while(0) +#define mmu_mapin_ram() (0UL) + +#elif defined(CONFIG_4xx) +#define flush_HPTE(X, va, pg) _tlbie(va) +extern void MMU_init_hw(void); +extern unsigned long mmu_mapin_ram(void); + +#elif defined(CONFIG_FSL_BOOKE) +#define flush_HPTE(X, va, pg) _tlbie(va) +extern void MMU_init_hw(void); +extern unsigned long mmu_mapin_ram(void); +extern void adjust_total_lowmem(void); + +#elif defined(CONFIG_PPC32) +/* anything 32-bit except 4xx or 8xx */ +extern void MMU_init_hw(void); +extern unsigned long mmu_mapin_ram(void); + +/* Be careful....this needs to be updated if we ever encounter 603 SMPs, + * which includes all new 82xx processors. We need tlbie/tlbsync here + * in that case (I think). -- Dan. + */ +static inline void flush_HPTE(unsigned context, unsigned long va, + unsigned long pdval) +{ + if ((Hash != 0) && + cpu_has_feature(CPU_FTR_HPTE_TABLE)) + flush_hash_pages(0, va, pdval, 1); + else + _tlbie(va); +} +#endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c new file mode 100644 index 00000000000..cb864b8f275 --- /dev/null +++ b/arch/powerpc/mm/numa.c @@ -0,0 +1,779 @@ +/* + * pSeries NUMA support + * + * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/threads.h> +#include <linux/bootmem.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/nodemask.h> +#include <linux/cpu.h> +#include <linux/notifier.h> +#include <asm/lmb.h> +#include <asm/machdep.h> +#include <asm/abs_addr.h> + +static int numa_enabled = 1; + +static int numa_debug; +#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } + +#ifdef DEBUG_NUMA +#define ARRAY_INITIALISER -1 +#else +#define ARRAY_INITIALISER 0 +#endif + +int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] = + ARRAY_INITIALISER}; +char *numa_memory_lookup_table; +cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; +int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0}; + +struct pglist_data *node_data[MAX_NUMNODES]; +bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; +static int min_common_depth; + +/* + * We need somewhere to store start/span for each node until we have + * allocated the real node_data structures. + */ +static struct { + unsigned long node_start_pfn; + unsigned long node_end_pfn; + unsigned long node_present_pages; +} init_node_data[MAX_NUMNODES] __initdata; + +EXPORT_SYMBOL(node_data); +EXPORT_SYMBOL(numa_cpu_lookup_table); +EXPORT_SYMBOL(numa_memory_lookup_table); +EXPORT_SYMBOL(numa_cpumask_lookup_table); +EXPORT_SYMBOL(nr_cpus_in_node); + +static inline void map_cpu_to_node(int cpu, int node) +{ + numa_cpu_lookup_table[cpu] = node; + if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) { + cpu_set(cpu, numa_cpumask_lookup_table[node]); + nr_cpus_in_node[node]++; + } +} + +#ifdef CONFIG_HOTPLUG_CPU +static void unmap_cpu_from_node(unsigned long cpu) +{ + int node = numa_cpu_lookup_table[cpu]; + + dbg("removing cpu %lu from node %d\n", cpu, node); + + if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { + cpu_clear(cpu, numa_cpumask_lookup_table[node]); + nr_cpus_in_node[node]--; + } else { + printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", + cpu, node); + } +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static struct device_node * __devinit find_cpu_node(unsigned int cpu) +{ + unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); + struct device_node *cpu_node = NULL; + unsigned int *interrupt_server, *reg; + int len; + + while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) { + /* Try interrupt server first */ + interrupt_server = (unsigned int *)get_property(cpu_node, + "ibm,ppc-interrupt-server#s", &len); + + len = len / sizeof(u32); + + if (interrupt_server && (len > 0)) { + while (len--) { + if (interrupt_server[len] == hw_cpuid) + return cpu_node; + } + } else { + reg = (unsigned int *)get_property(cpu_node, + "reg", &len); + if (reg && (len > 0) && (reg[0] == hw_cpuid)) + return cpu_node; + } + } + + return NULL; +} + +/* must hold reference to node during call */ +static int *of_get_associativity(struct device_node *dev) +{ + return (unsigned int *)get_property(dev, "ibm,associativity", NULL); +} + +static int of_node_numa_domain(struct device_node *device) +{ + int numa_domain; + unsigned int *tmp; + + if (min_common_depth == -1) + return 0; + + tmp = of_get_associativity(device); + if (tmp && (tmp[0] >= min_common_depth)) { + numa_domain = tmp[min_common_depth]; + } else { + dbg("WARNING: no NUMA information for %s\n", + device->full_name); + numa_domain = 0; + } + return numa_domain; +} + +/* + * In theory, the "ibm,associativity" property may contain multiple + * associativity lists because a resource may be multiply connected + * into the machine. This resource then has different associativity + * characteristics relative to its multiple connections. We ignore + * this for now. We also assume that all cpu and memory sets have + * their distances represented at a common level. This won't be + * true for heirarchical NUMA. + * + * In any case the ibm,associativity-reference-points should give + * the correct depth for a normal NUMA system. + * + * - Dave Hansen <haveblue@us.ibm.com> + */ +static int __init find_min_common_depth(void) +{ + int depth; + unsigned int *ref_points; + struct device_node *rtas_root; + unsigned int len; + + rtas_root = of_find_node_by_path("/rtas"); + + if (!rtas_root) + return -1; + + /* + * this property is 2 32-bit integers, each representing a level of + * depth in the associativity nodes. The first is for an SMP + * configuration (should be all 0's) and the second is for a normal + * NUMA configuration. + */ + ref_points = (unsigned int *)get_property(rtas_root, + "ibm,associativity-reference-points", &len); + + if ((len >= 1) && ref_points) { + depth = ref_points[1]; + } else { + dbg("WARNING: could not find NUMA " + "associativity reference point\n"); + depth = -1; + } + of_node_put(rtas_root); + + return depth; +} + +static int __init get_mem_addr_cells(void) +{ + struct device_node *memory = NULL; + int rc; + + memory = of_find_node_by_type(memory, "memory"); + if (!memory) + return 0; /* it won't matter */ + + rc = prom_n_addr_cells(memory); + return rc; +} + +static int __init get_mem_size_cells(void) +{ + struct device_node *memory = NULL; + int rc; + + memory = of_find_node_by_type(memory, "memory"); + if (!memory) + return 0; /* it won't matter */ + rc = prom_n_size_cells(memory); + return rc; +} + +static unsigned long read_n_cells(int n, unsigned int **buf) +{ + unsigned long result = 0; + + while (n--) { + result = (result << 32) | **buf; + (*buf)++; + } + return result; +} + +/* + * Figure out to which domain a cpu belongs and stick it there. + * Return the id of the domain used. + */ +static int numa_setup_cpu(unsigned long lcpu) +{ + int numa_domain = 0; + struct device_node *cpu = find_cpu_node(lcpu); + + if (!cpu) { + WARN_ON(1); + goto out; + } + + numa_domain = of_node_numa_domain(cpu); + + if (numa_domain >= num_online_nodes()) { + /* + * POWER4 LPAR uses 0xffff as invalid node, + * dont warn in this case. + */ + if (numa_domain != 0xffff) + printk(KERN_ERR "WARNING: cpu %ld " + "maps to invalid NUMA node %d\n", + lcpu, numa_domain); + numa_domain = 0; + } +out: + node_set_online(numa_domain); + + map_cpu_to_node(lcpu, numa_domain); + + of_node_put(cpu); + + return numa_domain; +} + +static int cpu_numa_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned long lcpu = (unsigned long)hcpu; + int ret = NOTIFY_DONE; + + switch (action) { + case CPU_UP_PREPARE: + if (min_common_depth == -1 || !numa_enabled) + map_cpu_to_node(lcpu, 0); + else + numa_setup_cpu(lcpu); + ret = NOTIFY_OK; + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + case CPU_UP_CANCELED: + unmap_cpu_from_node(lcpu); + break; + ret = NOTIFY_OK; +#endif + } + return ret; +} + +/* + * Check and possibly modify a memory region to enforce the memory limit. + * + * Returns the size the region should have to enforce the memory limit. + * This will either be the original value of size, a truncated value, + * or zero. If the returned value of size is 0 the region should be + * discarded as it lies wholy above the memory limit. + */ +static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size) +{ + /* + * We use lmb_end_of_DRAM() in here instead of memory_limit because + * we've already adjusted it for the limit and it takes care of + * having memory holes below the limit. + */ + extern unsigned long memory_limit; + + if (! memory_limit) + return size; + + if (start + size <= lmb_end_of_DRAM()) + return size; + + if (start >= lmb_end_of_DRAM()) + return 0; + + return lmb_end_of_DRAM() - start; +} + +static int __init parse_numa_properties(void) +{ + struct device_node *cpu = NULL; + struct device_node *memory = NULL; + int addr_cells, size_cells; + int max_domain = 0; + long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT; + unsigned long i; + + if (numa_enabled == 0) { + printk(KERN_WARNING "NUMA disabled by user\n"); + return -1; + } + + numa_memory_lookup_table = + (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); + memset(numa_memory_lookup_table, 0, entries * sizeof(char)); + + for (i = 0; i < entries ; i++) + numa_memory_lookup_table[i] = ARRAY_INITIALISER; + + min_common_depth = find_min_common_depth(); + + dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); + if (min_common_depth < 0) + return min_common_depth; + + max_domain = numa_setup_cpu(boot_cpuid); + + /* + * Even though we connect cpus to numa domains later in SMP init, + * we need to know the maximum node id now. This is because each + * node id must have NODE_DATA etc backing it. + * As a result of hotplug we could still have cpus appear later on + * with larger node ids. In that case we force the cpu into node 0. + */ + for_each_cpu(i) { + int numa_domain; + + cpu = find_cpu_node(i); + + if (cpu) { + numa_domain = of_node_numa_domain(cpu); + of_node_put(cpu); + + if (numa_domain < MAX_NUMNODES && + max_domain < numa_domain) + max_domain = numa_domain; + } + } + + addr_cells = get_mem_addr_cells(); + size_cells = get_mem_size_cells(); + memory = NULL; + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long start; + unsigned long size; + int numa_domain; + int ranges; + unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + ranges = memory->n_addrs; +new_range: + /* these are order-sensitive, and modify the buffer pointer */ + start = read_n_cells(addr_cells, &memcell_buf); + size = read_n_cells(size_cells, &memcell_buf); + + start = _ALIGN_DOWN(start, MEMORY_INCREMENT); + size = _ALIGN_UP(size, MEMORY_INCREMENT); + + numa_domain = of_node_numa_domain(memory); + + if (numa_domain >= MAX_NUMNODES) { + if (numa_domain != 0xffff) + printk(KERN_ERR "WARNING: memory at %lx maps " + "to invalid NUMA node %d\n", start, + numa_domain); + numa_domain = 0; + } + + if (max_domain < numa_domain) + max_domain = numa_domain; + + if (! (size = numa_enforce_memory_limit(start, size))) { + if (--ranges) + goto new_range; + else + continue; + } + + /* + * Initialize new node struct, or add to an existing one. + */ + if (init_node_data[numa_domain].node_end_pfn) { + if ((start / PAGE_SIZE) < + init_node_data[numa_domain].node_start_pfn) + init_node_data[numa_domain].node_start_pfn = + start / PAGE_SIZE; + if (((start / PAGE_SIZE) + (size / PAGE_SIZE)) > + init_node_data[numa_domain].node_end_pfn) + init_node_data[numa_domain].node_end_pfn = + (start / PAGE_SIZE) + + (size / PAGE_SIZE); + + init_node_data[numa_domain].node_present_pages += + size / PAGE_SIZE; + } else { + node_set_online(numa_domain); + + init_node_data[numa_domain].node_start_pfn = + start / PAGE_SIZE; + init_node_data[numa_domain].node_end_pfn = + init_node_data[numa_domain].node_start_pfn + + size / PAGE_SIZE; + init_node_data[numa_domain].node_present_pages = + size / PAGE_SIZE; + } + + for (i = start ; i < (start+size); i += MEMORY_INCREMENT) + numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = + numa_domain; + + if (--ranges) + goto new_range; + } + + for (i = 0; i <= max_domain; i++) + node_set_online(i); + + return 0; +} + +static void __init setup_nonnuma(void) +{ + unsigned long top_of_ram = lmb_end_of_DRAM(); + unsigned long total_ram = lmb_phys_mem_size(); + unsigned long i; + + printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + top_of_ram, total_ram); + printk(KERN_INFO "Memory hole size: %ldMB\n", + (top_of_ram - total_ram) >> 20); + + if (!numa_memory_lookup_table) { + long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT; + numa_memory_lookup_table = + (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); + memset(numa_memory_lookup_table, 0, entries * sizeof(char)); + for (i = 0; i < entries ; i++) + numa_memory_lookup_table[i] = ARRAY_INITIALISER; + } + + map_cpu_to_node(boot_cpuid, 0); + + node_set_online(0); + + init_node_data[0].node_start_pfn = 0; + init_node_data[0].node_end_pfn = lmb_end_of_DRAM() / PAGE_SIZE; + init_node_data[0].node_present_pages = total_ram / PAGE_SIZE; + + for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) + numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; +} + +static void __init dump_numa_topology(void) +{ + unsigned int node; + unsigned int count; + + if (min_common_depth == -1 || !numa_enabled) + return; + + for_each_online_node(node) { + unsigned long i; + + printk(KERN_INFO "Node %d Memory:", node); + + count = 0; + + for (i = 0; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) { + if (numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] == node) { + if (count == 0) + printk(" 0x%lx", i); + ++count; + } else { + if (count > 0) + printk("-0x%lx", i); + count = 0; + } + } + + if (count > 0) + printk("-0x%lx", i); + printk("\n"); + } + return; +} + +/* + * Allocate some memory, satisfying the lmb or bootmem allocator where + * required. nid is the preferred node and end is the physical address of + * the highest address in the node. + * + * Returns the physical address of the memory. + */ +static unsigned long careful_allocation(int nid, unsigned long size, + unsigned long align, unsigned long end) +{ + unsigned long ret = lmb_alloc_base(size, align, end); + + /* retry over all memory */ + if (!ret) + ret = lmb_alloc_base(size, align, lmb_end_of_DRAM()); + + if (!ret) + panic("numa.c: cannot allocate %lu bytes on node %d", + size, nid); + + /* + * If the memory came from a previously allocated node, we must + * retry with the bootmem allocator. + */ + if (pa_to_nid(ret) < nid) { + nid = pa_to_nid(ret); + ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(nid), + size, align, 0); + + if (!ret) + panic("numa.c: cannot allocate %lu bytes on node %d", + size, nid); + + ret = virt_to_abs(ret); + + dbg("alloc_bootmem %lx %lx\n", ret, size); + } + + return ret; +} + +void __init do_init_bootmem(void) +{ + int nid; + int addr_cells, size_cells; + struct device_node *memory = NULL; + static struct notifier_block ppc64_numa_nb = { + .notifier_call = cpu_numa_callback, + .priority = 1 /* Must run before sched domains notifier. */ + }; + + min_low_pfn = 0; + max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_pfn = max_low_pfn; + + if (parse_numa_properties()) + setup_nonnuma(); + else + dump_numa_topology(); + + register_cpu_notifier(&ppc64_numa_nb); + + for_each_online_node(nid) { + unsigned long start_paddr, end_paddr; + int i; + unsigned long bootmem_paddr; + unsigned long bootmap_pages; + + start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE; + end_paddr = init_node_data[nid].node_end_pfn * PAGE_SIZE; + + /* Allocate the node structure node local if possible */ + NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid, + sizeof(struct pglist_data), + SMP_CACHE_BYTES, end_paddr); + NODE_DATA(nid) = abs_to_virt(NODE_DATA(nid)); + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + + dbg("node %d\n", nid); + dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); + + NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; + NODE_DATA(nid)->node_start_pfn = + init_node_data[nid].node_start_pfn; + NODE_DATA(nid)->node_spanned_pages = + end_paddr - start_paddr; + + if (NODE_DATA(nid)->node_spanned_pages == 0) + continue; + + dbg("start_paddr = %lx\n", start_paddr); + dbg("end_paddr = %lx\n", end_paddr); + + bootmap_pages = bootmem_bootmap_pages((end_paddr - start_paddr) >> PAGE_SHIFT); + + bootmem_paddr = careful_allocation(nid, + bootmap_pages << PAGE_SHIFT, + PAGE_SIZE, end_paddr); + memset(abs_to_virt(bootmem_paddr), 0, + bootmap_pages << PAGE_SHIFT); + dbg("bootmap_paddr = %lx\n", bootmem_paddr); + + init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, + start_paddr >> PAGE_SHIFT, + end_paddr >> PAGE_SHIFT); + + /* + * We need to do another scan of all memory sections to + * associate memory with the correct node. + */ + addr_cells = get_mem_addr_cells(); + size_cells = get_mem_size_cells(); + memory = NULL; + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long mem_start, mem_size; + int numa_domain, ranges; + unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + ranges = memory->n_addrs; /* ranges in cell */ +new_range: + mem_start = read_n_cells(addr_cells, &memcell_buf); + mem_size = read_n_cells(size_cells, &memcell_buf); + if (numa_enabled) { + numa_domain = of_node_numa_domain(memory); + if (numa_domain >= MAX_NUMNODES) + numa_domain = 0; + } else + numa_domain = 0; + + if (numa_domain != nid) + continue; + + mem_size = numa_enforce_memory_limit(mem_start, mem_size); + if (mem_size) { + dbg("free_bootmem %lx %lx\n", mem_start, mem_size); + free_bootmem_node(NODE_DATA(nid), mem_start, mem_size); + } + + if (--ranges) /* process all ranges in cell */ + goto new_range; + } + + /* + * Mark reserved regions on this node + */ + for (i = 0; i < lmb.reserved.cnt; i++) { + unsigned long physbase = lmb.reserved.region[i].base; + unsigned long size = lmb.reserved.region[i].size; + + if (pa_to_nid(physbase) != nid && + pa_to_nid(physbase+size-1) != nid) + continue; + + if (physbase < end_paddr && + (physbase+size) > start_paddr) { + /* overlaps */ + if (physbase < start_paddr) { + size -= start_paddr - physbase; + physbase = start_paddr; + } + + if (size > end_paddr - physbase) + size = end_paddr - physbase; + + dbg("reserve_bootmem %lx %lx\n", physbase, + size); + reserve_bootmem_node(NODE_DATA(nid), physbase, + size); + } + } + /* + * This loop may look famaliar, but we have to do it again + * after marking our reserved memory to mark memory present + * for sparsemem. + */ + addr_cells = get_mem_addr_cells(); + size_cells = get_mem_size_cells(); + memory = NULL; + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long mem_start, mem_size; + int numa_domain, ranges; + unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + ranges = memory->n_addrs; /* ranges in cell */ +new_range2: + mem_start = read_n_cells(addr_cells, &memcell_buf); + mem_size = read_n_cells(size_cells, &memcell_buf); + if (numa_enabled) { + numa_domain = of_node_numa_domain(memory); + if (numa_domain >= MAX_NUMNODES) + numa_domain = 0; + } else + numa_domain = 0; + + if (numa_domain != nid) + continue; + + mem_size = numa_enforce_memory_limit(mem_start, mem_size); + memory_present(numa_domain, mem_start >> PAGE_SHIFT, + (mem_start + mem_size) >> PAGE_SHIFT); + + if (--ranges) /* process all ranges in cell */ + goto new_range2; + } + + } +} + +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long zholes_size[MAX_NR_ZONES]; + int nid; + + memset(zones_size, 0, sizeof(zones_size)); + memset(zholes_size, 0, sizeof(zholes_size)); + + for_each_online_node(nid) { + unsigned long start_pfn; + unsigned long end_pfn; + + start_pfn = init_node_data[nid].node_start_pfn; + end_pfn = init_node_data[nid].node_end_pfn; + + zones_size[ZONE_DMA] = end_pfn - start_pfn; + zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - + init_node_data[nid].node_present_pages; + + dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid, + zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]); + + free_area_init_node(nid, NODE_DATA(nid), zones_size, + start_pfn, zholes_size); + } +} + +static int __init early_numa(char *p) +{ + if (!p) + return 0; + + if (strstr(p, "off")) + numa_enabled = 0; + + if (strstr(p, "debug")) + numa_debug = 1; + + return 0; +} +early_param("numa", early_numa); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c new file mode 100644 index 00000000000..5792e533916 --- /dev/null +++ b/arch/powerpc/mm/pgtable_32.c @@ -0,0 +1,469 @@ +/* + * This file contains the routines setting up the linux page tables. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/highmem.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/io.h> + +#include "mmu_decl.h" + +unsigned long ioremap_base; +unsigned long ioremap_bot; +int io_bat_index; + +#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) +#define HAVE_BATS 1 +#endif + +#if defined(CONFIG_FSL_BOOKE) +#define HAVE_TLBCAM 1 +#endif + +extern char etext[], _stext[]; + +#ifdef CONFIG_SMP +extern void hash_page_sync(void); +#endif + +#ifdef HAVE_BATS +extern unsigned long v_mapped_by_bats(unsigned long va); +extern unsigned long p_mapped_by_bats(unsigned long pa); +void setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags); + +#else /* !HAVE_BATS */ +#define v_mapped_by_bats(x) (0UL) +#define p_mapped_by_bats(x) (0UL) +#endif /* HAVE_BATS */ + +#ifdef HAVE_TLBCAM +extern unsigned int tlbcam_index; +extern unsigned long v_mapped_by_tlbcam(unsigned long va); +extern unsigned long p_mapped_by_tlbcam(unsigned long pa); +#else /* !HAVE_TLBCAM */ +#define v_mapped_by_tlbcam(x) (0UL) +#define p_mapped_by_tlbcam(x) (0UL) +#endif /* HAVE_TLBCAM */ + +#ifdef CONFIG_PTE_64BIT +/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */ +#define PGDIR_ORDER 1 +#else +#define PGDIR_ORDER 0 +#endif + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret; + + ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); + return ret; +} + +void pgd_free(pgd_t *pgd) +{ + free_pages((unsigned long)pgd, PGDIR_ORDER); +} + +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte; + extern int mem_init_done; + extern void *early_get_page(void); + + if (mem_init_done) { + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + } else { + pte = (pte_t *)early_get_page(); + if (pte) + clear_page(pte); + } + return pte; +} + +struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *ptepage; + +#ifdef CONFIG_HIGHPTE + int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; +#else + int flags = GFP_KERNEL | __GFP_REPEAT; +#endif + + ptepage = alloc_pages(flags, 0); + if (ptepage) + clear_highpage(ptepage); + return ptepage; +} + +void pte_free_kernel(pte_t *pte) +{ +#ifdef CONFIG_SMP + hash_page_sync(); +#endif + free_page((unsigned long)pte); +} + +void pte_free(struct page *ptepage) +{ +#ifdef CONFIG_SMP + hash_page_sync(); +#endif + __free_page(ptepage); +} + +#ifndef CONFIG_PHYS_64BIT +void __iomem * +ioremap(phys_addr_t addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} +#else /* CONFIG_PHYS_64BIT */ +void __iomem * +ioremap64(unsigned long long addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} + +void __iomem * +ioremap(phys_addr_t addr, unsigned long size) +{ + phys_addr_t addr64 = fixup_bigphys_addr(addr, size); + + return ioremap64(addr64, size); +} +#endif /* CONFIG_PHYS_64BIT */ + +void __iomem * +__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) +{ + unsigned long v, i; + phys_addr_t p; + int err; + + /* + * Choose an address to map it to. + * Once the vmalloc system is running, we use it. + * Before then, we use space going down from ioremap_base + * (ioremap_bot records where we're up to). + */ + p = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - p; + + /* + * If the address lies within the first 16 MB, assume it's in ISA + * memory space + */ + if (p < 16*1024*1024) + p += _ISA_MEM_BASE; + + /* + * Don't allow anybody to remap normal RAM that we're using. + * mem_init() sets high_memory so only do the check after that. + */ + if (mem_init_done && (p < virt_to_phys(high_memory))) { + printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p, + __builtin_return_address(0)); + return NULL; + } + + if (size == 0) + return NULL; + + /* + * Is it already mapped? Perhaps overlapped by a previous + * BAT mapping. If the whole area is mapped then we're done, + * otherwise remap it since we want to keep the virt addrs for + * each request contiguous. + * + * We make the assumption here that if the bottom and top + * of the range we want are mapped then it's mapped to the + * same virt address (and this is contiguous). + * -- Cort + */ + if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ ) + goto out; + + if ((v = p_mapped_by_tlbcam(p))) + goto out; + + if (mem_init_done) { + struct vm_struct *area; + area = get_vm_area(size, VM_IOREMAP); + if (area == 0) + return NULL; + v = (unsigned long) area->addr; + } else { + v = (ioremap_bot -= size); + } + + if ((flags & _PAGE_PRESENT) == 0) + flags |= _PAGE_KERNEL; + if (flags & _PAGE_NO_CACHE) + flags |= _PAGE_GUARDED; + + /* + * Should check if it is a candidate for a BAT mapping + */ + + err = 0; + for (i = 0; i < size && err == 0; i += PAGE_SIZE) + err = map_page(v+i, p+i, flags); + if (err) { + if (mem_init_done) + vunmap((void *)v); + return NULL; + } + +out: + return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK)); +} + +void iounmap(volatile void __iomem *addr) +{ + /* + * If mapped by BATs then there is nothing to do. + * Calling vfree() generates a benign warning. + */ + if (v_mapped_by_bats((unsigned long)addr)) return; + + if (addr > high_memory && (unsigned long) addr < ioremap_bot) + vunmap((void *) (PAGE_MASK & (unsigned long)addr)); +} + +void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + return (void __iomem *) (port + _IO_BASE); +} + +void ioport_unmap(void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); + +int +map_page(unsigned long va, phys_addr_t pa, int flags) +{ + pmd_t *pd; + pte_t *pg; + int err = -ENOMEM; + + spin_lock(&init_mm.page_table_lock); + /* Use upper 10 bits of VA to index the first level map */ + pd = pmd_offset(pgd_offset_k(va), va); + /* Use middle 10 bits of VA to index the second-level map */ + pg = pte_alloc_kernel(&init_mm, pd, va); + if (pg != 0) { + err = 0; + set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); + if (mem_init_done) + flush_HPTE(0, va, pmd_val(*pd)); + } + spin_unlock(&init_mm.page_table_lock); + return err; +} + +/* + * Map in all of physical memory starting at KERNELBASE. + */ +void __init mapin_ram(void) +{ + unsigned long v, p, s, f; + + s = mmu_mapin_ram(); + v = KERNELBASE + s; + p = PPC_MEMSTART + s; + for (; s < total_lowmem; s += PAGE_SIZE) { + if ((char *) v >= _stext && (char *) v < etext) + f = _PAGE_RAM_TEXT; + else + f = _PAGE_RAM; + map_page(v, p, f); + v += PAGE_SIZE; + p += PAGE_SIZE; + } +} + +/* is x a power of 2? */ +#define is_power_of_2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0)) + +/* is x a power of 4? */ +#define is_power_of_4(x) ((x) != 0 && (((x) & (x-1)) == 0) && (ffs(x) & 1)) + +/* + * Set up a mapping for a block of I/O. + * virt, phys, size must all be page-aligned. + * This should only be called before ioremap is called. + */ +void __init io_block_mapping(unsigned long virt, phys_addr_t phys, + unsigned int size, int flags) +{ + int i; + + if (virt > KERNELBASE && virt < ioremap_bot) + ioremap_bot = ioremap_base = virt; + +#ifdef HAVE_BATS + /* + * Use a BAT for this if possible... + */ + if (io_bat_index < 2 && is_power_of_2(size) + && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { + setbat(io_bat_index, virt, phys, size, flags); + ++io_bat_index; + return; + } +#endif /* HAVE_BATS */ + +#ifdef HAVE_TLBCAM + /* + * Use a CAM for this if possible... + */ + if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size) + && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { + settlbcam(tlbcam_index, virt, phys, size, flags, 0); + ++tlbcam_index; + return; + } +#endif /* HAVE_TLBCAM */ + + /* No BATs available, put it in the page tables. */ + for (i = 0; i < size; i += PAGE_SIZE) + map_page(virt + i, phys + i, flags); +} + +/* Scan the real Linux page tables and return a PTE pointer for + * a virtual address in a context. + * Returns true (1) if PTE was found, zero otherwise. The pointer to + * the PTE pointer is unmodified if PTE is not found. + */ +int +get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int retval = 0; + + pgd = pgd_offset(mm, addr & PAGE_MASK); + if (pgd) { + pmd = pmd_offset(pgd, addr & PAGE_MASK); + if (pmd_present(*pmd)) { + pte = pte_offset_map(pmd, addr & PAGE_MASK); + if (pte) { + retval = 1; + *ptep = pte; + /* XXX caller needs to do pte_unmap, yuck */ + } + } + } + return(retval); +} + +/* Find physical address for this virtual address. Normally used by + * I/O functions, but anyone can call it. + */ +unsigned long iopa(unsigned long addr) +{ + unsigned long pa; + + /* I don't know why this won't work on PMacs or CHRP. It + * appears there is some bug, or there is some implicit + * mapping done not properly represented by BATs or in page + * tables.......I am actively working on resolving this, but + * can't hold up other stuff. -- Dan + */ + pte_t *pte; + struct mm_struct *mm; + + /* Check the BATs */ + pa = v_mapped_by_bats(addr); + if (pa) + return pa; + + /* Allow mapping of user addresses (within the thread) + * for DMA if necessary. + */ + if (addr < TASK_SIZE) + mm = current->mm; + else + mm = &init_mm; + + pa = 0; + if (get_pteptr(mm, addr, &pte)) { + pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); + pte_unmap(pte); + } + + return(pa); +} + +/* This is will find the virtual address for a physical one.... + * Swiped from APUS, could be dangerous :-). + * This is only a placeholder until I really find a way to make this + * work. -- Dan + */ +unsigned long +mm_ptov (unsigned long paddr) +{ + unsigned long ret; +#if 0 + if (paddr < 16*1024*1024) + ret = ZTWO_VADDR(paddr); + else { + int i; + + for (i = 0; i < kmap_chunk_count;){ + unsigned long phys = kmap_chunks[i++]; + unsigned long size = kmap_chunks[i++]; + unsigned long virt = kmap_chunks[i++]; + if (paddr >= phys + && paddr < (phys + size)){ + ret = virt + paddr - phys; + goto exit; + } + } + + ret = (unsigned long) __va(paddr); + } +exit: +#ifdef DEBUGPV + printk ("PTOV(%lx)=%lx\n", paddr, ret); +#endif +#else + ret = (unsigned long)paddr + KERNELBASE; +#endif + return ret; +} + diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c new file mode 100644 index 00000000000..484d24f9208 --- /dev/null +++ b/arch/powerpc/mm/pgtable_64.c @@ -0,0 +1,349 @@ +/* + * This file contains ioremap and related functions for 64-bit machines. + * + * Derived from arch/ppc64/mm/init.c + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen <engebret@us.ibm.com> + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/idr.h> +#include <linux/nodemask.h> +#include <linux/module.h> + +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/prom.h> +#include <asm/lmb.h> +#include <asm/rtas.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/tlb.h> +#include <asm/eeh.h> +#include <asm/processor.h> +#include <asm/mmzone.h> +#include <asm/cputable.h> +#include <asm/ppcdebug.h> +#include <asm/sections.h> +#include <asm/system.h> +#include <asm/iommu.h> +#include <asm/abs_addr.h> +#include <asm/vdso.h> +#include <asm/imalloc.h> + +unsigned long ioremap_bot = IMALLOC_BASE; +static unsigned long phbs_io_bot = PHBS_IO_BASE; + +#ifdef CONFIG_PPC_ISERIES + +void __iomem *ioremap(unsigned long addr, unsigned long size) +{ + return (void __iomem *)addr; +} + +extern void __iomem *__ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + return (void __iomem *)addr; +} + +void iounmap(volatile void __iomem *addr) +{ + return; +} + +#else + +/* + * map_io_page currently only called by __ioremap + * map_io_page adds an entry to the ioremap page table + * and adds an entry to the HPT, possibly bolting it + */ +static int map_io_page(unsigned long ea, unsigned long pa, int flags) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + unsigned long vsid; + + if (mem_init_done) { + spin_lock(&init_mm.page_table_lock); + pgdp = pgd_offset_k(ea); + pudp = pud_alloc(&init_mm, pgdp, ea); + if (!pudp) + return -ENOMEM; + pmdp = pmd_alloc(&init_mm, pudp, ea); + if (!pmdp) + return -ENOMEM; + ptep = pte_alloc_kernel(&init_mm, pmdp, ea); + if (!ptep) + return -ENOMEM; + set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, + __pgprot(flags))); + spin_unlock(&init_mm.page_table_lock); + } else { + unsigned long va, vpn, hash, hpteg; + + /* + * If the mm subsystem is not fully up, we cannot create a + * linux page table entry for this mapping. Simply bolt an + * entry in the hardware page table. + */ + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0xFFFFFFF); + vpn = va >> PAGE_SHIFT; + + hash = hpt_hash(vpn, 0); + + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + + /* Panic if a pte grpup is full */ + if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, + HPTE_V_BOLTED, + _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) + == -1) { + panic("map_io_page: could not insert mapping"); + } + } + return 0; +} + + +static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, + unsigned long ea, unsigned long size, + unsigned long flags) +{ + unsigned long i; + + if ((flags & _PAGE_PRESENT) == 0) + flags |= pgprot_val(PAGE_KERNEL); + + for (i = 0; i < size; i += PAGE_SIZE) + if (map_io_page(ea+i, pa+i, flags)) + return NULL; + + return (void __iomem *) (ea + (addr & ~PAGE_MASK)); +} + + +void __iomem * +ioremap(unsigned long addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); +} + +void __iomem * __ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + unsigned long pa, ea; + void __iomem *ret; + + /* + * Choose an address to map it to. + * Once the imalloc system is running, we use it. + * Before that, we map using addresses going + * up from ioremap_bot. imalloc will use + * the addresses from ioremap_bot through + * IMALLOC_END + * + */ + pa = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - pa; + + if (size == 0) + return NULL; + + if (mem_init_done) { + struct vm_struct *area; + area = im_get_free_area(size); + if (area == NULL) + return NULL; + ea = (unsigned long)(area->addr); + ret = __ioremap_com(addr, pa, ea, size, flags); + if (!ret) + im_free(area->addr); + } else { + ea = ioremap_bot; + ret = __ioremap_com(addr, pa, ea, size, flags); + if (ret) + ioremap_bot += size; + } + return ret; +} + +#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) + +int __ioremap_explicit(unsigned long pa, unsigned long ea, + unsigned long size, unsigned long flags) +{ + struct vm_struct *area; + void __iomem *ret; + + /* For now, require page-aligned values for pa, ea, and size */ + if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || + !IS_PAGE_ALIGNED(size)) { + printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); + return 1; + } + + if (!mem_init_done) { + /* Two things to consider in this case: + * 1) No records will be kept (imalloc, etc) that the region + * has been remapped + * 2) It won't be easy to iounmap() the region later (because + * of 1) + */ + ; + } else { + area = im_get_area(ea, size, + IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); + if (area == NULL) { + /* Expected when PHB-dlpar is in play */ + return 1; + } + if (ea != (unsigned long) area->addr) { + printk(KERN_ERR "unexpected addr return from " + "im_get_area\n"); + return 1; + } + } + + ret = __ioremap_com(pa, pa, ea, size, flags); + if (ret == NULL) { + printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); + return 1; + } + if (ret != (void *) ea) { + printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); + return 1; + } + + return 0; +} + +/* + * Unmap an IO region and remove it from imalloc'd list. + * Access to IO memory should be serialized by driver. + * This code is modeled after vmalloc code - unmap_vm_area() + * + * XXX what about calls before mem_init_done (ie python_countermeasures()) + */ +void iounmap(volatile void __iomem *token) +{ + void *addr; + + if (!mem_init_done) + return; + + addr = (void *) ((unsigned long __force) token & PAGE_MASK); + + im_free(addr); +} + +static int iounmap_subset_regions(unsigned long addr, unsigned long size) +{ + struct vm_struct *area; + + /* Check whether subsets of this region exist */ + area = im_get_area(addr, size, IM_REGION_SUPERSET); + if (area == NULL) + return 1; + + while (area) { + iounmap((void __iomem *) area->addr); + area = im_get_area(addr, size, + IM_REGION_SUPERSET); + } + + return 0; +} + +int iounmap_explicit(volatile void __iomem *start, unsigned long size) +{ + struct vm_struct *area; + unsigned long addr; + int rc; + + addr = (unsigned long __force) start & PAGE_MASK; + + /* Verify that the region either exists or is a subset of an existing + * region. In the latter case, split the parent region to create + * the exact region + */ + area = im_get_area(addr, size, + IM_REGION_EXISTS | IM_REGION_SUBSET); + if (area == NULL) { + /* Determine whether subset regions exist. If so, unmap */ + rc = iounmap_subset_regions(addr, size); + if (rc) { + printk(KERN_ERR + "%s() cannot unmap nonexistent range 0x%lx\n", + __FUNCTION__, addr); + return 1; + } + } else { + iounmap((void __iomem *) area->addr); + } + /* + * FIXME! This can't be right: + iounmap(area->addr); + * Maybe it should be "iounmap(area);" + */ + return 0; +} + +#endif + +EXPORT_SYMBOL(ioremap); +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); + +void __iomem * reserve_phb_iospace(unsigned long size) +{ + void __iomem *virt_addr; + + if (phbs_io_bot >= IMALLOC_BASE) + panic("reserve_phb_iospace(): phb io space overflow\n"); + + virt_addr = (void __iomem *) phbs_io_bot; + phbs_io_bot += size; + + return virt_addr; +} diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c new file mode 100644 index 00000000000..cef9e83cc7e --- /dev/null +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -0,0 +1,285 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU substantially follows the + * architecture specification. This includes the 6xx, 7xx, 7xxx, + * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/highmem.h> + +#include <asm/prom.h> +#include <asm/mmu.h> +#include <asm/machdep.h> +#include <asm/lmb.h> + +#include "mmu_decl.h" + +PTE *Hash, *Hash_end; +unsigned long Hash_size, Hash_mask; +unsigned long _SDR1; + +union ubat { /* BAT register values to be loaded */ + BAT bat; +#ifdef CONFIG_PPC64BRIDGE + u64 word[2]; +#else + u32 word[2]; +#endif +} BATS[4][2]; /* 4 pairs of IBAT, DBAT */ + +struct batrange { /* stores address ranges mapped by BATs */ + unsigned long start; + unsigned long limit; + unsigned long phys; +} bat_addrs[4]; + +/* + * Return PA for this VA if it is mapped by a BAT, or 0 + */ +unsigned long v_mapped_by_bats(unsigned long va) +{ + int b; + for (b = 0; b < 4; ++b) + if (va >= bat_addrs[b].start && va < bat_addrs[b].limit) + return bat_addrs[b].phys + (va - bat_addrs[b].start); + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_mapped_by_bats(unsigned long pa) +{ + int b; + for (b = 0; b < 4; ++b) + if (pa >= bat_addrs[b].phys + && pa < (bat_addrs[b].limit-bat_addrs[b].start) + +bat_addrs[b].phys) + return bat_addrs[b].start+(pa-bat_addrs[b].phys); + return 0; +} + +unsigned long __init mmu_mapin_ram(void) +{ +#ifdef CONFIG_POWER4 + return 0; +#else + unsigned long tot, bl, done; + unsigned long max_size = (256<<20); + unsigned long align; + + if (__map_without_bats) + return 0; + + /* Set up BAT2 and if necessary BAT3 to cover RAM. */ + + /* Make sure we don't map a block larger than the + smallest alignment of the physical address. */ + /* alignment of PPC_MEMSTART */ + align = ~(PPC_MEMSTART-1) & PPC_MEMSTART; + /* set BAT block size to MIN(max_size, align) */ + if (align && align < max_size) + max_size = align; + + tot = total_lowmem; + for (bl = 128<<10; bl < max_size; bl <<= 1) { + if (bl * 2 > tot) + break; + } + + setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1; + if ((done < tot) && !bat_addrs[3].limit) { + /* use BAT3 to cover a bit more */ + tot -= done; + for (bl = 128<<10; bl < max_size; bl <<= 1) + if (bl * 2 > tot) + break; + setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1; + } + + return done; +#endif +} + +/* + * Set up one of the I/D BAT (block address translation) register pairs. + * The parameters are not checked; in particular size must be a power + * of 2 between 128k and 256M. + */ +void __init setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags) +{ + unsigned int bl; + int wimgxpp; + union ubat *bat = BATS[index]; + + if (((flags & _PAGE_NO_CACHE) == 0) && + cpu_has_feature(CPU_FTR_NEED_COHERENT)) + flags |= _PAGE_COHERENT; + + bl = (size >> 17) - 1; + if (PVR_VER(mfspr(SPRN_PVR)) != 1) { + /* 603, 604, etc. */ + /* Do DBAT first */ + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT | _PAGE_GUARDED); + wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; + bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ + bat[1].word[1] = phys | wimgxpp; +#ifndef CONFIG_KGDB /* want user access for breakpoints */ + if (flags & _PAGE_USER) +#endif + bat[1].bat.batu.vp = 1; + if (flags & _PAGE_GUARDED) { + /* G bit must be zero in IBATs */ + bat[0].word[0] = bat[0].word[1] = 0; + } else { + /* make IBAT same as DBAT */ + bat[0] = bat[1]; + } + } else { + /* 601 cpu */ + if (bl > BL_8M) + bl = BL_8M; + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT); + wimgxpp |= (flags & _PAGE_RW)? + ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; + bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ + bat->word[1] = phys | bl | 0x40; /* V=1 */ + } + + bat_addrs[index].start = virt; + bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; + bat_addrs[index].phys = phys; +} + +/* + * Initialize the hash table and patch the instructions in hashtable.S. + */ +void __init MMU_init_hw(void) +{ + unsigned int hmask, mb, mb2; + unsigned int n_hpteg, lg_n_hpteg; + + extern unsigned int hash_page_patch_A[]; + extern unsigned int hash_page_patch_B[], hash_page_patch_C[]; + extern unsigned int hash_page[]; + extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[]; + + if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) { + /* + * Put a blr (procedure return) instruction at the + * start of hash_page, since we can still get DSI + * exceptions on a 603. + */ + hash_page[0] = 0x4e800020; + flush_icache_range((unsigned long) &hash_page[0], + (unsigned long) &hash_page[1]); + return; + } + + if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105); + +#ifdef CONFIG_PPC64BRIDGE +#define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */ +#define SDR1_LOW_BITS (lg_n_hpteg - 11) +#define MIN_N_HPTEG 2048 /* min 256kB hash table */ +#else +#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */ +#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10) +#define MIN_N_HPTEG 1024 /* min 64kB hash table */ +#endif + + /* + * Allow 1 HPTE (1/8 HPTEG) for each page of memory. + * This is less than the recommended amount, but then + * Linux ain't AIX. + */ + n_hpteg = total_memory / (PAGE_SIZE * 8); + if (n_hpteg < MIN_N_HPTEG) + n_hpteg = MIN_N_HPTEG; + lg_n_hpteg = __ilog2(n_hpteg); + if (n_hpteg & (n_hpteg - 1)) { + ++lg_n_hpteg; /* round up if not power of 2 */ + n_hpteg = 1 << lg_n_hpteg; + } + Hash_size = n_hpteg << LG_HPTEG_SIZE; + + /* + * Find some memory for the hash table. + */ + if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); + Hash = __va(lmb_alloc_base(Hash_size, Hash_size, + __initial_memory_limit)); + cacheable_memzero(Hash, Hash_size); + _SDR1 = __pa(Hash) | SDR1_LOW_BITS; + + Hash_end = (PTE *) ((unsigned long)Hash + Hash_size); + + printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n", + total_memory >> 20, Hash_size >> 10, Hash); + + + /* + * Patch up the instructions in hashtable.S:create_hpte + */ + if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345); + Hash_mask = n_hpteg - 1; + hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); + mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; + if (lg_n_hpteg > 16) + mb2 = 16 - LG_HPTEG_SIZE; + + hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff) + | ((unsigned int)(Hash) >> 16); + hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6); + hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6); + hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask; + hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask; + + /* + * Ensure that the locations we've patched have been written + * out from the data cache and invalidated in the instruction + * cache, on those machines with split caches. + */ + flush_icache_range((unsigned long) &hash_page_patch_A[0], + (unsigned long) &hash_page_patch_C[1]); + + /* + * Patch up the instructions in hashtable.S:flush_hash_page + */ + flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff) + | ((unsigned int)(Hash) >> 16); + flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6); + flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6); + flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask; + flush_icache_range((unsigned long) &flush_hash_patch_A[0], + (unsigned long) &flush_hash_patch_B[1]); + + if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); +} diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c new file mode 100644 index 00000000000..0473953f6a3 --- /dev/null +++ b/arch/powerpc/mm/slb.c @@ -0,0 +1,158 @@ +/* + * PowerPC64 SLB support. + * + * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM + * Based on earlier code writteh by: + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/paca.h> +#include <asm/cputable.h> + +extern void slb_allocate(unsigned long ea); + +static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot) +{ + return (ea & ESID_MASK) | SLB_ESID_V | slot; +} + +static inline unsigned long mk_vsid_data(unsigned long ea, unsigned long flags) +{ + return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags; +} + +static inline void create_slbe(unsigned long ea, unsigned long flags, + unsigned long entry) +{ + asm volatile("slbmte %0,%1" : + : "r" (mk_vsid_data(ea, flags)), + "r" (mk_esid_data(ea, entry)) + : "memory" ); +} + +static void slb_flush_and_rebolt(void) +{ + /* If you change this make sure you change SLB_NUM_BOLTED + * appropriately too. */ + unsigned long ksp_flags = SLB_VSID_KERNEL; + unsigned long ksp_esid_data; + + WARN_ON(!irqs_disabled()); + + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + ksp_flags |= SLB_VSID_L; + + ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); + if ((ksp_esid_data & ESID_MASK) == KERNELBASE) + ksp_esid_data &= ~SLB_ESID_V; + + /* We need to do this all in asm, so we're sure we don't touch + * the stack between the slbia and rebolting it. */ + asm volatile("isync\n" + "slbia\n" + /* Slot 1 - first VMALLOC segment */ + "slbmte %0,%1\n" + /* Slot 2 - kernel stack */ + "slbmte %2,%3\n" + "isync" + :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)), + "r"(mk_esid_data(VMALLOCBASE, 1)), + "r"(mk_vsid_data(ksp_esid_data, ksp_flags)), + "r"(ksp_esid_data) + : "memory"); +} + +/* Flush all user entries from the segment table of the current processor. */ +void switch_slb(struct task_struct *tsk, struct mm_struct *mm) +{ + unsigned long offset = get_paca()->slb_cache_ptr; + unsigned long esid_data = 0; + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; + + if (offset <= SLB_CACHE_ENTRIES) { + int i; + asm volatile("isync" : : : "memory"); + for (i = 0; i < offset; i++) { + esid_data = ((unsigned long)get_paca()->slb_cache[i] + << SID_SHIFT) | SLBIE_C; + asm volatile("slbie %0" : : "r" (esid_data)); + } + asm volatile("isync" : : : "memory"); + } else { + slb_flush_and_rebolt(); + } + + /* Workaround POWER5 < DD2.1 issue */ + if (offset == 1 || offset > SLB_CACHE_ENTRIES) + asm volatile("slbie %0" : : "r" (esid_data)); + + get_paca()->slb_cache_ptr = 0; + get_paca()->context = mm->context; + + /* + * preload some userspace segments into the SLB. + */ + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + if (pc >= KERNELBASE) + return; + slb_allocate(pc); + + if (GET_ESID(pc) == GET_ESID(stack)) + return; + + if (stack >= KERNELBASE) + return; + slb_allocate(stack); + + if ((GET_ESID(pc) == GET_ESID(unmapped_base)) + || (GET_ESID(stack) == GET_ESID(unmapped_base))) + return; + + if (unmapped_base >= KERNELBASE) + return; + slb_allocate(unmapped_base); +} + +void slb_initialize(void) +{ + /* On iSeries the bolted entries have already been set up by + * the hypervisor from the lparMap data in head.S */ +#ifndef CONFIG_PPC_ISERIES + unsigned long flags = SLB_VSID_KERNEL; + + /* Invalidate the entire SLB (even slot 0) & all the ERATS */ + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + flags |= SLB_VSID_L; + + asm volatile("isync":::"memory"); + asm volatile("slbmte %0,%0"::"r" (0) : "memory"); + asm volatile("isync; slbia; isync":::"memory"); + create_slbe(KERNELBASE, flags, 0); + create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1); + /* We don't bolt the stack for the time being - we're in boot, + * so the stack is in the bolted segment. By the time it goes + * elsewhere, we'll call _switch() which will bolt in the new + * one. */ + asm volatile("isync":::"memory"); +#endif + + get_paca()->stab_rr = SLB_NUM_BOLTED; +} diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S new file mode 100644 index 00000000000..a3a03da503b --- /dev/null +++ b/arch/powerpc/mm/slb_low.S @@ -0,0 +1,151 @@ +/* + * arch/ppc64/mm/slb_low.S + * + * Low-level SLB routines + * + * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM + * + * Based on earlier C version: + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cputable.h> + +/* void slb_allocate(unsigned long ea); + * + * Create an SLB entry for the given EA (user or kernel). + * r3 = faulting address, r13 = PACA + * r9, r10, r11 are clobbered by this function + * No other registers are examined or changed. + */ +_GLOBAL(slb_allocate) + /* + * First find a slot, round robin. Previously we tried to find + * a free slot first but that took too long. Unfortunately we + * dont have any LRU information to help us choose a slot. + */ +#ifdef CONFIG_PPC_ISERIES + /* + * On iSeries, the "bolted" stack segment can be cast out on + * shared processor switch so we need to check for a miss on + * it and restore it to the right slot. + */ + ld r9,PACAKSAVE(r13) + clrrdi r9,r9,28 + clrrdi r11,r3,28 + li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */ + cmpld r9,r11 + beq 3f +#endif /* CONFIG_PPC_ISERIES */ + + ld r10,PACASTABRR(r13) + addi r10,r10,1 + /* use a cpu feature mask if we ever change our slb size */ + cmpldi r10,SLB_NUM_ENTRIES + + blt+ 4f + li r10,SLB_NUM_BOLTED + +4: + std r10,PACASTABRR(r13) +3: + /* r3 = faulting address, r10 = entry */ + + srdi r9,r3,60 /* get region */ + srdi r3,r3,28 /* get esid */ + cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ + + rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */ + oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */ + + /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */ + + blt cr7,0f /* user or kernel? */ + + /* kernel address: proto-VSID = ESID */ + /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but + * this code will generate the protoVSID 0xfffffffff for the + * top segment. That's ok, the scramble below will translate + * it to VSID 0, which is reserved as a bad VSID - one which + * will never have any pages in it. */ + li r11,SLB_VSID_KERNEL +BEGIN_FTR_SECTION + bne cr7,9f + li r11,(SLB_VSID_KERNEL|SLB_VSID_L) +END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) + b 9f + +0: /* user address: proto-VSID = context<<15 | ESID */ + srdi. r9,r3,USER_ESID_BITS + bne- 8f /* invalid ea bits set */ + +#ifdef CONFIG_HUGETLB_PAGE +BEGIN_FTR_SECTION + lhz r9,PACAHIGHHTLBAREAS(r13) + srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) + srd r9,r9,r11 + lhz r11,PACALOWHTLBAREAS(r13) + srd r11,r11,r3 + or r9,r9,r11 +END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) +#endif /* CONFIG_HUGETLB_PAGE */ + + li r11,SLB_VSID_USER + +#ifdef CONFIG_HUGETLB_PAGE +BEGIN_FTR_SECTION + rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */ +END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) +#endif /* CONFIG_HUGETLB_PAGE */ + + ld r9,PACACONTEXTID(r13) + rldimi r3,r9,USER_ESID_BITS,0 + +9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */ + ASM_VSID_SCRAMBLE(r3,r9) + + rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */ + + /* + * No need for an isync before or after this slbmte. The exception + * we enter with and the rfid we exit with are context synchronizing. + */ + slbmte r11,r10 + + bgelr cr7 /* we're done for kernel addresses */ + + /* Update the slb cache */ + lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ + cmpldi r3,SLB_CACHE_ENTRIES + bge 1f + + /* still room in the slb cache */ + sldi r11,r3,1 /* r11 = offset * sizeof(u16) */ + rldicl r10,r10,36,28 /* get low 16 bits of the ESID */ + add r11,r11,r13 /* r11 = (u16 *)paca + offset */ + sth r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ + addi r3,r3,1 /* offset++ */ + b 2f +1: /* offset >= SLB_CACHE_ENTRIES */ + li r3,SLB_CACHE_ENTRIES+1 +2: + sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ + blr + +8: /* invalid EA */ + li r3,0 /* BAD_VSID */ + li r11,SLB_VSID_USER /* flags don't much matter */ + b 9b diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c new file mode 100644 index 00000000000..1b83f002bf2 --- /dev/null +++ b/arch/powerpc/mm/stab.c @@ -0,0 +1,279 @@ +/* + * PowerPC64 Segment Translation Support. + * + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * + * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/paca.h> +#include <asm/cputable.h> +#include <asm/lmb.h> +#include <asm/abs_addr.h> + +struct stab_entry { + unsigned long esid_data; + unsigned long vsid_data; +}; + +/* Both the segment table and SLB code uses the following cache */ +#define NR_STAB_CACHE_ENTRIES 8 +DEFINE_PER_CPU(long, stab_cache_ptr); +DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); + +/* + * Create a segment table entry for the given esid/vsid pair. + */ +static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) +{ + unsigned long esid_data, vsid_data; + unsigned long entry, group, old_esid, castout_entry, i; + unsigned int global_entry; + struct stab_entry *ste, *castout_ste; + unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE; + + vsid_data = vsid << STE_VSID_SHIFT; + esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; + if (! kernel_segment) + esid_data |= STE_ESID_KS; + + /* Search the primary group first. */ + global_entry = (esid & 0x1f) << 3; + ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); + + /* Find an empty entry, if one exists. */ + for (group = 0; group < 2; group++) { + for (entry = 0; entry < 8; entry++, ste++) { + if (!(ste->esid_data & STE_ESID_V)) { + ste->vsid_data = vsid_data; + asm volatile("eieio":::"memory"); + ste->esid_data = esid_data; + return (global_entry | entry); + } + } + /* Now search the secondary group. */ + global_entry = ((~esid) & 0x1f) << 3; + ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); + } + + /* + * Could not find empty entry, pick one with a round robin selection. + * Search all entries in the two groups. + */ + castout_entry = get_paca()->stab_rr; + for (i = 0; i < 16; i++) { + if (castout_entry < 8) { + global_entry = (esid & 0x1f) << 3; + ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); + castout_ste = ste + castout_entry; + } else { + global_entry = ((~esid) & 0x1f) << 3; + ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); + castout_ste = ste + (castout_entry - 8); + } + + /* Dont cast out the first kernel segment */ + if ((castout_ste->esid_data & ESID_MASK) != KERNELBASE) + break; + + castout_entry = (castout_entry + 1) & 0xf; + } + + get_paca()->stab_rr = (castout_entry + 1) & 0xf; + + /* Modify the old entry to the new value. */ + + /* Force previous translations to complete. DRENG */ + asm volatile("isync" : : : "memory"); + + old_esid = castout_ste->esid_data >> SID_SHIFT; + castout_ste->esid_data = 0; /* Invalidate old entry */ + + asm volatile("sync" : : : "memory"); /* Order update */ + + castout_ste->vsid_data = vsid_data; + asm volatile("eieio" : : : "memory"); /* Order update */ + castout_ste->esid_data = esid_data; + + asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); + /* Ensure completion of slbie */ + asm volatile("sync" : : : "memory"); + + return (global_entry | (castout_entry & 0x7)); +} + +/* + * Allocate a segment table entry for the given ea and mm + */ +static int __ste_allocate(unsigned long ea, struct mm_struct *mm) +{ + unsigned long vsid; + unsigned char stab_entry; + unsigned long offset; + + /* Kernel or user address? */ + if (ea >= KERNELBASE) { + vsid = get_kernel_vsid(ea); + } else { + if ((ea >= TASK_SIZE_USER64) || (! mm)) + return 1; + + vsid = get_vsid(mm->context.id, ea); + } + + stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); + + if (ea < KERNELBASE) { + offset = __get_cpu_var(stab_cache_ptr); + if (offset < NR_STAB_CACHE_ENTRIES) + __get_cpu_var(stab_cache[offset++]) = stab_entry; + else + offset = NR_STAB_CACHE_ENTRIES+1; + __get_cpu_var(stab_cache_ptr) = offset; + + /* Order update */ + asm volatile("sync":::"memory"); + } + + return 0; +} + +int ste_allocate(unsigned long ea) +{ + return __ste_allocate(ea, current->mm); +} + +/* + * Do the segment table work for a context switch: flush all user + * entries from the table, then preload some probably useful entries + * for the new task + */ +void switch_stab(struct task_struct *tsk, struct mm_struct *mm) +{ + struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; + struct stab_entry *ste; + unsigned long offset = __get_cpu_var(stab_cache_ptr); + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; + + /* Force previous translations to complete. DRENG */ + asm volatile("isync" : : : "memory"); + + if (offset <= NR_STAB_CACHE_ENTRIES) { + int i; + + for (i = 0; i < offset; i++) { + ste = stab + __get_cpu_var(stab_cache[i]); + ste->esid_data = 0; /* invalidate entry */ + } + } else { + unsigned long entry; + + /* Invalidate all entries. */ + ste = stab; + + /* Never flush the first entry. */ + ste += 1; + for (entry = 1; + entry < (PAGE_SIZE / sizeof(struct stab_entry)); + entry++, ste++) { + unsigned long ea; + ea = ste->esid_data & ESID_MASK; + if (ea < KERNELBASE) { + ste->esid_data = 0; + } + } + } + + asm volatile("sync; slbia; sync":::"memory"); + + __get_cpu_var(stab_cache_ptr) = 0; + + /* Now preload some entries for the new task */ + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + __ste_allocate(pc, mm); + + if (GET_ESID(pc) == GET_ESID(stack)) + return; + + __ste_allocate(stack, mm); + + if ((GET_ESID(pc) == GET_ESID(unmapped_base)) + || (GET_ESID(stack) == GET_ESID(unmapped_base))) + return; + + __ste_allocate(unmapped_base, mm); + + /* Order update */ + asm volatile("sync" : : : "memory"); +} + +extern void slb_initialize(void); + +/* + * Allocate segment tables for secondary CPUs. These must all go in + * the first (bolted) segment, so that do_stab_bolted won't get a + * recursive segment miss on the segment table itself. + */ +void stabs_alloc(void) +{ + int cpu; + + if (cpu_has_feature(CPU_FTR_SLB)) + return; + + for_each_cpu(cpu) { + unsigned long newstab; + + if (cpu == 0) + continue; /* stab for CPU 0 is statically allocated */ + + newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT); + if (! newstab) + panic("Unable to allocate segment table for CPU %d.\n", + cpu); + + newstab += KERNELBASE; + + memset((void *)newstab, 0, PAGE_SIZE); + + paca[cpu].stab_addr = newstab; + paca[cpu].stab_real = virt_to_abs(newstab); + printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real); + } +} + +/* + * Build an entry for the base kernel segment and put it into + * the segment table or SLB. All other segment table or SLB + * entries are faulted in. + */ +void stab_initialize(unsigned long stab) +{ + unsigned long vsid = get_kernel_vsid(KERNELBASE); + + if (cpu_has_feature(CPU_FTR_SLB)) { + slb_initialize(); + } else { + asm volatile("isync; slbia; isync":::"memory"); + make_ste(stab, GET_ESID(KERNELBASE), vsid); + + /* Order update */ + asm volatile("sync":::"memory"); + } +} diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c new file mode 100644 index 00000000000..6c3dc3c44c8 --- /dev/null +++ b/arch/powerpc/mm/tlb_32.c @@ -0,0 +1,183 @@ +/* + * This file contains the routines for TLB flushing. + * On machines where the MMU uses a hash table to store virtual to + * physical translations, these routines flush entries from the + * hash table also. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/highmem.h> +#include <asm/tlbflush.h> +#include <asm/tlb.h> + +#include "mmu_decl.h" + +/* + * Called when unmapping pages to flush entries from the TLB/hash table. + */ +void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) +{ + unsigned long ptephys; + + if (Hash != 0) { + ptephys = __pa(ptep) & PAGE_MASK; + flush_hash_pages(mm->context, addr, ptephys, 1); + } +} + +/* + * Called by ptep_set_access_flags, must flush on CPUs for which the + * DSI handler can't just "fixup" the TLB on a write fault + */ +void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) +{ + if (Hash != 0) + return; + _tlbie(addr); +} + +/* + * Called at the end of a mmu_gather operation to make sure the + * TLB flush is completely done. + */ +void tlb_flush(struct mmu_gather *tlb) +{ + if (Hash == 0) { + /* + * 603 needs to flush the whole TLB here since + * it doesn't use a hash table. + */ + _tlbia(); + } +} + +/* + * TLB flushing: + * + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes kernel pages + * + * since the hardware hash table functions as an extension of the + * tlb as far as the linux tables are concerned, flush it too. + * -- Cort + */ + +/* + * 750 SMP is a Bad Idea because the 750 doesn't broadcast all + * the cache operations on the bus. Hence we need to use an IPI + * to get the other CPU(s) to invalidate their TLBs. + */ +#ifdef CONFIG_SMP_750 +#define FINISH_FLUSH smp_send_tlb_invalidate(0) +#else +#define FINISH_FLUSH do { } while (0) +#endif + +static void flush_range(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + pmd_t *pmd; + unsigned long pmd_end; + int count; + unsigned int ctx = mm->context; + + if (Hash == 0) { + _tlbia(); + return; + } + start &= PAGE_MASK; + if (start >= end) + return; + end = (end - 1) | ~PAGE_MASK; + pmd = pmd_offset(pgd_offset(mm, start), start); + for (;;) { + pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; + if (pmd_end > end) + pmd_end = end; + if (!pmd_none(*pmd)) { + count = ((pmd_end - start) >> PAGE_SHIFT) + 1; + flush_hash_pages(ctx, start, pmd_val(*pmd), count); + } + if (pmd_end == end) + break; + start = pmd_end + 1; + ++pmd; + } +} + +/* + * Flush kernel TLB entries in the given range + */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_range(&init_mm, start, end); + FINISH_FLUSH; +} + +/* + * Flush all the (user) entries for the address space described by mm. + */ +void flush_tlb_mm(struct mm_struct *mm) +{ + struct vm_area_struct *mp; + + if (Hash == 0) { + _tlbia(); + return; + } + + for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) + flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); + FINISH_FLUSH; +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + struct mm_struct *mm; + pmd_t *pmd; + + if (Hash == 0) { + _tlbie(vmaddr); + return; + } + mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; + pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); + if (!pmd_none(*pmd)) + flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1); + FINISH_FLUSH; +} + +/* + * For each address in the range, find the pte for the address + * and check _PAGE_HASHPTE bit; if it is set, find and destroy + * the corresponding HPTE. + */ +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_range(vma->vm_mm, start, end); + FINISH_FLUSH; +} diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c new file mode 100644 index 00000000000..09ab81a10f4 --- /dev/null +++ b/arch/powerpc/mm/tlb_64.c @@ -0,0 +1,196 @@ +/* + * This file contains the routines for flushing entries from the + * TLB and MMU hash table. + * + * Derived from arch/ppc64/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen <engebret@us.ibm.com> + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/hardirq.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/tlb.h> +#include <linux/highmem.h> + +DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); + +/* This is declared as we are using the more or less generic + * include/asm-ppc64/tlb.h file -- tgall + */ +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); +unsigned long pte_freelist_forced_free; + +struct pte_freelist_batch +{ + struct rcu_head rcu; + unsigned int index; + pgtable_free_t tables[0]; +}; + +DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); +unsigned long pte_freelist_forced_free; + +#define PTE_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ + / sizeof(pgtable_free_t)) + +#ifdef CONFIG_SMP +static void pte_free_smp_sync(void *arg) +{ + /* Do nothing, just ensure we sync with all CPUs */ +} +#endif + +/* This is only called when we are critically out of memory + * (and fail to get a page in pte_free_tlb). + */ +static void pgtable_free_now(pgtable_free_t pgf) +{ + pte_freelist_forced_free++; + + smp_call_function(pte_free_smp_sync, NULL, 0, 1); + + pgtable_free(pgf); +} + +static void pte_free_rcu_callback(struct rcu_head *head) +{ + struct pte_freelist_batch *batch = + container_of(head, struct pte_freelist_batch, rcu); + unsigned int i; + + for (i = 0; i < batch->index; i++) + pgtable_free(batch->tables[i]); + + free_page((unsigned long)batch); +} + +static void pte_free_submit(struct pte_freelist_batch *batch) +{ + INIT_RCU_HEAD(&batch->rcu); + call_rcu(&batch->rcu, pte_free_rcu_callback); +} + +void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) +{ + /* This is safe as we are holding page_table_lock */ + cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); + struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + + if (atomic_read(&tlb->mm->mm_users) < 2 || + cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { + pgtable_free(pgf); + return; + } + + if (*batchp == NULL) { + *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); + if (*batchp == NULL) { + pgtable_free_now(pgf); + return; + } + (*batchp)->index = 0; + } + (*batchp)->tables[(*batchp)->index++] = pgf; + if ((*batchp)->index == PTE_FREELIST_SIZE) { + pte_free_submit(*batchp); + *batchp = NULL; + } +} + +/* + * Update the MMU hash table to correspond with a change to + * a Linux PTE. If wrprot is true, it is permissible to + * change the existing HPTE to read-only rather than removing it + * (if we remove it we should clear the _PTE_HPTEFLAGS bits). + */ +void hpte_update(struct mm_struct *mm, unsigned long addr, + unsigned long pte, int wrprot) +{ + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + unsigned long vsid; + int i; + + i = batch->index; + + /* + * This can happen when we are in the middle of a TLB batch and + * we encounter memory pressure (eg copy_page_range when it tries + * to allocate a new pte). If we have to reclaim memory and end + * up scanning and resetting referenced bits then our batch context + * will change mid stream. + */ + if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) { + flush_tlb_pending(); + i = 0; + } + if (i == 0) { + batch->mm = mm; + batch->large = pte_huge(pte); + } + if (addr < KERNELBASE) { + vsid = get_vsid(mm->context.id, addr); + WARN_ON(vsid == 0); + } else + vsid = get_kernel_vsid(addr); + batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff); + batch->pte[i] = __pte(pte); + batch->index = ++i; + if (i >= PPC64_TLB_BATCH_NR) + flush_tlb_pending(); +} + +void __flush_tlb_pending(struct ppc64_tlb_batch *batch) +{ + int i; + int cpu; + cpumask_t tmp; + int local = 0; + + BUG_ON(in_interrupt()); + + cpu = get_cpu(); + i = batch->index; + tmp = cpumask_of_cpu(cpu); + if (cpus_equal(batch->mm->cpu_vm_mask, tmp)) + local = 1; + + if (i == 1) + flush_hash_page(batch->vaddr[0], batch->pte[0], local); + else + flush_hash_range(i, local); + batch->index = 0; + put_cpu(); +} + +void pte_free_finish(void) +{ + /* This is safe as we are holding page_table_lock */ + struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + + if (*batchp == NULL) + return; + pte_free_submit(*batchp); + *batchp = NULL; +} diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig new file mode 100644 index 00000000000..19d37730b66 --- /dev/null +++ b/arch/powerpc/oprofile/Kconfig @@ -0,0 +1,23 @@ + +menu "Profiling support" + depends on EXPERIMENTAL + +config PROFILING + bool "Profiling support (EXPERIMENTAL)" + help + Say Y here to enable the extended profiling support mechanisms used + by profilers such as OProfile. + + +config OPROFILE + tristate "OProfile system profiling (EXPERIMENTAL)" + depends on PROFILING + help + OProfile is a profiling system capable of profiling the + whole system, include the kernel, kernel modules, libraries, + and applications. + + If unsure, say N. + +endmenu + diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile new file mode 100644 index 00000000000..0782d0cca89 --- /dev/null +++ b/arch/powerpc/oprofile/Makefile @@ -0,0 +1,11 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) common.o +oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o +oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c new file mode 100644 index 00000000000..0ec12c8f2c0 --- /dev/null +++ b/arch/powerpc/oprofile/common.c @@ -0,0 +1,199 @@ +/* + * PPC 64 oprofile support: + * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM + * PPC 32 oprofile support: (based on PPC 64 support) + * Copyright (C) Freescale Semiconductor, Inc 2004 + * Author: Andy Fleming + * + * Based on alpha version. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/oprofile.h> +#ifndef __powerpc64__ +#include <linux/slab.h> +#endif /* ! __powerpc64__ */ +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#ifdef __powerpc64__ +#include <asm/pmc.h> +#else /* __powerpc64__ */ +#include <asm/perfmon.h> +#endif /* __powerpc64__ */ +#include <asm/cputable.h> +#include <asm/oprofile_impl.h> + +static struct op_powerpc_model *model; + +static struct op_counter_config ctr[OP_MAX_COUNTER]; +static struct op_system_config sys; + +#ifndef __powerpc64__ +static char *cpu_type; +#endif /* ! __powerpc64__ */ + +static void op_handle_interrupt(struct pt_regs *regs) +{ + model->handle_interrupt(regs, ctr); +} + +static int op_powerpc_setup(void) +{ + int err; + + /* Grab the hardware */ + err = reserve_pmc_hardware(op_handle_interrupt); + if (err) + return err; + + /* Pre-compute the values to stuff in the hardware registers. */ + model->reg_setup(ctr, &sys, model->num_counters); + + /* Configure the registers on all cpus. */ +#ifdef __powerpc64__ + on_each_cpu(model->cpu_setup, NULL, 0, 1); +#else /* __powerpc64__ */ +#if 0 + /* FIXME: Make multi-cpu work */ + on_each_cpu(model->reg_setup, NULL, 0, 1); +#endif +#endif /* __powerpc64__ */ + + return 0; +} + +static void op_powerpc_shutdown(void) +{ + release_pmc_hardware(); +} + +static void op_powerpc_cpu_start(void *dummy) +{ + model->start(ctr); +} + +static int op_powerpc_start(void) +{ + on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1); + return 0; +} + +static inline void op_powerpc_cpu_stop(void *dummy) +{ + model->stop(); +} + +static void op_powerpc_stop(void) +{ + on_each_cpu(op_powerpc_cpu_stop, NULL, 0, 1); +} + +static int op_powerpc_create_files(struct super_block *sb, struct dentry *root) +{ + int i; + +#ifdef __powerpc64__ + /* + * There is one mmcr0, mmcr1 and mmcra for setting the events for + * all of the counters. + */ + oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0); + oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1); + oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra); +#endif /* __powerpc64__ */ + + for (i = 0; i < model->num_counters; ++i) { + struct dentry *dir; + char buf[3]; + + snprintf(buf, sizeof buf, "%d", i); + dir = oprofilefs_mkdir(sb, root, buf); + + oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); + oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event); + oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count); +#ifdef __powerpc64__ + /* + * We dont support per counter user/kernel selection, but + * we leave the entries because userspace expects them + */ +#endif /* __powerpc64__ */ + oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel); + oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user); + +#ifndef __powerpc64__ + /* FIXME: Not sure if this is used */ +#endif /* ! __powerpc64__ */ + oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask); + } + + oprofilefs_create_ulong(sb, root, "enable_kernel", &sys.enable_kernel); + oprofilefs_create_ulong(sb, root, "enable_user", &sys.enable_user); +#ifdef __powerpc64__ + oprofilefs_create_ulong(sb, root, "backtrace_spinlocks", + &sys.backtrace_spinlocks); +#endif /* __powerpc64__ */ + + /* Default to tracing both kernel and user */ + sys.enable_kernel = 1; + sys.enable_user = 1; +#ifdef __powerpc64__ + /* Turn on backtracing through spinlocks by default */ + sys.backtrace_spinlocks = 1; +#endif /* __powerpc64__ */ + + return 0; +} + +int __init oprofile_arch_init(struct oprofile_operations *ops) +{ +#ifndef __powerpc64__ +#ifdef CONFIG_FSL_BOOKE + model = &op_model_fsl_booke; +#else + return -ENODEV; +#endif + + cpu_type = kmalloc(32, GFP_KERNEL); + if (NULL == cpu_type) + return -ENOMEM; + + sprintf(cpu_type, "ppc/%s", cur_cpu_spec->cpu_name); + + model->num_counters = cur_cpu_spec->num_pmcs; + + ops->cpu_type = cpu_type; +#else /* __powerpc64__ */ + if (!cur_cpu_spec->oprofile_model || !cur_cpu_spec->oprofile_cpu_type) + return -ENODEV; + model = cur_cpu_spec->oprofile_model; + model->num_counters = cur_cpu_spec->num_pmcs; + + ops->cpu_type = cur_cpu_spec->oprofile_cpu_type; +#endif /* __powerpc64__ */ + ops->create_files = op_powerpc_create_files; + ops->setup = op_powerpc_setup; + ops->shutdown = op_powerpc_shutdown; + ops->start = op_powerpc_start; + ops->stop = op_powerpc_stop; + + printk(KERN_INFO "oprofile: using %s performance monitoring.\n", + ops->cpu_type); + + return 0; +} + +void oprofile_arch_exit(void) +{ +#ifndef __powerpc64__ + kfree(cpu_type); + cpu_type = NULL; +#endif /* ! __powerpc64__ */ +} diff --git a/arch/powerpc/oprofile/op_model_fsl_booke.c b/arch/powerpc/oprofile/op_model_fsl_booke.c new file mode 100644 index 00000000000..1917f8df8a8 --- /dev/null +++ b/arch/powerpc/oprofile/op_model_fsl_booke.c @@ -0,0 +1,183 @@ +/* + * oprofile/op_model_e500.c + * + * Freescale Book-E oprofile support, based on ppc64 oprofile support + * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM + * + * Copyright (c) 2004 Freescale Semiconductor, Inc + * + * Author: Andy Fleming + * Maintainer: Kumar Gala <Kumar.Gala@freescale.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/oprofile.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/reg_booke.h> +#include <asm/page.h> +#include <asm/perfmon.h> +#include <asm/oprofile_impl.h> + +static unsigned long reset_value[OP_MAX_COUNTER]; + +static int num_counters; +static int oprofile_running; + +static inline unsigned int ctr_read(unsigned int i) +{ + switch(i) { + case 0: + return mfpmr(PMRN_PMC0); + case 1: + return mfpmr(PMRN_PMC1); + case 2: + return mfpmr(PMRN_PMC2); + case 3: + return mfpmr(PMRN_PMC3); + default: + return 0; + } +} + +static inline void ctr_write(unsigned int i, unsigned int val) +{ + switch(i) { + case 0: + mtpmr(PMRN_PMC0, val); + break; + case 1: + mtpmr(PMRN_PMC1, val); + break; + case 2: + mtpmr(PMRN_PMC2, val); + break; + case 3: + mtpmr(PMRN_PMC3, val); + break; + default: + break; + } +} + + +static void fsl_booke_reg_setup(struct op_counter_config *ctr, + struct op_system_config *sys, + int num_ctrs) +{ + int i; + + num_counters = num_ctrs; + + /* freeze all counters */ + pmc_stop_ctrs(); + + /* Our counters count up, and "count" refers to + * how much before the next interrupt, and we interrupt + * on overflow. So we calculate the starting value + * which will give us "count" until overflow. + * Then we set the events on the enabled counters */ + for (i = 0; i < num_counters; ++i) { + reset_value[i] = 0x80000000UL - ctr[i].count; + + init_pmc_stop(i); + + set_pmc_event(i, ctr[i].event); + + set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); + } +} + +static void fsl_booke_start(struct op_counter_config *ctr) +{ + int i; + + mtmsr(mfmsr() | MSR_PMM); + + for (i = 0; i < num_counters; ++i) { + if (ctr[i].enabled) { + ctr_write(i, reset_value[i]); + /* Set Each enabled counterd to only + * count when the Mark bit is not set */ + set_pmc_marked(i, 1, 0); + pmc_start_ctr(i, 1); + } else { + ctr_write(i, 0); + + /* Set the ctr to be stopped */ + pmc_start_ctr(i, 0); + } + } + + /* Clear the freeze bit, and enable the interrupt. + * The counters won't actually start until the rfi clears + * the PMM bit */ + pmc_start_ctrs(1); + + oprofile_running = 1; + + pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(), + mfpmr(PMRN_PMGC0)); +} + +static void fsl_booke_stop(void) +{ + /* freeze counters */ + pmc_stop_ctrs(); + + oprofile_running = 0; + + pr_debug("stop on cpu %d, pmgc0 %x\n", smp_processor_id(), + mfpmr(PMRN_PMGC0)); + + mb(); +} + + +static void fsl_booke_handle_interrupt(struct pt_regs *regs, + struct op_counter_config *ctr) +{ + unsigned long pc; + int is_kernel; + int val; + int i; + + /* set the PMM bit (see comment below) */ + mtmsr(mfmsr() | MSR_PMM); + + pc = regs->nip; + is_kernel = (pc >= KERNELBASE); + + for (i = 0; i < num_counters; ++i) { + val = ctr_read(i); + if (val < 0) { + if (oprofile_running && ctr[i].enabled) { + oprofile_add_pc(pc, is_kernel, i); + ctr_write(i, reset_value[i]); + } else { + ctr_write(i, 0); + } + } + } + + /* The freeze bit was set by the interrupt. */ + /* Clear the freeze bit, and reenable the interrupt. + * The counters won't actually start until the rfi clears + * the PMM bit */ + pmc_start_ctrs(1); +} + +struct op_powerpc_model op_model_fsl_booke = { + .reg_setup = fsl_booke_reg_setup, + .start = fsl_booke_start, + .stop = fsl_booke_stop, + .handle_interrupt = fsl_booke_handle_interrupt, +}; diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c new file mode 100644 index 00000000000..88644931584 --- /dev/null +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -0,0 +1,309 @@ +/* + * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/oprofile.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/systemcfg.h> +#include <asm/rtas.h> +#include <asm/oprofile_impl.h> + +#define dbg(args...) + +static unsigned long reset_value[OP_MAX_COUNTER]; + +static int oprofile_running; +static int mmcra_has_sihv; + +/* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */ +static u32 mmcr0_val; +static u64 mmcr1_val; +static u32 mmcra_val; + +/* + * Since we do not have an NMI, backtracing through spinlocks is + * only a best guess. In light of this, allow it to be disabled at + * runtime. + */ +static int backtrace_spinlocks; + +static void power4_reg_setup(struct op_counter_config *ctr, + struct op_system_config *sys, + int num_ctrs) +{ + int i; + + /* + * SIHV / SIPR bits are only implemented on POWER4+ (GQ) and above. + * However we disable it on all POWER4 until we verify it works + * (I was seeing some strange behaviour last time I tried). + * + * It has been verified to work on POWER5 so we enable it there. + */ + if (cpu_has_feature(CPU_FTR_MMCRA_SIHV)) + mmcra_has_sihv = 1; + + /* + * The performance counter event settings are given in the mmcr0, + * mmcr1 and mmcra values passed from the user in the + * op_system_config structure (sys variable). + */ + mmcr0_val = sys->mmcr0; + mmcr1_val = sys->mmcr1; + mmcra_val = sys->mmcra; + + backtrace_spinlocks = sys->backtrace_spinlocks; + + for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) + reset_value[i] = 0x80000000UL - ctr[i].count; + + /* setup user and kernel profiling */ + if (sys->enable_kernel) + mmcr0_val &= ~MMCR0_KERNEL_DISABLE; + else + mmcr0_val |= MMCR0_KERNEL_DISABLE; + + if (sys->enable_user) + mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; + else + mmcr0_val |= MMCR0_PROBLEM_DISABLE; +} + +extern void ppc64_enable_pmcs(void); + +static void power4_cpu_setup(void *unused) +{ + unsigned int mmcr0 = mmcr0_val; + unsigned long mmcra = mmcra_val; + + ppc64_enable_pmcs(); + + /* set the freeze bit */ + mmcr0 |= MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE; + mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE; + mtspr(SPRN_MMCR0, mmcr0); + + mtspr(SPRN_MMCR1, mmcr1_val); + + mmcra |= MMCRA_SAMPLE_ENABLE; + mtspr(SPRN_MMCRA, mmcra); + + dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(), + mfspr(SPRN_MMCR0)); + dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), + mfspr(SPRN_MMCR1)); + dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(), + mfspr(SPRN_MMCRA)); +} + +static void power4_start(struct op_counter_config *ctr) +{ + int i; + unsigned int mmcr0; + + /* set the PMM bit (see comment below) */ + mtmsrd(mfmsr() | MSR_PMM); + + for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) { + if (ctr[i].enabled) { + ctr_write(i, reset_value[i]); + } else { + ctr_write(i, 0); + } + } + + mmcr0 = mfspr(SPRN_MMCR0); + + /* + * We must clear the PMAO bit on some (GQ) chips. Just do it + * all the time + */ + mmcr0 &= ~MMCR0_PMAO; + + /* + * now clear the freeze bit, counting will not start until we + * rfid from this excetion, because only at that point will + * the PMM bit be cleared + */ + mmcr0 &= ~MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + oprofile_running = 1; + + dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); +} + +static void power4_stop(void) +{ + unsigned int mmcr0; + + /* freeze counters */ + mmcr0 = mfspr(SPRN_MMCR0); + mmcr0 |= MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + oprofile_running = 0; + + dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); + + mb(); +} + +/* Fake functions used by canonicalize_pc */ +static void __attribute_used__ hypervisor_bucket(void) +{ +} + +static void __attribute_used__ rtas_bucket(void) +{ +} + +static void __attribute_used__ kernel_unknown_bucket(void) +{ +} + +static unsigned long check_spinlock_pc(struct pt_regs *regs, + unsigned long profile_pc) +{ + unsigned long pc = instruction_pointer(regs); + + /* + * If both the SIAR (sampled instruction) and the perfmon exception + * occurred in a spinlock region then we account the sample to the + * calling function. This isnt 100% correct, we really need soft + * IRQ disable so we always get the perfmon exception at the + * point at which the SIAR is set. + */ + if (backtrace_spinlocks && in_lock_functions(pc) && + in_lock_functions(profile_pc)) + return regs->link; + else + return profile_pc; +} + +/* + * On GQ and newer the MMCRA stores the HV and PR bits at the time + * the SIAR was sampled. We use that to work out if the SIAR was sampled in + * the hypervisor, our exception vectors or RTAS. + */ +static unsigned long get_pc(struct pt_regs *regs) +{ + unsigned long pc = mfspr(SPRN_SIAR); + unsigned long mmcra; + + /* Cant do much about it */ + if (!mmcra_has_sihv) + return check_spinlock_pc(regs, pc); + + mmcra = mfspr(SPRN_MMCRA); + + /* Were we in the hypervisor? */ + if ((systemcfg->platform == PLATFORM_PSERIES_LPAR) && + (mmcra & MMCRA_SIHV)) + /* function descriptor madness */ + return *((unsigned long *)hypervisor_bucket); + + /* We were in userspace, nothing to do */ + if (mmcra & MMCRA_SIPR) + return pc; + +#ifdef CONFIG_PPC_RTAS + /* Were we in RTAS? */ + if (pc >= rtas.base && pc < (rtas.base + rtas.size)) + /* function descriptor madness */ + return *((unsigned long *)rtas_bucket); +#endif + + /* Were we in our exception vectors or SLB real mode miss handler? */ + if (pc < 0x1000000UL) + return (unsigned long)__va(pc); + + /* Not sure where we were */ + if (pc < KERNELBASE) + /* function descriptor madness */ + return *((unsigned long *)kernel_unknown_bucket); + + return check_spinlock_pc(regs, pc); +} + +static int get_kernel(unsigned long pc) +{ + int is_kernel; + + if (!mmcra_has_sihv) { + is_kernel = (pc >= KERNELBASE); + } else { + unsigned long mmcra = mfspr(SPRN_MMCRA); + is_kernel = ((mmcra & MMCRA_SIPR) == 0); + } + + return is_kernel; +} + +static void power4_handle_interrupt(struct pt_regs *regs, + struct op_counter_config *ctr) +{ + unsigned long pc; + int is_kernel; + int val; + int i; + unsigned int mmcr0; + + pc = get_pc(regs); + is_kernel = get_kernel(pc); + + /* set the PMM bit (see comment below) */ + mtmsrd(mfmsr() | MSR_PMM); + + for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) { + val = ctr_read(i); + if (val < 0) { + if (oprofile_running && ctr[i].enabled) { + oprofile_add_pc(pc, is_kernel, i); + ctr_write(i, reset_value[i]); + } else { + ctr_write(i, 0); + } + } + } + + mmcr0 = mfspr(SPRN_MMCR0); + + /* reset the perfmon trigger */ + mmcr0 |= MMCR0_PMXE; + + /* + * We must clear the PMAO bit on some (GQ) chips. Just do it + * all the time + */ + mmcr0 &= ~MMCR0_PMAO; + + /* + * now clear the freeze bit, counting will not start until we + * rfid from this exception, because only at that point will + * the PMM bit be cleared + */ + mmcr0 &= ~MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); +} + +struct op_powerpc_model op_model_power4 = { + .reg_setup = power4_reg_setup, + .cpu_setup = power4_cpu_setup, + .start = power4_start, + .stop = power4_stop, + .handle_interrupt = power4_handle_interrupt, +}; diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c new file mode 100644 index 00000000000..e010b85996e --- /dev/null +++ b/arch/powerpc/oprofile/op_model_rs64.c @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/oprofile.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/oprofile_impl.h> + +#define dbg(args...) + +static void ctrl_write(unsigned int i, unsigned int val) +{ + unsigned int tmp = 0; + unsigned long shift = 0, mask = 0; + + dbg("ctrl_write %d %x\n", i, val); + + switch(i) { + case 0: + tmp = mfspr(SPRN_MMCR0); + shift = 6; + mask = 0x7F; + break; + case 1: + tmp = mfspr(SPRN_MMCR0); + shift = 0; + mask = 0x3F; + break; + case 2: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 4; + mask = 0x1F; + break; + case 3: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 9; + mask = 0x1F; + break; + case 4: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 14; + mask = 0x1F; + break; + case 5: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 19; + mask = 0x1F; + break; + case 6: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 24; + mask = 0x1F; + break; + case 7: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 28; + mask = 0xF; + break; + } + + tmp = tmp & ~(mask << shift); + tmp |= val << shift; + + switch(i) { + case 0: + case 1: + mtspr(SPRN_MMCR0, tmp); + break; + default: + mtspr(SPRN_MMCR1, tmp); + } + + dbg("ctrl_write mmcr0 %lx mmcr1 %lx\n", mfspr(SPRN_MMCR0), + mfspr(SPRN_MMCR1)); +} + +static unsigned long reset_value[OP_MAX_COUNTER]; + +static int num_counters; + +static void rs64_reg_setup(struct op_counter_config *ctr, + struct op_system_config *sys, + int num_ctrs) +{ + int i; + + num_counters = num_ctrs; + + for (i = 0; i < num_counters; ++i) + reset_value[i] = 0x80000000UL - ctr[i].count; + + /* XXX setup user and kernel profiling */ +} + +static void rs64_cpu_setup(void *unused) +{ + unsigned int mmcr0; + + /* reset MMCR0 and set the freeze bit */ + mmcr0 = MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + /* reset MMCR1, MMCRA */ + mtspr(SPRN_MMCR1, 0); + + if (cpu_has_feature(CPU_FTR_MMCRA)) + mtspr(SPRN_MMCRA, 0); + + mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE; + /* Only applies to POWER3, but should be safe on RS64 */ + mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE; + mtspr(SPRN_MMCR0, mmcr0); + + dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(), + mfspr(SPRN_MMCR0)); + dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), + mfspr(SPRN_MMCR1)); +} + +static void rs64_start(struct op_counter_config *ctr) +{ + int i; + unsigned int mmcr0; + + /* set the PMM bit (see comment below) */ + mtmsrd(mfmsr() | MSR_PMM); + + for (i = 0; i < num_counters; ++i) { + if (ctr[i].enabled) { + ctr_write(i, reset_value[i]); + ctrl_write(i, ctr[i].event); + } else { + ctr_write(i, 0); + } + } + + mmcr0 = mfspr(SPRN_MMCR0); + + /* + * now clear the freeze bit, counting will not start until we + * rfid from this excetion, because only at that point will + * the PMM bit be cleared + */ + mmcr0 &= ~MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); +} + +static void rs64_stop(void) +{ + unsigned int mmcr0; + + /* freeze counters */ + mmcr0 = mfspr(SPRN_MMCR0); + mmcr0 |= MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); + + mb(); +} + +static void rs64_handle_interrupt(struct pt_regs *regs, + struct op_counter_config *ctr) +{ + unsigned int mmcr0; + int val; + int i; + unsigned long pc = mfspr(SPRN_SIAR); + int is_kernel = (pc >= KERNELBASE); + + /* set the PMM bit (see comment below) */ + mtmsrd(mfmsr() | MSR_PMM); + + for (i = 0; i < num_counters; ++i) { + val = ctr_read(i); + if (val < 0) { + if (ctr[i].enabled) { + oprofile_add_pc(pc, is_kernel, i); + ctr_write(i, reset_value[i]); + } else { + ctr_write(i, 0); + } + } + } + + mmcr0 = mfspr(SPRN_MMCR0); + + /* reset the perfmon trigger */ + mmcr0 |= MMCR0_PMXE; + + /* + * now clear the freeze bit, counting will not start until we + * rfid from this exception, because only at that point will + * the PMM bit be cleared + */ + mmcr0 &= ~MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); +} + +struct op_powerpc_model op_model_rs64 = { + .reg_setup = rs64_reg_setup, + .cpu_setup = rs64_cpu_setup, + .start = rs64_start, + .stop = rs64_stop, + .handle_interrupt = rs64_handle_interrupt, +}; diff --git a/arch/powerpc/platforms/4xx/Kconfig b/arch/powerpc/platforms/4xx/Kconfig new file mode 100644 index 00000000000..ed39d6a3d22 --- /dev/null +++ b/arch/powerpc/platforms/4xx/Kconfig @@ -0,0 +1,280 @@ +config 4xx + bool + depends on 40x || 44x + default y + +config WANT_EARLY_SERIAL + bool + select SERIAL_8250 + default n + +menu "AMCC 4xx options" + depends on 4xx + +choice + prompt "Machine Type" + depends on 40x + default WALNUT + +config BUBINGA + bool "Bubinga" + select WANT_EARLY_SERIAL + help + This option enables support for the IBM 405EP evaluation board. + +config CPCI405 + bool "CPCI405" + help + This option enables support for the CPCI405 board. + +config EP405 + bool "EP405/EP405PC" + help + This option enables support for the EP405/EP405PC boards. + +config REDWOOD_5 + bool "Redwood-5" + help + This option enables support for the IBM STB04 evaluation board. + +config REDWOOD_6 + bool "Redwood-6" + help + This option enables support for the IBM STBx25xx evaluation board. + +config SYCAMORE + bool "Sycamore" + help + This option enables support for the IBM PPC405GPr evaluation board. + +config WALNUT + bool "Walnut" + help + This option enables support for the IBM PPC405GP evaluation board. + +config XILINX_ML300 + bool "Xilinx-ML300" + help + This option enables support for the Xilinx ML300 evaluation board. + +endchoice + +choice + prompt "Machine Type" + depends on 44x + default EBONY + +config BAMBOO + bool "Bamboo" + select WANT_EARLY_SERIAL + help + This option enables support for the IBM PPC440EP evaluation board. + +config EBONY + bool "Ebony" + select WANT_EARLY_SERIAL + help + This option enables support for the IBM PPC440GP evaluation board. + +config LUAN + bool "Luan" + select WANT_EARLY_SERIAL + help + This option enables support for the IBM PPC440SP evaluation board. + +config OCOTEA + bool "Ocotea" + select WANT_EARLY_SERIAL + help + This option enables support for the IBM PPC440GX evaluation board. + +endchoice + +config EP405PC + bool "EP405PC Support" + depends on EP405 + + +# It's often necessary to know the specific 4xx processor type. +# Fortunately, it is impled (so far) from the board type, so we +# don't need to ask more redundant questions. +config NP405H + bool + depends on ASH + default y + +config 440EP + bool + depends on BAMBOO + select PPC_FPU + default y + +config 440GP + bool + depends on EBONY + default y + +config 440GX + bool + depends on OCOTEA + default y + +config 440SP + bool + depends on LUAN + default y + +config 440 + bool + depends on 440GP || 440SP || 440EP + default y + +config 440A + bool + depends on 440GX + default y + +config IBM440EP_ERR42 + bool + depends on 440EP + default y + +# All 405-based cores up until the 405GPR and 405EP have this errata. +config IBM405_ERR77 + bool + depends on 40x && !403GCX && !405GPR && !405EP + default y + +# All 40x-based cores, up until the 405GPR and 405EP have this errata. +config IBM405_ERR51 + bool + depends on 40x && !405GPR && !405EP + default y + +config BOOKE + bool + depends on 44x + default y + +config IBM_OCP + bool + depends on ASH || BAMBOO || BUBINGA || CPCI405 || EBONY || EP405 || LUAN || OCOTEA || REDWOOD_5 || REDWOOD_6 || SYCAMORE || WALNUT + default y + +config XILINX_OCP + bool + depends on XILINX_ML300 + default y + +config IBM_EMAC4 + bool + depends on 440GX || 440SP + default y + +config BIOS_FIXUP + bool + depends on BUBINGA || EP405 || SYCAMORE || WALNUT + default y + +# OAK doesn't exist but wanted to keep this around for any future 403GCX boards +config 403GCX + bool + depends OAK + default y + +config 405EP + bool + depends on BUBINGA + default y + +config 405GP + bool + depends on CPCI405 || EP405 || WALNUT + default y + +config 405GPR + bool + depends on SYCAMORE + default y + +config VIRTEX_II_PRO + bool + depends on XILINX_ML300 + default y + +config STB03xxx + bool + depends on REDWOOD_5 || REDWOOD_6 + default y + +config EMBEDDEDBOOT + bool + depends on EP405 || XILINX_ML300 + default y + +config IBM_OPENBIOS + bool + depends on ASH || BUBINGA || REDWOOD_5 || REDWOOD_6 || SYCAMORE || WALNUT + default y + +config PPC4xx_DMA + bool "PPC4xx DMA controller support" + depends on 4xx + +config PPC4xx_EDMA + bool + depends on !STB03xxx && PPC4xx_DMA + default y + +config PPC_GEN550 + bool + depends on 4xx + default y + +choice + prompt "TTYS0 device and default console" + depends on 40x + default UART0_TTYS0 + +config UART0_TTYS0 + bool "UART0" + +config UART0_TTYS1 + bool "UART1" + +endchoice + +config SERIAL_SICC + bool "SICC Serial port support" + depends on STB03xxx + +config UART1_DFLT_CONSOLE + bool + depends on SERIAL_SICC && UART0_TTYS1 + default y + +config SERIAL_SICC_CONSOLE + bool + depends on SERIAL_SICC && UART0_TTYS1 + default y +endmenu + + +menu "IBM 40x options" + depends on 40x + +config SERIAL_SICC + bool "SICC Serial port" + depends on STB03xxx + +config UART1_DFLT_CONSOLE + bool + depends on SERIAL_SICC && UART0_TTYS1 + default y + +config SERIAL_SICC_CONSOLE + bool + depends on SERIAL_SICC && UART0_TTYS1 + default y + +endmenu diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile new file mode 100644 index 00000000000..79ff6b1e887 --- /dev/null +++ b/arch/powerpc/platforms/4xx/Makefile @@ -0,0 +1 @@ +# empty makefile so make clean works
\ No newline at end of file diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig new file mode 100644 index 00000000000..c5bc2821d99 --- /dev/null +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -0,0 +1,86 @@ +config 85xx + bool + depends on E500 + default y + +config PPC_INDIRECT_PCI_BE + bool + depends on 85xx + default y + +menu "Freescale 85xx options" + depends on E500 + +choice + prompt "Machine Type" + depends on 85xx + default MPC8540_ADS + +config MPC8540_ADS + bool "Freescale MPC8540 ADS" + help + This option enables support for the MPC 8540 ADS evaluation board. + +config MPC8548_CDS + bool "Freescale MPC8548 CDS" + help + This option enablese support for the MPC8548 CDS evaluation board. + +config MPC8555_CDS + bool "Freescale MPC8555 CDS" + help + This option enablese support for the MPC8555 CDS evaluation board. + +config MPC8560_ADS + bool "Freescale MPC8560 ADS" + help + This option enables support for the MPC 8560 ADS evaluation board. + +config SBC8560 + bool "WindRiver PowerQUICC III SBC8560" + help + This option enables support for the WindRiver PowerQUICC III + SBC8560 board. + +config STX_GP3 + bool "Silicon Turnkey Express GP3" + help + This option enables support for the Silicon Turnkey Express GP3 + board. + +endchoice + +# It's often necessary to know the specific 85xx processor type. +# Fortunately, it is implied (so far) from the board type, so we +# don't need to ask more redundant questions. +config MPC8540 + bool + depends on MPC8540_ADS + default y + +config MPC8548 + bool + depends on MPC8548_CDS + default y + +config MPC8555 + bool + depends on MPC8555_CDS + default y + +config MPC8560 + bool + depends on SBC8560 || MPC8560_ADS || STX_GP3 + default y + +config 85xx_PCI2 + bool "Supprt for 2nd PCI host controller" + depends on MPC8555_CDS + default y + +config PPC_GEN550 + bool + depends on MPC8540 || SBC8560 || MPC8555 + default y + +endmenu diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile new file mode 100644 index 00000000000..6407197ffd8 --- /dev/null +++ b/arch/powerpc/platforms/85xx/Makefile @@ -0,0 +1 @@ +# empty makefile so make clean works diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig new file mode 100644 index 00000000000..c8c0ba3cf8e --- /dev/null +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -0,0 +1,352 @@ +config FADS + bool + +choice + prompt "8xx Machine Type" + depends on 8xx + default RPXLITE + +config RPXLITE + bool "RPX-Lite" + ---help--- + Single-board computers based around the PowerPC MPC8xx chips and + intended for embedded applications. The following types are + supported: + + RPX-Lite: + Embedded Planet RPX Lite. PC104 form-factor SBC based on the MPC823. + + RPX-Classic: + Embedded Planet RPX Classic Low-fat. Credit-card-size SBC based on + the MPC 860 + + BSE-IP: + Bright Star Engineering ip-Engine. + + TQM823L: + TQM850L: + TQM855L: + TQM860L: + MPC8xx based family of mini modules, half credit card size, + up to 64 MB of RAM, 8 MB Flash, (Fast) Ethernet, 2 x serial ports, + 2 x CAN bus interface, ... + Manufacturer: TQ Components, www.tq-group.de + Date of Release: October (?) 1999 + End of Life: not yet :-) + URL: + - module: <http://www.denx.de/PDF/TQM8xxLHWM201.pdf> + - starter kit: <http://www.denx.de/PDF/STK8xxLHWM201.pdf> + - images: <http://www.denx.de/embedded-ppc-en.html> + + FPS850L: + FingerPrint Sensor System (based on TQM850L) + Manufacturer: IKENDI AG, <http://www.ikendi.com/> + Date of Release: November 1999 + End of life: end 2000 ? + URL: see TQM850L + + IVMS8: + MPC860 based board used in the "Integrated Voice Mail System", + Small Version (8 voice channels) + Manufacturer: Speech Design, <http://www.speech-design.de/> + Date of Release: December 2000 (?) + End of life: - + URL: <http://www.speech-design.de/> + + IVML24: + MPC860 based board used in the "Integrated Voice Mail System", + Large Version (24 voice channels) + Manufacturer: Speech Design, <http://www.speech-design.de/> + Date of Release: March 2001 (?) + End of life: - + URL: <http://www.speech-design.de/> + + HERMES: + Hermes-Pro ISDN/LAN router with integrated 8 x hub + Manufacturer: Multidata Gesellschaft fur Datentechnik und Informatik + <http://www.multidata.de/> + Date of Release: 2000 (?) + End of life: - + URL: <http://www.multidata.de/english/products/hpro.htm> + + IP860: + VMEBus IP (Industry Pack) carrier board with MPC860 + Manufacturer: MicroSys GmbH, <http://www.microsys.de/> + Date of Release: ? + End of life: - + URL: <http://www.microsys.de/html/ip860.html> + + PCU_E: + PCU = Peripheral Controller Unit, Extended + Manufacturer: Siemens AG, ICN (Information and Communication Networks) + <http://www.siemens.de/page/1,3771,224315-1-999_2_226207-0,00.html> + Date of Release: April 2001 + End of life: August 2001 + URL: n. a. + +config RPXCLASSIC + bool "RPX-Classic" + help + The RPX-Classic is a single-board computer based on the Motorola + MPC860. It features 16MB of DRAM and a variable amount of flash, + I2C EEPROM, thermal monitoring, a PCMCIA slot, a DIP switch and two + LEDs. Variants with Ethernet ports exist. Say Y here to support it + directly. + +config BSEIP + bool "BSE-IP" + help + Say Y here to support the Bright Star Engineering ipEngine SBC. + This is a credit-card-sized device featuring a MPC823 processor, + 26MB DRAM, 4MB flash, Ethernet, a 16K-gate FPGA, USB, an LCD/video + controller, and two RS232 ports. + +config MPC8XXFADS + bool "FADS" + select FADS + +config MPC86XADS + bool "MPC86XADS" + help + MPC86x Application Development System by Freescale Semiconductor. + The MPC86xADS is meant to serve as a platform for s/w and h/w + development around the MPC86X processor families. + select FADS + +config MPC885ADS + bool "MPC885ADS" + help + Freescale Semiconductor MPC885 Application Development System (ADS). + Also known as DUET. + The MPC885ADS is meant to serve as a platform for s/w and h/w + development around the MPC885 processor family. + +config TQM823L + bool "TQM823L" + help + Say Y here to support the TQM823L, one of an MPC8xx-based family of + mini SBCs (half credit-card size) from TQ Components first released + in late 1999. Technical references are at + <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and + <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at + <http://www.denx.de/embedded-ppc-en.html>. + +config TQM850L + bool "TQM850L" + help + Say Y here to support the TQM850L, one of an MPC8xx-based family of + mini SBCs (half credit-card size) from TQ Components first released + in late 1999. Technical references are at + <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and + <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at + <http://www.denx.de/embedded-ppc-en.html>. + +config TQM855L + bool "TQM855L" + help + Say Y here to support the TQM855L, one of an MPC8xx-based family of + mini SBCs (half credit-card size) from TQ Components first released + in late 1999. Technical references are at + <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and + <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at + <http://www.denx.de/embedded-ppc-en.html>. + +config TQM860L + bool "TQM860L" + help + Say Y here to support the TQM860L, one of an MPC8xx-based family of + mini SBCs (half credit-card size) from TQ Components first released + in late 1999. Technical references are at + <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and + <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at + <http://www.denx.de/embedded-ppc-en.html>. + +config FPS850L + bool "FPS850L" + +config IVMS8 + bool "IVMS8" + help + Say Y here to support the Integrated Voice-Mail Small 8-channel SBC + from Speech Design, released March 2001. The manufacturer's website + is at <http://www.speech-design.de/>. + +config IVML24 + bool "IVML24" + help + Say Y here to support the Integrated Voice-Mail Large 24-channel SBC + from Speech Design, released March 2001. The manufacturer's website + is at <http://www.speech-design.de/>. + +config HERMES_PRO + bool "HERMES" + +config IP860 + bool "IP860" + +config LWMON + bool "LWMON" + +config PCU_E + bool "PCU_E" + +config CCM + bool "CCM" + +config LANTEC + bool "LANTEC" + +config MBX + bool "MBX" + help + MBX is a line of Motorola single-board computer based around the + MPC821 and MPC860 processors, and intended for embedded-controller + applications. Say Y here to support these boards directly. + +config WINCEPT + bool "WinCept" + help + The Wincept 100/110 is a Motorola single-board computer based on the + MPC821 PowerPC, introduced in 1998 and designed to be used in + thin-client machines. Say Y to support it directly. + +endchoice + +# +# MPC8xx Communication options +# + +menu "MPC8xx CPM Options" + depends on 8xx + +config SCC_ENET + bool "CPM SCC Ethernet" + depends on NET_ETHERNET + help + Enable Ethernet support via the Motorola MPC8xx serial + communications controller. + +choice + prompt "SCC used for Ethernet" + depends on SCC_ENET + default SCC1_ENET + +config SCC1_ENET + bool "SCC1" + help + Use MPC8xx serial communications controller 1 to drive Ethernet + (default). + +config SCC2_ENET + bool "SCC2" + help + Use MPC8xx serial communications controller 2 to drive Ethernet. + +config SCC3_ENET + bool "SCC3" + help + Use MPC8xx serial communications controller 3 to drive Ethernet. + +endchoice + +config FEC_ENET + bool "860T FEC Ethernet" + depends on NET_ETHERNET + help + Enable Ethernet support via the Fast Ethernet Controller (FCC) on + the Motorola MPC8260. + +config USE_MDIO + bool "Use MDIO for PHY configuration" + depends on FEC_ENET + help + On some boards the hardware configuration of the ethernet PHY can be + used without any software interaction over the MDIO interface, so + all MII code can be omitted. Say N here if unsure or if you don't + need link status reports. + +config FEC_AM79C874 + bool "Support AMD79C874 PHY" + depends on USE_MDIO + +config FEC_LXT970 + bool "Support LXT970 PHY" + depends on USE_MDIO + +config FEC_LXT971 + bool "Support LXT971 PHY" + depends on USE_MDIO + +config FEC_QS6612 + bool "Support QS6612 PHY" + depends on USE_MDIO + +config ENET_BIG_BUFFERS + bool "Use Big CPM Ethernet Buffers" + depends on SCC_ENET || FEC_ENET + help + Allocate large buffers for MPC8xx Ethernet. Increases throughput + and decreases the likelihood of dropped packets, but costs memory. + +config HTDMSOUND + bool "Embedded Planet HIOX Audio" + depends on SOUND=y + +# This doesn't really belong here, but it is convenient to ask +# 8xx specific questions. +comment "Generic MPC8xx Options" + +config 8xx_COPYBACK + bool "Copy-Back Data Cache (else Writethrough)" + help + Saying Y here will cause the cache on an MPC8xx processor to be used + in Copy-Back mode. If you say N here, it is used in Writethrough + mode. + + If in doubt, say Y here. + +config 8xx_CPU6 + bool "CPU6 Silicon Errata (860 Pre Rev. C)" + help + MPC860 CPUs, prior to Rev C have some bugs in the silicon, which + require workarounds for Linux (and most other OSes to work). If you + get a BUG() very early in boot, this might fix the problem. For + more details read the document entitled "MPC860 Family Device Errata + Reference" on Motorola's website. This option also incurs a + performance hit. + + If in doubt, say N here. + +choice + prompt "Microcode patch selection" + default NO_UCODE_PATCH + help + Help not implemented yet, coming soon. + +config NO_UCODE_PATCH + bool "None" + +config USB_SOF_UCODE_PATCH + bool "USB SOF patch" + help + Help not implemented yet, coming soon. + +config I2C_SPI_UCODE_PATCH + bool "I2C/SPI relocation patch" + help + Help not implemented yet, coming soon. + +config I2C_SPI_SMC1_UCODE_PATCH + bool "I2C/SPI/SMC1 relocation patch" + help + Help not implemented yet, coming soon. + +endchoice + +config UCODE_PATCH + bool + default y + depends on !NO_UCODE_PATCH + +endmenu + diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile new file mode 100644 index 00000000000..da744d5e52f --- /dev/null +++ b/arch/powerpc/platforms/Makefile @@ -0,0 +1,7 @@ +ifeq ($(CONFIG_PPC32),y) +obj-$(CONFIG_PPC_PMAC) += powermac/ +endif +obj-$(CONFIG_4xx) += 4xx/ +obj-$(CONFIG_85xx) += 85xx/ +obj-$(CONFIG_PPC_PSERIES) += pseries/ +obj-$(CONFIG_PPC_ISERIES) += iseries/ diff --git a/arch/powerpc/platforms/apus/Kconfig b/arch/powerpc/platforms/apus/Kconfig new file mode 100644 index 00000000000..6bde3bffed8 --- /dev/null +++ b/arch/powerpc/platforms/apus/Kconfig @@ -0,0 +1,130 @@ + +config AMIGA + bool + depends on APUS + default y + help + This option enables support for the Amiga series of computers. + +config ZORRO + bool + depends on APUS + default y + help + This enables support for the Zorro bus in the Amiga. If you have + expansion cards in your Amiga that conform to the Amiga + AutoConfig(tm) specification, say Y, otherwise N. Note that even + expansion cards that do not fit in the Zorro slots but fit in e.g. + the CPU slot may fall in this category, so you have to say Y to let + Linux use these. + +config ABSTRACT_CONSOLE + bool + depends on APUS + default y + +config APUS_FAST_EXCEPT + bool + depends on APUS + default y + +config AMIGA_PCMCIA + bool "Amiga 1200/600 PCMCIA support" + depends on APUS && EXPERIMENTAL + help + Include support in the kernel for pcmcia on Amiga 1200 and Amiga + 600. If you intend to use pcmcia cards say Y; otherwise say N. + +config AMIGA_BUILTIN_SERIAL + tristate "Amiga builtin serial support" + depends on APUS + help + If you want to use your Amiga's built-in serial port in Linux, + answer Y. + + To compile this driver as a module, choose M here. + +config GVPIOEXT + tristate "GVP IO-Extender support" + depends on APUS + help + If you want to use a GVP IO-Extender serial card in Linux, say Y. + Otherwise, say N. + +config GVPIOEXT_LP + tristate "GVP IO-Extender parallel printer support" + depends on GVPIOEXT + help + Say Y to enable driving a printer from the parallel port on your + GVP IO-Extender card, N otherwise. + +config GVPIOEXT_PLIP + tristate "GVP IO-Extender PLIP support" + depends on GVPIOEXT + help + Say Y to enable doing IP over the parallel port on your GVP + IO-Extender card, N otherwise. + +config MULTIFACE_III_TTY + tristate "Multiface Card III serial support" + depends on APUS + help + If you want to use a Multiface III card's serial port in Linux, + answer Y. + + To compile this driver as a module, choose M here. + +config A2232 + tristate "Commodore A2232 serial support (EXPERIMENTAL)" + depends on EXPERIMENTAL && APUS + ---help--- + This option supports the 2232 7-port serial card shipped with the + Amiga 2000 and other Zorro-bus machines, dating from 1989. At + a max of 19,200 bps, the ports are served by a 6551 ACIA UART chip + each, plus a 8520 CIA, and a master 6502 CPU and buffer as well. The + ports were connected with 8 pin DIN connectors on the card bracket, + for which 8 pin to DB25 adapters were supplied. The card also had + jumpers internally to toggle various pinning configurations. + + This driver can be built as a module; but then "generic_serial" + will also be built as a module. This has to be loaded before + "ser_a2232". If you want to do this, answer M here. + +config WHIPPET_SERIAL + tristate "Hisoft Whippet PCMCIA serial support" + depends on AMIGA_PCMCIA + help + HiSoft has a web page at <http://www.hisoft.co.uk/>, but there + is no listing for the Whippet in their Amiga section. + +config APNE + tristate "PCMCIA NE2000 support" + depends on AMIGA_PCMCIA + help + If you have a PCMCIA NE2000 compatible adapter, say Y. Otherwise, + say N. + + To compile this driver as a module, choose M here: the + module will be called apne. + +config SERIAL_CONSOLE + bool "Support for serial port console" + depends on APUS && (AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y) + +config HEARTBEAT + bool "Use power LED as a heartbeat" + depends on APUS + help + Use the power-on LED on your machine as a load meter. The exact + behavior is platform-dependent, but normally the flash frequency is + a hyperbolic function of the 5-minute load average. + +config PROC_HARDWARE + bool "/proc/hardware support" + depends on APUS + +source "drivers/zorro/Kconfig" + +config PCI_PERMEDIA + bool "PCI for Permedia2" + depends on !4xx && !8xx && APUS diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig new file mode 100644 index 00000000000..2d755b79d51 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/Kconfig @@ -0,0 +1,305 @@ +choice + prompt "Machine Type" + depends on EMBEDDED6xx + +config KATANA + bool "Artesyn-Katana" + help + Select KATANA if configuring an Artesyn KATANA 750i or 3750 + cPCI board. + +config WILLOW + bool "Cogent-Willow" + +config CPCI690 + bool "Force-CPCI690" + help + Select CPCI690 if configuring a Force CPCI690 cPCI board. + +config POWERPMC250 + bool "Force-PowerPMC250" + +config CHESTNUT + bool "IBM 750FX Eval board or 750GX Eval board" + help + Select CHESTNUT if configuring an IBM 750FX Eval Board or a + IBM 750GX Eval board. + +config SPRUCE + bool "IBM-Spruce" + +config HDPU + bool "Sky-HDPU" + help + Select HDPU if configuring a Sky Computers Compute Blade. + +config HDPU_FEATURES + depends HDPU + tristate "HDPU-Features" + help + Select to enable HDPU enhanced features. + +config EV64260 + bool "Marvell-EV64260BP" + help + Select EV64260 if configuring a Marvell (formerly Galileo) + EV64260BP Evaluation platform. + +config LOPEC + bool "Motorola-LoPEC" + +config MVME5100 + bool "Motorola-MVME5100" + +config PPLUS + bool "Motorola-PowerPlus" + +config PRPMC750 + bool "Motorola-PrPMC750" + +config PRPMC800 + bool "Motorola-PrPMC800" + +config SANDPOINT + bool "Motorola-Sandpoint" + help + Select SANDPOINT if configuring for a Motorola Sandpoint X3 + (any flavor). + +config RADSTONE_PPC7D + bool "Radstone Technology PPC7D board" + +config PAL4 + bool "SBS-Palomar4" + +config GEMINI + bool "Synergy-Gemini" + depends on BROKEN + help + Select Gemini if configuring for a Synergy Microsystems' Gemini + series Single Board Computer. More information is available at: + <http://www.synergymicro.com/PressRel/97_10_15.html>. + +config EST8260 + bool "EST8260" + ---help--- + The EST8260 is a single-board computer manufactured by Wind River + Systems, Inc. (formerly Embedded Support Tools Corp.) and based on + the MPC8260. Wind River Systems has a website at + <http://www.windriver.com/>, but the EST8260 cannot be found on it + and has probably been discontinued or rebadged. + +config SBC82xx + bool "SBC82xx" + ---help--- + SBC PowerQUICC II, single-board computer with MPC82xx CPU + Manufacturer: Wind River Systems, Inc. + Date of Release: May 2003 + End of Life: - + URL: <http://www.windriver.com/> + +config SBS8260 + bool "SBS8260" + +config RPX8260 + bool "RPXSUPER" + +config TQM8260 + bool "TQM8260" + ---help--- + MPC8260 based module, little larger than credit card, + up to 128 MB global + 64 MB local RAM, 32 MB Flash, + 32 kB EEPROM, 256 kB L@ Cache, 10baseT + 100baseT Ethernet, + 2 x serial ports, ... + Manufacturer: TQ Components, www.tq-group.de + Date of Release: June 2001 + End of Life: not yet :-) + URL: <http://www.denx.de/PDF/TQM82xx_SPEC_Rev005.pdf> + +config ADS8272 + bool "ADS8272" + +config PQ2FADS + bool "Freescale-PQ2FADS" + help + Select PQ2FADS if you wish to configure for a Freescale + PQ2FADS board (-VR or -ZU). + +config LITE5200 + bool "Freescale LITE5200 / (IceCube)" + select PPC_MPC52xx + help + Support for the LITE5200 dev board for the MPC5200 from Freescale. + This is for the LITE5200 version 2.0 board. Don't know if it changes + much but it's only been tested on this board version. I think this + board is also known as IceCube. + +config MPC834x_SYS + bool "Freescale MPC834x SYS" + help + This option enables support for the MPC 834x SYS evaluation board. + + Be aware that PCI buses can only function when SYS board is plugged + into the PIB (Platform IO Board) board from Freescale which provide + 3 PCI slots. The PIBs PCI initialization is the bootloader's + responsiblilty. + +config EV64360 + bool "Marvell-EV64360BP" + help + Select EV64360 if configuring a Marvell EV64360BP Evaluation + platform. +endchoice + +config PQ2ADS + bool + depends on ADS8272 + default y + +config TQM8xxL + bool + depends on 8xx && (TQM823L || TQM850L || FPS850L || TQM855L || TQM860L) + default y + +config PPC_MPC52xx + bool + +config 8260 + bool "CPM2 Support" if WILLOW + depends on 6xx + default y if TQM8260 || RPX8260 || EST8260 || SBS8260 || SBC82xx || PQ2FADS + help + The MPC8260 is a typical embedded CPU made by Motorola. Selecting + this option means that you wish to build a kernel for a machine with + an 8260 class CPU. + +config 8272 + bool + depends on 6xx + default y if ADS8272 + select 8260 + help + The MPC8272 CPM has a different internal dpram setup than other CPM2 + devices + +config 83xx + bool + default y if MPC834x_SYS + +config MPC834x + bool + default y if MPC834x_SYS + +config CPM2 + bool + depends on 8260 || MPC8560 || MPC8555 + default y + help + The CPM2 (Communications Processor Module) is a coprocessor on + embedded CPUs made by Motorola. Selecting this option means that + you wish to build a kernel for a machine with a CPM2 coprocessor + on it (826x, 827x, 8560). + +config PPC_GEN550 + bool + depends on SANDPOINT || SPRUCE || PPLUS || \ + PRPMC750 || PRPMC800 || LOPEC || \ + (EV64260 && !SERIAL_MPSC) || CHESTNUT || RADSTONE_PPC7D || \ + 83xx + default y + +config FORCE + bool + depends on 6xx && POWERPMC250 + default y + +config GT64260 + bool + depends on EV64260 || CPCI690 + default y + +config MV64360 # Really MV64360 & MV64460 + bool + depends on CHESTNUT || KATANA || RADSTONE_PPC7D || HDPU || EV64360 + default y + +config MV64X60 + bool + depends on (GT64260 || MV64360) + default y + +menu "Set bridge options" + depends on MV64X60 + +config NOT_COHERENT_CACHE + bool "Turn off Cache Coherency" + default n + help + Some 64x60 bridges lock up when trying to enforce cache coherency. + When this option is selected, cache coherency will be turned off. + Note that this can cause other problems (e.g., stale data being + speculatively loaded via a cached mapping). Use at your own risk. + +config MV64X60_BASE + hex "Set bridge base used by firmware" + default "0xf1000000" + help + A firmware can leave the base address of the bridge's registers at + a non-standard location. If so, set this value to reflect the + address of that non-standard location. + +config MV64X60_NEW_BASE + hex "Set bridge base used by kernel" + default "0xf1000000" + help + If the current base address of the bridge's registers is not where + you want it, set this value to the address that you want it moved to. + +endmenu + +config NONMONARCH_SUPPORT + bool "Enable Non-Monarch Support" + depends on PRPMC800 + +config HARRIER + bool + depends on PRPMC800 + default y + +config EPIC_SERIAL_MODE + bool + depends on 6xx && (LOPEC || SANDPOINT) + default y + +config MPC10X_BRIDGE + bool + depends on POWERPMC250 || LOPEC || SANDPOINT + default y + +config MPC10X_OPENPIC + bool + depends on POWERPMC250 || LOPEC || SANDPOINT + default y + +config MPC10X_STORE_GATHERING + bool "Enable MPC10x store gathering" + depends on MPC10X_BRIDGE + +config SANDPOINT_ENABLE_UART1 + bool "Enable DUART mode on Sandpoint" + depends on SANDPOINT + help + If this option is enabled then the MPC824x processor will run + in DUART mode instead of UART mode. + +config HARRIER_STORE_GATHERING + bool "Enable Harrier store gathering" + depends on HARRIER + +config MVME5100_IPMC761_PRESENT + bool "MVME5100 configured with an IPMC761" + depends on MVME5100 + +config SPRUCE_BAUD_33M + bool "Spruce baud clock support" + depends on SPRUCE diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig new file mode 100644 index 00000000000..3d957a30c8c --- /dev/null +++ b/arch/powerpc/platforms/iseries/Kconfig @@ -0,0 +1,31 @@ + +menu "iSeries device drivers" + depends on PPC_ISERIES + +config VIOCONS + tristate "iSeries Virtual Console Support" + +config VIODASD + tristate "iSeries Virtual I/O disk support" + help + If you are running on an iSeries system and you want to use + virtual disks created and managed by OS/400, say Y. + +config VIOCD + tristate "iSeries Virtual I/O CD support" + help + If you are running Linux on an IBM iSeries system and you want to + read a CD drive owned by OS/400, say Y here. + +config VIOTAPE + tristate "iSeries Virtual Tape Support" + help + If you are running Linux on an iSeries system and you want Linux + to read and/or write a tape drive owned by OS/400, say Y here. + +endmenu + +config VIOPATH + bool + depends on VIOCONS || VIODASD || VIOCD || VIOTAPE || VETH + default y diff --git a/arch/powerpc/platforms/iseries/Makefile b/arch/powerpc/platforms/iseries/Makefile new file mode 100644 index 00000000000..127b465308b --- /dev/null +++ b/arch/powerpc/platforms/iseries/Makefile @@ -0,0 +1,9 @@ +EXTRA_CFLAGS += -mno-minimal-toc + +obj-y += hvlog.o hvlpconfig.o lpardata.o setup.o mf.o lpevents.o \ + hvcall.o proc.o htab.o iommu.o misc.o +obj-$(CONFIG_PCI) += pci.o irq.o vpdinfo.o +obj-$(CONFIG_IBMVIO) += vio.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_VIOPATH) += viopath.o +obj-$(CONFIG_MODULES) += ksyms.o diff --git a/arch/powerpc/platforms/iseries/call_sm.h b/arch/powerpc/platforms/iseries/call_sm.h new file mode 100644 index 00000000000..ef223166cf2 --- /dev/null +++ b/arch/powerpc/platforms/iseries/call_sm.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _ISERIES_CALL_SM_H +#define _ISERIES_CALL_SM_H + +/* + * This file contains the "hypervisor call" interface which is used to + * drive the hypervisor from the OS. + */ + +#include <asm/iSeries/HvCallSc.h> +#include <asm/iSeries/HvTypes.h> + +#define HvCallSmGet64BitsOfAccessMap HvCallSm + 11 + +static inline u64 HvCallSm_get64BitsOfAccessMap(HvLpIndex lpIndex, + u64 indexIntoBitMap) +{ + return HvCall2(HvCallSmGet64BitsOfAccessMap, lpIndex, indexIntoBitMap); +} + +#endif /* _ISERIES_CALL_SM_H */ diff --git a/arch/powerpc/platforms/iseries/htab.c b/arch/powerpc/platforms/iseries/htab.c new file mode 100644 index 00000000000..431b22767d0 --- /dev/null +++ b/arch/powerpc/platforms/iseries/htab.c @@ -0,0 +1,256 @@ +/* + * iSeries hashtable management. + * Derived from pSeries_htab.c + * + * SMP scalability work: + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/machdep.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/iSeries/HvCallHpt.h> +#include <asm/abs_addr.h> +#include <linux/spinlock.h> + +static spinlock_t iSeries_hlocks[64] __cacheline_aligned_in_smp = + { [0 ... 63] = SPIN_LOCK_UNLOCKED}; + +/* + * Very primitive algorithm for picking up a lock + */ +static inline void iSeries_hlock(unsigned long slot) +{ + if (slot & 0x8) + slot = ~slot; + spin_lock(&iSeries_hlocks[(slot >> 4) & 0x3f]); +} + +static inline void iSeries_hunlock(unsigned long slot) +{ + if (slot & 0x8) + slot = ~slot; + spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]); +} + +static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, + unsigned long prpn, unsigned long vflags, + unsigned long rflags) +{ + unsigned long arpn; + long slot; + hpte_t lhpte; + int secondary = 0; + + /* + * The hypervisor tries both primary and secondary. + * If we are being called to insert in the secondary, + * it means we have already tried both primary and secondary, + * so we return failure immediately. + */ + if (vflags & HPTE_V_SECONDARY) + return -1; + + iSeries_hlock(hpte_group); + + slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT); + BUG_ON(lhpte.v & HPTE_V_VALID); + + if (slot == -1) { /* No available entry found in either group */ + iSeries_hunlock(hpte_group); + return -1; + } + + if (slot < 0) { /* MSB set means secondary group */ + vflags |= HPTE_V_VALID; + secondary = 1; + slot &= 0x7fffffffffffffff; + } + + arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT; + + lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; + lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags; + + /* Now fill in the actual HPTE */ + HvCallHpt_addValidate(slot, secondary, &lhpte); + + iSeries_hunlock(hpte_group); + + return (secondary << 3) | (slot & 7); +} + +long iSeries_hpte_bolt_or_insert(unsigned long hpte_group, + unsigned long va, unsigned long prpn, unsigned long vflags, + unsigned long rflags) +{ + long slot; + hpte_t lhpte; + + slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT); + + if (lhpte.v & HPTE_V_VALID) { + /* Bolt the existing HPTE */ + HvCallHpt_setSwBits(slot, 0x10, 0); + HvCallHpt_setPp(slot, PP_RWXX); + return 0; + } + + return iSeries_hpte_insert(hpte_group, va, prpn, vflags, rflags); +} + +static unsigned long iSeries_hpte_getword0(unsigned long slot) +{ + hpte_t hpte; + + HvCallHpt_get(&hpte, slot); + return hpte.v; +} + +static long iSeries_hpte_remove(unsigned long hpte_group) +{ + unsigned long slot_offset; + int i; + unsigned long hpte_v; + + /* Pick a random slot to start at */ + slot_offset = mftb() & 0x7; + + iSeries_hlock(hpte_group); + + for (i = 0; i < HPTES_PER_GROUP; i++) { + hpte_v = iSeries_hpte_getword0(hpte_group + slot_offset); + + if (! (hpte_v & HPTE_V_BOLTED)) { + HvCallHpt_invalidateSetSwBitsGet(hpte_group + + slot_offset, 0, 0); + iSeries_hunlock(hpte_group); + return i; + } + + slot_offset++; + slot_offset &= 0x7; + } + + iSeries_hunlock(hpte_group); + + return -1; +} + +/* + * The HyperVisor expects the "flags" argument in this form: + * bits 0..59 : reserved + * bit 60 : N + * bits 61..63 : PP2,PP1,PP0 + */ +static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int large, int local) +{ + hpte_t hpte; + unsigned long avpn = va >> 23; + + iSeries_hlock(slot); + + HvCallHpt_get(&hpte, slot); + if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) { + /* + * Hypervisor expects bits as NPPP, which is + * different from how they are mapped in our PP. + */ + HvCallHpt_setPp(slot, (newpp & 0x3) | ((newpp & 0x4) << 1)); + iSeries_hunlock(slot); + return 0; + } + iSeries_hunlock(slot); + + return -1; +} + +/* + * Functions used to find the PTE for a particular virtual address. + * Only used during boot when bolting pages. + * + * Input : vpn : virtual page number + * Output: PTE index within the page table of the entry + * -1 on failure + */ +static long iSeries_hpte_find(unsigned long vpn) +{ + hpte_t hpte; + long slot; + + /* + * The HvCallHpt_findValid interface is as follows: + * 0xffffffffffffffff : No entry found. + * 0x00000000xxxxxxxx : Entry found in primary group, slot x + * 0x80000000xxxxxxxx : Entry found in secondary group, slot x + */ + slot = HvCallHpt_findValid(&hpte, vpn); + if (hpte.v & HPTE_V_VALID) { + if (slot < 0) { + slot &= 0x7fffffffffffffff; + slot = -slot; + } + } else + slot = -1; + return slot; +} + +/* + * Update the page protection bits. Intended to be used to create + * guard pages for kernel data structures on pages which are bolted + * in the HPT. Assumes pages being operated on will not be stolen. + * Does not work on large pages. + * + * No need to lock here because we should be the only user. + */ +static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) +{ + unsigned long vsid,va,vpn; + long slot; + + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> PAGE_SHIFT; + slot = iSeries_hpte_find(vpn); + if (slot == -1) + panic("updateboltedpp: Could not find page to bolt\n"); + HvCallHpt_setPp(slot, newpp); +} + +static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va, + int large, int local) +{ + unsigned long hpte_v; + unsigned long avpn = va >> 23; + unsigned long flags; + + local_irq_save(flags); + + iSeries_hlock(slot); + + hpte_v = iSeries_hpte_getword0(slot); + + if ((HPTE_V_AVPN_VAL(hpte_v) == avpn) && (hpte_v & HPTE_V_VALID)) + HvCallHpt_invalidateSetSwBitsGet(slot, 0, 0); + + iSeries_hunlock(slot); + + local_irq_restore(flags); +} + +void hpte_init_iSeries(void) +{ + ppc_md.hpte_invalidate = iSeries_hpte_invalidate; + ppc_md.hpte_updatepp = iSeries_hpte_updatepp; + ppc_md.hpte_updateboltedpp = iSeries_hpte_updateboltedpp; + ppc_md.hpte_insert = iSeries_hpte_insert; + ppc_md.hpte_remove = iSeries_hpte_remove; + + htab_finish_init(); +} diff --git a/arch/powerpc/platforms/iseries/hvcall.S b/arch/powerpc/platforms/iseries/hvcall.S new file mode 100644 index 00000000000..07ae6ad5f49 --- /dev/null +++ b/arch/powerpc/platforms/iseries/hvcall.S @@ -0,0 +1,94 @@ +/* + * This file contains the code to perform calls to the + * iSeries LPAR hypervisor + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/ppc_asm.h> +#include <asm/processor.h> +#include <asm/ptrace.h> /* XXX for STACK_FRAME_OVERHEAD */ + + .text + +/* + * Hypervisor call + * + * Invoke the iSeries hypervisor via the System Call instruction + * Parameters are passed to this routine in registers r3 - r10 + * + * r3 contains the HV function to be called + * r4-r10 contain the operands to the hypervisor function + * + */ + +_GLOBAL(HvCall) +_GLOBAL(HvCall0) +_GLOBAL(HvCall1) +_GLOBAL(HvCall2) +_GLOBAL(HvCall3) +_GLOBAL(HvCall4) +_GLOBAL(HvCall5) +_GLOBAL(HvCall6) +_GLOBAL(HvCall7) + + + mfcr r0 + std r0,-8(r1) + stdu r1,-(STACK_FRAME_OVERHEAD+16)(r1) + + /* r0 = 0xffffffffffffffff indicates a hypervisor call */ + + li r0,-1 + + /* Invoke the hypervisor */ + + sc + + ld r1,0(r1) + ld r0,-8(r1) + mtcrf 0xff,r0 + + /* return to caller, return value in r3 */ + + blr + +_GLOBAL(HvCall0Ret16) +_GLOBAL(HvCall1Ret16) +_GLOBAL(HvCall2Ret16) +_GLOBAL(HvCall3Ret16) +_GLOBAL(HvCall4Ret16) +_GLOBAL(HvCall5Ret16) +_GLOBAL(HvCall6Ret16) +_GLOBAL(HvCall7Ret16) + + mfcr r0 + std r0,-8(r1) + std r31,-16(r1) + stdu r1,-(STACK_FRAME_OVERHEAD+32)(r1) + + mr r31,r4 + li r0,-1 + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + sc + + std r3,0(r31) + std r4,8(r31) + + mr r3,r5 + + ld r1,0(r1) + ld r0,-8(r1) + mtcrf 0xff,r0 + ld r31,-16(r1) + + blr diff --git a/arch/powerpc/platforms/iseries/hvlog.c b/arch/powerpc/platforms/iseries/hvlog.c new file mode 100644 index 00000000000..f61e2e9ac9e --- /dev/null +++ b/arch/powerpc/platforms/iseries/hvlog.c @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <asm/page.h> +#include <asm/abs_addr.h> +#include <asm/iSeries/HvCall.h> +#include <asm/iSeries/HvCallSc.h> +#include <asm/iSeries/HvTypes.h> + + +void HvCall_writeLogBuffer(const void *buffer, u64 len) +{ + struct HvLpBufferList hv_buf; + u64 left_this_page; + u64 cur = virt_to_abs(buffer); + + while (len) { + hv_buf.addr = cur; + left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur; + if (left_this_page > len) + left_this_page = len; + hv_buf.len = left_this_page; + len -= left_this_page; + HvCall2(HvCallBaseWriteLogBuffer, + virt_to_abs(&hv_buf), + left_this_page); + cur = (cur & PAGE_MASK) + PAGE_SIZE; + } +} diff --git a/arch/powerpc/platforms/iseries/hvlpconfig.c b/arch/powerpc/platforms/iseries/hvlpconfig.c new file mode 100644 index 00000000000..dc28621aea0 --- /dev/null +++ b/arch/powerpc/platforms/iseries/hvlpconfig.c @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2001 Kyle A. Lucke, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <asm/iSeries/HvLpConfig.h> + +HvLpIndex HvLpConfig_getLpIndex_outline(void) +{ + return HvLpConfig_getLpIndex(); +} +EXPORT_SYMBOL(HvLpConfig_getLpIndex_outline); diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c new file mode 100644 index 00000000000..e40c50b7cef --- /dev/null +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup: + * + * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation + * + * Dynamic DMA mapping support, iSeries-specific parts. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/types.h> +#include <linux/dma-mapping.h> +#include <linux/list.h> + +#include <asm/iommu.h> +#include <asm/tce.h> +#include <asm/machdep.h> +#include <asm/iSeries/HvCallXm.h> +#include <asm/iSeries/iSeries_pci.h> + +extern struct list_head iSeries_Global_Device_List; + + +static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction) +{ + u64 rc; + union tce_entry tce; + + while (npages--) { + tce.te_word = 0; + tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT; + + if (tbl->it_type == TCE_VB) { + /* Virtual Bus */ + tce.te_bits.tb_valid = 1; + tce.te_bits.tb_allio = 1; + if (direction != DMA_TO_DEVICE) + tce.te_bits.tb_rdwr = 1; + } else { + /* PCI Bus */ + tce.te_bits.tb_rdwr = 1; /* Read allowed */ + if (direction != DMA_TO_DEVICE) + tce.te_bits.tb_pciwr = 1; + } + + rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, + tce.te_word); + if (rc) + panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", + rc); + index++; + uaddr += PAGE_SIZE; + } +} + +static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) +{ + u64 rc; + + while (npages--) { + rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0); + if (rc) + panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", + rc); + index++; + } +} + +#ifdef CONFIG_PCI +/* + * This function compares the known tables to find an iommu_table + * that has already been built for hardware TCEs. + */ +static struct iommu_table *iommu_table_find(struct iommu_table * tbl) +{ + struct pci_dn *pdn; + + list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) { + struct iommu_table *it = pdn->iommu_table; + if ((it != NULL) && + (it->it_type == TCE_PCI) && + (it->it_offset == tbl->it_offset) && + (it->it_index == tbl->it_index) && + (it->it_size == tbl->it_size)) + return it; + } + return NULL; +} + +/* + * Call Hv with the architected data structure to get TCE table info. + * info. Put the returned data into the Linux representation of the + * TCE table data. + * The Hardware Tce table comes in three flavors. + * 1. TCE table shared between Buses. + * 2. TCE table per Bus. + * 3. TCE Table per IOA. + */ +static void iommu_table_getparms(struct device_node *dn, + struct iommu_table* tbl) +{ + struct iommu_table_cb *parms; + + parms = kmalloc(sizeof(*parms), GFP_KERNEL); + if (parms == NULL) + panic("PCI_DMA: TCE Table Allocation failed."); + + memset(parms, 0, sizeof(*parms)); + + parms->itc_busno = ISERIES_BUS(dn); + parms->itc_slotno = PCI_DN(dn)->LogicalSlot; + parms->itc_virtbus = 0; + + HvCallXm_getTceTableParms(ISERIES_HV_ADDR(parms)); + + if (parms->itc_size == 0) + panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms); + + /* itc_size is in pages worth of table, it_size is in # of entries */ + tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry); + tbl->it_busno = parms->itc_busno; + tbl->it_offset = parms->itc_offset; + tbl->it_index = parms->itc_index; + tbl->it_blocksize = 1; + tbl->it_type = TCE_PCI; + + kfree(parms); +} + + +void iommu_devnode_init_iSeries(struct device_node *dn) +{ + struct iommu_table *tbl; + struct pci_dn *pdn = PCI_DN(dn); + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_getparms(dn, tbl); + + /* Look for existing tce table */ + pdn->iommu_table = iommu_table_find(tbl); + if (pdn->iommu_table == NULL) + pdn->iommu_table = iommu_init_table(tbl); + else + kfree(tbl); +} +#endif + +static void iommu_dev_setup_iSeries(struct pci_dev *dev) { } +static void iommu_bus_setup_iSeries(struct pci_bus *bus) { } + +void iommu_init_early_iSeries(void) +{ + ppc_md.tce_build = tce_build_iSeries; + ppc_md.tce_free = tce_free_iSeries; + + ppc_md.iommu_dev_setup = iommu_dev_setup_iSeries; + ppc_md.iommu_bus_setup = iommu_bus_setup_iSeries; + + pci_iommu_init(); +} diff --git a/arch/powerpc/platforms/iseries/ipl_parms.h b/arch/powerpc/platforms/iseries/ipl_parms.h new file mode 100644 index 00000000000..77c135ddbf1 --- /dev/null +++ b/arch/powerpc/platforms/iseries/ipl_parms.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _ISERIES_IPL_PARMS_H +#define _ISERIES_IPL_PARMS_H + +/* + * This struct maps the IPL Parameters DMA'd from the SP. + * + * Warning: + * This data must map in exactly 64 bytes and match the architecture for + * the IPL parms + */ + +#include <asm/types.h> + +struct ItIplParmsReal { + u8 xFormat; // Defines format of IplParms x00-x00 + u8 xRsvd01:6; // Reserved x01-x01 + u8 xAlternateSearch:1; // Alternate search indicator ... + u8 xUaSupplied:1; // UA Supplied on programmed IPL... + u8 xLsUaFormat; // Format byte for UA x02-x02 + u8 xRsvd02; // Reserved x03-x03 + u32 xLsUa; // LS UA x04-x07 + u32 xUnusedLsLid; // First OS LID to load x08-x0B + u16 xLsBusNumber; // LS Bus Number x0C-x0D + u8 xLsCardAdr; // LS Card Address x0E-x0E + u8 xLsBoardAdr; // LS Board Address x0F-x0F + u32 xRsvd03; // Reserved x10-x13 + u8 xSpcnPresent:1; // SPCN present x14-x14 + u8 xCpmPresent:1; // CPM present ... + u8 xRsvd04:6; // Reserved ... + u8 xRsvd05:4; // Reserved x15-x15 + u8 xKeyLock:4; // Keylock setting ... + u8 xRsvd06:6; // Reserved x16-x16 + u8 xIplMode:2; // Ipl mode (A|B|C|D) ... + u8 xHwIplType; // Fast v slow v slow EC HW IPL x17-x17 + u16 xCpmEnabledIpl:1; // CPM in effect when IPL initiatedx18-x19 + u16 xPowerOnResetIpl:1; // Indicate POR condition ... + u16 xMainStorePreserved:1; // Main Storage is preserved ... + u16 xRsvd07:13; // Reserved ... + u16 xIplSource:16; // Ipl source x1A-x1B + u8 xIplReason:8; // Reason for this IPL x1C-x1C + u8 xRsvd08; // Reserved x1D-x1D + u16 xRsvd09; // Reserved x1E-x1F + u16 xSysBoxType; // System Box Type x20-x21 + u16 xSysProcType; // System Processor Type x22-x23 + u32 xRsvd10; // Reserved x24-x27 + u64 xRsvd11; // Reserved x28-x2F + u64 xRsvd12; // Reserved x30-x37 + u64 xRsvd13; // Reserved x38-x3F +}; + +extern struct ItIplParmsReal xItIplParmsReal; + +#endif /* _ISERIES_IPL_PARMS_H */ diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c new file mode 100644 index 00000000000..31fb5fa67fa --- /dev/null +++ b/arch/powerpc/platforms/iseries/irq.c @@ -0,0 +1,366 @@ +/* + * This module supports the iSeries PCI bus interrupt handling + * Copyright (C) 20yy <Robert L Holtorf> <IBM Corp> + * Copyright (C) 2004-2005 IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the: + * Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, + * Boston, MA 02111-1307 USA + * + * Change Activity: + * Created, December 13, 2000 by Wayne Holm + * End Change Activity + */ +#include <linux/config.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/threads.h> +#include <linux/smp.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/bootmem.h> +#include <linux/ide.h> +#include <linux/irq.h> +#include <linux/spinlock.h> + +#include <asm/ppcdebug.h> +#include <asm/iSeries/HvTypes.h> +#include <asm/iSeries/HvLpEvent.h> +#include <asm/iSeries/HvCallPci.h> +#include <asm/iSeries/HvCallXm.h> + +#include "irq.h" + +/* This maps virtual irq numbers to real irqs */ +unsigned int virt_irq_to_real_map[NR_IRQS]; + +/* The next available virtual irq number */ +/* Note: the pcnet32 driver assumes irq numbers < 2 aren't valid. :( */ +static int next_virtual_irq = 2; + +static long Pci_Interrupt_Count; +static long Pci_Event_Count; + +enum XmPciLpEvent_Subtype { + XmPciLpEvent_BusCreated = 0, // PHB has been created + XmPciLpEvent_BusError = 1, // PHB has failed + XmPciLpEvent_BusFailed = 2, // Msg to Secondary, Primary failed bus + XmPciLpEvent_NodeFailed = 4, // Multi-adapter bridge has failed + XmPciLpEvent_NodeRecovered = 5, // Multi-adapter bridge has recovered + XmPciLpEvent_BusRecovered = 12, // PHB has been recovered + XmPciLpEvent_UnQuiesceBus = 18, // Secondary bus unqiescing + XmPciLpEvent_BridgeError = 21, // Bridge Error + XmPciLpEvent_SlotInterrupt = 22 // Slot interrupt +}; + +struct XmPciLpEvent_BusInterrupt { + HvBusNumber busNumber; + HvSubBusNumber subBusNumber; +}; + +struct XmPciLpEvent_NodeInterrupt { + HvBusNumber busNumber; + HvSubBusNumber subBusNumber; + HvAgentId deviceId; +}; + +struct XmPciLpEvent { + struct HvLpEvent hvLpEvent; + + union { + u64 alignData; // Align on an 8-byte boundary + + struct { + u32 fisr; + HvBusNumber busNumber; + HvSubBusNumber subBusNumber; + HvAgentId deviceId; + } slotInterrupt; + + struct XmPciLpEvent_BusInterrupt busFailed; + struct XmPciLpEvent_BusInterrupt busRecovered; + struct XmPciLpEvent_BusInterrupt busCreated; + + struct XmPciLpEvent_NodeInterrupt nodeFailed; + struct XmPciLpEvent_NodeInterrupt nodeRecovered; + + } eventData; + +}; + +static void intReceived(struct XmPciLpEvent *eventParm, + struct pt_regs *regsParm) +{ + int irq; + + ++Pci_Interrupt_Count; + + switch (eventParm->hvLpEvent.xSubtype) { + case XmPciLpEvent_SlotInterrupt: + irq = eventParm->hvLpEvent.xCorrelationToken; + /* Dispatch the interrupt handlers for this irq */ + ppc_irq_dispatch_handler(regsParm, irq); + HvCallPci_eoi(eventParm->eventData.slotInterrupt.busNumber, + eventParm->eventData.slotInterrupt.subBusNumber, + eventParm->eventData.slotInterrupt.deviceId); + break; + /* Ignore error recovery events for now */ + case XmPciLpEvent_BusCreated: + printk(KERN_INFO "intReceived: system bus %d created\n", + eventParm->eventData.busCreated.busNumber); + break; + case XmPciLpEvent_BusError: + case XmPciLpEvent_BusFailed: + printk(KERN_INFO "intReceived: system bus %d failed\n", + eventParm->eventData.busFailed.busNumber); + break; + case XmPciLpEvent_BusRecovered: + case XmPciLpEvent_UnQuiesceBus: + printk(KERN_INFO "intReceived: system bus %d recovered\n", + eventParm->eventData.busRecovered.busNumber); + break; + case XmPciLpEvent_NodeFailed: + case XmPciLpEvent_BridgeError: + printk(KERN_INFO + "intReceived: multi-adapter bridge %d/%d/%d failed\n", + eventParm->eventData.nodeFailed.busNumber, + eventParm->eventData.nodeFailed.subBusNumber, + eventParm->eventData.nodeFailed.deviceId); + break; + case XmPciLpEvent_NodeRecovered: + printk(KERN_INFO + "intReceived: multi-adapter bridge %d/%d/%d recovered\n", + eventParm->eventData.nodeRecovered.busNumber, + eventParm->eventData.nodeRecovered.subBusNumber, + eventParm->eventData.nodeRecovered.deviceId); + break; + default: + printk(KERN_ERR + "intReceived: unrecognized event subtype 0x%x\n", + eventParm->hvLpEvent.xSubtype); + break; + } +} + +static void XmPciLpEvent_handler(struct HvLpEvent *eventParm, + struct pt_regs *regsParm) +{ +#ifdef CONFIG_PCI + ++Pci_Event_Count; + + if (eventParm && (eventParm->xType == HvLpEvent_Type_PciIo)) { + switch (eventParm->xFlags.xFunction) { + case HvLpEvent_Function_Int: + intReceived((struct XmPciLpEvent *)eventParm, regsParm); + break; + case HvLpEvent_Function_Ack: + printk(KERN_ERR + "XmPciLpEvent_handler: unexpected ack received\n"); + break; + default: + printk(KERN_ERR + "XmPciLpEvent_handler: unexpected event function %d\n", + (int)eventParm->xFlags.xFunction); + break; + } + } else if (eventParm) + printk(KERN_ERR + "XmPciLpEvent_handler: Unrecognized PCI event type 0x%x\n", + (int)eventParm->xType); + else + printk(KERN_ERR "XmPciLpEvent_handler: NULL event received\n"); +#endif +} + +/* + * This is called by init_IRQ. set in ppc_md.init_IRQ by iSeries_setup.c + * It must be called before the bus walk. + */ +void __init iSeries_init_IRQ(void) +{ + /* Register PCI event handler and open an event path */ + int xRc; + + xRc = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo, + &XmPciLpEvent_handler); + if (xRc == 0) { + xRc = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0); + if (xRc != 0) + printk(KERN_ERR "iSeries_init_IRQ: open event path " + "failed with rc 0x%x\n", xRc); + } else + printk(KERN_ERR "iSeries_init_IRQ: register handler " + "failed with rc 0x%x\n", xRc); +} + +#define REAL_IRQ_TO_BUS(irq) ((((irq) >> 6) & 0xff) + 1) +#define REAL_IRQ_TO_IDSEL(irq) ((((irq) >> 3) & 7) + 1) +#define REAL_IRQ_TO_FUNC(irq) ((irq) & 7) + +/* + * This will be called by device drivers (via enable_IRQ) + * to enable INTA in the bridge interrupt status register. + */ +static void iSeries_enable_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + /* The IRQ has already been locked by the caller */ + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Unmask secondary INTA */ + mask = 0x80000000; + HvCallPci_unmaskInterrupts(bus, subBus, deviceId, mask); + PPCDBG(PPCDBG_BUSWALK, "iSeries_enable_IRQ 0x%02X.%02X.%02X 0x%04X\n", + bus, subBus, deviceId, irq); +} + +/* This is called by iSeries_activate_IRQs */ +static unsigned int iSeries_startup_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Link the IRQ number to the bridge */ + HvCallXm_connectBusUnit(bus, subBus, deviceId, irq); + + /* Unmask bridge interrupts in the FISR */ + mask = 0x01010000 << function; + HvCallPci_unmaskFisr(bus, subBus, deviceId, mask); + iSeries_enable_IRQ(irq); + return 0; +} + +/* + * This is called out of iSeries_fixup to activate interrupt + * generation for usable slots + */ +void __init iSeries_activate_IRQs() +{ + int irq; + unsigned long flags; + + for_each_irq (irq) { + irq_desc_t *desc = get_irq_desc(irq); + + if (desc && desc->handler && desc->handler->startup) { + spin_lock_irqsave(&desc->lock, flags); + desc->handler->startup(irq); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* this is not called anywhere currently */ +static void iSeries_shutdown_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + /* irq should be locked by the caller */ + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Invalidate the IRQ number in the bridge */ + HvCallXm_connectBusUnit(bus, subBus, deviceId, 0); + + /* Mask bridge interrupts in the FISR */ + mask = 0x01010000 << function; + HvCallPci_maskFisr(bus, subBus, deviceId, mask); +} + +/* + * This will be called by device drivers (via disable_IRQ) + * to disable INTA in the bridge interrupt status register. + */ +static void iSeries_disable_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + /* The IRQ has already been locked by the caller */ + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Mask secondary INTA */ + mask = 0x80000000; + HvCallPci_maskInterrupts(bus, subBus, deviceId, mask); + PPCDBG(PPCDBG_BUSWALK, "iSeries_disable_IRQ 0x%02X.%02X.%02X 0x%04X\n", + bus, subBus, deviceId, irq); +} + +/* + * Need to define this so ppc_irq_dispatch_handler will NOT call + * enable_IRQ at the end of interrupt handling. However, this does + * nothing because there is not enough information provided to do + * the EOI HvCall. This is done by XmPciLpEvent.c + */ +static void iSeries_end_IRQ(unsigned int irq) +{ +} + +static hw_irq_controller iSeries_IRQ_handler = { + .typename = "iSeries irq controller", + .startup = iSeries_startup_IRQ, + .shutdown = iSeries_shutdown_IRQ, + .enable = iSeries_enable_IRQ, + .disable = iSeries_disable_IRQ, + .end = iSeries_end_IRQ +}; + +/* + * This is called out of iSeries_scan_slot to allocate an IRQ for an EADS slot + * It calculates the irq value for the slot. + * Note that subBusNumber is always 0 (at the moment at least). + */ +int __init iSeries_allocate_IRQ(HvBusNumber busNumber, + HvSubBusNumber subBusNumber, HvAgentId deviceId) +{ + unsigned int realirq, virtirq; + u8 idsel = (deviceId >> 4); + u8 function = deviceId & 7; + + virtirq = next_virtual_irq++; + realirq = ((busNumber - 1) << 6) + ((idsel - 1) << 3) + function; + virt_irq_to_real_map[virtirq] = realirq; + + irq_desc[virtirq].handler = &iSeries_IRQ_handler; + return virtirq; +} + +int virt_irq_create_mapping(unsigned int real_irq) +{ + BUG(); /* Don't call this on iSeries, yet */ + + return 0; +} + +void virt_irq_init(void) +{ + return; +} diff --git a/arch/powerpc/platforms/iseries/irq.h b/arch/powerpc/platforms/iseries/irq.h new file mode 100644 index 00000000000..5f643f16ecc --- /dev/null +++ b/arch/powerpc/platforms/iseries/irq.h @@ -0,0 +1,8 @@ +#ifndef _ISERIES_IRQ_H +#define _ISERIES_IRQ_H + +extern void iSeries_init_IRQ(void); +extern int iSeries_allocate_IRQ(HvBusNumber, HvSubBusNumber, HvAgentId); +extern void iSeries_activate_IRQs(void); + +#endif /* _ISERIES_IRQ_H */ diff --git a/arch/powerpc/platforms/iseries/ksyms.c b/arch/powerpc/platforms/iseries/ksyms.c new file mode 100644 index 00000000000..f271b353972 --- /dev/null +++ b/arch/powerpc/platforms/iseries/ksyms.c @@ -0,0 +1,27 @@ +/* + * (C) 2001-2005 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/module.h> + +#include <asm/hw_irq.h> +#include <asm/iSeries/HvCallSc.h> + +EXPORT_SYMBOL(HvCall0); +EXPORT_SYMBOL(HvCall1); +EXPORT_SYMBOL(HvCall2); +EXPORT_SYMBOL(HvCall3); +EXPORT_SYMBOL(HvCall4); +EXPORT_SYMBOL(HvCall5); +EXPORT_SYMBOL(HvCall6); +EXPORT_SYMBOL(HvCall7); + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(local_get_flags); +EXPORT_SYMBOL(local_irq_disable); +EXPORT_SYMBOL(local_irq_restore); +#endif diff --git a/arch/powerpc/platforms/iseries/lpardata.c b/arch/powerpc/platforms/iseries/lpardata.c new file mode 100644 index 00000000000..ed2ffee6f73 --- /dev/null +++ b/arch/powerpc/platforms/iseries/lpardata.c @@ -0,0 +1,227 @@ +/* + * Copyright 2001 Mike Corrigan, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <linux/types.h> +#include <linux/threads.h> +#include <linux/module.h> +#include <linux/bitops.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/naca.h> +#include <asm/abs_addr.h> +#include <asm/iSeries/ItLpNaca.h> +#include <asm/lppaca.h> +#include <asm/iSeries/ItLpRegSave.h> +#include <asm/paca.h> +#include <asm/iSeries/LparMap.h> +#include <asm/iSeries/ItExtVpdPanel.h> +#include <asm/iSeries/ItLpQueue.h> + +#include "vpd_areas.h" +#include "spcomm_area.h" +#include "ipl_parms.h" +#include "processor_vpd.h" +#include "release_data.h" + +/* The HvReleaseData is the root of the information shared between + * the hypervisor and Linux. + */ +struct HvReleaseData hvReleaseData = { + .xDesc = 0xc8a5d9c4, /* "HvRD" ebcdic */ + .xSize = sizeof(struct HvReleaseData), + .xVpdAreasPtrOffset = offsetof(struct naca_struct, xItVpdAreas), + .xSlicNacaAddr = &naca, /* 64-bit Naca address */ + .xMsNucDataOffset = LPARMAP_PHYS, + .xFlags = HVREL_TAGSINACTIVE /* tags inactive */ + /* 64 bit */ + /* shared processors */ + /* HMT allowed */ + | 6, /* TEMP: This allows non-GA driver */ + .xVrmIndex = 4, /* We are v5r2m0 */ + .xMinSupportedPlicVrmIndex = 3, /* v5r1m0 */ + .xMinCompatablePlicVrmIndex = 3, /* v5r1m0 */ + .xVrmName = { 0xd3, 0x89, 0x95, 0xa4, /* "Linux 2.4.64" ebcdic */ + 0xa7, 0x40, 0xf2, 0x4b, + 0xf4, 0x4b, 0xf6, 0xf4 }, +}; + +/* + * The NACA. The first dword of the naca is required by the iSeries + * hypervisor to point to itVpdAreas. The hypervisor finds the NACA + * through the pointer in hvReleaseData. + */ +struct naca_struct naca = { + .xItVpdAreas = &itVpdAreas, + .xRamDisk = 0, + .xRamDiskSize = 0, +}; + +extern void system_reset_iSeries(void); +extern void machine_check_iSeries(void); +extern void data_access_iSeries(void); +extern void instruction_access_iSeries(void); +extern void hardware_interrupt_iSeries(void); +extern void alignment_iSeries(void); +extern void program_check_iSeries(void); +extern void fp_unavailable_iSeries(void); +extern void decrementer_iSeries(void); +extern void trap_0a_iSeries(void); +extern void trap_0b_iSeries(void); +extern void system_call_iSeries(void); +extern void single_step_iSeries(void); +extern void trap_0e_iSeries(void); +extern void performance_monitor_iSeries(void); +extern void data_access_slb_iSeries(void); +extern void instruction_access_slb_iSeries(void); + +struct ItLpNaca itLpNaca = { + .xDesc = 0xd397d581, /* "LpNa" ebcdic */ + .xSize = 0x0400, /* size of ItLpNaca */ + .xIntHdlrOffset = 0x0300, /* offset to int array */ + .xMaxIntHdlrEntries = 19, /* # ents */ + .xPrimaryLpIndex = 0, /* Part # of primary */ + .xServiceLpIndex = 0, /* Part # of serv */ + .xLpIndex = 0, /* Part # of me */ + .xMaxLpQueues = 0, /* # of LP queues */ + .xLpQueueOffset = 0x100, /* offset of start of LP queues */ + .xPirEnvironMode = 0, /* Piranha stuff */ + .xPirConsoleMode = 0, + .xPirDasdMode = 0, + .xLparInstalled = 0, + .xSysPartitioned = 0, + .xHwSyncedTBs = 0, + .xIntProcUtilHmt = 0, + .xSpVpdFormat = 0, + .xIntProcRatio = 0, + .xPlicVrmIndex = 0, /* VRM index of PLIC */ + .xMinSupportedSlicVrmInd = 0, /* min supported SLIC */ + .xMinCompatableSlicVrmInd = 0, /* min compat SLIC */ + .xLoadAreaAddr = 0, /* 64-bit addr of load area */ + .xLoadAreaChunks = 0, /* chunks for load area */ + .xPaseSysCallCRMask = 0, /* PASE mask */ + .xSlicSegmentTablePtr = 0, /* seg table */ + .xOldLpQueue = { 0 }, /* Old LP Queue */ + .xInterruptHdlr = { + (u64)system_reset_iSeries, /* 0x100 System Reset */ + (u64)machine_check_iSeries, /* 0x200 Machine Check */ + (u64)data_access_iSeries, /* 0x300 Data Access */ + (u64)instruction_access_iSeries, /* 0x400 Instruction Access */ + (u64)hardware_interrupt_iSeries, /* 0x500 External */ + (u64)alignment_iSeries, /* 0x600 Alignment */ + (u64)program_check_iSeries, /* 0x700 Program Check */ + (u64)fp_unavailable_iSeries, /* 0x800 FP Unavailable */ + (u64)decrementer_iSeries, /* 0x900 Decrementer */ + (u64)trap_0a_iSeries, /* 0xa00 Trap 0A */ + (u64)trap_0b_iSeries, /* 0xb00 Trap 0B */ + (u64)system_call_iSeries, /* 0xc00 System Call */ + (u64)single_step_iSeries, /* 0xd00 Single Step */ + (u64)trap_0e_iSeries, /* 0xe00 Trap 0E */ + (u64)performance_monitor_iSeries,/* 0xf00 Performance Monitor */ + 0, /* int 0x1000 */ + 0, /* int 0x1010 */ + 0, /* int 0x1020 CPU ctls */ + (u64)hardware_interrupt_iSeries, /* SC Ret Hdlr */ + (u64)data_access_slb_iSeries, /* 0x380 D-SLB */ + (u64)instruction_access_slb_iSeries /* 0x480 I-SLB */ + } +}; +EXPORT_SYMBOL(itLpNaca); + +/* May be filled in by the hypervisor so cannot end up in the BSS */ +struct ItIplParmsReal xItIplParmsReal __attribute__((__section__(".data"))); + +/* May be filled in by the hypervisor so cannot end up in the BSS */ +struct ItExtVpdPanel xItExtVpdPanel __attribute__((__section__(".data"))); +EXPORT_SYMBOL(xItExtVpdPanel); + +#define maxPhysicalProcessors 32 + +struct IoHriProcessorVpd xIoHriProcessorVpd[maxPhysicalProcessors] = { + { + .xInstCacheOperandSize = 32, + .xDataCacheOperandSize = 32, + .xProcFreq = 50000000, + .xTimeBaseFreq = 50000000, + .xPVR = 0x3600 + } +}; + +/* Space for Main Store Vpd 27,200 bytes */ +/* May be filled in by the hypervisor so cannot end up in the BSS */ +u64 xMsVpd[3400] __attribute__((__section__(".data"))); + +/* Space for Recovery Log Buffer */ +/* May be filled in by the hypervisor so cannot end up in the BSS */ +u64 xRecoveryLogBuffer[32] __attribute__((__section__(".data"))); + +struct SpCommArea xSpCommArea = { + .xDesc = 0xE2D7C3C2, + .xFormat = 1, +}; + +/* The LparMap data is now located at offset 0x6000 in head.S + * It was put there so that the HvReleaseData could address it + * with a 32-bit offset as required by the iSeries hypervisor + * + * The Naca has a pointer to the ItVpdAreas. The hypervisor finds + * the Naca via the HvReleaseData area. The HvReleaseData has the + * offset into the Naca of the pointer to the ItVpdAreas. + */ +struct ItVpdAreas itVpdAreas = { + .xSlicDesc = 0xc9a3e5c1, /* "ItVA" */ + .xSlicSize = sizeof(struct ItVpdAreas), + .xSlicVpdEntries = ItVpdMaxEntries, /* # VPD array entries */ + .xSlicDmaEntries = ItDmaMaxEntries, /* # DMA array entries */ + .xSlicMaxLogicalProcs = NR_CPUS * 2, /* Max logical procs */ + .xSlicMaxPhysicalProcs = maxPhysicalProcessors, /* Max physical procs */ + .xSlicDmaToksOffset = offsetof(struct ItVpdAreas, xPlicDmaToks), + .xSlicVpdAdrsOffset = offsetof(struct ItVpdAreas, xSlicVpdAdrs), + .xSlicDmaLensOffset = offsetof(struct ItVpdAreas, xPlicDmaLens), + .xSlicVpdLensOffset = offsetof(struct ItVpdAreas, xSlicVpdLens), + .xSlicMaxSlotLabels = 0, /* max slot labels */ + .xSlicMaxLpQueues = 1, /* max LP queues */ + .xPlicDmaLens = { 0 }, /* DMA lengths */ + .xPlicDmaToks = { 0 }, /* DMA tokens */ + .xSlicVpdLens = { /* VPD lengths */ + 0,0,0, /* 0 - 2 */ + sizeof(xItExtVpdPanel), /* 3 Extended VPD */ + sizeof(struct paca_struct), /* 4 length of Paca */ + 0, /* 5 */ + sizeof(struct ItIplParmsReal),/* 6 length of IPL parms */ + 26992, /* 7 length of MS VPD */ + 0, /* 8 */ + sizeof(struct ItLpNaca),/* 9 length of LP Naca */ + 0, /* 10 */ + 256, /* 11 length of Recovery Log Buf */ + sizeof(struct SpCommArea), /* 12 length of SP Comm Area */ + 0,0,0, /* 13 - 15 */ + sizeof(struct IoHriProcessorVpd),/* 16 length of Proc Vpd */ + 0,0,0,0,0,0, /* 17 - 22 */ + sizeof(struct hvlpevent_queue), /* 23 length of Lp Queue */ + 0,0 /* 24 - 25 */ + }, + .xSlicVpdAdrs = { /* VPD addresses */ + 0,0,0, /* 0 - 2 */ + &xItExtVpdPanel, /* 3 Extended VPD */ + &paca[0], /* 4 first Paca */ + 0, /* 5 */ + &xItIplParmsReal, /* 6 IPL parms */ + &xMsVpd, /* 7 MS Vpd */ + 0, /* 8 */ + &itLpNaca, /* 9 LpNaca */ + 0, /* 10 */ + &xRecoveryLogBuffer, /* 11 Recovery Log Buffer */ + &xSpCommArea, /* 12 SP Comm Area */ + 0,0,0, /* 13 - 15 */ + &xIoHriProcessorVpd, /* 16 Proc Vpd */ + 0,0,0,0,0,0, /* 17 - 22 */ + &hvlpevent_queue, /* 23 Lp Queue */ + 0,0 + } +}; diff --git a/arch/powerpc/platforms/iseries/lpevents.c b/arch/powerpc/platforms/iseries/lpevents.c new file mode 100644 index 00000000000..f8b4155b048 --- /dev/null +++ b/arch/powerpc/platforms/iseries/lpevents.c @@ -0,0 +1,329 @@ +/* + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/bootmem.h> +#include <linux/seq_file.h> +#include <linux/proc_fs.h> +#include <linux/module.h> + +#include <asm/system.h> +#include <asm/paca.h> +#include <asm/iSeries/ItLpQueue.h> +#include <asm/iSeries/HvLpEvent.h> +#include <asm/iSeries/HvCallEvent.h> +#include <asm/iSeries/ItLpNaca.h> + +/* + * The LpQueue is used to pass event data from the hypervisor to + * the partition. This is where I/O interrupt events are communicated. + * + * It is written to by the hypervisor so cannot end up in the BSS. + */ +struct hvlpevent_queue hvlpevent_queue __attribute__((__section__(".data"))); + +DEFINE_PER_CPU(unsigned long[HvLpEvent_Type_NumTypes], hvlpevent_counts); + +static char *event_types[HvLpEvent_Type_NumTypes] = { + "Hypervisor", + "Machine Facilities", + "Session Manager", + "SPD I/O", + "Virtual Bus", + "PCI I/O", + "RIO I/O", + "Virtual Lan", + "Virtual I/O" +}; + +/* Array of LpEvent handler functions */ +static LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes]; +static unsigned lpEventHandlerPaths[HvLpEvent_Type_NumTypes]; + +static struct HvLpEvent * get_next_hvlpevent(void) +{ + struct HvLpEvent * event; + event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr; + + if (event->xFlags.xValid) { + /* rmb() needed only for weakly consistent machines (regatta) */ + rmb(); + /* Set pointer to next potential event */ + hvlpevent_queue.xSlicCurEventPtr += ((event->xSizeMinus1 + + LpEventAlign) / LpEventAlign) * LpEventAlign; + + /* Wrap to beginning if no room at end */ + if (hvlpevent_queue.xSlicCurEventPtr > + hvlpevent_queue.xSlicLastValidEventPtr) { + hvlpevent_queue.xSlicCurEventPtr = + hvlpevent_queue.xSlicEventStackPtr; + } + } else { + event = NULL; + } + + return event; +} + +static unsigned long spread_lpevents = NR_CPUS; + +int hvlpevent_is_pending(void) +{ + struct HvLpEvent *next_event; + + if (smp_processor_id() >= spread_lpevents) + return 0; + + next_event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr; + + return next_event->xFlags.xValid | + hvlpevent_queue.xPlicOverflowIntPending; +} + +static void hvlpevent_clear_valid(struct HvLpEvent * event) +{ + /* Tell the Hypervisor that we're done with this event. + * Also clear bits within this event that might look like valid bits. + * ie. on 64-byte boundaries. + */ + struct HvLpEvent *tmp; + unsigned extra = ((event->xSizeMinus1 + LpEventAlign) / + LpEventAlign) - 1; + + switch (extra) { + case 3: + tmp = (struct HvLpEvent*)((char*)event + 3 * LpEventAlign); + tmp->xFlags.xValid = 0; + case 2: + tmp = (struct HvLpEvent*)((char*)event + 2 * LpEventAlign); + tmp->xFlags.xValid = 0; + case 1: + tmp = (struct HvLpEvent*)((char*)event + 1 * LpEventAlign); + tmp->xFlags.xValid = 0; + } + + mb(); + + event->xFlags.xValid = 0; +} + +void process_hvlpevents(struct pt_regs *regs) +{ + struct HvLpEvent * event; + + /* If we have recursed, just return */ + if (!spin_trylock(&hvlpevent_queue.lock)) + return; + + for (;;) { + event = get_next_hvlpevent(); + if (event) { + /* Call appropriate handler here, passing + * a pointer to the LpEvent. The handler + * must make a copy of the LpEvent if it + * needs it in a bottom half. (perhaps for + * an ACK) + * + * Handlers are responsible for ACK processing + * + * The Hypervisor guarantees that LpEvents will + * only be delivered with types that we have + * registered for, so no type check is necessary + * here! + */ + if (event->xType < HvLpEvent_Type_NumTypes) + __get_cpu_var(hvlpevent_counts)[event->xType]++; + if (event->xType < HvLpEvent_Type_NumTypes && + lpEventHandler[event->xType]) + lpEventHandler[event->xType](event, regs); + else + printk(KERN_INFO "Unexpected Lp Event type=%d\n", event->xType ); + + hvlpevent_clear_valid(event); + } else if (hvlpevent_queue.xPlicOverflowIntPending) + /* + * No more valid events. If overflow events are + * pending process them + */ + HvCallEvent_getOverflowLpEvents(hvlpevent_queue.xIndex); + else + break; + } + + spin_unlock(&hvlpevent_queue.lock); +} + +static int set_spread_lpevents(char *str) +{ + unsigned long val = simple_strtoul(str, NULL, 0); + + /* + * The parameter is the number of processors to share in processing + * lp events. + */ + if (( val > 0) && (val <= NR_CPUS)) { + spread_lpevents = val; + printk("lpevent processing spread over %ld processors\n", val); + } else { + printk("invalid spread_lpevents %ld\n", val); + } + + return 1; +} +__setup("spread_lpevents=", set_spread_lpevents); + +void setup_hvlpevent_queue(void) +{ + void *eventStack; + + /* + * Allocate a page for the Event Stack. The Hypervisor needs the + * absolute real address, so we subtract out the KERNELBASE and add + * in the absolute real address of the kernel load area. + */ + eventStack = alloc_bootmem_pages(LpEventStackSize); + memset(eventStack, 0, LpEventStackSize); + + /* Invoke the hypervisor to initialize the event stack */ + HvCallEvent_setLpEventStack(0, eventStack, LpEventStackSize); + + hvlpevent_queue.xSlicEventStackPtr = (char *)eventStack; + hvlpevent_queue.xSlicCurEventPtr = (char *)eventStack; + hvlpevent_queue.xSlicLastValidEventPtr = (char *)eventStack + + (LpEventStackSize - LpEventMaxSize); + hvlpevent_queue.xIndex = 0; +} + +/* Register a handler for an LpEvent type */ +int HvLpEvent_registerHandler(HvLpEvent_Type eventType, LpEventHandler handler) +{ + if (eventType < HvLpEvent_Type_NumTypes) { + lpEventHandler[eventType] = handler; + return 0; + } + return 1; +} +EXPORT_SYMBOL(HvLpEvent_registerHandler); + +int HvLpEvent_unregisterHandler(HvLpEvent_Type eventType) +{ + might_sleep(); + + if (eventType < HvLpEvent_Type_NumTypes) { + if (!lpEventHandlerPaths[eventType]) { + lpEventHandler[eventType] = NULL; + /* + * We now sleep until all other CPUs have scheduled. + * This ensures that the deletion is seen by all + * other CPUs, and that the deleted handler isn't + * still running on another CPU when we return. + */ + synchronize_rcu(); + return 0; + } + } + return 1; +} +EXPORT_SYMBOL(HvLpEvent_unregisterHandler); + +/* + * lpIndex is the partition index of the target partition. + * needed only for VirtualIo, VirtualLan and SessionMgr. Zero + * indicates to use our partition index - for the other types. + */ +int HvLpEvent_openPath(HvLpEvent_Type eventType, HvLpIndex lpIndex) +{ + if ((eventType < HvLpEvent_Type_NumTypes) && + lpEventHandler[eventType]) { + if (lpIndex == 0) + lpIndex = itLpNaca.xLpIndex; + HvCallEvent_openLpEventPath(lpIndex, eventType); + ++lpEventHandlerPaths[eventType]; + return 0; + } + return 1; +} + +int HvLpEvent_closePath(HvLpEvent_Type eventType, HvLpIndex lpIndex) +{ + if ((eventType < HvLpEvent_Type_NumTypes) && + lpEventHandler[eventType] && + lpEventHandlerPaths[eventType]) { + if (lpIndex == 0) + lpIndex = itLpNaca.xLpIndex; + HvCallEvent_closeLpEventPath(lpIndex, eventType); + --lpEventHandlerPaths[eventType]; + return 0; + } + return 1; +} + +static int proc_lpevents_show(struct seq_file *m, void *v) +{ + int cpu, i; + unsigned long sum; + static unsigned long cpu_totals[NR_CPUS]; + + /* FIXME: do we care that there's no locking here? */ + sum = 0; + for_each_online_cpu(cpu) { + cpu_totals[cpu] = 0; + for (i = 0; i < HvLpEvent_Type_NumTypes; i++) { + cpu_totals[cpu] += per_cpu(hvlpevent_counts, cpu)[i]; + } + sum += cpu_totals[cpu]; + } + + seq_printf(m, "LpEventQueue 0\n"); + seq_printf(m, " events processed:\t%lu\n", sum); + + for (i = 0; i < HvLpEvent_Type_NumTypes; ++i) { + sum = 0; + for_each_online_cpu(cpu) { + sum += per_cpu(hvlpevent_counts, cpu)[i]; + } + + seq_printf(m, " %-20s %10lu\n", event_types[i], sum); + } + + seq_printf(m, "\n events processed by processor:\n"); + + for_each_online_cpu(cpu) { + seq_printf(m, " CPU%02d %10lu\n", cpu, cpu_totals[cpu]); + } + + return 0; +} + +static int proc_lpevents_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_lpevents_show, NULL); +} + +static struct file_operations proc_lpevents_operations = { + .open = proc_lpevents_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init proc_lpevents_init(void) +{ + struct proc_dir_entry *e; + + e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL); + if (e) + e->proc_fops = &proc_lpevents_operations; + + return 0; +} +__initcall(proc_lpevents_init); + diff --git a/arch/powerpc/platforms/iseries/main_store.h b/arch/powerpc/platforms/iseries/main_store.h new file mode 100644 index 00000000000..74f6889f834 --- /dev/null +++ b/arch/powerpc/platforms/iseries/main_store.h @@ -0,0 +1,165 @@ +/* + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ISERIES_MAIN_STORE_H +#define _ISERIES_MAIN_STORE_H + +/* Main Store Vpd for Condor,iStar,sStar */ +struct IoHriMainStoreSegment4 { + u8 msArea0Exists:1; + u8 msArea1Exists:1; + u8 msArea2Exists:1; + u8 msArea3Exists:1; + u8 reserved1:4; + u8 reserved2; + + u8 msArea0Functional:1; + u8 msArea1Functional:1; + u8 msArea2Functional:1; + u8 msArea3Functional:1; + u8 reserved3:4; + u8 reserved4; + + u32 totalMainStore; + + u64 msArea0Ptr; + u64 msArea1Ptr; + u64 msArea2Ptr; + u64 msArea3Ptr; + + u32 cardProductionLevel; + + u32 msAdrHole; + + u8 msArea0HasRiserVpd:1; + u8 msArea1HasRiserVpd:1; + u8 msArea2HasRiserVpd:1; + u8 msArea3HasRiserVpd:1; + u8 reserved5:4; + u8 reserved6; + u16 reserved7; + + u8 reserved8[28]; + + u64 nonInterleavedBlocksStartAdr; + u64 nonInterleavedBlocksEndAdr; +}; + +/* Main Store VPD for Power4 */ +struct IoHriMainStoreChipInfo1 { + u32 chipMfgID __attribute((packed)); + char chipECLevel[4] __attribute((packed)); +}; + +struct IoHriMainStoreVpdIdData { + char typeNumber[4]; + char modelNumber[4]; + char partNumber[12]; + char serialNumber[12]; +}; + +struct IoHriMainStoreVpdFruData { + char fruLabel[8] __attribute((packed)); + u8 numberOfSlots __attribute((packed)); + u8 pluggingType __attribute((packed)); + u16 slotMapIndex __attribute((packed)); +}; + +struct IoHriMainStoreAdrRangeBlock { + void *blockStart __attribute((packed)); + void *blockEnd __attribute((packed)); + u32 blockProcChipId __attribute((packed)); +}; + +#define MaxAreaAdrRangeBlocks 4 + +struct IoHriMainStoreArea4 { + u32 msVpdFormat __attribute((packed)); + u8 containedVpdType __attribute((packed)); + u8 reserved1 __attribute((packed)); + u16 reserved2 __attribute((packed)); + + u64 msExists __attribute((packed)); + u64 msFunctional __attribute((packed)); + + u32 memorySize __attribute((packed)); + u32 procNodeId __attribute((packed)); + + u32 numAdrRangeBlocks __attribute((packed)); + struct IoHriMainStoreAdrRangeBlock xAdrRangeBlock[MaxAreaAdrRangeBlocks] __attribute((packed)); + + struct IoHriMainStoreChipInfo1 chipInfo0 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo1 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo2 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo3 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo4 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo5 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo6 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo7 __attribute((packed)); + + void *msRamAreaArray __attribute((packed)); + u32 msRamAreaArrayNumEntries __attribute((packed)); + u32 msRamAreaArrayEntrySize __attribute((packed)); + + u32 numaDimmExists __attribute((packed)); + u32 numaDimmFunctional __attribute((packed)); + void *numaDimmArray __attribute((packed)); + u32 numaDimmArrayNumEntries __attribute((packed)); + u32 numaDimmArrayEntrySize __attribute((packed)); + + struct IoHriMainStoreVpdIdData idData __attribute((packed)); + + u64 powerData __attribute((packed)); + u64 cardAssemblyPartNum __attribute((packed)); + u64 chipSerialNum __attribute((packed)); + + u64 reserved3 __attribute((packed)); + char reserved4[16] __attribute((packed)); + + struct IoHriMainStoreVpdFruData fruData __attribute((packed)); + + u8 vpdPortNum __attribute((packed)); + u8 reserved5 __attribute((packed)); + u8 frameId __attribute((packed)); + u8 rackUnit __attribute((packed)); + char asciiKeywordVpd[256] __attribute((packed)); + u32 reserved6 __attribute((packed)); +}; + + +struct IoHriMainStoreSegment5 { + u16 reserved1; + u8 reserved2; + u8 msVpdFormat; + + u32 totalMainStore; + u64 maxConfiguredMsAdr; + + struct IoHriMainStoreArea4 *msAreaArray; + u32 msAreaArrayNumEntries; + u32 msAreaArrayEntrySize; + + u32 msAreaExists; + u32 msAreaFunctional; + + u64 reserved3; +}; + +extern u64 xMsVpd[]; + +#endif /* _ISERIES_MAIN_STORE_H */ diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c new file mode 100644 index 00000000000..82f5abab9af --- /dev/null +++ b/arch/powerpc/platforms/iseries/mf.c @@ -0,0 +1,1316 @@ +/* + * Copyright (C) 2001 Troy D. Armstrong IBM Corporation + * Copyright (C) 2004-2005 Stephen Rothwell IBM Corporation + * + * This modules exists as an interface between a Linux secondary partition + * running on an iSeries and the primary partition's Virtual Service + * Processor (VSP) object. The VSP has final authority over powering on/off + * all partitions in the iSeries. It also provides miscellaneous low-level + * machine facility type operations. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/bcd.h> + +#include <asm/time.h> +#include <asm/uaccess.h> +#include <asm/paca.h> +#include <asm/iSeries/vio.h> +#include <asm/iSeries/mf.h> +#include <asm/iSeries/HvLpConfig.h> +#include <asm/iSeries/ItLpQueue.h> + +#include "setup.h" + +extern int piranha_simulator; + +/* + * This is the structure layout for the Machine Facilites LPAR event + * flows. + */ +struct vsp_cmd_data { + u64 token; + u16 cmd; + HvLpIndex lp_index; + u8 result_code; + u32 reserved; + union { + u64 state; /* GetStateOut */ + u64 ipl_type; /* GetIplTypeOut, Function02SelectIplTypeIn */ + u64 ipl_mode; /* GetIplModeOut, Function02SelectIplModeIn */ + u64 page[4]; /* GetSrcHistoryIn */ + u64 flag; /* GetAutoIplWhenPrimaryIplsOut, + SetAutoIplWhenPrimaryIplsIn, + WhiteButtonPowerOffIn, + Function08FastPowerOffIn, + IsSpcnRackPowerIncompleteOut */ + struct { + u64 token; + u64 address_type; + u64 side; + u32 length; + u32 offset; + } kern; /* SetKernelImageIn, GetKernelImageIn, + SetKernelCmdLineIn, GetKernelCmdLineIn */ + u32 length_out; /* GetKernelImageOut, GetKernelCmdLineOut */ + u8 reserved[80]; + } sub_data; +}; + +struct vsp_rsp_data { + struct completion com; + struct vsp_cmd_data *response; +}; + +struct alloc_data { + u16 size; + u16 type; + u32 count; + u16 reserved1; + u8 reserved2; + HvLpIndex target_lp; +}; + +struct ce_msg_data; + +typedef void (*ce_msg_comp_hdlr)(void *token, struct ce_msg_data *vsp_cmd_rsp); + +struct ce_msg_comp_data { + ce_msg_comp_hdlr handler; + void *token; +}; + +struct ce_msg_data { + u8 ce_msg[12]; + char reserved[4]; + struct ce_msg_comp_data *completion; +}; + +struct io_mf_lp_event { + struct HvLpEvent hp_lp_event; + u16 subtype_result_code; + u16 reserved1; + u32 reserved2; + union { + struct alloc_data alloc; + struct ce_msg_data ce_msg; + struct vsp_cmd_data vsp_cmd; + } data; +}; + +#define subtype_data(a, b, c, d) \ + (((a) << 24) + ((b) << 16) + ((c) << 8) + (d)) + +/* + * All outgoing event traffic is kept on a FIFO queue. The first + * pointer points to the one that is outstanding, and all new + * requests get stuck on the end. Also, we keep a certain number of + * preallocated pending events so that we can operate very early in + * the boot up sequence (before kmalloc is ready). + */ +struct pending_event { + struct pending_event *next; + struct io_mf_lp_event event; + MFCompleteHandler hdlr; + char dma_data[72]; + unsigned dma_data_length; + unsigned remote_address; +}; +static spinlock_t pending_event_spinlock; +static struct pending_event *pending_event_head; +static struct pending_event *pending_event_tail; +static struct pending_event *pending_event_avail; +static struct pending_event pending_event_prealloc[16]; + +/* + * Put a pending event onto the available queue, so it can get reused. + * Attention! You must have the pending_event_spinlock before calling! + */ +static void free_pending_event(struct pending_event *ev) +{ + if (ev != NULL) { + ev->next = pending_event_avail; + pending_event_avail = ev; + } +} + +/* + * Enqueue the outbound event onto the stack. If the queue was + * empty to begin with, we must also issue it via the Hypervisor + * interface. There is a section of code below that will touch + * the first stack pointer without the protection of the pending_event_spinlock. + * This is OK, because we know that nobody else will be modifying + * the first pointer when we do this. + */ +static int signal_event(struct pending_event *ev) +{ + int rc = 0; + unsigned long flags; + int go = 1; + struct pending_event *ev1; + HvLpEvent_Rc hv_rc; + + /* enqueue the event */ + if (ev != NULL) { + ev->next = NULL; + spin_lock_irqsave(&pending_event_spinlock, flags); + if (pending_event_head == NULL) + pending_event_head = ev; + else { + go = 0; + pending_event_tail->next = ev; + } + pending_event_tail = ev; + spin_unlock_irqrestore(&pending_event_spinlock, flags); + } + + /* send the event */ + while (go) { + go = 0; + + /* any DMA data to send beforehand? */ + if (pending_event_head->dma_data_length > 0) + HvCallEvent_dmaToSp(pending_event_head->dma_data, + pending_event_head->remote_address, + pending_event_head->dma_data_length, + HvLpDma_Direction_LocalToRemote); + + hv_rc = HvCallEvent_signalLpEvent( + &pending_event_head->event.hp_lp_event); + if (hv_rc != HvLpEvent_Rc_Good) { + printk(KERN_ERR "mf.c: HvCallEvent_signalLpEvent() " + "failed with %d\n", (int)hv_rc); + + spin_lock_irqsave(&pending_event_spinlock, flags); + ev1 = pending_event_head; + pending_event_head = pending_event_head->next; + if (pending_event_head != NULL) + go = 1; + spin_unlock_irqrestore(&pending_event_spinlock, flags); + + if (ev1 == ev) + rc = -EIO; + else if (ev1->hdlr != NULL) + (*ev1->hdlr)((void *)ev1->event.hp_lp_event.xCorrelationToken, -EIO); + + spin_lock_irqsave(&pending_event_spinlock, flags); + free_pending_event(ev1); + spin_unlock_irqrestore(&pending_event_spinlock, flags); + } + } + + return rc; +} + +/* + * Allocate a new pending_event structure, and initialize it. + */ +static struct pending_event *new_pending_event(void) +{ + struct pending_event *ev = NULL; + HvLpIndex primary_lp = HvLpConfig_getPrimaryLpIndex(); + unsigned long flags; + struct HvLpEvent *hev; + + spin_lock_irqsave(&pending_event_spinlock, flags); + if (pending_event_avail != NULL) { + ev = pending_event_avail; + pending_event_avail = pending_event_avail->next; + } + spin_unlock_irqrestore(&pending_event_spinlock, flags); + if (ev == NULL) { + ev = kmalloc(sizeof(struct pending_event), GFP_ATOMIC); + if (ev == NULL) { + printk(KERN_ERR "mf.c: unable to kmalloc %ld bytes\n", + sizeof(struct pending_event)); + return NULL; + } + } + memset(ev, 0, sizeof(struct pending_event)); + hev = &ev->event.hp_lp_event; + hev->xFlags.xValid = 1; + hev->xFlags.xAckType = HvLpEvent_AckType_ImmediateAck; + hev->xFlags.xAckInd = HvLpEvent_AckInd_DoAck; + hev->xFlags.xFunction = HvLpEvent_Function_Int; + hev->xType = HvLpEvent_Type_MachineFac; + hev->xSourceLp = HvLpConfig_getLpIndex(); + hev->xTargetLp = primary_lp; + hev->xSizeMinus1 = sizeof(ev->event) - 1; + hev->xRc = HvLpEvent_Rc_Good; + hev->xSourceInstanceId = HvCallEvent_getSourceLpInstanceId(primary_lp, + HvLpEvent_Type_MachineFac); + hev->xTargetInstanceId = HvCallEvent_getTargetLpInstanceId(primary_lp, + HvLpEvent_Type_MachineFac); + + return ev; +} + +static int signal_vsp_instruction(struct vsp_cmd_data *vsp_cmd) +{ + struct pending_event *ev = new_pending_event(); + int rc; + struct vsp_rsp_data response; + + if (ev == NULL) + return -ENOMEM; + + init_completion(&response.com); + response.response = vsp_cmd; + ev->event.hp_lp_event.xSubtype = 6; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'V', 'I'); + ev->event.data.vsp_cmd.token = (u64)&response; + ev->event.data.vsp_cmd.cmd = vsp_cmd->cmd; + ev->event.data.vsp_cmd.lp_index = HvLpConfig_getLpIndex(); + ev->event.data.vsp_cmd.result_code = 0xFF; + ev->event.data.vsp_cmd.reserved = 0; + memcpy(&(ev->event.data.vsp_cmd.sub_data), + &(vsp_cmd->sub_data), sizeof(vsp_cmd->sub_data)); + mb(); + + rc = signal_event(ev); + if (rc == 0) + wait_for_completion(&response.com); + return rc; +} + + +/* + * Send a 12-byte CE message to the primary partition VSP object + */ +static int signal_ce_msg(char *ce_msg, struct ce_msg_comp_data *completion) +{ + struct pending_event *ev = new_pending_event(); + + if (ev == NULL) + return -ENOMEM; + + ev->event.hp_lp_event.xSubtype = 0; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'C', 'E'); + memcpy(ev->event.data.ce_msg.ce_msg, ce_msg, 12); + ev->event.data.ce_msg.completion = completion; + return signal_event(ev); +} + +/* + * Send a 12-byte CE message (with no data) to the primary partition VSP object + */ +static int signal_ce_msg_simple(u8 ce_op, struct ce_msg_comp_data *completion) +{ + u8 ce_msg[12]; + + memset(ce_msg, 0, sizeof(ce_msg)); + ce_msg[3] = ce_op; + return signal_ce_msg(ce_msg, completion); +} + +/* + * Send a 12-byte CE message and DMA data to the primary partition VSP object + */ +static int dma_and_signal_ce_msg(char *ce_msg, + struct ce_msg_comp_data *completion, void *dma_data, + unsigned dma_data_length, unsigned remote_address) +{ + struct pending_event *ev = new_pending_event(); + + if (ev == NULL) + return -ENOMEM; + + ev->event.hp_lp_event.xSubtype = 0; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'C', 'E'); + memcpy(ev->event.data.ce_msg.ce_msg, ce_msg, 12); + ev->event.data.ce_msg.completion = completion; + memcpy(ev->dma_data, dma_data, dma_data_length); + ev->dma_data_length = dma_data_length; + ev->remote_address = remote_address; + return signal_event(ev); +} + +/* + * Initiate a nice (hopefully) shutdown of Linux. We simply are + * going to try and send the init process a SIGINT signal. If + * this fails (why?), we'll simply force it off in a not-so-nice + * manner. + */ +static int shutdown(void) +{ + int rc = kill_proc(1, SIGINT, 1); + + if (rc) { + printk(KERN_ALERT "mf.c: SIGINT to init failed (%d), " + "hard shutdown commencing\n", rc); + mf_power_off(); + } else + printk(KERN_INFO "mf.c: init has been successfully notified " + "to proceed with shutdown\n"); + return rc; +} + +/* + * The primary partition VSP object is sending us a new + * event flow. Handle it... + */ +static void handle_int(struct io_mf_lp_event *event) +{ + struct ce_msg_data *ce_msg_data; + struct ce_msg_data *pce_msg_data; + unsigned long flags; + struct pending_event *pev; + + /* ack the interrupt */ + event->hp_lp_event.xRc = HvLpEvent_Rc_Good; + HvCallEvent_ackLpEvent(&event->hp_lp_event); + + /* process interrupt */ + switch (event->hp_lp_event.xSubtype) { + case 0: /* CE message */ + ce_msg_data = &event->data.ce_msg; + switch (ce_msg_data->ce_msg[3]) { + case 0x5B: /* power control notification */ + if ((ce_msg_data->ce_msg[5] & 0x20) != 0) { + printk(KERN_INFO "mf.c: Commencing partition shutdown\n"); + if (shutdown() == 0) + signal_ce_msg_simple(0xDB, NULL); + } + break; + case 0xC0: /* get time */ + spin_lock_irqsave(&pending_event_spinlock, flags); + pev = pending_event_head; + if (pev != NULL) + pending_event_head = pending_event_head->next; + spin_unlock_irqrestore(&pending_event_spinlock, flags); + if (pev == NULL) + break; + pce_msg_data = &pev->event.data.ce_msg; + if (pce_msg_data->ce_msg[3] != 0x40) + break; + if (pce_msg_data->completion != NULL) { + ce_msg_comp_hdlr handler = + pce_msg_data->completion->handler; + void *token = pce_msg_data->completion->token; + + if (handler != NULL) + (*handler)(token, ce_msg_data); + } + spin_lock_irqsave(&pending_event_spinlock, flags); + free_pending_event(pev); + spin_unlock_irqrestore(&pending_event_spinlock, flags); + /* send next waiting event */ + if (pending_event_head != NULL) + signal_event(NULL); + break; + } + break; + case 1: /* IT sys shutdown */ + printk(KERN_INFO "mf.c: Commencing system shutdown\n"); + shutdown(); + break; + } +} + +/* + * The primary partition VSP object is acknowledging the receipt + * of a flow we sent to them. If there are other flows queued + * up, we must send another one now... + */ +static void handle_ack(struct io_mf_lp_event *event) +{ + unsigned long flags; + struct pending_event *two = NULL; + unsigned long free_it = 0; + struct ce_msg_data *ce_msg_data; + struct ce_msg_data *pce_msg_data; + struct vsp_rsp_data *rsp; + + /* handle current event */ + if (pending_event_head == NULL) { + printk(KERN_ERR "mf.c: stack empty for receiving ack\n"); + return; + } + + switch (event->hp_lp_event.xSubtype) { + case 0: /* CE msg */ + ce_msg_data = &event->data.ce_msg; + if (ce_msg_data->ce_msg[3] != 0x40) { + free_it = 1; + break; + } + if (ce_msg_data->ce_msg[2] == 0) + break; + free_it = 1; + pce_msg_data = &pending_event_head->event.data.ce_msg; + if (pce_msg_data->completion != NULL) { + ce_msg_comp_hdlr handler = + pce_msg_data->completion->handler; + void *token = pce_msg_data->completion->token; + + if (handler != NULL) + (*handler)(token, ce_msg_data); + } + break; + case 4: /* allocate */ + case 5: /* deallocate */ + if (pending_event_head->hdlr != NULL) + (*pending_event_head->hdlr)((void *)event->hp_lp_event.xCorrelationToken, event->data.alloc.count); + free_it = 1; + break; + case 6: + free_it = 1; + rsp = (struct vsp_rsp_data *)event->data.vsp_cmd.token; + if (rsp == NULL) { + printk(KERN_ERR "mf.c: no rsp\n"); + break; + } + if (rsp->response != NULL) + memcpy(rsp->response, &event->data.vsp_cmd, + sizeof(event->data.vsp_cmd)); + complete(&rsp->com); + break; + } + + /* remove from queue */ + spin_lock_irqsave(&pending_event_spinlock, flags); + if ((pending_event_head != NULL) && (free_it == 1)) { + struct pending_event *oldHead = pending_event_head; + + pending_event_head = pending_event_head->next; + two = pending_event_head; + free_pending_event(oldHead); + } + spin_unlock_irqrestore(&pending_event_spinlock, flags); + + /* send next waiting event */ + if (two != NULL) + signal_event(NULL); +} + +/* + * This is the generic event handler we are registering with + * the Hypervisor. Ensure the flows are for us, and then + * parse it enough to know if it is an interrupt or an + * acknowledge. + */ +static void hv_handler(struct HvLpEvent *event, struct pt_regs *regs) +{ + if ((event != NULL) && (event->xType == HvLpEvent_Type_MachineFac)) { + switch(event->xFlags.xFunction) { + case HvLpEvent_Function_Ack: + handle_ack((struct io_mf_lp_event *)event); + break; + case HvLpEvent_Function_Int: + handle_int((struct io_mf_lp_event *)event); + break; + default: + printk(KERN_ERR "mf.c: non ack/int event received\n"); + break; + } + } else + printk(KERN_ERR "mf.c: alien event received\n"); +} + +/* + * Global kernel interface to allocate and seed events into the + * Hypervisor. + */ +void mf_allocate_lp_events(HvLpIndex target_lp, HvLpEvent_Type type, + unsigned size, unsigned count, MFCompleteHandler hdlr, + void *user_token) +{ + struct pending_event *ev = new_pending_event(); + int rc; + + if (ev == NULL) { + rc = -ENOMEM; + } else { + ev->event.hp_lp_event.xSubtype = 4; + ev->event.hp_lp_event.xCorrelationToken = (u64)user_token; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'M', 'A'); + ev->event.data.alloc.target_lp = target_lp; + ev->event.data.alloc.type = type; + ev->event.data.alloc.size = size; + ev->event.data.alloc.count = count; + ev->hdlr = hdlr; + rc = signal_event(ev); + } + if ((rc != 0) && (hdlr != NULL)) + (*hdlr)(user_token, rc); +} +EXPORT_SYMBOL(mf_allocate_lp_events); + +/* + * Global kernel interface to unseed and deallocate events already in + * Hypervisor. + */ +void mf_deallocate_lp_events(HvLpIndex target_lp, HvLpEvent_Type type, + unsigned count, MFCompleteHandler hdlr, void *user_token) +{ + struct pending_event *ev = new_pending_event(); + int rc; + + if (ev == NULL) + rc = -ENOMEM; + else { + ev->event.hp_lp_event.xSubtype = 5; + ev->event.hp_lp_event.xCorrelationToken = (u64)user_token; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'M', 'D'); + ev->event.data.alloc.target_lp = target_lp; + ev->event.data.alloc.type = type; + ev->event.data.alloc.count = count; + ev->hdlr = hdlr; + rc = signal_event(ev); + } + if ((rc != 0) && (hdlr != NULL)) + (*hdlr)(user_token, rc); +} +EXPORT_SYMBOL(mf_deallocate_lp_events); + +/* + * Global kernel interface to tell the VSP object in the primary + * partition to power this partition off. + */ +void mf_power_off(void) +{ + printk(KERN_INFO "mf.c: Down it goes...\n"); + signal_ce_msg_simple(0x4d, NULL); + for (;;) + ; +} + +/* + * Global kernel interface to tell the VSP object in the primary + * partition to reboot this partition. + */ +void mf_reboot(void) +{ + printk(KERN_INFO "mf.c: Preparing to bounce...\n"); + signal_ce_msg_simple(0x4e, NULL); + for (;;) + ; +} + +/* + * Display a single word SRC onto the VSP control panel. + */ +void mf_display_src(u32 word) +{ + u8 ce[12]; + + memset(ce, 0, sizeof(ce)); + ce[3] = 0x4a; + ce[7] = 0x01; + ce[8] = word >> 24; + ce[9] = word >> 16; + ce[10] = word >> 8; + ce[11] = word; + signal_ce_msg(ce, NULL); +} + +/* + * Display a single word SRC of the form "PROGXXXX" on the VSP control panel. + */ +void mf_display_progress(u16 value) +{ + u8 ce[12]; + u8 src[72]; + + memcpy(ce, "\x00\x00\x04\x4A\x00\x00\x00\x48\x00\x00\x00\x00", 12); + memcpy(src, "\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00PROGxxxx ", + 72); + src[6] = value >> 8; + src[7] = value & 255; + src[44] = "0123456789ABCDEF"[(value >> 12) & 15]; + src[45] = "0123456789ABCDEF"[(value >> 8) & 15]; + src[46] = "0123456789ABCDEF"[(value >> 4) & 15]; + src[47] = "0123456789ABCDEF"[value & 15]; + dma_and_signal_ce_msg(ce, NULL, src, sizeof(src), 9 * 64 * 1024); +} + +/* + * Clear the VSP control panel. Used to "erase" an SRC that was + * previously displayed. + */ +void mf_clear_src(void) +{ + signal_ce_msg_simple(0x4b, NULL); +} + +/* + * Initialization code here. + */ +void mf_init(void) +{ + int i; + + /* initialize */ + spin_lock_init(&pending_event_spinlock); + for (i = 0; + i < sizeof(pending_event_prealloc) / sizeof(*pending_event_prealloc); + ++i) + free_pending_event(&pending_event_prealloc[i]); + HvLpEvent_registerHandler(HvLpEvent_Type_MachineFac, &hv_handler); + + /* virtual continue ack */ + signal_ce_msg_simple(0x57, NULL); + + /* initialization complete */ + printk(KERN_NOTICE "mf.c: iSeries Linux LPAR Machine Facilities " + "initialized\n"); +} + +struct rtc_time_data { + struct completion com; + struct ce_msg_data ce_msg; + int rc; +}; + +static void get_rtc_time_complete(void *token, struct ce_msg_data *ce_msg) +{ + struct rtc_time_data *rtc = token; + + memcpy(&rtc->ce_msg, ce_msg, sizeof(rtc->ce_msg)); + rtc->rc = 0; + complete(&rtc->com); +} + +static int rtc_set_tm(int rc, u8 *ce_msg, struct rtc_time *tm) +{ + tm->tm_wday = 0; + tm->tm_yday = 0; + tm->tm_isdst = 0; + if (rc) { + tm->tm_sec = 0; + tm->tm_min = 0; + tm->tm_hour = 0; + tm->tm_mday = 15; + tm->tm_mon = 5; + tm->tm_year = 52; + return rc; + } + + if ((ce_msg[2] == 0xa9) || + (ce_msg[2] == 0xaf)) { + /* TOD clock is not set */ + tm->tm_sec = 1; + tm->tm_min = 1; + tm->tm_hour = 1; + tm->tm_mday = 10; + tm->tm_mon = 8; + tm->tm_year = 71; + mf_set_rtc(tm); + } + { + u8 year = ce_msg[5]; + u8 sec = ce_msg[6]; + u8 min = ce_msg[7]; + u8 hour = ce_msg[8]; + u8 day = ce_msg[10]; + u8 mon = ce_msg[11]; + + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + + if (year <= 69) + year += 100; + + tm->tm_sec = sec; + tm->tm_min = min; + tm->tm_hour = hour; + tm->tm_mday = day; + tm->tm_mon = mon; + tm->tm_year = year; + } + + return 0; +} + +int mf_get_rtc(struct rtc_time *tm) +{ + struct ce_msg_comp_data ce_complete; + struct rtc_time_data rtc_data; + int rc; + + memset(&ce_complete, 0, sizeof(ce_complete)); + memset(&rtc_data, 0, sizeof(rtc_data)); + init_completion(&rtc_data.com); + ce_complete.handler = &get_rtc_time_complete; + ce_complete.token = &rtc_data; + rc = signal_ce_msg_simple(0x40, &ce_complete); + if (rc) + return rc; + wait_for_completion(&rtc_data.com); + return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm); +} + +struct boot_rtc_time_data { + int busy; + struct ce_msg_data ce_msg; + int rc; +}; + +static void get_boot_rtc_time_complete(void *token, struct ce_msg_data *ce_msg) +{ + struct boot_rtc_time_data *rtc = token; + + memcpy(&rtc->ce_msg, ce_msg, sizeof(rtc->ce_msg)); + rtc->rc = 0; + rtc->busy = 0; +} + +int mf_get_boot_rtc(struct rtc_time *tm) +{ + struct ce_msg_comp_data ce_complete; + struct boot_rtc_time_data rtc_data; + int rc; + + memset(&ce_complete, 0, sizeof(ce_complete)); + memset(&rtc_data, 0, sizeof(rtc_data)); + rtc_data.busy = 1; + ce_complete.handler = &get_boot_rtc_time_complete; + ce_complete.token = &rtc_data; + rc = signal_ce_msg_simple(0x40, &ce_complete); + if (rc) + return rc; + /* We need to poll here as we are not yet taking interrupts */ + while (rtc_data.busy) { + if (hvlpevent_is_pending()) + process_hvlpevents(NULL); + } + return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm); +} + +int mf_set_rtc(struct rtc_time *tm) +{ + char ce_time[12]; + u8 day, mon, hour, min, sec, y1, y2; + unsigned year; + + year = 1900 + tm->tm_year; + y1 = year / 100; + y2 = year % 100; + + sec = tm->tm_sec; + min = tm->tm_min; + hour = tm->tm_hour; + day = tm->tm_mday; + mon = tm->tm_mon + 1; + + BIN_TO_BCD(sec); + BIN_TO_BCD(min); + BIN_TO_BCD(hour); + BIN_TO_BCD(mon); + BIN_TO_BCD(day); + BIN_TO_BCD(y1); + BIN_TO_BCD(y2); + + memset(ce_time, 0, sizeof(ce_time)); + ce_time[3] = 0x41; + ce_time[4] = y1; + ce_time[5] = y2; + ce_time[6] = sec; + ce_time[7] = min; + ce_time[8] = hour; + ce_time[10] = day; + ce_time[11] = mon; + + return signal_ce_msg(ce_time, NULL); +} + +#ifdef CONFIG_PROC_FS + +static int proc_mf_dump_cmdline(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + char *p; + struct vsp_cmd_data vsp_cmd; + int rc; + dma_addr_t dma_addr; + + /* The HV appears to return no more than 256 bytes of command line */ + if (off >= 256) + return 0; + if ((off + count) > 256) + count = 256 - off; + + dma_addr = dma_map_single(iSeries_vio_dev, page, off + count, + DMA_FROM_DEVICE); + if (dma_mapping_error(dma_addr)) + return -ENOMEM; + memset(page, 0, off + count); + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 33; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = (u64)data; + vsp_cmd.sub_data.kern.length = off + count; + mb(); + rc = signal_vsp_instruction(&vsp_cmd); + dma_unmap_single(iSeries_vio_dev, dma_addr, off + count, + DMA_FROM_DEVICE); + if (rc) + return rc; + if (vsp_cmd.result_code != 0) + return -ENOMEM; + p = page; + len = 0; + while (len < (off + count)) { + if ((*p == '\0') || (*p == '\n')) { + if (*p == '\0') + *p = '\n'; + p++; + len++; + *eof = 1; + break; + } + p++; + len++; + } + + if (len < off) { + *eof = 1; + len = 0; + } + return len; +} + +#if 0 +static int mf_getVmlinuxChunk(char *buffer, int *size, int offset, u64 side) +{ + struct vsp_cmd_data vsp_cmd; + int rc; + int len = *size; + dma_addr_t dma_addr; + + dma_addr = dma_map_single(iSeries_vio_dev, buffer, len, + DMA_FROM_DEVICE); + memset(buffer, 0, len); + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 32; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = side; + vsp_cmd.sub_data.kern.offset = offset; + vsp_cmd.sub_data.kern.length = len; + mb(); + rc = signal_vsp_instruction(&vsp_cmd); + if (rc == 0) { + if (vsp_cmd.result_code == 0) + *size = vsp_cmd.sub_data.length_out; + else + rc = -ENOMEM; + } + + dma_unmap_single(iSeries_vio_dev, dma_addr, len, DMA_FROM_DEVICE); + + return rc; +} + +static int proc_mf_dump_vmlinux(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int sizeToGet = count; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (mf_getVmlinuxChunk(page, &sizeToGet, off, (u64)data) == 0) { + if (sizeToGet != 0) { + *start = page + off; + return sizeToGet; + } + *eof = 1; + return 0; + } + *eof = 1; + return 0; +} +#endif + +static int proc_mf_dump_side(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + char mf_current_side = ' '; + struct vsp_cmd_data vsp_cmd; + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 2; + vsp_cmd.sub_data.ipl_type = 0; + mb(); + + if (signal_vsp_instruction(&vsp_cmd) == 0) { + if (vsp_cmd.result_code == 0) { + switch (vsp_cmd.sub_data.ipl_type) { + case 0: mf_current_side = 'A'; + break; + case 1: mf_current_side = 'B'; + break; + case 2: mf_current_side = 'C'; + break; + default: mf_current_side = 'D'; + break; + } + } + } + + len = sprintf(page, "%c\n", mf_current_side); + + if (len <= (off + count)) + *eof = 1; + *start = page + off; + len -= off; + if (len > count) + len = count; + if (len < 0) + len = 0; + return len; +} + +static int proc_mf_change_side(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char side; + u64 newSide; + struct vsp_cmd_data vsp_cmd; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (count == 0) + return 0; + + if (get_user(side, buffer)) + return -EFAULT; + + switch (side) { + case 'A': newSide = 0; + break; + case 'B': newSide = 1; + break; + case 'C': newSide = 2; + break; + case 'D': newSide = 3; + break; + default: + printk(KERN_ERR "mf_proc.c: proc_mf_change_side: invalid side\n"); + return -EINVAL; + } + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.sub_data.ipl_type = newSide; + vsp_cmd.cmd = 10; + + (void)signal_vsp_instruction(&vsp_cmd); + + return count; +} + +#if 0 +static void mf_getSrcHistory(char *buffer, int size) +{ + struct IplTypeReturnStuff return_stuff; + struct pending_event *ev = new_pending_event(); + int rc = 0; + char *pages[4]; + + pages[0] = kmalloc(4096, GFP_ATOMIC); + pages[1] = kmalloc(4096, GFP_ATOMIC); + pages[2] = kmalloc(4096, GFP_ATOMIC); + pages[3] = kmalloc(4096, GFP_ATOMIC); + if ((ev == NULL) || (pages[0] == NULL) || (pages[1] == NULL) + || (pages[2] == NULL) || (pages[3] == NULL)) + return -ENOMEM; + + return_stuff.xType = 0; + return_stuff.xRc = 0; + return_stuff.xDone = 0; + ev->event.hp_lp_event.xSubtype = 6; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'V', 'I'); + ev->event.data.vsp_cmd.xEvent = &return_stuff; + ev->event.data.vsp_cmd.cmd = 4; + ev->event.data.vsp_cmd.lp_index = HvLpConfig_getLpIndex(); + ev->event.data.vsp_cmd.result_code = 0xFF; + ev->event.data.vsp_cmd.reserved = 0; + ev->event.data.vsp_cmd.sub_data.page[0] = ISERIES_HV_ADDR(pages[0]); + ev->event.data.vsp_cmd.sub_data.page[1] = ISERIES_HV_ADDR(pages[1]); + ev->event.data.vsp_cmd.sub_data.page[2] = ISERIES_HV_ADDR(pages[2]); + ev->event.data.vsp_cmd.sub_data.page[3] = ISERIES_HV_ADDR(pages[3]); + mb(); + if (signal_event(ev) != 0) + return; + + while (return_stuff.xDone != 1) + udelay(10); + if (return_stuff.xRc == 0) + memcpy(buffer, pages[0], size); + kfree(pages[0]); + kfree(pages[1]); + kfree(pages[2]); + kfree(pages[3]); +} +#endif + +static int proc_mf_dump_src(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ +#if 0 + int len; + + mf_getSrcHistory(page, count); + len = count; + len -= off; + if (len < count) { + *eof = 1; + if (len <= 0) + return 0; + } else + len = count; + *start = page + off; + return len; +#else + return 0; +#endif +} + +static int proc_mf_change_src(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char stkbuf[10]; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if ((count < 4) && (count != 1)) { + printk(KERN_ERR "mf_proc: invalid src\n"); + return -EINVAL; + } + + if (count > (sizeof(stkbuf) - 1)) + count = sizeof(stkbuf) - 1; + if (copy_from_user(stkbuf, buffer, count)) + return -EFAULT; + + if ((count == 1) && (*stkbuf == '\0')) + mf_clear_src(); + else + mf_display_src(*(u32 *)stkbuf); + + return count; +} + +static int proc_mf_change_cmdline(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + struct vsp_cmd_data vsp_cmd; + dma_addr_t dma_addr; + char *page; + int ret = -EACCES; + + if (!capable(CAP_SYS_ADMIN)) + goto out; + + dma_addr = 0; + page = dma_alloc_coherent(iSeries_vio_dev, count, &dma_addr, + GFP_ATOMIC); + ret = -ENOMEM; + if (page == NULL) + goto out; + + ret = -EFAULT; + if (copy_from_user(page, buffer, count)) + goto out_free; + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 31; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = (u64)data; + vsp_cmd.sub_data.kern.length = count; + mb(); + (void)signal_vsp_instruction(&vsp_cmd); + ret = count; + +out_free: + dma_free_coherent(iSeries_vio_dev, count, page, dma_addr); +out: + return ret; +} + +static ssize_t proc_mf_change_vmlinux(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + ssize_t rc; + dma_addr_t dma_addr; + char *page; + struct vsp_cmd_data vsp_cmd; + + rc = -EACCES; + if (!capable(CAP_SYS_ADMIN)) + goto out; + + dma_addr = 0; + page = dma_alloc_coherent(iSeries_vio_dev, count, &dma_addr, + GFP_ATOMIC); + rc = -ENOMEM; + if (page == NULL) { + printk(KERN_ERR "mf.c: couldn't allocate memory to set vmlinux chunk\n"); + goto out; + } + rc = -EFAULT; + if (copy_from_user(page, buf, count)) + goto out_free; + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 30; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = (u64)dp->data; + vsp_cmd.sub_data.kern.offset = *ppos; + vsp_cmd.sub_data.kern.length = count; + mb(); + rc = signal_vsp_instruction(&vsp_cmd); + if (rc) + goto out_free; + rc = -ENOMEM; + if (vsp_cmd.result_code != 0) + goto out_free; + + *ppos += count; + rc = count; +out_free: + dma_free_coherent(iSeries_vio_dev, count, page, dma_addr); +out: + return rc; +} + +static struct file_operations proc_vmlinux_operations = { + .write = proc_mf_change_vmlinux, +}; + +static int __init mf_proc_init(void) +{ + struct proc_dir_entry *mf_proc_root; + struct proc_dir_entry *ent; + struct proc_dir_entry *mf; + char name[2]; + int i; + + mf_proc_root = proc_mkdir("iSeries/mf", NULL); + if (!mf_proc_root) + return 1; + + name[1] = '\0'; + for (i = 0; i < 4; i++) { + name[0] = 'A' + i; + mf = proc_mkdir(name, mf_proc_root); + if (!mf) + return 1; + + ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)(long)i; + ent->read_proc = proc_mf_dump_cmdline; + ent->write_proc = proc_mf_change_cmdline; + + if (i == 3) /* no vmlinux entry for 'D' */ + continue; + + ent = create_proc_entry("vmlinux", S_IFREG|S_IWUSR, mf); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)(long)i; + ent->proc_fops = &proc_vmlinux_operations; + } + + ent = create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_mf_dump_side; + ent->write_proc = proc_mf_change_side; + + ent = create_proc_entry("src", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_mf_dump_src; + ent->write_proc = proc_mf_change_src; + + return 0; +} + +__initcall(mf_proc_init); + +#endif /* CONFIG_PROC_FS */ + +/* + * Get the RTC from the virtual service processor + * This requires flowing LpEvents to the primary partition + */ +void iSeries_get_rtc_time(struct rtc_time *rtc_tm) +{ + if (piranha_simulator) + return; + + mf_get_rtc(rtc_tm); + rtc_tm->tm_mon--; +} + +/* + * Set the RTC in the virtual service processor + * This requires flowing LpEvents to the primary partition + */ +int iSeries_set_rtc_time(struct rtc_time *tm) +{ + mf_set_rtc(tm); + return 0; +} + +void iSeries_get_boot_time(struct rtc_time *tm) +{ + if (piranha_simulator) + return; + + mf_get_boot_rtc(tm); + tm->tm_mon -= 1; +} diff --git a/arch/powerpc/platforms/iseries/misc.S b/arch/powerpc/platforms/iseries/misc.S new file mode 100644 index 00000000000..09f14522e17 --- /dev/null +++ b/arch/powerpc/platforms/iseries/misc.S @@ -0,0 +1,55 @@ +/* + * This file contains miscellaneous low-level functions. + * Copyright (C) 1995-2005 IBM Corp + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) + * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/processor.h> +#include <asm/asm-offsets.h> + + .text + +/* unsigned long local_save_flags(void) */ +_GLOBAL(local_get_flags) + lbz r3,PACAPROCENABLED(r13) + blr + +/* unsigned long local_irq_disable(void) */ +_GLOBAL(local_irq_disable) + lbz r3,PACAPROCENABLED(r13) + li r4,0 + stb r4,PACAPROCENABLED(r13) + blr /* Done */ + +/* void local_irq_restore(unsigned long flags) */ +_GLOBAL(local_irq_restore) + lbz r5,PACAPROCENABLED(r13) + /* Check if things are setup the way we want _already_. */ + cmpw 0,r3,r5 + beqlr + /* are we enabling interrupts? */ + cmpdi 0,r3,0 + stb r3,PACAPROCENABLED(r13) + beqlr + /* Check pending interrupts */ + /* A decrementer, IPI or PMC interrupt may have occurred + * while we were in the hypervisor (which enables) */ + ld r4,PACALPPACA+LPPACAANYINT(r13) + cmpdi r4,0 + beqlr + + /* + * Handle pending interrupts in interrupt context + */ + li r0,0x5555 + sc + blr diff --git a/arch/powerpc/platforms/iseries/pci.c b/arch/powerpc/platforms/iseries/pci.c new file mode 100644 index 00000000000..f79c5982446 --- /dev/null +++ b/arch/powerpc/platforms/iseries/pci.c @@ -0,0 +1,911 @@ +/* + * Copyright (C) 2001 Allan Trautman, IBM Corporation + * + * iSeries specific routines for PCI. + * + * Based on code from pci.c and iSeries_pci.c 32bit + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/ide.h> +#include <linux/pci.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/ppcdebug.h> +#include <asm/iommu.h> + +#include <asm/iSeries/HvCallPci.h> +#include <asm/iSeries/HvCallXm.h> +#include <asm/iSeries/iSeries_pci.h> +#include <asm/iSeries/mf.h> + +#include <asm/ppc-pci.h> + +#include "irq.h" + +extern unsigned long io_page_mask; + +/* + * Forward declares of prototypes. + */ +static struct device_node *find_Device_Node(int bus, int devfn); +static void scan_PHB_slots(struct pci_controller *Phb); +static void scan_EADS_bridge(HvBusNumber Bus, HvSubBusNumber SubBus, int IdSel); +static int scan_bridge_slot(HvBusNumber Bus, struct HvCallPci_BridgeInfo *Info); + +LIST_HEAD(iSeries_Global_Device_List); + +static int DeviceCount; + +/* Counters and control flags. */ +static long Pci_Io_Read_Count; +static long Pci_Io_Write_Count; +#if 0 +static long Pci_Cfg_Read_Count; +static long Pci_Cfg_Write_Count; +#endif +static long Pci_Error_Count; + +static int Pci_Retry_Max = 3; /* Only retry 3 times */ +static int Pci_Error_Flag = 1; /* Set Retry Error on. */ + +static struct pci_ops iSeries_pci_ops; + +/* + * Table defines + * Each Entry size is 4 MB * 1024 Entries = 4GB I/O address space. + */ +#define IOMM_TABLE_MAX_ENTRIES 1024 +#define IOMM_TABLE_ENTRY_SIZE 0x0000000000400000UL +#define BASE_IO_MEMORY 0xE000000000000000UL + +static unsigned long max_io_memory = 0xE000000000000000UL; +static long current_iomm_table_entry; + +/* + * Lookup Tables. + */ +static struct device_node **iomm_table; +static u8 *iobar_table; + +/* + * Static and Global variables + */ +static char *pci_io_text = "iSeries PCI I/O"; +static DEFINE_SPINLOCK(iomm_table_lock); + +/* + * iomm_table_initialize + * + * Allocates and initalizes the Address Translation Table and Bar + * Tables to get them ready for use. Must be called before any + * I/O space is handed out to the device BARs. + */ +static void iomm_table_initialize(void) +{ + spin_lock(&iomm_table_lock); + iomm_table = kmalloc(sizeof(*iomm_table) * IOMM_TABLE_MAX_ENTRIES, + GFP_KERNEL); + iobar_table = kmalloc(sizeof(*iobar_table) * IOMM_TABLE_MAX_ENTRIES, + GFP_KERNEL); + spin_unlock(&iomm_table_lock); + if ((iomm_table == NULL) || (iobar_table == NULL)) + panic("PCI: I/O tables allocation failed.\n"); +} + +/* + * iomm_table_allocate_entry + * + * Adds pci_dev entry in address translation table + * + * - Allocates the number of entries required in table base on BAR + * size. + * - Allocates starting at BASE_IO_MEMORY and increases. + * - The size is round up to be a multiple of entry size. + * - CurrentIndex is incremented to keep track of the last entry. + * - Builds the resource entry for allocated BARs. + */ +static void iomm_table_allocate_entry(struct pci_dev *dev, int bar_num) +{ + struct resource *bar_res = &dev->resource[bar_num]; + long bar_size = pci_resource_len(dev, bar_num); + + /* + * No space to allocate, quick exit, skip Allocation. + */ + if (bar_size == 0) + return; + /* + * Set Resource values. + */ + spin_lock(&iomm_table_lock); + bar_res->name = pci_io_text; + bar_res->start = + IOMM_TABLE_ENTRY_SIZE * current_iomm_table_entry; + bar_res->start += BASE_IO_MEMORY; + bar_res->end = bar_res->start + bar_size - 1; + /* + * Allocate the number of table entries needed for BAR. + */ + while (bar_size > 0 ) { + iomm_table[current_iomm_table_entry] = dev->sysdata; + iobar_table[current_iomm_table_entry] = bar_num; + bar_size -= IOMM_TABLE_ENTRY_SIZE; + ++current_iomm_table_entry; + } + max_io_memory = BASE_IO_MEMORY + + (IOMM_TABLE_ENTRY_SIZE * current_iomm_table_entry); + spin_unlock(&iomm_table_lock); +} + +/* + * allocate_device_bars + * + * - Allocates ALL pci_dev BAR's and updates the resources with the + * BAR value. BARS with zero length will have the resources + * The HvCallPci_getBarParms is used to get the size of the BAR + * space. It calls iomm_table_allocate_entry to allocate + * each entry. + * - Loops through The Bar resources(0 - 5) including the ROM + * is resource(6). + */ +static void allocate_device_bars(struct pci_dev *dev) +{ + struct resource *bar_res; + int bar_num; + + for (bar_num = 0; bar_num <= PCI_ROM_RESOURCE; ++bar_num) { + bar_res = &dev->resource[bar_num]; + iomm_table_allocate_entry(dev, bar_num); + } +} + +/* + * Log error information to system console. + * Filter out the device not there errors. + * PCI: EADs Connect Failed 0x18.58.10 Rc: 0x00xx + * PCI: Read Vendor Failed 0x18.58.10 Rc: 0x00xx + * PCI: Connect Bus Unit Failed 0x18.58.10 Rc: 0x00xx + */ +static void pci_Log_Error(char *Error_Text, int Bus, int SubBus, + int AgentId, int HvRc) +{ + if (HvRc == 0x0302) + return; + printk(KERN_ERR "PCI: %s Failed: 0x%02X.%02X.%02X Rc: 0x%04X", + Error_Text, Bus, SubBus, AgentId, HvRc); +} + +/* + * build_device_node(u16 Bus, int SubBus, u8 DevFn) + */ +static struct device_node *build_device_node(HvBusNumber Bus, + HvSubBusNumber SubBus, int AgentId, int Function) +{ + struct device_node *node; + struct pci_dn *pdn; + + PPCDBG(PPCDBG_BUSWALK, + "-build_device_node 0x%02X.%02X.%02X Function: %02X\n", + Bus, SubBus, AgentId, Function); + + node = kmalloc(sizeof(struct device_node), GFP_KERNEL); + if (node == NULL) + return NULL; + memset(node, 0, sizeof(struct device_node)); + pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); + if (pdn == NULL) { + kfree(node); + return NULL; + } + node->data = pdn; + pdn->node = node; + list_add_tail(&pdn->Device_List, &iSeries_Global_Device_List); +#if 0 + pdn->DsaAddr = ((u64)Bus << 48) + ((u64)SubBus << 40) + ((u64)0x10 << 32); +#endif + pdn->DsaAddr.DsaAddr = 0; + pdn->DsaAddr.Dsa.busNumber = Bus; + pdn->DsaAddr.Dsa.subBusNumber = SubBus; + pdn->DsaAddr.Dsa.deviceId = 0x10; + pdn->devfn = PCI_DEVFN(ISERIES_ENCODE_DEVICE(AgentId), Function); + return node; +} + +/* + * unsigned long __init find_and_init_phbs(void) + * + * Description: + * This function checks for all possible system PCI host bridges that connect + * PCI buses. The system hypervisor is queried as to the guest partition + * ownership status. A pci_controller is built for any bus which is partially + * owned or fully owned by this guest partition. + */ +unsigned long __init find_and_init_phbs(void) +{ + struct pci_controller *phb; + HvBusNumber bus; + + PPCDBG(PPCDBG_BUSWALK, "find_and_init_phbs Entry\n"); + + /* Check all possible buses. */ + for (bus = 0; bus < 256; bus++) { + int ret = HvCallXm_testBus(bus); + if (ret == 0) { + printk("bus %d appears to exist\n", bus); + + phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), GFP_KERNEL); + if (phb == NULL) + return -ENOMEM; + pci_setup_pci_controller(phb); + + phb->pci_mem_offset = phb->local_number = bus; + phb->first_busno = bus; + phb->last_busno = bus; + phb->ops = &iSeries_pci_ops; + + PPCDBG(PPCDBG_BUSWALK, "PCI:Create iSeries pci_controller(%p), Bus: %04X\n", + phb, bus); + + /* Find and connect the devices. */ + scan_PHB_slots(phb); + } + /* + * Check for Unexpected Return code, a clue that something + * has gone wrong. + */ + else if (ret != 0x0301) + printk(KERN_ERR "Unexpected Return on Probe(0x%04X): 0x%04X", + bus, ret); + } + return 0; +} + +/* + * iSeries_pcibios_init + * + * Chance to initialize and structures or variable before PCI Bus walk. + */ +void iSeries_pcibios_init(void) +{ + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Entry.\n"); + iomm_table_initialize(); + find_and_init_phbs(); + io_page_mask = -1; + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Exit.\n"); +} + +/* + * iSeries_pci_final_fixup(void) + */ +void __init iSeries_pci_final_fixup(void) +{ + struct pci_dev *pdev = NULL; + struct device_node *node; + int DeviceCount = 0; + + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup Entry.\n"); + + /* Fix up at the device node and pci_dev relationship */ + mf_display_src(0xC9000100); + + printk("pcibios_final_fixup\n"); + for_each_pci_dev(pdev) { + node = find_Device_Node(pdev->bus->number, pdev->devfn); + printk("pci dev %p (%x.%x), node %p\n", pdev, + pdev->bus->number, pdev->devfn, node); + + if (node != NULL) { + ++DeviceCount; + pdev->sysdata = (void *)node; + PCI_DN(node)->pcidev = pdev; + PPCDBG(PPCDBG_BUSWALK, + "pdev 0x%p <==> DevNode 0x%p\n", + pdev, node); + allocate_device_bars(pdev); + iSeries_Device_Information(pdev, DeviceCount); + iommu_devnode_init_iSeries(node); + } else + printk("PCI: Device Tree not found for 0x%016lX\n", + (unsigned long)pdev); + pdev->irq = PCI_DN(node)->Irq; + } + iSeries_activate_IRQs(); + mf_display_src(0xC9000200); +} + +void pcibios_fixup_bus(struct pci_bus *PciBus) +{ + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup_bus(0x%04X) Entry.\n", + PciBus->number); +} + +void pcibios_fixup_resources(struct pci_dev *pdev) +{ + PPCDBG(PPCDBG_BUSWALK, "fixup_resources pdev %p\n", pdev); +} + +/* + * Loop through each node function to find usable EADs bridges. + */ +static void scan_PHB_slots(struct pci_controller *Phb) +{ + struct HvCallPci_DeviceInfo *DevInfo; + HvBusNumber bus = Phb->local_number; /* System Bus */ + const HvSubBusNumber SubBus = 0; /* EADs is always 0. */ + int HvRc = 0; + int IdSel; + const int MaxAgents = 8; + + DevInfo = (struct HvCallPci_DeviceInfo*) + kmalloc(sizeof(struct HvCallPci_DeviceInfo), GFP_KERNEL); + if (DevInfo == NULL) + return; + + /* + * Probe for EADs Bridges + */ + for (IdSel = 1; IdSel < MaxAgents; ++IdSel) { + HvRc = HvCallPci_getDeviceInfo(bus, SubBus, IdSel, + ISERIES_HV_ADDR(DevInfo), + sizeof(struct HvCallPci_DeviceInfo)); + if (HvRc == 0) { + if (DevInfo->deviceType == HvCallPci_NodeDevice) + scan_EADS_bridge(bus, SubBus, IdSel); + else + printk("PCI: Invalid System Configuration(0x%02X)" + " for bus 0x%02x id 0x%02x.\n", + DevInfo->deviceType, bus, IdSel); + } + else + pci_Log_Error("getDeviceInfo", bus, SubBus, IdSel, HvRc); + } + kfree(DevInfo); +} + +static void scan_EADS_bridge(HvBusNumber bus, HvSubBusNumber SubBus, + int IdSel) +{ + struct HvCallPci_BridgeInfo *BridgeInfo; + HvAgentId AgentId; + int Function; + int HvRc; + + BridgeInfo = (struct HvCallPci_BridgeInfo *) + kmalloc(sizeof(struct HvCallPci_BridgeInfo), GFP_KERNEL); + if (BridgeInfo == NULL) + return; + + /* Note: hvSubBus and irq is always be 0 at this level! */ + for (Function = 0; Function < 8; ++Function) { + AgentId = ISERIES_PCI_AGENTID(IdSel, Function); + HvRc = HvCallXm_connectBusUnit(bus, SubBus, AgentId, 0); + if (HvRc == 0) { + printk("found device at bus %d idsel %d func %d (AgentId %x)\n", + bus, IdSel, Function, AgentId); + /* Connect EADs: 0x18.00.12 = 0x00 */ + PPCDBG(PPCDBG_BUSWALK, + "PCI:Connect EADs: 0x%02X.%02X.%02X\n", + bus, SubBus, AgentId); + HvRc = HvCallPci_getBusUnitInfo(bus, SubBus, AgentId, + ISERIES_HV_ADDR(BridgeInfo), + sizeof(struct HvCallPci_BridgeInfo)); + if (HvRc == 0) { + printk("bridge info: type %x subbus %x maxAgents %x maxsubbus %x logslot %x\n", + BridgeInfo->busUnitInfo.deviceType, + BridgeInfo->subBusNumber, + BridgeInfo->maxAgents, + BridgeInfo->maxSubBusNumber, + BridgeInfo->logicalSlotNumber); + PPCDBG(PPCDBG_BUSWALK, + "PCI: BridgeInfo, Type:0x%02X, SubBus:0x%02X, MaxAgents:0x%02X, MaxSubBus: 0x%02X, LSlot: 0x%02X\n", + BridgeInfo->busUnitInfo.deviceType, + BridgeInfo->subBusNumber, + BridgeInfo->maxAgents, + BridgeInfo->maxSubBusNumber, + BridgeInfo->logicalSlotNumber); + + if (BridgeInfo->busUnitInfo.deviceType == + HvCallPci_BridgeDevice) { + /* Scan_Bridge_Slot...: 0x18.00.12 */ + scan_bridge_slot(bus, BridgeInfo); + } else + printk("PCI: Invalid Bridge Configuration(0x%02X)", + BridgeInfo->busUnitInfo.deviceType); + } + } else if (HvRc != 0x000B) + pci_Log_Error("EADs Connect", + bus, SubBus, AgentId, HvRc); + } + kfree(BridgeInfo); +} + +/* + * This assumes that the node slot is always on the primary bus! + */ +static int scan_bridge_slot(HvBusNumber Bus, + struct HvCallPci_BridgeInfo *BridgeInfo) +{ + struct device_node *node; + HvSubBusNumber SubBus = BridgeInfo->subBusNumber; + u16 VendorId = 0; + int HvRc = 0; + u8 Irq = 0; + int IdSel = ISERIES_GET_DEVICE_FROM_SUBBUS(SubBus); + int Function = ISERIES_GET_FUNCTION_FROM_SUBBUS(SubBus); + HvAgentId EADsIdSel = ISERIES_PCI_AGENTID(IdSel, Function); + + /* iSeries_allocate_IRQ.: 0x18.00.12(0xA3) */ + Irq = iSeries_allocate_IRQ(Bus, 0, EADsIdSel); + PPCDBG(PPCDBG_BUSWALK, + "PCI:- allocate and assign IRQ 0x%02X.%02X.%02X = 0x%02X\n", + Bus, 0, EADsIdSel, Irq); + + /* + * Connect all functions of any device found. + */ + for (IdSel = 1; IdSel <= BridgeInfo->maxAgents; ++IdSel) { + for (Function = 0; Function < 8; ++Function) { + HvAgentId AgentId = ISERIES_PCI_AGENTID(IdSel, Function); + HvRc = HvCallXm_connectBusUnit(Bus, SubBus, + AgentId, Irq); + if (HvRc != 0) { + pci_Log_Error("Connect Bus Unit", + Bus, SubBus, AgentId, HvRc); + continue; + } + + HvRc = HvCallPci_configLoad16(Bus, SubBus, AgentId, + PCI_VENDOR_ID, &VendorId); + if (HvRc != 0) { + pci_Log_Error("Read Vendor", + Bus, SubBus, AgentId, HvRc); + continue; + } + printk("read vendor ID: %x\n", VendorId); + + /* FoundDevice: 0x18.28.10 = 0x12AE */ + PPCDBG(PPCDBG_BUSWALK, + "PCI:- FoundDevice: 0x%02X.%02X.%02X = 0x%04X, irq %d\n", + Bus, SubBus, AgentId, VendorId, Irq); + HvRc = HvCallPci_configStore8(Bus, SubBus, AgentId, + PCI_INTERRUPT_LINE, Irq); + if (HvRc != 0) + pci_Log_Error("PciCfgStore Irq Failed!", + Bus, SubBus, AgentId, HvRc); + + ++DeviceCount; + node = build_device_node(Bus, SubBus, EADsIdSel, Function); + PCI_DN(node)->Irq = Irq; + PCI_DN(node)->LogicalSlot = BridgeInfo->logicalSlotNumber; + + } /* for (Function = 0; Function < 8; ++Function) */ + } /* for (IdSel = 1; IdSel <= MaxAgents; ++IdSel) */ + return HvRc; +} + +/* + * I/0 Memory copy MUST use mmio commands on iSeries + * To do; For performance, include the hv call directly + */ +void iSeries_memset_io(volatile void __iomem *dest, char c, size_t Count) +{ + u8 ByteValue = c; + long NumberOfBytes = Count; + + while (NumberOfBytes > 0) { + iSeries_Write_Byte(ByteValue, dest++); + -- NumberOfBytes; + } +} +EXPORT_SYMBOL(iSeries_memset_io); + +void iSeries_memcpy_toio(volatile void __iomem *dest, void *source, size_t count) +{ + char *src = source; + long NumberOfBytes = count; + + while (NumberOfBytes > 0) { + iSeries_Write_Byte(*src++, dest++); + -- NumberOfBytes; + } +} +EXPORT_SYMBOL(iSeries_memcpy_toio); + +void iSeries_memcpy_fromio(void *dest, const volatile void __iomem *src, size_t count) +{ + char *dst = dest; + long NumberOfBytes = count; + + while (NumberOfBytes > 0) { + *dst++ = iSeries_Read_Byte(src++); + -- NumberOfBytes; + } +} +EXPORT_SYMBOL(iSeries_memcpy_fromio); + +/* + * Look down the chain to find the matching Device Device + */ +static struct device_node *find_Device_Node(int bus, int devfn) +{ + struct pci_dn *pdn; + + list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) { + if ((bus == pdn->DsaAddr.Dsa.busNumber) && + (devfn == pdn->devfn)) + return pdn->node; + } + return NULL; +} + +#if 0 +/* + * Returns the device node for the passed pci_dev + * Sanity Check Node PciDev to passed pci_dev + * If none is found, returns a NULL which the client must handle. + */ +static struct device_node *get_Device_Node(struct pci_dev *pdev) +{ + struct device_node *node; + + node = pdev->sysdata; + if (node == NULL || PCI_DN(node)->pcidev != pdev) + node = find_Device_Node(pdev->bus->number, pdev->devfn); + return node; +} +#endif + +/* + * Config space read and write functions. + * For now at least, we look for the device node for the bus and devfn + * that we are asked to access. It may be possible to translate the devfn + * to a subbus and deviceid more directly. + */ +static u64 hv_cfg_read_func[4] = { + HvCallPciConfigLoad8, HvCallPciConfigLoad16, + HvCallPciConfigLoad32, HvCallPciConfigLoad32 +}; + +static u64 hv_cfg_write_func[4] = { + HvCallPciConfigStore8, HvCallPciConfigStore16, + HvCallPciConfigStore32, HvCallPciConfigStore32 +}; + +/* + * Read PCI config space + */ +static int iSeries_pci_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int size, u32 *val) +{ + struct device_node *node = find_Device_Node(bus->number, devfn); + u64 fn; + struct HvCallPci_LoadReturn ret; + + if (node == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset > 255) { + *val = ~0; + return PCIBIOS_BAD_REGISTER_NUMBER; + } + + fn = hv_cfg_read_func[(size - 1) & 3]; + HvCall3Ret16(fn, &ret, PCI_DN(node)->DsaAddr.DsaAddr, offset, 0); + + if (ret.rc != 0) { + *val = ~0; + return PCIBIOS_DEVICE_NOT_FOUND; /* or something */ + } + + *val = ret.value; + return 0; +} + +/* + * Write PCI config space + */ + +static int iSeries_pci_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int size, u32 val) +{ + struct device_node *node = find_Device_Node(bus->number, devfn); + u64 fn; + u64 ret; + + if (node == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset > 255) + return PCIBIOS_BAD_REGISTER_NUMBER; + + fn = hv_cfg_write_func[(size - 1) & 3]; + ret = HvCall4(fn, PCI_DN(node)->DsaAddr.DsaAddr, offset, val, 0); + + if (ret != 0) + return PCIBIOS_DEVICE_NOT_FOUND; + + return 0; +} + +static struct pci_ops iSeries_pci_ops = { + .read = iSeries_pci_read_config, + .write = iSeries_pci_write_config +}; + +/* + * Check Return Code + * -> On Failure, print and log information. + * Increment Retry Count, if exceeds max, panic partition. + * + * PCI: Device 23.90 ReadL I/O Error( 0): 0x1234 + * PCI: Device 23.90 ReadL Retry( 1) + * PCI: Device 23.90 ReadL Retry Successful(1) + */ +static int CheckReturnCode(char *TextHdr, struct device_node *DevNode, + int *retry, u64 ret) +{ + if (ret != 0) { + struct pci_dn *pdn = PCI_DN(DevNode); + + ++Pci_Error_Count; + (*retry)++; + printk("PCI: %s: Device 0x%04X:%02X I/O Error(%2d): 0x%04X\n", + TextHdr, pdn->DsaAddr.Dsa.busNumber, pdn->devfn, + *retry, (int)ret); + /* + * Bump the retry and check for retry count exceeded. + * If, Exceeded, panic the system. + */ + if (((*retry) > Pci_Retry_Max) && + (Pci_Error_Flag > 0)) { + mf_display_src(0xB6000103); + panic_timeout = 0; + panic("PCI: Hardware I/O Error, SRC B6000103, " + "Automatic Reboot Disabled.\n"); + } + return -1; /* Retry Try */ + } + return 0; +} + +/* + * Translate the I/O Address into a device node, bar, and bar offset. + * Note: Make sure the passed variable end up on the stack to avoid + * the exposure of being device global. + */ +static inline struct device_node *xlate_iomm_address( + const volatile void __iomem *IoAddress, + u64 *dsaptr, u64 *BarOffsetPtr) +{ + unsigned long OrigIoAddr; + unsigned long BaseIoAddr; + unsigned long TableIndex; + struct device_node *DevNode; + + OrigIoAddr = (unsigned long __force)IoAddress; + if ((OrigIoAddr < BASE_IO_MEMORY) || (OrigIoAddr >= max_io_memory)) + return NULL; + BaseIoAddr = OrigIoAddr - BASE_IO_MEMORY; + TableIndex = BaseIoAddr / IOMM_TABLE_ENTRY_SIZE; + DevNode = iomm_table[TableIndex]; + + if (DevNode != NULL) { + int barnum = iobar_table[TableIndex]; + *dsaptr = PCI_DN(DevNode)->DsaAddr.DsaAddr | (barnum << 24); + *BarOffsetPtr = BaseIoAddr % IOMM_TABLE_ENTRY_SIZE; + } else + panic("PCI: Invalid PCI IoAddress detected!\n"); + return DevNode; +} + +/* + * Read MM I/O Instructions for the iSeries + * On MM I/O error, all ones are returned and iSeries_pci_IoError is cal + * else, data is returned in big Endian format. + * + * iSeries_Read_Byte = Read Byte ( 8 bit) + * iSeries_Read_Word = Read Word (16 bit) + * iSeries_Read_Long = Read Long (32 bit) + */ +u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + struct HvCallPci_LoadReturn ret; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Read_Byte: invalid access at IO address %p\n", IoAddress); + return 0xff; + } + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad8, &ret, dsa, BarOffset, 0); + } while (CheckReturnCode("RDB", DevNode, &retry, ret.rc) != 0); + + return (u8)ret.value; +} +EXPORT_SYMBOL(iSeries_Read_Byte); + +u16 iSeries_Read_Word(const volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + struct HvCallPci_LoadReturn ret; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Read_Word: invalid access at IO address %p\n", IoAddress); + return 0xffff; + } + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad16, &ret, dsa, + BarOffset, 0); + } while (CheckReturnCode("RDW", DevNode, &retry, ret.rc) != 0); + + return swab16((u16)ret.value); +} +EXPORT_SYMBOL(iSeries_Read_Word); + +u32 iSeries_Read_Long(const volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + struct HvCallPci_LoadReturn ret; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Read_Long: invalid access at IO address %p\n", IoAddress); + return 0xffffffff; + } + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad32, &ret, dsa, + BarOffset, 0); + } while (CheckReturnCode("RDL", DevNode, &retry, ret.rc) != 0); + + return swab32((u32)ret.value); +} +EXPORT_SYMBOL(iSeries_Read_Long); + +/* + * Write MM I/O Instructions for the iSeries + * + * iSeries_Write_Byte = Write Byte (8 bit) + * iSeries_Write_Word = Write Word(16 bit) + * iSeries_Write_Long = Write Long(32 bit) + */ +void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + u64 rc; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Write_Byte: invalid access at IO address %p\n", IoAddress); + return; + } + do { + ++Pci_Io_Write_Count; + rc = HvCall4(HvCallPciBarStore8, dsa, BarOffset, data, 0); + } while (CheckReturnCode("WWB", DevNode, &retry, rc) != 0); +} +EXPORT_SYMBOL(iSeries_Write_Byte); + +void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + u64 rc; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Write_Word: invalid access at IO address %p\n", IoAddress); + return; + } + do { + ++Pci_Io_Write_Count; + rc = HvCall4(HvCallPciBarStore16, dsa, BarOffset, swab16(data), 0); + } while (CheckReturnCode("WWW", DevNode, &retry, rc) != 0); +} +EXPORT_SYMBOL(iSeries_Write_Word); + +void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + u64 rc; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Write_Long: invalid access at IO address %p\n", IoAddress); + return; + } + do { + ++Pci_Io_Write_Count; + rc = HvCall4(HvCallPciBarStore32, dsa, BarOffset, swab32(data), 0); + } while (CheckReturnCode("WWL", DevNode, &retry, rc) != 0); +} +EXPORT_SYMBOL(iSeries_Write_Long); diff --git a/arch/powerpc/platforms/iseries/proc.c b/arch/powerpc/platforms/iseries/proc.c new file mode 100644 index 00000000000..6f1929cac66 --- /dev/null +++ b/arch/powerpc/platforms/iseries/proc.c @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2001 Kyle A. Lucke IBM Corporation + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/param.h> /* for HZ */ +#include <asm/paca.h> +#include <asm/processor.h> +#include <asm/time.h> +#include <asm/lppaca.h> +#include <asm/iSeries/ItLpQueue.h> +#include <asm/iSeries/HvCallXm.h> + +#include "processor_vpd.h" +#include "main_store.h" + +static int __init iseries_proc_create(void) +{ + struct proc_dir_entry *e = proc_mkdir("iSeries", 0); + if (!e) + return 1; + + return 0; +} +core_initcall(iseries_proc_create); + +static unsigned long startTitan = 0; +static unsigned long startTb = 0; + +static int proc_titantod_show(struct seq_file *m, void *v) +{ + unsigned long tb0, titan_tod; + + tb0 = get_tb(); + titan_tod = HvCallXm_loadTod(); + + seq_printf(m, "Titan\n" ); + seq_printf(m, " time base = %016lx\n", tb0); + seq_printf(m, " titan tod = %016lx\n", titan_tod); + seq_printf(m, " xProcFreq = %016x\n", + xIoHriProcessorVpd[0].xProcFreq); + seq_printf(m, " xTimeBaseFreq = %016x\n", + xIoHriProcessorVpd[0].xTimeBaseFreq); + seq_printf(m, " tb_ticks_per_jiffy = %lu\n", tb_ticks_per_jiffy); + seq_printf(m, " tb_ticks_per_usec = %lu\n", tb_ticks_per_usec); + + if (!startTitan) { + startTitan = titan_tod; + startTb = tb0; + } else { + unsigned long titan_usec = (titan_tod - startTitan) >> 12; + unsigned long tb_ticks = (tb0 - startTb); + unsigned long titan_jiffies = titan_usec / (1000000/HZ); + unsigned long titan_jiff_usec = titan_jiffies * (1000000/HZ); + unsigned long titan_jiff_rem_usec = + titan_usec - titan_jiff_usec; + unsigned long tb_jiffies = tb_ticks / tb_ticks_per_jiffy; + unsigned long tb_jiff_ticks = tb_jiffies * tb_ticks_per_jiffy; + unsigned long tb_jiff_rem_ticks = tb_ticks - tb_jiff_ticks; + unsigned long tb_jiff_rem_usec = + tb_jiff_rem_ticks / tb_ticks_per_usec; + unsigned long new_tb_ticks_per_jiffy = + (tb_ticks * (1000000/HZ))/titan_usec; + + seq_printf(m, " titan elapsed = %lu uSec\n", titan_usec); + seq_printf(m, " tb elapsed = %lu ticks\n", tb_ticks); + seq_printf(m, " titan jiffies = %lu.%04lu \n", titan_jiffies, + titan_jiff_rem_usec); + seq_printf(m, " tb jiffies = %lu.%04lu\n", tb_jiffies, + tb_jiff_rem_usec); + seq_printf(m, " new tb_ticks_per_jiffy = %lu\n", + new_tb_ticks_per_jiffy); + } + + return 0; +} + +static int proc_titantod_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_titantod_show, NULL); +} + +static struct file_operations proc_titantod_operations = { + .open = proc_titantod_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init iseries_proc_init(void) +{ + struct proc_dir_entry *e; + + e = create_proc_entry("iSeries/titanTod", S_IFREG|S_IRUGO, NULL); + if (e) + e->proc_fops = &proc_titantod_operations; + + return 0; +} +__initcall(iseries_proc_init); diff --git a/arch/powerpc/platforms/iseries/processor_vpd.h b/arch/powerpc/platforms/iseries/processor_vpd.h new file mode 100644 index 00000000000..7ac5d0d0dbf --- /dev/null +++ b/arch/powerpc/platforms/iseries/processor_vpd.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _ISERIES_PROCESSOR_VPD_H +#define _ISERIES_PROCESSOR_VPD_H + +#include <asm/types.h> + +/* + * This struct maps Processor Vpd that is DMAd to SLIC by CSP + */ +struct IoHriProcessorVpd { + u8 xFormat; // VPD format indicator x00-x00 + u8 xProcStatus:8; // Processor State x01-x01 + u8 xSecondaryThreadCount; // Secondary thread cnt x02-x02 + u8 xSrcType:1; // Src Type x03-x03 + u8 xSrcSoft:1; // Src stay soft ... + u8 xSrcParable:1; // Src parable ... + u8 xRsvd1:5; // Reserved ... + u16 xHvPhysicalProcIndex; // Hypervisor physical proc index04-x05 + u16 xRsvd2; // Reserved x06-x07 + u32 xHwNodeId; // Hardware node id x08-x0B + u32 xHwProcId; // Hardware processor id x0C-x0F + + u32 xTypeNum; // Card Type/CCIN number x10-x13 + u32 xModelNum; // Model/Feature number x14-x17 + u64 xSerialNum; // Serial number x18-x1F + char xPartNum[12]; // Book Part or FPU number x20-x2B + char xMfgID[4]; // Manufacturing ID x2C-x2F + + u32 xProcFreq; // Processor Frequency x30-x33 + u32 xTimeBaseFreq; // Time Base Frequency x34-x37 + + u32 xChipEcLevel; // Chip EC Levels x38-x3B + u32 xProcIdReg; // PIR SPR value x3C-x3F + u32 xPVR; // PVR value x40-x43 + u8 xRsvd3[12]; // Reserved x44-x4F + + u32 xInstCacheSize; // Instruction cache size in KB x50-x53 + u32 xInstBlockSize; // Instruction cache block size x54-x57 + u32 xDataCacheOperandSize; // Data cache operand size x58-x5B + u32 xInstCacheOperandSize; // Inst cache operand size x5C-x5F + + u32 xDataL1CacheSizeKB; // L1 data cache size in KB x60-x63 + u32 xDataL1CacheLineSize; // L1 data cache block size x64-x67 + u64 xRsvd4; // Reserved x68-x6F + + u32 xDataL2CacheSizeKB; // L2 data cache size in KB x70-x73 + u32 xDataL2CacheLineSize; // L2 data cache block size x74-x77 + u64 xRsvd5; // Reserved x78-x7F + + u32 xDataL3CacheSizeKB; // L3 data cache size in KB x80-x83 + u32 xDataL3CacheLineSize; // L3 data cache block size x84-x87 + u64 xRsvd6; // Reserved x88-x8F + + u64 xFruLabel; // Card Location Label x90-x97 + u8 xSlotsOnCard; // Slots on card (0=no slots) x98-x98 + u8 xPartLocFlag; // Location flag (0-pluggable 1-imbedded) x99-x99 + u16 xSlotMapIndex; // Index in slot map table x9A-x9B + u8 xSmartCardPortNo; // Smart card port number x9C-x9C + u8 xRsvd7; // Reserved x9D-x9D + u16 xFrameIdAndRackUnit; // Frame ID and rack unit adr x9E-x9F + + u8 xRsvd8[24]; // Reserved xA0-xB7 + + char xProcSrc[72]; // CSP format SRC xB8-xFF +}; + +extern struct IoHriProcessorVpd xIoHriProcessorVpd[]; + +#endif /* _ISERIES_PROCESSOR_VPD_H */ diff --git a/arch/powerpc/platforms/iseries/release_data.h b/arch/powerpc/platforms/iseries/release_data.h new file mode 100644 index 00000000000..c68b9c3e5ca --- /dev/null +++ b/arch/powerpc/platforms/iseries/release_data.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _ISERIES_RELEASE_DATA_H +#define _ISERIES_RELEASE_DATA_H + +/* + * This control block contains the critical information about the + * release so that it can be changed in the future (ie, the virtual + * address of the OS's NACA). + */ +#include <asm/types.h> +#include <asm/naca.h> + +/* + * When we IPL a secondary partition, we will check if if the + * secondary xMinPlicVrmIndex > the primary xVrmIndex. + * If it is then this tells PLIC that this secondary is not + * supported running on this "old" of a level of PLIC. + * + * Likewise, we will compare the primary xMinSlicVrmIndex to + * the secondary xVrmIndex. + * If the primary xMinSlicVrmDelta > secondary xVrmDelta then we + * know that this PLIC does not support running an OS "that old". + */ + +#define HVREL_TAGSINACTIVE 0x8000 +#define HVREL_32BIT 0x4000 +#define HVREL_NOSHAREDPROCS 0x2000 +#define HVREL_NOHMT 0x1000 + +struct HvReleaseData { + u32 xDesc; /* Descriptor "HvRD" ebcdic x00-x03 */ + u16 xSize; /* Size of this control block x04-x05 */ + u16 xVpdAreasPtrOffset; /* Offset in NACA of ItVpdAreas x06-x07 */ + struct naca_struct *xSlicNacaAddr; /* Virt addr of SLIC NACA x08-x0F */ + u32 xMsNucDataOffset; /* Offset of Linux Mapping Data x10-x13 */ + u32 xRsvd1; /* Reserved x14-x17 */ + u16 xFlags; + u16 xVrmIndex; /* VRM Index of OS image x1A-x1B */ + u16 xMinSupportedPlicVrmIndex; /* Min PLIC level (soft) x1C-x1D */ + u16 xMinCompatablePlicVrmIndex; /* Min PLIC levelP (hard) x1E-x1F */ + char xVrmName[12]; /* Displayable name x20-x2B */ + char xRsvd3[20]; /* Reserved x2C-x3F */ +}; + +extern struct HvReleaseData hvReleaseData; + +#endif /* _ISERIES_RELEASE_DATA_H */ diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c new file mode 100644 index 00000000000..b6cf050a8c2 --- /dev/null +++ b/arch/powerpc/platforms/iseries/setup.c @@ -0,0 +1,1002 @@ +/* + * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com> + * Copyright (c) 1999-2000 Grant Erickson <grant@lcse.umn.edu> + * + * Description: + * Architecture- / platform-specific boot-time initialization code for + * the IBM iSeries LPAR. Adapted from original code by Grant Erickson and + * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek + * <dan@net4x.com>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/threads.h> +#include <linux/smp.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/initrd.h> +#include <linux/seq_file.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/root_dev.h> + +#include <asm/processor.h> +#include <asm/machdep.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/mmu_context.h> +#include <asm/cputable.h> +#include <asm/sections.h> +#include <asm/iommu.h> +#include <asm/firmware.h> + +#include <asm/time.h> +#include <asm/naca.h> +#include <asm/paca.h> +#include <asm/cache.h> +#include <asm/sections.h> +#include <asm/abs_addr.h> +#include <asm/iSeries/HvCallHpt.h> +#include <asm/iSeries/HvLpConfig.h> +#include <asm/iSeries/HvCallEvent.h> +#include <asm/iSeries/HvCallXm.h> +#include <asm/iSeries/ItLpQueue.h> +#include <asm/iSeries/mf.h> +#include <asm/iSeries/HvLpEvent.h> +#include <asm/iSeries/LparMap.h> + +#include "setup.h" +#include "irq.h" +#include "vpd_areas.h" +#include "processor_vpd.h" +#include "main_store.h" +#include "call_sm.h" + +extern void hvlog(char *fmt, ...); + +#ifdef DEBUG +#define DBG(fmt...) hvlog(fmt) +#else +#define DBG(fmt...) +#endif + +/* Function Prototypes */ +extern void ppcdbg_initialize(void); + +static void build_iSeries_Memory_Map(void); +static int iseries_shared_idle(void); +static int iseries_dedicated_idle(void); +#ifdef CONFIG_PCI +extern void iSeries_pci_final_fixup(void); +#else +static void iSeries_pci_final_fixup(void) { } +#endif + +/* Global Variables */ +int piranha_simulator; + +extern int rd_size; /* Defined in drivers/block/rd.c */ +extern unsigned long klimit; +extern unsigned long embedded_sysmap_start; +extern unsigned long embedded_sysmap_end; + +extern unsigned long iSeries_recal_tb; +extern unsigned long iSeries_recal_titan; + +static int mf_initialized; + +struct MemoryBlock { + unsigned long absStart; + unsigned long absEnd; + unsigned long logicalStart; + unsigned long logicalEnd; +}; + +/* + * Process the main store vpd to determine where the holes in memory are + * and return the number of physical blocks and fill in the array of + * block data. + */ +static unsigned long iSeries_process_Condor_mainstore_vpd( + struct MemoryBlock *mb_array, unsigned long max_entries) +{ + unsigned long holeFirstChunk, holeSizeChunks; + unsigned long numMemoryBlocks = 1; + struct IoHriMainStoreSegment4 *msVpd = + (struct IoHriMainStoreSegment4 *)xMsVpd; + unsigned long holeStart = msVpd->nonInterleavedBlocksStartAdr; + unsigned long holeEnd = msVpd->nonInterleavedBlocksEndAdr; + unsigned long holeSize = holeEnd - holeStart; + + printk("Mainstore_VPD: Condor\n"); + /* + * Determine if absolute memory has any + * holes so that we can interpret the + * access map we get back from the hypervisor + * correctly. + */ + mb_array[0].logicalStart = 0; + mb_array[0].logicalEnd = 0x100000000; + mb_array[0].absStart = 0; + mb_array[0].absEnd = 0x100000000; + + if (holeSize) { + numMemoryBlocks = 2; + holeStart = holeStart & 0x000fffffffffffff; + holeStart = addr_to_chunk(holeStart); + holeFirstChunk = holeStart; + holeSize = addr_to_chunk(holeSize); + holeSizeChunks = holeSize; + printk( "Main store hole: start chunk = %0lx, size = %0lx chunks\n", + holeFirstChunk, holeSizeChunks ); + mb_array[0].logicalEnd = holeFirstChunk; + mb_array[0].absEnd = holeFirstChunk; + mb_array[1].logicalStart = holeFirstChunk; + mb_array[1].logicalEnd = 0x100000000 - holeSizeChunks; + mb_array[1].absStart = holeFirstChunk + holeSizeChunks; + mb_array[1].absEnd = 0x100000000; + } + return numMemoryBlocks; +} + +#define MaxSegmentAreas 32 +#define MaxSegmentAdrRangeBlocks 128 +#define MaxAreaRangeBlocks 4 + +static unsigned long iSeries_process_Regatta_mainstore_vpd( + struct MemoryBlock *mb_array, unsigned long max_entries) +{ + struct IoHriMainStoreSegment5 *msVpdP = + (struct IoHriMainStoreSegment5 *)xMsVpd; + unsigned long numSegmentBlocks = 0; + u32 existsBits = msVpdP->msAreaExists; + unsigned long area_num; + + printk("Mainstore_VPD: Regatta\n"); + + for (area_num = 0; area_num < MaxSegmentAreas; ++area_num ) { + unsigned long numAreaBlocks; + struct IoHriMainStoreArea4 *currentArea; + + if (existsBits & 0x80000000) { + unsigned long block_num; + + currentArea = &msVpdP->msAreaArray[area_num]; + numAreaBlocks = currentArea->numAdrRangeBlocks; + printk("ms_vpd: processing area %2ld blocks=%ld", + area_num, numAreaBlocks); + for (block_num = 0; block_num < numAreaBlocks; + ++block_num ) { + /* Process an address range block */ + struct MemoryBlock tempBlock; + unsigned long i; + + tempBlock.absStart = + (unsigned long)currentArea->xAdrRangeBlock[block_num].blockStart; + tempBlock.absEnd = + (unsigned long)currentArea->xAdrRangeBlock[block_num].blockEnd; + tempBlock.logicalStart = 0; + tempBlock.logicalEnd = 0; + printk("\n block %ld absStart=%016lx absEnd=%016lx", + block_num, tempBlock.absStart, + tempBlock.absEnd); + + for (i = 0; i < numSegmentBlocks; ++i) { + if (mb_array[i].absStart == + tempBlock.absStart) + break; + } + if (i == numSegmentBlocks) { + if (numSegmentBlocks == max_entries) + panic("iSeries_process_mainstore_vpd: too many memory blocks"); + mb_array[numSegmentBlocks] = tempBlock; + ++numSegmentBlocks; + } else + printk(" (duplicate)"); + } + printk("\n"); + } + existsBits <<= 1; + } + /* Now sort the blocks found into ascending sequence */ + if (numSegmentBlocks > 1) { + unsigned long m, n; + + for (m = 0; m < numSegmentBlocks - 1; ++m) { + for (n = numSegmentBlocks - 1; m < n; --n) { + if (mb_array[n].absStart < + mb_array[n-1].absStart) { + struct MemoryBlock tempBlock; + + tempBlock = mb_array[n]; + mb_array[n] = mb_array[n-1]; + mb_array[n-1] = tempBlock; + } + } + } + } + /* + * Assign "logical" addresses to each block. These + * addresses correspond to the hypervisor "bitmap" space. + * Convert all addresses into units of 256K chunks. + */ + { + unsigned long i, nextBitmapAddress; + + printk("ms_vpd: %ld sorted memory blocks\n", numSegmentBlocks); + nextBitmapAddress = 0; + for (i = 0; i < numSegmentBlocks; ++i) { + unsigned long length = mb_array[i].absEnd - + mb_array[i].absStart; + + mb_array[i].logicalStart = nextBitmapAddress; + mb_array[i].logicalEnd = nextBitmapAddress + length; + nextBitmapAddress += length; + printk(" Bitmap range: %016lx - %016lx\n" + " Absolute range: %016lx - %016lx\n", + mb_array[i].logicalStart, + mb_array[i].logicalEnd, + mb_array[i].absStart, mb_array[i].absEnd); + mb_array[i].absStart = addr_to_chunk(mb_array[i].absStart & + 0x000fffffffffffff); + mb_array[i].absEnd = addr_to_chunk(mb_array[i].absEnd & + 0x000fffffffffffff); + mb_array[i].logicalStart = + addr_to_chunk(mb_array[i].logicalStart); + mb_array[i].logicalEnd = addr_to_chunk(mb_array[i].logicalEnd); + } + } + + return numSegmentBlocks; +} + +static unsigned long iSeries_process_mainstore_vpd(struct MemoryBlock *mb_array, + unsigned long max_entries) +{ + unsigned long i; + unsigned long mem_blocks = 0; + + if (cpu_has_feature(CPU_FTR_SLB)) + mem_blocks = iSeries_process_Regatta_mainstore_vpd(mb_array, + max_entries); + else + mem_blocks = iSeries_process_Condor_mainstore_vpd(mb_array, + max_entries); + + printk("Mainstore_VPD: numMemoryBlocks = %ld \n", mem_blocks); + for (i = 0; i < mem_blocks; ++i) { + printk("Mainstore_VPD: block %3ld logical chunks %016lx - %016lx\n" + " abs chunks %016lx - %016lx\n", + i, mb_array[i].logicalStart, mb_array[i].logicalEnd, + mb_array[i].absStart, mb_array[i].absEnd); + } + return mem_blocks; +} + +static void __init iSeries_get_cmdline(void) +{ + char *p, *q; + + /* copy the command line parameter from the primary VSP */ + HvCallEvent_dmaToSp(cmd_line, 2 * 64* 1024, 256, + HvLpDma_Direction_RemoteToLocal); + + p = cmd_line; + q = cmd_line + 255; + while(p < q) { + if (!*p || *p == '\n') + break; + ++p; + } + *p = 0; +} + +static void __init iSeries_init_early(void) +{ + extern unsigned long memory_limit; + + DBG(" -> iSeries_init_early()\n"); + + ppc64_firmware_features = FW_FEATURE_ISERIES; + + ppcdbg_initialize(); + + ppc64_interrupt_controller = IC_ISERIES; + +#if defined(CONFIG_BLK_DEV_INITRD) + /* + * If the init RAM disk has been configured and there is + * a non-zero starting address for it, set it up + */ + if (naca.xRamDisk) { + initrd_start = (unsigned long)__va(naca.xRamDisk); + initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE; + initrd_below_start_ok = 1; // ramdisk in kernel space + ROOT_DEV = Root_RAM0; + if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize) + rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024; + } else +#endif /* CONFIG_BLK_DEV_INITRD */ + { + /* ROOT_DEV = MKDEV(VIODASD_MAJOR, 1); */ + } + + iSeries_recal_tb = get_tb(); + iSeries_recal_titan = HvCallXm_loadTod(); + + /* + * Initialize the hash table management pointers + */ + hpte_init_iSeries(); + + /* + * Initialize the DMA/TCE management + */ + iommu_init_early_iSeries(); + + iSeries_get_cmdline(); + + /* Save unparsed command line copy for /proc/cmdline */ + strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); + + /* Parse early parameters, in particular mem=x */ + parse_early_param(); + + if (memory_limit) { + if (memory_limit < systemcfg->physicalMemorySize) + systemcfg->physicalMemorySize = memory_limit; + else { + printk("Ignoring mem=%lu >= ram_top.\n", memory_limit); + memory_limit = 0; + } + } + + /* Initialize machine-dependency vectors */ +#ifdef CONFIG_SMP + smp_init_iSeries(); +#endif + if (itLpNaca.xPirEnvironMode == 0) + piranha_simulator = 1; + + /* Associate Lp Event Queue 0 with processor 0 */ + HvCallEvent_setLpEventQueueInterruptProc(0, 0); + + mf_init(); + mf_initialized = 1; + mb(); + + /* If we were passed an initrd, set the ROOT_DEV properly if the values + * look sensible. If not, clear initrd reference. + */ +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && + initrd_end > initrd_start) + ROOT_DEV = Root_RAM0; + else + initrd_start = initrd_end = 0; +#endif /* CONFIG_BLK_DEV_INITRD */ + + DBG(" <- iSeries_init_early()\n"); +} + +struct mschunks_map mschunks_map = { + /* XXX We don't use these, but Piranha might need them. */ + .chunk_size = MSCHUNKS_CHUNK_SIZE, + .chunk_shift = MSCHUNKS_CHUNK_SHIFT, + .chunk_mask = MSCHUNKS_OFFSET_MASK, +}; +EXPORT_SYMBOL(mschunks_map); + +void mschunks_alloc(unsigned long num_chunks) +{ + klimit = _ALIGN(klimit, sizeof(u32)); + mschunks_map.mapping = (u32 *)klimit; + klimit += num_chunks * sizeof(u32); + mschunks_map.num_chunks = num_chunks; +} + +/* + * The iSeries may have very large memories ( > 128 GB ) and a partition + * may get memory in "chunks" that may be anywhere in the 2**52 real + * address space. The chunks are 256K in size. To map this to the + * memory model Linux expects, the AS/400 specific code builds a + * translation table to translate what Linux thinks are "physical" + * addresses to the actual real addresses. This allows us to make + * it appear to Linux that we have contiguous memory starting at + * physical address zero while in fact this could be far from the truth. + * To avoid confusion, I'll let the words physical and/or real address + * apply to the Linux addresses while I'll use "absolute address" to + * refer to the actual hardware real address. + * + * build_iSeries_Memory_Map gets information from the Hypervisor and + * looks at the Main Store VPD to determine the absolute addresses + * of the memory that has been assigned to our partition and builds + * a table used to translate Linux's physical addresses to these + * absolute addresses. Absolute addresses are needed when + * communicating with the hypervisor (e.g. to build HPT entries) + */ + +static void __init build_iSeries_Memory_Map(void) +{ + u32 loadAreaFirstChunk, loadAreaLastChunk, loadAreaSize; + u32 nextPhysChunk; + u32 hptFirstChunk, hptLastChunk, hptSizeChunks, hptSizePages; + u32 totalChunks,moreChunks; + u32 currChunk, thisChunk, absChunk; + u32 currDword; + u32 chunkBit; + u64 map; + struct MemoryBlock mb[32]; + unsigned long numMemoryBlocks, curBlock; + + /* Chunk size on iSeries is 256K bytes */ + totalChunks = (u32)HvLpConfig_getMsChunks(); + mschunks_alloc(totalChunks); + + /* + * Get absolute address of our load area + * and map it to physical address 0 + * This guarantees that the loadarea ends up at physical 0 + * otherwise, it might not be returned by PLIC as the first + * chunks + */ + + loadAreaFirstChunk = (u32)addr_to_chunk(itLpNaca.xLoadAreaAddr); + loadAreaSize = itLpNaca.xLoadAreaChunks; + + /* + * Only add the pages already mapped here. + * Otherwise we might add the hpt pages + * The rest of the pages of the load area + * aren't in the HPT yet and can still + * be assigned an arbitrary physical address + */ + if ((loadAreaSize * 64) > HvPagesToMap) + loadAreaSize = HvPagesToMap / 64; + + loadAreaLastChunk = loadAreaFirstChunk + loadAreaSize - 1; + + /* + * TODO Do we need to do something if the HPT is in the 64MB load area? + * This would be required if the itLpNaca.xLoadAreaChunks includes + * the HPT size + */ + + printk("Mapping load area - physical addr = 0000000000000000\n" + " absolute addr = %016lx\n", + chunk_to_addr(loadAreaFirstChunk)); + printk("Load area size %dK\n", loadAreaSize * 256); + + for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk) + mschunks_map.mapping[nextPhysChunk] = + loadAreaFirstChunk + nextPhysChunk; + + /* + * Get absolute address of our HPT and remember it so + * we won't map it to any physical address + */ + hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); + hptSizePages = (u32)HvCallHpt_getHptPages(); + hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT); + hptLastChunk = hptFirstChunk + hptSizeChunks - 1; + + printk("HPT absolute addr = %016lx, size = %dK\n", + chunk_to_addr(hptFirstChunk), hptSizeChunks * 256); + + ppc64_pft_size = __ilog2(hptSizePages * PAGE_SIZE); + + /* + * The actual hashed page table is in the hypervisor, + * we have no direct access + */ + htab_address = NULL; + + /* + * Determine if absolute memory has any + * holes so that we can interpret the + * access map we get back from the hypervisor + * correctly. + */ + numMemoryBlocks = iSeries_process_mainstore_vpd(mb, 32); + + /* + * Process the main store access map from the hypervisor + * to build up our physical -> absolute translation table + */ + curBlock = 0; + currChunk = 0; + currDword = 0; + moreChunks = totalChunks; + + while (moreChunks) { + map = HvCallSm_get64BitsOfAccessMap(itLpNaca.xLpIndex, + currDword); + thisChunk = currChunk; + while (map) { + chunkBit = map >> 63; + map <<= 1; + if (chunkBit) { + --moreChunks; + while (thisChunk >= mb[curBlock].logicalEnd) { + ++curBlock; + if (curBlock >= numMemoryBlocks) + panic("out of memory blocks"); + } + if (thisChunk < mb[curBlock].logicalStart) + panic("memory block error"); + + absChunk = mb[curBlock].absStart + + (thisChunk - mb[curBlock].logicalStart); + if (((absChunk < hptFirstChunk) || + (absChunk > hptLastChunk)) && + ((absChunk < loadAreaFirstChunk) || + (absChunk > loadAreaLastChunk))) { + mschunks_map.mapping[nextPhysChunk] = + absChunk; + ++nextPhysChunk; + } + } + ++thisChunk; + } + ++currDword; + currChunk += 64; + } + + /* + * main store size (in chunks) is + * totalChunks - hptSizeChunks + * which should be equal to + * nextPhysChunk + */ + systemcfg->physicalMemorySize = chunk_to_addr(nextPhysChunk); +} + +/* + * Document me. + */ +static void __init iSeries_setup_arch(void) +{ + unsigned procIx = get_paca()->lppaca.dyn_hv_phys_proc_index; + + if (get_paca()->lppaca.shared_proc) { + ppc_md.idle_loop = iseries_shared_idle; + printk(KERN_INFO "Using shared processor idle loop\n"); + } else { + ppc_md.idle_loop = iseries_dedicated_idle; + printk(KERN_INFO "Using dedicated idle loop\n"); + } + + /* Setup the Lp Event Queue */ + setup_hvlpevent_queue(); + + printk("Max logical processors = %d\n", + itVpdAreas.xSlicMaxLogicalProcs); + printk("Max physical processors = %d\n", + itVpdAreas.xSlicMaxPhysicalProcs); + + systemcfg->processor = xIoHriProcessorVpd[procIx].xPVR; + printk("Processor version = %x\n", systemcfg->processor); +} + +static void iSeries_get_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "machine\t\t: 64-bit iSeries Logical Partition\n"); +} + +/* + * Document me. + * and Implement me. + */ +static int iSeries_get_irq(struct pt_regs *regs) +{ + /* -2 means ignore this interrupt */ + return -2; +} + +/* + * Document me. + */ +static void iSeries_restart(char *cmd) +{ + mf_reboot(); +} + +/* + * Document me. + */ +static void iSeries_power_off(void) +{ + mf_power_off(); +} + +/* + * Document me. + */ +static void iSeries_halt(void) +{ + mf_power_off(); +} + +static void __init iSeries_progress(char * st, unsigned short code) +{ + printk("Progress: [%04x] - %s\n", (unsigned)code, st); + if (!piranha_simulator && mf_initialized) { + if (code != 0xffff) + mf_display_progress(code); + else + mf_clear_src(); + } +} + +static void __init iSeries_fixup_klimit(void) +{ + /* + * Change klimit to take into account any ram disk + * that may be included + */ + if (naca.xRamDisk) + klimit = KERNELBASE + (u64)naca.xRamDisk + + (naca.xRamDiskSize * PAGE_SIZE); + else { + /* + * No ram disk was included - check and see if there + * was an embedded system map. Change klimit to take + * into account any embedded system map + */ + if (embedded_sysmap_end) + klimit = KERNELBASE + ((embedded_sysmap_end + 4095) & + 0xfffffffffffff000); + } +} + +static int __init iSeries_src_init(void) +{ + /* clear the progress line */ + ppc_md.progress(" ", 0xffff); + return 0; +} + +late_initcall(iSeries_src_init); + +static inline void process_iSeries_events(void) +{ + asm volatile ("li 0,0x5555; sc" : : : "r0", "r3"); +} + +static void yield_shared_processor(void) +{ + unsigned long tb; + + HvCall_setEnabledInterrupts(HvCall_MaskIPI | + HvCall_MaskLpEvent | + HvCall_MaskLpProd | + HvCall_MaskTimeout); + + tb = get_tb(); + /* Compute future tb value when yield should expire */ + HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); + + /* + * The decrementer stops during the yield. Force a fake decrementer + * here and let the timer_interrupt code sort out the actual time. + */ + get_paca()->lppaca.int_dword.fields.decr_int = 1; + process_iSeries_events(); +} + +static int iseries_shared_idle(void) +{ + while (1) { + while (!need_resched() && !hvlpevent_is_pending()) { + local_irq_disable(); + ppc64_runlatch_off(); + + /* Recheck with irqs off */ + if (!need_resched() && !hvlpevent_is_pending()) + yield_shared_processor(); + + HMT_medium(); + local_irq_enable(); + } + + ppc64_runlatch_on(); + + if (hvlpevent_is_pending()) + process_iSeries_events(); + + schedule(); + } + + return 0; +} + +static int iseries_dedicated_idle(void) +{ + long oldval; + + while (1) { + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + while (!need_resched()) { + ppc64_runlatch_off(); + HMT_low(); + + if (hvlpevent_is_pending()) { + HMT_medium(); + ppc64_runlatch_on(); + process_iSeries_events(); + } + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + ppc64_runlatch_on(); + schedule(); + } + + return 0; +} + +#ifndef CONFIG_PCI +void __init iSeries_init_IRQ(void) { } +#endif + +static int __init iseries_probe(int platform) +{ + return PLATFORM_ISERIES_LPAR == platform; +} + +struct machdep_calls __initdata iseries_md = { + .setup_arch = iSeries_setup_arch, + .get_cpuinfo = iSeries_get_cpuinfo, + .init_IRQ = iSeries_init_IRQ, + .get_irq = iSeries_get_irq, + .init_early = iSeries_init_early, + .pcibios_fixup = iSeries_pci_final_fixup, + .restart = iSeries_restart, + .power_off = iSeries_power_off, + .halt = iSeries_halt, + .get_boot_time = iSeries_get_boot_time, + .set_rtc_time = iSeries_set_rtc_time, + .get_rtc_time = iSeries_get_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = iSeries_progress, + .probe = iseries_probe, + /* XXX Implement enable_pmcs for iSeries */ +}; + +struct blob { + unsigned char data[PAGE_SIZE]; + unsigned long next; +}; + +struct iseries_flat_dt { + struct boot_param_header header; + u64 reserve_map[2]; + struct blob dt; + struct blob strings; +}; + +struct iseries_flat_dt iseries_dt; + +void dt_init(struct iseries_flat_dt *dt) +{ + dt->header.off_mem_rsvmap = + offsetof(struct iseries_flat_dt, reserve_map); + dt->header.off_dt_struct = offsetof(struct iseries_flat_dt, dt); + dt->header.off_dt_strings = offsetof(struct iseries_flat_dt, strings); + dt->header.totalsize = sizeof(struct iseries_flat_dt); + dt->header.dt_strings_size = sizeof(struct blob); + + /* There is no notion of hardware cpu id on iSeries */ + dt->header.boot_cpuid_phys = smp_processor_id(); + + dt->dt.next = (unsigned long)&dt->dt.data; + dt->strings.next = (unsigned long)&dt->strings.data; + + dt->header.magic = OF_DT_HEADER; + dt->header.version = 0x10; + dt->header.last_comp_version = 0x10; + + dt->reserve_map[0] = 0; + dt->reserve_map[1] = 0; +} + +void dt_check_blob(struct blob *b) +{ + if (b->next >= (unsigned long)&b->next) { + DBG("Ran out of space in flat device tree blob!\n"); + BUG(); + } +} + +void dt_push_u32(struct iseries_flat_dt *dt, u32 value) +{ + *((u32*)dt->dt.next) = value; + dt->dt.next += sizeof(u32); + + dt_check_blob(&dt->dt); +} + +void dt_push_u64(struct iseries_flat_dt *dt, u64 value) +{ + *((u64*)dt->dt.next) = value; + dt->dt.next += sizeof(u64); + + dt_check_blob(&dt->dt); +} + +unsigned long dt_push_bytes(struct blob *blob, char *data, int len) +{ + unsigned long start = blob->next - (unsigned long)blob->data; + + memcpy((char *)blob->next, data, len); + blob->next = _ALIGN(blob->next + len, 4); + + dt_check_blob(blob); + + return start; +} + +void dt_start_node(struct iseries_flat_dt *dt, char *name) +{ + dt_push_u32(dt, OF_DT_BEGIN_NODE); + dt_push_bytes(&dt->dt, name, strlen(name) + 1); +} + +#define dt_end_node(dt) dt_push_u32(dt, OF_DT_END_NODE) + +void dt_prop(struct iseries_flat_dt *dt, char *name, char *data, int len) +{ + unsigned long offset; + + dt_push_u32(dt, OF_DT_PROP); + + /* Length of the data */ + dt_push_u32(dt, len); + + /* Put the property name in the string blob. */ + offset = dt_push_bytes(&dt->strings, name, strlen(name) + 1); + + /* The offset of the properties name in the string blob. */ + dt_push_u32(dt, (u32)offset); + + /* The actual data. */ + dt_push_bytes(&dt->dt, data, len); +} + +void dt_prop_str(struct iseries_flat_dt *dt, char *name, char *data) +{ + dt_prop(dt, name, data, strlen(data) + 1); /* + 1 for NULL */ +} + +void dt_prop_u32(struct iseries_flat_dt *dt, char *name, u32 data) +{ + dt_prop(dt, name, (char *)&data, sizeof(u32)); +} + +void dt_prop_u64(struct iseries_flat_dt *dt, char *name, u64 data) +{ + dt_prop(dt, name, (char *)&data, sizeof(u64)); +} + +void dt_prop_u64_list(struct iseries_flat_dt *dt, char *name, u64 *data, int n) +{ + dt_prop(dt, name, (char *)data, sizeof(u64) * n); +} + +void dt_prop_empty(struct iseries_flat_dt *dt, char *name) +{ + dt_prop(dt, name, NULL, 0); +} + +void dt_cpus(struct iseries_flat_dt *dt) +{ + unsigned char buf[32]; + unsigned char *p; + unsigned int i, index; + struct IoHriProcessorVpd *d; + + /* yuck */ + snprintf(buf, 32, "PowerPC,%s", cur_cpu_spec->cpu_name); + p = strchr(buf, ' '); + if (!p) p = buf + strlen(buf); + + dt_start_node(dt, "cpus"); + dt_prop_u32(dt, "#address-cells", 1); + dt_prop_u32(dt, "#size-cells", 0); + + for (i = 0; i < NR_CPUS; i++) { + if (paca[i].lppaca.dyn_proc_status >= 2) + continue; + + snprintf(p, 32 - (p - buf), "@%d", i); + dt_start_node(dt, buf); + + dt_prop_str(dt, "device_type", "cpu"); + + index = paca[i].lppaca.dyn_hv_phys_proc_index; + d = &xIoHriProcessorVpd[index]; + + dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024); + dt_prop_u32(dt, "i-cache-line-size", d->xInstCacheOperandSize); + + dt_prop_u32(dt, "d-cache-size", d->xDataL1CacheSizeKB * 1024); + dt_prop_u32(dt, "d-cache-line-size", d->xDataCacheOperandSize); + + /* magic conversions to Hz copied from old code */ + dt_prop_u32(dt, "clock-frequency", + ((1UL << 34) * 1000000) / d->xProcFreq); + dt_prop_u32(dt, "timebase-frequency", + ((1UL << 32) * 1000000) / d->xTimeBaseFreq); + + dt_prop_u32(dt, "reg", i); + + dt_end_node(dt); + } + + dt_end_node(dt); +} + +void build_flat_dt(struct iseries_flat_dt *dt) +{ + u64 tmp[2]; + + dt_init(dt); + + dt_start_node(dt, ""); + + dt_prop_u32(dt, "#address-cells", 2); + dt_prop_u32(dt, "#size-cells", 2); + + /* /memory */ + dt_start_node(dt, "memory@0"); + dt_prop_str(dt, "name", "memory"); + dt_prop_str(dt, "device_type", "memory"); + tmp[0] = 0; + tmp[1] = systemcfg->physicalMemorySize; + dt_prop_u64_list(dt, "reg", tmp, 2); + dt_end_node(dt); + + /* /chosen */ + dt_start_node(dt, "chosen"); + dt_prop_u32(dt, "linux,platform", PLATFORM_ISERIES_LPAR); + dt_end_node(dt); + + dt_cpus(dt); + + dt_end_node(dt); + + dt_push_u32(dt, OF_DT_END); +} + +void * __init iSeries_early_setup(void) +{ + iSeries_fixup_klimit(); + + /* + * Initialize the table which translate Linux physical addresses to + * AS/400 absolute addresses + */ + build_iSeries_Memory_Map(); + + build_flat_dt(&iseries_dt); + + return (void *) __pa(&iseries_dt); +} diff --git a/arch/powerpc/platforms/iseries/setup.h b/arch/powerpc/platforms/iseries/setup.h new file mode 100644 index 00000000000..6da89ae991c --- /dev/null +++ b/arch/powerpc/platforms/iseries/setup.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com> + * Copyright (c) 1999-2000 Grant Erickson <grant@lcse.umn.edu> + * + * Description: + * Architecture- / platform-specific boot-time initialization code for + * the IBM AS/400 LPAR. Adapted from original code by Grant Erickson and + * code by Gary Thomas, Cort Dougan <cort@cs.nmt.edu>, and Dan Malek + * <dan@netx4.com>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ISERIES_SETUP_H__ +#define __ISERIES_SETUP_H__ + +extern void iSeries_get_boot_time(struct rtc_time *tm); +extern int iSeries_set_rtc_time(struct rtc_time *tm); +extern void iSeries_get_rtc_time(struct rtc_time *tm); + +#endif /* __ISERIES_SETUP_H__ */ diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c new file mode 100644 index 00000000000..f720916682f --- /dev/null +++ b/arch/powerpc/platforms/iseries/smp.c @@ -0,0 +1,121 @@ +/* + * SMP support for iSeries machines. + * + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * Plus various changes from other IBM teams... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/cache.h> +#include <linux/err.h> +#include <linux/sysdev.h> +#include <linux/cpu.h> + +#include <asm/ptrace.h> +#include <asm/atomic.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/paca.h> +#include <asm/iSeries/HvCall.h> +#include <asm/time.h> +#include <asm/ppcdebug.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/system.h> + +static unsigned long iSeries_smp_message[NR_CPUS]; + +void iSeries_smp_message_recv(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + int msg; + + if (num_online_cpus() < 2) + return; + + for (msg = 0; msg < 4; msg++) + if (test_and_clear_bit(msg, &iSeries_smp_message[cpu])) + smp_message_recv(msg, regs); +} + +static inline void smp_iSeries_do_message(int cpu, int msg) +{ + set_bit(msg, &iSeries_smp_message[cpu]); + HvCall_sendIPI(&(paca[cpu])); +} + +static void smp_iSeries_message_pass(int target, int msg) +{ + int i; + + if (target < NR_CPUS) + smp_iSeries_do_message(target, msg); + else { + for_each_online_cpu(i) { + if ((target == MSG_ALL_BUT_SELF) && + (i == smp_processor_id())) + continue; + smp_iSeries_do_message(i, msg); + } + } +} + +static int smp_iSeries_probe(void) +{ + return cpus_weight(cpu_possible_map); +} + +static void smp_iSeries_kick_cpu(int nr) +{ + BUG_ON((nr < 0) || (nr >= NR_CPUS)); + + /* Verify that our partition has a processor nr */ + if (paca[nr].lppaca.dyn_proc_status >= 2) + return; + + /* The processor is currently spinning, waiting + * for the cpu_start field to become non-zero + * After we set cpu_start, the processor will + * continue on to secondary_start in iSeries_head.S + */ + paca[nr].cpu_start = 1; +} + +static void __devinit smp_iSeries_setup_cpu(int nr) +{ +} + +static struct smp_ops_t iSeries_smp_ops = { + .message_pass = smp_iSeries_message_pass, + .probe = smp_iSeries_probe, + .kick_cpu = smp_iSeries_kick_cpu, + .setup_cpu = smp_iSeries_setup_cpu, +}; + +/* This is called very early. */ +void __init smp_init_iSeries(void) +{ + smp_ops = &iSeries_smp_ops; +} diff --git a/arch/powerpc/platforms/iseries/spcomm_area.h b/arch/powerpc/platforms/iseries/spcomm_area.h new file mode 100644 index 00000000000..6e3b685115c --- /dev/null +++ b/arch/powerpc/platforms/iseries/spcomm_area.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ISERIES_SPCOMM_AREA_H +#define _ISERIES_SPCOMM_AREA_H + + +struct SpCommArea { + u32 xDesc; // Descriptor (only in new formats) 000-003 + u8 xFormat; // Format (only in new formats) 004-004 + u8 xRsvd1[11]; // Reserved 005-00F + u64 xRawTbAtIplStart; // Raw HW TB value when IPL is started 010-017 + u64 xRawTodAtIplStart; // Raw HW TOD value when IPL is started 018-01F + u64 xBcdTimeAtIplStart; // BCD time when IPL is started 020-027 + u64 xBcdTimeAtOsStart; // BCD time when OS passed control 028-02F + u8 xRsvd2[80]; // Reserved 030-07F +}; + +extern struct SpCommArea xSpCommArea; + +#endif /* _ISERIES_SPCOMM_AREA_H */ diff --git a/arch/powerpc/platforms/iseries/vio.c b/arch/powerpc/platforms/iseries/vio.c new file mode 100644 index 00000000000..c0f7d2e9153 --- /dev/null +++ b/arch/powerpc/platforms/iseries/vio.c @@ -0,0 +1,156 @@ +/* + * IBM PowerPC iSeries Virtual I/O Infrastructure Support. + * + * Copyright (c) 2005 Stephen Rothwell, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/types.h> +#include <linux/device.h> +#include <linux/init.h> + +#include <asm/vio.h> +#include <asm/iommu.h> +#include <asm/tce.h> +#include <asm/abs_addr.h> +#include <asm/page.h> +#include <asm/iSeries/vio.h> +#include <asm/iSeries/HvTypes.h> +#include <asm/iSeries/HvLpConfig.h> +#include <asm/iSeries/HvCallXm.h> + +struct device *iSeries_vio_dev = &vio_bus_device.dev; +EXPORT_SYMBOL(iSeries_vio_dev); + +static struct iommu_table veth_iommu_table; +static struct iommu_table vio_iommu_table; + +static void __init iommu_vio_init(void) +{ + struct iommu_table *t; + struct iommu_table_cb cb; + unsigned long cbp; + unsigned long itc_entries; + + cb.itc_busno = 255; /* Bus 255 is the virtual bus */ + cb.itc_virtbus = 0xff; /* Ask for virtual bus */ + + cbp = virt_to_abs(&cb); + HvCallXm_getTceTableParms(cbp); + + itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); + veth_iommu_table.it_size = itc_entries / 2; + veth_iommu_table.it_busno = cb.itc_busno; + veth_iommu_table.it_offset = cb.itc_offset; + veth_iommu_table.it_index = cb.itc_index; + veth_iommu_table.it_type = TCE_VB; + veth_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&veth_iommu_table); + + if (!t) + printk("Virtual Bus VETH TCE table failed.\n"); + + vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; + vio_iommu_table.it_busno = cb.itc_busno; + vio_iommu_table.it_offset = cb.itc_offset + + veth_iommu_table.it_size; + vio_iommu_table.it_index = cb.itc_index; + vio_iommu_table.it_type = TCE_VB; + vio_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&vio_iommu_table); + + if (!t) + printk("Virtual Bus VIO TCE table failed.\n"); +} + +/** + * vio_register_device_iseries: - Register a new iSeries vio device. + * @voidev: The device to register. + */ +static struct vio_dev *__init vio_register_device_iseries(char *type, + uint32_t unit_num) +{ + struct vio_dev *viodev; + + /* allocate a vio_dev for this device */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) + return NULL; + memset(viodev, 0, sizeof(struct vio_dev)); + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); + + viodev->name = viodev->dev.bus_id; + viodev->type = type; + viodev->unit_address = unit_num; + viodev->iommu_table = &vio_iommu_table; + if (vio_register_device(viodev) == NULL) { + kfree(viodev); + return NULL; + } + return viodev; +} + +void __init probe_bus_iseries(void) +{ + HvLpIndexMap vlan_map; + struct vio_dev *viodev; + int i; + + /* there is only one of each of these */ + vio_register_device_iseries("viocons", 0); + vio_register_device_iseries("vscsi", 0); + + vlan_map = HvLpConfig_getVirtualLanIndexMap(); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { + if ((vlan_map & (0x8000 >> i)) == 0) + continue; + viodev = vio_register_device_iseries("vlan", i); + /* veth is special and has it own iommu_table */ + viodev->iommu_table = &veth_iommu_table; + } + for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) + vio_register_device_iseries("viodasd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) + vio_register_device_iseries("viocd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) + vio_register_device_iseries("viotape", i); +} + +/** + * vio_match_device_iseries: - Tell if a iSeries VIO device matches a + * vio_device_id + */ +static int vio_match_device_iseries(const struct vio_device_id *id, + const struct vio_dev *dev) +{ + return strncmp(dev->type, id->type, strlen(id->type)) == 0; +} + +static struct vio_bus_ops vio_bus_ops_iseries = { + .match = vio_match_device_iseries, +}; + +/** + * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus + */ +static int __init vio_bus_init_iseries(void) +{ + int err; + + err = vio_bus_init(&vio_bus_ops_iseries); + if (err == 0) { + iommu_vio_init(); + vio_bus_device.iommu_table = &vio_iommu_table; + iSeries_vio_dev = &vio_bus_device.dev; + probe_bus_iseries(); + } + return err; +} + +__initcall(vio_bus_init_iseries); diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c new file mode 100644 index 00000000000..c0c767bd37f --- /dev/null +++ b/arch/powerpc/platforms/iseries/viopath.c @@ -0,0 +1,672 @@ +/* -*- linux-c -*- + * + * iSeries Virtual I/O Message Path code + * + * Authors: Dave Boutcher <boutcher@us.ibm.com> + * Ryan Arnold <ryanarn@us.ibm.com> + * Colin Devilbiss <devilbis@us.ibm.com> + * + * (C) Copyright 2000-2005 IBM Corporation + * + * This code is used by the iSeries virtual disk, cd, + * tape, and console to communicate with OS/400 in another + * partition. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/vmalloc.h> +#include <linux/string.h> +#include <linux/proc_fs.h> +#include <linux/dma-mapping.h> +#include <linux/wait.h> +#include <linux/seq_file.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/iSeries/HvTypes.h> +#include <asm/iSeries/ItExtVpdPanel.h> +#include <asm/iSeries/HvLpEvent.h> +#include <asm/iSeries/HvLpConfig.h> +#include <asm/iSeries/mf.h> +#include <asm/iSeries/vio.h> + +/* Status of the path to each other partition in the system. + * This is overkill, since we will only ever establish connections + * to our hosting partition and the primary partition on the system. + * But this allows for other support in the future. + */ +static struct viopathStatus { + int isOpen; /* Did we open the path? */ + int isActive; /* Do we have a mon msg outstanding */ + int users[VIO_MAX_SUBTYPES]; + HvLpInstanceId mSourceInst; + HvLpInstanceId mTargetInst; + int numberAllocated; +} viopathStatus[HVMAXARCHITECTEDLPS]; + +static DEFINE_SPINLOCK(statuslock); + +/* + * For each kind of event we allocate a buffer that is + * guaranteed not to cross a page boundary + */ +static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned; +static atomic_t event_buffer_available[VIO_MAX_SUBTYPES]; +static int event_buffer_initialised; + +static void handleMonitorEvent(struct HvLpEvent *event); + +/* + * We use this structure to handle asynchronous responses. The caller + * blocks on the semaphore and the handler posts the semaphore. However, + * if system_state is not SYSTEM_RUNNING, then wait_atomic is used ... + */ +struct alloc_parms { + struct semaphore sem; + int number; + atomic_t wait_atomic; + int used_wait_atomic; +}; + +/* Put a sequence number in each mon msg. The value is not + * important. Start at something other than 0 just for + * readability. wrapping this is ok. + */ +static u8 viomonseq = 22; + +/* Our hosting logical partition. We get this at startup + * time, and different modules access this variable directly. + */ +HvLpIndex viopath_hostLp = HvLpIndexInvalid; +EXPORT_SYMBOL(viopath_hostLp); +HvLpIndex viopath_ourLp = HvLpIndexInvalid; +EXPORT_SYMBOL(viopath_ourLp); + +/* For each kind of incoming event we set a pointer to a + * routine to call. + */ +static vio_event_handler_t *vio_handler[VIO_MAX_SUBTYPES]; + +#define VIOPATH_KERN_WARN KERN_WARNING "viopath: " +#define VIOPATH_KERN_INFO KERN_INFO "viopath: " + +static int proc_viopath_show(struct seq_file *m, void *v) +{ + char *buf; + u16 vlanMap; + dma_addr_t handle; + HvLpEvent_Rc hvrc; + DECLARE_MUTEX_LOCKED(Semaphore); + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return 0; + memset(buf, 0, PAGE_SIZE); + + handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE, + DMA_FROM_DEVICE); + + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_config | vioconfigget, + HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + (u64)(unsigned long)&Semaphore, VIOVERSION << 16, + ((u64)handle) << 32, PAGE_SIZE, 0, 0); + + if (hvrc != HvLpEvent_Rc_Good) + printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc); + + down(&Semaphore); + + vlanMap = HvLpConfig_getVirtualLanIndexMap(); + + buf[PAGE_SIZE-1] = '\0'; + seq_printf(m, "%s", buf); + seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap); + seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n", + e2a(xItExtVpdPanel.mfgID[2]), + e2a(xItExtVpdPanel.mfgID[3]), + e2a(xItExtVpdPanel.systemSerial[1]), + e2a(xItExtVpdPanel.systemSerial[2]), + e2a(xItExtVpdPanel.systemSerial[3]), + e2a(xItExtVpdPanel.systemSerial[4]), + e2a(xItExtVpdPanel.systemSerial[5])); + + dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE); + kfree(buf); + + return 0; +} + +static int proc_viopath_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_viopath_show, NULL); +} + +static struct file_operations proc_viopath_operations = { + .open = proc_viopath_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init vio_proc_init(void) +{ + struct proc_dir_entry *e; + + e = create_proc_entry("iSeries/config", 0, NULL); + if (e) + e->proc_fops = &proc_viopath_operations; + + return 0; +} +__initcall(vio_proc_init); + +/* See if a given LP is active. Allow for invalid lps to be passed in + * and just return invalid + */ +int viopath_isactive(HvLpIndex lp) +{ + if (lp == HvLpIndexInvalid) + return 0; + if (lp < HVMAXARCHITECTEDLPS) + return viopathStatus[lp].isActive; + else + return 0; +} +EXPORT_SYMBOL(viopath_isactive); + +/* + * We cache the source and target instance ids for each + * partition. + */ +HvLpInstanceId viopath_sourceinst(HvLpIndex lp) +{ + return viopathStatus[lp].mSourceInst; +} +EXPORT_SYMBOL(viopath_sourceinst); + +HvLpInstanceId viopath_targetinst(HvLpIndex lp) +{ + return viopathStatus[lp].mTargetInst; +} +EXPORT_SYMBOL(viopath_targetinst); + +/* + * Send a monitor message. This is a message with the acknowledge + * bit on that the other side will NOT explicitly acknowledge. When + * the other side goes down, the hypervisor will acknowledge any + * outstanding messages....so we will know when the other side dies. + */ +static void sendMonMsg(HvLpIndex remoteLp) +{ + HvLpEvent_Rc hvrc; + + viopathStatus[remoteLp].mSourceInst = + HvCallEvent_getSourceLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].mTargetInst = + HvCallEvent_getTargetLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + + /* + * Deliberately ignore the return code here. if we call this + * more than once, we don't care. + */ + vio_setHandler(viomajorsubtype_monitor, handleMonitorEvent); + + hvrc = HvCallEvent_signalLpEventFast(remoteLp, HvLpEvent_Type_VirtualIo, + viomajorsubtype_monitor, HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_DeferredAck, + viopathStatus[remoteLp].mSourceInst, + viopathStatus[remoteLp].mTargetInst, + viomonseq++, 0, 0, 0, 0, 0); + + if (hvrc == HvLpEvent_Rc_Good) + viopathStatus[remoteLp].isActive = 1; + else { + printk(VIOPATH_KERN_WARN "could not connect to partition %d\n", + remoteLp); + viopathStatus[remoteLp].isActive = 0; + } +} + +static void handleMonitorEvent(struct HvLpEvent *event) +{ + HvLpIndex remoteLp; + int i; + + /* + * This handler is _also_ called as part of the loop + * at the end of this routine, so it must be able to + * ignore NULL events... + */ + if (!event) + return; + + /* + * First see if this is just a normal monitor message from the + * other partition + */ + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + remoteLp = event->xSourceLp; + if (!viopathStatus[remoteLp].isActive) + sendMonMsg(remoteLp); + return; + } + + /* + * This path is for an acknowledgement; the other partition + * died + */ + remoteLp = event->xTargetLp; + if ((event->xSourceInstanceId != viopathStatus[remoteLp].mSourceInst) || + (event->xTargetInstanceId != viopathStatus[remoteLp].mTargetInst)) { + printk(VIOPATH_KERN_WARN "ignoring ack....mismatched instances\n"); + return; + } + + printk(VIOPATH_KERN_WARN "partition %d ended\n", remoteLp); + + viopathStatus[remoteLp].isActive = 0; + + /* + * For each active handler, pass them a NULL + * message to indicate that the other partition + * died + */ + for (i = 0; i < VIO_MAX_SUBTYPES; i++) { + if (vio_handler[i] != NULL) + (*vio_handler[i])(NULL); + } +} + +int vio_setHandler(int subtype, vio_event_handler_t *beh) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + if (vio_handler[subtype] != NULL) + return -EBUSY; + vio_handler[subtype] = beh; + return 0; +} +EXPORT_SYMBOL(vio_setHandler); + +int vio_clearHandler(int subtype) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + if (vio_handler[subtype] == NULL) + return -EAGAIN; + vio_handler[subtype] = NULL; + return 0; +} +EXPORT_SYMBOL(vio_clearHandler); + +static void handleConfig(struct HvLpEvent *event) +{ + if (!event) + return; + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + printk(VIOPATH_KERN_WARN + "unexpected config request from partition %d", + event->xSourceLp); + + if ((event->xFlags.xFunction == HvLpEvent_Function_Int) && + (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + return; + } + + up((struct semaphore *)event->xCorrelationToken); +} + +/* + * Initialization of the hosting partition + */ +void vio_set_hostlp(void) +{ + /* + * If this has already been set then we DON'T want to either change + * it or re-register the proc file system + */ + if (viopath_hostLp != HvLpIndexInvalid) + return; + + /* + * Figure out our hosting partition. This isn't allowed to change + * while we're active + */ + viopath_ourLp = HvLpConfig_getLpIndex(); + viopath_hostLp = HvLpConfig_getHostingLpIndex(viopath_ourLp); + + if (viopath_hostLp != HvLpIndexInvalid) + vio_setHandler(viomajorsubtype_config, handleConfig); +} +EXPORT_SYMBOL(vio_set_hostlp); + +static void vio_handleEvent(struct HvLpEvent *event, struct pt_regs *regs) +{ + HvLpIndex remoteLp; + int subtype = (event->xSubtype & VIOMAJOR_SUBTYPE_MASK) + >> VIOMAJOR_SUBTYPE_SHIFT; + + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + remoteLp = event->xSourceLp; + /* + * The isActive is checked because if the hosting partition + * went down and came back up it would not be active but it + * would have different source and target instances, in which + * case we'd want to reset them. This case really protects + * against an unauthorized active partition sending interrupts + * or acks to this linux partition. + */ + if (viopathStatus[remoteLp].isActive + && (event->xSourceInstanceId != + viopathStatus[remoteLp].mTargetInst)) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "int msg rcvd, source inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mTargetInst, + event->xSourceInstanceId); + return; + } + + if (viopathStatus[remoteLp].isActive + && (event->xTargetInstanceId != + viopathStatus[remoteLp].mSourceInst)) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "int msg rcvd, target inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mSourceInst, + event->xTargetInstanceId); + return; + } + } else { + remoteLp = event->xTargetLp; + if (event->xSourceInstanceId != + viopathStatus[remoteLp].mSourceInst) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "ack msg rcvd, source inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mSourceInst, + event->xSourceInstanceId); + return; + } + + if (event->xTargetInstanceId != + viopathStatus[remoteLp].mTargetInst) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "viopath: ack msg rcvd, target inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mTargetInst, + event->xTargetInstanceId); + return; + } + } + + if (vio_handler[subtype] == NULL) { + printk(VIOPATH_KERN_WARN + "unexpected virtual io event subtype %d from partition %d\n", + event->xSubtype, remoteLp); + /* No handler. Ack if necessary */ + if ((event->xFlags.xFunction == HvLpEvent_Function_Int) && + (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + return; + } + + /* This innocuous little line is where all the real work happens */ + (*vio_handler[subtype])(event); +} + +static void viopath_donealloc(void *parm, int number) +{ + struct alloc_parms *parmsp = parm; + + parmsp->number = number; + if (parmsp->used_wait_atomic) + atomic_set(&parmsp->wait_atomic, 0); + else + up(&parmsp->sem); +} + +static int allocateEvents(HvLpIndex remoteLp, int numEvents) +{ + struct alloc_parms parms; + + if (system_state != SYSTEM_RUNNING) { + parms.used_wait_atomic = 1; + atomic_set(&parms.wait_atomic, 1); + } else { + parms.used_wait_atomic = 0; + init_MUTEX_LOCKED(&parms.sem); + } + mf_allocate_lp_events(remoteLp, HvLpEvent_Type_VirtualIo, 250, /* It would be nice to put a real number here! */ + numEvents, &viopath_donealloc, &parms); + if (system_state != SYSTEM_RUNNING) { + while (atomic_read(&parms.wait_atomic)) + mb(); + } else + down(&parms.sem); + return parms.number; +} + +int viopath_open(HvLpIndex remoteLp, int subtype, int numReq) +{ + int i; + unsigned long flags; + int tempNumAllocated; + + if ((remoteLp >= HVMAXARCHITECTEDLPS) || (remoteLp == HvLpIndexInvalid)) + return -EINVAL; + + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + + spin_lock_irqsave(&statuslock, flags); + + if (!event_buffer_initialised) { + for (i = 0; i < VIO_MAX_SUBTYPES; i++) + atomic_set(&event_buffer_available[i], 1); + event_buffer_initialised = 1; + } + + viopathStatus[remoteLp].users[subtype]++; + + if (!viopathStatus[remoteLp].isOpen) { + viopathStatus[remoteLp].isOpen = 1; + HvCallEvent_openLpEventPath(remoteLp, HvLpEvent_Type_VirtualIo); + + /* + * Don't hold the spinlock during an operation that + * can sleep. + */ + spin_unlock_irqrestore(&statuslock, flags); + tempNumAllocated = allocateEvents(remoteLp, 1); + spin_lock_irqsave(&statuslock, flags); + + viopathStatus[remoteLp].numberAllocated += tempNumAllocated; + + if (viopathStatus[remoteLp].numberAllocated == 0) { + HvCallEvent_closeLpEventPath(remoteLp, + HvLpEvent_Type_VirtualIo); + + spin_unlock_irqrestore(&statuslock, flags); + return -ENOMEM; + } + + viopathStatus[remoteLp].mSourceInst = + HvCallEvent_getSourceLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].mTargetInst = + HvCallEvent_getTargetLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + HvLpEvent_registerHandler(HvLpEvent_Type_VirtualIo, + &vio_handleEvent); + sendMonMsg(remoteLp); + printk(VIOPATH_KERN_INFO "opening connection to partition %d, " + "setting sinst %d, tinst %d\n", + remoteLp, viopathStatus[remoteLp].mSourceInst, + viopathStatus[remoteLp].mTargetInst); + } + + spin_unlock_irqrestore(&statuslock, flags); + tempNumAllocated = allocateEvents(remoteLp, numReq); + spin_lock_irqsave(&statuslock, flags); + viopathStatus[remoteLp].numberAllocated += tempNumAllocated; + spin_unlock_irqrestore(&statuslock, flags); + + return 0; +} +EXPORT_SYMBOL(viopath_open); + +int viopath_close(HvLpIndex remoteLp, int subtype, int numReq) +{ + unsigned long flags; + int i; + int numOpen; + struct alloc_parms parms; + + if ((remoteLp >= HVMAXARCHITECTEDLPS) || (remoteLp == HvLpIndexInvalid)) + return -EINVAL; + + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + + spin_lock_irqsave(&statuslock, flags); + /* + * If the viopath_close somehow gets called before a + * viopath_open it could decrement to -1 which is a non + * recoverable state so we'll prevent this from + * happening. + */ + if (viopathStatus[remoteLp].users[subtype] > 0) + viopathStatus[remoteLp].users[subtype]--; + + spin_unlock_irqrestore(&statuslock, flags); + + parms.used_wait_atomic = 0; + init_MUTEX_LOCKED(&parms.sem); + mf_deallocate_lp_events(remoteLp, HvLpEvent_Type_VirtualIo, + numReq, &viopath_donealloc, &parms); + down(&parms.sem); + + spin_lock_irqsave(&statuslock, flags); + for (i = 0, numOpen = 0; i < VIO_MAX_SUBTYPES; i++) + numOpen += viopathStatus[remoteLp].users[i]; + + if ((viopathStatus[remoteLp].isOpen) && (numOpen == 0)) { + printk(VIOPATH_KERN_INFO "closing connection to partition %d", + remoteLp); + + HvCallEvent_closeLpEventPath(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].isOpen = 0; + viopathStatus[remoteLp].isActive = 0; + + for (i = 0; i < VIO_MAX_SUBTYPES; i++) + atomic_set(&event_buffer_available[i], 0); + event_buffer_initialised = 0; + } + spin_unlock_irqrestore(&statuslock, flags); + return 0; +} +EXPORT_SYMBOL(viopath_close); + +void *vio_get_event_buffer(int subtype) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return NULL; + + if (atomic_dec_if_positive(&event_buffer_available[subtype]) == 0) + return &event_buffer[subtype * 256]; + else + return NULL; +} +EXPORT_SYMBOL(vio_get_event_buffer); + +void vio_free_event_buffer(int subtype, void *buffer) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) { + printk(VIOPATH_KERN_WARN + "unexpected subtype %d freeing event buffer\n", subtype); + return; + } + + if (atomic_read(&event_buffer_available[subtype]) != 0) { + printk(VIOPATH_KERN_WARN + "freeing unallocated event buffer, subtype %d\n", + subtype); + return; + } + + if (buffer != &event_buffer[subtype * 256]) { + printk(VIOPATH_KERN_WARN + "freeing invalid event buffer, subtype %d\n", subtype); + } + + atomic_set(&event_buffer_available[subtype], 1); +} +EXPORT_SYMBOL(vio_free_event_buffer); + +static const struct vio_error_entry vio_no_error = + { 0, 0, "Non-VIO Error" }; +static const struct vio_error_entry vio_unknown_error = + { 0, EIO, "Unknown Error" }; + +static const struct vio_error_entry vio_default_errors[] = { + {0x0001, EIO, "No Connection"}, + {0x0002, EIO, "No Receiver"}, + {0x0003, EIO, "No Buffer Available"}, + {0x0004, EBADRQC, "Invalid Message Type"}, + {0x0000, 0, NULL}, +}; + +const struct vio_error_entry *vio_lookup_rc( + const struct vio_error_entry *local_table, u16 rc) +{ + const struct vio_error_entry *cur; + + if (!rc) + return &vio_no_error; + if (local_table) + for (cur = local_table; cur->rc; ++cur) + if (cur->rc == rc) + return cur; + for (cur = vio_default_errors; cur->rc; ++cur) + if (cur->rc == rc) + return cur; + return &vio_unknown_error; +} +EXPORT_SYMBOL(vio_lookup_rc); diff --git a/arch/powerpc/platforms/iseries/vpd_areas.h b/arch/powerpc/platforms/iseries/vpd_areas.h new file mode 100644 index 00000000000..601e6dd860e --- /dev/null +++ b/arch/powerpc/platforms/iseries/vpd_areas.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _ISERIES_VPD_AREAS_H +#define _ISERIES_VPD_AREAS_H + +/* + * This file defines the address and length of all of the VPD area passed to + * the OS from PLIC (most of which start from the SP). + */ + +#include <asm/types.h> + +/* VPD Entry index is carved in stone - cannot be changed (easily). */ +#define ItVpdCecVpd 0 +#define ItVpdDynamicSpace 1 +#define ItVpdExtVpd 2 +#define ItVpdExtVpdOnPanel 3 +#define ItVpdFirstPaca 4 +#define ItVpdIoVpd 5 +#define ItVpdIplParms 6 +#define ItVpdMsVpd 7 +#define ItVpdPanelVpd 8 +#define ItVpdLpNaca 9 +#define ItVpdBackplaneAndMaybeClockCardVpd 10 +#define ItVpdRecoveryLogBuffer 11 +#define ItVpdSpCommArea 12 +#define ItVpdSpLogBuffer 13 +#define ItVpdSpLogBufferSave 14 +#define ItVpdSpCardVpd 15 +#define ItVpdFirstProcVpd 16 +#define ItVpdApModelVpd 17 +#define ItVpdClockCardVpd 18 +#define ItVpdBusExtCardVpd 19 +#define ItVpdProcCapacityVpd 20 +#define ItVpdInteractiveCapacityVpd 21 +#define ItVpdFirstSlotLabel 22 +#define ItVpdFirstLpQueue 23 +#define ItVpdFirstL3CacheVpd 24 +#define ItVpdFirstProcFruVpd 25 + +#define ItVpdMaxEntries 26 + +#define ItDmaMaxEntries 10 + +#define ItVpdAreasMaxSlotLabels 192 + + +struct ItVpdAreas { + u32 xSlicDesc; // Descriptor 000-003 + u16 xSlicSize; // Size of this control block 004-005 + u16 xPlicAdjustVpdLens:1; // Flag to indicate new interface006-007 + u16 xRsvd1:15; // Reserved bits ... + u16 xSlicVpdEntries; // Number of VPD entries 008-009 + u16 xSlicDmaEntries; // Number of DMA entries 00A-00B + u16 xSlicMaxLogicalProcs; // Maximum logical processors 00C-00D + u16 xSlicMaxPhysicalProcs; // Maximum physical processors 00E-00F + u16 xSlicDmaToksOffset; // Offset into this of array 010-011 + u16 xSlicVpdAdrsOffset; // Offset into this of array 012-013 + u16 xSlicDmaLensOffset; // Offset into this of array 014-015 + u16 xSlicVpdLensOffset; // Offset into this of array 016-017 + u16 xSlicMaxSlotLabels; // Maximum number of slot labels018-019 + u16 xSlicMaxLpQueues; // Maximum number of LP Queues 01A-01B + u8 xRsvd2[4]; // Reserved 01C-01F + u64 xRsvd3[12]; // Reserved 020-07F + u32 xPlicDmaLens[ItDmaMaxEntries];// Array of DMA lengths 080-0A7 + u32 xPlicDmaToks[ItDmaMaxEntries];// Array of DMA tokens 0A8-0CF + u32 xSlicVpdLens[ItVpdMaxEntries];// Array of VPD lengths 0D0-12F + void *xSlicVpdAdrs[ItVpdMaxEntries];// Array of VPD buffers 130-1EF +}; + +extern struct ItVpdAreas itVpdAreas; + +#endif /* _ISERIES_VPD_AREAS_H */ diff --git a/arch/powerpc/platforms/iseries/vpdinfo.c b/arch/powerpc/platforms/iseries/vpdinfo.c new file mode 100644 index 00000000000..d8a6796924e --- /dev/null +++ b/arch/powerpc/platforms/iseries/vpdinfo.c @@ -0,0 +1,266 @@ +/* + * This code gets the card location of the hardware + * Copyright (C) 2001 <Allan H Trautman> <IBM Corp> + * Copyright (C) 2005 Stephen Rothwel, IBM Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the: + * Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, + * Boston, MA 02111-1307 USA + * + * Change Activity: + * Created, Feb 2, 2001 + * Ported to ppc64, August 20, 2001 + * End Change Activity + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <asm/types.h> +#include <asm/resource.h> + +#include <asm/iSeries/HvCallPci.h> +#include <asm/iSeries/HvTypes.h> +#include <asm/iSeries/iSeries_pci.h> + +/* + * Size of Bus VPD data + */ +#define BUS_VPDSIZE 1024 + +/* + * Bus Vpd Tags + */ +#define VpdEndOfAreaTag 0x79 +#define VpdIdStringTag 0x82 +#define VpdVendorAreaTag 0x84 + +/* + * Mfg Area Tags + */ +#define VpdFruFrameId 0x4649 // "FI" +#define VpdSlotMapFormat 0x4D46 // "MF" +#define VpdSlotMap 0x534D // "SM" + +/* + * Structures of the areas + */ +struct MfgVpdAreaStruct { + u16 Tag; + u8 TagLength; + u8 AreaData1; + u8 AreaData2; +}; +typedef struct MfgVpdAreaStruct MfgArea; +#define MFG_ENTRY_SIZE 3 + +struct SlotMapStruct { + u8 AgentId; + u8 SecondaryAgentId; + u8 PhbId; + char CardLocation[3]; + char Parms[8]; + char Reserved[2]; +}; +typedef struct SlotMapStruct SlotMap; +#define SLOT_ENTRY_SIZE 16 + +/* + * Parse the Slot Area + */ +static void __init iSeries_Parse_SlotArea(SlotMap *MapPtr, int MapLen, + HvAgentId agent, u8 *PhbId, char card[4]) +{ + int SlotMapLen = MapLen; + SlotMap *SlotMapPtr = MapPtr; + + /* + * Parse Slot label until we find the one requested + */ + while (SlotMapLen > 0) { + if (SlotMapPtr->AgentId == agent) { + /* + * If Phb wasn't found, grab the entry first one found. + */ + if (*PhbId == 0xff) + *PhbId = SlotMapPtr->PhbId; + /* Found it, extract the data. */ + if (SlotMapPtr->PhbId == *PhbId) { + memcpy(card, &SlotMapPtr->CardLocation, 3); + card[3] = 0; + break; + } + } + /* Point to the next Slot */ + SlotMapPtr = (SlotMap *)((char *)SlotMapPtr + SLOT_ENTRY_SIZE); + SlotMapLen -= SLOT_ENTRY_SIZE; + } +} + +/* + * Parse the Mfg Area + */ +static void __init iSeries_Parse_MfgArea(u8 *AreaData, int AreaLen, + HvAgentId agent, u8 *PhbId, + u8 *frame, char card[4]) +{ + MfgArea *MfgAreaPtr = (MfgArea *)AreaData; + int MfgAreaLen = AreaLen; + u16 SlotMapFmt = 0; + + /* Parse Mfg Data */ + while (MfgAreaLen > 0) { + int MfgTagLen = MfgAreaPtr->TagLength; + /* Frame ID (FI 4649020310 ) */ + if (MfgAreaPtr->Tag == VpdFruFrameId) /* FI */ + *frame = MfgAreaPtr->AreaData1; + /* Slot Map Format (MF 4D46020004 ) */ + else if (MfgAreaPtr->Tag == VpdSlotMapFormat) /* MF */ + SlotMapFmt = (MfgAreaPtr->AreaData1 * 256) + + MfgAreaPtr->AreaData2; + /* Slot Map (SM 534D90 */ + else if (MfgAreaPtr->Tag == VpdSlotMap) { /* SM */ + SlotMap *SlotMapPtr; + + if (SlotMapFmt == 0x1004) + SlotMapPtr = (SlotMap *)((char *)MfgAreaPtr + + MFG_ENTRY_SIZE + 1); + else + SlotMapPtr = (SlotMap *)((char *)MfgAreaPtr + + MFG_ENTRY_SIZE); + iSeries_Parse_SlotArea(SlotMapPtr, MfgTagLen, + agent, PhbId, card); + } + /* + * Point to the next Mfg Area + * Use defined size, sizeof give wrong answer + */ + MfgAreaPtr = (MfgArea *)((char *)MfgAreaPtr + MfgTagLen + + MFG_ENTRY_SIZE); + MfgAreaLen -= (MfgTagLen + MFG_ENTRY_SIZE); + } +} + +/* + * Look for "BUS".. Data is not Null terminated. + * PHBID of 0xFF indicates PHB was not found in VPD Data. + */ +static int __init iSeries_Parse_PhbId(u8 *AreaPtr, int AreaLength) +{ + u8 *PhbPtr = AreaPtr; + int DataLen = AreaLength; + char PhbId = 0xFF; + + while (DataLen > 0) { + if ((*PhbPtr == 'B') && (*(PhbPtr + 1) == 'U') + && (*(PhbPtr + 2) == 'S')) { + PhbPtr += 3; + while (*PhbPtr == ' ') + ++PhbPtr; + PhbId = (*PhbPtr & 0x0F); + break; + } + ++PhbPtr; + --DataLen; + } + return PhbId; +} + +/* + * Parse out the VPD Areas + */ +static void __init iSeries_Parse_Vpd(u8 *VpdData, int VpdDataLen, + HvAgentId agent, u8 *frame, char card[4]) +{ + u8 *TagPtr = VpdData; + int DataLen = VpdDataLen - 3; + u8 PhbId; + + while ((*TagPtr != VpdEndOfAreaTag) && (DataLen > 0)) { + int AreaLen = *(TagPtr + 1) + (*(TagPtr + 2) * 256); + u8 *AreaData = TagPtr + 3; + + if (*TagPtr == VpdIdStringTag) + PhbId = iSeries_Parse_PhbId(AreaData, AreaLen); + else if (*TagPtr == VpdVendorAreaTag) + iSeries_Parse_MfgArea(AreaData, AreaLen, + agent, &PhbId, frame, card); + /* Point to next Area. */ + TagPtr = AreaData + AreaLen; + DataLen -= AreaLen; + } +} + +static void __init iSeries_Get_Location_Code(u16 bus, HvAgentId agent, + u8 *frame, char card[4]) +{ + int BusVpdLen = 0; + u8 *BusVpdPtr = kmalloc(BUS_VPDSIZE, GFP_KERNEL); + + if (BusVpdPtr == NULL) { + printk("PCI: Bus VPD Buffer allocation failure.\n"); + return; + } + BusVpdLen = HvCallPci_getBusVpd(bus, ISERIES_HV_ADDR(BusVpdPtr), + BUS_VPDSIZE); + if (BusVpdLen == 0) { + printk("PCI: Bus VPD Buffer zero length.\n"); + goto out_free; + } + /* printk("PCI: BusVpdPtr: %p, %d\n",BusVpdPtr, BusVpdLen); */ + /* Make sure this is what I think it is */ + if (*BusVpdPtr != VpdIdStringTag) { /* 0x82 */ + printk("PCI: Bus VPD Buffer missing starting tag.\n"); + goto out_free; + } + iSeries_Parse_Vpd(BusVpdPtr, BusVpdLen, agent, frame, card); +out_free: + kfree(BusVpdPtr); +} + +/* + * Prints the device information. + * - Pass in pci_dev* pointer to the device. + * - Pass in the device count + * + * Format: + * PCI: Bus 0, Device 26, Vendor 0x12AE Frame 1, Card C10 Ethernet + * controller + */ +void __init iSeries_Device_Information(struct pci_dev *PciDev, int count) +{ + struct device_node *DevNode = PciDev->sysdata; + u16 bus; + u8 frame; + char card[4]; + HvSubBusNumber subbus; + HvAgentId agent; + + if (DevNode == NULL) { + printk("%d. PCI: iSeries_Device_Information DevNode is NULL\n", + count); + return; + } + + bus = ISERIES_BUS(DevNode); + subbus = ISERIES_SUBBUS(DevNode); + agent = ISERIES_PCI_AGENTID(ISERIES_GET_DEVICE_FROM_SUBBUS(subbus), + ISERIES_GET_FUNCTION_FROM_SUBBUS(subbus)); + iSeries_Get_Location_Code(bus, agent, &frame, card); + + printk("%d. PCI: Bus%3d, Device%3d, Vendor %04X Frame%3d, Card %4s ", + count, bus, PCI_SLOT(PciDev->devfn), PciDev->vendor, + frame, card); + printk("0x%04X\n", (int)(PciDev->class >> 8)); +} diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile new file mode 100644 index 00000000000..74712ede16d --- /dev/null +++ b/arch/powerpc/platforms/powermac/Makefile @@ -0,0 +1,8 @@ +obj-$(CONFIG_PPC_PMAC) += pic.o setup.o time.o feature.o pci.o \ + sleep.o low_i2c.o cache.o +obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o +obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq.o +ifeq ($(CONFIG_PPC_PMAC),y) +obj-$(CONFIG_NVRAM) += nvram.o +obj-$(CONFIG_SMP) += smp.o +endif diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c new file mode 100644 index 00000000000..8be2f7d071f --- /dev/null +++ b/arch/powerpc/platforms/powermac/backlight.c @@ -0,0 +1,202 @@ +/* + * Miscellaneous procedures for dealing with the PowerMac hardware. + * Contains support for the backlight. + * + * Copyright (C) 2000 Benjamin Herrenschmidt + * + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/stddef.h> +#include <linux/reboot.h> +#include <linux/nvram.h> +#include <linux/console.h> +#include <asm/sections.h> +#include <asm/ptrace.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/nvram.h> +#include <asm/backlight.h> + +#include <linux/adb.h> +#include <linux/pmu.h> + +static struct backlight_controller *backlighter; +static void* backlighter_data; +static int backlight_autosave; +static int backlight_level = BACKLIGHT_MAX; +static int backlight_enabled = 1; +static int backlight_req_level = -1; +static int backlight_req_enable = -1; + +static void backlight_callback(void *); +static DECLARE_WORK(backlight_work, backlight_callback, NULL); + +void register_backlight_controller(struct backlight_controller *ctrler, + void *data, char *type) +{ + struct device_node* bk_node; + char *prop; + int valid = 0; + + /* There's already a matching controller, bail out */ + if (backlighter != NULL) + return; + + bk_node = find_devices("backlight"); + +#ifdef CONFIG_ADB_PMU + /* Special case for the old PowerBook since I can't test on it */ + backlight_autosave = machine_is_compatible("AAPL,3400/2400") + || machine_is_compatible("AAPL,3500"); + if ((backlight_autosave + || machine_is_compatible("AAPL,PowerBook1998") + || machine_is_compatible("PowerBook1,1")) + && !strcmp(type, "pmu")) + valid = 1; +#endif + if (bk_node) { + prop = get_property(bk_node, "backlight-control", NULL); + if (prop && !strncmp(prop, type, strlen(type))) + valid = 1; + } + if (!valid) + return; + backlighter = ctrler; + backlighter_data = data; + + if (bk_node && !backlight_autosave) + prop = get_property(bk_node, "bklt", NULL); + else + prop = NULL; + if (prop) { + backlight_level = ((*prop)+1) >> 1; + if (backlight_level > BACKLIGHT_MAX) + backlight_level = BACKLIGHT_MAX; + } + +#ifdef CONFIG_ADB_PMU + if (backlight_autosave) { + struct adb_request req; + pmu_request(&req, NULL, 2, 0xd9, 0); + while (!req.complete) + pmu_poll(); + backlight_level = req.reply[0] >> 4; + } +#endif + acquire_console_sem(); + if (!backlighter->set_enable(1, backlight_level, data)) + backlight_enabled = 1; + release_console_sem(); + + printk(KERN_INFO "Registered \"%s\" backlight controller," + "level: %d/15\n", type, backlight_level); +} +EXPORT_SYMBOL(register_backlight_controller); + +void unregister_backlight_controller(struct backlight_controller + *ctrler, void *data) +{ + /* We keep the current backlight level (for now) */ + if (ctrler == backlighter && data == backlighter_data) + backlighter = NULL; +} +EXPORT_SYMBOL(unregister_backlight_controller); + +static int __set_backlight_enable(int enable) +{ + int rc; + + if (!backlighter) + return -ENODEV; + acquire_console_sem(); + rc = backlighter->set_enable(enable, backlight_level, + backlighter_data); + if (!rc) + backlight_enabled = enable; + release_console_sem(); + return rc; +} +int set_backlight_enable(int enable) +{ + if (!backlighter) + return -ENODEV; + backlight_req_enable = enable; + schedule_work(&backlight_work); + return 0; +} + +EXPORT_SYMBOL(set_backlight_enable); + +int get_backlight_enable(void) +{ + if (!backlighter) + return -ENODEV; + return backlight_enabled; +} +EXPORT_SYMBOL(get_backlight_enable); + +static int __set_backlight_level(int level) +{ + int rc = 0; + + if (!backlighter) + return -ENODEV; + if (level < BACKLIGHT_MIN) + level = BACKLIGHT_OFF; + if (level > BACKLIGHT_MAX) + level = BACKLIGHT_MAX; + acquire_console_sem(); + if (backlight_enabled) + rc = backlighter->set_level(level, backlighter_data); + if (!rc) + backlight_level = level; + release_console_sem(); + if (!rc && !backlight_autosave) { + level <<=1; + if (level & 0x10) + level |= 0x01; + // -- todo: save to property "bklt" + } + return rc; +} +int set_backlight_level(int level) +{ + if (!backlighter) + return -ENODEV; + backlight_req_level = level; + schedule_work(&backlight_work); + return 0; +} + +EXPORT_SYMBOL(set_backlight_level); + +int get_backlight_level(void) +{ + if (!backlighter) + return -ENODEV; + return backlight_level; +} +EXPORT_SYMBOL(get_backlight_level); + +static void backlight_callback(void *dummy) +{ + int level, enable; + + do { + level = backlight_req_level; + enable = backlight_req_enable; + mb(); + + if (level >= 0) + __set_backlight_level(level); + if (enable >= 0) + __set_backlight_enable(enable); + } while(cmpxchg(&backlight_req_level, level, -1) != level || + cmpxchg(&backlight_req_enable, enable, -1) != enable); +} diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S new file mode 100644 index 00000000000..fb977de6b70 --- /dev/null +++ b/arch/powerpc/platforms/powermac/cache.S @@ -0,0 +1,359 @@ +/* + * This file contains low-level cache management functions + * used for sleep and CPU speed changes on Apple machines. + * (In fact the only thing that is Apple-specific is that we assume + * that we can read from ROM at physical address 0xfff00000.) + * + * Copyright (C) 2004 Paul Mackerras (paulus@samba.org) and + * Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/cputable.h> + +/* + * Flush and disable all data caches (dL1, L2, L3). This is used + * when going to sleep, when doing a PMU based cpufreq transition, + * or when "offlining" a CPU on SMP machines. This code is over + * paranoid, but I've had enough issues with various CPU revs and + * bugs that I decided it was worth beeing over cautious + */ + +_GLOBAL(flush_disable_caches) +#ifndef CONFIG_6xx + blr +#else +BEGIN_FTR_SECTION + b flush_disable_745x +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) +BEGIN_FTR_SECTION + b flush_disable_75x +END_FTR_SECTION_IFSET(CPU_FTR_L2CR) + b __flush_disable_L1 + +/* This is the code for G3 and 74[01]0 */ +flush_disable_75x: + mflr r10 + + /* Turn off EE and DR in MSR */ + mfmsr r11 + rlwinm r0,r11,0,~MSR_EE + rlwinm r0,r0,0,~MSR_DR + sync + mtmsr r0 + isync + + /* Stop DST streams */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + + /* Stop DPM */ + mfspr r8,SPRN_HID0 /* Save SPRN_HID0 in r8 */ + rlwinm r4,r8,0,12,10 /* Turn off HID0[DPM] */ + sync + mtspr SPRN_HID0,r4 /* Disable DPM */ + sync + + /* Disp-flush L1. We have a weird problem here that I never + * totally figured out. On 750FX, using the ROM for the flush + * results in a non-working flush. We use that workaround for + * now until I finally understand what's going on. --BenH + */ + + /* ROM base by default */ + lis r4,0xfff0 + mfpvr r3 + srwi r3,r3,16 + cmplwi cr0,r3,0x7000 + bne+ 1f + /* RAM base on 750FX */ + li r4,0 +1: li r4,0x4000 + mtctr r4 +1: lwz r0,0(r4) + addi r4,r4,32 + bdnz 1b + sync + isync + + /* Disable / invalidate / enable L1 data */ + mfspr r3,SPRN_HID0 + rlwinm r3,r3,0,~(HID0_DCE | HID0_ICE) + mtspr SPRN_HID0,r3 + sync + isync + ori r3,r3,(HID0_DCE|HID0_DCI|HID0_ICE|HID0_ICFI) + sync + isync + mtspr SPRN_HID0,r3 + xori r3,r3,(HID0_DCI|HID0_ICFI) + mtspr SPRN_HID0,r3 + sync + + /* Get the current enable bit of the L2CR into r4 */ + mfspr r5,SPRN_L2CR + /* Set to data-only (pre-745x bit) */ + oris r3,r5,L2CR_L2DO@h + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r3 +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: /* disp-flush L2. The interesting thing here is that the L2 can be + * up to 2Mb ... so using the ROM, we'll end up wrapping back to memory + * but that is probbaly fine. We disp-flush over 4Mb to be safe + */ + lis r4,2 + mtctr r4 + lis r4,0xfff0 +1: lwz r0,0(r4) + addi r4,r4,32 + bdnz 1b + sync + isync + lis r4,2 + mtctr r4 + lis r4,0xfff0 +1: dcbf 0,r4 + addi r4,r4,32 + bdnz 1b + sync + isync + + /* now disable L2 */ + rlwinm r5,r5,0,~L2CR_L2E + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r5 +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + /* Invalidate L2. This is pre-745x, we clear the L2I bit ourselves */ + oris r4,r5,L2CR_L2I@h + mtspr SPRN_L2CR,r4 + sync + isync + + /* Wait for the invalidation to complete */ +1: mfspr r3,SPRN_L2CR + rlwinm. r0,r3,0,31,31 + bne 1b + + /* Clear L2I */ + xoris r4,r4,L2CR_L2I@h + sync + mtspr SPRN_L2CR,r4 + sync + + /* now disable the L1 data cache */ + mfspr r0,SPRN_HID0 + rlwinm r0,r0,0,~(HID0_DCE|HID0_ICE) + mtspr SPRN_HID0,r0 + sync + isync + + /* Restore HID0[DPM] to whatever it was before */ + sync + mfspr r0,SPRN_HID0 + rlwimi r0,r8,0,11,11 /* Turn back HID0[DPM] */ + mtspr SPRN_HID0,r0 + sync + + /* restore DR and EE */ + sync + mtmsr r11 + isync + + mtlr r10 + blr + +/* This code is for 745x processors */ +flush_disable_745x: + /* Turn off EE and DR in MSR */ + mfmsr r11 + rlwinm r0,r11,0,~MSR_EE + rlwinm r0,r0,0,~MSR_DR + sync + mtmsr r0 + isync + + /* Stop prefetch streams */ + DSSALL + sync + + /* Disable L2 prefetching */ + mfspr r0,SPRN_MSSCR0 + rlwinm r0,r0,0,0,29 + mtspr SPRN_MSSCR0,r0 + sync + isync + lis r4,0 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + + /* Due to a bug with the HW flush on some CPU revs, we occasionally + * experience data corruption. I'm adding a displacement flush along + * with a dcbf loop over a few Mb to "help". The problem isn't totally + * fixed by this in theory, but at least, in practice, I couldn't reproduce + * it even with a big hammer... + */ + + lis r4,0x0002 + mtctr r4 + li r4,0 +1: + lwz r0,0(r4) + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + isync + + /* Now, flush the first 4MB of memory */ + lis r4,0x0002 + mtctr r4 + li r4,0 + sync +1: + dcbf 0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + + /* Flush and disable the L1 data cache */ + mfspr r6,SPRN_LDSTCR + lis r3,0xfff0 /* read from ROM for displacement flush */ + li r4,0xfe /* start with only way 0 unlocked */ + li r5,128 /* 128 lines in each way */ +1: mtctr r5 + rlwimi r6,r4,0,24,31 + mtspr SPRN_LDSTCR,r6 + sync + isync +2: lwz r0,0(r3) /* touch each cache line */ + addi r3,r3,32 + bdnz 2b + rlwinm r4,r4,1,24,30 /* move on to the next way */ + ori r4,r4,1 + cmpwi r4,0xff /* all done? */ + bne 1b + /* now unlock the L1 data cache */ + li r4,0 + rlwimi r6,r4,0,24,31 + sync + mtspr SPRN_LDSTCR,r6 + sync + isync + + /* Flush the L2 cache using the hardware assist */ + mfspr r3,SPRN_L2CR + cmpwi r3,0 /* check if it is enabled first */ + bge 4f + oris r0,r3,(L2CR_L2IO_745x|L2CR_L2DO_745x)@h + b 2f + /* When disabling/locking L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r0 /* lock the L2 cache */ +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + ori r0,r3,L2CR_L2HWF_745x + sync + mtspr SPRN_L2CR,r0 /* set the hardware flush bit */ +3: mfspr r0,SPRN_L2CR /* wait for it to go to 0 */ + andi. r0,r0,L2CR_L2HWF_745x + bne 3b + sync + rlwinm r3,r3,0,~L2CR_L2E + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r3 /* disable the L2 cache */ +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + oris r4,r3,L2CR_L2I@h + mtspr SPRN_L2CR,r4 + sync + isync +1: mfspr r4,SPRN_L2CR + andis. r0,r4,L2CR_L2I@h + bne 1b + sync + +BEGIN_FTR_SECTION + /* Flush the L3 cache using the hardware assist */ +4: mfspr r3,SPRN_L3CR + cmpwi r3,0 /* check if it is enabled */ + bge 6f + oris r0,r3,L3CR_L3IO@h + ori r0,r0,L3CR_L3DO + sync + mtspr SPRN_L3CR,r0 /* lock the L3 cache */ + sync + isync + ori r0,r0,L3CR_L3HWF + sync + mtspr SPRN_L3CR,r0 /* set the hardware flush bit */ +5: mfspr r0,SPRN_L3CR /* wait for it to go to zero */ + andi. r0,r0,L3CR_L3HWF + bne 5b + rlwinm r3,r3,0,~L3CR_L3E + sync + mtspr SPRN_L3CR,r3 /* disable the L3 cache */ + sync + ori r4,r3,L3CR_L3I + mtspr SPRN_L3CR,r4 +1: mfspr r4,SPRN_L3CR + andi. r0,r4,L3CR_L3I + bne 1b + sync +END_FTR_SECTION_IFSET(CPU_FTR_L3CR) + +6: mfspr r0,SPRN_HID0 /* now disable the L1 data cache */ + rlwinm r0,r0,0,~HID0_DCE + mtspr SPRN_HID0,r0 + sync + isync + mtmsr r11 /* restore DR and EE */ + isync + blr +#endif /* CONFIG_6xx */ diff --git a/arch/powerpc/platforms/powermac/cpufreq.c b/arch/powerpc/platforms/powermac/cpufreq.c new file mode 100644 index 00000000000..6d32d99402b --- /dev/null +++ b/arch/powerpc/platforms/powermac/cpufreq.c @@ -0,0 +1,728 @@ +/* + * arch/ppc/platforms/pmac_cpufreq.c + * + * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> + * Copyright (C) 2004 John Steele Scott <toojays@toojays.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * TODO: Need a big cleanup here. Basically, we need to have different + * cpufreq_driver structures for the different type of HW instead of the + * current mess. We also need to better deal with the detection of the + * type of machine. + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/slab.h> +#include <linux/cpufreq.h> +#include <linux/init.h> +#include <linux/sysdev.h> +#include <linux/i2c.h> +#include <linux/hardirq.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/irq.h> +#include <asm/pmac_feature.h> +#include <asm/mmu_context.h> +#include <asm/sections.h> +#include <asm/cputable.h> +#include <asm/time.h> +#include <asm/system.h> +#include <asm/mpic.h> +#include <asm/keylargo.h> + +/* WARNING !!! This will cause calibrate_delay() to be called, + * but this is an __init function ! So you MUST go edit + * init/main.c to make it non-init before enabling DEBUG_FREQ + */ +#undef DEBUG_FREQ + +/* + * There is a problem with the core cpufreq code on SMP kernels, + * it won't recalculate the Bogomips properly + */ +#ifdef CONFIG_SMP +#warning "WARNING, CPUFREQ not recommended on SMP kernels" +#endif + +extern void low_choose_7447a_dfs(int dfs); +extern void low_choose_750fx_pll(int pll); +extern void low_sleep_handler(void); + +/* + * Currently, PowerMac cpufreq supports only high & low frequencies + * that are set by the firmware + */ +static unsigned int low_freq; +static unsigned int hi_freq; +static unsigned int cur_freq; +static unsigned int sleep_freq; + +/* + * Different models uses different mecanisms to switch the frequency + */ +static int (*set_speed_proc)(int low_speed); +static unsigned int (*get_speed_proc)(void); + +/* + * Some definitions used by the various speedprocs + */ +static u32 voltage_gpio; +static u32 frequency_gpio; +static u32 slew_done_gpio; +static int no_schedule; +static int has_cpu_l2lve; +static int is_pmu_based; + +/* There are only two frequency states for each processor. Values + * are in kHz for the time being. + */ +#define CPUFREQ_HIGH 0 +#define CPUFREQ_LOW 1 + +static struct cpufreq_frequency_table pmac_cpu_freqs[] = { + {CPUFREQ_HIGH, 0}, + {CPUFREQ_LOW, 0}, + {0, CPUFREQ_TABLE_END}, +}; + +static struct freq_attr* pmac_cpu_freqs_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static inline void local_delay(unsigned long ms) +{ + if (no_schedule) + mdelay(ms); + else + msleep(ms); +} + +static inline void wakeup_decrementer(void) +{ + set_dec(tb_ticks_per_jiffy); + /* No currently-supported powerbook has a 601, + * so use get_tbl, not native + */ + last_jiffy_stamp(0) = tb_last_stamp = get_tbl(); +} + +#ifdef DEBUG_FREQ +static inline void debug_calc_bogomips(void) +{ + /* This will cause a recalc of bogomips and display the + * result. We backup/restore the value to avoid affecting the + * core cpufreq framework's own calculation. + */ + extern void calibrate_delay(void); + + unsigned long save_lpj = loops_per_jiffy; + calibrate_delay(); + loops_per_jiffy = save_lpj; +} +#endif /* DEBUG_FREQ */ + +/* Switch CPU speed under 750FX CPU control + */ +static int cpu_750fx_cpu_speed(int low_speed) +{ + u32 hid2; + + if (low_speed == 0) { + /* ramping up, set voltage first */ + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); + /* Make sure we sleep for at least 1ms */ + local_delay(10); + + /* tweak L2 for high voltage */ + if (has_cpu_l2lve) { + hid2 = mfspr(SPRN_HID2); + hid2 &= ~0x2000; + mtspr(SPRN_HID2, hid2); + } + } +#ifdef CONFIG_6xx + low_choose_750fx_pll(low_speed); +#endif + if (low_speed == 1) { + /* tweak L2 for low voltage */ + if (has_cpu_l2lve) { + hid2 = mfspr(SPRN_HID2); + hid2 |= 0x2000; + mtspr(SPRN_HID2, hid2); + } + + /* ramping down, set voltage last */ + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); + local_delay(10); + } + + return 0; +} + +static unsigned int cpu_750fx_get_cpu_speed(void) +{ + if (mfspr(SPRN_HID1) & HID1_PS) + return low_freq; + else + return hi_freq; +} + +/* Switch CPU speed using DFS */ +static int dfs_set_cpu_speed(int low_speed) +{ + if (low_speed == 0) { + /* ramping up, set voltage first */ + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); + /* Make sure we sleep for at least 1ms */ + local_delay(1); + } + + /* set frequency */ +#ifdef CONFIG_6xx + low_choose_7447a_dfs(low_speed); +#endif + udelay(100); + + if (low_speed == 1) { + /* ramping down, set voltage last */ + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); + local_delay(1); + } + + return 0; +} + +static unsigned int dfs_get_cpu_speed(void) +{ + if (mfspr(SPRN_HID1) & HID1_DFS) + return low_freq; + else + return hi_freq; +} + + +/* Switch CPU speed using slewing GPIOs + */ +static int gpios_set_cpu_speed(int low_speed) +{ + int gpio, timeout = 0; + + /* If ramping up, set voltage first */ + if (low_speed == 0) { + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); + /* Delay is way too big but it's ok, we schedule */ + local_delay(10); + } + + /* Set frequency */ + gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0); + if (low_speed == ((gpio & 0x01) == 0)) + goto skip; + + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, frequency_gpio, + low_speed ? 0x04 : 0x05); + udelay(200); + do { + if (++timeout > 100) + break; + local_delay(1); + gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, slew_done_gpio, 0); + } while((gpio & 0x02) == 0); + skip: + /* If ramping down, set voltage last */ + if (low_speed == 1) { + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); + /* Delay is way too big but it's ok, we schedule */ + local_delay(10); + } + +#ifdef DEBUG_FREQ + debug_calc_bogomips(); +#endif + + return 0; +} + +/* Switch CPU speed under PMU control + */ +static int pmu_set_cpu_speed(int low_speed) +{ + struct adb_request req; + unsigned long save_l2cr; + unsigned long save_l3cr; + unsigned int pic_prio; + unsigned long flags; + + preempt_disable(); + +#ifdef DEBUG_FREQ + printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1)); +#endif + pmu_suspend(); + + /* Disable all interrupt sources on openpic */ + pic_prio = mpic_cpu_get_priority(); + mpic_cpu_set_priority(0xf); + + /* Make sure the decrementer won't interrupt us */ + asm volatile("mtdec %0" : : "r" (0x7fffffff)); + /* Make sure any pending DEC interrupt occuring while we did + * the above didn't re-enable the DEC */ + mb(); + asm volatile("mtdec %0" : : "r" (0x7fffffff)); + + /* We can now disable MSR_EE */ + local_irq_save(flags); + + /* Giveup the FPU & vec */ + enable_kernel_fp(); + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + enable_kernel_altivec(); +#endif /* CONFIG_ALTIVEC */ + + /* Save & disable L2 and L3 caches */ + save_l3cr = _get_L3CR(); /* (returns -1 if not available) */ + save_l2cr = _get_L2CR(); /* (returns -1 if not available) */ + + /* Send the new speed command. My assumption is that this command + * will cause PLL_CFG[0..3] to be changed next time CPU goes to sleep + */ + pmu_request(&req, NULL, 6, PMU_CPU_SPEED, 'W', 'O', 'O', 'F', low_speed); + while (!req.complete) + pmu_poll(); + + /* Prepare the northbridge for the speed transition */ + pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,1); + + /* Call low level code to backup CPU state and recover from + * hardware reset + */ + low_sleep_handler(); + + /* Restore the northbridge */ + pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,0); + + /* Restore L2 cache */ + if (save_l2cr != 0xffffffff && (save_l2cr & L2CR_L2E) != 0) + _set_L2CR(save_l2cr); + /* Restore L3 cache */ + if (save_l3cr != 0xffffffff && (save_l3cr & L3CR_L3E) != 0) + _set_L3CR(save_l3cr); + + /* Restore userland MMU context */ + set_context(current->active_mm->context, current->active_mm->pgd); + +#ifdef DEBUG_FREQ + printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1)); +#endif + + /* Restore low level PMU operations */ + pmu_unlock(); + + /* Restore decrementer */ + wakeup_decrementer(); + + /* Restore interrupts */ + mpic_cpu_set_priority(pic_prio); + + /* Let interrupts flow again ... */ + local_irq_restore(flags); + +#ifdef DEBUG_FREQ + debug_calc_bogomips(); +#endif + + pmu_resume(); + + preempt_enable(); + + return 0; +} + +static int do_set_cpu_speed(int speed_mode, int notify) +{ + struct cpufreq_freqs freqs; + unsigned long l3cr; + static unsigned long prev_l3cr; + + freqs.old = cur_freq; + freqs.new = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; + freqs.cpu = smp_processor_id(); + + if (freqs.old == freqs.new) + return 0; + + if (notify) + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + if (speed_mode == CPUFREQ_LOW && + cpu_has_feature(CPU_FTR_L3CR)) { + l3cr = _get_L3CR(); + if (l3cr & L3CR_L3E) { + prev_l3cr = l3cr; + _set_L3CR(0); + } + } + set_speed_proc(speed_mode == CPUFREQ_LOW); + if (speed_mode == CPUFREQ_HIGH && + cpu_has_feature(CPU_FTR_L3CR)) { + l3cr = _get_L3CR(); + if ((prev_l3cr & L3CR_L3E) && l3cr != prev_l3cr) + _set_L3CR(prev_l3cr); + } + if (notify) + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cur_freq = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; + + return 0; +} + +static unsigned int pmac_cpufreq_get_speed(unsigned int cpu) +{ + return cur_freq; +} + +static int pmac_cpufreq_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, pmac_cpu_freqs); +} + +static int pmac_cpufreq_target( struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs, + target_freq, relation, &newstate)) + return -EINVAL; + + return do_set_cpu_speed(newstate, 1); +} + +unsigned int pmac_get_one_cpufreq(int i) +{ + /* Supports only one CPU for now */ + return (i == 0) ? cur_freq : 0; +} + +static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + if (policy->cpu != 0) + return -ENODEV; + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = cur_freq; + + cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu); + return cpufreq_frequency_table_cpuinfo(policy, pmac_cpu_freqs); +} + +static u32 read_gpio(struct device_node *np) +{ + u32 *reg = (u32 *)get_property(np, "reg", NULL); + u32 offset; + + if (reg == NULL) + return 0; + /* That works for all keylargos but shall be fixed properly + * some day... The problem is that it seems we can't rely + * on the "reg" property of the GPIO nodes, they are either + * relative to the base of KeyLargo or to the base of the + * GPIO space, and the device-tree doesn't help. + */ + offset = *reg; + if (offset < KEYLARGO_GPIO_LEVELS0) + offset += KEYLARGO_GPIO_LEVELS0; + return offset; +} + +static int pmac_cpufreq_suspend(struct cpufreq_policy *policy, pm_message_t pmsg) +{ + /* Ok, this could be made a bit smarter, but let's be robust for now. We + * always force a speed change to high speed before sleep, to make sure + * we have appropriate voltage and/or bus speed for the wakeup process, + * and to make sure our loops_per_jiffies are "good enough", that is will + * not cause too short delays if we sleep in low speed and wake in high + * speed.. + */ + no_schedule = 1; + sleep_freq = cur_freq; + if (cur_freq == low_freq && !is_pmu_based) + do_set_cpu_speed(CPUFREQ_HIGH, 0); + return 0; +} + +static int pmac_cpufreq_resume(struct cpufreq_policy *policy) +{ + /* If we resume, first check if we have a get() function */ + if (get_speed_proc) + cur_freq = get_speed_proc(); + else + cur_freq = 0; + + /* We don't, hrm... we don't really know our speed here, best + * is that we force a switch to whatever it was, which is + * probably high speed due to our suspend() routine + */ + do_set_cpu_speed(sleep_freq == low_freq ? + CPUFREQ_LOW : CPUFREQ_HIGH, 0); + + no_schedule = 0; + return 0; +} + +static struct cpufreq_driver pmac_cpufreq_driver = { + .verify = pmac_cpufreq_verify, + .target = pmac_cpufreq_target, + .get = pmac_cpufreq_get_speed, + .init = pmac_cpufreq_cpu_init, + .suspend = pmac_cpufreq_suspend, + .resume = pmac_cpufreq_resume, + .flags = CPUFREQ_PM_NO_WARN, + .attr = pmac_cpu_freqs_attr, + .name = "powermac", + .owner = THIS_MODULE, +}; + + +static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) +{ + struct device_node *volt_gpio_np = of_find_node_by_name(NULL, + "voltage-gpio"); + struct device_node *freq_gpio_np = of_find_node_by_name(NULL, + "frequency-gpio"); + struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL, + "slewing-done"); + u32 *value; + + /* + * Check to see if it's GPIO driven or PMU only + * + * The way we extract the GPIO address is slightly hackish, but it + * works well enough for now. We need to abstract the whole GPIO + * stuff sooner or later anyway + */ + + if (volt_gpio_np) + voltage_gpio = read_gpio(volt_gpio_np); + if (freq_gpio_np) + frequency_gpio = read_gpio(freq_gpio_np); + if (slew_done_gpio_np) + slew_done_gpio = read_gpio(slew_done_gpio_np); + + /* If we use the frequency GPIOs, calculate the min/max speeds based + * on the bus frequencies + */ + if (frequency_gpio && slew_done_gpio) { + int lenp, rc; + u32 *freqs, *ratio; + + freqs = (u32 *)get_property(cpunode, "bus-frequencies", &lenp); + lenp /= sizeof(u32); + if (freqs == NULL || lenp != 2) { + printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n"); + return 1; + } + ratio = (u32 *)get_property(cpunode, "processor-to-bus-ratio*2", NULL); + if (ratio == NULL) { + printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n"); + return 1; + } + + /* Get the min/max bus frequencies */ + low_freq = min(freqs[0], freqs[1]); + hi_freq = max(freqs[0], freqs[1]); + + /* Grrrr.. It _seems_ that the device-tree is lying on the low bus + * frequency, it claims it to be around 84Mhz on some models while + * it appears to be approx. 101Mhz on all. Let's hack around here... + * fortunately, we don't need to be too precise + */ + if (low_freq < 98000000) + low_freq = 101000000; + + /* Convert those to CPU core clocks */ + low_freq = (low_freq * (*ratio)) / 2000; + hi_freq = (hi_freq * (*ratio)) / 2000; + + /* Now we get the frequencies, we read the GPIO to see what is out current + * speed + */ + rc = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0); + cur_freq = (rc & 0x01) ? hi_freq : low_freq; + + set_speed_proc = gpios_set_cpu_speed; + return 1; + } + + /* If we use the PMU, look for the min & max frequencies in the + * device-tree + */ + value = (u32 *)get_property(cpunode, "min-clock-frequency", NULL); + if (!value) + return 1; + low_freq = (*value) / 1000; + /* The PowerBook G4 12" (PowerBook6,1) has an error in the device-tree + * here */ + if (low_freq < 100000) + low_freq *= 10; + + value = (u32 *)get_property(cpunode, "max-clock-frequency", NULL); + if (!value) + return 1; + hi_freq = (*value) / 1000; + set_speed_proc = pmu_set_cpu_speed; + is_pmu_based = 1; + + return 0; +} + +static int pmac_cpufreq_init_7447A(struct device_node *cpunode) +{ + struct device_node *volt_gpio_np; + + if (get_property(cpunode, "dynamic-power-step", NULL) == NULL) + return 1; + + volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); + if (volt_gpio_np) + voltage_gpio = read_gpio(volt_gpio_np); + if (!voltage_gpio){ + printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n"); + return 1; + } + + /* OF only reports the high frequency */ + hi_freq = cur_freq; + low_freq = cur_freq/2; + + /* Read actual frequency from CPU */ + cur_freq = dfs_get_cpu_speed(); + set_speed_proc = dfs_set_cpu_speed; + get_speed_proc = dfs_get_cpu_speed; + + return 0; +} + +static int pmac_cpufreq_init_750FX(struct device_node *cpunode) +{ + struct device_node *volt_gpio_np; + u32 pvr, *value; + + if (get_property(cpunode, "dynamic-power-step", NULL) == NULL) + return 1; + + hi_freq = cur_freq; + value = (u32 *)get_property(cpunode, "reduced-clock-frequency", NULL); + if (!value) + return 1; + low_freq = (*value) / 1000; + + volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); + if (volt_gpio_np) + voltage_gpio = read_gpio(volt_gpio_np); + + pvr = mfspr(SPRN_PVR); + has_cpu_l2lve = !((pvr & 0xf00) == 0x100); + + set_speed_proc = cpu_750fx_cpu_speed; + get_speed_proc = cpu_750fx_get_cpu_speed; + cur_freq = cpu_750fx_get_cpu_speed(); + + return 0; +} + +/* Currently, we support the following machines: + * + * - Titanium PowerBook 1Ghz (PMU based, 667Mhz & 1Ghz) + * - Titanium PowerBook 800 (PMU based, 667Mhz & 800Mhz) + * - Titanium PowerBook 400 (PMU based, 300Mhz & 400Mhz) + * - Titanium PowerBook 500 (PMU based, 300Mhz & 500Mhz) + * - iBook2 500/600 (PMU based, 400Mhz & 500/600Mhz) + * - iBook2 700 (CPU based, 400Mhz & 700Mhz, support low voltage) + * - Recent MacRISC3 laptops + * - All new machines with 7447A CPUs + */ +static int __init pmac_cpufreq_setup(void) +{ + struct device_node *cpunode; + u32 *value; + + if (strstr(cmd_line, "nocpufreq")) + return 0; + + /* Assume only one CPU */ + cpunode = find_type_devices("cpu"); + if (!cpunode) + goto out; + + /* Get current cpu clock freq */ + value = (u32 *)get_property(cpunode, "clock-frequency", NULL); + if (!value) + goto out; + cur_freq = (*value) / 1000; + + /* Check for 7447A based MacRISC3 */ + if (machine_is_compatible("MacRISC3") && + get_property(cpunode, "dynamic-power-step", NULL) && + PVR_VER(mfspr(SPRN_PVR)) == 0x8003) { + pmac_cpufreq_init_7447A(cpunode); + /* Check for other MacRISC3 machines */ + } else if (machine_is_compatible("PowerBook3,4") || + machine_is_compatible("PowerBook3,5") || + machine_is_compatible("MacRISC3")) { + pmac_cpufreq_init_MacRISC3(cpunode); + /* Else check for iBook2 500/600 */ + } else if (machine_is_compatible("PowerBook4,1")) { + hi_freq = cur_freq; + low_freq = 400000; + set_speed_proc = pmu_set_cpu_speed; + is_pmu_based = 1; + } + /* Else check for TiPb 400 & 500 */ + else if (machine_is_compatible("PowerBook3,2")) { + /* We only know about the 400 MHz and the 500Mhz model + * they both have 300 MHz as low frequency + */ + if (cur_freq < 350000 || cur_freq > 550000) + goto out; + hi_freq = cur_freq; + low_freq = 300000; + set_speed_proc = pmu_set_cpu_speed; + is_pmu_based = 1; + } + /* Else check for 750FX */ + else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000) + pmac_cpufreq_init_750FX(cpunode); +out: + if (set_speed_proc == NULL) + return -ENODEV; + + pmac_cpu_freqs[CPUFREQ_LOW].frequency = low_freq; + pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq; + + printk(KERN_INFO "Registering PowerMac CPU frequency driver\n"); + printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n", + low_freq/1000, hi_freq/1000, cur_freq/1000); + + return cpufreq_register_driver(&pmac_cpufreq_driver); +} + +module_init(pmac_cpufreq_setup); + diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c new file mode 100644 index 00000000000..2cba670c71b --- /dev/null +++ b/arch/powerpc/platforms/powermac/feature.c @@ -0,0 +1,3062 @@ +/* + * arch/ppc/platforms/pmac_feature.c + * + * Copyright (C) 1996-2001 Paul Mackerras (paulus@cs.anu.edu.au) + * Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TODO: + * + * - Replace mdelay with some schedule loop if possible + * - Shorten some obfuscated delays on some routines (like modem + * power) + * - Refcount some clocks (see darwin) + * - Split split split... + * + */ +#include <linux/config.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/ioport.h> +#include <linux/pci.h> +#include <asm/sections.h> +#include <asm/errno.h> +#include <asm/ohare.h> +#include <asm/heathrow.h> +#include <asm/keylargo.h> +#include <asm/uninorth.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/pmac_feature.h> +#include <asm/dbdma.h> +#include <asm/pci-bridge.h> +#include <asm/pmac_low_i2c.h> + +#undef DEBUG_FEATURE + +#ifdef DEBUG_FEATURE +#define DBG(fmt...) printk(KERN_DEBUG fmt) +#else +#define DBG(fmt...) +#endif + +#ifdef CONFIG_6xx +extern int powersave_lowspeed; +#endif + +extern int powersave_nap; +extern struct device_node *k2_skiplist[2]; + + +/* + * We use a single global lock to protect accesses. Each driver has + * to take care of its own locking + */ +static DEFINE_SPINLOCK(feature_lock); + +#define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); +#define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); + + +/* + * Instance of some macio stuffs + */ +struct macio_chip macio_chips[MAX_MACIO_CHIPS]; + +struct macio_chip *macio_find(struct device_node *child, int type) +{ + while(child) { + int i; + + for (i=0; i < MAX_MACIO_CHIPS && macio_chips[i].of_node; i++) + if (child == macio_chips[i].of_node && + (!type || macio_chips[i].type == type)) + return &macio_chips[i]; + child = child->parent; + } + return NULL; +} +EXPORT_SYMBOL_GPL(macio_find); + +static const char *macio_names[] = +{ + "Unknown", + "Grand Central", + "OHare", + "OHareII", + "Heathrow", + "Gatwick", + "Paddington", + "Keylargo", + "Pangea", + "Intrepid", + "K2" +}; + + + +/* + * Uninorth reg. access. Note that Uni-N regs are big endian + */ + +#define UN_REG(r) (uninorth_base + ((r) >> 2)) +#define UN_IN(r) (in_be32(UN_REG(r))) +#define UN_OUT(r,v) (out_be32(UN_REG(r), (v))) +#define UN_BIS(r,v) (UN_OUT((r), UN_IN(r) | (v))) +#define UN_BIC(r,v) (UN_OUT((r), UN_IN(r) & ~(v))) + +static struct device_node *uninorth_node; +static u32 __iomem *uninorth_base; +static u32 uninorth_rev; +static int uninorth_u3; +static void __iomem *u3_ht; + +/* + * For each motherboard family, we have a table of functions pointers + * that handle the various features. + */ + +typedef long (*feature_call)(struct device_node *node, long param, long value); + +struct feature_table_entry { + unsigned int selector; + feature_call function; +}; + +struct pmac_mb_def +{ + const char* model_string; + const char* model_name; + int model_id; + struct feature_table_entry* features; + unsigned long board_flags; +}; +static struct pmac_mb_def pmac_mb; + +/* + * Here are the chip specific feature functions + */ + +static inline int simple_feature_tweak(struct device_node *node, int type, + int reg, u32 mask, int value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, type); + if (!macio) + return -ENODEV; + LOCK(flags); + if (value) + MACIO_BIS(reg, mask); + else + MACIO_BIC(reg, mask); + (void)MACIO_IN32(reg); + UNLOCK(flags); + + return 0; +} + +#ifndef CONFIG_POWER4 + +static long ohare_htw_scc_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long chan_mask; + unsigned long fcr; + unsigned long flags; + int htw, trans; + unsigned long rmask; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (!strcmp(node->name, "ch-a")) + chan_mask = MACIO_FLAG_SCCA_ON; + else if (!strcmp(node->name, "ch-b")) + chan_mask = MACIO_FLAG_SCCB_ON; + else + return -ENODEV; + + htw = (macio->type == macio_heathrow || macio->type == macio_paddington + || macio->type == macio_gatwick); + /* On these machines, the HRW_SCC_TRANS_EN_N bit mustn't be touched */ + trans = (pmac_mb.model_id != PMAC_TYPE_YOSEMITE && + pmac_mb.model_id != PMAC_TYPE_YIKES); + if (value) { +#ifdef CONFIG_ADB_PMU + if ((param & 0xfff) == PMAC_SCC_IRDA) + pmu_enable_irled(1); +#endif /* CONFIG_ADB_PMU */ + LOCK(flags); + fcr = MACIO_IN32(OHARE_FCR); + /* Check if scc cell need enabling */ + if (!(fcr & OH_SCC_ENABLE)) { + fcr |= OH_SCC_ENABLE; + if (htw) { + /* Side effect: this will also power up the + * modem, but it's too messy to figure out on which + * ports this controls the tranceiver and on which + * it controls the modem + */ + if (trans) + fcr &= ~HRW_SCC_TRANS_EN_N; + MACIO_OUT32(OHARE_FCR, fcr); + fcr |= (rmask = HRW_RESET_SCC); + MACIO_OUT32(OHARE_FCR, fcr); + } else { + fcr |= (rmask = OH_SCC_RESET); + MACIO_OUT32(OHARE_FCR, fcr); + } + UNLOCK(flags); + (void)MACIO_IN32(OHARE_FCR); + mdelay(15); + LOCK(flags); + fcr &= ~rmask; + MACIO_OUT32(OHARE_FCR, fcr); + } + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr |= OH_SCCA_IO; + if (chan_mask & MACIO_FLAG_SCCB_ON) + fcr |= OH_SCCB_IO; + MACIO_OUT32(OHARE_FCR, fcr); + macio->flags |= chan_mask; + UNLOCK(flags); + if (param & PMAC_SCC_FLAG_XMON) + macio->flags |= MACIO_FLAG_SCC_LOCKED; + } else { + if (macio->flags & MACIO_FLAG_SCC_LOCKED) + return -EPERM; + LOCK(flags); + fcr = MACIO_IN32(OHARE_FCR); + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr &= ~OH_SCCA_IO; + if (chan_mask & MACIO_FLAG_SCCB_ON) + fcr &= ~OH_SCCB_IO; + MACIO_OUT32(OHARE_FCR, fcr); + if ((fcr & (OH_SCCA_IO | OH_SCCB_IO)) == 0) { + fcr &= ~OH_SCC_ENABLE; + if (htw && trans) + fcr |= HRW_SCC_TRANS_EN_N; + MACIO_OUT32(OHARE_FCR, fcr); + } + macio->flags &= ~(chan_mask); + UNLOCK(flags); + mdelay(10); +#ifdef CONFIG_ADB_PMU + if ((param & 0xfff) == PMAC_SCC_IRDA) + pmu_enable_irled(0); +#endif /* CONFIG_ADB_PMU */ + } + return 0; +} + +static long ohare_floppy_enable(struct device_node *node, long param, + long value) +{ + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_FLOPPY_ENABLE, value); +} + +static long ohare_mesh_enable(struct device_node *node, long param, long value) +{ + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_MESH_ENABLE, value); +} + +static long ohare_ide_enable(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + /* For some reason, setting the bit in set_initial_features() + * doesn't stick. I'm still investigating... --BenH. + */ + if (value) + simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IOBUS_ENABLE, 1); + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_BAY_IDE_ENABLE, value); + default: + return -ENODEV; + } +} + +static long ohare_ide_reset(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE1_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long ohare_sleep_state(struct device_node *node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + + if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0) + return -EPERM; + if (value == 1) { + MACIO_BIC(OHARE_FCR, OH_IOBUS_ENABLE); + } else if (value == 0) { + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + } + + return 0; +} + +static long heathrow_modem_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + macio = macio_find(node, macio_unknown); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1; + if (!value) { + LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + mdelay(250); + } + if (pmac_mb.model_id != PMAC_TYPE_YOSEMITE && + pmac_mb.model_id != PMAC_TYPE_YIKES) { + LOCK(flags); + if (value) + MACIO_BIC(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + else + MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(250); + } + if (value) { + LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long heathrow_floppy_enable(struct device_node *node, long param, + long value) +{ + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, + HRW_SWIM_ENABLE|HRW_BAY_FLOPPY_ENABLE, + value); +} + +static long heathrow_mesh_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, macio_unknown); + if (!macio) + return -ENODEV; + LOCK(flags); + /* Set clear mesh cell enable */ + if (value) + MACIO_BIS(HEATHROW_FCR, HRW_MESH_ENABLE); + else + MACIO_BIC(HEATHROW_FCR, HRW_MESH_ENABLE); + (void)MACIO_IN32(HEATHROW_FCR); + udelay(10); + /* Set/Clear termination power */ + if (value) + MACIO_BIC(HEATHROW_MBCR, 0x04000000); + else + MACIO_BIS(HEATHROW_MBCR, 0x04000000); + (void)MACIO_IN32(HEATHROW_MBCR); + udelay(10); + UNLOCK(flags); + + return 0; +} + +static long heathrow_ide_enable(struct device_node *node, long param, + long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_BAY_IDE_ENABLE, value); + default: + return -ENODEV; + } +} + +static long heathrow_ide_reset(struct device_node *node, long param, + long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE1_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long heathrow_bmac_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (value) { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_BMAC_IO_ENABLE); + MACIO_BIS(HEATHROW_FCR, HRW_BMAC_RESET); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(10); + LOCK(flags); + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_RESET); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(10); + } else { + LOCK(flags); + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE); + UNLOCK(flags); + } + return 0; +} + +static long heathrow_sound_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + /* B&W G3 and Yikes don't support that properly (the + * sound appear to never come back after beeing shut down). + */ + if (pmac_mb.model_id == PMAC_TYPE_YOSEMITE || + pmac_mb.model_id == PMAC_TYPE_YIKES) + return 0; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (value) { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + } else { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + UNLOCK(flags); + } + return 0; +} + +static u32 save_fcr[6]; +static u32 save_mbcr; +static u32 save_gpio_levels[2]; +static u8 save_gpio_extint[KEYLARGO_GPIO_EXTINT_CNT]; +static u8 save_gpio_normal[KEYLARGO_GPIO_CNT]; +static u32 save_unin_clock_ctl; +static struct dbdma_regs save_dbdma[13]; +static struct dbdma_regs save_alt_dbdma[13]; + +static void dbdma_save(struct macio_chip *macio, struct dbdma_regs *save) +{ + int i; + + /* Save state & config of DBDMA channels */ + for (i = 0; i < 13; i++) { + volatile struct dbdma_regs __iomem * chan = (void __iomem *) + (macio->base + ((0x8000+i*0x100)>>2)); + save[i].cmdptr_hi = in_le32(&chan->cmdptr_hi); + save[i].cmdptr = in_le32(&chan->cmdptr); + save[i].intr_sel = in_le32(&chan->intr_sel); + save[i].br_sel = in_le32(&chan->br_sel); + save[i].wait_sel = in_le32(&chan->wait_sel); + } +} + +static void dbdma_restore(struct macio_chip *macio, struct dbdma_regs *save) +{ + int i; + + /* Save state & config of DBDMA channels */ + for (i = 0; i < 13; i++) { + volatile struct dbdma_regs __iomem * chan = (void __iomem *) + (macio->base + ((0x8000+i*0x100)>>2)); + out_le32(&chan->control, (ACTIVE|DEAD|WAKE|FLUSH|PAUSE|RUN)<<16); + while (in_le32(&chan->status) & ACTIVE) + mb(); + out_le32(&chan->cmdptr_hi, save[i].cmdptr_hi); + out_le32(&chan->cmdptr, save[i].cmdptr); + out_le32(&chan->intr_sel, save[i].intr_sel); + out_le32(&chan->br_sel, save[i].br_sel); + out_le32(&chan->wait_sel, save[i].wait_sel); + } +} + +static void heathrow_sleep(struct macio_chip *macio, int secondary) +{ + if (secondary) { + dbdma_save(macio, save_alt_dbdma); + save_fcr[2] = MACIO_IN32(0x38); + save_fcr[3] = MACIO_IN32(0x3c); + } else { + dbdma_save(macio, save_dbdma); + save_fcr[0] = MACIO_IN32(0x38); + save_fcr[1] = MACIO_IN32(0x3c); + save_mbcr = MACIO_IN32(0x34); + /* Make sure sound is shut down */ + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + /* This seems to be necessary as well or the fan + * keeps coming up and battery drains fast */ + MACIO_BIC(HEATHROW_FCR, HRW_IOBUS_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_IDE0_RESET_N); + /* Make sure eth is down even if module or sleep + * won't work properly */ + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE | HRW_BMAC_RESET); + } + /* Make sure modem is shut down */ + MACIO_OUT8(HRW_GPIO_MODEM_RESET, + MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1); + MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + MACIO_BIC(HEATHROW_FCR, OH_SCCA_IO|OH_SCCB_IO|HRW_SCC_ENABLE); + + /* Let things settle */ + (void)MACIO_IN32(HEATHROW_FCR); +} + +static void heathrow_wakeup(struct macio_chip *macio, int secondary) +{ + if (secondary) { + MACIO_OUT32(0x38, save_fcr[2]); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x3c, save_fcr[3]); + (void)MACIO_IN32(0x38); + mdelay(10); + dbdma_restore(macio, save_alt_dbdma); + } else { + MACIO_OUT32(0x38, save_fcr[0] | HRW_IOBUS_ENABLE); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x3c, save_fcr[1]); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x34, save_mbcr); + (void)MACIO_IN32(0x38); + mdelay(10); + dbdma_restore(macio, save_dbdma); + } +} + +static long heathrow_sleep_state(struct device_node *node, long param, + long value) +{ + if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0) + return -EPERM; + if (value == 1) { + if (macio_chips[1].type == macio_gatwick) + heathrow_sleep(&macio_chips[0], 1); + heathrow_sleep(&macio_chips[0], 0); + } else if (value == 0) { + heathrow_wakeup(&macio_chips[0], 0); + if (macio_chips[1].type == macio_gatwick) + heathrow_wakeup(&macio_chips[0], 1); + } + return 0; +} + +static long core99_scc_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + unsigned long chan_mask; + u32 fcr; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (!strcmp(node->name, "ch-a")) + chan_mask = MACIO_FLAG_SCCA_ON; + else if (!strcmp(node->name, "ch-b")) + chan_mask = MACIO_FLAG_SCCB_ON; + else + return -ENODEV; + + if (value) { + int need_reset_scc = 0; + int need_reset_irda = 0; + + LOCK(flags); + fcr = MACIO_IN32(KEYLARGO_FCR0); + /* Check if scc cell need enabling */ + if (!(fcr & KL0_SCC_CELL_ENABLE)) { + fcr |= KL0_SCC_CELL_ENABLE; + need_reset_scc = 1; + } + if (chan_mask & MACIO_FLAG_SCCA_ON) { + fcr |= KL0_SCCA_ENABLE; + /* Don't enable line drivers for I2S modem */ + if ((param & 0xfff) == PMAC_SCC_I2S1) + fcr &= ~KL0_SCC_A_INTF_ENABLE; + else + fcr |= KL0_SCC_A_INTF_ENABLE; + } + if (chan_mask & MACIO_FLAG_SCCB_ON) { + fcr |= KL0_SCCB_ENABLE; + /* Perform irda specific inits */ + if ((param & 0xfff) == PMAC_SCC_IRDA) { + fcr &= ~KL0_SCC_B_INTF_ENABLE; + fcr |= KL0_IRDA_ENABLE; + fcr |= KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE; + fcr |= KL0_IRDA_SOURCE1_SEL; + fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0); + fcr &= ~(KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND); + need_reset_irda = 1; + } else + fcr |= KL0_SCC_B_INTF_ENABLE; + } + MACIO_OUT32(KEYLARGO_FCR0, fcr); + macio->flags |= chan_mask; + if (need_reset_scc) { + MACIO_BIS(KEYLARGO_FCR0, KL0_SCC_RESET); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(15); + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR0, KL0_SCC_RESET); + } + if (need_reset_irda) { + MACIO_BIS(KEYLARGO_FCR0, KL0_IRDA_RESET); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(15); + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR0, KL0_IRDA_RESET); + } + UNLOCK(flags); + if (param & PMAC_SCC_FLAG_XMON) + macio->flags |= MACIO_FLAG_SCC_LOCKED; + } else { + if (macio->flags & MACIO_FLAG_SCC_LOCKED) + return -EPERM; + LOCK(flags); + fcr = MACIO_IN32(KEYLARGO_FCR0); + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr &= ~KL0_SCCA_ENABLE; + if (chan_mask & MACIO_FLAG_SCCB_ON) { + fcr &= ~KL0_SCCB_ENABLE; + /* Perform irda specific clears */ + if ((param & 0xfff) == PMAC_SCC_IRDA) { + fcr &= ~KL0_IRDA_ENABLE; + fcr &= ~(KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE); + fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0); + fcr &= ~(KL0_IRDA_SOURCE1_SEL|KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND); + } + } + MACIO_OUT32(KEYLARGO_FCR0, fcr); + if ((fcr & (KL0_SCCA_ENABLE | KL0_SCCB_ENABLE)) == 0) { + fcr &= ~KL0_SCC_CELL_ENABLE; + MACIO_OUT32(KEYLARGO_FCR0, fcr); + } + macio->flags &= ~(chan_mask); + UNLOCK(flags); + mdelay(10); + } + return 0; +} + +static long +core99_modem_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + /* Hack for internal USB modem */ + if (node == NULL) { + if (macio_chips[0].type != macio_keylargo) + return -ENODEV; + node = macio_chips[0].of_node; + } + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(KL_GPIO_MODEM_RESET); + gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE; + gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA; + + if (!value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + mdelay(250); + } + LOCK(flags); + if (value) { + MACIO_BIC(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR2); + mdelay(250); + } else { + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + UNLOCK(flags); + } + if (value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long +pangea_modem_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + /* Hack for internal USB modem */ + if (node == NULL) { + if (macio_chips[0].type != macio_pangea && + macio_chips[0].type != macio_intrepid) + return -ENODEV; + node = macio_chips[0].of_node; + } + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(KL_GPIO_MODEM_RESET); + gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE; + gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA; + + if (!value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + mdelay(250); + } + LOCK(flags); + if (value) { + MACIO_OUT8(KL_GPIO_MODEM_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR2); + mdelay(250); + } else { + MACIO_OUT8(KL_GPIO_MODEM_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA); + UNLOCK(flags); + } + if (value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long +core99_ata100_enable(struct device_node *node, long value) +{ + unsigned long flags; + struct pci_dev *pdev = NULL; + u8 pbus, pid; + + if (uninorth_rev < 0x24) + return -ENODEV; + + LOCK(flags); + if (value) + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100); + else + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100); + (void)UN_IN(UNI_N_CLOCK_CNTL); + UNLOCK(flags); + udelay(20); + + if (value) { + if (pci_device_from_OF_node(node, &pbus, &pid) == 0) + pdev = pci_find_slot(pbus, pid); + if (pdev == NULL) + return 0; + pci_enable_device(pdev); + pci_set_master(pdev); + } + return 0; +} + +static long +core99_ide_enable(struct device_node *node, long param, long value) +{ + /* Bus ID 0 to 2 are KeyLargo based IDE, busID 3 is U2 + * based ata-100 + */ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE1_ENABLE, value); + case 2: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_UIDE_ENABLE, value); + case 3: + return core99_ata100_enable(node, value); + default: + return -ENODEV; + } +} + +static long +core99_ide_reset(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE1_RESET_N, !value); + case 2: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_UIDE_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long +core99_gmac_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + + LOCK(flags); + if (value) + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC); + else + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC); + (void)UN_IN(UNI_N_CLOCK_CNTL); + UNLOCK(flags); + udelay(20); + + return 0; +} + +static long +core99_gmac_phy_reset(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + LOCK(flags); + MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(KL_GPIO_ETH_PHY_RESET); + UNLOCK(flags); + mdelay(10); + LOCK(flags); + MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, /*KEYLARGO_GPIO_OUTPUT_ENABLE | */ + KEYLARGO_GPIO_OUTOUT_DATA); + UNLOCK(flags); + mdelay(10); + + return 0; +} + +static long +core99_sound_chip_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + + /* Do a better probe code, screamer G4 desktops & + * iMacs can do that too, add a recalibrate in + * the driver as well + */ + if (pmac_mb.model_id == PMAC_TYPE_PISMO || + pmac_mb.model_id == PMAC_TYPE_TITANIUM) { + LOCK(flags); + if (value) + MACIO_OUT8(KL_GPIO_SOUND_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE | + KEYLARGO_GPIO_OUTOUT_DATA); + else + MACIO_OUT8(KL_GPIO_SOUND_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(KL_GPIO_SOUND_POWER); + UNLOCK(flags); + } + return 0; +} + +static long +core99_airport_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + int state; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + + /* Hint: we allow passing of macio itself for the sake of the + * sleep code + */ + if (node != macio->of_node && + (!node->parent || node->parent != macio->of_node)) + return -ENODEV; + state = (macio->flags & MACIO_FLAG_AIRPORT_ON) != 0; + if (value == state) + return 0; + if (value) { + /* This code is a reproduction of OF enable-cardslot + * and init-wireless methods, slightly hacked until + * I got it working. + */ + LOCK(flags); + MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 5); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xf); + UNLOCK(flags); + mdelay(10); + LOCK(flags); + MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 4); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xf); + UNLOCK(flags); + + mdelay(10); + + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xb, 0); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xb); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xa, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xa); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xd, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xd); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_0+0xd, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xd); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_0+0xe, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xe); + UNLOCK(flags); + udelay(10); + MACIO_OUT32(0x1c000, 0); + mdelay(1); + MACIO_OUT8(0x1a3e0, 0x41); + (void)MACIO_IN8(0x1a3e0); + udelay(10); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + UNLOCK(flags); + mdelay(100); + + macio->flags |= MACIO_FLAG_AIRPORT_ON; + } else { + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + MACIO_OUT8(KL_GPIO_AIRPORT_0, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_1, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_2, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_3, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_4, 0); + (void)MACIO_IN8(KL_GPIO_AIRPORT_4); + UNLOCK(flags); + + macio->flags &= ~MACIO_FLAG_AIRPORT_ON; + } + return 0; +} + +#ifdef CONFIG_SMP +static long +core99_reset_cpu(struct device_node *node, long param, long value) +{ + unsigned int reset_io = 0; + unsigned long flags; + struct macio_chip *macio; + struct device_node *np; + const int dflt_reset_lines[] = { KL_GPIO_RESET_CPU0, + KL_GPIO_RESET_CPU1, + KL_GPIO_RESET_CPU2, + KL_GPIO_RESET_CPU3 }; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo) + return -ENODEV; + + np = find_path_device("/cpus"); + if (np == NULL) + return -ENODEV; + for (np = np->child; np != NULL; np = np->sibling) { + u32 *num = (u32 *)get_property(np, "reg", NULL); + u32 *rst = (u32 *)get_property(np, "soft-reset", NULL); + if (num == NULL || rst == NULL) + continue; + if (param == *num) { + reset_io = *rst; + break; + } + } + if (np == NULL || reset_io == 0) + reset_io = dflt_reset_lines[param]; + + LOCK(flags); + MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(reset_io); + udelay(1); + MACIO_OUT8(reset_io, 0); + (void)MACIO_IN8(reset_io); + UNLOCK(flags); + + return 0; +} +#endif /* CONFIG_SMP */ + +static long +core99_usb_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio; + unsigned long flags; + char *prop; + int number; + u32 reg; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + prop = (char *)get_property(node, "AAPL,clock-id", NULL); + if (!prop) + return -ENODEV; + if (strncmp(prop, "usb0u048", 8) == 0) + number = 0; + else if (strncmp(prop, "usb1u148", 8) == 0) + number = 2; + else if (strncmp(prop, "usb2u248", 8) == 0) + number = 4; + else + return -ENODEV; + + /* Sorry for the brute-force locking, but this is only used during + * sleep and the timing seem to be critical + */ + LOCK(flags); + if (value) { + /* Turn ON */ + if (number == 0) { + MACIO_BIC(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE); + } else if (number == 2) { + MACIO_BIC(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1)); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE); + } else if (number == 4) { + MACIO_BIC(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1)); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR1); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR1, KL1_USB2_CELL_ENABLE); + } + if (number < 4) { + reg = MACIO_IN32(KEYLARGO_FCR4); + reg &= ~(KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) | + KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number)); + reg &= ~(KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) | + KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1)); + MACIO_OUT32(KEYLARGO_FCR4, reg); + (void)MACIO_IN32(KEYLARGO_FCR4); + udelay(10); + } else { + reg = MACIO_IN32(KEYLARGO_FCR3); + reg &= ~(KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) | + KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0)); + reg &= ~(KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) | + KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1)); + MACIO_OUT32(KEYLARGO_FCR3, reg); + (void)MACIO_IN32(KEYLARGO_FCR3); + udelay(10); + } + if (macio->type == macio_intrepid) { + /* wait for clock stopped bits to clear */ + u32 test0 = 0, test1 = 0; + u32 status0, status1; + int timeout = 1000; + + UNLOCK(flags); + switch (number) { + case 0: + test0 = UNI_N_CLOCK_STOPPED_USB0; + test1 = UNI_N_CLOCK_STOPPED_USB0PCI; + break; + case 2: + test0 = UNI_N_CLOCK_STOPPED_USB1; + test1 = UNI_N_CLOCK_STOPPED_USB1PCI; + break; + case 4: + test0 = UNI_N_CLOCK_STOPPED_USB2; + test1 = UNI_N_CLOCK_STOPPED_USB2PCI; + break; + } + do { + if (--timeout <= 0) { + printk(KERN_ERR "core99_usb_enable: " + "Timeout waiting for clocks\n"); + break; + } + mdelay(1); + status0 = UN_IN(UNI_N_CLOCK_STOP_STATUS0); + status1 = UN_IN(UNI_N_CLOCK_STOP_STATUS1); + } while ((status0 & test0) | (status1 & test1)); + LOCK(flags); + } + } else { + /* Turn OFF */ + if (number < 4) { + reg = MACIO_IN32(KEYLARGO_FCR4); + reg |= KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) | + KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number); + reg |= KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) | + KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1); + MACIO_OUT32(KEYLARGO_FCR4, reg); + (void)MACIO_IN32(KEYLARGO_FCR4); + udelay(1); + } else { + reg = MACIO_IN32(KEYLARGO_FCR3); + reg |= KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) | + KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0); + reg |= KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) | + KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1); + MACIO_OUT32(KEYLARGO_FCR3, reg); + (void)MACIO_IN32(KEYLARGO_FCR3); + udelay(1); + } + if (number == 0) { + if (macio->type != macio_intrepid) + MACIO_BIC(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE); + (void)MACIO_IN32(KEYLARGO_FCR0); + udelay(1); + MACIO_BIS(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + } else if (number == 2) { + if (macio->type != macio_intrepid) + MACIO_BIC(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE); + (void)MACIO_IN32(KEYLARGO_FCR0); + udelay(1); + MACIO_BIS(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + } else if (number == 4) { + udelay(1); + MACIO_BIS(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR1); + } + udelay(1); + } + UNLOCK(flags); + + return 0; +} + +static long +core99_firewire_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED)) + return -ENODEV; + + LOCK(flags); + if (value) { + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW); + (void)UN_IN(UNI_N_CLOCK_CNTL); + } else { + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW); + (void)UN_IN(UNI_N_CLOCK_CNTL); + } + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long +core99_firewire_cable_power(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + /* Trick: we allow NULL node */ + if ((pmac_mb.board_flags & PMAC_MB_HAS_FW_POWER) == 0) + return -ENODEV; + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED)) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_OUT8(KL_GPIO_FW_CABLE_POWER , 0); + MACIO_IN8(KL_GPIO_FW_CABLE_POWER); + udelay(10); + } else { + MACIO_OUT8(KL_GPIO_FW_CABLE_POWER , 4); + MACIO_IN8(KL_GPIO_FW_CABLE_POWER); udelay(10); + } + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long +intrepid_aack_delay_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + + if (uninorth_rev < 0xd2) + return -ENODEV; + + LOCK(flags); + if (param) + UN_BIS(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE); + else + UN_BIC(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE); + UNLOCK(flags); + + return 0; +} + + +#endif /* CONFIG_POWER4 */ + +static long +core99_read_gpio(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + + return MACIO_IN8(param); +} + + +static long +core99_write_gpio(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + + MACIO_OUT8(param, (u8)(value & 0xff)); + return 0; +} + +#ifdef CONFIG_POWER4 +static long g5_gmac_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + + if (node == NULL) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + mb(); + k2_skiplist[0] = NULL; + } else { + k2_skiplist[0] = node; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + } + + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long g5_fw_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + + if (node == NULL) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + mb(); + k2_skiplist[1] = NULL; + } else { + k2_skiplist[1] = node; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + } + + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long g5_mpic_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + + if (node->parent == NULL || strcmp(node->parent->name, "u3")) + return 0; + + LOCK(flags); + UN_BIS(U3_TOGGLE_REG, U3_MPIC_RESET | U3_MPIC_OUTPUT_ENABLE); + UNLOCK(flags); + + return 0; +} + +static long g5_eth_phy_reset(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + struct device_node *phy; + int need_reset; + + /* + * We must not reset the combo PHYs, only the BCM5221 found in + * the iMac G5. + */ + phy = of_get_next_child(node, NULL); + if (!phy) + return -ENODEV; + need_reset = device_is_compatible(phy, "B5221"); + of_node_put(phy); + if (!need_reset) + return 0; + + /* PHY reset is GPIO 29, not in device-tree unfortunately */ + MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, + KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA); + /* Thankfully, this is now always called at a time when we can + * schedule by sungem. + */ + msleep(10); + MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, 0); + + return 0; +} + +static long g5_i2s_enable(struct device_node *node, long param, long value) +{ + /* Very crude implementation for now */ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + + if (value == 0) + return 0; /* don't disable yet */ + + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR3, KL3_CLK45_ENABLE | KL3_CLK49_ENABLE | + KL3_I2S0_CLK18_ENABLE); + udelay(10); + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_I2S0_CELL_ENABLE | + K2_FCR1_I2S0_CLK_ENABLE_BIT | K2_FCR1_I2S0_ENABLE); + udelay(10); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_I2S0_RESET); + UNLOCK(flags); + udelay(10); + + return 0; +} + + +#ifdef CONFIG_SMP +static long g5_reset_cpu(struct device_node *node, long param, long value) +{ + unsigned int reset_io = 0; + unsigned long flags; + struct macio_chip *macio; + struct device_node *np; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo2) + return -ENODEV; + + np = find_path_device("/cpus"); + if (np == NULL) + return -ENODEV; + for (np = np->child; np != NULL; np = np->sibling) { + u32 *num = (u32 *)get_property(np, "reg", NULL); + u32 *rst = (u32 *)get_property(np, "soft-reset", NULL); + if (num == NULL || rst == NULL) + continue; + if (param == *num) { + reset_io = *rst; + break; + } + } + if (np == NULL || reset_io == 0) + return -ENODEV; + + LOCK(flags); + MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(reset_io); + udelay(1); + MACIO_OUT8(reset_io, 0); + (void)MACIO_IN8(reset_io); + UNLOCK(flags); + + return 0; +} +#endif /* CONFIG_SMP */ + +/* + * This can be called from pmac_smp so isn't static + * + * This takes the second CPU off the bus on dual CPU machines + * running UP + */ +void g5_phy_disable_cpu1(void) +{ + UN_OUT(U3_API_PHY_CONFIG_1, 0); +} +#endif /* CONFIG_POWER4 */ + +#ifndef CONFIG_POWER4 + +static void +keylargo_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + if (sleep_mode) { + mdelay(1); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB_REF_SUSPEND); + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(1); + } + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE | + KL0_IRDA_ENABLE | KL0_IRDA_CLK32_ENABLE | + KL0_IRDA_CLK19_ENABLE); + + MACIO_BIC(KEYLARGO_MBCR, KL_MBCR_MB0_DEV_MASK); + MACIO_BIS(KEYLARGO_MBCR, KL_MBCR_MB0_IDE_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT | + KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE | + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE | + KL1_EIDE0_ENABLE | KL1_EIDE0_RESET_N | + KL1_EIDE1_ENABLE | KL1_EIDE1_RESET_N | + KL1_UIDE_ENABLE); + + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + MACIO_BIC(KEYLARGO_FCR2, KL2_IOBUS_ENABLE); + + temp = MACIO_IN32(KEYLARGO_FCR3); + if (macio->rev >= 2) { + temp |= KL3_SHUTDOWN_PLL2X; + if (sleep_mode) + temp |= KL3_SHUTDOWN_PLL_TOTAL; + } + + temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 | + KL3_SHUTDOWN_PLLKW35; + if (sleep_mode) + temp |= KL3_SHUTDOWN_PLLKW12; + temp &= ~(KL3_CLK66_ENABLE | KL3_CLK49_ENABLE | KL3_CLK45_ENABLE + | KL3_CLK31_ENABLE | KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_VIA_CLK16_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1); +} + +static void +pangea_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE | + KL0_USB0_CELL_ENABLE | KL0_USB1_CELL_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT | + KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE | + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE | + KL1_UIDE_ENABLE); + if (pmac_mb.board_flags & PMAC_MB_MOBILE) + MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N); + + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + + temp = MACIO_IN32(KEYLARGO_FCR3); + temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 | + KL3_SHUTDOWN_PLLKW35; + temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | KL3_CLK31_ENABLE + | KL3_I2S0_CLK18_ENABLE | KL3_I2S1_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_VIA_CLK16_ENABLE | KL3_TIMER_CLK18_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1); +} + +static void +intrepid_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + /*KL1_USB2_CELL_ENABLE |*/ + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE); + if (pmac_mb.board_flags & PMAC_MB_MOBILE) + MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N); + + temp = MACIO_IN32(KEYLARGO_FCR3); + temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | + KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_IT_VIA_CLK32_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(10); +} + + +void pmac_tweak_clock_spreading(int enable) +{ + struct macio_chip *macio = &macio_chips[0]; + + /* Hack for doing clock spreading on some machines PowerBooks and + * iBooks. This implements the "platform-do-clockspreading" OF + * property as decoded manually on various models. For safety, we also + * check the product ID in the device-tree in cases we'll whack the i2c + * chip to make reasonably sure we won't set wrong values in there + * + * Of course, ultimately, we have to implement a real parser for + * the platform-do-* stuff... + */ + + if (macio->type == macio_intrepid) { + if (enable) + UN_OUT(UNI_N_CLOCK_SPREADING, 2); + else + UN_OUT(UNI_N_CLOCK_SPREADING, 0); + mdelay(40); + } + + while (machine_is_compatible("PowerBook5,2") || + machine_is_compatible("PowerBook5,3") || + machine_is_compatible("PowerBook6,2") || + machine_is_compatible("PowerBook6,3")) { + struct device_node *ui2c = of_find_node_by_type(NULL, "i2c"); + struct device_node *dt = of_find_node_by_name(NULL, "device-tree"); + u8 buffer[9]; + u32 *productID; + int i, rc, changed = 0; + + if (dt == NULL) + break; + productID = (u32 *)get_property(dt, "pid#", NULL); + if (productID == NULL) + break; + while(ui2c) { + struct device_node *p = of_get_parent(ui2c); + if (p && !strcmp(p->name, "uni-n")) + break; + ui2c = of_find_node_by_type(ui2c, "i2c"); + } + if (ui2c == NULL) + break; + DBG("Trying to bump clock speed for PID: %08x...\n", *productID); + rc = pmac_low_i2c_open(ui2c, 1); + if (rc != 0) + break; + pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined); + rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9); + DBG("read result: %d,", rc); + if (rc != 0) { + pmac_low_i2c_close(ui2c); + break; + } + for (i=0; i<9; i++) + DBG(" %02x", buffer[i]); + DBG("\n"); + + switch(*productID) { + case 0x1182: /* AlBook 12" rev 2 */ + case 0x1183: /* iBook G4 12" */ + buffer[0] = (buffer[0] & 0x8f) | 0x70; + buffer[2] = (buffer[2] & 0x7f) | 0x00; + buffer[5] = (buffer[5] & 0x80) | 0x31; + buffer[6] = (buffer[6] & 0x40) | 0xb0; + buffer[7] = (buffer[7] & 0x00) | (enable ? 0xc0 : 0xba); + buffer[8] = (buffer[8] & 0x00) | 0x30; + changed = 1; + break; + case 0x3142: /* AlBook 15" (ATI M10) */ + case 0x3143: /* AlBook 17" (ATI M10) */ + buffer[0] = (buffer[0] & 0xaf) | 0x50; + buffer[2] = (buffer[2] & 0x7f) | 0x00; + buffer[5] = (buffer[5] & 0x80) | 0x31; + buffer[6] = (buffer[6] & 0x40) | 0xb0; + buffer[7] = (buffer[7] & 0x00) | (enable ? 0xd0 : 0xc0); + buffer[8] = (buffer[8] & 0x00) | 0x30; + changed = 1; + break; + default: + DBG("i2c-hwclock: Machine model not handled\n"); + break; + } + if (!changed) { + pmac_low_i2c_close(ui2c); + break; + } + pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_stdsub); + rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_write, 0x80, buffer, 9); + DBG("write result: %d,", rc); + pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined); + rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9); + DBG("read result: %d,", rc); + if (rc != 0) { + pmac_low_i2c_close(ui2c); + break; + } + for (i=0; i<9; i++) + DBG(" %02x", buffer[i]); + pmac_low_i2c_close(ui2c); + break; + } +} + + +static int +core99_sleep(void) +{ + struct macio_chip *macio; + int i; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + /* We power off the wireless slot in case it was not done + * by the driver. We don't power it on automatically however + */ + if (macio->flags & MACIO_FLAG_AIRPORT_ON) + core99_airport_enable(macio->of_node, 0, 0); + + /* We power off the FW cable. Should be done by the driver... */ + if (macio->flags & MACIO_FLAG_FW_SUPPORTED) { + core99_firewire_enable(NULL, 0, 0); + core99_firewire_cable_power(NULL, 0, 0); + } + + /* We make sure int. modem is off (in case driver lost it) */ + if (macio->type == macio_keylargo) + core99_modem_enable(macio->of_node, 0, 0); + else + pangea_modem_enable(macio->of_node, 0, 0); + + /* We make sure the sound is off as well */ + core99_sound_chip_enable(macio->of_node, 0, 0); + + /* + * Save various bits of KeyLargo + */ + + /* Save the state of the various GPIOs */ + save_gpio_levels[0] = MACIO_IN32(KEYLARGO_GPIO_LEVELS0); + save_gpio_levels[1] = MACIO_IN32(KEYLARGO_GPIO_LEVELS1); + for (i=0; i<KEYLARGO_GPIO_EXTINT_CNT; i++) + save_gpio_extint[i] = MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+i); + for (i=0; i<KEYLARGO_GPIO_CNT; i++) + save_gpio_normal[i] = MACIO_IN8(KEYLARGO_GPIO_0+i); + + /* Save the FCRs */ + if (macio->type == macio_keylargo) + save_mbcr = MACIO_IN32(KEYLARGO_MBCR); + save_fcr[0] = MACIO_IN32(KEYLARGO_FCR0); + save_fcr[1] = MACIO_IN32(KEYLARGO_FCR1); + save_fcr[2] = MACIO_IN32(KEYLARGO_FCR2); + save_fcr[3] = MACIO_IN32(KEYLARGO_FCR3); + save_fcr[4] = MACIO_IN32(KEYLARGO_FCR4); + if (macio->type == macio_pangea || macio->type == macio_intrepid) + save_fcr[5] = MACIO_IN32(KEYLARGO_FCR5); + + /* Save state & config of DBDMA channels */ + dbdma_save(macio, save_dbdma); + + /* + * Turn off as much as we can + */ + if (macio->type == macio_pangea) + pangea_shutdown(macio, 1); + else if (macio->type == macio_intrepid) + intrepid_shutdown(macio, 1); + else if (macio->type == macio_keylargo) + keylargo_shutdown(macio, 1); + + /* + * Put the host bridge to sleep + */ + + save_unin_clock_ctl = UN_IN(UNI_N_CLOCK_CNTL); + /* Note: do not switch GMAC off, driver does it when necessary, WOL must keep it + * enabled ! + */ + UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl & + ~(/*UNI_N_CLOCK_CNTL_GMAC|*/UNI_N_CLOCK_CNTL_FW/*|UNI_N_CLOCK_CNTL_PCI*/)); + udelay(100); + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING); + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_SLEEP); + mdelay(10); + + /* + * FIXME: A bit of black magic with OpenPIC (don't ask me why) + */ + if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) { + MACIO_BIS(0x506e0, 0x00400000); + MACIO_BIS(0x506e0, 0x80000000); + } + return 0; +} + +static int +core99_wake_up(void) +{ + struct macio_chip *macio; + int i; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + /* + * Wakeup the host bridge + */ + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL); + udelay(10); + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING); + udelay(10); + + /* + * Restore KeyLargo + */ + + if (macio->type == macio_keylargo) { + MACIO_OUT32(KEYLARGO_MBCR, save_mbcr); + (void)MACIO_IN32(KEYLARGO_MBCR); udelay(10); + } + MACIO_OUT32(KEYLARGO_FCR0, save_fcr[0]); + (void)MACIO_IN32(KEYLARGO_FCR0); udelay(10); + MACIO_OUT32(KEYLARGO_FCR1, save_fcr[1]); + (void)MACIO_IN32(KEYLARGO_FCR1); udelay(10); + MACIO_OUT32(KEYLARGO_FCR2, save_fcr[2]); + (void)MACIO_IN32(KEYLARGO_FCR2); udelay(10); + MACIO_OUT32(KEYLARGO_FCR3, save_fcr[3]); + (void)MACIO_IN32(KEYLARGO_FCR3); udelay(10); + MACIO_OUT32(KEYLARGO_FCR4, save_fcr[4]); + (void)MACIO_IN32(KEYLARGO_FCR4); udelay(10); + if (macio->type == macio_pangea || macio->type == macio_intrepid) { + MACIO_OUT32(KEYLARGO_FCR5, save_fcr[5]); + (void)MACIO_IN32(KEYLARGO_FCR5); udelay(10); + } + + dbdma_restore(macio, save_dbdma); + + MACIO_OUT32(KEYLARGO_GPIO_LEVELS0, save_gpio_levels[0]); + MACIO_OUT32(KEYLARGO_GPIO_LEVELS1, save_gpio_levels[1]); + for (i=0; i<KEYLARGO_GPIO_EXTINT_CNT; i++) + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+i, save_gpio_extint[i]); + for (i=0; i<KEYLARGO_GPIO_CNT; i++) + MACIO_OUT8(KEYLARGO_GPIO_0+i, save_gpio_normal[i]); + + /* FIXME more black magic with OpenPIC ... */ + if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) { + MACIO_BIC(0x506e0, 0x00400000); + MACIO_BIC(0x506e0, 0x80000000); + } + + UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl); + udelay(100); + + return 0; +} + +static long +core99_sleep_state(struct device_node *node, long param, long value) +{ + /* Param == 1 means to enter the "fake sleep" mode that is + * used for CPU speed switch + */ + if (param == 1) { + if (value == 1) { + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING); + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_IDLE2); + } else { + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL); + udelay(10); + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING); + udelay(10); + } + return 0; + } + if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0) + return -EPERM; + + if (value == 1) + return core99_sleep(); + else if (value == 0) + return core99_wake_up(); + return 0; +} + +#endif /* CONFIG_POWER4 */ + +static long +generic_dev_can_wake(struct device_node *node, long param, long value) +{ + /* Todo: eventually check we are really dealing with on-board + * video device ... + */ + + if (pmac_mb.board_flags & PMAC_MB_MAY_SLEEP) + pmac_mb.board_flags |= PMAC_MB_CAN_SLEEP; + return 0; +} + +static long generic_get_mb_info(struct device_node *node, long param, long value) +{ + switch(param) { + case PMAC_MB_INFO_MODEL: + return pmac_mb.model_id; + case PMAC_MB_INFO_FLAGS: + return pmac_mb.board_flags; + case PMAC_MB_INFO_NAME: + /* hack hack hack... but should work */ + *((const char **)value) = pmac_mb.model_name; + return 0; + } + return -EINVAL; +} + + +/* + * Table definitions + */ + +/* Used on any machine + */ +static struct feature_table_entry any_features[] = { + { PMAC_FTR_GET_MB_INFO, generic_get_mb_info }, + { PMAC_FTR_DEVICE_CAN_WAKE, generic_dev_can_wake }, + { 0, NULL } +}; + +#ifndef CONFIG_POWER4 + +/* OHare based motherboards. Currently, we only use these on the + * 2400,3400 and 3500 series powerbooks. Some older desktops seem + * to have issues with turning on/off those asic cells + */ +static struct feature_table_entry ohare_features[] = { + { PMAC_FTR_SCC_ENABLE, ohare_htw_scc_enable }, + { PMAC_FTR_SWIM3_ENABLE, ohare_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, ohare_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, ohare_ide_enable}, + { PMAC_FTR_IDE_RESET, ohare_ide_reset}, + { PMAC_FTR_SLEEP_STATE, ohare_sleep_state }, + { 0, NULL } +}; + +/* Heathrow desktop machines (Beige G3). + * Separated as some features couldn't be properly tested + * and the serial port control bits appear to confuse it. + */ +static struct feature_table_entry heathrow_desktop_features[] = { + { PMAC_FTR_SWIM3_ENABLE, heathrow_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, heathrow_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, heathrow_ide_enable }, + { PMAC_FTR_IDE_RESET, heathrow_ide_reset }, + { PMAC_FTR_BMAC_ENABLE, heathrow_bmac_enable }, + { 0, NULL } +}; + +/* Heathrow based laptop, that is the Wallstreet and mainstreet + * powerbooks. + */ +static struct feature_table_entry heathrow_laptop_features[] = { + { PMAC_FTR_SCC_ENABLE, ohare_htw_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, heathrow_modem_enable }, + { PMAC_FTR_SWIM3_ENABLE, heathrow_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, heathrow_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, heathrow_ide_enable }, + { PMAC_FTR_IDE_RESET, heathrow_ide_reset }, + { PMAC_FTR_BMAC_ENABLE, heathrow_bmac_enable }, + { PMAC_FTR_SOUND_CHIP_ENABLE, heathrow_sound_enable }, + { PMAC_FTR_SLEEP_STATE, heathrow_sleep_state }, + { 0, NULL } +}; + +/* Paddington based machines + * The lombard (101) powerbook, first iMac models, B&W G3 and Yikes G4. + */ +static struct feature_table_entry paddington_features[] = { + { PMAC_FTR_SCC_ENABLE, ohare_htw_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, heathrow_modem_enable }, + { PMAC_FTR_SWIM3_ENABLE, heathrow_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, heathrow_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, heathrow_ide_enable }, + { PMAC_FTR_IDE_RESET, heathrow_ide_reset }, + { PMAC_FTR_BMAC_ENABLE, heathrow_bmac_enable }, + { PMAC_FTR_SOUND_CHIP_ENABLE, heathrow_sound_enable }, + { PMAC_FTR_SLEEP_STATE, heathrow_sleep_state }, + { 0, NULL } +}; + +/* Core99 & MacRISC 2 machines (all machines released since the + * iBook (included), that is all AGP machines, except pangea + * chipset. The pangea chipset is the "combo" UniNorth/KeyLargo + * used on iBook2 & iMac "flow power". + */ +static struct feature_table_entry core99_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, core99_modem_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, core99_sound_chip_enable }, + { PMAC_FTR_AIRPORT_ENABLE, core99_airport_enable }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, +#ifdef CONFIG_SMP + { PMAC_FTR_RESET_CPU, core99_reset_cpu }, +#endif /* CONFIG_SMP */ + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +/* RackMac + */ +static struct feature_table_entry rackmac_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, +#ifdef CONFIG_SMP + { PMAC_FTR_RESET_CPU, core99_reset_cpu }, +#endif /* CONFIG_SMP */ + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +/* Pangea features + */ +static struct feature_table_entry pangea_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, pangea_modem_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, core99_sound_chip_enable }, + { PMAC_FTR_AIRPORT_ENABLE, core99_airport_enable }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +/* Intrepid features + */ +static struct feature_table_entry intrepid_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, pangea_modem_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, core99_sound_chip_enable }, + { PMAC_FTR_AIRPORT_ENABLE, core99_airport_enable }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { PMAC_FTR_AACK_DELAY_ENABLE, intrepid_aack_delay_enable }, + { 0, NULL } +}; + +#else /* CONFIG_POWER4 */ + +/* G5 features + */ +static struct feature_table_entry g5_features[] = { + { PMAC_FTR_GMAC_ENABLE, g5_gmac_enable }, + { PMAC_FTR_1394_ENABLE, g5_fw_enable }, + { PMAC_FTR_ENABLE_MPIC, g5_mpic_enable }, + { PMAC_FTR_GMAC_PHY_RESET, g5_eth_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, g5_i2s_enable }, +#ifdef CONFIG_SMP + { PMAC_FTR_RESET_CPU, g5_reset_cpu }, +#endif /* CONFIG_SMP */ + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +#endif /* CONFIG_POWER4 */ + +static struct pmac_mb_def pmac_mb_defs[] = { +#ifndef CONFIG_POWER4 + /* + * Desktops + */ + + { "AAPL,8500", "PowerMac 8500/8600", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,9500", "PowerMac 9500/9600", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,7200", "PowerMac 7200", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,7300", "PowerMac 7200/7300", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,7500", "PowerMac 7500", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,ShinerESB", "Apple Network Server", + PMAC_TYPE_ANS, NULL, + 0 + }, + { "AAPL,e407", "Alchemy", + PMAC_TYPE_ALCHEMY, NULL, + 0 + }, + { "AAPL,e411", "Gazelle", + PMAC_TYPE_GAZELLE, NULL, + 0 + }, + { "AAPL,Gossamer", "PowerMac G3 (Gossamer)", + PMAC_TYPE_GOSSAMER, heathrow_desktop_features, + 0 + }, + { "AAPL,PowerMac G3", "PowerMac G3 (Silk)", + PMAC_TYPE_SILK, heathrow_desktop_features, + 0 + }, + { "PowerMac1,1", "Blue&White G3", + PMAC_TYPE_YOSEMITE, paddington_features, + 0 + }, + { "PowerMac1,2", "PowerMac G4 PCI Graphics", + PMAC_TYPE_YIKES, paddington_features, + 0 + }, + { "PowerMac2,1", "iMac FireWire", + PMAC_TYPE_FW_IMAC, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac2,2", "iMac FireWire", + PMAC_TYPE_FW_IMAC, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,1", "PowerMac G4 AGP Graphics", + PMAC_TYPE_SAWTOOTH, core99_features, + PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,2", "PowerMac G4 AGP Graphics", + PMAC_TYPE_SAWTOOTH, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,3", "PowerMac G4 AGP Graphics", + PMAC_TYPE_SAWTOOTH, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,4", "PowerMac G4 Silver", + PMAC_TYPE_QUICKSILVER, core99_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac3,5", "PowerMac G4 Silver", + PMAC_TYPE_QUICKSILVER, core99_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac3,6", "PowerMac G4 Windtunnel", + PMAC_TYPE_WINDTUNNEL, core99_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac4,1", "iMac \"Flower Power\"", + PMAC_TYPE_PANGEA_IMAC, pangea_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac4,2", "Flat panel iMac", + PMAC_TYPE_FLAT_PANEL_IMAC, pangea_features, + PMAC_MB_CAN_SLEEP + }, + { "PowerMac4,4", "eMac", + PMAC_TYPE_EMAC, core99_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac5,1", "PowerMac G4 Cube", + PMAC_TYPE_CUBE, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac6,1", "Flat panel iMac", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac6,3", "Flat panel iMac", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac6,4", "eMac", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac10,1", "Mac mini", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER, + }, + { "iMac,1", "iMac (first generation)", + PMAC_TYPE_ORIG_IMAC, paddington_features, + 0 + }, + + /* + * Xserve's + */ + + { "RackMac1,1", "XServe", + PMAC_TYPE_RACKMAC, rackmac_features, + 0, + }, + { "RackMac1,2", "XServe rev. 2", + PMAC_TYPE_RACKMAC, rackmac_features, + 0, + }, + + /* + * Laptops + */ + + { "AAPL,3400/2400", "PowerBook 3400", + PMAC_TYPE_HOOPER, ohare_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "AAPL,3500", "PowerBook 3500", + PMAC_TYPE_KANGA, ohare_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "AAPL,PowerBook1998", "PowerBook Wallstreet", + PMAC_TYPE_WALLSTREET, heathrow_laptop_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "PowerBook1,1", "PowerBook 101 (Lombard)", + PMAC_TYPE_101_PBOOK, paddington_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "PowerBook2,1", "iBook (first generation)", + PMAC_TYPE_ORIG_IBOOK, core99_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE + }, + { "PowerBook2,2", "iBook FireWire", + PMAC_TYPE_FW_IBOOK, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | + PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE + }, + { "PowerBook3,1", "PowerBook Pismo", + PMAC_TYPE_PISMO, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | + PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE + }, + { "PowerBook3,2", "PowerBook Titanium", + PMAC_TYPE_TITANIUM, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook3,3", "PowerBook Titanium II", + PMAC_TYPE_TITANIUM2, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook3,4", "PowerBook Titanium III", + PMAC_TYPE_TITANIUM3, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook3,5", "PowerBook Titanium IV", + PMAC_TYPE_TITANIUM4, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook4,1", "iBook 2", + PMAC_TYPE_IBOOK2, pangea_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook4,2", "iBook 2", + PMAC_TYPE_IBOOK2, pangea_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook4,3", "iBook 2 rev. 2", + PMAC_TYPE_IBOOK2, pangea_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook5,1", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,2", "PowerBook G4 15\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,3", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,4", "PowerBook G4 15\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,5", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,6", "PowerBook G4 15\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,7", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,1", "PowerBook G4 12\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,2", "PowerBook G4", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,3", "iBook G4", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,4", "PowerBook G4 12\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,5", "iBook G4", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,8", "PowerBook G4 12\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, +#else /* CONFIG_POWER4 */ + { "PowerMac7,2", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5, g5_features, + 0, + }, +#ifdef CONFIG_PPC64 + { "PowerMac7,3", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5, g5_features, + 0, + }, + { "PowerMac8,1", "iMac G5", + PMAC_TYPE_IMAC_G5, g5_features, + 0, + }, + { "PowerMac9,1", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5_U3L, g5_features, + 0, + }, + { "RackMac3,1", "XServe G5", + PMAC_TYPE_XSERVE_G5, g5_features, + 0, + }, +#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_POWER4 */ +}; + +/* + * The toplevel feature_call callback + */ +long pmac_do_feature_call(unsigned int selector, ...) +{ + struct device_node *node; + long param, value; + int i; + feature_call func = NULL; + va_list args; + + if (pmac_mb.features) + for (i=0; pmac_mb.features[i].function; i++) + if (pmac_mb.features[i].selector == selector) { + func = pmac_mb.features[i].function; + break; + } + if (!func) + for (i=0; any_features[i].function; i++) + if (any_features[i].selector == selector) { + func = any_features[i].function; + break; + } + if (!func) + return -ENODEV; + + va_start(args, selector); + node = (struct device_node*)va_arg(args, void*); + param = va_arg(args, long); + value = va_arg(args, long); + va_end(args); + + return func(node, param, value); +} + +static int __init probe_motherboard(void) +{ + int i; + struct macio_chip *macio = &macio_chips[0]; + const char *model = NULL; + struct device_node *dt; + + /* Lookup known motherboard type in device-tree. First try an + * exact match on the "model" property, then try a "compatible" + * match is none is found. + */ + dt = find_devices("device-tree"); + if (dt != NULL) + model = (const char *) get_property(dt, "model", NULL); + for(i=0; model && i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) { + if (strcmp(model, pmac_mb_defs[i].model_string) == 0) { + pmac_mb = pmac_mb_defs[i]; + goto found; + } + } + for(i=0; i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) { + if (machine_is_compatible(pmac_mb_defs[i].model_string)) { + pmac_mb = pmac_mb_defs[i]; + goto found; + } + } + + /* Fallback to selection depending on mac-io chip type */ + switch(macio->type) { +#ifndef CONFIG_POWER4 + case macio_grand_central: + pmac_mb.model_id = PMAC_TYPE_PSURGE; + pmac_mb.model_name = "Unknown PowerSurge"; + break; + case macio_ohare: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_OHARE; + pmac_mb.model_name = "Unknown OHare-based"; + break; + case macio_heathrow: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_HEATHROW; + pmac_mb.model_name = "Unknown Heathrow-based"; + pmac_mb.features = heathrow_desktop_features; + break; + case macio_paddington: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PADDINGTON; + pmac_mb.model_name = "Unknown Paddington-based"; + pmac_mb.features = paddington_features; + break; + case macio_keylargo: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_CORE99; + pmac_mb.model_name = "Unknown Keylargo-based"; + pmac_mb.features = core99_features; + break; + case macio_pangea: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PANGEA; + pmac_mb.model_name = "Unknown Pangea-based"; + pmac_mb.features = pangea_features; + break; + case macio_intrepid: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_INTREPID; + pmac_mb.model_name = "Unknown Intrepid-based"; + pmac_mb.features = intrepid_features; + break; +#else /* CONFIG_POWER4 */ + case macio_keylargo2: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2; + pmac_mb.model_name = "Unknown K2-based"; + pmac_mb.features = g5_features; + break; +#endif /* CONFIG_POWER4 */ + default: + return -ENODEV; + } +found: +#ifndef CONFIG_POWER4 + /* Fixup Hooper vs. Comet */ + if (pmac_mb.model_id == PMAC_TYPE_HOOPER) { + u32 __iomem * mach_id_ptr = ioremap(0xf3000034, 4); + if (!mach_id_ptr) + return -ENODEV; + /* Here, I used to disable the media-bay on comet. It + * appears this is wrong, the floppy connector is actually + * a kind of media-bay and works with the current driver. + */ + if (__raw_readl(mach_id_ptr) & 0x20000000UL) + pmac_mb.model_id = PMAC_TYPE_COMET; + iounmap(mach_id_ptr); + } +#endif /* CONFIG_POWER4 */ + +#ifdef CONFIG_6xx + /* Set default value of powersave_nap on machines that support it. + * It appears that uninorth rev 3 has a problem with it, we don't + * enable it on those. In theory, the flush-on-lock property is + * supposed to be set when not supported, but I'm not very confident + * that all Apple OF revs did it properly, I do it the paranoid way. + */ + while (uninorth_base && uninorth_rev > 3) { + struct device_node *np = find_path_device("/cpus"); + if (!np || !np->child) { + printk(KERN_WARNING "Can't find CPU(s) in device tree !\n"); + break; + } + np = np->child; + /* Nap mode not supported on SMP */ + if (np->sibling) + break; + /* Nap mode not supported if flush-on-lock property is present */ + if (get_property(np, "flush-on-lock", NULL)) + break; + powersave_nap = 1; + printk(KERN_INFO "Processor NAP mode on idle enabled.\n"); + break; + } + + /* On CPUs that support it (750FX), lowspeed by default during + * NAP mode + */ + powersave_lowspeed = 1; +#endif /* CONFIG_6xx */ +#ifdef CONFIG_POWER4 + powersave_nap = 1; +#endif + /* Check for "mobile" machine */ + if (model && (strncmp(model, "PowerBook", 9) == 0 + || strncmp(model, "iBook", 5) == 0)) + pmac_mb.board_flags |= PMAC_MB_MOBILE; + + + printk(KERN_INFO "PowerMac motherboard: %s\n", pmac_mb.model_name); + return 0; +} + +/* Initialize the Core99 UniNorth host bridge and memory controller + */ +static void __init probe_uninorth(void) +{ + unsigned long actrl; + + /* Locate core99 Uni-N */ + uninorth_node = of_find_node_by_name(NULL, "uni-n"); + /* Locate G5 u3 */ + if (uninorth_node == NULL) { + uninorth_node = of_find_node_by_name(NULL, "u3"); + uninorth_u3 = 1; + } + if (uninorth_node && uninorth_node->n_addrs > 0) { + unsigned long address = uninorth_node->addrs[0].address; + uninorth_base = ioremap(address, 0x40000); + uninorth_rev = in_be32(UN_REG(UNI_N_VERSION)); + if (uninorth_u3) + u3_ht = ioremap(address + U3_HT_CONFIG_BASE, 0x1000); + } else + uninorth_node = NULL; + + if (!uninorth_node) + return; + + printk(KERN_INFO "Found %s memory controller & host bridge, revision: %d\n", + uninorth_u3 ? "U3" : "UniNorth", uninorth_rev); + printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base); + + /* Set the arbitrer QAck delay according to what Apple does + */ + if (uninorth_rev < 0x11) { + actrl = UN_IN(UNI_N_ARB_CTRL) & ~UNI_N_ARB_CTRL_QACK_DELAY_MASK; + actrl |= ((uninorth_rev < 3) ? UNI_N_ARB_CTRL_QACK_DELAY105 : + UNI_N_ARB_CTRL_QACK_DELAY) << UNI_N_ARB_CTRL_QACK_DELAY_SHIFT; + UN_OUT(UNI_N_ARB_CTRL, actrl); + } + + /* Some more magic as done by them in recent MacOS X on UniNorth + * revs 1.5 to 2.O and Pangea. Seem to toggle the UniN Maxbus/PCI + * memory timeout + */ + if ((uninorth_rev >= 0x11 && uninorth_rev <= 0x24) || uninorth_rev == 0xc0) + UN_OUT(0x2160, UN_IN(0x2160) & 0x00ffffff); +} + +static void __init probe_one_macio(const char *name, const char *compat, int type) +{ + struct device_node* node; + int i; + volatile u32 __iomem * base; + u32* revp; + + node = find_devices(name); + if (!node || !node->n_addrs) + return; + if (compat) + do { + if (device_is_compatible(node, compat)) + break; + node = node->next; + } while (node); + if (!node) + return; + for(i=0; i<MAX_MACIO_CHIPS; i++) { + if (!macio_chips[i].of_node) + break; + if (macio_chips[i].of_node == node) + return; + } + if (i >= MAX_MACIO_CHIPS) { + printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n"); + printk(KERN_ERR "pmac_feature: %s skipped\n", node->full_name); + return; + } + base = ioremap(node->addrs[0].address, node->addrs[0].size); + if (!base) { + printk(KERN_ERR "pmac_feature: Can't map mac-io chip !\n"); + return; + } + if (type == macio_keylargo) { + u32 *did = (u32 *)get_property(node, "device-id", NULL); + if (*did == 0x00000025) + type = macio_pangea; + if (*did == 0x0000003e) + type = macio_intrepid; + } + macio_chips[i].of_node = node; + macio_chips[i].type = type; + macio_chips[i].base = base; + macio_chips[i].flags = MACIO_FLAG_SCCB_ON | MACIO_FLAG_SCCB_ON; + macio_chips[i].name = macio_names[type]; + revp = (u32 *)get_property(node, "revision-id", NULL); + if (revp) + macio_chips[i].rev = *revp; + printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n", + macio_names[type], macio_chips[i].rev, macio_chips[i].base); +} + +static int __init +probe_macios(void) +{ + /* Warning, ordering is important */ + probe_one_macio("gc", NULL, macio_grand_central); + probe_one_macio("ohare", NULL, macio_ohare); + probe_one_macio("pci106b,7", NULL, macio_ohareII); + probe_one_macio("mac-io", "keylargo", macio_keylargo); + probe_one_macio("mac-io", "paddington", macio_paddington); + probe_one_macio("mac-io", "gatwick", macio_gatwick); + probe_one_macio("mac-io", "heathrow", macio_heathrow); + probe_one_macio("mac-io", "K2-Keylargo", macio_keylargo2); + + /* Make sure the "main" macio chip appear first */ + if (macio_chips[0].type == macio_gatwick + && macio_chips[1].type == macio_heathrow) { + struct macio_chip temp = macio_chips[0]; + macio_chips[0] = macio_chips[1]; + macio_chips[1] = temp; + } + if (macio_chips[0].type == macio_ohareII + && macio_chips[1].type == macio_ohare) { + struct macio_chip temp = macio_chips[0]; + macio_chips[0] = macio_chips[1]; + macio_chips[1] = temp; + } + macio_chips[0].lbus.index = 0; + macio_chips[1].lbus.index = 1; + + return (macio_chips[0].of_node == NULL) ? -ENODEV : 0; +} + +static void __init +initial_serial_shutdown(struct device_node *np) +{ + int len; + struct slot_names_prop { + int count; + char name[1]; + } *slots; + char *conn; + int port_type = PMAC_SCC_ASYNC; + int modem = 0; + + slots = (struct slot_names_prop *)get_property(np, "slot-names", &len); + conn = get_property(np, "AAPL,connector", &len); + if (conn && (strcmp(conn, "infrared") == 0)) + port_type = PMAC_SCC_IRDA; + else if (device_is_compatible(np, "cobalt")) + modem = 1; + else if (slots && slots->count > 0) { + if (strcmp(slots->name, "IrDA") == 0) + port_type = PMAC_SCC_IRDA; + else if (strcmp(slots->name, "Modem") == 0) + modem = 1; + } + if (modem) + pmac_call_feature(PMAC_FTR_MODEM_ENABLE, np, 0, 0); + pmac_call_feature(PMAC_FTR_SCC_ENABLE, np, port_type, 0); +} + +static void __init +set_initial_features(void) +{ + struct device_node *np; + + /* That hack appears to be necessary for some StarMax motherboards + * but I'm not too sure it was audited for side-effects on other + * ohare based machines... + * Since I still have difficulties figuring the right way to + * differenciate them all and since that hack was there for a long + * time, I'll keep it around + */ + if (macio_chips[0].type == macio_ohare && !find_devices("via-pmu")) { + struct macio_chip *macio = &macio_chips[0]; + MACIO_OUT32(OHARE_FCR, STARMAX_FEATURES); + } else if (macio_chips[0].type == macio_ohare) { + struct macio_chip *macio = &macio_chips[0]; + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + } else if (macio_chips[1].type == macio_ohare) { + struct macio_chip *macio = &macio_chips[1]; + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + } + +#ifdef CONFIG_POWER4 + if (macio_chips[0].type == macio_keylargo2) { +#ifndef CONFIG_SMP + /* On SMP machines running UP, we have the second CPU eating + * bus cycles. We need to take it off the bus. This is done + * from pmac_smp for SMP kernels running on one CPU + */ + np = of_find_node_by_type(NULL, "cpu"); + if (np != NULL) + np = of_find_node_by_type(np, "cpu"); + if (np != NULL) { + g5_phy_disable_cpu1(); + of_node_put(np); + } +#endif /* CONFIG_SMP */ + /* Enable GMAC for now for PCI probing. It will be disabled + * later on after PCI probe + */ + np = of_find_node_by_name(NULL, "ethernet"); + while(np) { + if (device_is_compatible(np, "K2-GMAC")) + g5_gmac_enable(np, 0, 1); + np = of_find_node_by_name(np, "ethernet"); + } + + /* Enable FW before PCI probe. Will be disabled later on + * Note: We should have a batter way to check that we are + * dealing with uninorth internal cell and not a PCI cell + * on the external PCI. The code below works though. + */ + np = of_find_node_by_name(NULL, "firewire"); + while(np) { + if (device_is_compatible(np, "pci106b,5811")) { + macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; + g5_fw_enable(np, 0, 1); + } + np = of_find_node_by_name(np, "firewire"); + } + } +#else /* CONFIG_POWER4 */ + + if (macio_chips[0].type == macio_keylargo || + macio_chips[0].type == macio_pangea || + macio_chips[0].type == macio_intrepid) { + /* Enable GMAC for now for PCI probing. It will be disabled + * later on after PCI probe + */ + np = of_find_node_by_name(NULL, "ethernet"); + while(np) { + if (np->parent + && device_is_compatible(np->parent, "uni-north") + && device_is_compatible(np, "gmac")) + core99_gmac_enable(np, 0, 1); + np = of_find_node_by_name(np, "ethernet"); + } + + /* Enable FW before PCI probe. Will be disabled later on + * Note: We should have a batter way to check that we are + * dealing with uninorth internal cell and not a PCI cell + * on the external PCI. The code below works though. + */ + np = of_find_node_by_name(NULL, "firewire"); + while(np) { + if (np->parent + && device_is_compatible(np->parent, "uni-north") + && (device_is_compatible(np, "pci106b,18") || + device_is_compatible(np, "pci106b,30") || + device_is_compatible(np, "pci11c1,5811"))) { + macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; + core99_firewire_enable(np, 0, 1); + } + np = of_find_node_by_name(np, "firewire"); + } + + /* Enable ATA-100 before PCI probe. */ + np = of_find_node_by_name(NULL, "ata-6"); + while(np) { + if (np->parent + && device_is_compatible(np->parent, "uni-north") + && device_is_compatible(np, "kauai-ata")) { + core99_ata100_enable(np, 1); + } + np = of_find_node_by_name(np, "ata-6"); + } + + /* Switch airport off */ + np = find_devices("radio"); + while(np) { + if (np && np->parent == macio_chips[0].of_node) { + macio_chips[0].flags |= MACIO_FLAG_AIRPORT_ON; + core99_airport_enable(np, 0, 0); + } + np = np->next; + } + } + + /* On all machines that support sound PM, switch sound off */ + if (macio_chips[0].of_node) + pmac_do_feature_call(PMAC_FTR_SOUND_CHIP_ENABLE, + macio_chips[0].of_node, 0, 0); + + /* While on some desktop G3s, we turn it back on */ + if (macio_chips[0].of_node && macio_chips[0].type == macio_heathrow + && (pmac_mb.model_id == PMAC_TYPE_GOSSAMER || + pmac_mb.model_id == PMAC_TYPE_SILK)) { + struct macio_chip *macio = &macio_chips[0]; + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); + } + + /* Some machine models need the clock chip to be properly setup for + * clock spreading now. This should be a platform function but we + * don't do these at the moment + */ + pmac_tweak_clock_spreading(1); + +#endif /* CONFIG_POWER4 */ + + /* On all machines, switch modem & serial ports off */ + np = find_devices("ch-a"); + while(np) { + initial_serial_shutdown(np); + np = np->next; + } + np = find_devices("ch-b"); + while(np) { + initial_serial_shutdown(np); + np = np->next; + } +} + +void __init +pmac_feature_init(void) +{ + /* Detect the UniNorth memory controller */ + probe_uninorth(); + + /* Probe mac-io controllers */ + if (probe_macios()) { + printk(KERN_WARNING "No mac-io chip found\n"); + return; + } + + /* Setup low-level i2c stuffs */ + pmac_init_low_i2c(); + + /* Probe machine type */ + if (probe_motherboard()) + printk(KERN_WARNING "Unknown PowerMac !\n"); + + /* Set some initial features (turn off some chips that will + * be later turned on) + */ + set_initial_features(); +} + +int __init pmac_feature_late_init(void) +{ +#if 0 + struct device_node *np; + + /* Request some resources late */ + if (uninorth_node) + request_OF_resource(uninorth_node, 0, NULL); + np = find_devices("hammerhead"); + if (np) + request_OF_resource(np, 0, NULL); + np = find_devices("interrupt-controller"); + if (np) + request_OF_resource(np, 0, NULL); +#endif + return 0; +} + +device_initcall(pmac_feature_late_init); + +#if 0 +static void dump_HT_speeds(char *name, u32 cfg, u32 frq) +{ + int freqs[16] = { 200,300,400,500,600,800,1000,0,0,0,0,0,0,0,0,0 }; + int bits[8] = { 8,16,0,32,2,4,0,0 }; + int freq = (frq >> 8) & 0xf; + + if (freqs[freq] == 0) + printk("%s: Unknown HT link frequency %x\n", name, freq); + else + printk("%s: %d MHz on main link, (%d in / %d out) bits width\n", + name, freqs[freq], + bits[(cfg >> 28) & 0x7], bits[(cfg >> 24) & 0x7]); +} + +void __init pmac_check_ht_link(void) +{ +#if 0 /* Disabled for now */ + u32 ufreq, freq, ucfg, cfg; + struct device_node *pcix_node; + u8 px_bus, px_devfn; + struct pci_controller *px_hose; + + (void)in_be32(u3_ht + U3_HT_LINK_COMMAND); + ucfg = cfg = in_be32(u3_ht + U3_HT_LINK_CONFIG); + ufreq = freq = in_be32(u3_ht + U3_HT_LINK_FREQ); + dump_HT_speeds("U3 HyperTransport", cfg, freq); + + pcix_node = of_find_compatible_node(NULL, "pci", "pci-x"); + if (pcix_node == NULL) { + printk("No PCI-X bridge found\n"); + return; + } + if (pci_device_from_OF_node(pcix_node, &px_bus, &px_devfn) != 0) { + printk("PCI-X bridge found but not matched to pci\n"); + return; + } + px_hose = pci_find_hose_for_OF_device(pcix_node); + if (px_hose == NULL) { + printk("PCI-X bridge found but not matched to host\n"); + return; + } + early_read_config_dword(px_hose, px_bus, px_devfn, 0xc4, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xcc, &freq); + dump_HT_speeds("PCI-X HT Uplink", cfg, freq); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xc8, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xd0, &freq); + dump_HT_speeds("PCI-X HT Downlink", cfg, freq); +#endif +} + +#endif /* CONFIG_POWER4 */ + +/* + * Early video resume hook + */ + +static void (*pmac_early_vresume_proc)(void *data); +static void *pmac_early_vresume_data; + +void pmac_set_early_video_resume(void (*proc)(void *data), void *data) +{ + if (_machine != _MACH_Pmac) + return; + preempt_disable(); + pmac_early_vresume_proc = proc; + pmac_early_vresume_data = data; + preempt_enable(); +} +EXPORT_SYMBOL(pmac_set_early_video_resume); + +void pmac_call_early_video_resume(void) +{ + if (pmac_early_vresume_proc) + pmac_early_vresume_proc(pmac_early_vresume_data); +} + +/* + * AGP related suspend/resume code + */ + +static struct pci_dev *pmac_agp_bridge; +static int (*pmac_agp_suspend)(struct pci_dev *bridge); +static int (*pmac_agp_resume)(struct pci_dev *bridge); + +void pmac_register_agp_pm(struct pci_dev *bridge, + int (*suspend)(struct pci_dev *bridge), + int (*resume)(struct pci_dev *bridge)) +{ + if (suspend || resume) { + pmac_agp_bridge = bridge; + pmac_agp_suspend = suspend; + pmac_agp_resume = resume; + return; + } + if (bridge != pmac_agp_bridge) + return; + pmac_agp_suspend = pmac_agp_resume = NULL; + return; +} +EXPORT_SYMBOL(pmac_register_agp_pm); + +void pmac_suspend_agp_for_card(struct pci_dev *dev) +{ + if (pmac_agp_bridge == NULL || pmac_agp_suspend == NULL) + return; + if (pmac_agp_bridge->bus != dev->bus) + return; + pmac_agp_suspend(pmac_agp_bridge); +} +EXPORT_SYMBOL(pmac_suspend_agp_for_card); + +void pmac_resume_agp_for_card(struct pci_dev *dev) +{ + if (pmac_agp_bridge == NULL || pmac_agp_resume == NULL) + return; + if (pmac_agp_bridge->bus != dev->bus) + return; + pmac_agp_resume(pmac_agp_bridge); +} +EXPORT_SYMBOL(pmac_resume_agp_for_card); diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c new file mode 100644 index 00000000000..f3f39e8e337 --- /dev/null +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -0,0 +1,523 @@ +/* + * arch/ppc/platforms/pmac_low_i2c.c + * + * Copyright (C) 2003 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This file contains some low-level i2c access routines that + * need to be used by various bits of the PowerMac platform code + * at times where the real asynchronous & interrupt driven driver + * cannot be used. The API borrows some semantics from the darwin + * driver in order to ease the implementation of the platform + * properties parser + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <asm/keylargo.h> +#include <asm/uninorth.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/pmac_low_i2c.h> + +#define MAX_LOW_I2C_HOST 4 + +#ifdef DEBUG +#define DBG(x...) do {\ + printk(KERN_DEBUG "KW:" x); \ + } while(0) +#else +#define DBG(x...) +#endif + +struct low_i2c_host; + +typedef int (*low_i2c_func_t)(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len); + +struct low_i2c_host +{ + struct device_node *np; /* OF device node */ + struct semaphore mutex; /* Access mutex for use by i2c-keywest */ + low_i2c_func_t func; /* Access function */ + unsigned int is_open : 1; /* Poor man's access control */ + int mode; /* Current mode */ + int channel; /* Current channel */ + int num_channels; /* Number of channels */ + void __iomem *base; /* For keywest-i2c, base address */ + int bsteps; /* And register stepping */ + int speed; /* And speed */ +}; + +static struct low_i2c_host low_i2c_hosts[MAX_LOW_I2C_HOST]; + +/* No locking is necessary on allocation, we are running way before + * anything can race with us + */ +static struct low_i2c_host *find_low_i2c_host(struct device_node *np) +{ + int i; + + for (i = 0; i < MAX_LOW_I2C_HOST; i++) + if (low_i2c_hosts[i].np == np) + return &low_i2c_hosts[i]; + return NULL; +} + +/* + * + * i2c-keywest implementation (UniNorth, U2, U3, Keylargo's) + * + */ + +/* + * Keywest i2c definitions borrowed from drivers/i2c/i2c-keywest.h, + * should be moved somewhere in include/asm-ppc/ + */ +/* Register indices */ +typedef enum { + reg_mode = 0, + reg_control, + reg_status, + reg_isr, + reg_ier, + reg_addr, + reg_subaddr, + reg_data +} reg_t; + + +/* Mode register */ +#define KW_I2C_MODE_100KHZ 0x00 +#define KW_I2C_MODE_50KHZ 0x01 +#define KW_I2C_MODE_25KHZ 0x02 +#define KW_I2C_MODE_DUMB 0x00 +#define KW_I2C_MODE_STANDARD 0x04 +#define KW_I2C_MODE_STANDARDSUB 0x08 +#define KW_I2C_MODE_COMBINED 0x0C +#define KW_I2C_MODE_MODE_MASK 0x0C +#define KW_I2C_MODE_CHAN_MASK 0xF0 + +/* Control register */ +#define KW_I2C_CTL_AAK 0x01 +#define KW_I2C_CTL_XADDR 0x02 +#define KW_I2C_CTL_STOP 0x04 +#define KW_I2C_CTL_START 0x08 + +/* Status register */ +#define KW_I2C_STAT_BUSY 0x01 +#define KW_I2C_STAT_LAST_AAK 0x02 +#define KW_I2C_STAT_LAST_RW 0x04 +#define KW_I2C_STAT_SDA 0x08 +#define KW_I2C_STAT_SCL 0x10 + +/* IER & ISR registers */ +#define KW_I2C_IRQ_DATA 0x01 +#define KW_I2C_IRQ_ADDR 0x02 +#define KW_I2C_IRQ_STOP 0x04 +#define KW_I2C_IRQ_START 0x08 +#define KW_I2C_IRQ_MASK 0x0F + +/* State machine states */ +enum { + state_idle, + state_addr, + state_read, + state_write, + state_stop, + state_dead +}; + +#define WRONG_STATE(name) do {\ + printk(KERN_DEBUG "KW: wrong state. Got %s, state: %s (isr: %02x)\n", \ + name, __kw_state_names[state], isr); \ + } while(0) + +static const char *__kw_state_names[] = { + "state_idle", + "state_addr", + "state_read", + "state_write", + "state_stop", + "state_dead" +}; + +static inline u8 __kw_read_reg(struct low_i2c_host *host, reg_t reg) +{ + return readb(host->base + (((unsigned int)reg) << host->bsteps)); +} + +static inline void __kw_write_reg(struct low_i2c_host *host, reg_t reg, u8 val) +{ + writeb(val, host->base + (((unsigned)reg) << host->bsteps)); + (void)__kw_read_reg(host, reg_subaddr); +} + +#define kw_write_reg(reg, val) __kw_write_reg(host, reg, val) +#define kw_read_reg(reg) __kw_read_reg(host, reg) + + +/* Don't schedule, the g5 fan controller is too + * timing sensitive + */ +static u8 kw_wait_interrupt(struct low_i2c_host* host) +{ + int i, j; + u8 isr; + + for (i = 0; i < 100000; i++) { + isr = kw_read_reg(reg_isr) & KW_I2C_IRQ_MASK; + if (isr != 0) + return isr; + + /* This code is used with the timebase frozen, we cannot rely + * on udelay ! For now, just use a bogus loop + */ + for (j = 1; j < 10000; j++) + mb(); + } + return isr; +} + +static int kw_handle_interrupt(struct low_i2c_host *host, int state, int rw, int *rc, u8 **data, int *len, u8 isr) +{ + u8 ack; + + DBG("kw_handle_interrupt(%s, isr: %x)\n", __kw_state_names[state], isr); + + if (isr == 0) { + if (state != state_stop) { + DBG("KW: Timeout !\n"); + *rc = -EIO; + goto stop; + } + if (state == state_stop) { + ack = kw_read_reg(reg_status); + if (!(ack & KW_I2C_STAT_BUSY)) { + state = state_idle; + kw_write_reg(reg_ier, 0x00); + } + } + return state; + } + + if (isr & KW_I2C_IRQ_ADDR) { + ack = kw_read_reg(reg_status); + if (state != state_addr) { + kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR); + WRONG_STATE("KW_I2C_IRQ_ADDR"); + *rc = -EIO; + goto stop; + } + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + *rc = -ENODEV; + DBG("KW: NAK on address\n"); + return state_stop; + } else { + if (rw) { + state = state_read; + if (*len > 1) + kw_write_reg(reg_control, KW_I2C_CTL_AAK); + } else { + state = state_write; + kw_write_reg(reg_data, **data); + (*data)++; (*len)--; + } + } + kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR); + } + + if (isr & KW_I2C_IRQ_DATA) { + if (state == state_read) { + **data = kw_read_reg(reg_data); + (*data)++; (*len)--; + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + if ((*len) == 0) + state = state_stop; + else if ((*len) == 1) + kw_write_reg(reg_control, 0); + } else if (state == state_write) { + ack = kw_read_reg(reg_status); + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + DBG("KW: nack on data write\n"); + *rc = -EIO; + goto stop; + } else if (*len) { + kw_write_reg(reg_data, **data); + (*data)++; (*len)--; + } else { + kw_write_reg(reg_control, KW_I2C_CTL_STOP); + state = state_stop; + *rc = 0; + } + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + } else { + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + WRONG_STATE("KW_I2C_IRQ_DATA"); + if (state != state_stop) { + *rc = -EIO; + goto stop; + } + } + } + + if (isr & KW_I2C_IRQ_STOP) { + kw_write_reg(reg_isr, KW_I2C_IRQ_STOP); + if (state != state_stop) { + WRONG_STATE("KW_I2C_IRQ_STOP"); + *rc = -EIO; + } + return state_idle; + } + + if (isr & KW_I2C_IRQ_START) + kw_write_reg(reg_isr, KW_I2C_IRQ_START); + + return state; + + stop: + kw_write_reg(reg_control, KW_I2C_CTL_STOP); + return state_stop; +} + +static int keywest_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 subaddr, u8 *data, int len) +{ + u8 mode_reg = host->speed; + int state = state_addr; + int rc = 0; + + /* Setup mode & subaddress if any */ + switch(host->mode) { + case pmac_low_i2c_mode_dumb: + printk(KERN_ERR "low_i2c: Dumb mode not supported !\n"); + return -EINVAL; + case pmac_low_i2c_mode_std: + mode_reg |= KW_I2C_MODE_STANDARD; + break; + case pmac_low_i2c_mode_stdsub: + mode_reg |= KW_I2C_MODE_STANDARDSUB; + break; + case pmac_low_i2c_mode_combined: + mode_reg |= KW_I2C_MODE_COMBINED; + break; + } + + /* Setup channel & clear pending irqs */ + kw_write_reg(reg_isr, kw_read_reg(reg_isr)); + kw_write_reg(reg_mode, mode_reg | (host->channel << 4)); + kw_write_reg(reg_status, 0); + + /* Set up address and r/w bit */ + kw_write_reg(reg_addr, addr); + + /* Set up the sub address */ + if ((mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_STANDARDSUB + || (mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_COMBINED) + kw_write_reg(reg_subaddr, subaddr); + + /* Start sending address & disable interrupt*/ + kw_write_reg(reg_ier, 0 /*KW_I2C_IRQ_MASK*/); + kw_write_reg(reg_control, KW_I2C_CTL_XADDR); + + /* State machine, to turn into an interrupt handler */ + while(state != state_idle) { + u8 isr = kw_wait_interrupt(host); + state = kw_handle_interrupt(host, state, addr & 1, &rc, &data, &len, isr); + } + + return rc; +} + +static void keywest_low_i2c_add(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(NULL); + u32 *psteps, *prate, steps, aoffset = 0; + struct device_node *parent; + + if (host == NULL) { + printk(KERN_ERR "low_i2c: Can't allocate host for %s\n", + np->full_name); + return; + } + memset(host, 0, sizeof(*host)); + + init_MUTEX(&host->mutex); + host->np = of_node_get(np); + psteps = (u32 *)get_property(np, "AAPL,address-step", NULL); + steps = psteps ? (*psteps) : 0x10; + for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++) + steps >>= 1; + parent = of_get_parent(np); + host->num_channels = 1; + if (parent && parent->name[0] == 'u') { + host->num_channels = 2; + aoffset = 3; + } + /* Select interface rate */ + host->speed = KW_I2C_MODE_100KHZ; + prate = (u32 *)get_property(np, "AAPL,i2c-rate", NULL); + if (prate) switch(*prate) { + case 100: + host->speed = KW_I2C_MODE_100KHZ; + break; + case 50: + host->speed = KW_I2C_MODE_50KHZ; + break; + case 25: + host->speed = KW_I2C_MODE_25KHZ; + break; + } + + host->mode = pmac_low_i2c_mode_std; + host->base = ioremap(np->addrs[0].address + aoffset, + np->addrs[0].size); + host->func = keywest_low_i2c_func; +} + +/* + * + * PMU implementation + * + */ + + +#ifdef CONFIG_ADB_PMU + +static int pmu_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len) +{ + // TODO + return -ENODEV; +} + +static void pmu_low_i2c_add(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(NULL); + + if (host == NULL) { + printk(KERN_ERR "low_i2c: Can't allocate host for %s\n", + np->full_name); + return; + } + memset(host, 0, sizeof(*host)); + + init_MUTEX(&host->mutex); + host->np = of_node_get(np); + host->num_channels = 3; + host->mode = pmac_low_i2c_mode_std; + host->func = pmu_low_i2c_func; +} + +#endif /* CONFIG_ADB_PMU */ + +void __init pmac_init_low_i2c(void) +{ + struct device_node *np; + + /* Probe keywest-i2c busses */ + np = of_find_compatible_node(NULL, "i2c", "keywest-i2c"); + while(np) { + keywest_low_i2c_add(np); + np = of_find_compatible_node(np, "i2c", "keywest-i2c"); + } + +#ifdef CONFIG_ADB_PMU + /* Probe PMU busses */ + np = of_find_node_by_name(NULL, "via-pmu"); + if (np) + pmu_low_i2c_add(np); +#endif /* CONFIG_ADB_PMU */ + + /* TODO: Add CUDA support as well */ +} + +int pmac_low_i2c_lock(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + down(&host->mutex); + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_lock); + +int pmac_low_i2c_unlock(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + up(&host->mutex); + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_unlock); + + +int pmac_low_i2c_open(struct device_node *np, int channel) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + + if (channel >= host->num_channels) + return -EINVAL; + + down(&host->mutex); + host->is_open = 1; + host->channel = channel; + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_open); + +int pmac_low_i2c_close(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + + host->is_open = 0; + up(&host->mutex); + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_close); + +int pmac_low_i2c_setmode(struct device_node *np, int mode) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + WARN_ON(!host->is_open); + host->mode = mode; + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_setmode); + +int pmac_low_i2c_xfer(struct device_node *np, u8 addrdir, u8 subaddr, u8 *data, int len) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + WARN_ON(!host->is_open); + + return host->func(host, addrdir, subaddr, data, len); +} +EXPORT_SYMBOL(pmac_low_i2c_xfer); + diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c new file mode 100644 index 00000000000..8c9b008c722 --- /dev/null +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -0,0 +1,584 @@ +/* + * arch/ppc/platforms/pmac_nvram.c + * + * Copyright (C) 2002 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Todo: - add support for the OF persistent properties + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/nvram.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/bootmem.h> +#include <linux/completion.h> +#include <linux/spinlock.h> +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/system.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/nvram.h> + +#define DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +#define NVRAM_SIZE 0x2000 /* 8kB of non-volatile RAM */ + +#define CORE99_SIGNATURE 0x5a +#define CORE99_ADLER_START 0x14 + +/* On Core99, nvram is either a sharp, a micron or an AMD flash */ +#define SM_FLASH_STATUS_DONE 0x80 +#define SM_FLASH_STATUS_ERR 0x38 +#define SM_FLASH_CMD_ERASE_CONFIRM 0xd0 +#define SM_FLASH_CMD_ERASE_SETUP 0x20 +#define SM_FLASH_CMD_RESET 0xff +#define SM_FLASH_CMD_WRITE_SETUP 0x40 +#define SM_FLASH_CMD_CLEAR_STATUS 0x50 +#define SM_FLASH_CMD_READ_STATUS 0x70 + +/* CHRP NVRAM header */ +struct chrp_header { + u8 signature; + u8 cksum; + u16 len; + char name[12]; + u8 data[0]; +}; + +struct core99_header { + struct chrp_header hdr; + u32 adler; + u32 generation; + u32 reserved[2]; +}; + +/* + * Read and write the non-volatile RAM on PowerMacs and CHRP machines. + */ +static int nvram_naddrs; +static volatile unsigned char *nvram_addr; +static volatile unsigned char *nvram_data; +static int nvram_mult, is_core_99; +static int core99_bank = 0; +static int nvram_partitions[3]; +static DEFINE_SPINLOCK(nv_lock); + +extern int pmac_newworld; +extern int system_running; + +static int (*core99_write_bank)(int bank, u8* datas); +static int (*core99_erase_bank)(int bank); + +static char *nvram_image; + + +static unsigned char core99_nvram_read_byte(int addr) +{ + if (nvram_image == NULL) + return 0xff; + return nvram_image[addr]; +} + +static void core99_nvram_write_byte(int addr, unsigned char val) +{ + if (nvram_image == NULL) + return; + nvram_image[addr] = val; +} + + +static unsigned char direct_nvram_read_byte(int addr) +{ + return in_8(&nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult]); +} + +static void direct_nvram_write_byte(int addr, unsigned char val) +{ + out_8(&nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult], val); +} + + +static unsigned char indirect_nvram_read_byte(int addr) +{ + unsigned char val; + unsigned long flags; + + spin_lock_irqsave(&nv_lock, flags); + out_8(nvram_addr, addr >> 5); + val = in_8(&nvram_data[(addr & 0x1f) << 4]); + spin_unlock_irqrestore(&nv_lock, flags); + + return val; +} + +static void indirect_nvram_write_byte(int addr, unsigned char val) +{ + unsigned long flags; + + spin_lock_irqsave(&nv_lock, flags); + out_8(nvram_addr, addr >> 5); + out_8(&nvram_data[(addr & 0x1f) << 4], val); + spin_unlock_irqrestore(&nv_lock, flags); +} + + +#ifdef CONFIG_ADB_PMU + +static void pmu_nvram_complete(struct adb_request *req) +{ + if (req->arg) + complete((struct completion *)req->arg); +} + +static unsigned char pmu_nvram_read_byte(int addr) +{ + struct adb_request req; + DECLARE_COMPLETION(req_complete); + + req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL; + if (pmu_request(&req, pmu_nvram_complete, 3, PMU_READ_NVRAM, + (addr >> 8) & 0xff, addr & 0xff)) + return 0xff; + if (system_state == SYSTEM_RUNNING) + wait_for_completion(&req_complete); + while (!req.complete) + pmu_poll(); + return req.reply[0]; +} + +static void pmu_nvram_write_byte(int addr, unsigned char val) +{ + struct adb_request req; + DECLARE_COMPLETION(req_complete); + + req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL; + if (pmu_request(&req, pmu_nvram_complete, 4, PMU_WRITE_NVRAM, + (addr >> 8) & 0xff, addr & 0xff, val)) + return; + if (system_state == SYSTEM_RUNNING) + wait_for_completion(&req_complete); + while (!req.complete) + pmu_poll(); +} + +#endif /* CONFIG_ADB_PMU */ + + +static u8 chrp_checksum(struct chrp_header* hdr) +{ + u8 *ptr; + u16 sum = hdr->signature; + for (ptr = (u8 *)&hdr->len; ptr < hdr->data; ptr++) + sum += *ptr; + while (sum > 0xFF) + sum = (sum & 0xFF) + (sum>>8); + return sum; +} + +static u32 core99_calc_adler(u8 *buffer) +{ + int cnt; + u32 low, high; + + buffer += CORE99_ADLER_START; + low = 1; + high = 0; + for (cnt=0; cnt<(NVRAM_SIZE-CORE99_ADLER_START); cnt++) { + if ((cnt % 5000) == 0) { + high %= 65521UL; + high %= 65521UL; + } + low += buffer[cnt]; + high += low; + } + low %= 65521UL; + high %= 65521UL; + + return (high << 16) | low; +} + +static u32 core99_check(u8* datas) +{ + struct core99_header* hdr99 = (struct core99_header*)datas; + + if (hdr99->hdr.signature != CORE99_SIGNATURE) { + DBG("Invalid signature\n"); + return 0; + } + if (hdr99->hdr.cksum != chrp_checksum(&hdr99->hdr)) { + DBG("Invalid checksum\n"); + return 0; + } + if (hdr99->adler != core99_calc_adler(datas)) { + DBG("Invalid adler\n"); + return 0; + } + return hdr99->generation; +} + +static int sm_erase_bank(int bank) +{ + int stat, i; + unsigned long timeout; + + u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank); + + out_8(base, SM_FLASH_CMD_ERASE_SETUP); + out_8(base, SM_FLASH_CMD_ERASE_CONFIRM); + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: Sharp/Miron flash erase timeout !\n"); + break; + } + out_8(base, SM_FLASH_CMD_READ_STATUS); + stat = in_8(base); + } while (!(stat & SM_FLASH_STATUS_DONE)); + + out_8(base, SM_FLASH_CMD_CLEAR_STATUS); + out_8(base, SM_FLASH_CMD_RESET); + + for (i=0; i<NVRAM_SIZE; i++) + if (base[i] != 0xff) { + printk(KERN_ERR "nvram: Sharp/Micron flash erase failed !\n"); + return -ENXIO; + } + return 0; +} + +static int sm_write_bank(int bank, u8* datas) +{ + int i, stat = 0; + unsigned long timeout; + + u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: Sharp/Micron Writing bank %d...\n", bank); + + for (i=0; i<NVRAM_SIZE; i++) { + out_8(base+i, SM_FLASH_CMD_WRITE_SETUP); + udelay(1); + out_8(base+i, datas[i]); + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: Sharp/Micron flash write timeout !\n"); + break; + } + out_8(base, SM_FLASH_CMD_READ_STATUS); + stat = in_8(base); + } while (!(stat & SM_FLASH_STATUS_DONE)); + if (!(stat & SM_FLASH_STATUS_DONE)) + break; + } + out_8(base, SM_FLASH_CMD_CLEAR_STATUS); + out_8(base, SM_FLASH_CMD_RESET); + for (i=0; i<NVRAM_SIZE; i++) + if (base[i] != datas[i]) { + printk(KERN_ERR "nvram: Sharp/Micron flash write failed !\n"); + return -ENXIO; + } + return 0; +} + +static int amd_erase_bank(int bank) +{ + int i, stat = 0; + unsigned long timeout; + + u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: AMD Erasing bank %d...\n", bank); + + /* Unlock 1 */ + out_8(base+0x555, 0xaa); + udelay(1); + /* Unlock 2 */ + out_8(base+0x2aa, 0x55); + udelay(1); + + /* Sector-Erase */ + out_8(base+0x555, 0x80); + udelay(1); + out_8(base+0x555, 0xaa); + udelay(1); + out_8(base+0x2aa, 0x55); + udelay(1); + out_8(base, 0x30); + udelay(1); + + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: AMD flash erase timeout !\n"); + break; + } + stat = in_8(base) ^ in_8(base); + } while (stat != 0); + + /* Reset */ + out_8(base, 0xf0); + udelay(1); + + for (i=0; i<NVRAM_SIZE; i++) + if (base[i] != 0xff) { + printk(KERN_ERR "nvram: AMD flash erase failed !\n"); + return -ENXIO; + } + return 0; +} + +static int amd_write_bank(int bank, u8* datas) +{ + int i, stat = 0; + unsigned long timeout; + + u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: AMD Writing bank %d...\n", bank); + + for (i=0; i<NVRAM_SIZE; i++) { + /* Unlock 1 */ + out_8(base+0x555, 0xaa); + udelay(1); + /* Unlock 2 */ + out_8(base+0x2aa, 0x55); + udelay(1); + + /* Write single word */ + out_8(base+0x555, 0xa0); + udelay(1); + out_8(base+i, datas[i]); + + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: AMD flash write timeout !\n"); + break; + } + stat = in_8(base) ^ in_8(base); + } while (stat != 0); + if (stat != 0) + break; + } + + /* Reset */ + out_8(base, 0xf0); + udelay(1); + + for (i=0; i<NVRAM_SIZE; i++) + if (base[i] != datas[i]) { + printk(KERN_ERR "nvram: AMD flash write failed !\n"); + return -ENXIO; + } + return 0; +} + +static void __init lookup_partitions(void) +{ + u8 buffer[17]; + int i, offset; + struct chrp_header* hdr; + + if (pmac_newworld) { + nvram_partitions[pmac_nvram_OF] = -1; + nvram_partitions[pmac_nvram_XPRAM] = -1; + nvram_partitions[pmac_nvram_NR] = -1; + hdr = (struct chrp_header *)buffer; + + offset = 0; + buffer[16] = 0; + do { + for (i=0;i<16;i++) + buffer[i] = nvram_read_byte(offset+i); + if (!strcmp(hdr->name, "common")) + nvram_partitions[pmac_nvram_OF] = offset + 0x10; + if (!strcmp(hdr->name, "APL,MacOS75")) { + nvram_partitions[pmac_nvram_XPRAM] = offset + 0x10; + nvram_partitions[pmac_nvram_NR] = offset + 0x110; + } + offset += (hdr->len * 0x10); + } while(offset < NVRAM_SIZE); + } else { + nvram_partitions[pmac_nvram_OF] = 0x1800; + nvram_partitions[pmac_nvram_XPRAM] = 0x1300; + nvram_partitions[pmac_nvram_NR] = 0x1400; + } + DBG("nvram: OF partition at 0x%x\n", nvram_partitions[pmac_nvram_OF]); + DBG("nvram: XP partition at 0x%x\n", nvram_partitions[pmac_nvram_XPRAM]); + DBG("nvram: NR partition at 0x%x\n", nvram_partitions[pmac_nvram_NR]); +} + +static void core99_nvram_sync(void) +{ + struct core99_header* hdr99; + unsigned long flags; + + if (!is_core_99 || !nvram_data || !nvram_image) + return; + + spin_lock_irqsave(&nv_lock, flags); + if (!memcmp(nvram_image, (u8*)nvram_data + core99_bank*NVRAM_SIZE, + NVRAM_SIZE)) + goto bail; + + DBG("Updating nvram...\n"); + + hdr99 = (struct core99_header*)nvram_image; + hdr99->generation++; + hdr99->hdr.signature = CORE99_SIGNATURE; + hdr99->hdr.cksum = chrp_checksum(&hdr99->hdr); + hdr99->adler = core99_calc_adler(nvram_image); + core99_bank = core99_bank ? 0 : 1; + if (core99_erase_bank) + if (core99_erase_bank(core99_bank)) { + printk("nvram: Error erasing bank %d\n", core99_bank); + goto bail; + } + if (core99_write_bank) + if (core99_write_bank(core99_bank, nvram_image)) + printk("nvram: Error writing bank %d\n", core99_bank); + bail: + spin_unlock_irqrestore(&nv_lock, flags); + +#ifdef DEBUG + mdelay(2000); +#endif +} + +void __init pmac_nvram_init(void) +{ + struct device_node *dp; + + nvram_naddrs = 0; + + dp = find_devices("nvram"); + if (dp == NULL) { + printk(KERN_ERR "Can't find NVRAM device\n"); + return; + } + nvram_naddrs = dp->n_addrs; + is_core_99 = device_is_compatible(dp, "nvram,flash"); + if (is_core_99) { + int i; + u32 gen_bank0, gen_bank1; + + if (nvram_naddrs < 1) { + printk(KERN_ERR "nvram: no address\n"); + return; + } + nvram_image = alloc_bootmem(NVRAM_SIZE); + if (nvram_image == NULL) { + printk(KERN_ERR "nvram: can't allocate ram image\n"); + return; + } + nvram_data = ioremap(dp->addrs[0].address, NVRAM_SIZE*2); + nvram_naddrs = 1; /* Make sure we get the correct case */ + + DBG("nvram: Checking bank 0...\n"); + + gen_bank0 = core99_check((u8 *)nvram_data); + gen_bank1 = core99_check((u8 *)nvram_data + NVRAM_SIZE); + core99_bank = (gen_bank0 < gen_bank1) ? 1 : 0; + + DBG("nvram: gen0=%d, gen1=%d\n", gen_bank0, gen_bank1); + DBG("nvram: Active bank is: %d\n", core99_bank); + + for (i=0; i<NVRAM_SIZE; i++) + nvram_image[i] = nvram_data[i + core99_bank*NVRAM_SIZE]; + + ppc_md.nvram_read_val = core99_nvram_read_byte; + ppc_md.nvram_write_val = core99_nvram_write_byte; + ppc_md.nvram_sync = core99_nvram_sync; + /* + * Maybe we could be smarter here though making an exclusive list + * of known flash chips is a bit nasty as older OF didn't provide us + * with a useful "compatible" entry. A solution would be to really + * identify the chip using flash id commands and base ourselves on + * a list of known chips IDs + */ + if (device_is_compatible(dp, "amd-0137")) { + core99_erase_bank = amd_erase_bank; + core99_write_bank = amd_write_bank; + } else { + core99_erase_bank = sm_erase_bank; + core99_write_bank = sm_write_bank; + } + } else if (_machine == _MACH_chrp && nvram_naddrs == 1) { + nvram_data = ioremap(dp->addrs[0].address + isa_mem_base, + dp->addrs[0].size); + nvram_mult = 1; + ppc_md.nvram_read_val = direct_nvram_read_byte; + ppc_md.nvram_write_val = direct_nvram_write_byte; + } else if (nvram_naddrs == 1) { + nvram_data = ioremap(dp->addrs[0].address, dp->addrs[0].size); + nvram_mult = (dp->addrs[0].size + NVRAM_SIZE - 1) / NVRAM_SIZE; + ppc_md.nvram_read_val = direct_nvram_read_byte; + ppc_md.nvram_write_val = direct_nvram_write_byte; + } else if (nvram_naddrs == 2) { + nvram_addr = ioremap(dp->addrs[0].address, dp->addrs[0].size); + nvram_data = ioremap(dp->addrs[1].address, dp->addrs[1].size); + ppc_md.nvram_read_val = indirect_nvram_read_byte; + ppc_md.nvram_write_val = indirect_nvram_write_byte; + } else if (nvram_naddrs == 0 && sys_ctrler == SYS_CTRLER_PMU) { +#ifdef CONFIG_ADB_PMU + nvram_naddrs = -1; + ppc_md.nvram_read_val = pmu_nvram_read_byte; + ppc_md.nvram_write_val = pmu_nvram_write_byte; +#endif /* CONFIG_ADB_PMU */ + } else { + printk(KERN_ERR "Don't know how to access NVRAM with %d addresses\n", + nvram_naddrs); + } + lookup_partitions(); +} + +int pmac_get_partition(int partition) +{ + return nvram_partitions[partition]; +} + +u8 pmac_xpram_read(int xpaddr) +{ + int offset = nvram_partitions[pmac_nvram_XPRAM]; + + if (offset < 0) + return 0xff; + + return ppc_md.nvram_read_val(xpaddr + offset); +} + +void pmac_xpram_write(int xpaddr, u8 data) +{ + int offset = nvram_partitions[pmac_nvram_XPRAM]; + + if (offset < 0) + return; + + ppc_md.nvram_write_val(xpaddr + offset, data); +} + +EXPORT_SYMBOL(pmac_get_partition); +EXPORT_SYMBOL(pmac_xpram_read); +EXPORT_SYMBOL(pmac_xpram_write); diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c new file mode 100644 index 00000000000..afb147e6b89 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pci.c @@ -0,0 +1,1317 @@ +/* + * Support for PCI bridges found on Power Macintoshes. + * + * Copyright (C) 2003 Benjamin Herrenschmuidt (benh@kernel.crashing.org) + * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> + +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/pmac_feature.h> +#ifdef CONFIG_PPC64 +#include <asm/iommu.h> +#include <asm/ppc-pci.h> +#endif + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +static int add_bridge(struct device_node *dev); +extern void pmac_check_ht_link(void); + +/* XXX Could be per-controller, but I don't think we risk anything by + * assuming we won't have both UniNorth and Bandit */ +static int has_uninorth; +#ifdef CONFIG_POWER4 +static struct pci_controller *u3_agp; +#endif /* CONFIG_POWER4 */ + +extern u8 pci_cache_line_size; +extern int pcibios_assign_bus_offset; + +struct device_node *k2_skiplist[2]; + +/* + * Magic constants for enabling cache coherency in the bandit/PSX bridge. + */ +#define BANDIT_DEVID_2 8 +#define BANDIT_REVID 3 + +#define BANDIT_DEVNUM 11 +#define BANDIT_MAGIC 0x50 +#define BANDIT_COHERENT 0x40 + +static int __init fixup_one_level_bus_range(struct device_node *node, int higher) +{ + for (; node != 0;node = node->sibling) { + int * bus_range; + unsigned int *class_code; + int len; + + /* For PCI<->PCI bridges or CardBus bridges, we go down */ + class_code = (unsigned int *) get_property(node, "class-code", NULL); + if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && + (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) + continue; + bus_range = (int *) get_property(node, "bus-range", &len); + if (bus_range != NULL && len > 2 * sizeof(int)) { + if (bus_range[1] > higher) + higher = bus_range[1]; + } + higher = fixup_one_level_bus_range(node->child, higher); + } + return higher; +} + +/* This routine fixes the "bus-range" property of all bridges in the + * system since they tend to have their "last" member wrong on macs + * + * Note that the bus numbers manipulated here are OF bus numbers, they + * are not Linux bus numbers. + */ +static void __init fixup_bus_range(struct device_node *bridge) +{ + int * bus_range; + int len; + + /* Lookup the "bus-range" property for the hose */ + bus_range = (int *) get_property(bridge, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s\n", + bridge->full_name); + return; + } + bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); +} + +/* + * Apple MacRISC (U3, UniNorth, Bandit, Chaos) PCI controllers. + * + * The "Bandit" version is present in all early PCI PowerMacs, + * and up to the first ones using Grackle. Some machines may + * have 2 bandit controllers (2 PCI busses). + * + * "Chaos" is used in some "Bandit"-type machines as a bridge + * for the separate display bus. It is accessed the same + * way as bandit, but cannot be probed for devices. It therefore + * has its own config access functions. + * + * The "UniNorth" version is present in all Core99 machines + * (iBook, G4, new IMacs, and all the recent Apple machines). + * It contains 3 controllers in one ASIC. + * + * The U3 is the bridge used on G5 machines. It contains an + * AGP bus which is dealt with the old UniNorth access routines + * and a HyperTransport bus which uses its own set of access + * functions. + */ + +#define MACRISC_CFA0(devfn, off) \ + ((1 << (unsigned long)PCI_SLOT(dev_fn)) \ + | (((unsigned long)PCI_FUNC(dev_fn)) << 8) \ + | (((unsigned long)(off)) & 0xFCUL)) + +#define MACRISC_CFA1(bus, devfn, off) \ + ((((unsigned long)(bus)) << 16) \ + |(((unsigned long)(devfn)) << 8) \ + |(((unsigned long)(off)) & 0xFCUL) \ + |1UL) + +static unsigned long macrisc_cfg_access(struct pci_controller* hose, + u8 bus, u8 dev_fn, u8 offset) +{ + unsigned int caddr; + + if (bus == hose->first_busno) { + if (dev_fn < (11 << 3)) + return 0; + caddr = MACRISC_CFA0(dev_fn, offset); + } else + caddr = MACRISC_CFA1(bus, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= has_uninorth ? 0x07 : 0x03; + return ((unsigned long)hose->cfg_data) + offset; +} + +static int macrisc_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = macrisc_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int macrisc_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = macrisc_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops macrisc_pci_ops = +{ + macrisc_read_config, + macrisc_write_config +}; + +/* + * Verify that a specific (bus, dev_fn) exists on chaos + */ +static int +chaos_validate_dev(struct pci_bus *bus, int devfn, int offset) +{ + struct device_node *np; + u32 *vendor, *device; + + np = pci_busdev_to_OF_node(bus, devfn); + if (np == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + vendor = (u32 *)get_property(np, "vendor-id", NULL); + device = (u32 *)get_property(np, "device-id", NULL); + if (vendor == NULL || device == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + if ((*vendor == 0x106b) && (*device == 3) && (offset >= 0x10) + && (offset != 0x14) && (offset != 0x18) && (offset <= 0x24)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + return PCIBIOS_SUCCESSFUL; +} + +static int +chaos_read_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 *val) +{ + int result = chaos_validate_dev(bus, devfn, offset); + if (result == PCIBIOS_BAD_REGISTER_NUMBER) + *val = ~0U; + if (result != PCIBIOS_SUCCESSFUL) + return result; + return macrisc_read_config(bus, devfn, offset, len, val); +} + +static int +chaos_write_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 val) +{ + int result = chaos_validate_dev(bus, devfn, offset); + if (result != PCIBIOS_SUCCESSFUL) + return result; + return macrisc_write_config(bus, devfn, offset, len, val); +} + +static struct pci_ops chaos_pci_ops = +{ + chaos_read_config, + chaos_write_config +}; + +#ifdef CONFIG_POWER4 +/* + * These versions of U3 HyperTransport config space access ops do not + * implement self-view of the HT host yet + */ + +/* + * This function deals with some "special cases" devices. + * + * 0 -> No special case + * 1 -> Skip the device but act as if the access was successfull + * (return 0xff's on reads, eventually, cache config space + * accesses in a later version) + * -1 -> Hide the device (unsuccessful acess) + */ +static int u3_ht_skip_device(struct pci_controller *hose, + struct pci_bus *bus, unsigned int devfn) +{ + struct device_node *busdn, *dn; + int i; + + /* We only allow config cycles to devices that are in OF device-tree + * as we are apparently having some weird things going on with some + * revs of K2 on recent G5s + */ + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = hose->arch_data; + for (dn = busdn->child; dn; dn = dn->sibling) + if (dn->data && PCI_DN(dn)->devfn == devfn) + break; + if (dn == NULL) + return -1; + + /* + * When a device in K2 is powered down, we die on config + * cycle accesses. Fix that here. + */ + for (i=0; i<2; i++) + if (k2_skiplist[i] == dn) + return 1; + + return 0; +} + +#define U3_HT_CFA0(devfn, off) \ + ((((unsigned long)devfn) << 8) | offset) +#define U3_HT_CFA1(bus, devfn, off) \ + (U3_HT_CFA0(devfn, off) \ + + (((unsigned long)bus) << 16) \ + + 0x01000000UL) + +static unsigned long u3_ht_cfg_access(struct pci_controller* hose, + u8 bus, u8 devfn, u8 offset) +{ + if (bus == hose->first_busno) { + /* For now, we don't self probe U3 HT bridge */ + if (PCI_SLOT(devfn) == 0) + return 0; + return ((unsigned long)hose->cfg_data) + U3_HT_CFA0(devfn, offset); + } else + return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset); +} + +static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (u3_ht_skip_device(hose, bus, devfn)) { + case 0: + break; + case 1: + switch (len) { + case 1: + *val = 0xff; break; + case 2: + *val = 0xffff; break; + default: + *val = 0xfffffffful; break; + } + return PCIBIOS_SUCCESSFUL; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (u3_ht_skip_device(hose, bus, devfn)) { + case 0: + break; + case 1: + return PCIBIOS_SUCCESSFUL; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u3_ht_pci_ops = +{ + u3_ht_read_config, + u3_ht_write_config +}; +#endif /* CONFIG_POWER4 */ + +/* + * For a bandit bridge, turn on cache coherency if necessary. + * N.B. we could clean this up using the hose ops directly. + */ +static void __init init_bandit(struct pci_controller *bp) +{ + unsigned int vendev, magic; + int rev; + + /* read the word at offset 0 in config space for device 11 */ + out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + PCI_VENDOR_ID); + udelay(2); + vendev = in_le32(bp->cfg_data); + if (vendev == (PCI_DEVICE_ID_APPLE_BANDIT << 16) + + PCI_VENDOR_ID_APPLE) { + /* read the revision id */ + out_le32(bp->cfg_addr, + (1UL << BANDIT_DEVNUM) + PCI_REVISION_ID); + udelay(2); + rev = in_8(bp->cfg_data); + if (rev != BANDIT_REVID) + printk(KERN_WARNING + "Unknown revision %d for bandit\n", rev); + } else if (vendev != (BANDIT_DEVID_2 << 16) + PCI_VENDOR_ID_APPLE) { + printk(KERN_WARNING "bandit isn't? (%x)\n", vendev); + return; + } + + /* read the word at offset 0x50 */ + out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + BANDIT_MAGIC); + udelay(2); + magic = in_le32(bp->cfg_data); + if ((magic & BANDIT_COHERENT) != 0) + return; + magic |= BANDIT_COHERENT; + udelay(2); + out_le32(bp->cfg_data, magic); + printk(KERN_INFO "Cache coherency enabled for bandit/PSX\n"); +} + + +/* + * Tweak the PCI-PCI bridge chip on the blue & white G3s. + */ +static void __init init_p2pbridge(void) +{ + struct device_node *p2pbridge; + struct pci_controller* hose; + u8 bus, devfn; + u16 val; + + /* XXX it would be better here to identify the specific + PCI-PCI bridge chip we have. */ + if ((p2pbridge = find_devices("pci-bridge")) == 0 + || p2pbridge->parent == NULL + || strcmp(p2pbridge->parent->name, "pci") != 0) + return; + if (pci_device_from_OF_node(p2pbridge, &bus, &devfn) < 0) { + DBG("Can't find PCI infos for PCI<->PCI bridge\n"); + return; + } + /* Warning: At this point, we have not yet renumbered all busses. + * So we must use OF walking to find out hose + */ + hose = pci_find_hose_for_OF_device(p2pbridge); + if (!hose) { + DBG("Can't find hose for PCI<->PCI bridge\n"); + return; + } + if (early_read_config_word(hose, bus, devfn, + PCI_BRIDGE_CONTROL, &val) < 0) { + printk(KERN_ERR "init_p2pbridge: couldn't read bridge control\n"); + return; + } + val &= ~PCI_BRIDGE_CTL_MASTER_ABORT; + early_write_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL, val); +} + +/* + * Some Apple desktop machines have a NEC PD720100A USB2 controller + * on the motherboard. Open Firmware, on these, will disable the + * EHCI part of it so it behaves like a pair of OHCI's. This fixup + * code re-enables it ;) + */ +static void __init fixup_nec_usb2(void) +{ + struct device_node *nec; + + for (nec = NULL; (nec = of_find_node_by_name(nec, "usb")) != NULL;) { + struct pci_controller *hose; + u32 data, *prop; + u8 bus, devfn; + + prop = (u32 *)get_property(nec, "vendor-id", NULL); + if (prop == NULL) + continue; + if (0x1033 != *prop) + continue; + prop = (u32 *)get_property(nec, "device-id", NULL); + if (prop == NULL) + continue; + if (0x0035 != *prop) + continue; + prop = (u32 *)get_property(nec, "reg", NULL); + if (prop == NULL) + continue; + devfn = (prop[0] >> 8) & 0xff; + bus = (prop[0] >> 16) & 0xff; + if (PCI_FUNC(devfn) != 0) + continue; + hose = pci_find_hose_for_OF_device(nec); + if (!hose) + continue; + early_read_config_dword(hose, bus, devfn, 0xe4, &data); + if (data & 1UL) { + printk("Found NEC PD720100A USB2 chip with disabled EHCI, fixing up...\n"); + data &= ~1UL; + early_write_config_dword(hose, bus, devfn, 0xe4, data); + early_write_config_byte(hose, bus, devfn | 2, PCI_INTERRUPT_LINE, + nec->intrs[0].line); + } + } +} + +#define GRACKLE_CFA(b, d, o) (0x80 | ((b) << 8) | ((d) << 16) \ + | (((o) & ~3) << 24)) + +#define GRACKLE_PICR1_STG 0x00000040 +#define GRACKLE_PICR1_LOOPSNOOP 0x00000010 + +/* N.B. this is called before bridges is initialized, so we can't + use grackle_pcibios_{read,write}_config_dword. */ +static inline void grackle_set_stg(struct pci_controller* bp, int enable) +{ + unsigned int val; + + out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8)); + val = in_le32(bp->cfg_data); + val = enable? (val | GRACKLE_PICR1_STG) : + (val & ~GRACKLE_PICR1_STG); + out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8)); + out_le32(bp->cfg_data, val); + (void)in_le32(bp->cfg_data); +} + +static inline void grackle_set_loop_snoop(struct pci_controller *bp, int enable) +{ + unsigned int val; + + out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8)); + val = in_le32(bp->cfg_data); + val = enable? (val | GRACKLE_PICR1_LOOPSNOOP) : + (val & ~GRACKLE_PICR1_LOOPSNOOP); + out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8)); + out_le32(bp->cfg_data, val); + (void)in_le32(bp->cfg_data); +} + +static int __init +setup_uninorth(struct pci_controller* hose, struct reg_property* addr) +{ + pci_assign_all_busses = 1; + has_uninorth = 1; + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(addr->address + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->address + 0xc00000, 0x1000); + /* We "know" that the bridge at f2000000 has the PCI slots. */ + return addr->address == 0xf2000000; +} + +static void __init +setup_bandit(struct pci_controller* hose, struct reg_property* addr) +{ + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(addr->address + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->address + 0xc00000, 0x1000); + init_bandit(hose); +} + +static void __init +setup_chaos(struct pci_controller* hose, struct reg_property* addr) +{ + /* assume a `chaos' bridge */ + hose->ops = &chaos_pci_ops; + hose->cfg_addr = ioremap(addr->address + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->address + 0xc00000, 0x1000); +} + +#ifdef CONFIG_POWER4 +static void __init setup_u3_agp(struct pci_controller* hose) +{ + /* On G5, we move AGP up to high bus number so we don't need + * to reassign bus numbers for HT. If we ever have P2P bridges + * on AGP, we'll have to move pci_assign_all_busses to the + * pci_controller structure so we enable it for AGP and not for + * HT childs. + * We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->first_busno = 0xf0; + hose->last_busno = 0xff; + has_uninorth = 1; + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + + u3_agp = hose; +} + +static void __init setup_u3_ht(struct pci_controller* hose) +{ + struct device_node *np = (struct device_node *)hose->arch_data; + int i, cur; + + hose->ops = &u3_ht_pci_ops; + + /* We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->cfg_data = (volatile unsigned char *)ioremap(0xf2000000, 0x02000000); + + /* + * /ht node doesn't expose a "ranges" property, so we "remove" regions that + * have been allocated to AGP. So far, this version of the code doesn't assign + * any of the 0xfxxxxxxx "fine" memory regions to /ht. + * We need to fix that sooner or later by either parsing all child "ranges" + * properties or figuring out the U3 address space decoding logic and + * then read its configuration register (if any). + */ + hose->io_base_phys = 0xf4000000; + hose->io_base_virt = ioremap(hose->io_base_phys, 0x00400000); + isa_io_base = pci_io_base = (unsigned long) hose->io_base_virt; + hose->io_resource.name = np->full_name; + hose->io_resource.start = 0; + hose->io_resource.end = 0x003fffff; + hose->io_resource.flags = IORESOURCE_IO; + hose->pci_mem_offset = 0; + hose->first_busno = 0; + hose->last_busno = 0xef; + hose->mem_resources[0].name = np->full_name; + hose->mem_resources[0].start = 0x80000000; + hose->mem_resources[0].end = 0xefffffff; + hose->mem_resources[0].flags = IORESOURCE_MEM; + + if (u3_agp == NULL) { + DBG("U3 has no AGP, using full resource range\n"); + return; + } + + /* We "remove" the AGP resources from the resources allocated to HT, that + * is we create "holes". However, that code does assumptions that so far + * happen to be true (cross fingers...), typically that resources in the + * AGP node are properly ordered + */ + cur = 0; + for (i=0; i<3; i++) { + struct resource *res = &u3_agp->mem_resources[i]; + if (res->flags != IORESOURCE_MEM) + continue; + /* We don't care about "fine" resources */ + if (res->start >= 0xf0000000) + continue; + /* Check if it's just a matter of "shrinking" us in one direction */ + if (hose->mem_resources[cur].start == res->start) { + DBG("U3/HT: shrink start of %d, %08lx -> %08lx\n", + cur, hose->mem_resources[cur].start, res->end + 1); + hose->mem_resources[cur].start = res->end + 1; + continue; + } + if (hose->mem_resources[cur].end == res->end) { + DBG("U3/HT: shrink end of %d, %08lx -> %08lx\n", + cur, hose->mem_resources[cur].end, res->start - 1); + hose->mem_resources[cur].end = res->start - 1; + continue; + } + /* No, it's not the case, we need a hole */ + if (cur == 2) { + /* not enough resources for a hole, we drop part of the range */ + printk(KERN_WARNING "Running out of resources for /ht host !\n"); + hose->mem_resources[cur].end = res->start - 1; + continue; + } + cur++; + DBG("U3/HT: hole, %d end at %08lx, %d start at %08lx\n", + cur-1, res->start - 1, cur, res->end + 1); + hose->mem_resources[cur].name = np->full_name; + hose->mem_resources[cur].flags = IORESOURCE_MEM; + hose->mem_resources[cur].start = res->end + 1; + hose->mem_resources[cur].end = hose->mem_resources[cur-1].end; + hose->mem_resources[cur-1].end = res->start - 1; + } +} + +#endif /* CONFIG_POWER4 */ + +void __init +setup_grackle(struct pci_controller *hose) +{ + setup_indirect_pci(hose, 0xfec00000, 0xfee00000); + if (machine_is_compatible("AAPL,PowerBook1998")) + grackle_set_loop_snoop(hose, 1); +#if 0 /* Disabled for now, HW problems ??? */ + grackle_set_stg(hose, 1); +#endif +} + +static void __init pmac_process_bridge_OF_ranges(struct pci_controller *hose, + struct device_node *dev, int primary) +{ + static unsigned int static_lc_ranges[2024]; + unsigned int *dt_ranges, *lc_ranges, *ranges, *prev; + unsigned int size; + int rlen = 0, orig_rlen; + int memno = 0; + struct resource *res; + int np, na = prom_n_addr_cells(dev); + + np = na + 5; + + /* First we try to merge ranges to fix a problem with some pmacs + * that can have more than 3 ranges, fortunately using contiguous + * addresses -- BenH + */ + dt_ranges = (unsigned int *) get_property(dev, "ranges", &rlen); + if (!dt_ranges) + return; + /* lc_ranges = alloc_bootmem(rlen);*/ + lc_ranges = static_lc_ranges; + if (!lc_ranges) + return; /* what can we do here ? */ + memcpy(lc_ranges, dt_ranges, rlen); + orig_rlen = rlen; + + /* Let's work on a copy of the "ranges" property instead of damaging + * the device-tree image in memory + */ + ranges = lc_ranges; + prev = NULL; + while ((rlen -= np * sizeof(unsigned int)) >= 0) { + if (prev) { + if (prev[0] == ranges[0] && prev[1] == ranges[1] && + (prev[2] + prev[na+4]) == ranges[2] && + (prev[na+2] + prev[na+4]) == ranges[na+2]) { + prev[na+4] += ranges[na+4]; + ranges[0] = 0; + ranges += np; + continue; + } + } + prev = ranges; + ranges += np; + } + + /* + * The ranges property is laid out as an array of elements, + * each of which comprises: + * cells 0 - 2: a PCI address + * cells 3 or 3+4: a CPU physical address + * (size depending on dev->n_addr_cells) + * cells 4+5 or 5+6: the size of the range + */ + ranges = lc_ranges; + rlen = orig_rlen; + while (ranges && (rlen -= np * sizeof(unsigned int)) >= 0) { + res = NULL; + size = ranges[na+4]; + switch (ranges[0] >> 24) { + case 1: /* I/O space */ + if (ranges[2] != 0) + break; + hose->io_base_phys = ranges[na+2]; + /* limit I/O space to 16MB */ + if (size > 0x01000000) + size = 0x01000000; + hose->io_base_virt = ioremap(ranges[na+2], size); + if (primary) + isa_io_base = (unsigned long) hose->io_base_virt; + res = &hose->io_resource; + res->flags = IORESOURCE_IO; + res->start = ranges[2]; + break; + case 2: /* memory space */ + memno = 0; + if (ranges[1] == 0 && ranges[2] == 0 + && ranges[na+4] <= (16 << 20)) { + /* 1st 16MB, i.e. ISA memory area */ +#if 0 + if (primary) + isa_mem_base = ranges[na+2]; +#endif + memno = 1; + } + while (memno < 3 && hose->mem_resources[memno].flags) + ++memno; + if (memno == 0) + hose->pci_mem_offset = ranges[na+2] - ranges[2]; + if (memno < 3) { + res = &hose->mem_resources[memno]; + res->flags = IORESOURCE_MEM; + res->start = ranges[na+2]; + } + break; + } + if (res != NULL) { + res->name = dev->full_name; + res->end = res->start + size - 1; + res->parent = NULL; + res->sibling = NULL; + res->child = NULL; + } + ranges += np; + } +} + +/* + * We assume that if we have a G3 powermac, we have one bridge called + * "pci" (a MPC106) and no bandit or chaos bridges, and contrariwise, + * if we have one or more bandit or chaos bridges, we don't have a MPC106. + */ +static int __init add_bridge(struct device_node *dev) +{ + int len; + struct pci_controller *hose; + struct reg_property *addr; + char* disp_name; + int *bus_range; + int primary = 1; + + DBG("Adding PCI host bridge %s\n", dev->full_name); + + addr = (struct reg_property *) get_property(dev, "reg", &len); + if (addr == NULL || len < sizeof(*addr)) { + printk(KERN_WARNING "Can't use %s: no address\n", + dev->full_name); + return -ENODEV; + } + bus_range = (int *) get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n", + dev->full_name); + } + + hose = pcibios_alloc_controller(); + if (!hose) + return -ENOMEM; + hose->arch_data = dev; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + disp_name = NULL; +#ifdef CONFIG_POWER4 + if (device_is_compatible(dev, "u3-agp")) { + setup_u3_agp(hose, addr); + disp_name = "U3-AGP"; + primary = 0; + } else if (device_is_compatible(dev, "u3-ht")) { + setup_u3_ht(hose, addr); + disp_name = "U3-HT"; + primary = 1; + } else +#endif /* CONFIG_POWER4 */ + if (device_is_compatible(dev, "uni-north")) { + primary = setup_uninorth(hose, addr); + disp_name = "UniNorth"; + } else if (strcmp(dev->name, "pci") == 0) { + /* XXX assume this is a mpc106 (grackle) */ + setup_grackle(hose); + disp_name = "Grackle (MPC106)"; + } else if (strcmp(dev->name, "bandit") == 0) { + setup_bandit(hose, addr); + disp_name = "Bandit"; + } else if (strcmp(dev->name, "chaos") == 0) { + setup_chaos(hose, addr); + disp_name = "Chaos"; + primary = 0; + } + printk(KERN_INFO "Found %s PCI host bridge at 0x%08lx. Firmware bus number: %d->%d\n", + disp_name, addr->address, hose->first_busno, hose->last_busno); + DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n", + hose, hose->cfg_addr, hose->cfg_data); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, primary); + + /* Fixup "bus-range" OF property */ + fixup_bus_range(dev); + + return 0; +} + +static void __init +pcibios_fixup_OF_interrupts(void) +{ + struct pci_dev* dev = NULL; + + /* + * Open Firmware often doesn't initialize the + * PCI_INTERRUPT_LINE config register properly, so we + * should find the device node and apply the interrupt + * obtained from the OF device-tree + */ + for_each_pci_dev(dev) { + struct device_node *node; + node = pci_device_to_OF_node(dev); + /* this is the node, see if it has interrupts */ + if (node && node->n_intrs > 0) + dev->irq = node->intrs[0].line; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + } +} + +void __init +pmac_pcibios_fixup(void) +{ + /* Fixup interrupts according to OF tree */ + pcibios_fixup_OF_interrupts(); +} + +void __init pmac_find_bridges(void) +{ + struct device_node *np, *root; + struct device_node *ht = NULL; + + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_CRIT "pmac_find_bridges: can't find root of device tree\n"); + return; + } + for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) { + if (np->name == NULL) + continue; + if (strcmp(np->name, "bandit") == 0 + || strcmp(np->name, "chaos") == 0 + || strcmp(np->name, "pci") == 0) { + if (add_bridge(np) == 0) + of_node_get(np); + } + if (strcmp(np->name, "ht") == 0) { + of_node_get(np); + ht = np; + } + } + of_node_put(root); + + /* Probe HT last as it relies on the agp resources to be already + * setup + */ + if (ht && add_bridge(ht) != 0) + of_node_put(ht); + + init_p2pbridge(); + fixup_nec_usb2(); + + /* We are still having some issues with the Xserve G4, enabling + * some offset between bus number and domains for now when we + * assign all busses should help for now + */ + if (pci_assign_all_busses) + pcibios_assign_bus_offset = 0x10; + +} + +int +pmac_pci_enable_device_hook(struct pci_dev *dev, int initial) +{ + struct device_node* node; + int updatecfg = 0; + int uninorth_child; + + node = pci_device_to_OF_node(dev); + + /* We don't want to enable USB controllers absent from the OF tree + * (iBook second controller) + */ + if (dev->vendor == PCI_VENDOR_ID_APPLE + && (dev->class == ((PCI_CLASS_SERIAL_USB << 8) | 0x10)) + && !node) { + printk(KERN_INFO "Apple USB OHCI %s disabled by firmware\n", + pci_name(dev)); + return -EINVAL; + } + + if (!node) + return 0; + + uninorth_child = node->parent && + device_is_compatible(node->parent, "uni-north"); + + /* Firewire & GMAC were disabled after PCI probe, the driver is + * claiming them, we must re-enable them now. + */ + if (uninorth_child && !strcmp(node->name, "firewire") && + (device_is_compatible(node, "pci106b,18") || + device_is_compatible(node, "pci106b,30") || + device_is_compatible(node, "pci11c1,5811"))) { + pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, node, 0, 1); + pmac_call_feature(PMAC_FTR_1394_ENABLE, node, 0, 1); + updatecfg = 1; + } + if (uninorth_child && !strcmp(node->name, "ethernet") && + device_is_compatible(node, "gmac")) { + pmac_call_feature(PMAC_FTR_GMAC_ENABLE, node, 0, 1); + updatecfg = 1; + } + + if (updatecfg) { + u16 cmd; + + /* + * Make sure PCI is correctly configured + * + * We use old pci_bios versions of the function since, by + * default, gmac is not powered up, and so will be absent + * from the kernel initial PCI lookup. + * + * Should be replaced by 2.4 new PCI mechanisms and really + * register the device. + */ + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE; + pci_write_config_word(dev, PCI_COMMAND, cmd); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 16); + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, pci_cache_line_size); + } + + return 0; +} + +/* We power down some devices after they have been probed. They'll + * be powered back on later on + */ +void __init +pmac_pcibios_after_init(void) +{ + struct device_node* nd; + +#ifdef CONFIG_BLK_DEV_IDE + struct pci_dev *dev = NULL; + + /* OF fails to initialize IDE controllers on macs + * (and maybe other machines) + * + * Ideally, this should be moved to the IDE layer, but we need + * to check specifically with Andre Hedrick how to do it cleanly + * since the common IDE code seem to care about the fact that the + * BIOS may have disabled a controller. + * + * -- BenH + */ + for_each_pci_dev(dev) { + if ((dev->class >> 16) == PCI_BASE_CLASS_STORAGE) + pci_enable_device(dev); + } +#endif /* CONFIG_BLK_DEV_IDE */ + + nd = find_devices("firewire"); + while (nd) { + if (nd->parent && (device_is_compatible(nd, "pci106b,18") || + device_is_compatible(nd, "pci106b,30") || + device_is_compatible(nd, "pci11c1,5811")) + && device_is_compatible(nd->parent, "uni-north")) { + pmac_call_feature(PMAC_FTR_1394_ENABLE, nd, 0, 0); + pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, nd, 0, 0); + } + nd = nd->next; + } + nd = find_devices("ethernet"); + while (nd) { + if (nd->parent && device_is_compatible(nd, "gmac") + && device_is_compatible(nd->parent, "uni-north")) + pmac_call_feature(PMAC_FTR_GMAC_ENABLE, nd, 0, 0); + nd = nd->next; + } +} + +#ifdef CONFIG_PPC64 +static void __init pmac_fixup_phb_resources(void) +{ + struct pci_controller *hose, *tmp; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; + hose->io_resource.start += offset; + hose->io_resource.end += offset; + printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n", + hose->global_number, + hose->io_resource.start, hose->io_resource.end); + } +} + +void __init pmac_pci_init(void) +{ + struct device_node *np, *root; + struct device_node *ht = NULL; + + /* Probe root PCI hosts, that is on U3 the AGP host and the + * HyperTransport host. That one is actually "kept" around + * and actually added last as it's resource management relies + * on the AGP resources to have been setup first + */ + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_CRIT "pmac_find_bridges: can't find root of device tree\n"); + return; + } + for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) { + if (np->name == NULL) + continue; + if (strcmp(np->name, "pci") == 0) { + if (add_bridge(np) == 0) + of_node_get(np); + } + if (strcmp(np->name, "ht") == 0) { + of_node_get(np); + ht = np; + } + } + of_node_put(root); + + /* Now setup the HyperTransport host if we found any + */ + if (ht && add_bridge(ht) != 0) + of_node_put(ht); + + /* Fixup the IO resources on our host bridges as the common code + * does it only for childs of the host bridges + */ + pmac_fixup_phb_resources(); + + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); + + /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We + * assume there is no P2P bridge on the AGP bus, which should be a + * safe assumptions hopefully. + */ + if (u3_agp) { + struct device_node *np = u3_agp->arch_data; + PCI_DN(np)->busno = 0xf0; + for (np = np->child; np; np = np->sibling) + PCI_DN(np)->busno = 0xf0; + } + + pmac_check_ht_link(); + + /* Tell pci.c to not use the common resource allocation mecanism */ + pci_probe_only = 1; + + /* Allow all IO */ + io_page_mask = -1; +} +#endif + +#ifdef CONFIG_PPC32 +void pmac_pci_fixup_cardbus(struct pci_dev* dev) +{ + if (_machine != _MACH_Pmac) + return; + /* + * Fix the interrupt routing on the various cardbus bridges + * used on powerbooks + */ + if (dev->vendor != PCI_VENDOR_ID_TI) + return; + if (dev->device == PCI_DEVICE_ID_TI_1130 || + dev->device == PCI_DEVICE_ID_TI_1131) { + u8 val; + /* Enable PCI interrupt */ + if (pci_read_config_byte(dev, 0x91, &val) == 0) + pci_write_config_byte(dev, 0x91, val | 0x30); + /* Disable ISA interrupt mode */ + if (pci_read_config_byte(dev, 0x92, &val) == 0) + pci_write_config_byte(dev, 0x92, val & ~0x06); + } + if (dev->device == PCI_DEVICE_ID_TI_1210 || + dev->device == PCI_DEVICE_ID_TI_1211 || + dev->device == PCI_DEVICE_ID_TI_1410 || + dev->device == PCI_DEVICE_ID_TI_1510) { + u8 val; + /* 0x8c == TI122X_IRQMUX, 2 says to route the INTA + signal out the MFUNC0 pin */ + if (pci_read_config_byte(dev, 0x8c, &val) == 0) + pci_write_config_byte(dev, 0x8c, (val & ~0x0f) | 2); + /* Disable ISA interrupt mode */ + if (pci_read_config_byte(dev, 0x92, &val) == 0) + pci_write_config_byte(dev, 0x92, val & ~0x06); + } +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_ANY_ID, pmac_pci_fixup_cardbus); + +void pmac_pci_fixup_pciata(struct pci_dev* dev) +{ + u8 progif = 0; + + /* + * On PowerMacs, we try to switch any PCI ATA controller to + * fully native mode + */ + if (_machine != _MACH_Pmac) + return; + /* Some controllers don't have the class IDE */ + if (dev->vendor == PCI_VENDOR_ID_PROMISE) + switch(dev->device) { + case PCI_DEVICE_ID_PROMISE_20246: + case PCI_DEVICE_ID_PROMISE_20262: + case PCI_DEVICE_ID_PROMISE_20263: + case PCI_DEVICE_ID_PROMISE_20265: + case PCI_DEVICE_ID_PROMISE_20267: + case PCI_DEVICE_ID_PROMISE_20268: + case PCI_DEVICE_ID_PROMISE_20269: + case PCI_DEVICE_ID_PROMISE_20270: + case PCI_DEVICE_ID_PROMISE_20271: + case PCI_DEVICE_ID_PROMISE_20275: + case PCI_DEVICE_ID_PROMISE_20276: + case PCI_DEVICE_ID_PROMISE_20277: + goto good; + } + /* Others, check PCI class */ + if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE) + return; + good: + pci_read_config_byte(dev, PCI_CLASS_PROG, &progif); + if ((progif & 5) != 5) { + printk(KERN_INFO "Forcing PCI IDE into native mode: %s\n", pci_name(dev)); + (void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5); + if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || + (progif & 5) != 5) + printk(KERN_ERR "Rewrite of PROGIF failed !\n"); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pmac_pci_fixup_pciata); +#endif + +/* + * Disable second function on K2-SATA, it's broken + * and disable IO BARs on first one + */ +static void fixup_k2_sata(struct pci_dev* dev) +{ + int i; + u16 cmd; + + if (PCI_FUNC(dev->devfn) > 0) { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 6; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0); + } + } else { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~PCI_COMMAND_IO; + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 5; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0); + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS, 0x0240, fixup_k2_sata); + diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c new file mode 100644 index 00000000000..58017d18db7 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pic.c @@ -0,0 +1,682 @@ +/* + * Support for the interrupt controllers found on Power Macintosh, + * currently Apple's "Grand Central" interrupt controller in all + * it's incarnations. OpenPIC support used on newer machines is + * in a separate file + * + * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) + * + * Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/stddef.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/sysdev.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/module.h> + +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/time.h> +#include <asm/pmac_feature.h> +#include <asm/mpic.h> + +#include "pmac.h" + +/* + * XXX this should be in xmon.h, but putting it there means xmon.h + * has to include <linux/interrupt.h> (to get irqreturn_t), which + * causes all sorts of problems. -- paulus + */ +extern irqreturn_t xmon_irq(int, void *, struct pt_regs *); + +#ifdef CONFIG_PPC32 +struct pmac_irq_hw { + unsigned int event; + unsigned int enable; + unsigned int ack; + unsigned int level; +}; + +/* Default addresses */ +static volatile struct pmac_irq_hw *pmac_irq_hw[4] = { + (struct pmac_irq_hw *) 0xf3000020, + (struct pmac_irq_hw *) 0xf3000010, + (struct pmac_irq_hw *) 0xf4000020, + (struct pmac_irq_hw *) 0xf4000010, +}; + +#define GC_LEVEL_MASK 0x3ff00000 +#define OHARE_LEVEL_MASK 0x1ff00000 +#define HEATHROW_LEVEL_MASK 0x1ff00000 + +static int max_irqs; +static int max_real_irqs; +static u32 level_mask[4]; + +static DEFINE_SPINLOCK(pmac_pic_lock); + +/* XXX here for now, should move to arch/powerpc/kernel/irq.c */ +int ppc_do_canonicalize_irqs; +EXPORT_SYMBOL(ppc_do_canonicalize_irqs); + +#define GATWICK_IRQ_POOL_SIZE 10 +static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE]; + +/* + * Mark an irq as "lost". This is only used on the pmac + * since it can lose interrupts (see pmac_set_irq_mask). + * -- Cort + */ +void +__set_lost(unsigned long irq_nr, int nokick) +{ + if (!test_and_set_bit(irq_nr, ppc_lost_interrupts)) { + atomic_inc(&ppc_n_lost_interrupts); + if (!nokick) + set_dec(1); + } +} + +static void +pmac_mask_and_ack_irq(unsigned int irq_nr) +{ + unsigned long bit = 1UL << (irq_nr & 0x1f); + int i = irq_nr >> 5; + unsigned long flags; + + if ((unsigned)irq_nr >= max_irqs) + return; + + clear_bit(irq_nr, ppc_cached_irq_mask); + if (test_and_clear_bit(irq_nr, ppc_lost_interrupts)) + atomic_dec(&ppc_n_lost_interrupts); + spin_lock_irqsave(&pmac_pic_lock, flags); + out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); + out_le32(&pmac_irq_hw[i]->ack, bit); + do { + /* make sure ack gets to controller before we enable + interrupts */ + mb(); + } while((in_le32(&pmac_irq_hw[i]->enable) & bit) + != (ppc_cached_irq_mask[i] & bit)); + spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +static void pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) +{ + unsigned long bit = 1UL << (irq_nr & 0x1f); + int i = irq_nr >> 5; + unsigned long flags; + + if ((unsigned)irq_nr >= max_irqs) + return; + + spin_lock_irqsave(&pmac_pic_lock, flags); + /* enable unmasked interrupts */ + out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); + + do { + /* make sure mask gets to controller before we + return to user */ + mb(); + } while((in_le32(&pmac_irq_hw[i]->enable) & bit) + != (ppc_cached_irq_mask[i] & bit)); + + /* + * Unfortunately, setting the bit in the enable register + * when the device interrupt is already on *doesn't* set + * the bit in the flag register or request another interrupt. + */ + if (bit & ppc_cached_irq_mask[i] & in_le32(&pmac_irq_hw[i]->level)) + __set_lost((ulong)irq_nr, nokicklost); + spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +/* When an irq gets requested for the first client, if it's an + * edge interrupt, we clear any previous one on the controller + */ +static unsigned int pmac_startup_irq(unsigned int irq_nr) +{ + unsigned long bit = 1UL << (irq_nr & 0x1f); + int i = irq_nr >> 5; + + if ((irq_desc[irq_nr].status & IRQ_LEVEL) == 0) + out_le32(&pmac_irq_hw[i]->ack, bit); + set_bit(irq_nr, ppc_cached_irq_mask); + pmac_set_irq_mask(irq_nr, 0); + + return 0; +} + +static void pmac_mask_irq(unsigned int irq_nr) +{ + clear_bit(irq_nr, ppc_cached_irq_mask); + pmac_set_irq_mask(irq_nr, 0); + mb(); +} + +static void pmac_unmask_irq(unsigned int irq_nr) +{ + set_bit(irq_nr, ppc_cached_irq_mask); + pmac_set_irq_mask(irq_nr, 0); +} + +static void pmac_end_irq(unsigned int irq_nr) +{ + if (!(irq_desc[irq_nr].status & (IRQ_DISABLED|IRQ_INPROGRESS)) + && irq_desc[irq_nr].action) { + set_bit(irq_nr, ppc_cached_irq_mask); + pmac_set_irq_mask(irq_nr, 1); + } +} + + +struct hw_interrupt_type pmac_pic = { + .typename = " PMAC-PIC ", + .startup = pmac_startup_irq, + .enable = pmac_unmask_irq, + .disable = pmac_mask_irq, + .ack = pmac_mask_and_ack_irq, + .end = pmac_end_irq, +}; + +struct hw_interrupt_type gatwick_pic = { + .typename = " GATWICK ", + .startup = pmac_startup_irq, + .enable = pmac_unmask_irq, + .disable = pmac_mask_irq, + .ack = pmac_mask_and_ack_irq, + .end = pmac_end_irq, +}; + +static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs) +{ + int irq, bits; + + for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) { + int i = irq >> 5; + bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; + /* We must read level interrupts from the level register */ + bits |= (in_le32(&pmac_irq_hw[i]->level) & level_mask[i]); + bits &= ppc_cached_irq_mask[i]; + if (bits == 0) + continue; + irq += __ilog2(bits); + __do_IRQ(irq, regs); + return IRQ_HANDLED; + } + printk("gatwick irq not from gatwick pic\n"); + return IRQ_NONE; +} + +int +pmac_get_irq(struct pt_regs *regs) +{ + int irq; + unsigned long bits = 0; + +#ifdef CONFIG_SMP + void psurge_smp_message_recv(struct pt_regs *); + + /* IPI's are a hack on the powersurge -- Cort */ + if ( smp_processor_id() != 0 ) { + psurge_smp_message_recv(regs); + return -2; /* ignore, already handled */ + } +#endif /* CONFIG_SMP */ + for (irq = max_real_irqs; (irq -= 32) >= 0; ) { + int i = irq >> 5; + bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; + /* We must read level interrupts from the level register */ + bits |= (in_le32(&pmac_irq_hw[i]->level) & level_mask[i]); + bits &= ppc_cached_irq_mask[i]; + if (bits == 0) + continue; + irq += __ilog2(bits); + break; + } + + return irq; +} + +/* This routine will fix some missing interrupt values in the device tree + * on the gatwick mac-io controller used by some PowerBooks + */ +static void __init +pmac_fix_gatwick_interrupts(struct device_node *gw, int irq_base) +{ + struct device_node *node; + int count; + + memset(gatwick_int_pool, 0, sizeof(gatwick_int_pool)); + node = gw->child; + count = 0; + while(node) + { + /* Fix SCC */ + if (strcasecmp(node->name, "escc") == 0) + if (node->child) { + if (node->child->n_intrs < 3) { + node->child->intrs = &gatwick_int_pool[count]; + count += 3; + } + node->child->n_intrs = 3; + node->child->intrs[0].line = 15+irq_base; + node->child->intrs[1].line = 4+irq_base; + node->child->intrs[2].line = 5+irq_base; + printk(KERN_INFO "irq: fixed SCC on second controller (%d,%d,%d)\n", + node->child->intrs[0].line, + node->child->intrs[1].line, + node->child->intrs[2].line); + } + /* Fix media-bay & left SWIM */ + if (strcasecmp(node->name, "media-bay") == 0) { + struct device_node* ya_node; + + if (node->n_intrs == 0) + node->intrs = &gatwick_int_pool[count++]; + node->n_intrs = 1; + node->intrs[0].line = 29+irq_base; + printk(KERN_INFO "irq: fixed media-bay on second controller (%d)\n", + node->intrs[0].line); + + ya_node = node->child; + while(ya_node) + { + if (strcasecmp(ya_node->name, "floppy") == 0) { + if (ya_node->n_intrs < 2) { + ya_node->intrs = &gatwick_int_pool[count]; + count += 2; + } + ya_node->n_intrs = 2; + ya_node->intrs[0].line = 19+irq_base; + ya_node->intrs[1].line = 1+irq_base; + printk(KERN_INFO "irq: fixed floppy on second controller (%d,%d)\n", + ya_node->intrs[0].line, ya_node->intrs[1].line); + } + if (strcasecmp(ya_node->name, "ata4") == 0) { + if (ya_node->n_intrs < 2) { + ya_node->intrs = &gatwick_int_pool[count]; + count += 2; + } + ya_node->n_intrs = 2; + ya_node->intrs[0].line = 14+irq_base; + ya_node->intrs[1].line = 3+irq_base; + printk(KERN_INFO "irq: fixed ide on second controller (%d,%d)\n", + ya_node->intrs[0].line, ya_node->intrs[1].line); + } + ya_node = ya_node->sibling; + } + } + node = node->sibling; + } + if (count > 10) { + printk("WARNING !! Gatwick interrupt pool overflow\n"); + printk(" GATWICK_IRQ_POOL_SIZE = %d\n", GATWICK_IRQ_POOL_SIZE); + printk(" requested = %d\n", count); + } +} + +/* + * The PowerBook 3400/2400/3500 can have a combo ethernet/modem + * card which includes an ohare chip that acts as a second interrupt + * controller. If we find this second ohare, set it up and fix the + * interrupt value in the device tree for the ethernet chip. + */ +static int __init enable_second_ohare(void) +{ + unsigned char bus, devfn; + unsigned short cmd; + unsigned long addr; + struct device_node *irqctrler = find_devices("pci106b,7"); + struct device_node *ether; + + if (irqctrler == NULL || irqctrler->n_addrs <= 0) + return -1; + addr = (unsigned long) ioremap(irqctrler->addrs[0].address, 0x40); + pmac_irq_hw[1] = (volatile struct pmac_irq_hw *)(addr + 0x20); + max_irqs = 64; + if (pci_device_from_OF_node(irqctrler, &bus, &devfn) == 0) { + struct pci_controller* hose = pci_find_hose_for_OF_device(irqctrler); + if (!hose) + printk(KERN_ERR "Can't find PCI hose for OHare2 !\n"); + else { + early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd); + cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; + cmd &= ~PCI_COMMAND_IO; + early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd); + } + } + + /* Fix interrupt for the modem/ethernet combo controller. The number + in the device tree (27) is bogus (correct for the ethernet-only + board but not the combo ethernet/modem board). + The real interrupt is 28 on the second controller -> 28+32 = 60. + */ + ether = find_devices("pci1011,14"); + if (ether && ether->n_intrs > 0) { + ether->intrs[0].line = 60; + printk(KERN_INFO "irq: Fixed ethernet IRQ to %d\n", + ether->intrs[0].line); + } + + /* Return the interrupt number of the cascade */ + return irqctrler->intrs[0].line; +} + +#ifdef CONFIG_XMON +static struct irqaction xmon_action = { + .handler = xmon_irq, + .flags = 0, + .mask = CPU_MASK_NONE, + .name = "NMI - XMON" +}; +#endif + +static struct irqaction gatwick_cascade_action = { + .handler = gatwick_action, + .flags = SA_INTERRUPT, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; +#endif /* CONFIG_PPC32 */ + +static int pmac_u3_cascade(struct pt_regs *regs, void *data) +{ + return mpic_get_one_irq((struct mpic *)data, regs); +} + +void __init pmac_pic_init(void) +{ + struct device_node *irqctrler = NULL; + struct device_node *irqctrler2 = NULL; + struct device_node *np; +#ifdef CONFIG_PPC32 + int i; + unsigned long addr; + int irq_cascade = -1; +#endif + struct mpic *mpic1, *mpic2; + + /* We first try to detect Apple's new Core99 chipset, since mac-io + * is quite different on those machines and contains an IBM MPIC2. + */ + np = find_type_devices("open-pic"); + while (np) { + if (np->parent && !strcmp(np->parent->name, "u3")) + irqctrler2 = np; + else + irqctrler = np; + np = np->next; + } + if (irqctrler != NULL && irqctrler->n_addrs > 0) { + unsigned char senses[128]; + + printk(KERN_INFO "PowerMac using OpenPIC irq controller at 0x%08x\n", + (unsigned int)irqctrler->addrs[0].address); + ppc_md.get_irq = mpic_get_irq; + pmac_call_feature(PMAC_FTR_ENABLE_MPIC, irqctrler, 0, 0); + + prom_get_irq_senses(senses, 0, 128); + mpic1 = mpic_alloc(irqctrler->addrs[0].address, + MPIC_PRIMARY | MPIC_WANTS_RESET, + 0, 0, 128, 252, senses, 128, " OpenPIC "); + BUG_ON(mpic1 == NULL); + mpic_init(mpic1); + + if (irqctrler2 != NULL && irqctrler2->n_intrs > 0 && + irqctrler2->n_addrs > 0) { + printk(KERN_INFO "Slave OpenPIC at 0x%08x hooked on IRQ %d\n", + (u32)irqctrler2->addrs[0].address, + irqctrler2->intrs[0].line); + + pmac_call_feature(PMAC_FTR_ENABLE_MPIC, irqctrler2, 0, 0); + prom_get_irq_senses(senses, 128, 128 + 124); + + /* We don't need to set MPIC_BROKEN_U3 here since we don't have + * hypertransport interrupts routed to it + */ + mpic2 = mpic_alloc(irqctrler2->addrs[0].address, + MPIC_BIG_ENDIAN | MPIC_WANTS_RESET, + 0, 128, 124, 0, senses, 124, + " U3-MPIC "); + BUG_ON(mpic2 == NULL); + mpic_init(mpic2); + mpic_setup_cascade(irqctrler2->intrs[0].line, + pmac_u3_cascade, mpic2); + } +#if defined(CONFIG_XMON) && defined(CONFIG_PPC32) + { + struct device_node* pswitch; + int nmi_irq; + + pswitch = find_devices("programmer-switch"); + if (pswitch && pswitch->n_intrs) { + nmi_irq = pswitch->intrs[0].line; + mpic_irq_set_priority(nmi_irq, 9); + setup_irq(nmi_irq, &xmon_action); + } + } +#endif /* CONFIG_XMON */ + return; + } + irqctrler = NULL; + +#ifdef CONFIG_PPC32 + /* Get the level/edge settings, assume if it's not + * a Grand Central nor an OHare, then it's an Heathrow + * (or Paddington). + */ + if (find_devices("gc")) + level_mask[0] = GC_LEVEL_MASK; + else if (find_devices("ohare")) { + level_mask[0] = OHARE_LEVEL_MASK; + /* We might have a second cascaded ohare */ + level_mask[1] = OHARE_LEVEL_MASK; + } else { + level_mask[0] = HEATHROW_LEVEL_MASK; + level_mask[1] = 0; + /* We might have a second cascaded heathrow */ + level_mask[2] = HEATHROW_LEVEL_MASK; + level_mask[3] = 0; + } + + /* + * G3 powermacs and 1999 G3 PowerBooks have 64 interrupts, + * 1998 G3 Series PowerBooks have 128, + * other powermacs have 32. + * The combo ethernet/modem card for the Powerstar powerbooks + * (2400/3400/3500, ohare based) has a second ohare chip + * effectively making a total of 64. + */ + max_irqs = max_real_irqs = 32; + irqctrler = find_devices("mac-io"); + if (irqctrler) + { + max_real_irqs = 64; + if (irqctrler->next) + max_irqs = 128; + else + max_irqs = 64; + } + for ( i = 0; i < max_real_irqs ; i++ ) + irq_desc[i].handler = &pmac_pic; + + /* get addresses of first controller */ + if (irqctrler) { + if (irqctrler->n_addrs > 0) { + addr = (unsigned long) + ioremap(irqctrler->addrs[0].address, 0x40); + for (i = 0; i < 2; ++i) + pmac_irq_hw[i] = (volatile struct pmac_irq_hw*) + (addr + (2 - i) * 0x10); + } + + /* get addresses of second controller */ + irqctrler = irqctrler->next; + if (irqctrler && irqctrler->n_addrs > 0) { + addr = (unsigned long) + ioremap(irqctrler->addrs[0].address, 0x40); + for (i = 2; i < 4; ++i) + pmac_irq_hw[i] = (volatile struct pmac_irq_hw*) + (addr + (4 - i) * 0x10); + irq_cascade = irqctrler->intrs[0].line; + if (device_is_compatible(irqctrler, "gatwick")) + pmac_fix_gatwick_interrupts(irqctrler, max_real_irqs); + } + } else { + /* older powermacs have a GC (grand central) or ohare at + f3000000, with interrupt control registers at f3000020. */ + addr = (unsigned long) ioremap(0xf3000000, 0x40); + pmac_irq_hw[0] = (volatile struct pmac_irq_hw *) (addr + 0x20); + } + + /* PowerBooks 3400 and 3500 can have a second controller in a second + ohare chip, on the combo ethernet/modem card */ + if (machine_is_compatible("AAPL,3400/2400") + || machine_is_compatible("AAPL,3500")) + irq_cascade = enable_second_ohare(); + + /* disable all interrupts in all controllers */ + for (i = 0; i * 32 < max_irqs; ++i) + out_le32(&pmac_irq_hw[i]->enable, 0); + /* mark level interrupts */ + for (i = 0; i < max_irqs; i++) + if (level_mask[i >> 5] & (1UL << (i & 0x1f))) + irq_desc[i].status = IRQ_LEVEL; + + /* get interrupt line of secondary interrupt controller */ + if (irq_cascade >= 0) { + printk(KERN_INFO "irq: secondary controller on irq %d\n", + (int)irq_cascade); + for ( i = max_real_irqs ; i < max_irqs ; i++ ) + irq_desc[i].handler = &gatwick_pic; + setup_irq(irq_cascade, &gatwick_cascade_action); + } + printk("System has %d possible interrupts\n", max_irqs); + if (max_irqs != max_real_irqs) + printk(KERN_DEBUG "%d interrupts on main controller\n", + max_real_irqs); + +#ifdef CONFIG_XMON + setup_irq(20, &xmon_action); +#endif /* CONFIG_XMON */ +#endif /* CONFIG_PPC32 */ +} + +#ifdef CONFIG_PM +/* + * These procedures are used in implementing sleep on the powerbooks. + * sleep_save_intrs() saves the states of all interrupt enables + * and disables all interrupts except for the nominated one. + * sleep_restore_intrs() restores the states of all interrupt enables. + */ +unsigned long sleep_save_mask[2]; + +/* This used to be passed by the PMU driver but that link got + * broken with the new driver model. We use this tweak for now... + */ +static int pmacpic_find_viaint(void) +{ + int viaint = -1; + +#ifdef CONFIG_ADB_PMU + struct device_node *np; + + if (pmu_get_model() != PMU_OHARE_BASED) + goto not_found; + np = of_find_node_by_name(NULL, "via-pmu"); + if (np == NULL) + goto not_found; + viaint = np->intrs[0].line; +#endif /* CONFIG_ADB_PMU */ + +not_found: + return viaint; +} + +static int pmacpic_suspend(struct sys_device *sysdev, pm_message_t state) +{ + int viaint = pmacpic_find_viaint(); + + sleep_save_mask[0] = ppc_cached_irq_mask[0]; + sleep_save_mask[1] = ppc_cached_irq_mask[1]; + ppc_cached_irq_mask[0] = 0; + ppc_cached_irq_mask[1] = 0; + if (viaint > 0) + set_bit(viaint, ppc_cached_irq_mask); + out_le32(&pmac_irq_hw[0]->enable, ppc_cached_irq_mask[0]); + if (max_real_irqs > 32) + out_le32(&pmac_irq_hw[1]->enable, ppc_cached_irq_mask[1]); + (void)in_le32(&pmac_irq_hw[0]->event); + /* make sure mask gets to controller before we return to caller */ + mb(); + (void)in_le32(&pmac_irq_hw[0]->enable); + + return 0; +} + +static int pmacpic_resume(struct sys_device *sysdev) +{ + int i; + + out_le32(&pmac_irq_hw[0]->enable, 0); + if (max_real_irqs > 32) + out_le32(&pmac_irq_hw[1]->enable, 0); + mb(); + for (i = 0; i < max_real_irqs; ++i) + if (test_bit(i, sleep_save_mask)) + pmac_unmask_irq(i); + + return 0; +} + +#endif /* CONFIG_PM */ + +static struct sysdev_class pmacpic_sysclass = { + set_kset_name("pmac_pic"), +}; + +static struct sys_device device_pmacpic = { + .id = 0, + .cls = &pmacpic_sysclass, +}; + +static struct sysdev_driver driver_pmacpic = { +#ifdef CONFIG_PM + .suspend = &pmacpic_suspend, + .resume = &pmacpic_resume, +#endif /* CONFIG_PM */ +}; + +static int __init init_pmacpic_sysfs(void) +{ +#ifdef CONFIG_PPC32 + if (max_irqs == 0) + return -ENODEV; +#endif + printk(KERN_DEBUG "Registering pmac pic with sysfs...\n"); + sysdev_class_register(&pmacpic_sysclass); + sysdev_register(&device_pmacpic); + sysdev_driver_register(&pmacpic_sysclass, &driver_pmacpic); + return 0; +} + +subsys_initcall(init_pmacpic_sysfs); + diff --git a/arch/powerpc/platforms/powermac/pic.h b/arch/powerpc/platforms/powermac/pic.h new file mode 100644 index 00000000000..664103dfeef --- /dev/null +++ b/arch/powerpc/platforms/powermac/pic.h @@ -0,0 +1,11 @@ +#ifndef __PPC_PLATFORMS_PMAC_PIC_H +#define __PPC_PLATFORMS_PMAC_PIC_H + +#include <linux/irq.h> + +extern struct hw_interrupt_type pmac_pic; + +void pmac_pic_init(void); +int pmac_get_irq(struct pt_regs *regs); + +#endif /* __PPC_PLATFORMS_PMAC_PIC_H */ diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h new file mode 100644 index 00000000000..81f52512b04 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -0,0 +1,48 @@ +#ifndef __PMAC_H__ +#define __PMAC_H__ + +#include <linux/pci.h> +#include <linux/ide.h> +#include <linux/irq.h> + +/* + * Declaration for the various functions exported by the + * pmac_* files. Mostly for use by pmac_setup + */ + +extern long pmac_time_init(void); +extern unsigned long pmac_get_rtc_time(void); +extern int pmac_set_rtc_time(unsigned long nowtime); +extern void pmac_read_rtc_time(void); +extern void pmac_calibrate_decr(void); +extern void pmac_pcibios_fixup(void); +extern void pmac_find_bridges(void); +extern unsigned long pmac_ide_get_base(int index); +extern void pmac_ide_init_hwif_ports(hw_regs_t *hw, + unsigned long data_port, unsigned long ctrl_port, int *irq); + +extern void pmac_nvram_update(void); +extern unsigned char pmac_nvram_read_byte(int addr); +extern void pmac_nvram_write_byte(int addr, unsigned char val); +extern int pmac_pci_enable_device_hook(struct pci_dev *dev, int initial); +extern void pmac_pcibios_after_init(void); +extern int of_show_percpuinfo(struct seq_file *m, int i); + +extern void pmac_pci_init(void); +extern void pmac_setup_pci_dma(void); +extern void pmac_check_ht_link(void); + +extern void pmac_setup_smp(void); + +extern unsigned long pmac_ide_get_base(int index); +extern void pmac_ide_init_hwif_ports(hw_regs_t *hw, + unsigned long data_port, unsigned long ctrl_port, int *irq); + +extern void pmac_nvram_init(void); + +extern struct hw_interrupt_type pmac_pic; + +void pmac_pic_init(void); +int pmac_get_irq(struct pt_regs *regs); + +#endif /* __PMAC_H__ */ diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c new file mode 100644 index 00000000000..9416fcaa6da --- /dev/null +++ b/arch/powerpc/platforms/powermac/setup.c @@ -0,0 +1,645 @@ +/* + * arch/ppc/platforms/setup.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * + * Derived from "arch/alpha/kernel/setup.c" + * Copyright (C) 1995 Linus Torvalds + * + * Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +/* + * bootup setup stuff.. + */ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/tty.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/ioport.h> +#include <linux/major.h> +#include <linux/initrd.h> +#include <linux/vt_kern.h> +#include <linux/console.h> +#include <linux/ide.h> +#include <linux/pci.h> +#include <linux/adb.h> +#include <linux/cuda.h> +#include <linux/pmu.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/bitops.h> +#include <linux/suspend.h> + +#include <asm/reg.h> +#include <asm/sections.h> +#include <asm/prom.h> +#include <asm/system.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/pci-bridge.h> +#include <asm/ohare.h> +#include <asm/mediabay.h> +#include <asm/machdep.h> +#include <asm/dma.h> +#include <asm/bootx.h> +#include <asm/cputable.h> +#include <asm/btext.h> +#include <asm/pmac_feature.h> +#include <asm/time.h> +#include <asm/of_device.h> +#include <asm/mmu_context.h> + +#include "pmac.h" + +#undef SHOW_GATWICK_IRQS + +unsigned char drive_info; + +int ppc_override_l2cr = 0; +int ppc_override_l2cr_value; +int has_l2cache = 0; + +int pmac_newworld = 1; + +static int current_root_goodness = -1; + +extern int pmac_newworld; + +#define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */ + +extern void zs_kgdb_hook(int tty_num); +static void ohare_init(void); +#ifdef CONFIG_BOOTX_TEXT +static void pmac_progress(char *s, unsigned short hex); +#endif + +sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN; + +#ifdef CONFIG_SMP +extern struct smp_ops_t psurge_smp_ops; +extern struct smp_ops_t core99_smp_ops; +#endif /* CONFIG_SMP */ + +static int +pmac_show_cpuinfo(struct seq_file *m) +{ + struct device_node *np; + char *pp; + int plen; + int mbmodel = pmac_call_feature(PMAC_FTR_GET_MB_INFO, + NULL, PMAC_MB_INFO_MODEL, 0); + unsigned int mbflags = (unsigned int)pmac_call_feature(PMAC_FTR_GET_MB_INFO, + NULL, PMAC_MB_INFO_FLAGS, 0); + char* mbname; + + if (pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, PMAC_MB_INFO_NAME, (int)&mbname) != 0) + mbname = "Unknown"; + + /* find motherboard type */ + seq_printf(m, "machine\t\t: "); + np = find_devices("device-tree"); + if (np != NULL) { + pp = (char *) get_property(np, "model", NULL); + if (pp != NULL) + seq_printf(m, "%s\n", pp); + else + seq_printf(m, "PowerMac\n"); + pp = (char *) get_property(np, "compatible", &plen); + if (pp != NULL) { + seq_printf(m, "motherboard\t:"); + while (plen > 0) { + int l = strlen(pp) + 1; + seq_printf(m, " %s", pp); + plen -= l; + pp += l; + } + seq_printf(m, "\n"); + } + } else + seq_printf(m, "PowerMac\n"); + + /* print parsed model */ + seq_printf(m, "detected as\t: %d (%s)\n", mbmodel, mbname); + seq_printf(m, "pmac flags\t: %08x\n", mbflags); + + /* find l2 cache info */ + np = find_devices("l2-cache"); + if (np == 0) + np = find_type_devices("cache"); + if (np != 0) { + unsigned int *ic = (unsigned int *) + get_property(np, "i-cache-size", NULL); + unsigned int *dc = (unsigned int *) + get_property(np, "d-cache-size", NULL); + seq_printf(m, "L2 cache\t:"); + has_l2cache = 1; + if (get_property(np, "cache-unified", NULL) != 0 && dc) { + seq_printf(m, " %dK unified", *dc / 1024); + } else { + if (ic) + seq_printf(m, " %dK instruction", *ic / 1024); + if (dc) + seq_printf(m, "%s %dK data", + (ic? " +": ""), *dc / 1024); + } + pp = get_property(np, "ram-type", NULL); + if (pp) + seq_printf(m, " %s", pp); + seq_printf(m, "\n"); + } + + /* find ram info */ + np = find_devices("memory"); + if (np != 0) { + int n; + struct reg_property *reg = (struct reg_property *) + get_property(np, "reg", &n); + + if (reg != 0) { + unsigned long total = 0; + + for (n /= sizeof(struct reg_property); n > 0; --n) + total += (reg++)->size; + seq_printf(m, "memory\t\t: %luMB\n", total >> 20); + } + } + + /* Checks "l2cr-value" property in the registry */ + np = find_devices("cpus"); + if (np == 0) + np = find_type_devices("cpu"); + if (np != 0) { + unsigned int *l2cr = (unsigned int *) + get_property(np, "l2cr-value", NULL); + if (l2cr != 0) { + seq_printf(m, "l2cr override\t: 0x%x\n", *l2cr); + } + } + + /* Indicate newworld/oldworld */ + seq_printf(m, "pmac-generation\t: %s\n", + pmac_newworld ? "NewWorld" : "OldWorld"); + + + return 0; +} + +static int +pmac_show_percpuinfo(struct seq_file *m, int i) +{ +#ifdef CONFIG_CPU_FREQ_PMAC + extern unsigned int pmac_get_one_cpufreq(int i); + unsigned int freq = pmac_get_one_cpufreq(i); + if (freq != 0) { + seq_printf(m, "clock\t\t: %dMHz\n", freq/1000); + return 0; + } +#endif /* CONFIG_CPU_FREQ_PMAC */ + return of_show_percpuinfo(m, i); +} + +static volatile u32 *sysctrl_regs; + +void __init +pmac_setup_arch(void) +{ + struct device_node *cpu; + int *fp; + unsigned long pvr; + + pvr = PVR_VER(mfspr(SPRN_PVR)); + + /* Set loops_per_jiffy to a half-way reasonable value, + for use until calibrate_delay gets called. */ + cpu = find_type_devices("cpu"); + if (cpu != 0) { + fp = (int *) get_property(cpu, "clock-frequency", NULL); + if (fp != 0) { + if (pvr == 4 || pvr >= 8) + /* 604, G3, G4 etc. */ + loops_per_jiffy = *fp / HZ; + else + /* 601, 603, etc. */ + loops_per_jiffy = *fp / (2*HZ); + } else + loops_per_jiffy = 50000000 / HZ; + } + + /* this area has the CPU identification register + and some registers used by smp boards */ + sysctrl_regs = (volatile u32 *) ioremap(0xf8000000, 0x1000); + ohare_init(); + + /* Lookup PCI hosts */ + pmac_find_bridges(); + + /* Checks "l2cr-value" property in the registry */ + if (cpu_has_feature(CPU_FTR_L2CR)) { + struct device_node *np = find_devices("cpus"); + if (np == 0) + np = find_type_devices("cpu"); + if (np != 0) { + unsigned int *l2cr = (unsigned int *) + get_property(np, "l2cr-value", NULL); + if (l2cr != 0) { + ppc_override_l2cr = 1; + ppc_override_l2cr_value = *l2cr; + _set_L2CR(0); + _set_L2CR(ppc_override_l2cr_value); + } + } + } + + if (ppc_override_l2cr) + printk(KERN_INFO "L2CR overriden (0x%x), backside cache is %s\n", + ppc_override_l2cr_value, (ppc_override_l2cr_value & 0x80000000) + ? "enabled" : "disabled"); + +#ifdef CONFIG_KGDB + zs_kgdb_hook(0); +#endif + +#ifdef CONFIG_ADB_CUDA + find_via_cuda(); +#else + if (find_devices("via-cuda")) { + printk("WARNING ! Your machine is Cuda based but your kernel\n"); + printk(" wasn't compiled with CONFIG_ADB_CUDA option !\n"); + } +#endif +#ifdef CONFIG_ADB_PMU + find_via_pmu(); +#else + if (find_devices("via-pmu")) { + printk("WARNING ! Your machine is PMU based but your kernel\n"); + printk(" wasn't compiled with CONFIG_ADB_PMU option !\n"); + } +#endif +#ifdef CONFIG_NVRAM + pmac_nvram_init(); +#endif +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) + ROOT_DEV = Root_RAM0; + else +#endif + ROOT_DEV = DEFAULT_ROOT_DEVICE; + +#ifdef CONFIG_SMP + /* Check for Core99 */ + if (find_devices("uni-n") || find_devices("u3")) + ppc_md.smp_ops = &core99_smp_ops; + else + ppc_md.smp_ops = &psurge_smp_ops; +#endif /* CONFIG_SMP */ + + pci_create_OF_bus_map(); +} + +static void __init ohare_init(void) +{ + /* + * Turn on the L2 cache. + * We assume that we have a PSX memory controller iff + * we have an ohare I/O controller. + */ + if (find_devices("ohare") != NULL) { + if (((sysctrl_regs[2] >> 24) & 0xf) >= 3) { + if (sysctrl_regs[4] & 0x10) + sysctrl_regs[4] |= 0x04000020; + else + sysctrl_regs[4] |= 0x04000000; + if(has_l2cache) + printk(KERN_INFO "Level 2 cache enabled\n"); + } + } +} + +char *bootpath; +char *bootdevice; +void *boot_host; +int boot_target; +int boot_part; +extern dev_t boot_dev; + +#ifdef CONFIG_SCSI +void __init +note_scsi_host(struct device_node *node, void *host) +{ + int l; + char *p; + + l = strlen(node->full_name); + if (bootpath != NULL && bootdevice != NULL + && strncmp(node->full_name, bootdevice, l) == 0 + && (bootdevice[l] == '/' || bootdevice[l] == 0)) { + boot_host = host; + /* + * There's a bug in OF 1.0.5. (Why am I not surprised.) + * If you pass a path like scsi/sd@1:0 to canon, it returns + * something like /bandit@F2000000/gc@10/53c94@10000/sd@0,0 + * That is, the scsi target number doesn't get preserved. + * So we pick the target number out of bootpath and use that. + */ + p = strstr(bootpath, "/sd@"); + if (p != NULL) { + p += 4; + boot_target = simple_strtoul(p, NULL, 10); + p = strchr(p, ':'); + if (p != NULL) + boot_part = simple_strtoul(p + 1, NULL, 10); + } + } +} +EXPORT_SYMBOL(note_scsi_host); +#endif + +#if defined(CONFIG_BLK_DEV_IDE) && defined(CONFIG_BLK_DEV_IDE_PMAC) +static dev_t __init +find_ide_boot(void) +{ + char *p; + int n; + dev_t __init pmac_find_ide_boot(char *bootdevice, int n); + + if (bootdevice == NULL) + return 0; + p = strrchr(bootdevice, '/'); + if (p == NULL) + return 0; + n = p - bootdevice; + + return pmac_find_ide_boot(bootdevice, n); +} +#endif /* CONFIG_BLK_DEV_IDE && CONFIG_BLK_DEV_IDE_PMAC */ + +static void __init +find_boot_device(void) +{ +#if defined(CONFIG_BLK_DEV_IDE) && defined(CONFIG_BLK_DEV_IDE_PMAC) + boot_dev = find_ide_boot(); +#endif +} + +static int initializing = 1; +/* TODO: Merge the suspend-to-ram with the common code !!! + * currently, this is a stub implementation for suspend-to-disk + * only + */ + +#ifdef CONFIG_SOFTWARE_SUSPEND + +static int pmac_pm_prepare(suspend_state_t state) +{ + printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state); + + return 0; +} + +static int pmac_pm_enter(suspend_state_t state) +{ + printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state); + + /* Giveup the lazy FPU & vec so we don't have to back them + * up from the low level code + */ + enable_kernel_fp(); + +#ifdef CONFIG_ALTIVEC + if (cur_cpu_spec->cpu_features & CPU_FTR_ALTIVEC) + enable_kernel_altivec(); +#endif /* CONFIG_ALTIVEC */ + + return 0; +} + +static int pmac_pm_finish(suspend_state_t state) +{ + printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state); + + /* Restore userland MMU context */ + set_context(current->active_mm->context, current->active_mm->pgd); + + return 0; +} + +static struct pm_ops pmac_pm_ops = { + .pm_disk_mode = PM_DISK_SHUTDOWN, + .prepare = pmac_pm_prepare, + .enter = pmac_pm_enter, + .finish = pmac_pm_finish, +}; + +#endif /* CONFIG_SOFTWARE_SUSPEND */ + +static int pmac_late_init(void) +{ + initializing = 0; +#ifdef CONFIG_SOFTWARE_SUSPEND + pm_set_ops(&pmac_pm_ops); +#endif /* CONFIG_SOFTWARE_SUSPEND */ + return 0; +} + +late_initcall(pmac_late_init); + +/* can't be __init - can be called whenever a disk is first accessed */ +void +note_bootable_part(dev_t dev, int part, int goodness) +{ + static int found_boot = 0; + char *p; + + if (!initializing) + return; + if ((goodness <= current_root_goodness) && + ROOT_DEV != DEFAULT_ROOT_DEVICE) + return; + p = strstr(saved_command_line, "root="); + if (p != NULL && (p == saved_command_line || p[-1] == ' ')) + return; + + if (!found_boot) { + find_boot_device(); + found_boot = 1; + } + if (!boot_dev || dev == boot_dev) { + ROOT_DEV = dev + part; + boot_dev = 0; + current_root_goodness = goodness; + } +} + +static void +pmac_restart(char *cmd) +{ +#ifdef CONFIG_ADB_CUDA + struct adb_request req; +#endif /* CONFIG_ADB_CUDA */ + + switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA + case SYS_CTRLER_CUDA: + cuda_request(&req, NULL, 2, CUDA_PACKET, + CUDA_RESET_SYSTEM); + for (;;) + cuda_poll(); + break; +#endif /* CONFIG_ADB_CUDA */ +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + pmu_restart(); + break; +#endif /* CONFIG_ADB_PMU */ + default: ; + } +} + +static void +pmac_power_off(void) +{ +#ifdef CONFIG_ADB_CUDA + struct adb_request req; +#endif /* CONFIG_ADB_CUDA */ + + switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA + case SYS_CTRLER_CUDA: + cuda_request(&req, NULL, 2, CUDA_PACKET, + CUDA_POWERDOWN); + for (;;) + cuda_poll(); + break; +#endif /* CONFIG_ADB_CUDA */ +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + pmu_shutdown(); + break; +#endif /* CONFIG_ADB_PMU */ + default: ; + } +} + +static void +pmac_halt(void) +{ + pmac_power_off(); +} + +void __init pmac_init(void) +{ + /* isa_io_base gets set in pmac_find_bridges */ + isa_mem_base = PMAC_ISA_MEM_BASE; + pci_dram_offset = PMAC_PCI_DRAM_OFFSET; + ISA_DMA_THRESHOLD = ~0L; + DMA_MODE_READ = 1; + DMA_MODE_WRITE = 2; + + ppc_md.setup_arch = pmac_setup_arch; + ppc_md.show_cpuinfo = pmac_show_cpuinfo; + ppc_md.show_percpuinfo = pmac_show_percpuinfo; + ppc_md.irq_canonicalize = NULL; + ppc_md.init_IRQ = pmac_pic_init; + ppc_md.get_irq = pmac_get_irq; /* Changed later on ... */ + + ppc_md.pcibios_fixup = pmac_pcibios_fixup; + ppc_md.pcibios_enable_device_hook = pmac_pci_enable_device_hook; + ppc_md.pcibios_after_init = pmac_pcibios_after_init; + ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot; + + ppc_md.restart = pmac_restart; + ppc_md.power_off = pmac_power_off; + ppc_md.halt = pmac_halt; + + ppc_md.time_init = pmac_time_init; + ppc_md.set_rtc_time = pmac_set_rtc_time; + ppc_md.get_rtc_time = pmac_get_rtc_time; + ppc_md.calibrate_decr = pmac_calibrate_decr; + + ppc_md.feature_call = pmac_do_feature_call; + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +#ifdef CONFIG_BLK_DEV_IDE_PMAC + ppc_ide_md.ide_init_hwif = pmac_ide_init_hwif_ports; + ppc_ide_md.default_io_base = pmac_ide_get_base; +#endif /* CONFIG_BLK_DEV_IDE_PMAC */ +#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */ + +#ifdef CONFIG_BOOTX_TEXT + ppc_md.progress = pmac_progress; +#endif /* CONFIG_BOOTX_TEXT */ + + if (ppc_md.progress) ppc_md.progress("pmac_init(): exit", 0); + +} + +#ifdef CONFIG_BOOTX_TEXT +static void __init +pmac_progress(char *s, unsigned short hex) +{ + if (boot_text_mapped) { + btext_drawstring(s); + btext_drawchar('\n'); + } +} +#endif /* CONFIG_BOOTX_TEXT */ + +static int __init +pmac_declare_of_platform_devices(void) +{ + struct device_node *np; + + np = find_devices("uni-n"); + if (np) { + for (np = np->child; np != NULL; np = np->sibling) + if (strncmp(np->name, "i2c", 3) == 0) { + of_platform_device_create(np, "uni-n-i2c", + NULL); + break; + } + } + np = find_devices("u3"); + if (np) { + for (np = np->child; np != NULL; np = np->sibling) + if (strncmp(np->name, "i2c", 3) == 0) { + of_platform_device_create(np, "u3-i2c", + NULL); + break; + } + } + + np = find_devices("valkyrie"); + if (np) + of_platform_device_create(np, "valkyrie", NULL); + np = find_devices("platinum"); + if (np) + of_platform_device_create(np, "platinum", NULL); + + return 0; +} + +device_initcall(pmac_declare_of_platform_devices); diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S new file mode 100644 index 00000000000..88419c77ac4 --- /dev/null +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -0,0 +1,396 @@ +/* + * This file contains sleep low-level functions for PowerBook G3. + * Copyright (C) 1999 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * and Paul Mackerras (paulus@samba.org). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/ppc_asm.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> + +#define MAGIC 0x4c617273 /* 'Lars' */ + +/* + * Structure for storing CPU registers on the stack. + */ +#define SL_SP 0 +#define SL_PC 4 +#define SL_MSR 8 +#define SL_SDR1 0xc +#define SL_SPRG0 0x10 /* 4 sprg's */ +#define SL_DBAT0 0x20 +#define SL_IBAT0 0x28 +#define SL_DBAT1 0x30 +#define SL_IBAT1 0x38 +#define SL_DBAT2 0x40 +#define SL_IBAT2 0x48 +#define SL_DBAT3 0x50 +#define SL_IBAT3 0x58 +#define SL_TB 0x60 +#define SL_R2 0x68 +#define SL_CR 0x6c +#define SL_R12 0x70 /* r12 to r31 */ +#define SL_SIZE (SL_R12 + 80) + + .section .text + .align 5 + +#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC) + +/* This gets called by via-pmu.c late during the sleep process. + * The PMU was already send the sleep command and will shut us down + * soon. We need to save all that is needed and setup the wakeup + * vector that will be called by the ROM on wakeup + */ +_GLOBAL(low_sleep_handler) +#ifndef CONFIG_6xx + blr +#else + mflr r0 + stw r0,4(r1) + stwu r1,-SL_SIZE(r1) + mfcr r0 + stw r0,SL_CR(r1) + stw r2,SL_R2(r1) + stmw r12,SL_R12(r1) + + /* Save MSR & SDR1 */ + mfmsr r4 + stw r4,SL_MSR(r1) + mfsdr1 r4 + stw r4,SL_SDR1(r1) + + /* Get a stable timebase and save it */ +1: mftbu r4 + stw r4,SL_TB(r1) + mftb r5 + stw r5,SL_TB+4(r1) + mftbu r3 + cmpw r3,r4 + bne 1b + + /* Save SPRGs */ + mfsprg r4,0 + stw r4,SL_SPRG0(r1) + mfsprg r4,1 + stw r4,SL_SPRG0+4(r1) + mfsprg r4,2 + stw r4,SL_SPRG0+8(r1) + mfsprg r4,3 + stw r4,SL_SPRG0+12(r1) + + /* Save BATs */ + mfdbatu r4,0 + stw r4,SL_DBAT0(r1) + mfdbatl r4,0 + stw r4,SL_DBAT0+4(r1) + mfdbatu r4,1 + stw r4,SL_DBAT1(r1) + mfdbatl r4,1 + stw r4,SL_DBAT1+4(r1) + mfdbatu r4,2 + stw r4,SL_DBAT2(r1) + mfdbatl r4,2 + stw r4,SL_DBAT2+4(r1) + mfdbatu r4,3 + stw r4,SL_DBAT3(r1) + mfdbatl r4,3 + stw r4,SL_DBAT3+4(r1) + mfibatu r4,0 + stw r4,SL_IBAT0(r1) + mfibatl r4,0 + stw r4,SL_IBAT0+4(r1) + mfibatu r4,1 + stw r4,SL_IBAT1(r1) + mfibatl r4,1 + stw r4,SL_IBAT1+4(r1) + mfibatu r4,2 + stw r4,SL_IBAT2(r1) + mfibatl r4,2 + stw r4,SL_IBAT2+4(r1) + mfibatu r4,3 + stw r4,SL_IBAT3(r1) + mfibatl r4,3 + stw r4,SL_IBAT3+4(r1) + + /* Backup various CPU config stuffs */ + bl __save_cpu_setup + + /* The ROM can wake us up via 2 different vectors: + * - On wallstreet & lombard, we must write a magic + * value 'Lars' at address 4 and a pointer to a + * memory location containing the PC to resume from + * at address 0. + * - On Core99, we must store the wakeup vector at + * address 0x80 and eventually it's parameters + * at address 0x84. I've have some trouble with those + * parameters however and I no longer use them. + */ + lis r5,grackle_wake_up@ha + addi r5,r5,grackle_wake_up@l + tophys(r5,r5) + stw r5,SL_PC(r1) + lis r4,KERNELBASE@h + tophys(r5,r1) + addi r5,r5,SL_PC + lis r6,MAGIC@ha + addi r6,r6,MAGIC@l + stw r5,0(r4) + stw r6,4(r4) + /* Setup stuffs at 0x80-0x84 for Core99 */ + lis r3,core99_wake_up@ha + addi r3,r3,core99_wake_up@l + tophys(r3,r3) + stw r3,0x80(r4) + stw r5,0x84(r4) + /* Store a pointer to our backup storage into + * a kernel global + */ + lis r3,sleep_storage@ha + addi r3,r3,sleep_storage@l + stw r5,0(r3) + + .globl low_cpu_die +low_cpu_die: + /* Flush & disable all caches */ + bl flush_disable_caches + + /* Turn off data relocation. */ + mfmsr r3 /* Save MSR in r7 */ + rlwinm r3,r3,0,28,26 /* Turn off DR bit */ + sync + mtmsr r3 + isync + +BEGIN_FTR_SECTION + /* Flush any pending L2 data prefetches to work around HW bug */ + sync + lis r3,0xfff0 + lwz r0,0(r3) /* perform cache-inhibited load to ROM */ + sync /* (caches are disabled at this point) */ +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) + +/* + * Set the HID0 and MSR for sleep. + */ + mfspr r2,SPRN_HID0 + rlwinm r2,r2,0,10,7 /* clear doze, nap */ + oris r2,r2,HID0_SLEEP@h + sync + isync + mtspr SPRN_HID0,r2 + sync + +/* This loop puts us back to sleep in case we have a spurrious + * wakeup so that the host bridge properly stays asleep. The + * CPU will be turned off, either after a known time (about 1 + * second) on wallstreet & lombard, or as soon as the CPU enters + * SLEEP mode on core99 + */ + mfmsr r2 + oris r2,r2,MSR_POW@h +1: sync + mtmsr r2 + isync + b 1b + +/* + * Here is the resume code. + */ + + +/* + * Core99 machines resume here + * r4 has the physical address of SL_PC(sp) (unused) + */ +_GLOBAL(core99_wake_up) + /* Make sure HID0 no longer contains any sleep bit and that data cache + * is disabled + */ + mfspr r3,SPRN_HID0 + rlwinm r3,r3,0,11,7 /* clear SLEEP, NAP, DOZE bits */ + rlwinm 3,r3,0,18,15 /* clear DCE, ICE */ + mtspr SPRN_HID0,r3 + sync + isync + + /* sanitize MSR */ + mfmsr r3 + ori r3,r3,MSR_EE|MSR_IP + xori r3,r3,MSR_EE|MSR_IP + sync + isync + mtmsr r3 + sync + isync + + /* Recover sleep storage */ + lis r3,sleep_storage@ha + addi r3,r3,sleep_storage@l + tophys(r3,r3) + lwz r1,0(r3) + + /* Pass thru to older resume code ... */ +/* + * Here is the resume code for older machines. + * r1 has the physical address of SL_PC(sp). + */ + +grackle_wake_up: + + /* Restore the kernel's segment registers before + * we do any r1 memory access as we are not sure they + * are in a sane state above the first 256Mb region + */ + li r0,16 /* load up segment register values */ + mtctr r0 /* for context 0 */ + lis r3,0x2000 /* Ku = 1, VSID = 0 */ + li r4,0 +3: mtsrin r3,r4 + addi r3,r3,0x111 /* increment VSID */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b + sync + isync + + subi r1,r1,SL_PC + + /* Restore various CPU config stuffs */ + bl __restore_cpu_setup + + /* Make sure all FPRs have been initialized */ + bl reloc_offset + bl __init_fpu_registers + + /* Invalidate & enable L1 cache, we don't care about + * whatever the ROM may have tried to write to memory + */ + bl __inval_enable_L1 + + /* Restore the BATs, and SDR1. Then we can turn on the MMU. */ + lwz r4,SL_SDR1(r1) + mtsdr1 r4 + lwz r4,SL_SPRG0(r1) + mtsprg 0,r4 + lwz r4,SL_SPRG0+4(r1) + mtsprg 1,r4 + lwz r4,SL_SPRG0+8(r1) + mtsprg 2,r4 + lwz r4,SL_SPRG0+12(r1) + mtsprg 3,r4 + + lwz r4,SL_DBAT0(r1) + mtdbatu 0,r4 + lwz r4,SL_DBAT0+4(r1) + mtdbatl 0,r4 + lwz r4,SL_DBAT1(r1) + mtdbatu 1,r4 + lwz r4,SL_DBAT1+4(r1) + mtdbatl 1,r4 + lwz r4,SL_DBAT2(r1) + mtdbatu 2,r4 + lwz r4,SL_DBAT2+4(r1) + mtdbatl 2,r4 + lwz r4,SL_DBAT3(r1) + mtdbatu 3,r4 + lwz r4,SL_DBAT3+4(r1) + mtdbatl 3,r4 + lwz r4,SL_IBAT0(r1) + mtibatu 0,r4 + lwz r4,SL_IBAT0+4(r1) + mtibatl 0,r4 + lwz r4,SL_IBAT1(r1) + mtibatu 1,r4 + lwz r4,SL_IBAT1+4(r1) + mtibatl 1,r4 + lwz r4,SL_IBAT2(r1) + mtibatu 2,r4 + lwz r4,SL_IBAT2+4(r1) + mtibatl 2,r4 + lwz r4,SL_IBAT3(r1) + mtibatu 3,r4 + lwz r4,SL_IBAT3+4(r1) + mtibatl 3,r4 + +BEGIN_FTR_SECTION + li r4,0 + mtspr SPRN_DBAT4U,r4 + mtspr SPRN_DBAT4L,r4 + mtspr SPRN_DBAT5U,r4 + mtspr SPRN_DBAT5L,r4 + mtspr SPRN_DBAT6U,r4 + mtspr SPRN_DBAT6L,r4 + mtspr SPRN_DBAT7U,r4 + mtspr SPRN_DBAT7L,r4 + mtspr SPRN_IBAT4U,r4 + mtspr SPRN_IBAT4L,r4 + mtspr SPRN_IBAT5U,r4 + mtspr SPRN_IBAT5L,r4 + mtspr SPRN_IBAT6U,r4 + mtspr SPRN_IBAT6L,r4 + mtspr SPRN_IBAT7U,r4 + mtspr SPRN_IBAT7L,r4 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS) + + /* Flush all TLBs */ + lis r4,0x1000 +1: addic. r4,r4,-0x1000 + tlbie r4 + blt 1b + sync + + /* restore the MSR and turn on the MMU */ + lwz r3,SL_MSR(r1) + bl turn_on_mmu + + /* get back the stack pointer */ + tovirt(r1,r1) + + /* Restore TB */ + li r3,0 + mttbl r3 + lwz r3,SL_TB(r1) + lwz r4,SL_TB+4(r1) + mttbu r3 + mttbl r4 + + /* Restore the callee-saved registers and return */ + lwz r0,SL_CR(r1) + mtcr r0 + lwz r2,SL_R2(r1) + lmw r12,SL_R12(r1) + addi r1,r1,SL_SIZE + lwz r0,4(r1) + mtlr r0 + blr + +turn_on_mmu: + mflr r4 + tovirt(r4,r4) + mtsrr0 r4 + mtsrr1 r3 + sync + isync + rfi + +#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ + + .section .data + .balign L1_CACHE_LINE_SIZE +sleep_storage: + .long 0 + .balign L1_CACHE_LINE_SIZE, 0 + +#endif /* CONFIG_6xx */ + .section .text diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c new file mode 100644 index 00000000000..fb996336c58 --- /dev/null +++ b/arch/powerpc/platforms/powermac/smp.c @@ -0,0 +1,716 @@ +/* + * SMP support for power macintosh. + * + * We support both the old "powersurge" SMP architecture + * and the current Core99 (G4 PowerMac) machines. + * + * Note that we don't support the very first rev. of + * Apple/DayStar 2 CPUs board, the one with the funky + * watchdog. Hopefully, none of these should be there except + * maybe internally to Apple. I should probably still add some + * code to detect this card though and disable SMP. --BenH. + * + * Support Macintosh G4 SMP by Troy Benjegerdes (hozer@drgw.net) + * and Ben Herrenschmidt <benh@kernel.crashing.org>. + * + * Support for DayStar quad CPU cards + * Copyright (C) XLR8, Inc. 1994-2000 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/errno.h> +#include <linux/hardirq.h> +#include <linux/cpu.h> + +#include <asm/ptrace.h> +#include <asm/atomic.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/residual.h> +#include <asm/machdep.h> +#include <asm/pmac_feature.h> +#include <asm/time.h> +#include <asm/mpic.h> +#include <asm/cacheflush.h> +#include <asm/keylargo.h> + +/* + * Powersurge (old powermac SMP) support. + */ + +extern void __secondary_start_pmac_0(void); + +/* Addresses for powersurge registers */ +#define HAMMERHEAD_BASE 0xf8000000 +#define HHEAD_CONFIG 0x90 +#define HHEAD_SEC_INTR 0xc0 + +/* register for interrupting the primary processor on the powersurge */ +/* N.B. this is actually the ethernet ROM! */ +#define PSURGE_PRI_INTR 0xf3019000 + +/* register for storing the start address for the secondary processor */ +/* N.B. this is the PCI config space address register for the 1st bridge */ +#define PSURGE_START 0xf2800000 + +/* Daystar/XLR8 4-CPU card */ +#define PSURGE_QUAD_REG_ADDR 0xf8800000 + +#define PSURGE_QUAD_IRQ_SET 0 +#define PSURGE_QUAD_IRQ_CLR 1 +#define PSURGE_QUAD_IRQ_PRIMARY 2 +#define PSURGE_QUAD_CKSTOP_CTL 3 +#define PSURGE_QUAD_PRIMARY_ARB 4 +#define PSURGE_QUAD_BOARD_ID 6 +#define PSURGE_QUAD_WHICH_CPU 7 +#define PSURGE_QUAD_CKSTOP_RDBK 8 +#define PSURGE_QUAD_RESET_CTL 11 + +#define PSURGE_QUAD_OUT(r, v) (out_8(quad_base + ((r) << 4) + 4, (v))) +#define PSURGE_QUAD_IN(r) (in_8(quad_base + ((r) << 4) + 4) & 0x0f) +#define PSURGE_QUAD_BIS(r, v) (PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) | (v))) +#define PSURGE_QUAD_BIC(r, v) (PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) & ~(v))) + +/* virtual addresses for the above */ +static volatile u8 __iomem *hhead_base; +static volatile u8 __iomem *quad_base; +static volatile u32 __iomem *psurge_pri_intr; +static volatile u8 __iomem *psurge_sec_intr; +static volatile u32 __iomem *psurge_start; + +/* values for psurge_type */ +#define PSURGE_NONE -1 +#define PSURGE_DUAL 0 +#define PSURGE_QUAD_OKEE 1 +#define PSURGE_QUAD_COTTON 2 +#define PSURGE_QUAD_ICEGRASS 3 + +/* what sort of powersurge board we have */ +static int psurge_type = PSURGE_NONE; + +/* L2 and L3 cache settings to pass from CPU0 to CPU1 */ +volatile static long int core99_l2_cache; +volatile static long int core99_l3_cache; + +/* Timebase freeze GPIO */ +static unsigned int core99_tb_gpio; + +/* Sync flag for HW tb sync */ +static volatile int sec_tb_reset = 0; +static unsigned int pri_tb_hi, pri_tb_lo; +static unsigned int pri_tb_stamp; + +static void __devinit core99_init_caches(int cpu) +{ + if (!cpu_has_feature(CPU_FTR_L2CR)) + return; + + if (cpu == 0) { + core99_l2_cache = _get_L2CR(); + printk("CPU0: L2CR is %lx\n", core99_l2_cache); + } else { + printk("CPU%d: L2CR was %lx\n", cpu, _get_L2CR()); + _set_L2CR(0); + _set_L2CR(core99_l2_cache); + printk("CPU%d: L2CR set to %lx\n", cpu, core99_l2_cache); + } + + if (!cpu_has_feature(CPU_FTR_L3CR)) + return; + + if (cpu == 0){ + core99_l3_cache = _get_L3CR(); + printk("CPU0: L3CR is %lx\n", core99_l3_cache); + } else { + printk("CPU%d: L3CR was %lx\n", cpu, _get_L3CR()); + _set_L3CR(0); + _set_L3CR(core99_l3_cache); + printk("CPU%d: L3CR set to %lx\n", cpu, core99_l3_cache); + } +} + +/* + * Set and clear IPIs for powersurge. + */ +static inline void psurge_set_ipi(int cpu) +{ + if (psurge_type == PSURGE_NONE) + return; + if (cpu == 0) + in_be32(psurge_pri_intr); + else if (psurge_type == PSURGE_DUAL) + out_8(psurge_sec_intr, 0); + else + PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_SET, 1 << cpu); +} + +static inline void psurge_clr_ipi(int cpu) +{ + if (cpu > 0) { + switch(psurge_type) { + case PSURGE_DUAL: + out_8(psurge_sec_intr, ~0); + case PSURGE_NONE: + break; + default: + PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, 1 << cpu); + } + } +} + +/* + * On powersurge (old SMP powermac architecture) we don't have + * separate IPIs for separate messages like openpic does. Instead + * we have a bitmap for each processor, where a 1 bit means that + * the corresponding message is pending for that processor. + * Ideally each cpu's entry would be in a different cache line. + * -- paulus. + */ +static unsigned long psurge_smp_message[NR_CPUS]; + +void psurge_smp_message_recv(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + int msg; + + /* clear interrupt */ + psurge_clr_ipi(cpu); + + if (num_online_cpus() < 2) + return; + + /* make sure there is a message there */ + for (msg = 0; msg < 4; msg++) + if (test_and_clear_bit(msg, &psurge_smp_message[cpu])) + smp_message_recv(msg, regs); +} + +irqreturn_t psurge_primary_intr(int irq, void *d, struct pt_regs *regs) +{ + psurge_smp_message_recv(regs); + return IRQ_HANDLED; +} + +static void smp_psurge_message_pass(int target, int msg, unsigned long data, + int wait) +{ + int i; + + if (num_online_cpus() < 2) + return; + + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_online(i)) + continue; + if (target == MSG_ALL + || (target == MSG_ALL_BUT_SELF && i != smp_processor_id()) + || target == i) { + set_bit(msg, &psurge_smp_message[i]); + psurge_set_ipi(i); + } + } +} + +/* + * Determine a quad card presence. We read the board ID register, we + * force the data bus to change to something else, and we read it again. + * It it's stable, then the register probably exist (ugh !) + */ +static int __init psurge_quad_probe(void) +{ + int type; + unsigned int i; + + type = PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID); + if (type < PSURGE_QUAD_OKEE || type > PSURGE_QUAD_ICEGRASS + || type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID)) + return PSURGE_DUAL; + + /* looks OK, try a slightly more rigorous test */ + /* bogus is not necessarily cacheline-aligned, + though I don't suppose that really matters. -- paulus */ + for (i = 0; i < 100; i++) { + volatile u32 bogus[8]; + bogus[(0+i)%8] = 0x00000000; + bogus[(1+i)%8] = 0x55555555; + bogus[(2+i)%8] = 0xFFFFFFFF; + bogus[(3+i)%8] = 0xAAAAAAAA; + bogus[(4+i)%8] = 0x33333333; + bogus[(5+i)%8] = 0xCCCCCCCC; + bogus[(6+i)%8] = 0xCCCCCCCC; + bogus[(7+i)%8] = 0x33333333; + wmb(); + asm volatile("dcbf 0,%0" : : "r" (bogus) : "memory"); + mb(); + if (type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID)) + return PSURGE_DUAL; + } + return type; +} + +static void __init psurge_quad_init(void) +{ + int procbits; + + if (ppc_md.progress) ppc_md.progress("psurge_quad_init", 0x351); + procbits = ~PSURGE_QUAD_IN(PSURGE_QUAD_WHICH_CPU); + if (psurge_type == PSURGE_QUAD_ICEGRASS) + PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits); + else + PSURGE_QUAD_BIC(PSURGE_QUAD_CKSTOP_CTL, procbits); + mdelay(33); + out_8(psurge_sec_intr, ~0); + PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, procbits); + PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits); + if (psurge_type != PSURGE_QUAD_ICEGRASS) + PSURGE_QUAD_BIS(PSURGE_QUAD_CKSTOP_CTL, procbits); + PSURGE_QUAD_BIC(PSURGE_QUAD_PRIMARY_ARB, procbits); + mdelay(33); + PSURGE_QUAD_BIC(PSURGE_QUAD_RESET_CTL, procbits); + mdelay(33); + PSURGE_QUAD_BIS(PSURGE_QUAD_PRIMARY_ARB, procbits); + mdelay(33); +} + +static int __init smp_psurge_probe(void) +{ + int i, ncpus; + + /* We don't do SMP on the PPC601 -- paulus */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + return 1; + + /* + * The powersurge cpu board can be used in the generation + * of powermacs that have a socket for an upgradeable cpu card, + * including the 7500, 8500, 9500, 9600. + * The device tree doesn't tell you if you have 2 cpus because + * OF doesn't know anything about the 2nd processor. + * Instead we look for magic bits in magic registers, + * in the hammerhead memory controller in the case of the + * dual-cpu powersurge board. -- paulus. + */ + if (find_devices("hammerhead") == NULL) + return 1; + + hhead_base = ioremap(HAMMERHEAD_BASE, 0x800); + quad_base = ioremap(PSURGE_QUAD_REG_ADDR, 1024); + psurge_sec_intr = hhead_base + HHEAD_SEC_INTR; + + psurge_type = psurge_quad_probe(); + if (psurge_type != PSURGE_DUAL) { + psurge_quad_init(); + /* All released cards using this HW design have 4 CPUs */ + ncpus = 4; + } else { + iounmap(quad_base); + if ((in_8(hhead_base + HHEAD_CONFIG) & 0x02) == 0) { + /* not a dual-cpu card */ + iounmap(hhead_base); + psurge_type = PSURGE_NONE; + return 1; + } + ncpus = 2; + } + + psurge_start = ioremap(PSURGE_START, 4); + psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4); + + /* this is not actually strictly necessary -- paulus. */ + for (i = 1; i < ncpus; ++i) + smp_hw_index[i] = i; + + if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352); + + return ncpus; +} + +static void __init smp_psurge_kick_cpu(int nr) +{ + unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8; + unsigned long a; + + /* may need to flush here if secondary bats aren't setup */ + for (a = KERNELBASE; a < KERNELBASE + 0x800000; a += 32) + asm volatile("dcbf 0,%0" : : "r" (a) : "memory"); + asm volatile("sync"); + + if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu", 0x353); + + out_be32(psurge_start, start); + mb(); + + psurge_set_ipi(nr); + udelay(10); + psurge_clr_ipi(nr); + + if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354); +} + +/* + * With the dual-cpu powersurge board, the decrementers and timebases + * of both cpus are frozen after the secondary cpu is started up, + * until we give the secondary cpu another interrupt. This routine + * uses this to get the timebases synchronized. + * -- paulus. + */ +static void __init psurge_dual_sync_tb(int cpu_nr) +{ + int t; + + set_dec(tb_ticks_per_jiffy); + set_tb(0, 0); + last_jiffy_stamp(cpu_nr) = 0; + + if (cpu_nr > 0) { + mb(); + sec_tb_reset = 1; + return; + } + + /* wait for the secondary to have reset its TB before proceeding */ + for (t = 10000000; t > 0 && !sec_tb_reset; --t) + ; + + /* now interrupt the secondary, starting both TBs */ + psurge_set_ipi(1); + + smp_tb_synchronized = 1; +} + +static struct irqaction psurge_irqaction = { + .handler = psurge_primary_intr, + .flags = SA_INTERRUPT, + .mask = CPU_MASK_NONE, + .name = "primary IPI", +}; + +static void __init smp_psurge_setup_cpu(int cpu_nr) +{ + + if (cpu_nr == 0) { + /* If we failed to start the second CPU, we should still + * send it an IPI to start the timebase & DEC or we might + * have them stuck. + */ + if (num_online_cpus() < 2) { + if (psurge_type == PSURGE_DUAL) + psurge_set_ipi(1); + return; + } + /* reset the entry point so if we get another intr we won't + * try to startup again */ + out_be32(psurge_start, 0x100); + if (setup_irq(30, &psurge_irqaction)) + printk(KERN_ERR "Couldn't get primary IPI interrupt"); + } + + if (psurge_type == PSURGE_DUAL) + psurge_dual_sync_tb(cpu_nr); +} + +void __init smp_psurge_take_timebase(void) +{ + /* Dummy implementation */ +} + +void __init smp_psurge_give_timebase(void) +{ + /* Dummy implementation */ +} + +static int __init smp_core99_probe(void) +{ +#ifdef CONFIG_6xx + extern int powersave_nap; +#endif + struct device_node *cpus, *firstcpu; + int i, ncpus = 0, boot_cpu = -1; + u32 *tbprop = NULL; + + if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345); + cpus = firstcpu = find_type_devices("cpu"); + while(cpus != NULL) { + u32 *regprop = (u32 *)get_property(cpus, "reg", NULL); + char *stateprop = (char *)get_property(cpus, "state", NULL); + if (regprop != NULL && stateprop != NULL && + !strncmp(stateprop, "running", 7)) + boot_cpu = *regprop; + ++ncpus; + cpus = cpus->next; + } + if (boot_cpu == -1) + printk(KERN_WARNING "Couldn't detect boot CPU !\n"); + if (boot_cpu != 0) + printk(KERN_WARNING "Boot CPU is %d, unsupported setup !\n", boot_cpu); + + if (machine_is_compatible("MacRISC4")) { + extern struct smp_ops_t core99_smp_ops; + + core99_smp_ops.take_timebase = smp_generic_take_timebase; + core99_smp_ops.give_timebase = smp_generic_give_timebase; + } else { + if (firstcpu != NULL) + tbprop = (u32 *)get_property(firstcpu, "timebase-enable", NULL); + if (tbprop) + core99_tb_gpio = *tbprop; + else + core99_tb_gpio = KL_GPIO_TB_ENABLE; + } + + if (ncpus > 1) { + mpic_request_ipis(); + for (i = 1; i < ncpus; ++i) + smp_hw_index[i] = i; +#ifdef CONFIG_6xx + powersave_nap = 0; +#endif + core99_init_caches(0); + } + + return ncpus; +} + +static void __devinit smp_core99_kick_cpu(int nr) +{ + unsigned long save_vector, new_vector; + unsigned long flags; + + volatile unsigned long *vector + = ((volatile unsigned long *)(KERNELBASE+0x100)); + if (nr < 0 || nr > 3) + return; + if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346); + + local_irq_save(flags); + local_irq_disable(); + + /* Save reset vector */ + save_vector = *vector; + + /* Setup fake reset vector that does + * b __secondary_start_pmac_0 + nr*8 - KERNELBASE + */ + new_vector = (unsigned long) __secondary_start_pmac_0 + nr * 8; + *vector = 0x48000002 + new_vector - KERNELBASE; + + /* flush data cache and inval instruction cache */ + flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); + + /* Put some life in our friend */ + pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); + + /* FIXME: We wait a bit for the CPU to take the exception, I should + * instead wait for the entry code to set something for me. Well, + * ideally, all that crap will be done in prom.c and the CPU left + * in a RAM-based wait loop like CHRP. + */ + mdelay(1); + + /* Restore our exception vector */ + *vector = save_vector; + flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); + + local_irq_restore(flags); + if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); +} + +static void __devinit smp_core99_setup_cpu(int cpu_nr) +{ + /* Setup L2/L3 */ + if (cpu_nr != 0) + core99_init_caches(cpu_nr); + + /* Setup openpic */ + mpic_setup_this_cpu(); + + if (cpu_nr == 0) { +#ifdef CONFIG_POWER4 + extern void g5_phy_disable_cpu1(void); + + /* If we didn't start the second CPU, we must take + * it off the bus + */ + if (machine_is_compatible("MacRISC4") && + num_online_cpus() < 2) + g5_phy_disable_cpu1(); +#endif /* CONFIG_POWER4 */ + if (ppc_md.progress) ppc_md.progress("core99_setup_cpu 0 done", 0x349); + } +} + +/* not __init, called in sleep/wakeup code */ +void smp_core99_take_timebase(void) +{ + unsigned long flags; + + /* tell the primary we're here */ + sec_tb_reset = 1; + mb(); + + /* wait for the primary to set pri_tb_hi/lo */ + while (sec_tb_reset < 2) + mb(); + + /* set our stuff the same as the primary */ + local_irq_save(flags); + set_dec(1); + set_tb(pri_tb_hi, pri_tb_lo); + last_jiffy_stamp(smp_processor_id()) = pri_tb_stamp; + mb(); + + /* tell the primary we're done */ + sec_tb_reset = 0; + mb(); + local_irq_restore(flags); +} + +/* not __init, called in sleep/wakeup code */ +void smp_core99_give_timebase(void) +{ + unsigned long flags; + unsigned int t; + + /* wait for the secondary to be in take_timebase */ + for (t = 100000; t > 0 && !sec_tb_reset; --t) + udelay(10); + if (!sec_tb_reset) { + printk(KERN_WARNING "Timeout waiting sync on second CPU\n"); + return; + } + + /* freeze the timebase and read it */ + /* disable interrupts so the timebase is disabled for the + shortest possible time */ + local_irq_save(flags); + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 4); + pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); + mb(); + pri_tb_hi = get_tbu(); + pri_tb_lo = get_tbl(); + pri_tb_stamp = last_jiffy_stamp(smp_processor_id()); + mb(); + + /* tell the secondary we're ready */ + sec_tb_reset = 2; + mb(); + + /* wait for the secondary to have taken it */ + for (t = 100000; t > 0 && sec_tb_reset; --t) + udelay(10); + if (sec_tb_reset) + printk(KERN_WARNING "Timeout waiting sync(2) on second CPU\n"); + else + smp_tb_synchronized = 1; + + /* Now, restart the timebase by leaving the GPIO to an open collector */ + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0); + pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); + local_irq_restore(flags); +} + +void smp_core99_message_pass(int target, int msg, unsigned long data, int wait) +{ + cpumask_t mask = CPU_MASK_ALL; + /* make sure we're sending something that translates to an IPI */ + if (msg > 0x3) { + printk("SMP %d: smp_message_pass: unknown msg %d\n", + smp_processor_id(), msg); + return; + } + switch (target) { + case MSG_ALL: + mpic_send_ipi(msg, cpus_addr(mask)[0]); + break; + case MSG_ALL_BUT_SELF: + cpu_clear(smp_processor_id(), mask); + mpic_send_ipi(msg, cpus_addr(mask)[0]); + break; + default: + mpic_send_ipi(msg, 1 << target); + break; + } +} + + +/* PowerSurge-style Macs */ +struct smp_ops_t psurge_smp_ops = { + .message_pass = smp_psurge_message_pass, + .probe = smp_psurge_probe, + .kick_cpu = smp_psurge_kick_cpu, + .setup_cpu = smp_psurge_setup_cpu, + .give_timebase = smp_psurge_give_timebase, + .take_timebase = smp_psurge_take_timebase, +}; + +/* Core99 Macs (dual G4s) */ +struct smp_ops_t core99_smp_ops = { + .message_pass = smp_core99_message_pass, + .probe = smp_core99_probe, + .kick_cpu = smp_core99_kick_cpu, + .setup_cpu = smp_core99_setup_cpu, + .give_timebase = smp_core99_give_timebase, + .take_timebase = smp_core99_take_timebase, +}; + +#ifdef CONFIG_HOTPLUG_CPU + +int __cpu_disable(void) +{ + cpu_clear(smp_processor_id(), cpu_online_map); + + /* XXX reset cpu affinity here */ + mpic_cpu_set_priority(0xf); + asm volatile("mtdec %0" : : "r" (0x7fffffff)); + mb(); + udelay(20); + asm volatile("mtdec %0" : : "r" (0x7fffffff)); + return 0; +} + +extern void low_cpu_die(void) __attribute__((noreturn)); /* in pmac_sleep.S */ +static int cpu_dead[NR_CPUS]; + +void cpu_die(void) +{ + local_irq_disable(); + cpu_dead[smp_processor_id()] = 1; + mb(); + low_cpu_die(); +} + +void __cpu_die(unsigned int cpu) +{ + int timeout; + + timeout = 1000; + while (!cpu_dead[cpu]) { + if (--timeout == 0) { + printk("CPU %u refused to die!\n", cpu); + break; + } + msleep(1); + } + cpu_callin_map[cpu] = 0; + cpu_dead[cpu] = 0; +} + +#endif diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c new file mode 100644 index 00000000000..ff6adff36cb --- /dev/null +++ b/arch/powerpc/platforms/powermac/time.c @@ -0,0 +1,291 @@ +/* + * Support for periodic interrupts (100 per second) and for getting + * the current time from the RTC on Power Macintoshes. + * + * We use the decrementer register for our periodic interrupts. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996 Paul Mackerras. + */ +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/time.h> +#include <linux/adb.h> +#include <linux/cuda.h> +#include <linux/pmu.h> +#include <linux/hardirq.h> + +#include <asm/sections.h> +#include <asm/prom.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/nvram.h> + +/* Apparently the RTC stores seconds since 1 Jan 1904 */ +#define RTC_OFFSET 2082844800 + +/* + * Calibrate the decrementer frequency with the VIA timer 1. + */ +#define VIA_TIMER_FREQ_6 4700000 /* time 1 frequency * 6 */ + +/* VIA registers */ +#define RS 0x200 /* skip between registers */ +#define T1CL (4*RS) /* Timer 1 ctr/latch (low 8 bits) */ +#define T1CH (5*RS) /* Timer 1 counter (high 8 bits) */ +#define T1LL (6*RS) /* Timer 1 latch (low 8 bits) */ +#define T1LH (7*RS) /* Timer 1 latch (high 8 bits) */ +#define ACR (11*RS) /* Auxiliary control register */ +#define IFR (13*RS) /* Interrupt flag register */ + +/* Bits in ACR */ +#define T1MODE 0xc0 /* Timer 1 mode */ +#define T1MODE_CONT 0x40 /* continuous interrupts */ + +/* Bits in IFR and IER */ +#define T1_INT 0x40 /* Timer 1 interrupt */ + +extern struct timezone sys_tz; + +long __init +pmac_time_init(void) +{ +#ifdef CONFIG_NVRAM + s32 delta = 0; + int dst; + + delta = ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x9)) << 16; + delta |= ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xa)) << 8; + delta |= pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xb); + if (delta & 0x00800000UL) + delta |= 0xFF000000UL; + dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0); + printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60, + dst ? "on" : "off"); + return delta; +#else + return 0; +#endif +} + +unsigned long +pmac_get_rtc_time(void) +{ +#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU) + struct adb_request req; + unsigned long now; +#endif + + /* Get the time from the RTC */ + switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA + case SYS_CTRLER_CUDA: + if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0) + return 0; + while (!req.complete) + cuda_poll(); + if (req.reply_len != 7) + printk(KERN_ERR "pmac_get_rtc_time: got %d byte reply\n", + req.reply_len); + now = (req.reply[3] << 24) + (req.reply[4] << 16) + + (req.reply[5] << 8) + req.reply[6]; + return now - RTC_OFFSET; +#endif /* CONFIG_ADB_CUDA */ +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) + return 0; + while (!req.complete) + pmu_poll(); + if (req.reply_len != 4) + printk(KERN_ERR "pmac_get_rtc_time: got %d byte reply\n", + req.reply_len); + now = (req.reply[0] << 24) + (req.reply[1] << 16) + + (req.reply[2] << 8) + req.reply[3]; + return now - RTC_OFFSET; +#endif /* CONFIG_ADB_PMU */ + default: ; + } + return 0; +} + +int +pmac_set_rtc_time(unsigned long nowtime) +{ +#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU) + struct adb_request req; +#endif + + nowtime += RTC_OFFSET; + + switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA + case SYS_CTRLER_CUDA: + if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME, + nowtime >> 24, nowtime >> 16, nowtime >> 8, nowtime) < 0) + return 0; + while (!req.complete) + cuda_poll(); + if ((req.reply_len != 3) && (req.reply_len != 7)) + printk(KERN_ERR "pmac_set_rtc_time: got %d byte reply\n", + req.reply_len); + return 1; +#endif /* CONFIG_ADB_CUDA */ +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + if (pmu_request(&req, NULL, 5, PMU_SET_RTC, + nowtime >> 24, nowtime >> 16, nowtime >> 8, nowtime) < 0) + return 0; + while (!req.complete) + pmu_poll(); + if (req.reply_len != 0) + printk(KERN_ERR "pmac_set_rtc_time: got %d byte reply\n", + req.reply_len); + return 1; +#endif /* CONFIG_ADB_PMU */ + default: + return 0; + } +} + +/* + * Calibrate the decrementer register using VIA timer 1. + * This is used both on powermacs and CHRP machines. + */ +int __init +via_calibrate_decr(void) +{ + struct device_node *vias; + volatile unsigned char __iomem *via; + int count = VIA_TIMER_FREQ_6 / 100; + unsigned int dstart, dend; + + vias = find_devices("via-cuda"); + if (vias == 0) + vias = find_devices("via-pmu"); + if (vias == 0) + vias = find_devices("via"); + if (vias == 0 || vias->n_addrs == 0) + return 0; + via = ioremap(vias->addrs[0].address, vias->addrs[0].size); + + /* set timer 1 for continuous interrupts */ + out_8(&via[ACR], (via[ACR] & ~T1MODE) | T1MODE_CONT); + /* set the counter to a small value */ + out_8(&via[T1CH], 2); + /* set the latch to `count' */ + out_8(&via[T1LL], count); + out_8(&via[T1LH], count >> 8); + /* wait until it hits 0 */ + while ((in_8(&via[IFR]) & T1_INT) == 0) + ; + dstart = get_dec(); + /* clear the interrupt & wait until it hits 0 again */ + in_8(&via[T1CL]); + while ((in_8(&via[IFR]) & T1_INT) == 0) + ; + dend = get_dec(); + + tb_ticks_per_jiffy = (dstart - dend) / (6 * (HZ/100)); + tb_to_us = mulhwu_scale_factor(dstart - dend, 60000); + + printk(KERN_INFO "via_calibrate_decr: ticks per jiffy = %u (%u ticks)\n", + tb_ticks_per_jiffy, dstart - dend); + + iounmap(via); + + return 1; +} + +#ifdef CONFIG_PM +/* + * Reset the time after a sleep. + */ +static int +time_sleep_notify(struct pmu_sleep_notifier *self, int when) +{ + static unsigned long time_diff; + unsigned long flags; + unsigned long seq; + + switch (when) { + case PBOOK_SLEEP_NOW: + do { + seq = read_seqbegin_irqsave(&xtime_lock, flags); + time_diff = xtime.tv_sec - pmac_get_rtc_time(); + } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + break; + case PBOOK_WAKE: + write_seqlock_irqsave(&xtime_lock, flags); + xtime.tv_sec = pmac_get_rtc_time() + time_diff; + xtime.tv_nsec = 0; + last_rtc_update = xtime.tv_sec; + write_sequnlock_irqrestore(&xtime_lock, flags); + break; + } + return PBOOK_SLEEP_OK; +} + +static struct pmu_sleep_notifier time_sleep_notifier = { + time_sleep_notify, SLEEP_LEVEL_MISC, +}; +#endif /* CONFIG_PM */ + +/* + * Query the OF and get the decr frequency. + * This was taken from the pmac time_init() when merging the prep/pmac + * time functions. + */ +void __init +pmac_calibrate_decr(void) +{ + struct device_node *cpu; + unsigned int freq, *fp; + +#ifdef CONFIG_PM + pmu_register_sleep_notifier(&time_sleep_notifier); +#endif /* CONFIG_PM */ + + /* We assume MacRISC2 machines have correct device-tree + * calibration. That's better since the VIA itself seems + * to be slightly off. --BenH + */ + if (!machine_is_compatible("MacRISC2") && + !machine_is_compatible("MacRISC3") && + !machine_is_compatible("MacRISC4")) + if (via_calibrate_decr()) + return; + + /* Special case: QuickSilver G4s seem to have a badly calibrated + * timebase-frequency in OF, VIA is much better on these. We should + * probably implement calibration based on the KL timer on these + * machines anyway... -BenH + */ + if (machine_is_compatible("PowerMac3,5")) + if (via_calibrate_decr()) + return; + /* + * The cpu node should have a timebase-frequency property + * to tell us the rate at which the decrementer counts. + */ + cpu = find_type_devices("cpu"); + if (cpu == 0) + panic("can't find cpu node in time_init"); + fp = (unsigned int *) get_property(cpu, "timebase-frequency", NULL); + if (fp == 0) + panic("can't get cpu timebase frequency"); + freq = *fp; + printk("time_init: decrementer frequency = %u.%.6u MHz\n", + freq/1000000, freq%1000000); + tb_ticks_per_jiffy = freq / HZ; + tb_to_us = mulhwu_scale_factor(freq, 1000000); +} diff --git a/arch/powerpc/platforms/prep/Kconfig b/arch/powerpc/platforms/prep/Kconfig new file mode 100644 index 00000000000..673ac47a162 --- /dev/null +++ b/arch/powerpc/platforms/prep/Kconfig @@ -0,0 +1,22 @@ + +config PREP_RESIDUAL + bool "Support for PReP Residual Data" + depends on PPC_PREP + help + Some PReP systems have residual data passed to the kernel by the + firmware. This allows detection of memory size, devices present and + other useful pieces of information. Sometimes this information is + not present or incorrect, in which case it could lead to the machine + behaving incorrectly. If this happens, either disable PREP_RESIDUAL + or pass the 'noresidual' option to the kernel. + + If you are running a PReP system, say Y here, otherwise say N. + +config PROC_PREPRESIDUAL + bool "Support for reading of PReP Residual Data in /proc" + depends on PREP_RESIDUAL && PROC_FS + help + Enabling this option will create a /proc/residual file which allows + you to get at the residual data on PReP systems. You will need a tool + (lsresidual) to parse it. If you aren't on a PReP system, you don't + want this. diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig new file mode 100644 index 00000000000..7a3b6fc4d97 --- /dev/null +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -0,0 +1,47 @@ + +config PPC_SPLPAR + depends on PPC_PSERIES + bool "Support for shared-processor logical partitions" + default n + help + Enabling this option will make the kernel run more efficiently + on logically-partitioned pSeries systems which use shared + processors, that is, which share physical processors between + two or more partitions. + +config HMT + bool "Hardware multithreading" + depends on SMP && PPC_PSERIES && BROKEN + help + This option enables hardware multithreading on RS64 cpus. + pSeries systems p620 and p660 have such a cpu type. + +config EEH + bool "PCI Extended Error Handling (EEH)" if EMBEDDED + depends on PPC_PSERIES + default y if !EMBEDDED + +config PPC_RTAS + bool + depends on PPC_PSERIES || PPC_BPA + default y + +config RTAS_PROC + bool "Proc interface to RTAS" + depends on PPC_RTAS + default y + +config RTAS_FLASH + tristate "Firmware flash interface" + depends on PPC64 && RTAS_PROC + +config SCANLOG + tristate "Scanlog dump interface" + depends on RTAS_PROC && PPC_PSERIES + +config LPARCFG + tristate "LPAR Configuration Data" + depends on PPC_PSERIES || PPC_ISERIES + help + Provide system capacity information via human readable + <key word>=<value> pairs through a /proc/ppc64/lparcfg interface. diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile new file mode 100644 index 00000000000..9ebb34180a1 --- /dev/null +++ b/arch/powerpc/platforms/pseries/Makefile @@ -0,0 +1,4 @@ +obj-y := pci.o lpar.o hvCall.o nvram.o reconfig.o \ + setup.o iommu.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_IBMVIO) += vio.o diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S new file mode 100644 index 00000000000..176e8da7646 --- /dev/null +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -0,0 +1,131 @@ +/* + * arch/ppc64/kernel/pSeries_hvCall.S + * + * This file contains the generic code to perform a call to the + * pSeries LPAR hypervisor. + * NOTE: this file will go away when we move to inline this work. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/hvcall.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> + +#define STK_PARM(i) (48 + ((i)-3)*8) + + .text + +/* long plpar_hcall(unsigned long opcode, R3 + unsigned long arg1, R4 + unsigned long arg2, R5 + unsigned long arg3, R6 + unsigned long arg4, R7 + unsigned long *out1, R8 + unsigned long *out2, R9 + unsigned long *out3); R10 + */ +_GLOBAL(plpar_hcall) + HMT_MEDIUM + + mfcr r0 + + std r8,STK_PARM(r8)(r1) /* Save out ptrs */ + std r9,STK_PARM(r9)(r1) + std r10,STK_PARM(r10)(r1) + + stw r0,8(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + + ld r8,STK_PARM(r8)(r1) /* Fetch r4-r6 ret args */ + ld r9,STK_PARM(r9)(r1) + ld r10,STK_PARM(r10)(r1) + std r4,0(r8) + std r5,0(r9) + std r6,0(r10) + + mtcrf 0xff,r0 + blr /* return r3 = status */ + + +/* Simple interface with no output values (other than status) */ +_GLOBAL(plpar_hcall_norets) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + + +/* long plpar_hcall_8arg_2ret(unsigned long opcode, R3 + unsigned long arg1, R4 + unsigned long arg2, R5 + unsigned long arg3, R6 + unsigned long arg4, R7 + unsigned long arg5, R8 + unsigned long arg6, R9 + unsigned long arg7, R10 + unsigned long arg8, 112(R1) + unsigned long *out1); 120(R1) + */ +_GLOBAL(plpar_hcall_8arg_2ret) + HMT_MEDIUM + + mfcr r0 + ld r11,STK_PARM(r11)(r1) /* put arg8 in R11 */ + stw r0,8(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + ld r10,STK_PARM(r12)(r1) /* Fetch r4 ret arg */ + std r4,0(r10) + mtcrf 0xff,r0 + blr /* return r3 = status */ + + +/* long plpar_hcall_4out(unsigned long opcode, R3 + unsigned long arg1, R4 + unsigned long arg2, R5 + unsigned long arg3, R6 + unsigned long arg4, R7 + unsigned long *out1, R8 + unsigned long *out2, R9 + unsigned long *out3, R10 + unsigned long *out4); 112(R1) + */ +_GLOBAL(plpar_hcall_4out) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + std r8,STK_PARM(r8)(r1) /* Save out ptrs */ + std r9,STK_PARM(r9)(r1) + std r10,STK_PARM(r10)(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + + ld r8,STK_PARM(r8)(r1) /* Fetch r4-r7 ret args */ + ld r9,STK_PARM(r9)(r1) + ld r10,STK_PARM(r10)(r1) + ld r11,STK_PARM(r11)(r1) + std r4,0(r8) + std r5,0(r9) + std r6,0(r10) + std r7,0(r11) + + mtcrf 0xff,r0 + blr /* return r3 = status */ diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c new file mode 100644 index 00000000000..9e90d41131d --- /dev/null +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -0,0 +1,606 @@ +/* + * arch/ppc64/kernel/pSeries_iommu.c + * + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup: + * + * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation + * + * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/ppcdebug.h> +#include <asm/iommu.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/abs_addr.h> +#include <asm/plpar_wrappers.h> +#include <asm/pSeries_reconfig.h> +#include <asm/systemcfg.h> +#include <asm/firmware.h> +#include <asm/tce.h> +#include <asm/ppc-pci.h> + +#define DBG(fmt...) + +extern int is_python(struct device_node *); + +static void tce_build_pSeries(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction) +{ + union tce_entry t; + union tce_entry *tp; + + index <<= TCE_PAGE_FACTOR; + npages <<= TCE_PAGE_FACTOR; + + t.te_word = 0; + t.te_rdwr = 1; // Read allowed + + if (direction != DMA_TO_DEVICE) + t.te_pciwr = 1; + + tp = ((union tce_entry *)tbl->it_base) + index; + + while (npages--) { + /* can't move this out since we might cross LMB boundary */ + t.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + + tp->te_word = t.te_word; + + uaddr += TCE_PAGE_SIZE; + tp++; + } +} + + +static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) +{ + union tce_entry t; + union tce_entry *tp; + + npages <<= TCE_PAGE_FACTOR; + index <<= TCE_PAGE_FACTOR; + + t.te_word = 0; + tp = ((union tce_entry *)tbl->it_base) + index; + + while (npages--) { + tp->te_word = t.te_word; + + tp++; + } +} + + +static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, + enum dma_data_direction direction) +{ + u64 rc; + union tce_entry tce; + + tce.te_word = 0; + tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + tce.te_rdwr = 1; + if (direction != DMA_TO_DEVICE) + tce.te_pciwr = 1; + + while (npages--) { + rc = plpar_tce_put((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word ); + + if (rc && printk_ratelimit()) { + printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); + } + + tcenum++; + tce.te_rpn++; + } +} + +static DEFINE_PER_CPU(void *, tce_page) = NULL; + +static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, + enum dma_data_direction direction) +{ + u64 rc; + union tce_entry tce, *tcep; + long l, limit; + + tcenum <<= TCE_PAGE_FACTOR; + npages <<= TCE_PAGE_FACTOR; + + if (npages == 1) + return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + direction); + + tcep = __get_cpu_var(tce_page); + + /* This is safe to do since interrupts are off when we're called + * from iommu_alloc{,_sg}() + */ + if (!tcep) { + tcep = (void *)__get_free_page(GFP_ATOMIC); + /* If allocation fails, fall back to the loop implementation */ + if (!tcep) + return tce_build_pSeriesLP(tbl, tcenum, npages, + uaddr, direction); + __get_cpu_var(tce_page) = tcep; + } + + tce.te_word = 0; + tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + tce.te_rdwr = 1; + if (direction != DMA_TO_DEVICE) + tce.te_pciwr = 1; + + /* We can map max one pageful of TCEs at a time */ + do { + /* + * Set up the page with TCE data, looping through and setting + * the values. + */ + limit = min_t(long, npages, 4096/sizeof(union tce_entry)); + + for (l = 0; l < limit; l++) { + tcep[l] = tce; + tce.te_rpn++; + } + + rc = plpar_tce_put_indirect((u64)tbl->it_index, + (u64)tcenum << 12, + (u64)virt_to_abs(tcep), + limit); + + npages -= limit; + tcenum += limit; + } while (npages > 0 && !rc); + + if (rc && printk_ratelimit()) { + printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%lx\n", (u64)npages); + printk("\ttce[0] val = 0x%lx\n", tcep[0].te_word); + show_stack(current, (unsigned long *)__get_SP()); + } +} + +static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +{ + u64 rc; + union tce_entry tce; + + tcenum <<= TCE_PAGE_FACTOR; + npages <<= TCE_PAGE_FACTOR; + + tce.te_word = 0; + + while (npages--) { + rc = plpar_tce_put((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word); + + if (rc && printk_ratelimit()) { + printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); + } + + tcenum++; + } +} + + +static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +{ + u64 rc; + union tce_entry tce; + + tcenum <<= TCE_PAGE_FACTOR; + npages <<= TCE_PAGE_FACTOR; + + tce.te_word = 0; + + rc = plpar_tce_stuff((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word, + npages); + + if (rc && printk_ratelimit()) { + printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); + printk("\trc = %ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%lx\n", (u64)npages); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); + } +} + +static void iommu_table_setparms(struct pci_controller *phb, + struct device_node *dn, + struct iommu_table *tbl) +{ + struct device_node *node; + unsigned long *basep; + unsigned int *sizep; + + node = (struct device_node *)phb->arch_data; + + basep = (unsigned long *)get_property(node, "linux,tce-base", NULL); + sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL); + if (basep == NULL || sizep == NULL) { + printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has " + "missing tce entries !\n", dn->full_name); + return; + } + + tbl->it_base = (unsigned long)__va(*basep); + memset((void *)tbl->it_base, 0, *sizep); + + tbl->it_busno = phb->bus->number; + + /* Units of tce entries */ + tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT; + + /* Test if we are going over 2GB of DMA space */ + if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { + udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + } + + phb->dma_window_base_cur += phb->dma_window_size; + + /* Set the tce table size - measured in entries */ + tbl->it_size = phb->dma_window_size >> PAGE_SHIFT; + + tbl->it_index = 0; + tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; +} + +/* + * iommu_table_setparms_lpar + * + * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. + * + * ToDo: properly interpret the ibm,dma-window property. The definition is: + * logical-bus-number (1 word) + * phys-address (#address-cells words) + * size (#cell-size words) + * + * Currently we hard code these sizes (more or less). + */ +static void iommu_table_setparms_lpar(struct pci_controller *phb, + struct device_node *dn, + struct iommu_table *tbl, + unsigned int *dma_window) +{ + tbl->it_busno = PCI_DN(dn)->bussubno; + + /* TODO: Parse field size properties properly. */ + tbl->it_size = (((unsigned long)dma_window[4] << 32) | + (unsigned long)dma_window[5]) >> PAGE_SHIFT; + tbl->it_offset = (((unsigned long)dma_window[2] << 32) | + (unsigned long)dma_window[3]) >> PAGE_SHIFT; + tbl->it_base = 0; + tbl->it_index = dma_window[0]; + tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; +} + +static void iommu_bus_setup_pSeries(struct pci_bus *bus) +{ + struct device_node *dn; + struct iommu_table *tbl; + struct device_node *isa_dn, *isa_dn_orig; + struct device_node *tmp; + struct pci_dn *pci; + int children; + + DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self); + + dn = pci_bus_to_OF_node(bus); + pci = PCI_DN(dn); + + if (bus->self) { + /* This is not a root bus, any setup will be done for the + * device-side of the bridge in iommu_dev_setup_pSeries(). + */ + return; + } + + /* Check if the ISA bus on the system is under + * this PHB. + */ + isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa"); + + while (isa_dn && isa_dn != dn) + isa_dn = isa_dn->parent; + + if (isa_dn_orig) + of_node_put(isa_dn_orig); + + /* Count number of direct PCI children of the PHB. + * All PCI device nodes have class-code property, so it's + * an easy way to find them. + */ + for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) + if (get_property(tmp, "class-code", NULL)) + children++; + + DBG("Children: %d\n", children); + + /* Calculate amount of DMA window per slot. Each window must be + * a power of two (due to pci_alloc_consistent requirements). + * + * Keep 256MB aside for PHBs with ISA. + */ + + if (!isa_dn) { + /* No ISA/IDE - just set window size and return */ + pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ + + while (pci->phb->dma_window_size * children > 0x80000000ul) + pci->phb->dma_window_size >>= 1; + DBG("No ISA/IDE, window size is 0x%lx\n", + pci->phb->dma_window_size); + pci->phb->dma_window_base_cur = 0; + + return; + } + + /* If we have ISA, then we probably have an IDE + * controller too. Allocate a 128MB table but + * skip the first 128MB to avoid stepping on ISA + * space. + */ + pci->phb->dma_window_size = 0x8000000ul; + pci->phb->dma_window_base_cur = 0x8000000ul; + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_setparms(pci->phb, dn, tbl); + pci->iommu_table = iommu_init_table(tbl); + + /* Divide the rest (1.75GB) among the children */ + pci->phb->dma_window_size = 0x80000000ul; + while (pci->phb->dma_window_size * children > 0x70000000ul) + pci->phb->dma_window_size >>= 1; + + DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size); + +} + + +static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus) +{ + struct iommu_table *tbl; + struct device_node *dn, *pdn; + struct pci_dn *ppci; + unsigned int *dma_window = NULL; + + DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self); + + dn = pci_bus_to_OF_node(bus); + + /* Find nearest ibm,dma-window, walking up the device tree */ + for (pdn = dn; pdn != NULL; pdn = pdn->parent) { + dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL); + if (dma_window != NULL) + break; + } + + if (dma_window == NULL) { + DBG("iommu_bus_setup_pSeriesLP: bus %s seems to have no ibm,dma-window property\n", dn->full_name); + return; + } + + ppci = pdn->data; + if (!ppci->iommu_table) { + /* Bussubno hasn't been copied yet. + * Do it now because iommu_table_setparms_lpar needs it. + */ + + ppci->bussubno = bus->number; + + tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), + GFP_KERNEL); + + iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); + + ppci->iommu_table = iommu_init_table(tbl); + } + + if (pdn != dn) + PCI_DN(dn)->iommu_table = ppci->iommu_table; +} + + +static void iommu_dev_setup_pSeries(struct pci_dev *dev) +{ + struct device_node *dn, *mydn; + struct iommu_table *tbl; + + DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, pci_name(dev)); + + mydn = dn = pci_device_to_OF_node(dev); + + /* If we're the direct child of a root bus, then we need to allocate + * an iommu table ourselves. The bus setup code should have setup + * the window sizes already. + */ + if (!dev->bus->self) { + DBG(" --> first child, no bridge. Allocating iommu table.\n"); + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + iommu_table_setparms(PCI_DN(dn)->phb, dn, tbl); + PCI_DN(mydn)->iommu_table = iommu_init_table(tbl); + + return; + } + + /* If this device is further down the bus tree, search upwards until + * an already allocated iommu table is found and use that. + */ + + while (dn && dn->data && PCI_DN(dn)->iommu_table == NULL) + dn = dn->parent; + + if (dn && dn->data) { + PCI_DN(mydn)->iommu_table = PCI_DN(dn)->iommu_table; + } else { + DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, pci_name(dev)); + } +} + +static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) +{ + int err = NOTIFY_OK; + struct device_node *np = node; + struct pci_dn *pci = np->data; + + switch (action) { + case PSERIES_RECONFIG_REMOVE: + if (pci->iommu_table && + get_property(np, "ibm,dma-window", NULL)) + iommu_free_table(np); + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block iommu_reconfig_nb = { + .notifier_call = iommu_reconfig_notifier, +}; + +static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) +{ + struct device_node *pdn, *dn; + struct iommu_table *tbl; + int *dma_window = NULL; + struct pci_dn *pci; + + DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev)); + + /* dev setup for LPAR is a little tricky, since the device tree might + * contain the dma-window properties per-device and not neccesarily + * for the bus. So we need to search upwards in the tree until we + * either hit a dma-window property, OR find a parent with a table + * already allocated. + */ + dn = pci_device_to_OF_node(dev); + + for (pdn = dn; pdn && pdn->data && !PCI_DN(pdn)->iommu_table; + pdn = pdn->parent) { + dma_window = (unsigned int *) + get_property(pdn, "ibm,dma-window", NULL); + if (dma_window) + break; + } + + /* Check for parent == NULL so we don't try to setup the empty EADS + * slots on POWER4 machines. + */ + if (dma_window == NULL || pdn->parent == NULL) { + DBG("No dma window for device, linking to parent\n"); + PCI_DN(dn)->iommu_table = PCI_DN(pdn)->iommu_table; + return; + } else { + DBG("Found DMA window, allocating table\n"); + } + + pci = pdn->data; + if (!pci->iommu_table) { + /* iommu_table_setparms_lpar needs bussubno. */ + pci->bussubno = pci->phb->bus->number; + + tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), + GFP_KERNEL); + + iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); + + pci->iommu_table = iommu_init_table(tbl); + } + + if (pdn != dn) + PCI_DN(dn)->iommu_table = pci->iommu_table; +} + +static void iommu_bus_setup_null(struct pci_bus *b) { } +static void iommu_dev_setup_null(struct pci_dev *d) { } + +/* These are called very early. */ +void iommu_init_early_pSeries(void) +{ + if (of_chosen && get_property(of_chosen, "linux,iommu-off", NULL)) { + /* Direct I/O, IOMMU off */ + ppc_md.iommu_dev_setup = iommu_dev_setup_null; + ppc_md.iommu_bus_setup = iommu_bus_setup_null; + pci_direct_iommu_init(); + + return; + } + + if (systemcfg->platform & PLATFORM_LPAR) { + if (firmware_has_feature(FW_FEATURE_MULTITCE)) { + ppc_md.tce_build = tce_buildmulti_pSeriesLP; + ppc_md.tce_free = tce_freemulti_pSeriesLP; + } else { + ppc_md.tce_build = tce_build_pSeriesLP; + ppc_md.tce_free = tce_free_pSeriesLP; + } + ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP; + ppc_md.iommu_dev_setup = iommu_dev_setup_pSeriesLP; + } else { + ppc_md.tce_build = tce_build_pSeries; + ppc_md.tce_free = tce_free_pSeries; + ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries; + ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries; + } + + + pSeries_reconfig_notifier_register(&iommu_reconfig_nb); + + pci_iommu_init(); +} + diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c new file mode 100644 index 00000000000..268d8362dde --- /dev/null +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -0,0 +1,517 @@ +/* + * pSeries_lpar.c + * Copyright (C) 2001 Todd Inglett, IBM Corporation + * + * pSeries LPAR support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define DEBUG + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/dma-mapping.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/machdep.h> +#include <asm/abs_addr.h> +#include <asm/mmu_context.h> +#include <asm/ppcdebug.h> +#include <asm/iommu.h> +#include <asm/tlbflush.h> +#include <asm/tlb.h> +#include <asm/prom.h> +#include <asm/abs_addr.h> +#include <asm/cputable.h> +#include <asm/plpar_wrappers.h> + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* in pSeries_hvCall.S */ +EXPORT_SYMBOL(plpar_hcall); +EXPORT_SYMBOL(plpar_hcall_4out); +EXPORT_SYMBOL(plpar_hcall_norets); +EXPORT_SYMBOL(plpar_hcall_8arg_2ret); + +extern void pSeries_find_serial_port(void); + + +int vtermno; /* virtual terminal# for udbg */ + +#define __ALIGNED__ __attribute__((__aligned__(sizeof(long)))) +static void udbg_hvsi_putc(unsigned char c) +{ + /* packet's seqno isn't used anyways */ + uint8_t packet[] __ALIGNED__ = { 0xff, 5, 0, 0, c }; + int rc; + + if (c == '\n') + udbg_hvsi_putc('\r'); + + do { + rc = plpar_put_term_char(vtermno, sizeof(packet), packet); + } while (rc == H_Busy); +} + +static long hvsi_udbg_buf_len; +static uint8_t hvsi_udbg_buf[256]; + +static int udbg_hvsi_getc_poll(void) +{ + unsigned char ch; + int rc, i; + + if (hvsi_udbg_buf_len == 0) { + rc = plpar_get_term_char(vtermno, &hvsi_udbg_buf_len, hvsi_udbg_buf); + if (rc != H_Success || hvsi_udbg_buf[0] != 0xff) { + /* bad read or non-data packet */ + hvsi_udbg_buf_len = 0; + } else { + /* remove the packet header */ + for (i = 4; i < hvsi_udbg_buf_len; i++) + hvsi_udbg_buf[i-4] = hvsi_udbg_buf[i]; + hvsi_udbg_buf_len -= 4; + } + } + + if (hvsi_udbg_buf_len <= 0 || hvsi_udbg_buf_len > 256) { + /* no data ready */ + hvsi_udbg_buf_len = 0; + return -1; + } + + ch = hvsi_udbg_buf[0]; + /* shift remaining data down */ + for (i = 1; i < hvsi_udbg_buf_len; i++) { + hvsi_udbg_buf[i-1] = hvsi_udbg_buf[i]; + } + hvsi_udbg_buf_len--; + + return ch; +} + +static unsigned char udbg_hvsi_getc(void) +{ + int ch; + for (;;) { + ch = udbg_hvsi_getc_poll(); + if (ch == -1) { + /* This shouldn't be needed...but... */ + volatile unsigned long delay; + for (delay=0; delay < 2000000; delay++) + ; + } else { + return ch; + } + } +} + +static void udbg_putcLP(unsigned char c) +{ + char buf[16]; + unsigned long rc; + + if (c == '\n') + udbg_putcLP('\r'); + + buf[0] = c; + do { + rc = plpar_put_term_char(vtermno, 1, buf); + } while(rc == H_Busy); +} + +/* Buffered chars getc */ +static long inbuflen; +static long inbuf[2]; /* must be 2 longs */ + +static int udbg_getc_pollLP(void) +{ + /* The interface is tricky because it may return up to 16 chars. + * We save them statically for future calls to udbg_getc(). + */ + char ch, *buf = (char *)inbuf; + int i; + long rc; + if (inbuflen == 0) { + /* get some more chars. */ + inbuflen = 0; + rc = plpar_get_term_char(vtermno, &inbuflen, buf); + if (rc != H_Success) + inbuflen = 0; /* otherwise inbuflen is garbage */ + } + if (inbuflen <= 0 || inbuflen > 16) { + /* Catch error case as well as other oddities (corruption) */ + inbuflen = 0; + return -1; + } + ch = buf[0]; + for (i = 1; i < inbuflen; i++) /* shuffle them down. */ + buf[i-1] = buf[i]; + inbuflen--; + return ch; +} + +static unsigned char udbg_getcLP(void) +{ + int ch; + for (;;) { + ch = udbg_getc_pollLP(); + if (ch == -1) { + /* This shouldn't be needed...but... */ + volatile unsigned long delay; + for (delay=0; delay < 2000000; delay++) + ; + } else { + return ch; + } + } +} + +/* call this from early_init() for a working debug console on + * vterm capable LPAR machines + */ +void udbg_init_debug_lpar(void) +{ + vtermno = 0; + udbg_putc = udbg_putcLP; + udbg_getc = udbg_getcLP; + udbg_getc_poll = udbg_getc_pollLP; +} + +/* returns 0 if couldn't find or use /chosen/stdout as console */ +int find_udbg_vterm(void) +{ + struct device_node *stdout_node; + u32 *termno; + char *name; + int found = 0; + + /* find the boot console from /chosen/stdout */ + if (!of_chosen) + return 0; + name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + if (name == NULL) + return 0; + stdout_node = of_find_node_by_path(name); + if (!stdout_node) + return 0; + + /* now we have the stdout node; figure out what type of device it is. */ + name = (char *)get_property(stdout_node, "name", NULL); + if (!name) { + printk(KERN_WARNING "stdout node missing 'name' property!\n"); + goto out; + } + + if (strncmp(name, "vty", 3) == 0) { + if (device_is_compatible(stdout_node, "hvterm1")) { + termno = (u32 *)get_property(stdout_node, "reg", NULL); + if (termno) { + vtermno = termno[0]; + udbg_putc = udbg_putcLP; + udbg_getc = udbg_getcLP; + udbg_getc_poll = udbg_getc_pollLP; + found = 1; + } + } else if (device_is_compatible(stdout_node, "hvterm-protocol")) { + termno = (u32 *)get_property(stdout_node, "reg", NULL); + if (termno) { + vtermno = termno[0]; + udbg_putc = udbg_hvsi_putc; + udbg_getc = udbg_hvsi_getc; + udbg_getc_poll = udbg_hvsi_getc_poll; + found = 1; + } + } + } else if (strncmp(name, "serial", 6)) { + /* XXX fix ISA serial console */ + printk(KERN_WARNING "serial stdout on LPAR ('%s')! " + "can't print udbg messages\n", + stdout_node->full_name); + } else { + printk(KERN_WARNING "don't know how to print to stdout '%s'\n", + stdout_node->full_name); + } + +out: + of_node_put(stdout_node); + return found; +} + +void vpa_init(int cpu) +{ + int hwcpu = get_hard_smp_processor_id(cpu); + unsigned long vpa = (unsigned long)&(paca[cpu].lppaca); + long ret; + unsigned long flags; + + /* Register the Virtual Processor Area (VPA) */ + flags = 1UL << (63 - 18); + + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + paca[cpu].lppaca.vmxregs_in_use = 1; + + ret = register_vpa(flags, hwcpu, __pa(vpa)); + + if (ret) + printk(KERN_ERR "WARNING: vpa_init: VPA registration for " + "cpu %d (hw %d) of area %lx returns %ld\n", + cpu, hwcpu, __pa(vpa), ret); +} + +long pSeries_lpar_hpte_insert(unsigned long hpte_group, + unsigned long va, unsigned long prpn, + unsigned long vflags, unsigned long rflags) +{ + unsigned long lpar_rc; + unsigned long flags; + unsigned long slot; + unsigned long hpte_v, hpte_r; + unsigned long dummy0, dummy1; + + hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID; + if (vflags & HPTE_V_LARGE) + hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); + + hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; + + /* Now fill in the actual HPTE */ + /* Set CEC cookie to 0 */ + /* Zero page = 0 */ + /* I-cache Invalidate = 0 */ + /* I-cache synchronize = 0 */ + /* Exact = 0 */ + flags = 0; + + /* XXX why is this here? - Anton */ + if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) + hpte_r &= ~_PAGE_COHERENT; + + lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v, + hpte_r, &slot, &dummy0, &dummy1); + + if (unlikely(lpar_rc == H_PTEG_Full)) + return -1; + + /* + * Since we try and ioremap PHBs we don't own, the pte insert + * will fail. However we must catch the failure in hash_page + * or we will loop forever, so return -2 in this case. + */ + if (unlikely(lpar_rc != H_Success)) + return -2; + + /* Because of iSeries, we have to pass down the secondary + * bucket bit here as well + */ + return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3); +} + +static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); + +static long pSeries_lpar_hpte_remove(unsigned long hpte_group) +{ + unsigned long slot_offset; + unsigned long lpar_rc; + int i; + unsigned long dummy1, dummy2; + + /* pick a random slot to start at */ + slot_offset = mftb() & 0x7; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + + /* don't remove a bolted entry */ + lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, + (0x1UL << 4), &dummy1, &dummy2); + + if (lpar_rc == H_Success) + return i; + + BUG_ON(lpar_rc != H_Not_Found); + + slot_offset++; + slot_offset &= 0x7; + } + + return -1; +} + +static void pSeries_lpar_hptab_clear(void) +{ + unsigned long size_bytes = 1UL << ppc64_pft_size; + unsigned long hpte_count = size_bytes >> 4; + unsigned long dummy1, dummy2; + int i; + + /* TODO: Use bulk call */ + for (i = 0; i < hpte_count; i++) + plpar_pte_remove(0, i, 0, &dummy1, &dummy2); +} + +/* + * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and + * the low 3 bits of flags happen to line up. So no transform is needed. + * We can probably optimize here and assume the high bits of newpp are + * already zero. For now I am paranoid. + */ +static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int large, int local) +{ + unsigned long lpar_rc; + unsigned long flags = (newpp & 7) | H_AVPN; + unsigned long avpn = va >> 23; + + if (large) + avpn &= ~0x1UL; + + lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7)); + + if (lpar_rc == H_Not_Found) + return -1; + + BUG_ON(lpar_rc != H_Success); + + return 0; +} + +static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) +{ + unsigned long dword0; + unsigned long lpar_rc; + unsigned long dummy_word1; + unsigned long flags; + + /* Read 1 pte at a time */ + /* Do not need RPN to logical page translation */ + /* No cross CEC PFT access */ + flags = 0; + + lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1); + + BUG_ON(lpar_rc != H_Success); + + return dword0; +} + +static long pSeries_lpar_hpte_find(unsigned long vpn) +{ + unsigned long hash; + unsigned long i, j; + long slot; + unsigned long hpte_v; + + hash = hpt_hash(vpn, 0); + + for (j = 0; j < 2; j++) { + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + for (i = 0; i < HPTES_PER_GROUP; i++) { + hpte_v = pSeries_lpar_hpte_getword0(slot); + + if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) + && (hpte_v & HPTE_V_VALID) + && (!!(hpte_v & HPTE_V_SECONDARY) == j)) { + /* HPTE matches */ + if (j) + slot = -slot; + return slot; + } + ++slot; + } + hash = ~hash; + } + + return -1; +} + +static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, + unsigned long ea) +{ + unsigned long lpar_rc; + unsigned long vsid, va, vpn, flags; + long slot; + + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> PAGE_SHIFT; + + slot = pSeries_lpar_hpte_find(vpn); + BUG_ON(slot == -1); + + flags = newpp & 7; + lpar_rc = plpar_pte_protect(flags, slot, 0); + + BUG_ON(lpar_rc != H_Success); +} + +static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, + int large, int local) +{ + unsigned long avpn = va >> 23; + unsigned long lpar_rc; + unsigned long dummy1, dummy2; + + if (large) + avpn &= ~0x1UL; + + lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1, + &dummy2); + + if (lpar_rc == H_Not_Found) + return; + + BUG_ON(lpar_rc != H_Success); +} + +/* + * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie + * lock. + */ +void pSeries_lpar_flush_hash_range(unsigned long number, int local) +{ + int i; + unsigned long flags = 0; + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); + + for (i = 0; i < number; i++) + flush_hash_page(batch->vaddr[i], batch->pte[i], local); + + if (lock_tlbie) + spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); +} + +void hpte_init_lpar(void) +{ + ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate; + ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp; + ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; + ppc_md.hpte_insert = pSeries_lpar_hpte_insert; + ppc_md.hpte_remove = pSeries_lpar_hpte_remove; + ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; + ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; + + htab_finish_init(); +} diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c new file mode 100644 index 00000000000..18abfb1f4e2 --- /dev/null +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -0,0 +1,148 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /dev/nvram driver for PPC64 + * + * This perhaps should live in drivers/char + */ + + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <asm/uaccess.h> +#include <asm/nvram.h> +#include <asm/rtas.h> +#include <asm/prom.h> +#include <asm/machdep.h> + +static unsigned int nvram_size; +static int nvram_fetch, nvram_store; +static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ +static DEFINE_SPINLOCK(nvram_lock); + + +static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + int done; + unsigned long flags; + char *p = buf; + + + if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + memcpy(p, nvram_buf, len); + + p += len; + i += len; + } + + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + int done; + unsigned long flags; + const char *p = buf; + + if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + memcpy(nvram_buf, p, len); + + if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + p += len; + i += len; + } + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_get_size(void) +{ + return nvram_size ? nvram_size : -ENODEV; +} + +int __init pSeries_nvram_init(void) +{ + struct device_node *nvram; + unsigned int *nbytes_p, proplen; + + nvram = of_find_node_by_type(NULL, "nvram"); + if (nvram == NULL) + return -ENODEV; + + nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen); + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) + return -EIO; + + nvram_size = *nbytes_p; + + nvram_fetch = rtas_token("nvram-fetch"); + nvram_store = rtas_token("nvram-store"); + printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); + of_node_put(nvram); + + ppc_md.nvram_read = pSeries_nvram_read; + ppc_md.nvram_write = pSeries_nvram_write; + ppc_md.nvram_size = pSeries_nvram_get_size; + + return 0; +} diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c new file mode 100644 index 00000000000..c198656a3bb --- /dev/null +++ b/arch/powerpc/platforms/pseries/pci.c @@ -0,0 +1,142 @@ +/* + * arch/ppc64/kernel/pSeries_pci.c + * + * Copyright (C) 2001 Dave Engebretsen, IBM Corporation + * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM + * + * pSeries specific routines for PCI. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include <asm/pci-bridge.h> +#include <asm/prom.h> +#include <asm/ppc-pci.h> + +static int __devinitdata s7a_workaround = -1; + +#if 0 +void pcibios_name_device(struct pci_dev *dev) +{ + struct device_node *dn; + + /* + * Add IBM loc code (slot) as a prefix to the device names for service + */ + dn = pci_device_to_OF_node(dev); + if (dn) { + char *loc_code = get_property(dn, "ibm,loc-code", 0); + if (loc_code) { + int loc_len = strlen(loc_code); + if (loc_len < sizeof(dev->dev.name)) { + memmove(dev->dev.name+loc_len+1, dev->dev.name, + sizeof(dev->dev.name)-loc_len-1); + memcpy(dev->dev.name, loc_code, loc_len); + dev->dev.name[loc_len] = ' '; + dev->dev.name[sizeof(dev->dev.name)-1] = '\0'; + } + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); +#endif + +static void __devinit check_s7a(void) +{ + struct device_node *root; + char *model; + + s7a_workaround = 0; + root = of_find_node_by_path("/"); + if (root) { + model = get_property(root, "model", NULL); + if (model && !strcmp(model, "IBM,7013-S7A")) + s7a_workaround = 1; + of_node_put(root); + } +} + +void __devinit pSeries_irq_bus_setup(struct pci_bus *bus) +{ + struct pci_dev *dev; + + if (s7a_workaround < 0) + check_s7a(); + list_for_each_entry(dev, &bus->devices, bus_list) { + pci_read_irq_line(dev); + if (s7a_workaround) { + if (dev->irq > 16) { + dev->irq -= 3; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, + dev->irq); + } + } + } +} + +static void __init pSeries_request_regions(void) +{ + if (!isa_io_base) + return; + + request_region(0x20,0x20,"pic1"); + request_region(0xa0,0x20,"pic2"); + request_region(0x00,0x20,"dma1"); + request_region(0x40,0x20,"timer"); + request_region(0x80,0x10,"dma page reg"); + request_region(0xc0,0x20,"dma2"); +} + +void __init pSeries_final_fixup(void) +{ + phbs_remap_io(); + pSeries_request_regions(); + + pci_addr_cache_build(); +} + +/* + * Assume the winbond 82c105 is the IDE controller on a + * p610. We should probably be more careful in case + * someone tries to plug in a similar adapter. + */ +static void fixup_winbond_82c105(struct pci_dev* dev) +{ + int i; + unsigned int reg; + + if (!(systemcfg->platform & PLATFORM_PSERIES)) + return; + + printk("Using INTC for W82c105 IDE controller.\n"); + pci_read_config_dword(dev, 0x40, ®); + /* Enable LEGIRQ to use INTC instead of ISA interrupts */ + pci_write_config_dword(dev, 0x40, reg | (1<<11)); + + for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) { + /* zap the 2nd function of the winbond chip */ + if (dev->resource[i].flags & IORESOURCE_IO + && dev->bus->number == 0 && dev->devfn == 0x81) + dev->resource[i].flags &= ~IORESOURCE_IO; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, + fixup_winbond_82c105); diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c new file mode 100644 index 00000000000..58c61219d08 --- /dev/null +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -0,0 +1,426 @@ +/* + * pSeries_reconfig.c - support for dynamic reconfiguration (including PCI + * Hotplug and Dynamic Logical Partitioning on RPA platforms). + * + * Copyright (C) 2005 Nathan Lynch + * Copyright (C) 2005 IBM Corporation + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/kref.h> +#include <linux/notifier.h> +#include <linux/proc_fs.h> + +#include <asm/prom.h> +#include <asm/pSeries_reconfig.h> +#include <asm/uaccess.h> + + + +/* + * Routines for "runtime" addition and removal of device tree nodes. + */ +#ifdef CONFIG_PROC_DEVICETREE +/* + * Add a node to /proc/device-tree. + */ +static void add_node_proc_entries(struct device_node *np) +{ + struct proc_dir_entry *ent; + + ent = proc_mkdir(strrchr(np->full_name, '/') + 1, np->parent->pde); + if (ent) + proc_device_tree_add_node(np, ent); +} + +static void remove_node_proc_entries(struct device_node *np) +{ + struct property *pp = np->properties; + struct device_node *parent = np->parent; + + while (pp) { + remove_proc_entry(pp->name, np->pde); + pp = pp->next; + } + if (np->pde) + remove_proc_entry(np->pde->name, parent->pde); +} +#else /* !CONFIG_PROC_DEVICETREE */ +static void add_node_proc_entries(struct device_node *np) +{ + return; +} + +static void remove_node_proc_entries(struct device_node *np) +{ + return; +} +#endif /* CONFIG_PROC_DEVICETREE */ + +/** + * derive_parent - basically like dirname(1) + * @path: the full_name of a node to be added to the tree + * + * Returns the node which should be the parent of the node + * described by path. E.g., for path = "/foo/bar", returns + * the node with full_name = "/foo". + */ +static struct device_node *derive_parent(const char *path) +{ + struct device_node *parent = NULL; + char *parent_path = "/"; + size_t parent_path_len = strrchr(path, '/') - path + 1; + + /* reject if path is "/" */ + if (!strcmp(path, "/")) + return ERR_PTR(-EINVAL); + + if (strrchr(path, '/') != path) { + parent_path = kmalloc(parent_path_len, GFP_KERNEL); + if (!parent_path) + return ERR_PTR(-ENOMEM); + strlcpy(parent_path, path, parent_path_len); + } + parent = of_find_node_by_path(parent_path); + if (!parent) + return ERR_PTR(-EINVAL); + if (strcmp(parent_path, "/")) + kfree(parent_path); + return parent; +} + +static struct notifier_block *pSeries_reconfig_chain; + +int pSeries_reconfig_notifier_register(struct notifier_block *nb) +{ + return notifier_chain_register(&pSeries_reconfig_chain, nb); +} + +void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) +{ + notifier_chain_unregister(&pSeries_reconfig_chain, nb); +} + +static int pSeries_reconfig_add_node(const char *path, struct property *proplist) +{ + struct device_node *np; + int err = -ENOMEM; + + np = kzalloc(sizeof(*np), GFP_KERNEL); + if (!np) + goto out_err; + + np->full_name = kmalloc(strlen(path) + 1, GFP_KERNEL); + if (!np->full_name) + goto out_err; + + strcpy(np->full_name, path); + + np->properties = proplist; + OF_MARK_DYNAMIC(np); + kref_init(&np->kref); + + np->parent = derive_parent(path); + if (IS_ERR(np->parent)) { + err = PTR_ERR(np->parent); + goto out_err; + } + + err = notifier_call_chain(&pSeries_reconfig_chain, + PSERIES_RECONFIG_ADD, np); + if (err == NOTIFY_BAD) { + printk(KERN_ERR "Failed to add device node %s\n", path); + err = -ENOMEM; /* For now, safe to assume kmalloc failure */ + goto out_err; + } + + of_attach_node(np); + + add_node_proc_entries(np); + + of_node_put(np->parent); + + return 0; + +out_err: + if (np) { + of_node_put(np->parent); + kfree(np->full_name); + kfree(np); + } + return err; +} + +static int pSeries_reconfig_remove_node(struct device_node *np) +{ + struct device_node *parent, *child; + + parent = of_get_parent(np); + if (!parent) + return -EINVAL; + + if ((child = of_get_next_child(np, NULL))) { + of_node_put(child); + return -EBUSY; + } + + remove_node_proc_entries(np); + + notifier_call_chain(&pSeries_reconfig_chain, + PSERIES_RECONFIG_REMOVE, np); + of_detach_node(np); + + of_node_put(parent); + of_node_put(np); /* Must decrement the refcount */ + return 0; +} + +/* + * /proc/ppc64/ofdt - yucky binary interface for adding and removing + * OF device nodes. Should be deprecated as soon as we get an + * in-kernel wrapper for the RTAS ibm,configure-connector call. + */ + +static void release_prop_list(const struct property *prop) +{ + struct property *next; + for (; prop; prop = next) { + next = prop->next; + kfree(prop->name); + kfree(prop->value); + kfree(prop); + } + +} + +/** + * parse_next_property - process the next property from raw input buffer + * @buf: input buffer, must be nul-terminated + * @end: end of the input buffer + 1, for validation + * @name: return value; set to property name in buf + * @length: return value; set to length of value + * @value: return value; set to the property value in buf + * + * Note that the caller must make copies of the name and value returned, + * this function does no allocation or copying of the data. Return value + * is set to the next name in buf, or NULL on error. + */ +static char * parse_next_property(char *buf, char *end, char **name, int *length, + unsigned char **value) +{ + char *tmp; + + *name = buf; + + tmp = strchr(buf, ' '); + if (!tmp) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + *tmp = '\0'; + + if (++tmp >= end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + + /* now we're on the length */ + *length = -1; + *length = simple_strtoul(tmp, &tmp, 10); + if (*length == -1) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + if (*tmp != ' ' || ++tmp >= end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + + /* now we're on the value */ + *value = tmp; + tmp += *length; + if (tmp > end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + else if (tmp < end && *tmp != ' ' && *tmp != '\0') { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + tmp++; + + /* and now we should be on the next name, or the end */ + return tmp; +} + +static struct property *new_property(const char *name, const int length, + const unsigned char *value, struct property *last) +{ + struct property *new = kmalloc(sizeof(*new), GFP_KERNEL); + + if (!new) + return NULL; + memset(new, 0, sizeof(*new)); + + if (!(new->name = kmalloc(strlen(name) + 1, GFP_KERNEL))) + goto cleanup; + if (!(new->value = kmalloc(length + 1, GFP_KERNEL))) + goto cleanup; + + strcpy(new->name, name); + memcpy(new->value, value, length); + *(((char *)new->value) + length) = 0; + new->length = length; + new->next = last; + return new; + +cleanup: + if (new->name) + kfree(new->name); + if (new->value) + kfree(new->value); + kfree(new); + return NULL; +} + +static int do_add_node(char *buf, size_t bufsize) +{ + char *path, *end, *name; + struct device_node *np; + struct property *prop = NULL; + unsigned char* value; + int length, rv = 0; + + end = buf + bufsize; + path = buf; + buf = strchr(buf, ' '); + if (!buf) + return -EINVAL; + *buf = '\0'; + buf++; + + if ((np = of_find_node_by_path(path))) { + of_node_put(np); + return -EINVAL; + } + + /* rv = build_prop_list(tmp, bufsize - (tmp - buf), &proplist); */ + while (buf < end && + (buf = parse_next_property(buf, end, &name, &length, &value))) { + struct property *last = prop; + + prop = new_property(name, length, value, last); + if (!prop) { + rv = -ENOMEM; + prop = last; + goto out; + } + } + if (!buf) { + rv = -EINVAL; + goto out; + } + + rv = pSeries_reconfig_add_node(path, prop); + +out: + if (rv) + release_prop_list(prop); + return rv; +} + +static int do_remove_node(char *buf) +{ + struct device_node *node; + int rv = -ENODEV; + + if ((node = of_find_node_by_path(buf))) + rv = pSeries_reconfig_remove_node(node); + + of_node_put(node); + return rv; +} + +/** + * ofdt_write - perform operations on the Open Firmware device tree + * + * @file: not used + * @buf: command and arguments + * @count: size of the command buffer + * @off: not used + * + * Operations supported at this time are addition and removal of + * whole nodes along with their properties. Operations on individual + * properties are not implemented (yet). + */ +static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count, + loff_t *off) +{ + int rv = 0; + char *kbuf; + char *tmp; + + if (!(kbuf = kmalloc(count + 1, GFP_KERNEL))) { + rv = -ENOMEM; + goto out; + } + if (copy_from_user(kbuf, buf, count)) { + rv = -EFAULT; + goto out; + } + + kbuf[count] = '\0'; + + tmp = strchr(kbuf, ' '); + if (!tmp) { + rv = -EINVAL; + goto out; + } + *tmp = '\0'; + tmp++; + + if (!strcmp(kbuf, "add_node")) + rv = do_add_node(tmp, count - (tmp - kbuf)); + else if (!strcmp(kbuf, "remove_node")) + rv = do_remove_node(tmp); + else + rv = -EINVAL; +out: + kfree(kbuf); + return rv ? rv : count; +} + +static struct file_operations ofdt_fops = { + .write = ofdt_write +}; + +/* create /proc/ppc64/ofdt write-only by root */ +static int proc_ppc64_create_ofdt(void) +{ + struct proc_dir_entry *ent; + + if (!(systemcfg->platform & PLATFORM_PSERIES)) + return 0; + + ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL); + if (ent) { + ent->nlink = 1; + ent->data = NULL; + ent->size = 0; + ent->proc_fops = &ofdt_fops; + } + + return 0; +} +__initcall(proc_ppc64_create_ofdt); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c new file mode 100644 index 00000000000..eb25ee2eead --- /dev/null +++ b/arch/powerpc/platforms/pseries/setup.c @@ -0,0 +1,622 @@ +/* + * linux/arch/ppc/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * bootup setup stuff.. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/tty.h> +#include <linux/major.h> +#include <linux/interrupt.h> +#include <linux/reboot.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/console.h> +#include <linux/pci.h> +#include <linux/utsname.h> +#include <linux/adb.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> + +#include <asm/mmu.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/pci-bridge.h> +#include <asm/iommu.h> +#include <asm/dma.h> +#include <asm/machdep.h> +#include <asm/irq.h> +#include <asm/time.h> +#include <asm/nvram.h> +#include <asm/plpar_wrappers.h> +#include <asm/xics.h> +#include <asm/firmware.h> +#include <asm/pmc.h> +#include <asm/mpic.h> +#include <asm/ppc-pci.h> +#include <asm/i8259.h> +#include <asm/udbg.h> + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +extern void find_udbg_vterm(void); +extern void system_reset_fwnmi(void); /* from head.S */ +extern void machine_check_fwnmi(void); /* from head.S */ +extern void generic_find_legacy_serial_ports(u64 *physport, + unsigned int *default_speed); + +int fwnmi_active; /* TRUE if an FWNMI handler is present */ + +extern void pSeries_system_reset_exception(struct pt_regs *regs); +extern int pSeries_machine_check_exception(struct pt_regs *regs); + +static int pseries_shared_idle(void); +static int pseries_dedicated_idle(void); + +static volatile void __iomem * chrp_int_ack_special; +struct mpic *pSeries_mpic; + +void pSeries_get_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + of_node_put(root); +} + +/* Initialize firmware assisted non-maskable interrupts if + * the firmware supports this feature. + * + */ +static void __init fwnmi_init(void) +{ + int ret; + int ibm_nmi_register = rtas_token("ibm,nmi-register"); + if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) + return; + ret = rtas_call(ibm_nmi_register, 2, 1, NULL, + __pa((unsigned long)system_reset_fwnmi), + __pa((unsigned long)machine_check_fwnmi)); + if (ret == 0) + fwnmi_active = 1; +} + +static int pSeries_irq_cascade(struct pt_regs *regs, void *data) +{ + if (chrp_int_ack_special) + return readb(chrp_int_ack_special); + else + return i8259_irq(regs); +} + +static void __init pSeries_init_mpic(void) +{ + unsigned int *addrp; + struct device_node *np; + int i; + + /* All ISUs are setup, complete initialization */ + mpic_init(pSeries_mpic); + + /* Check what kind of cascade ACK we have */ + if (!(np = of_find_node_by_name(NULL, "pci")) + || !(addrp = (unsigned int *) + get_property(np, "8259-interrupt-acknowledge", NULL))) + printk(KERN_ERR "Cannot find pci to get ack address\n"); + else + chrp_int_ack_special = ioremap(addrp[prom_n_addr_cells(np)-1], 1); + of_node_put(np); + + /* Setup the legacy interrupts & controller */ + for (i = 0; i < NUM_ISA_INTERRUPTS; i++) + irq_desc[i].handler = &i8259_pic; + i8259_init(0); + + /* Hook cascade to mpic */ + mpic_setup_cascade(NUM_ISA_INTERRUPTS, pSeries_irq_cascade, NULL); +} + +static void __init pSeries_setup_mpic(void) +{ + unsigned int *opprop; + unsigned long openpic_addr = 0; + unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS]; + struct device_node *root; + int irq_count; + + /* Find the Open PIC if present */ + root = of_find_node_by_path("/"); + opprop = (unsigned int *) get_property(root, "platform-open-pic", NULL); + if (opprop != 0) { + int n = prom_n_addr_cells(root); + + for (openpic_addr = 0; n > 0; --n) + openpic_addr = (openpic_addr << 32) + *opprop++; + printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); + } + of_node_put(root); + + BUG_ON(openpic_addr == 0); + + /* Get the sense values from OF */ + prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS); + + /* Setup the openpic driver */ + irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */ + pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY, + 16, 16, irq_count, /* isu size, irq offset, irq count */ + NR_IRQS - 4, /* ipi offset */ + senses, irq_count, /* sense & sense size */ + " MPIC "); +} + +static void pseries_lpar_enable_pmcs(void) +{ + unsigned long set, reset; + + power4_enable_pmcs(); + + set = 1UL << 63; + reset = 0; + plpar_hcall_norets(H_PERFMON, set, reset); + + /* instruct hypervisor to maintain PMCs */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) + get_paca()->lppaca.pmcregs_in_use = 1; +} + +static void __init pSeries_setup_arch(void) +{ + /* Fixup ppc_md depending on the type of interrupt controller */ + if (ppc64_interrupt_controller == IC_OPEN_PIC) { + ppc_md.init_IRQ = pSeries_init_mpic; + ppc_md.get_irq = mpic_get_irq; + ppc_md.cpu_irq_down = mpic_teardown_this_cpu; + /* Allocate the mpic now, so that find_and_init_phbs() can + * fill the ISUs */ + pSeries_setup_mpic(); + } else { + ppc_md.init_IRQ = xics_init_IRQ; + ppc_md.get_irq = xics_get_irq; + ppc_md.cpu_irq_down = xics_teardown_cpu; + } + +#ifdef CONFIG_SMP + smp_init_pSeries(); +#endif + /* openpic global configuration register (64-bit format). */ + /* openpic Interrupt Source Unit pointer (64-bit format). */ + /* python0 facility area (mmio) (64-bit format) REAL address. */ + + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + if (ROOT_DEV == 0) { + printk("No ramdisk, default root is /dev/sda2\n"); + ROOT_DEV = Root_SDA2; + } + + fwnmi_init(); + + /* Find and initialize PCI host bridges */ + init_pci_config_tokens(); + find_and_init_phbs(); + eeh_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + pSeries_nvram_init(); + + /* Choose an idle loop */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + vpa_init(boot_cpuid); + if (get_paca()->lppaca.shared_proc) { + printk(KERN_INFO "Using shared processor idle loop\n"); + ppc_md.idle_loop = pseries_shared_idle; + } else { + printk(KERN_INFO "Using dedicated idle loop\n"); + ppc_md.idle_loop = pseries_dedicated_idle; + } + } else { + printk(KERN_INFO "Using default idle loop\n"); + ppc_md.idle_loop = default_idle; + } + + if (systemcfg->platform & PLATFORM_LPAR) + ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; + else + ppc_md.enable_pmcs = power4_enable_pmcs; +} + +static int __init pSeries_init_panel(void) +{ + /* Manually leave the kernel version on the panel. */ + ppc_md.progress("Linux ppc64\n", 0); + ppc_md.progress(system_utsname.version, 0); + + return 0; +} +arch_initcall(pSeries_init_panel); + + +/* Build up the ppc64_firmware_features bitmask field + * using contents of device-tree/ibm,hypertas-functions. + * Ultimately this functionality may be moved into prom.c prom_init(). + */ +static void __init fw_feature_init(void) +{ + struct device_node * dn; + char * hypertas; + unsigned int len; + + DBG(" -> fw_feature_init()\n"); + + ppc64_firmware_features = 0; + dn = of_find_node_by_path("/rtas"); + if (dn == NULL) { + printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n"); + goto no_rtas; + } + + hypertas = get_property(dn, "ibm,hypertas-functions", &len); + if (hypertas) { + while (len > 0){ + int i, hypertas_len; + /* check value against table of strings */ + for(i=0; i < FIRMWARE_MAX_FEATURES ;i++) { + if ((firmware_features_table[i].name) && + (strcmp(firmware_features_table[i].name,hypertas))==0) { + /* we have a match */ + ppc64_firmware_features |= + (firmware_features_table[i].val); + break; + } + } + hypertas_len = strlen(hypertas); + len -= hypertas_len +1; + hypertas+= hypertas_len +1; + } + } + + of_node_put(dn); + no_rtas: + printk(KERN_INFO "firmware_features = 0x%lx\n", + ppc64_firmware_features); + + DBG(" <- fw_feature_init()\n"); +} + + +static void __init pSeries_discover_pic(void) +{ + struct device_node *np; + char *typep; + + /* + * Setup interrupt mapping options that are needed for finish_device_tree + * to properly parse the OF interrupt tree & do the virtual irq mapping + */ + __irq_offset_value = NUM_ISA_INTERRUPTS; + ppc64_interrupt_controller = IC_INVALID; + for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) { + typep = (char *)get_property(np, "compatible", NULL); + if (strstr(typep, "open-pic")) + ppc64_interrupt_controller = IC_OPEN_PIC; + else if (strstr(typep, "ppc-xicp")) + ppc64_interrupt_controller = IC_PPC_XIC; + else + printk("pSeries_discover_pic: failed to recognize" + " interrupt-controller\n"); + break; + } +} + +static void pSeries_mach_cpu_die(void) +{ + local_irq_disable(); + idle_task_exit(); + /* Some hardware requires clearing the CPPR, while other hardware does not + * it is safe either way + */ + pSeriesLP_cppr_info(0, 0); + rtas_stop_self(); + /* Should never get here... */ + BUG(); + for(;;); +} + + +/* + * Early initialization. Relocation is on but do not reference unbolted pages + */ +static void __init pSeries_init_early(void) +{ + void *comport; + int iommu_off = 0; + unsigned int default_speed; + u64 physport; + + DBG(" -> pSeries_init_early()\n"); + + fw_feature_init(); + + if (systemcfg->platform & PLATFORM_LPAR) + hpte_init_lpar(); + else { + hpte_init_native(); + iommu_off = (of_chosen && + get_property(of_chosen, "linux,iommu-off", NULL)); + } + + generic_find_legacy_serial_ports(&physport, &default_speed); + + if (systemcfg->platform & PLATFORM_LPAR) + find_udbg_vterm(); + else if (physport) { + /* Map the uart for udbg. */ + comport = (void *)ioremap(physport, 16); + udbg_init_uart(comport, default_speed); + + DBG("Hello World !\n"); + } + + + iommu_init_early_pSeries(); + + pSeries_discover_pic(); + + DBG(" <- pSeries_init_early()\n"); +} + + +static int pSeries_check_legacy_ioport(unsigned int baseport) +{ + struct device_node *np; + +#define I8042_DATA_REG 0x60 +#define FDC_BASE 0x3f0 + + + switch(baseport) { + case I8042_DATA_REG: + np = of_find_node_by_type(NULL, "8042"); + if (np == NULL) + return -ENODEV; + of_node_put(np); + break; + case FDC_BASE: + np = of_find_node_by_type(NULL, "fdc"); + if (np == NULL) + return -ENODEV; + of_node_put(np); + break; + } + return 0; +} + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +extern struct machdep_calls pSeries_md; + +static int __init pSeries_probe(int platform) +{ + if (platform != PLATFORM_PSERIES && + platform != PLATFORM_PSERIES_LPAR) + return 0; + + /* if we have some ppc_md fixups for LPAR to do, do + * it here ... + */ + + return 1; +} + +DECLARE_PER_CPU(unsigned long, smt_snooze_delay); + +static inline void dedicated_idle_sleep(unsigned int cpu) +{ + struct paca_struct *ppaca = &paca[cpu ^ 1]; + + /* Only sleep if the other thread is not idle */ + if (!(ppaca->lppaca.idle)) { + local_irq_disable(); + + /* + * We are about to sleep the thread and so wont be polling any + * more. + */ + clear_thread_flag(TIF_POLLING_NRFLAG); + + /* + * SMT dynamic mode. Cede will result in this thread going + * dormant, if the partner thread is still doing work. Thread + * wakes up if partner goes idle, an interrupt is presented, or + * a prod occurs. Returning from the cede enables external + * interrupts. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + } else { + /* + * Give the HV an opportunity at the processor, since we are + * not doing any work. + */ + poll_pending(); + } +} + +static int pseries_dedicated_idle(void) +{ + long oldval; + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); + unsigned long start_snooze; + unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + start_snooze = __get_tb() + + *smt_snooze_delay * tb_ticks_per_usec; + + while (!need_resched() && !cpu_is_offline(cpu)) { + ppc64_runlatch_off(); + + /* + * Go into low thread priority and possibly + * low power mode. + */ + HMT_low(); + HMT_very_low(); + + if (*smt_snooze_delay != 0 && + __get_tb() > start_snooze) { + HMT_medium(); + dedicated_idle_sleep(cpu); + } + + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + + schedule(); + + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } +} + +static int pseries_shared_idle(void) +{ + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + while (!need_resched() && !cpu_is_offline(cpu)) { + local_irq_disable(); + ppc64_runlatch_off(); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + * + * Check need_resched() again with interrupts disabled + * to avoid a race. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + + HMT_medium(); + } + + lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + + schedule(); + + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } + + return 0; +} + +static int pSeries_pci_probe_mode(struct pci_bus *bus) +{ + if (systemcfg->platform & PLATFORM_LPAR) + return PCI_PROBE_DEVTREE; + return PCI_PROBE_NORMAL; +} + +struct machdep_calls __initdata pSeries_md = { + .probe = pSeries_probe, + .setup_arch = pSeries_setup_arch, + .init_early = pSeries_init_early, + .get_cpuinfo = pSeries_get_cpuinfo, + .log_error = pSeries_log_error, + .pcibios_fixup = pSeries_final_fixup, + .pci_probe_mode = pSeries_pci_probe_mode, + .irq_bus_setup = pSeries_irq_bus_setup, + .restart = rtas_restart, + .power_off = rtas_power_off, + .halt = rtas_halt, + .panic = rtas_os_term, + .cpu_die = pSeries_mach_cpu_die, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = rtas_progress, + .check_legacy_ioport = pSeries_check_legacy_ioport, + .system_reset_exception = pSeries_system_reset_exception, + .machine_check_exception = pSeries_machine_check_exception, +}; diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c new file mode 100644 index 00000000000..ae1bd270f30 --- /dev/null +++ b/arch/powerpc/platforms/pseries/smp.c @@ -0,0 +1,471 @@ +/* + * SMP support for pSeries machines. + * + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * Plus various changes from other IBM teams... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/cache.h> +#include <linux/err.h> +#include <linux/sysdev.h> +#include <linux/cpu.h> + +#include <asm/ptrace.h> +#include <asm/atomic.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/paca.h> +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/xics.h> +#include <asm/cputable.h> +#include <asm/firmware.h> +#include <asm/system.h> +#include <asm/rtas.h> +#include <asm/plpar_wrappers.h> +#include <asm/pSeries_reconfig.h> +#include <asm/mpic.h> + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * The primary thread of each non-boot processor is recorded here before + * smp init. + */ +static cpumask_t of_spin_map; + +extern void pSeries_secondary_smp_init(unsigned long); + +#ifdef CONFIG_HOTPLUG_CPU + +/* Get state of physical CPU. + * Return codes: + * 0 - The processor is in the RTAS stopped state + * 1 - stop-self is in progress + * 2 - The processor is not in the RTAS stopped state + * -1 - Hardware Error + * -2 - Hardware Busy, Try again later. + */ +static int query_cpu_stopped(unsigned int pcpu) +{ + int cpu_status; + int status, qcss_tok; + + qcss_tok = rtas_token("query-cpu-stopped-state"); + if (qcss_tok == RTAS_UNKNOWN_SERVICE) + return -1; + status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu); + if (status != 0) { + printk(KERN_ERR + "RTAS query-cpu-stopped-state failed: %i\n", status); + return status; + } + + return cpu_status; +} + +int pSeries_cpu_disable(void) +{ + int cpu = smp_processor_id(); + + cpu_clear(cpu, cpu_online_map); + systemcfg->processorCount--; + + /*fix boot_cpuid here*/ + if (cpu == boot_cpuid) + boot_cpuid = any_online_cpu(cpu_online_map); + + /* FIXME: abstract this to not be platform specific later on */ + xics_migrate_irqs_away(); + return 0; +} + +void pSeries_cpu_die(unsigned int cpu) +{ + int tries; + int cpu_status; + unsigned int pcpu = get_hard_smp_processor_id(cpu); + + for (tries = 0; tries < 25; tries++) { + cpu_status = query_cpu_stopped(pcpu); + if (cpu_status == 0 || cpu_status == -1) + break; + msleep(200); + } + if (cpu_status != 0) { + printk("Querying DEAD? cpu %i (%i) shows %i\n", + cpu, pcpu, cpu_status); + } + + /* Isolation and deallocation are definatly done by + * drslot_chrp_cpu. If they were not they would be + * done here. Change isolate state to Isolate and + * change allocation-state to Unusable. + */ + paca[cpu].cpu_start = 0; +} + +/* + * Update cpu_present_map and paca(s) for a new cpu node. The wrinkle + * here is that a cpu device node may represent up to two logical cpus + * in the SMT case. We must honor the assumption in other code that + * the logical ids for sibling SMT threads x and y are adjacent, such + * that x^1 == y and y^1 == x. + */ +static int pSeries_add_processor(struct device_node *np) +{ + unsigned int cpu; + cpumask_t candidate_map, tmp = CPU_MASK_NONE; + int err = -ENOSPC, len, nthreads, i; + u32 *intserv; + + intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return 0; + + nthreads = len / sizeof(u32); + for (i = 0; i < nthreads; i++) + cpu_set(i, tmp); + + lock_cpu_hotplug(); + + BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map)); + + /* Get a bitmap of unoccupied slots. */ + cpus_xor(candidate_map, cpu_possible_map, cpu_present_map); + if (cpus_empty(candidate_map)) { + /* If we get here, it most likely means that NR_CPUS is + * less than the partition's max processors setting. + */ + printk(KERN_ERR "Cannot add cpu %s; this system configuration" + " supports %d logical cpus.\n", np->full_name, + cpus_weight(cpu_possible_map)); + goto out_unlock; + } + + while (!cpus_empty(tmp)) + if (cpus_subset(tmp, candidate_map)) + /* Found a range where we can insert the new cpu(s) */ + break; + else + cpus_shift_left(tmp, tmp, nthreads); + + if (cpus_empty(tmp)) { + printk(KERN_ERR "Unable to find space in cpu_present_map for" + " processor %s with %d thread(s)\n", np->name, + nthreads); + goto out_unlock; + } + + for_each_cpu_mask(cpu, tmp) { + BUG_ON(cpu_isset(cpu, cpu_present_map)); + cpu_set(cpu, cpu_present_map); + set_hard_smp_processor_id(cpu, *intserv++); + } + err = 0; +out_unlock: + unlock_cpu_hotplug(); + return err; +} + +/* + * Update the present map for a cpu node which is going away, and set + * the hard id in the paca(s) to -1 to be consistent with boot time + * convention for non-present cpus. + */ +static void pSeries_remove_processor(struct device_node *np) +{ + unsigned int cpu; + int len, nthreads, i; + u32 *intserv; + + intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return; + + nthreads = len / sizeof(u32); + + lock_cpu_hotplug(); + for (i = 0; i < nthreads; i++) { + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != intserv[i]) + continue; + BUG_ON(cpu_online(cpu)); + cpu_clear(cpu, cpu_present_map); + set_hard_smp_processor_id(cpu, -1); + break; + } + if (cpu == NR_CPUS) + printk(KERN_WARNING "Could not find cpu to remove " + "with physical id 0x%x\n", intserv[i]); + } + unlock_cpu_hotplug(); +} + +static int pSeries_smp_notifier(struct notifier_block *nb, unsigned long action, void *node) +{ + int err = NOTIFY_OK; + + switch (action) { + case PSERIES_RECONFIG_ADD: + if (pSeries_add_processor(node)) + err = NOTIFY_BAD; + break; + case PSERIES_RECONFIG_REMOVE: + pSeries_remove_processor(node); + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block pSeries_smp_nb = { + .notifier_call = pSeries_smp_notifier, +}; + +#endif /* CONFIG_HOTPLUG_CPU */ + +/** + * smp_startup_cpu() - start the given cpu + * + * At boot time, there is nothing to do for primary threads which were + * started from Open Firmware. For anything else, call RTAS with the + * appropriate start location. + * + * Returns: + * 0 - failure + * 1 - success + */ +static inline int __devinit smp_startup_cpu(unsigned int lcpu) +{ + int status; + unsigned long start_here = __pa((u32)*((unsigned long *) + pSeries_secondary_smp_init)); + unsigned int pcpu; + int start_cpu; + + if (cpu_isset(lcpu, of_spin_map)) + /* Already started by OF and sitting in spin loop */ + return 1; + + pcpu = get_hard_smp_processor_id(lcpu); + + /* Fixup atomic count: it exited inside IRQ handler. */ + paca[lcpu].__current->thread_info->preempt_count = 0; + + /* + * If the RTAS start-cpu token does not exist then presume the + * cpu is already spinning. + */ + start_cpu = rtas_token("start-cpu"); + if (start_cpu == RTAS_UNKNOWN_SERVICE) + return 1; + + status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu); + if (status != 0) { + printk(KERN_ERR "start-cpu failed: %i\n", status); + return 0; + } + + return 1; +} + +#ifdef CONFIG_XICS +static inline void smp_xics_do_message(int cpu, int msg) +{ + set_bit(msg, &xics_ipi_message[cpu].value); + mb(); + xics_cause_IPI(cpu); +} + +static void smp_xics_message_pass(int target, int msg) +{ + unsigned int i; + + if (target < NR_CPUS) { + smp_xics_do_message(target, msg); + } else { + for_each_online_cpu(i) { + if (target == MSG_ALL_BUT_SELF + && i == smp_processor_id()) + continue; + smp_xics_do_message(i, msg); + } + } +} + +static int __init smp_xics_probe(void) +{ + xics_request_IPIs(); + + return cpus_weight(cpu_possible_map); +} + +static void __devinit smp_xics_setup_cpu(int cpu) +{ + if (cpu != boot_cpuid) + xics_setup_cpu(); + + if (firmware_has_feature(FW_FEATURE_SPLPAR)) + vpa_init(cpu); + + cpu_clear(cpu, of_spin_map); + +} +#endif /* CONFIG_XICS */ + +static DEFINE_SPINLOCK(timebase_lock); +static unsigned long timebase = 0; + +static void __devinit pSeries_give_timebase(void) +{ + spin_lock(&timebase_lock); + rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); + timebase = get_tb(); + spin_unlock(&timebase_lock); + + while (timebase) + barrier(); + rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); +} + +static void __devinit pSeries_take_timebase(void) +{ + while (!timebase) + barrier(); + spin_lock(&timebase_lock); + set_tb(timebase >> 32, timebase & 0xffffffff); + timebase = 0; + spin_unlock(&timebase_lock); +} + +static void __devinit smp_pSeries_kick_cpu(int nr) +{ + BUG_ON(nr < 0 || nr >= NR_CPUS); + + if (!smp_startup_cpu(nr)) + return; + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + paca[nr].cpu_start = 1; +} + +static int smp_pSeries_cpu_bootable(unsigned int nr) +{ + /* Special case - we inhibit secondary thread startup + * during boot if the user requests it. Odd-numbered + * cpus are assumed to be secondary threads. + */ + if (system_state < SYSTEM_RUNNING && + cpu_has_feature(CPU_FTR_SMT) && + !smt_enabled_at_boot && nr % 2 != 0) + return 0; + + return 1; +} +#ifdef CONFIG_MPIC +static struct smp_ops_t pSeries_mpic_smp_ops = { + .message_pass = smp_mpic_message_pass, + .probe = smp_mpic_probe, + .kick_cpu = smp_pSeries_kick_cpu, + .setup_cpu = smp_mpic_setup_cpu, +}; +#endif +#ifdef CONFIG_XICS +static struct smp_ops_t pSeries_xics_smp_ops = { + .message_pass = smp_xics_message_pass, + .probe = smp_xics_probe, + .kick_cpu = smp_pSeries_kick_cpu, + .setup_cpu = smp_xics_setup_cpu, + .cpu_bootable = smp_pSeries_cpu_bootable, +}; +#endif + +/* This is called very early */ +void __init smp_init_pSeries(void) +{ + int i; + + DBG(" -> smp_init_pSeries()\n"); + + switch (ppc64_interrupt_controller) { +#ifdef CONFIG_MPIC + case IC_OPEN_PIC: + smp_ops = &pSeries_mpic_smp_ops; + break; +#endif +#ifdef CONFIG_XICS + case IC_PPC_XIC: + smp_ops = &pSeries_xics_smp_ops; + break; +#endif + default: + panic("Invalid interrupt controller"); + } + +#ifdef CONFIG_HOTPLUG_CPU + smp_ops->cpu_disable = pSeries_cpu_disable; + smp_ops->cpu_die = pSeries_cpu_die; + + /* Processors can be added/removed only on LPAR */ + if (systemcfg->platform == PLATFORM_PSERIES_LPAR) + pSeries_reconfig_notifier_register(&pSeries_smp_nb); +#endif + + /* Mark threads which are still spinning in hold loops. */ + if (cpu_has_feature(CPU_FTR_SMT)) { + for_each_present_cpu(i) { + if (i % 2 == 0) + /* + * Even-numbered logical cpus correspond to + * primary threads. + */ + cpu_set(i, of_spin_map); + } + } else { + of_spin_map = cpu_present_map; + } + + cpu_clear(boot_cpuid, of_spin_map); + + /* Non-lpar has additional take/give timebase */ + if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { + smp_ops->give_timebase = pSeries_give_timebase; + smp_ops->take_timebase = pSeries_take_timebase; + } + + DBG(" <- smp_init_pSeries()\n"); +} + diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c new file mode 100644 index 00000000000..866379b80c0 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vio.c @@ -0,0 +1,274 @@ +/* + * IBM PowerPC pSeries Virtual I/O Infrastructure Support. + * + * Copyright (c) 2003-2005 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Santiago Leon santil@us.ibm.com + * Hollis Blanchard <hollisb@us.ibm.com> + * Stephen Rothwell + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/kobject.h> +#include <asm/iommu.h> +#include <asm/dma.h> +#include <asm/prom.h> +#include <asm/vio.h> +#include <asm/hvcall.h> +#include <asm/tce.h> + +extern struct subsystem devices_subsys; /* needed for vio_find_name() */ + +static void probe_bus_pseries(void) +{ + struct device_node *node_vroot, *of_node; + + node_vroot = find_devices("vdevice"); + if ((node_vroot == NULL) || (node_vroot->child == NULL)) + /* this machine doesn't do virtual IO, and that's ok */ + return; + + /* + * Create struct vio_devices for each virtual device in the device tree. + * Drivers will associate with them later. + */ + for (of_node = node_vroot->child; of_node != NULL; + of_node = of_node->sibling) { + printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); + vio_register_device_node(of_node); + } +} + +/** + * vio_match_device_pseries: - Tell if a pSeries VIO device matches a + * vio_device_id + */ +static int vio_match_device_pseries(const struct vio_device_id *id, + const struct vio_dev *dev) +{ + return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && + device_is_compatible(dev->dev.platform_data, id->compat); +} + +static void vio_release_device_pseries(struct device *dev) +{ + /* XXX free TCE table */ + of_node_put(dev->platform_data); +} + +static ssize_t viodev_show_devspec(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct device_node *of_node = dev->platform_data; + + return sprintf(buf, "%s\n", of_node->full_name); +} +DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); + +static void vio_unregister_device_pseries(struct vio_dev *viodev) +{ + device_remove_file(&viodev->dev, &dev_attr_devspec); +} + +static struct vio_bus_ops vio_bus_ops_pseries = { + .match = vio_match_device_pseries, + .unregister_device = vio_unregister_device_pseries, + .release_device = vio_release_device_pseries, +}; + +/** + * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus + */ +static int __init vio_bus_init_pseries(void) +{ + int err; + + err = vio_bus_init(&vio_bus_ops_pseries); + if (err == 0) + probe_bus_pseries(); + return err; +} + +__initcall(vio_bus_init_pseries); + +/** + * vio_build_iommu_table: - gets the dma information from OF and + * builds the TCE tree. + * @dev: the virtual device. + * + * Returns a pointer to the built tce tree, or NULL if it can't + * find property. +*/ +static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) +{ + unsigned int *dma_window; + struct iommu_table *newTceTable; + unsigned long offset; + int dma_window_property_size; + + dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); + if(!dma_window) { + return NULL; + } + + newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + /* There should be some code to extract the phys-encoded offset + using prom_n_addr_cells(). However, according to a comment + on earlier versions, it's always zero, so we don't bother */ + offset = dma_window[1] >> PAGE_SHIFT; + + /* TCE table size - measured in tce entries */ + newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; + /* offset for VIO should always be 0 */ + newTceTable->it_offset = offset; + newTceTable->it_busno = 0; + newTceTable->it_index = (unsigned long)dma_window[0]; + newTceTable->it_type = TCE_VB; + + return iommu_init_table(newTceTable); +} + +/** + * vio_register_device_node: - Register a new vio device. + * @of_node: The OF node for this device. + * + * Creates and initializes a vio_dev structure from the data in + * of_node (dev.platform_data) and adds it to the list of virtual devices. + * Returns a pointer to the created vio_dev or NULL if node has + * NULL device_type or compatible fields. + */ +struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) +{ + struct vio_dev *viodev; + unsigned int *unit_address; + unsigned int *irq_p; + + /* we need the 'device_type' property, in order to match with drivers */ + if ((NULL == of_node->type)) { + printk(KERN_WARNING + "%s: node %s missing 'device_type'\n", __FUNCTION__, + of_node->name ? of_node->name : "<unknown>"); + return NULL; + } + + unit_address = (unsigned int *)get_property(of_node, "reg", NULL); + if (!unit_address) { + printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, + of_node->name ? of_node->name : "<unknown>"); + return NULL; + } + + /* allocate a vio_dev for this node */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) { + return NULL; + } + memset(viodev, 0, sizeof(struct vio_dev)); + + viodev->dev.platform_data = of_node_get(of_node); + + viodev->irq = NO_IRQ; + irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); + if (irq_p) { + int virq = virt_irq_create_mapping(*irq_p); + if (virq == NO_IRQ) { + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", of_node->full_name); + } else + viodev->irq = irq_offset_up(virq); + } + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); + viodev->name = of_node->name; + viodev->type = of_node->type; + viodev->unit_address = *unit_address; + viodev->iommu_table = vio_build_iommu_table(viodev); + + /* register with generic device framework */ + if (vio_register_device(viodev) == NULL) { + /* XXX free TCE table */ + kfree(viodev); + return NULL; + } + device_create_file(&viodev->dev, &dev_attr_devspec); + + return viodev; +} +EXPORT_SYMBOL(vio_register_device_node); + +/** + * vio_get_attribute: - get attribute for virtual device + * @vdev: The vio device to get property. + * @which: The property/attribute to be extracted. + * @length: Pointer to length of returned data size (unused if NULL). + * + * Calls prom.c's get_property() to return the value of the + * attribute specified by the preprocessor constant @which +*/ +const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) +{ + return get_property(vdev->dev.platform_data, (char*)which, length); +} +EXPORT_SYMBOL(vio_get_attribute); + +/* vio_find_name() - internal because only vio.c knows how we formatted the + * kobject name + * XXX once vio_bus_type.devices is actually used as a kset in + * drivers/base/bus.c, this function should be removed in favor of + * "device_find(kobj_name, &vio_bus_type)" + */ +static struct vio_dev *vio_find_name(const char *kobj_name) +{ + struct kobject *found; + + found = kset_find_obj(&devices_subsys.kset, kobj_name); + if (!found) + return NULL; + + return to_vio_dev(container_of(found, struct device, kobj)); +} + +/** + * vio_find_node - find an already-registered vio_dev + * @vnode: device_node of the virtual device we're looking for + */ +struct vio_dev *vio_find_node(struct device_node *vnode) +{ + uint32_t *unit_address; + char kobj_name[BUS_ID_SIZE]; + + /* construct the kobject name from the device node */ + unit_address = (uint32_t *)get_property(vnode, "reg", NULL); + if (!unit_address) + return NULL; + snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); + + return vio_find_name(kobj_name); +} +EXPORT_SYMBOL(vio_find_node); + +int vio_enable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); + if (rc != H_Success) + printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_enable_interrupts); + +int vio_disable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); + if (rc != H_Success) + printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_disable_interrupts); diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile new file mode 100644 index 00000000000..c649f03acf6 --- /dev/null +++ b/arch/powerpc/sysdev/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_MPIC) += mpic.o +indirectpci-$(CONFIG_PPC_PMAC) = indirect_pci.o +obj-$(CONFIG_PPC32) += $(indirectpci-y) diff --git a/arch/powerpc/sysdev/indirect_pci.c b/arch/powerpc/sysdev/indirect_pci.c new file mode 100644 index 00000000000..e7148846970 --- /dev/null +++ b/arch/powerpc/sysdev/indirect_pci.c @@ -0,0 +1,134 @@ +/* + * Support for indirect PCI bridges. + * + * Copyright (C) 1998 Gabriel Paubert. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> + +#ifdef CONFIG_PPC_INDIRECT_PCI_BE +#define PCI_CFG_OUT out_be32 +#else +#define PCI_CFG_OUT out_le32 +#endif + +static int +indirect_read_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 *val) +{ + struct pci_controller *hose = bus->sysdata; + volatile void __iomem *cfg_data; + u8 cfg_type = 0; + + if (ppc_md.pci_exclude_device) + if (ppc_md.pci_exclude_device(bus->number, devfn)) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (hose->set_cfg_type) + if (bus->number != hose->first_busno) + cfg_type = 1; + + PCI_CFG_OUT(hose->cfg_addr, + (0x80000000 | ((bus->number - hose->bus_offset) << 16) + | (devfn << 8) | ((offset & 0xfc) | cfg_type))); + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + cfg_data = hose->cfg_data + (offset & 3); + switch (len) { + case 1: + *val = in_8(cfg_data); + break; + case 2: + *val = in_le16(cfg_data); + break; + default: + *val = in_le32(cfg_data); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int +indirect_write_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 val) +{ + struct pci_controller *hose = bus->sysdata; + volatile void __iomem *cfg_data; + u8 cfg_type = 0; + + if (ppc_md.pci_exclude_device) + if (ppc_md.pci_exclude_device(bus->number, devfn)) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (hose->set_cfg_type) + if (bus->number != hose->first_busno) + cfg_type = 1; + + PCI_CFG_OUT(hose->cfg_addr, + (0x80000000 | ((bus->number - hose->bus_offset) << 16) + | (devfn << 8) | ((offset & 0xfc) | cfg_type))); + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + cfg_data = hose->cfg_data + (offset & 3); + switch (len) { + case 1: + out_8(cfg_data, val); + break; + case 2: + out_le16(cfg_data, val); + break; + default: + out_le32(cfg_data, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops indirect_pci_ops = +{ + indirect_read_config, + indirect_write_config +}; + +void __init +setup_indirect_pci_nomap(struct pci_controller* hose, void __iomem * cfg_addr, + void __iomem * cfg_data) +{ + hose->cfg_addr = cfg_addr; + hose->cfg_data = cfg_data; + hose->ops = &indirect_pci_ops; +} + +void __init +setup_indirect_pci(struct pci_controller* hose, u32 cfg_addr, u32 cfg_data) +{ + unsigned long base = cfg_addr & PAGE_MASK; + void __iomem *mbase, *addr, *data; + + mbase = ioremap(base, PAGE_SIZE); + addr = mbase + (cfg_addr & ~PAGE_MASK); + if ((cfg_data & PAGE_MASK) != base) + mbase = ioremap(cfg_data & PAGE_MASK, PAGE_SIZE); + data = mbase + (cfg_data & ~PAGE_MASK); + setup_indirect_pci_nomap(hose, addr, data); +} diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c new file mode 100644 index 00000000000..02b4d2488bf --- /dev/null +++ b/arch/powerpc/sysdev/mpic.c @@ -0,0 +1,907 @@ +/* + * arch/powerpc/kernel/mpic.c + * + * Driver for interrupt controllers following the OpenPIC standard, the + * common implementation beeing IBM's MPIC. This driver also can deal + * with various broken implementations of this HW. + * + * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/bootmem.h> +#include <linux/spinlock.h> +#include <linux/pci.h> + +#include <asm/ptrace.h> +#include <asm/signal.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/irq.h> +#include <asm/machdep.h> +#include <asm/mpic.h> +#include <asm/smp.h> + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +static struct mpic *mpics; +static struct mpic *mpic_primary; +static DEFINE_SPINLOCK(mpic_lock); + +#ifdef CONFIG_PPC32 /* XXX for now */ +#define distribute_irqs CONFIG_IRQ_ALL_CPUS +#endif + +/* + * Register accessor functions + */ + + +static inline u32 _mpic_read(unsigned int be, volatile u32 __iomem *base, + unsigned int reg) +{ + if (be) + return in_be32(base + (reg >> 2)); + else + return in_le32(base + (reg >> 2)); +} + +static inline void _mpic_write(unsigned int be, volatile u32 __iomem *base, + unsigned int reg, u32 value) +{ + if (be) + out_be32(base + (reg >> 2), value); + else + out_le32(base + (reg >> 2), value); +} + +static inline u32 _mpic_ipi_read(struct mpic *mpic, unsigned int ipi) +{ + unsigned int be = (mpic->flags & MPIC_BIG_ENDIAN) != 0; + unsigned int offset = MPIC_GREG_IPI_VECTOR_PRI_0 + (ipi * 0x10); + + if (mpic->flags & MPIC_BROKEN_IPI) + be = !be; + return _mpic_read(be, mpic->gregs, offset); +} + +static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 value) +{ + unsigned int offset = MPIC_GREG_IPI_VECTOR_PRI_0 + (ipi * 0x10); + + _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->gregs, offset, value); +} + +static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg) +{ + unsigned int cpu = 0; + + if (mpic->flags & MPIC_PRIMARY) + cpu = hard_smp_processor_id(); + + return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg); +} + +static inline void _mpic_cpu_write(struct mpic *mpic, unsigned int reg, u32 value) +{ + unsigned int cpu = 0; + + if (mpic->flags & MPIC_PRIMARY) + cpu = hard_smp_processor_id(); + + _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg, value); +} + +static inline u32 _mpic_irq_read(struct mpic *mpic, unsigned int src_no, unsigned int reg) +{ + unsigned int isu = src_no >> mpic->isu_shift; + unsigned int idx = src_no & mpic->isu_mask; + + return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu], + reg + (idx * MPIC_IRQ_STRIDE)); +} + +static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no, + unsigned int reg, u32 value) +{ + unsigned int isu = src_no >> mpic->isu_shift; + unsigned int idx = src_no & mpic->isu_mask; + + _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu], + reg + (idx * MPIC_IRQ_STRIDE), value); +} + +#define mpic_read(b,r) _mpic_read(mpic->flags & MPIC_BIG_ENDIAN,(b),(r)) +#define mpic_write(b,r,v) _mpic_write(mpic->flags & MPIC_BIG_ENDIAN,(b),(r),(v)) +#define mpic_ipi_read(i) _mpic_ipi_read(mpic,(i)) +#define mpic_ipi_write(i,v) _mpic_ipi_write(mpic,(i),(v)) +#define mpic_cpu_read(i) _mpic_cpu_read(mpic,(i)) +#define mpic_cpu_write(i,v) _mpic_cpu_write(mpic,(i),(v)) +#define mpic_irq_read(s,r) _mpic_irq_read(mpic,(s),(r)) +#define mpic_irq_write(s,r,v) _mpic_irq_write(mpic,(s),(r),(v)) + + +/* + * Low level utility functions + */ + + + +/* Check if we have one of those nice broken MPICs with a flipped endian on + * reads from IPI registers + */ +static void __init mpic_test_broken_ipi(struct mpic *mpic) +{ + u32 r; + + mpic_write(mpic->gregs, MPIC_GREG_IPI_VECTOR_PRI_0, MPIC_VECPRI_MASK); + r = mpic_read(mpic->gregs, MPIC_GREG_IPI_VECTOR_PRI_0); + + if (r == le32_to_cpu(MPIC_VECPRI_MASK)) { + printk(KERN_INFO "mpic: Detected reversed IPI registers\n"); + mpic->flags |= MPIC_BROKEN_IPI; + } +} + +#ifdef CONFIG_MPIC_BROKEN_U3 + +/* Test if an interrupt is sourced from HyperTransport (used on broken U3s) + * to force the edge setting on the MPIC and do the ack workaround. + */ +static inline int mpic_is_ht_interrupt(struct mpic *mpic, unsigned int source_no) +{ + if (source_no >= 128 || !mpic->fixups) + return 0; + return mpic->fixups[source_no].base != NULL; +} + +static inline void mpic_apic_end_irq(struct mpic *mpic, unsigned int source_no) +{ + struct mpic_irq_fixup *fixup = &mpic->fixups[source_no]; + u32 tmp; + + spin_lock(&mpic->fixup_lock); + writeb(0x11 + 2 * fixup->irq, fixup->base); + tmp = readl(fixup->base + 2); + writel(tmp | 0x80000000ul, fixup->base + 2); + /* config writes shouldn't be posted but let's be safe ... */ + (void)readl(fixup->base + 2); + spin_unlock(&mpic->fixup_lock); +} + + +static void __init mpic_amd8111_read_irq(struct mpic *mpic, u8 __iomem *devbase) +{ + int i, irq; + u32 tmp; + + printk(KERN_INFO "mpic: - Workarounds on AMD 8111 @ %p\n", devbase); + + for (i=0; i < 24; i++) { + writeb(0x10 + 2*i, devbase + 0xf2); + tmp = readl(devbase + 0xf4); + if ((tmp & 0x1) || !(tmp & 0x20)) + continue; + irq = (tmp >> 16) & 0xff; + mpic->fixups[irq].irq = i; + mpic->fixups[irq].base = devbase + 0xf2; + } +} + +static void __init mpic_amd8131_read_irq(struct mpic *mpic, u8 __iomem *devbase) +{ + int i, irq; + u32 tmp; + + printk(KERN_INFO "mpic: - Workarounds on AMD 8131 @ %p\n", devbase); + + for (i=0; i < 4; i++) { + writeb(0x10 + 2*i, devbase + 0xba); + tmp = readl(devbase + 0xbc); + if ((tmp & 0x1) || !(tmp & 0x20)) + continue; + irq = (tmp >> 16) & 0xff; + mpic->fixups[irq].irq = i; + mpic->fixups[irq].base = devbase + 0xba; + } +} + +static void __init mpic_scan_ioapics(struct mpic *mpic) +{ + unsigned int devfn; + u8 __iomem *cfgspace; + + printk(KERN_INFO "mpic: Setting up IO-APICs workarounds for U3\n"); + + /* Allocate fixups array */ + mpic->fixups = alloc_bootmem(128 * sizeof(struct mpic_irq_fixup)); + BUG_ON(mpic->fixups == NULL); + memset(mpic->fixups, 0, 128 * sizeof(struct mpic_irq_fixup)); + + /* Init spinlock */ + spin_lock_init(&mpic->fixup_lock); + + /* Map u3 config space. We assume all IO-APICs are on the primary bus + * and slot will never be above "0xf" so we only need to map 32k + */ + cfgspace = (unsigned char __iomem *)ioremap(0xf2000000, 0x8000); + BUG_ON(cfgspace == NULL); + + /* Now we scan all slots. We do a very quick scan, we read the header type, + * vendor ID and device ID only, that's plenty enough + */ + for (devfn = 0; devfn < PCI_DEVFN(0x10,0); devfn ++) { + u8 __iomem *devbase = cfgspace + (devfn << 8); + u8 hdr_type = readb(devbase + PCI_HEADER_TYPE); + u32 l = readl(devbase + PCI_VENDOR_ID); + u16 vendor_id, device_id; + int multifunc = 0; + + DBG("devfn %x, l: %x\n", devfn, l); + + /* If no device, skip */ + if (l == 0xffffffff || l == 0x00000000 || + l == 0x0000ffff || l == 0xffff0000) + goto next; + + /* Check if it's a multifunction device (only really used + * to function 0 though + */ + multifunc = !!(hdr_type & 0x80); + vendor_id = l & 0xffff; + device_id = (l >> 16) & 0xffff; + + /* If a known device, go to fixup setup code */ + if (vendor_id == PCI_VENDOR_ID_AMD && device_id == 0x7460) + mpic_amd8111_read_irq(mpic, devbase); + if (vendor_id == PCI_VENDOR_ID_AMD && device_id == 0x7450) + mpic_amd8131_read_irq(mpic, devbase); + next: + /* next device, if function 0 */ + if ((PCI_FUNC(devfn) == 0) && !multifunc) + devfn += 7; + } +} + +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + +/* Find an mpic associated with a given linux interrupt */ +static struct mpic *mpic_find(unsigned int irq, unsigned int *is_ipi) +{ + struct mpic *mpic = mpics; + + while(mpic) { + /* search IPIs first since they may override the main interrupts */ + if (irq >= mpic->ipi_offset && irq < (mpic->ipi_offset + 4)) { + if (is_ipi) + *is_ipi = 1; + return mpic; + } + if (irq >= mpic->irq_offset && + irq < (mpic->irq_offset + mpic->irq_count)) { + if (is_ipi) + *is_ipi = 0; + return mpic; + } + mpic = mpic -> next; + } + return NULL; +} + +/* Convert a cpu mask from logical to physical cpu numbers. */ +static inline u32 mpic_physmask(u32 cpumask) +{ + int i; + u32 mask = 0; + + for (i = 0; i < NR_CPUS; ++i, cpumask >>= 1) + mask |= (cpumask & 1) << get_hard_smp_processor_id(i); + return mask; +} + +#ifdef CONFIG_SMP +/* Get the mpic structure from the IPI number */ +static inline struct mpic * mpic_from_ipi(unsigned int ipi) +{ + return container_of(irq_desc[ipi].handler, struct mpic, hc_ipi); +} +#endif + +/* Get the mpic structure from the irq number */ +static inline struct mpic * mpic_from_irq(unsigned int irq) +{ + return container_of(irq_desc[irq].handler, struct mpic, hc_irq); +} + +/* Send an EOI */ +static inline void mpic_eoi(struct mpic *mpic) +{ + mpic_cpu_write(MPIC_CPU_EOI, 0); + (void)mpic_cpu_read(MPIC_CPU_WHOAMI); +} + +#ifdef CONFIG_SMP +static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) +{ + struct mpic *mpic = dev_id; + + smp_message_recv(irq - mpic->ipi_offset, regs); + return IRQ_HANDLED; +} +#endif /* CONFIG_SMP */ + +/* + * Linux descriptor level callbacks + */ + + +static void mpic_enable_irq(unsigned int irq) +{ + unsigned int loops = 100000; + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = irq - mpic->irq_offset; + + DBG("%s: enable_irq: %d (src %d)\n", mpic->name, irq, src); + + mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, + mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & ~MPIC_VECPRI_MASK); + + /* make sure mask gets to controller before we return to user */ + do { + if (!loops--) { + printk(KERN_ERR "mpic_enable_irq timeout\n"); + break; + } + } while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK); +} + +static void mpic_disable_irq(unsigned int irq) +{ + unsigned int loops = 100000; + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = irq - mpic->irq_offset; + + DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src); + + mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, + mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) | MPIC_VECPRI_MASK); + + /* make sure mask gets to controller before we return to user */ + do { + if (!loops--) { + printk(KERN_ERR "mpic_enable_irq timeout\n"); + break; + } + } while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK)); +} + +static void mpic_end_irq(unsigned int irq) +{ + struct mpic *mpic = mpic_from_irq(irq); + + DBG("%s: end_irq: %d\n", mpic->name, irq); + + /* We always EOI on end_irq() even for edge interrupts since that + * should only lower the priority, the MPIC should have properly + * latched another edge interrupt coming in anyway + */ + +#ifdef CONFIG_MPIC_BROKEN_U3 + if (mpic->flags & MPIC_BROKEN_U3) { + unsigned int src = irq - mpic->irq_offset; + if (mpic_is_ht_interrupt(mpic, src)) + mpic_apic_end_irq(mpic, src); + } +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + mpic_eoi(mpic); +} + +#ifdef CONFIG_SMP + +static void mpic_enable_ipi(unsigned int irq) +{ + struct mpic *mpic = mpic_from_ipi(irq); + unsigned int src = irq - mpic->ipi_offset; + + DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src); + mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK); +} + +static void mpic_disable_ipi(unsigned int irq) +{ + /* NEVER disable an IPI... that's just plain wrong! */ +} + +static void mpic_end_ipi(unsigned int irq) +{ + struct mpic *mpic = mpic_from_ipi(irq); + + /* + * IPIs are marked IRQ_PER_CPU. This has the side effect of + * preventing the IRQ_PENDING/IRQ_INPROGRESS logic from + * applying to them. We EOI them late to avoid re-entering. + * We mark IPI's with SA_INTERRUPT as they must run with + * irqs disabled. + */ + mpic_eoi(mpic); +} + +#endif /* CONFIG_SMP */ + +static void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) +{ + struct mpic *mpic = mpic_from_irq(irq); + + cpumask_t tmp; + + cpus_and(tmp, cpumask, cpu_online_map); + + mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_DESTINATION, + mpic_physmask(cpus_addr(tmp)[0])); +} + + +/* + * Exported functions + */ + + +struct mpic * __init mpic_alloc(unsigned long phys_addr, + unsigned int flags, + unsigned int isu_size, + unsigned int irq_offset, + unsigned int irq_count, + unsigned int ipi_offset, + unsigned char *senses, + unsigned int senses_count, + const char *name) +{ + struct mpic *mpic; + u32 reg; + const char *vers; + int i; + + mpic = alloc_bootmem(sizeof(struct mpic)); + if (mpic == NULL) + return NULL; + + + memset(mpic, 0, sizeof(struct mpic)); + mpic->name = name; + + mpic->hc_irq.typename = name; + mpic->hc_irq.enable = mpic_enable_irq; + mpic->hc_irq.disable = mpic_disable_irq; + mpic->hc_irq.end = mpic_end_irq; + if (flags & MPIC_PRIMARY) + mpic->hc_irq.set_affinity = mpic_set_affinity; +#ifdef CONFIG_SMP + mpic->hc_ipi.typename = name; + mpic->hc_ipi.enable = mpic_enable_ipi; + mpic->hc_ipi.disable = mpic_disable_ipi; + mpic->hc_ipi.end = mpic_end_ipi; +#endif /* CONFIG_SMP */ + + mpic->flags = flags; + mpic->isu_size = isu_size; + mpic->irq_offset = irq_offset; + mpic->irq_count = irq_count; + mpic->ipi_offset = ipi_offset; + mpic->num_sources = 0; /* so far */ + mpic->senses = senses; + mpic->senses_count = senses_count; + + /* Map the global registers */ + mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000); + mpic->tmregs = mpic->gregs + (MPIC_TIMER_BASE >> 2); + BUG_ON(mpic->gregs == NULL); + + /* Reset */ + if (flags & MPIC_WANTS_RESET) { + mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0, + mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) + | MPIC_GREG_GCONF_RESET); + while( mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) + & MPIC_GREG_GCONF_RESET) + mb(); + } + + /* Read feature register, calculate num CPUs and, for non-ISU + * MPICs, num sources as well. On ISU MPICs, sources are counted + * as ISUs are added + */ + reg = mpic_read(mpic->gregs, MPIC_GREG_FEATURE_0); + mpic->num_cpus = ((reg & MPIC_GREG_FEATURE_LAST_CPU_MASK) + >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1; + if (isu_size == 0) + mpic->num_sources = ((reg & MPIC_GREG_FEATURE_LAST_SRC_MASK) + >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1; + + /* Map the per-CPU registers */ + for (i = 0; i < mpic->num_cpus; i++) { + mpic->cpuregs[i] = ioremap(phys_addr + MPIC_CPU_BASE + + i * MPIC_CPU_STRIDE, 0x1000); + BUG_ON(mpic->cpuregs[i] == NULL); + } + + /* Initialize main ISU if none provided */ + if (mpic->isu_size == 0) { + mpic->isu_size = mpic->num_sources; + mpic->isus[0] = ioremap(phys_addr + MPIC_IRQ_BASE, + MPIC_IRQ_STRIDE * mpic->isu_size); + BUG_ON(mpic->isus[0] == NULL); + } + mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1); + mpic->isu_mask = (1 << mpic->isu_shift) - 1; + + /* Display version */ + switch (reg & MPIC_GREG_FEATURE_VERSION_MASK) { + case 1: + vers = "1.0"; + break; + case 2: + vers = "1.2"; + break; + case 3: + vers = "1.3"; + break; + default: + vers = "<unknown>"; + break; + } + printk(KERN_INFO "mpic: Setting up MPIC \"%s\" version %s at %lx, max %d CPUs\n", + name, vers, phys_addr, mpic->num_cpus); + printk(KERN_INFO "mpic: ISU size: %d, shift: %d, mask: %x\n", mpic->isu_size, + mpic->isu_shift, mpic->isu_mask); + + mpic->next = mpics; + mpics = mpic; + + if (flags & MPIC_PRIMARY) + mpic_primary = mpic; + + return mpic; +} + +void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, + unsigned long phys_addr) +{ + unsigned int isu_first = isu_num * mpic->isu_size; + + BUG_ON(isu_num >= MPIC_MAX_ISU); + + mpic->isus[isu_num] = ioremap(phys_addr, MPIC_IRQ_STRIDE * mpic->isu_size); + if ((isu_first + mpic->isu_size) > mpic->num_sources) + mpic->num_sources = isu_first + mpic->isu_size; +} + +void __init mpic_setup_cascade(unsigned int irq, mpic_cascade_t handler, + void *data) +{ + struct mpic *mpic = mpic_find(irq, NULL); + unsigned long flags; + + /* Synchronization here is a bit dodgy, so don't try to replace cascade + * interrupts on the fly too often ... but normally it's set up at boot. + */ + spin_lock_irqsave(&mpic_lock, flags); + if (mpic->cascade) + mpic_disable_irq(mpic->cascade_vec + mpic->irq_offset); + mpic->cascade = NULL; + wmb(); + mpic->cascade_vec = irq - mpic->irq_offset; + mpic->cascade_data = data; + wmb(); + mpic->cascade = handler; + mpic_enable_irq(irq); + spin_unlock_irqrestore(&mpic_lock, flags); +} + +void __init mpic_init(struct mpic *mpic) +{ + int i; + + BUG_ON(mpic->num_sources == 0); + + printk(KERN_INFO "mpic: Initializing for %d sources\n", mpic->num_sources); + + /* Set current processor priority to max */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf); + + /* Initialize timers: just disable them all */ + for (i = 0; i < 4; i++) { + mpic_write(mpic->tmregs, + i * MPIC_TIMER_STRIDE + MPIC_TIMER_DESTINATION, 0); + mpic_write(mpic->tmregs, + i * MPIC_TIMER_STRIDE + MPIC_TIMER_VECTOR_PRI, + MPIC_VECPRI_MASK | + (MPIC_VEC_TIMER_0 + i)); + } + + /* Initialize IPIs to our reserved vectors and mark them disabled for now */ + mpic_test_broken_ipi(mpic); + for (i = 0; i < 4; i++) { + mpic_ipi_write(i, + MPIC_VECPRI_MASK | + (10 << MPIC_VECPRI_PRIORITY_SHIFT) | + (MPIC_VEC_IPI_0 + i)); +#ifdef CONFIG_SMP + if (!(mpic->flags & MPIC_PRIMARY)) + continue; + irq_desc[mpic->ipi_offset+i].status |= IRQ_PER_CPU; + irq_desc[mpic->ipi_offset+i].handler = &mpic->hc_ipi; + +#endif /* CONFIG_SMP */ + } + + /* Initialize interrupt sources */ + if (mpic->irq_count == 0) + mpic->irq_count = mpic->num_sources; + +#ifdef CONFIG_MPIC_BROKEN_U3 + /* Do the ioapic fixups on U3 broken mpic */ + DBG("MPIC flags: %x\n", mpic->flags); + if ((mpic->flags & MPIC_BROKEN_U3) && (mpic->flags & MPIC_PRIMARY)) + mpic_scan_ioapics(mpic); +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + for (i = 0; i < mpic->num_sources; i++) { + /* start with vector = source number, and masked */ + u32 vecpri = MPIC_VECPRI_MASK | i | (8 << MPIC_VECPRI_PRIORITY_SHIFT); + int level = 0; + + /* if it's an IPI, we skip it */ + if ((mpic->irq_offset + i) >= (mpic->ipi_offset + i) && + (mpic->irq_offset + i) < (mpic->ipi_offset + i + 4)) + continue; + + /* do senses munging */ + if (mpic->senses && i < mpic->senses_count) { + if (mpic->senses[i] & IRQ_SENSE_LEVEL) + vecpri |= MPIC_VECPRI_SENSE_LEVEL; + if (mpic->senses[i] & IRQ_POLARITY_POSITIVE) + vecpri |= MPIC_VECPRI_POLARITY_POSITIVE; + } else + vecpri |= MPIC_VECPRI_SENSE_LEVEL; + + /* remember if it was a level interrupts */ + level = (vecpri & MPIC_VECPRI_SENSE_LEVEL); + + /* deal with broken U3 */ + if (mpic->flags & MPIC_BROKEN_U3) { +#ifdef CONFIG_MPIC_BROKEN_U3 + if (mpic_is_ht_interrupt(mpic, i)) { + vecpri &= ~(MPIC_VECPRI_SENSE_MASK | + MPIC_VECPRI_POLARITY_MASK); + vecpri |= MPIC_VECPRI_POLARITY_POSITIVE; + } +#else + printk(KERN_ERR "mpic: BROKEN_U3 set, but CONFIG doesn't match\n"); +#endif + } + + DBG("setup source %d, vecpri: %08x, level: %d\n", i, vecpri, + (level != 0)); + + /* init hw */ + mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri); + mpic_irq_write(i, MPIC_IRQ_DESTINATION, + 1 << hard_smp_processor_id()); + + /* init linux descriptors */ + if (i < mpic->irq_count) { + irq_desc[mpic->irq_offset+i].status = level ? IRQ_LEVEL : 0; + irq_desc[mpic->irq_offset+i].handler = &mpic->hc_irq; + } + } + + /* Init spurrious vector */ + mpic_write(mpic->gregs, MPIC_GREG_SPURIOUS, MPIC_VEC_SPURRIOUS); + + /* Disable 8259 passthrough */ + mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0, + mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) + | MPIC_GREG_GCONF_8259_PTHROU_DIS); + + /* Set current processor priority to 0 */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0); +} + + + +void mpic_irq_set_priority(unsigned int irq, unsigned int pri) +{ + int is_ipi; + struct mpic *mpic = mpic_find(irq, &is_ipi); + unsigned long flags; + u32 reg; + + spin_lock_irqsave(&mpic_lock, flags); + if (is_ipi) { + reg = mpic_ipi_read(irq - mpic->ipi_offset) & MPIC_VECPRI_PRIORITY_MASK; + mpic_ipi_write(irq - mpic->ipi_offset, + reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); + } else { + reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI) + & MPIC_VECPRI_PRIORITY_MASK; + mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI, + reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); + } + spin_unlock_irqrestore(&mpic_lock, flags); +} + +unsigned int mpic_irq_get_priority(unsigned int irq) +{ + int is_ipi; + struct mpic *mpic = mpic_find(irq, &is_ipi); + unsigned long flags; + u32 reg; + + spin_lock_irqsave(&mpic_lock, flags); + if (is_ipi) + reg = mpic_ipi_read(irq - mpic->ipi_offset); + else + reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI); + spin_unlock_irqrestore(&mpic_lock, flags); + return (reg & MPIC_VECPRI_PRIORITY_MASK) >> MPIC_VECPRI_PRIORITY_SHIFT; +} + +void mpic_setup_this_cpu(void) +{ +#ifdef CONFIG_SMP + struct mpic *mpic = mpic_primary; + unsigned long flags; + u32 msk = 1 << hard_smp_processor_id(); + unsigned int i; + + BUG_ON(mpic == NULL); + + DBG("%s: setup_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); + + spin_lock_irqsave(&mpic_lock, flags); + + /* let the mpic know we want intrs. default affinity is 0xffffffff + * until changed via /proc. That's how it's done on x86. If we want + * it differently, then we should make sure we also change the default + * values of irq_affinity in irq.c. + */ + if (distribute_irqs) { + for (i = 0; i < mpic->num_sources ; i++) + mpic_irq_write(i, MPIC_IRQ_DESTINATION, + mpic_irq_read(i, MPIC_IRQ_DESTINATION) | msk); + } + + /* Set current processor priority to 0 */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0); + + spin_unlock_irqrestore(&mpic_lock, flags); +#endif /* CONFIG_SMP */ +} + +int mpic_cpu_get_priority(void) +{ + struct mpic *mpic = mpic_primary; + + return mpic_cpu_read(MPIC_CPU_CURRENT_TASK_PRI); +} + +void mpic_cpu_set_priority(int prio) +{ + struct mpic *mpic = mpic_primary; + + prio &= MPIC_CPU_TASKPRI_MASK; + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, prio); +} + +/* + * XXX: someone who knows mpic should check this. + * do we need to eoi the ipi including for kexec cpu here (see xics comments)? + * or can we reset the mpic in the new kernel? + */ +void mpic_teardown_this_cpu(int secondary) +{ + struct mpic *mpic = mpic_primary; + unsigned long flags; + u32 msk = 1 << hard_smp_processor_id(); + unsigned int i; + + BUG_ON(mpic == NULL); + + DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); + spin_lock_irqsave(&mpic_lock, flags); + + /* let the mpic know we don't want intrs. */ + for (i = 0; i < mpic->num_sources ; i++) + mpic_irq_write(i, MPIC_IRQ_DESTINATION, + mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk); + + /* Set current processor priority to max */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf); + + spin_unlock_irqrestore(&mpic_lock, flags); +} + + +void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask) +{ + struct mpic *mpic = mpic_primary; + + BUG_ON(mpic == NULL); + + DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no); + + mpic_cpu_write(MPIC_CPU_IPI_DISPATCH_0 + ipi_no * 0x10, + mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0])); +} + +int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs) +{ + u32 irq; + + irq = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK; + DBG("%s: get_one_irq(): %d\n", mpic->name, irq); + + if (mpic->cascade && irq == mpic->cascade_vec) { + DBG("%s: cascading ...\n", mpic->name); + irq = mpic->cascade(regs, mpic->cascade_data); + mpic_eoi(mpic); + return irq; + } + if (unlikely(irq == MPIC_VEC_SPURRIOUS)) + return -1; + if (irq < MPIC_VEC_IPI_0) + return irq + mpic->irq_offset; + DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0); + return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset; +} + +int mpic_get_irq(struct pt_regs *regs) +{ + struct mpic *mpic = mpic_primary; + + BUG_ON(mpic == NULL); + + return mpic_get_one_irq(mpic, regs); +} + + +#ifdef CONFIG_SMP +void mpic_request_ipis(void) +{ + struct mpic *mpic = mpic_primary; + + BUG_ON(mpic == NULL); + + printk("requesting IPIs ... \n"); + + /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */ + request_irq(mpic->ipi_offset+0, mpic_ipi_action, SA_INTERRUPT, + "IPI0 (call function)", mpic); + request_irq(mpic->ipi_offset+1, mpic_ipi_action, SA_INTERRUPT, + "IPI1 (reschedule)", mpic); + request_irq(mpic->ipi_offset+2, mpic_ipi_action, SA_INTERRUPT, + "IPI2 (unused)", mpic); + request_irq(mpic->ipi_offset+3, mpic_ipi_action, SA_INTERRUPT, + "IPI3 (debugger break)", mpic); + + printk("IPIs requested... \n"); +} +#endif /* CONFIG_SMP */ |