summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/cell
diff options
context:
space:
mode:
authorAnton Arapov <anton@redhat.com>2012-04-16 10:05:28 +0200
committerAnton Arapov <anton@redhat.com>2012-04-16 10:05:28 +0200
commitb4b6116a13633898cf868f2f103c96a90c4c20f8 (patch)
tree93d1b7e2cfcdf473d8d4ff3ad141fa864f8491f6 /arch/powerpc/platforms/cell
parentedd4be777c953e5faafc80d091d3084b4343f5d3 (diff)
downloadkernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.gz
kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.xz
kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.zip
fedora kernel: d9aad82f3319f3cfd1aebc01234254ef0c37ad84v3.3.2-1
Signed-off-by: Anton Arapov <anton@redhat.com>
Diffstat (limited to 'arch/powerpc/platforms/cell')
-rw-r--r--arch/powerpc/platforms/cell/Kconfig163
-rw-r--r--arch/powerpc/platforms/cell/Makefile49
-rw-r--r--arch/powerpc/platforms/cell/axon_msi.c507
-rw-r--r--arch/powerpc/platforms/cell/beat.c264
-rw-r--r--arch/powerpc/platforms/cell/beat.h39
-rw-r--r--arch/powerpc/platforms/cell/beat_htab.c442
-rw-r--r--arch/powerpc/platforms/cell/beat_hvCall.S287
-rw-r--r--arch/powerpc/platforms/cell/beat_interrupt.c255
-rw-r--r--arch/powerpc/platforms/cell/beat_interrupt.h30
-rw-r--r--arch/powerpc/platforms/cell/beat_iommu.c115
-rw-r--r--arch/powerpc/platforms/cell/beat_spu_priv1.c205
-rw-r--r--arch/powerpc/platforms/cell/beat_syscall.h164
-rw-r--r--arch/powerpc/platforms/cell/beat_udbg.c98
-rw-r--r--arch/powerpc/platforms/cell/beat_wrapper.h290
-rw-r--r--arch/powerpc/platforms/cell/cbe_cpufreq.c209
-rw-r--r--arch/powerpc/platforms/cell/cbe_cpufreq.h24
-rw-r--r--arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c115
-rw-r--r--arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c156
-rw-r--r--arch/powerpc/platforms/cell/cbe_powerbutton.c118
-rw-r--r--arch/powerpc/platforms/cell/cbe_regs.c281
-rw-r--r--arch/powerpc/platforms/cell/cbe_thermal.c399
-rw-r--r--arch/powerpc/platforms/cell/celleb_pci.c500
-rw-r--r--arch/powerpc/platforms/cell/celleb_pci.h46
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc.h232
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_epci.c429
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_pciex.c541
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_sio.c101
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_uhc.c95
-rw-r--r--arch/powerpc/platforms/cell/celleb_setup.c243
-rw-r--r--arch/powerpc/platforms/cell/cpufreq_spudemand.c171
-rw-r--r--arch/powerpc/platforms/cell/interrupt.c411
-rw-r--r--arch/powerpc/platforms/cell/interrupt.h89
-rw-r--r--arch/powerpc/platforms/cell/iommu.c1235
-rw-r--r--arch/powerpc/platforms/cell/pervasive.c132
-rw-r--r--arch/powerpc/platforms/cell/pervasive.h42
-rw-r--r--arch/powerpc/platforms/cell/pmu.c424
-rw-r--r--arch/powerpc/platforms/cell/qpace_setup.c148
-rw-r--r--arch/powerpc/platforms/cell/ras.c355
-rw-r--r--arch/powerpc/platforms/cell/ras.h9
-rw-r--r--arch/powerpc/platforms/cell/setup.c280
-rw-r--r--arch/powerpc/platforms/cell/smp.c188
-rw-r--r--arch/powerpc/platforms/cell/spider-pci.c184
-rw-r--r--arch/powerpc/platforms/cell/spider-pic.c364
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c852
-rw-r--r--arch/powerpc/platforms/cell/spu_callbacks.c74
-rw-r--r--arch/powerpc/platforms/cell/spu_fault.c94
-rw-r--r--arch/powerpc/platforms/cell/spu_manage.c556
-rw-r--r--arch/powerpc/platforms/cell/spu_notify.c68
-rw-r--r--arch/powerpc/platforms/cell/spu_priv1_mmio.c180
-rw-r--r--arch/powerpc/platforms/cell/spu_priv1_mmio.h26
-rw-r--r--arch/powerpc/platforms/cell/spu_syscalls.c178
-rw-r--r--arch/powerpc/platforms/cell/spufs/Makefile61
-rw-r--r--arch/powerpc/platforms/cell/spufs/backing_ops.c413
-rw-r--r--arch/powerpc/platforms/cell/spufs/context.c188
-rw-r--r--arch/powerpc/platforms/cell/spufs/coredump.c250
-rw-r--r--arch/powerpc/platforms/cell/spufs/fault.c191
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c2775
-rw-r--r--arch/powerpc/platforms/cell/spufs/gang.c87
-rw-r--r--arch/powerpc/platforms/cell/spufs/hw_ops.c349
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c865
-rw-r--r--arch/powerpc/platforms/cell/spufs/lscsa_alloc.c183
-rw-r--r--arch/powerpc/platforms/cell/spufs/run.c454
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c1174
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_restore.c336
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S116
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped935
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_save.c195
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_save_crt0.S102
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped743
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_utils.h160
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h376
-rw-r--r--arch/powerpc/platforms/cell/spufs/sputrace.h39
-rw-r--r--arch/powerpc/platforms/cell/spufs/switch.c2222
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c88
74 files changed, 24759 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
new file mode 100644
index 00000000000..2e7ff0c5cf4
--- /dev/null
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -0,0 +1,163 @@
+config PPC_CELL
+ bool
+ default n
+
+config PPC_CELL_COMMON
+ bool
+ select PPC_CELL
+ select PPC_DCR_MMIO
+ select PPC_INDIRECT_PIO
+ select PPC_INDIRECT_MMIO
+ select PPC_NATIVE
+ select PPC_RTAS
+ select IRQ_EDGE_EOI_HANDLER
+
+config PPC_CELL_NATIVE
+ bool
+ select PPC_CELL_COMMON
+ select MPIC
+ select PPC_IO_WORKAROUNDS
+ select IBM_EMAC_EMAC4
+ select IBM_EMAC_RGMII
+ select IBM_EMAC_ZMII #test only
+ select IBM_EMAC_TAH #test only
+ default n
+
+config PPC_IBM_CELL_BLADE
+ bool "IBM Cell Blade"
+ depends on PPC64 && PPC_BOOK3S
+ select PPC_CELL_NATIVE
+ select PPC_OF_PLATFORM_PCI
+ select PCI
+ select MMIO_NVRAM
+ select PPC_UDBG_16550
+ select UDBG_RTAS_CONSOLE
+
+config PPC_CELLEB
+ bool "Toshiba's Cell Reference Set 'Celleb' Architecture"
+ depends on PPC64 && PPC_BOOK3S
+ select PPC_CELL_NATIVE
+ select PPC_OF_PLATFORM_PCI
+ select PCI
+ select HAS_TXX9_SERIAL
+ select PPC_UDBG_BEAT
+ select USB_OHCI_BIG_ENDIAN_MMIO
+ select USB_EHCI_BIG_ENDIAN_MMIO
+
+config PPC_CELL_QPACE
+ bool "IBM Cell - QPACE"
+ depends on PPC64 && PPC_BOOK3S
+ select PPC_CELL_COMMON
+
+config AXON_MSI
+ bool
+ depends on PPC_IBM_CELL_BLADE && PCI_MSI
+ default y
+
+menu "Cell Broadband Engine options"
+ depends on PPC_CELL
+
+config SPU_FS
+ tristate "SPU file system"
+ default m
+ depends on PPC_CELL
+ select SPU_BASE
+ select MEMORY_HOTPLUG
+ help
+ The SPU file system is used to access Synergistic Processing
+ Units on machines implementing the Broadband Processor
+ Architecture.
+
+config SPU_FS_64K_LS
+ bool "Use 64K pages to map SPE local store"
+ # we depend on PPC_MM_SLICES for now rather than selecting
+ # it because we depend on hugetlbfs hooks being present. We
+ # will fix that when the generic code has been improved to
+ # not require hijacking hugetlbfs hooks.
+ depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES
+ default y
+ select PPC_HAS_HASH_64K
+ help
+ This option causes SPE local stores to be mapped in process
+ address spaces using 64K pages while the rest of the kernel
+ uses 4K pages. This can improve performances of applications
+ using multiple SPEs by lowering the TLB pressure on them.
+
+config SPU_BASE
+ bool
+ default n
+
+config CBE_RAS
+ bool "RAS features for bare metal Cell BE"
+ depends on PPC_CELL_NATIVE
+ default y
+
+config PPC_IBM_CELL_RESETBUTTON
+ bool "IBM Cell Blade Pinhole reset button"
+ depends on CBE_RAS && PPC_IBM_CELL_BLADE
+ default y
+ help
+ Support Pinhole Resetbutton on IBM Cell blades.
+ This adds a method to trigger system reset via front panel pinhole button.
+
+config PPC_IBM_CELL_POWERBUTTON
+ tristate "IBM Cell Blade power button"
+ depends on PPC_IBM_CELL_BLADE && INPUT_EVDEV
+ default y
+ help
+ Support Powerbutton on IBM Cell blades.
+ This will enable the powerbutton as an input device.
+
+config CBE_THERM
+ tristate "CBE thermal support"
+ default m
+ depends on CBE_RAS && SPU_BASE
+
+config CBE_CPUFREQ
+ tristate "CBE frequency scaling"
+ depends on CBE_RAS && CPU_FREQ
+ default m
+ help
+ This adds the cpufreq driver for Cell BE processors.
+ For details, take a look at <file:Documentation/cpu-freq/>.
+ If you don't have such processor, say N
+
+config CBE_CPUFREQ_PMI_ENABLE
+ bool "CBE frequency scaling using PMI interface"
+ depends on CBE_CPUFREQ && EXPERIMENTAL
+ default n
+ help
+ Select this, if you want to use the PMI interface
+ to switch frequencies. Using PMI, the
+ processor will not only be able to run at lower speed,
+ but also at lower core voltage.
+
+config CBE_CPUFREQ_PMI
+ tristate
+ depends on CBE_CPUFREQ_PMI_ENABLE
+ default CBE_CPUFREQ
+
+config PPC_PMI
+ tristate
+ default y
+ depends on CBE_CPUFREQ_PMI || PPC_IBM_CELL_POWERBUTTON
+ help
+ PMI (Platform Management Interrupt) is a way to
+ communicate with the BMC (Baseboard Management Controller).
+ It is used in some IBM Cell blades.
+
+config CBE_CPUFREQ_SPU_GOVERNOR
+ tristate "CBE frequency scaling based on SPU usage"
+ depends on SPU_FS && CPU_FREQ
+ default m
+ help
+ This governor checks for spu usage to adjust the cpu frequency.
+ If no spu is running on a given cpu, that cpu will be throttled to
+ the minimal possible frequency.
+
+endmenu
+
+config OPROFILE_CELL
+ def_bool y
+ depends on PPC_CELL_NATIVE && (OPROFILE = m || OPROFILE = y) && SPU_BASE
+
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
new file mode 100644
index 00000000000..a4a89350bcf
--- /dev/null
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -0,0 +1,49 @@
+obj-$(CONFIG_PPC_CELL_COMMON) += cbe_regs.o interrupt.o pervasive.o
+
+obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \
+ pmu.o spider-pci.o
+obj-$(CONFIG_CBE_RAS) += ras.o
+
+obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
+obj-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o
+obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o
+cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o
+obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o
+
+obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o
+
+ifeq ($(CONFIG_SMP),y)
+obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o
+obj-$(CONFIG_PPC_CELL_QPACE) += smp.o
+endif
+
+# needed only when building loadable spufs.ko
+spu-priv1-$(CONFIG_PPC_CELL_COMMON) += spu_priv1_mmio.o
+spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o
+
+obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \
+ spu_notify.o \
+ spu_syscalls.o spu_fault.o \
+ $(spu-priv1-y) \
+ $(spu-manage-y) \
+ spufs/
+
+obj-$(CONFIG_AXON_MSI) += axon_msi.o
+
+# qpace setup
+obj-$(CONFIG_PPC_CELL_QPACE) += qpace_setup.o
+
+# celleb stuff
+ifeq ($(CONFIG_PPC_CELLEB),y)
+obj-y += celleb_setup.o \
+ celleb_pci.o celleb_scc_epci.o \
+ celleb_scc_pciex.o \
+ celleb_scc_uhc.o \
+ spider-pci.o beat.o beat_htab.o \
+ beat_hvCall.o beat_interrupt.o \
+ beat_iommu.o
+
+obj-$(CONFIG_PPC_UDBG_BEAT) += beat_udbg.o
+obj-$(CONFIG_SERIAL_TXX9) += celleb_scc_sio.o
+obj-$(CONFIG_SPU_BASE) += beat_spu_priv1.o
+endif
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
new file mode 100644
index 00000000000..40a6e34793b
--- /dev/null
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright 2007, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/export.h>
+#include <linux/of_platform.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+#include <asm/dcr.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+
+
+/*
+ * MSIC registers, specified as offsets from dcr_base
+ */
+#define MSIC_CTRL_REG 0x0
+
+/* Base Address registers specify FIFO location in BE memory */
+#define MSIC_BASE_ADDR_HI_REG 0x3
+#define MSIC_BASE_ADDR_LO_REG 0x4
+
+/* Hold the read/write offsets into the FIFO */
+#define MSIC_READ_OFFSET_REG 0x5
+#define MSIC_WRITE_OFFSET_REG 0x6
+
+
+/* MSIC control register flags */
+#define MSIC_CTRL_ENABLE 0x0001
+#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002
+#define MSIC_CTRL_IRQ_ENABLE 0x0008
+#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010
+
+/*
+ * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB.
+ * Currently we're using a 64KB FIFO size.
+ */
+#define MSIC_FIFO_SIZE_SHIFT 16
+#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT)
+
+/*
+ * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits
+ * 8-9 of the MSIC control reg.
+ */
+#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
+
+/*
+ * We need to mask the read/write offsets to make sure they stay within
+ * the bounds of the FIFO. Also they should always be 16-byte aligned.
+ */
+#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
+
+/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */
+#define MSIC_FIFO_ENTRY_SIZE 0x10
+
+
+struct axon_msic {
+ struct irq_host *irq_host;
+ __le32 *fifo_virt;
+ dma_addr_t fifo_phys;
+ dcr_host_t dcr_host;
+ u32 read_offset;
+#ifdef DEBUG
+ u32 __iomem *trigger;
+#endif
+};
+
+#ifdef DEBUG
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic);
+#else
+static inline void axon_msi_debug_setup(struct device_node *dn,
+ struct axon_msic *msic) { }
+#endif
+
+
+static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
+{
+ pr_devel("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n);
+
+ dcr_write(msic->dcr_host, dcr_n, val);
+}
+
+static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct axon_msic *msic = irq_get_handler_data(irq);
+ u32 write_offset, msi;
+ int idx;
+ int retry = 0;
+
+ write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
+ pr_devel("axon_msi: original write_offset 0x%x\n", write_offset);
+
+ /* write_offset doesn't wrap properly, so we have to mask it */
+ write_offset &= MSIC_FIFO_SIZE_MASK;
+
+ while (msic->read_offset != write_offset && retry < 100) {
+ idx = msic->read_offset / sizeof(__le32);
+ msi = le32_to_cpu(msic->fifo_virt[idx]);
+ msi &= 0xFFFF;
+
+ pr_devel("axon_msi: woff %x roff %x msi %x\n",
+ write_offset, msic->read_offset, msi);
+
+ if (msi < NR_IRQS && irq_get_chip_data(msi) == msic) {
+ generic_handle_irq(msi);
+ msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
+ } else {
+ /*
+ * Reading the MSIC_WRITE_OFFSET_REG does not
+ * reliably flush the outstanding DMA to the
+ * FIFO buffer. Here we were reading stale
+ * data, so we need to retry.
+ */
+ udelay(1);
+ retry++;
+ pr_devel("axon_msi: invalid irq 0x%x!\n", msi);
+ continue;
+ }
+
+ if (retry) {
+ pr_devel("axon_msi: late irq 0x%x, retry %d\n",
+ msi, retry);
+ retry = 0;
+ }
+
+ msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+ msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+ }
+
+ if (retry) {
+ printk(KERN_WARNING "axon_msi: irq timed out\n");
+
+ msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+ msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+ }
+
+ chip->irq_eoi(&desc->irq_data);
+}
+
+static struct axon_msic *find_msi_translator(struct pci_dev *dev)
+{
+ struct irq_host *irq_host;
+ struct device_node *dn, *tmp;
+ const phandle *ph;
+ struct axon_msic *msic = NULL;
+
+ dn = of_node_get(pci_device_to_OF_node(dev));
+ if (!dn) {
+ dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+ return NULL;
+ }
+
+ for (; dn; dn = of_get_next_parent(dn)) {
+ ph = of_get_property(dn, "msi-translator", NULL);
+ if (ph)
+ break;
+ }
+
+ if (!ph) {
+ dev_dbg(&dev->dev,
+ "axon_msi: no msi-translator property found\n");
+ goto out_error;
+ }
+
+ tmp = dn;
+ dn = of_find_node_by_phandle(*ph);
+ of_node_put(tmp);
+ if (!dn) {
+ dev_dbg(&dev->dev,
+ "axon_msi: msi-translator doesn't point to a node\n");
+ goto out_error;
+ }
+
+ irq_host = irq_find_host(dn);
+ if (!irq_host) {
+ dev_dbg(&dev->dev, "axon_msi: no irq_host found for node %s\n",
+ dn->full_name);
+ goto out_error;
+ }
+
+ msic = irq_host->host_data;
+
+out_error:
+ of_node_put(dn);
+
+ return msic;
+}
+
+static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type)
+{
+ if (!find_msi_translator(dev))
+ return -ENODEV;
+
+ return 0;
+}
+
+static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
+{
+ struct device_node *dn;
+ struct msi_desc *entry;
+ int len;
+ const u32 *prop;
+
+ dn = of_node_get(pci_device_to_OF_node(dev));
+ if (!dn) {
+ dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+ return -ENODEV;
+ }
+
+ entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
+
+ for (; dn; dn = of_get_next_parent(dn)) {
+ if (entry->msi_attrib.is_64) {
+ prop = of_get_property(dn, "msi-address-64", &len);
+ if (prop)
+ break;
+ }
+
+ prop = of_get_property(dn, "msi-address-32", &len);
+ if (prop)
+ break;
+ }
+
+ if (!prop) {
+ dev_dbg(&dev->dev,
+ "axon_msi: no msi-address-(32|64) properties found\n");
+ return -ENOENT;
+ }
+
+ switch (len) {
+ case 8:
+ msg->address_hi = prop[0];
+ msg->address_lo = prop[1];
+ break;
+ case 4:
+ msg->address_hi = 0;
+ msg->address_lo = prop[0];
+ break;
+ default:
+ dev_dbg(&dev->dev,
+ "axon_msi: malformed msi-address-(32|64) property\n");
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
+ of_node_put(dn);
+
+ return 0;
+}
+
+static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ unsigned int virq, rc;
+ struct msi_desc *entry;
+ struct msi_msg msg;
+ struct axon_msic *msic;
+
+ msic = find_msi_translator(dev);
+ if (!msic)
+ return -ENODEV;
+
+ rc = setup_msi_msg_address(dev, &msg);
+ if (rc)
+ return rc;
+
+ /* We rely on being able to stash a virq in a u16 */
+ BUILD_BUG_ON(NR_IRQS > 65536);
+
+ list_for_each_entry(entry, &dev->msi_list, list) {
+ virq = irq_create_direct_mapping(msic->irq_host);
+ if (virq == NO_IRQ) {
+ dev_warn(&dev->dev,
+ "axon_msi: virq allocation failed!\n");
+ return -1;
+ }
+ dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq);
+
+ irq_set_msi_desc(virq, entry);
+ msg.data = virq;
+ write_msi_msg(virq, &msg);
+ }
+
+ return 0;
+}
+
+static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
+{
+ struct msi_desc *entry;
+
+ dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
+
+ list_for_each_entry(entry, &dev->msi_list, list) {
+ if (entry->irq == NO_IRQ)
+ continue;
+
+ irq_set_msi_desc(entry->irq, NULL);
+ irq_dispose_mapping(entry->irq);
+ }
+}
+
+static struct irq_chip msic_irq_chip = {
+ .irq_mask = mask_msi_irq,
+ .irq_unmask = unmask_msi_irq,
+ .irq_shutdown = mask_msi_irq,
+ .name = "AXON-MSI",
+};
+
+static int msic_host_map(struct irq_host *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ irq_set_chip_data(virq, h->host_data);
+ irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
+
+ return 0;
+}
+
+static struct irq_host_ops msic_host_ops = {
+ .map = msic_host_map,
+};
+
+static void axon_msi_shutdown(struct platform_device *device)
+{
+ struct axon_msic *msic = dev_get_drvdata(&device->dev);
+ u32 tmp;
+
+ pr_devel("axon_msi: disabling %s\n",
+ msic->irq_host->of_node->full_name);
+ tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
+ tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
+ msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
+}
+
+static int axon_msi_probe(struct platform_device *device)
+{
+ struct device_node *dn = device->dev.of_node;
+ struct axon_msic *msic;
+ unsigned int virq;
+ int dcr_base, dcr_len;
+
+ pr_devel("axon_msi: setting up dn %s\n", dn->full_name);
+
+ msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
+ if (!msic) {
+ printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n",
+ dn->full_name);
+ goto out;
+ }
+
+ dcr_base = dcr_resource_start(dn, 0);
+ dcr_len = dcr_resource_len(dn, 0);
+
+ if (dcr_base == 0 || dcr_len == 0) {
+ printk(KERN_ERR
+ "axon_msi: couldn't parse dcr properties on %s\n",
+ dn->full_name);
+ goto out_free_msic;
+ }
+
+ msic->dcr_host = dcr_map(dn, dcr_base, dcr_len);
+ if (!DCR_MAP_OK(msic->dcr_host)) {
+ printk(KERN_ERR "axon_msi: dcr_map failed for %s\n",
+ dn->full_name);
+ goto out_free_msic;
+ }
+
+ msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES,
+ &msic->fifo_phys, GFP_KERNEL);
+ if (!msic->fifo_virt) {
+ printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n",
+ dn->full_name);
+ goto out_free_msic;
+ }
+
+ virq = irq_of_parse_and_map(dn, 0);
+ if (virq == NO_IRQ) {
+ printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n",
+ dn->full_name);
+ goto out_free_fifo;
+ }
+ memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
+
+ msic->irq_host = irq_alloc_host(dn, IRQ_HOST_MAP_NOMAP,
+ NR_IRQS, &msic_host_ops, 0);
+ if (!msic->irq_host) {
+ printk(KERN_ERR "axon_msi: couldn't allocate irq_host for %s\n",
+ dn->full_name);
+ goto out_free_fifo;
+ }
+
+ msic->irq_host->host_data = msic;
+
+ irq_set_handler_data(virq, msic);
+ irq_set_chained_handler(virq, axon_msi_cascade);
+ pr_devel("axon_msi: irq 0x%x setup for axon_msi\n", virq);
+
+ /* Enable the MSIC hardware */
+ msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, msic->fifo_phys >> 32);
+ msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG,
+ msic->fifo_phys & 0xFFFFFFFF);
+ msic_dcr_write(msic, MSIC_CTRL_REG,
+ MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
+ MSIC_CTRL_FIFO_SIZE);
+
+ msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG)
+ & MSIC_FIFO_SIZE_MASK;
+
+ dev_set_drvdata(&device->dev, msic);
+
+ ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;
+ ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
+ ppc_md.msi_check_device = axon_msi_check_device;
+
+ axon_msi_debug_setup(dn, msic);
+
+ printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name);
+
+ return 0;
+
+out_free_fifo:
+ dma_free_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, msic->fifo_virt,
+ msic->fifo_phys);
+out_free_msic:
+ kfree(msic);
+out:
+
+ return -1;
+}
+
+static const struct of_device_id axon_msi_device_id[] = {
+ {
+ .compatible = "ibm,axon-msic"
+ },
+ {}
+};
+
+static struct platform_driver axon_msi_driver = {
+ .probe = axon_msi_probe,
+ .shutdown = axon_msi_shutdown,
+ .driver = {
+ .name = "axon-msi",
+ .owner = THIS_MODULE,
+ .of_match_table = axon_msi_device_id,
+ },
+};
+
+static int __init axon_msi_init(void)
+{
+ return platform_driver_register(&axon_msi_driver);
+}
+subsys_initcall(axon_msi_init);
+
+
+#ifdef DEBUG
+static int msic_set(void *data, u64 val)
+{
+ struct axon_msic *msic = data;
+ out_le32(msic->trigger, val);
+ return 0;
+}
+
+static int msic_get(void *data, u64 *val)
+{
+ *val = 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_msic, msic_get, msic_set, "%llu\n");
+
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic)
+{
+ char name[8];
+ u64 addr;
+
+ addr = of_translate_address(dn, of_get_property(dn, "reg", NULL));
+ if (addr == OF_BAD_ADDR) {
+ pr_devel("axon_msi: couldn't translate reg property\n");
+ return;
+ }
+
+ msic->trigger = ioremap(addr, 0x4);
+ if (!msic->trigger) {
+ pr_devel("axon_msi: ioremap failed\n");
+ return;
+ }
+
+ snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn));
+
+ if (!debugfs_create_file(name, 0600, powerpc_debugfs_root,
+ msic, &fops_msic)) {
+ pr_devel("axon_msi: debugfs_create_file failed!\n");
+ return;
+ }
+}
+#endif /* DEBUG */
diff --git a/arch/powerpc/platforms/cell/beat.c b/arch/powerpc/platforms/cell/beat.c
new file mode 100644
index 00000000000..852592b2b71
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat.c
@@ -0,0 +1,264 @@
+/*
+ * Simple routines for Celleb/Beat
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/rtc.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+#include <linux/reboot.h>
+
+#include <asm/hvconsole.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+#include "beat_wrapper.h"
+#include "beat.h"
+#include "beat_interrupt.h"
+
+static int beat_pm_poweroff_flag;
+
+void beat_restart(char *cmd)
+{
+ beat_shutdown_logical_partition(!beat_pm_poweroff_flag);
+}
+
+void beat_power_off(void)
+{
+ beat_shutdown_logical_partition(0);
+}
+
+u64 beat_halt_code = 0x1000000000000000UL;
+EXPORT_SYMBOL(beat_halt_code);
+
+void beat_halt(void)
+{
+ beat_shutdown_logical_partition(beat_halt_code);
+}
+
+int beat_set_rtc_time(struct rtc_time *rtc_time)
+{
+ u64 tim;
+ tim = mktime(rtc_time->tm_year+1900,
+ rtc_time->tm_mon+1, rtc_time->tm_mday,
+ rtc_time->tm_hour, rtc_time->tm_min, rtc_time->tm_sec);
+ if (beat_rtc_write(tim))
+ return -1;
+ return 0;
+}
+
+void beat_get_rtc_time(struct rtc_time *rtc_time)
+{
+ u64 tim;
+
+ if (beat_rtc_read(&tim))
+ tim = 0;
+ to_tm(tim, rtc_time);
+ rtc_time->tm_year -= 1900;
+ rtc_time->tm_mon -= 1;
+}
+
+#define BEAT_NVRAM_SIZE 4096
+
+ssize_t beat_nvram_read(char *buf, size_t count, loff_t *index)
+{
+ unsigned int i;
+ unsigned long len;
+ char *p = buf;
+
+ if (*index >= BEAT_NVRAM_SIZE)
+ return -ENODEV;
+ i = *index;
+ if (i + count > BEAT_NVRAM_SIZE)
+ count = BEAT_NVRAM_SIZE - i;
+
+ for (; count != 0; count -= len) {
+ len = count;
+ if (len > BEAT_NVRW_CNT)
+ len = BEAT_NVRW_CNT;
+ if (beat_eeprom_read(i, len, p))
+ return -EIO;
+
+ p += len;
+ i += len;
+ }
+ *index = i;
+ return p - buf;
+}
+
+ssize_t beat_nvram_write(char *buf, size_t count, loff_t *index)
+{
+ unsigned int i;
+ unsigned long len;
+ char *p = buf;
+
+ if (*index >= BEAT_NVRAM_SIZE)
+ return -ENODEV;
+ i = *index;
+ if (i + count > BEAT_NVRAM_SIZE)
+ count = BEAT_NVRAM_SIZE - i;
+
+ for (; count != 0; count -= len) {
+ len = count;
+ if (len > BEAT_NVRW_CNT)
+ len = BEAT_NVRW_CNT;
+ if (beat_eeprom_write(i, len, p))
+ return -EIO;
+
+ p += len;
+ i += len;
+ }
+ *index = i;
+ return p - buf;
+}
+
+ssize_t beat_nvram_get_size(void)
+{
+ return BEAT_NVRAM_SIZE;
+}
+
+int beat_set_xdabr(unsigned long dabr)
+{
+ if (beat_set_dabr(dabr, DABRX_KERNEL | DABRX_USER))
+ return -1;
+ return 0;
+}
+
+int64_t beat_get_term_char(u64 vterm, u64 *len, u64 *t1, u64 *t2)
+{
+ u64 db[2];
+ s64 ret;
+
+ ret = beat_get_characters_from_console(vterm, len, (u8 *)db);
+ if (ret == 0) {
+ *t1 = db[0];
+ *t2 = db[1];
+ }
+ return ret;
+}
+EXPORT_SYMBOL(beat_get_term_char);
+
+int64_t beat_put_term_char(u64 vterm, u64 len, u64 t1, u64 t2)
+{
+ u64 db[2];
+
+ db[0] = t1;
+ db[1] = t2;
+ return beat_put_characters_to_console(vterm, len, (u8 *)db);
+}
+EXPORT_SYMBOL(beat_put_term_char);
+
+void beat_power_save(void)
+{
+ beat_pause(0);
+}
+
+#ifdef CONFIG_KEXEC
+void beat_kexec_cpu_down(int crash, int secondary)
+{
+ beatic_deinit_IRQ();
+}
+#endif
+
+static irqreturn_t beat_power_event(int virq, void *arg)
+{
+ printk(KERN_DEBUG "Beat: power button pressed\n");
+ beat_pm_poweroff_flag = 1;
+ ctrl_alt_del();
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t beat_reset_event(int virq, void *arg)
+{
+ printk(KERN_DEBUG "Beat: reset button pressed\n");
+ beat_pm_poweroff_flag = 0;
+ ctrl_alt_del();
+ return IRQ_HANDLED;
+}
+
+static struct beat_event_list {
+ const char *typecode;
+ irq_handler_t handler;
+ unsigned int virq;
+} beat_event_list[] = {
+ { "power", beat_power_event, 0 },
+ { "reset", beat_reset_event, 0 },
+};
+
+static int __init beat_register_event(void)
+{
+ u64 path[4], data[2];
+ int rc, i;
+ unsigned int virq;
+
+ for (i = 0; i < ARRAY_SIZE(beat_event_list); i++) {
+ struct beat_event_list *ev = &beat_event_list[i];
+
+ if (beat_construct_event_receive_port(data) != 0) {
+ printk(KERN_ERR "Beat: "
+ "cannot construct event receive port for %s\n",
+ ev->typecode);
+ return -EINVAL;
+ }
+
+ virq = irq_create_mapping(NULL, data[0]);
+ if (virq == NO_IRQ) {
+ printk(KERN_ERR "Beat: failed to get virtual IRQ"
+ " for event receive port for %s\n",
+ ev->typecode);
+ beat_destruct_event_receive_port(data[0]);
+ return -EIO;
+ }
+ ev->virq = virq;
+
+ rc = request_irq(virq, ev->handler, 0,
+ ev->typecode, NULL);
+ if (rc != 0) {
+ printk(KERN_ERR "Beat: failed to request virtual IRQ"
+ " for event receive port for %s\n",
+ ev->typecode);
+ beat_destruct_event_receive_port(data[0]);
+ return rc;
+ }
+
+ path[0] = 0x1000000065780000ul; /* 1,ex */
+ path[1] = 0x627574746f6e0000ul; /* button */
+ path[2] = 0;
+ strncpy((char *)&path[2], ev->typecode, 8);
+ path[3] = 0;
+ data[1] = 0;
+
+ beat_create_repository_node(path, data);
+ }
+ return 0;
+}
+
+static int __init beat_event_init(void)
+{
+ if (!firmware_has_feature(FW_FEATURE_BEAT))
+ return -EINVAL;
+
+ beat_pm_poweroff_flag = 0;
+ return beat_register_event();
+}
+
+device_initcall(beat_event_init);
diff --git a/arch/powerpc/platforms/cell/beat.h b/arch/powerpc/platforms/cell/beat.h
new file mode 100644
index 00000000000..32c8efcedc8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat.h
@@ -0,0 +1,39 @@
+/*
+ * Guest OS Interfaces.
+ *
+ * (C) Copyright 2006 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _CELLEB_BEAT_H
+#define _CELLEB_BEAT_H
+
+int64_t beat_get_term_char(uint64_t, uint64_t *, uint64_t *, uint64_t *);
+int64_t beat_put_term_char(uint64_t, uint64_t, uint64_t, uint64_t);
+int64_t beat_repository_encode(int, const char *, uint64_t[4]);
+void beat_restart(char *);
+void beat_power_off(void);
+void beat_halt(void);
+int beat_set_rtc_time(struct rtc_time *);
+void beat_get_rtc_time(struct rtc_time *);
+ssize_t beat_nvram_get_size(void);
+ssize_t beat_nvram_read(char *, size_t, loff_t *);
+ssize_t beat_nvram_write(char *, size_t, loff_t *);
+int beat_set_xdabr(unsigned long);
+void beat_power_save(void);
+void beat_kexec_cpu_down(int, int);
+
+#endif /* _CELLEB_BEAT_H */
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
new file mode 100644
index 00000000000..2516c1cf846
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -0,0 +1,442 @@
+/*
+ * "Cell Reference Set" HTAB support.
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This code is based on arch/powerpc/platforms/pseries/lpar.c:
+ * Copyright (C) 2001 Todd Inglett, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG_LOW
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+
+#include "beat_wrapper.h"
+
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while (0)
+#else
+#define DBG_LOW(fmt...) do { } while (0)
+#endif
+
+static DEFINE_RAW_SPINLOCK(beat_htab_lock);
+
+static inline unsigned int beat_read_mask(unsigned hpte_group)
+{
+ unsigned long rmask = 0;
+ u64 hpte_v[5];
+
+ beat_read_htab_entries(0, hpte_group + 0, hpte_v);
+ if (!(hpte_v[0] & HPTE_V_BOLTED))
+ rmask |= 0x8000;
+ if (!(hpte_v[1] & HPTE_V_BOLTED))
+ rmask |= 0x4000;
+ if (!(hpte_v[2] & HPTE_V_BOLTED))
+ rmask |= 0x2000;
+ if (!(hpte_v[3] & HPTE_V_BOLTED))
+ rmask |= 0x1000;
+ beat_read_htab_entries(0, hpte_group + 4, hpte_v);
+ if (!(hpte_v[0] & HPTE_V_BOLTED))
+ rmask |= 0x0800;
+ if (!(hpte_v[1] & HPTE_V_BOLTED))
+ rmask |= 0x0400;
+ if (!(hpte_v[2] & HPTE_V_BOLTED))
+ rmask |= 0x0200;
+ if (!(hpte_v[3] & HPTE_V_BOLTED))
+ rmask |= 0x0100;
+ hpte_group = ~hpte_group & (htab_hash_mask * HPTES_PER_GROUP);
+ beat_read_htab_entries(0, hpte_group + 0, hpte_v);
+ if (!(hpte_v[0] & HPTE_V_BOLTED))
+ rmask |= 0x80;
+ if (!(hpte_v[1] & HPTE_V_BOLTED))
+ rmask |= 0x40;
+ if (!(hpte_v[2] & HPTE_V_BOLTED))
+ rmask |= 0x20;
+ if (!(hpte_v[3] & HPTE_V_BOLTED))
+ rmask |= 0x10;
+ beat_read_htab_entries(0, hpte_group + 4, hpte_v);
+ if (!(hpte_v[0] & HPTE_V_BOLTED))
+ rmask |= 0x08;
+ if (!(hpte_v[1] & HPTE_V_BOLTED))
+ rmask |= 0x04;
+ if (!(hpte_v[2] & HPTE_V_BOLTED))
+ rmask |= 0x02;
+ if (!(hpte_v[3] & HPTE_V_BOLTED))
+ rmask |= 0x01;
+ return rmask;
+}
+
+static long beat_lpar_hpte_insert(unsigned long hpte_group,
+ unsigned long va, unsigned long pa,
+ unsigned long rflags, unsigned long vflags,
+ int psize, int ssize)
+{
+ unsigned long lpar_rc;
+ u64 hpte_v, hpte_r, slot;
+
+ /* same as iseries */
+ if (vflags & HPTE_V_SECONDARY)
+ return -1;
+
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
+ "rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, va, pa, rflags, vflags, psize);
+
+ hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) |
+ vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
+
+ if (rflags & _PAGE_NO_CACHE)
+ hpte_r &= ~_PAGE_COHERENT;
+
+ raw_spin_lock(&beat_htab_lock);
+ lpar_rc = beat_read_mask(hpte_group);
+ if (lpar_rc == 0) {
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" full\n");
+ raw_spin_unlock(&beat_htab_lock);
+ return -1;
+ }
+
+ lpar_rc = beat_insert_htab_entry(0, hpte_group, lpar_rc << 48,
+ hpte_v, hpte_r, &slot);
+ raw_spin_unlock(&beat_htab_lock);
+
+ /*
+ * Since we try and ioremap PHBs we don't own, the pte insert
+ * will fail. However we must catch the failure in hash_page
+ * or we will loop forever, so return -2 in this case.
+ */
+ if (unlikely(lpar_rc != 0)) {
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" lpar err %lx\n", lpar_rc);
+ return -2;
+ }
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" -> slot: %lx\n", slot);
+
+ /* We have to pass down the secondary bucket bit here as well */
+ return (slot ^ hpte_group) & 15;
+}
+
+static long beat_lpar_hpte_remove(unsigned long hpte_group)
+{
+ DBG_LOW("hpte_remove(group=%lx)\n", hpte_group);
+ return -1;
+}
+
+static unsigned long beat_lpar_hpte_getword0(unsigned long slot)
+{
+ unsigned long dword0;
+ unsigned long lpar_rc;
+ u64 dword[5];
+
+ lpar_rc = beat_read_htab_entries(0, slot & ~3UL, dword);
+
+ dword0 = dword[slot&3];
+
+ BUG_ON(lpar_rc != 0);
+
+ return dword0;
+}
+
+static void beat_lpar_hptab_clear(void)
+{
+ unsigned long size_bytes = 1UL << ppc64_pft_size;
+ unsigned long hpte_count = size_bytes >> 4;
+ int i;
+ u64 dummy0, dummy1;
+
+ /* TODO: Use bulk call */
+ for (i = 0; i < hpte_count; i++)
+ beat_write_htab_entry(0, i, 0, 0, -1UL, -1UL, &dummy0, &dummy1);
+}
+
+/*
+ * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
+ * the low 3 bits of flags happen to line up. So no transform is needed.
+ * We can probably optimize here and assume the high bits of newpp are
+ * already zero. For now I am paranoid.
+ */
+static long beat_lpar_hpte_updatepp(unsigned long slot,
+ unsigned long newpp,
+ unsigned long va,
+ int psize, int ssize, int local)
+{
+ unsigned long lpar_rc;
+ u64 dummy0, dummy1;
+ unsigned long want_v;
+
+ want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+
+ DBG_LOW(" update: "
+ "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
+ want_v & HPTE_V_AVPN, slot, psize, newpp);
+
+ raw_spin_lock(&beat_htab_lock);
+ dummy0 = beat_lpar_hpte_getword0(slot);
+ if ((dummy0 & ~0x7FUL) != (want_v & ~0x7FUL)) {
+ DBG_LOW("not found !\n");
+ raw_spin_unlock(&beat_htab_lock);
+ return -1;
+ }
+
+ lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, &dummy0,
+ &dummy1);
+ raw_spin_unlock(&beat_htab_lock);
+ if (lpar_rc != 0 || dummy0 == 0) {
+ DBG_LOW("not found !\n");
+ return -1;
+ }
+
+ DBG_LOW("ok %lx %lx\n", dummy0, dummy1);
+
+ BUG_ON(lpar_rc != 0);
+
+ return 0;
+}
+
+static long beat_lpar_hpte_find(unsigned long va, int psize)
+{
+ unsigned long hash;
+ unsigned long i, j;
+ long slot;
+ unsigned long want_v, hpte_v;
+
+ hash = hpt_hash(va, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M);
+ want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+
+ for (j = 0; j < 2; j++) {
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ for (i = 0; i < HPTES_PER_GROUP; i++) {
+ hpte_v = beat_lpar_hpte_getword0(slot);
+
+ if (HPTE_V_COMPARE(hpte_v, want_v)
+ && (hpte_v & HPTE_V_VALID)
+ && (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
+ /* HPTE matches */
+ if (j)
+ slot = -slot;
+ return slot;
+ }
+ ++slot;
+ }
+ hash = ~hash;
+ }
+
+ return -1;
+}
+
+static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
+ unsigned long ea,
+ int psize, int ssize)
+{
+ unsigned long lpar_rc, slot, vsid, va;
+ u64 dummy0, dummy1;
+
+ vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
+ va = (vsid << 28) | (ea & 0x0fffffff);
+
+ raw_spin_lock(&beat_htab_lock);
+ slot = beat_lpar_hpte_find(va, psize);
+ BUG_ON(slot == -1);
+
+ lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7,
+ &dummy0, &dummy1);
+ raw_spin_unlock(&beat_htab_lock);
+
+ BUG_ON(lpar_rc != 0);
+}
+
+static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
+ int psize, int ssize, int local)
+{
+ unsigned long want_v;
+ unsigned long lpar_rc;
+ u64 dummy1, dummy2;
+ unsigned long flags;
+
+ DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
+ slot, va, psize, local);
+ want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+
+ raw_spin_lock_irqsave(&beat_htab_lock, flags);
+ dummy1 = beat_lpar_hpte_getword0(slot);
+
+ if ((dummy1 & ~0x7FUL) != (want_v & ~0x7FUL)) {
+ DBG_LOW("not found !\n");
+ raw_spin_unlock_irqrestore(&beat_htab_lock, flags);
+ return;
+ }
+
+ lpar_rc = beat_write_htab_entry(0, slot, 0, 0, HPTE_V_VALID, 0,
+ &dummy1, &dummy2);
+ raw_spin_unlock_irqrestore(&beat_htab_lock, flags);
+
+ BUG_ON(lpar_rc != 0);
+}
+
+void __init hpte_init_beat(void)
+{
+ ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate;
+ ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp;
+ ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
+ ppc_md.hpte_insert = beat_lpar_hpte_insert;
+ ppc_md.hpte_remove = beat_lpar_hpte_remove;
+ ppc_md.hpte_clear_all = beat_lpar_hptab_clear;
+}
+
+static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
+ unsigned long va, unsigned long pa,
+ unsigned long rflags, unsigned long vflags,
+ int psize, int ssize)
+{
+ unsigned long lpar_rc;
+ u64 hpte_v, hpte_r, slot;
+
+ /* same as iseries */
+ if (vflags & HPTE_V_SECONDARY)
+ return -1;
+
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
+ "rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, va, pa, rflags, vflags, psize);
+
+ hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) |
+ vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
+
+ if (rflags & _PAGE_NO_CACHE)
+ hpte_r &= ~_PAGE_COHERENT;
+
+ /* insert into not-volted entry */
+ lpar_rc = beat_insert_htab_entry3(0, hpte_group, hpte_v, hpte_r,
+ HPTE_V_BOLTED, 0, &slot);
+ /*
+ * Since we try and ioremap PHBs we don't own, the pte insert
+ * will fail. However we must catch the failure in hash_page
+ * or we will loop forever, so return -2 in this case.
+ */
+ if (unlikely(lpar_rc != 0)) {
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" lpar err %lx\n", lpar_rc);
+ return -2;
+ }
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" -> slot: %lx\n", slot);
+
+ /* We have to pass down the secondary bucket bit here as well */
+ return (slot ^ hpte_group) & 15;
+}
+
+/*
+ * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
+ * the low 3 bits of flags happen to line up. So no transform is needed.
+ * We can probably optimize here and assume the high bits of newpp are
+ * already zero. For now I am paranoid.
+ */
+static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
+ unsigned long newpp,
+ unsigned long va,
+ int psize, int ssize, int local)
+{
+ unsigned long lpar_rc;
+ unsigned long want_v;
+ unsigned long pss;
+
+ want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+ pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
+
+ DBG_LOW(" update: "
+ "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
+ want_v & HPTE_V_AVPN, slot, psize, newpp);
+
+ lpar_rc = beat_update_htab_permission3(0, slot, want_v, pss, 7, newpp);
+
+ if (lpar_rc == 0xfffffff7) {
+ DBG_LOW("not found !\n");
+ return -1;
+ }
+
+ DBG_LOW("ok\n");
+
+ BUG_ON(lpar_rc != 0);
+
+ return 0;
+}
+
+static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long va,
+ int psize, int ssize, int local)
+{
+ unsigned long want_v;
+ unsigned long lpar_rc;
+ unsigned long pss;
+
+ DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
+ slot, va, psize, local);
+ want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+ pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
+
+ lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss);
+
+ /* E_busy can be valid output: page may be already replaced */
+ BUG_ON(lpar_rc != 0 && lpar_rc != 0xfffffff7);
+}
+
+static int64_t _beat_lpar_hptab_clear_v3(void)
+{
+ return beat_clear_htab3(0);
+}
+
+static void beat_lpar_hptab_clear_v3(void)
+{
+ _beat_lpar_hptab_clear_v3();
+}
+
+void __init hpte_init_beat_v3(void)
+{
+ if (_beat_lpar_hptab_clear_v3() == 0) {
+ ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate_v3;
+ ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp_v3;
+ ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
+ ppc_md.hpte_insert = beat_lpar_hpte_insert_v3;
+ ppc_md.hpte_remove = beat_lpar_hpte_remove;
+ ppc_md.hpte_clear_all = beat_lpar_hptab_clear_v3;
+ } else {
+ ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate;
+ ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp;
+ ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
+ ppc_md.hpte_insert = beat_lpar_hpte_insert;
+ ppc_md.hpte_remove = beat_lpar_hpte_remove;
+ ppc_md.hpte_clear_all = beat_lpar_hptab_clear;
+ }
+}
diff --git a/arch/powerpc/platforms/cell/beat_hvCall.S b/arch/powerpc/platforms/cell/beat_hvCall.S
new file mode 100644
index 00000000000..74c81744894
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_hvCall.S
@@ -0,0 +1,287 @@
+/*
+ * Beat hypervisor call I/F
+ *
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ *
+ * This code is based on arch/powerpc/platforms/pseries/hvCall.S.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <asm/ppc_asm.h>
+
+#define STK_PARM(i) (48 + ((i)-3)*8)
+
+/* Not implemented on Beat, now */
+#define HCALL_INST_PRECALL
+#define HCALL_INST_POSTCALL
+
+ .text
+
+#define HVSC .long 0x44000022
+
+/* Note: takes only 7 input parameters at maximum */
+_GLOBAL(beat_hcall_norets)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ HCALL_INST_PRECALL
+
+ mr r11,r3
+ mr r3,r4
+ mr r4,r5
+ mr r5,r6
+ mr r6,r7
+ mr r7,r8
+ mr r8,r9
+
+ HVSC /* invoke the hypervisor */
+
+ HCALL_INST_POSTCALL
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+/* Note: takes 8 input parameters at maximum */
+_GLOBAL(beat_hcall_norets8)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ HCALL_INST_PRECALL
+
+ mr r11,r3
+ mr r3,r4
+ mr r4,r5
+ mr r5,r6
+ mr r6,r7
+ mr r7,r8
+ mr r8,r9
+ ld r10,STK_PARM(r10)(r1)
+
+ HVSC /* invoke the hypervisor */
+
+ HCALL_INST_POSTCALL
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+/* Note: takes only 6 input parameters, 1 output parameters at maximum */
+_GLOBAL(beat_hcall1)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ HCALL_INST_PRECALL
+
+ std r4,STK_PARM(r4)(r1) /* save ret buffer */
+
+ mr r11,r3
+ mr r3,r5
+ mr r4,r6
+ mr r5,r7
+ mr r6,r8
+ mr r7,r9
+ mr r8,r10
+
+ HVSC /* invoke the hypervisor */
+
+ HCALL_INST_POSTCALL
+
+ ld r12,STK_PARM(r4)(r1)
+ std r4, 0(r12)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+/* Note: takes only 6 input parameters, 2 output parameters at maximum */
+_GLOBAL(beat_hcall2)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ HCALL_INST_PRECALL
+
+ std r4,STK_PARM(r4)(r1) /* save ret buffer */
+
+ mr r11,r3
+ mr r3,r5
+ mr r4,r6
+ mr r5,r7
+ mr r6,r8
+ mr r7,r9
+ mr r8,r10
+
+ HVSC /* invoke the hypervisor */
+
+ HCALL_INST_POSTCALL
+
+ ld r12,STK_PARM(r4)(r1)
+ std r4, 0(r12)
+ std r5, 8(r12)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+/* Note: takes only 6 input parameters, 3 output parameters at maximum */
+_GLOBAL(beat_hcall3)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ HCALL_INST_PRECALL
+
+ std r4,STK_PARM(r4)(r1) /* save ret buffer */
+
+ mr r11,r3
+ mr r3,r5
+ mr r4,r6
+ mr r5,r7
+ mr r6,r8
+ mr r7,r9
+ mr r8,r10
+
+ HVSC /* invoke the hypervisor */
+
+ HCALL_INST_POSTCALL
+
+ ld r12,STK_PARM(r4)(r1)
+ std r4, 0(r12)
+ std r5, 8(r12)
+ std r6, 16(r12)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+/* Note: takes only 6 input parameters, 4 output parameters at maximum */
+_GLOBAL(beat_hcall4)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ HCALL_INST_PRECALL
+
+ std r4,STK_PARM(r4)(r1) /* save ret buffer */
+
+ mr r11,r3
+ mr r3,r5
+ mr r4,r6
+ mr r5,r7
+ mr r6,r8
+ mr r7,r9
+ mr r8,r10
+
+ HVSC /* invoke the hypervisor */
+
+ HCALL_INST_POSTCALL
+
+ ld r12,STK_PARM(r4)(r1)
+ std r4, 0(r12)
+ std r5, 8(r12)
+ std r6, 16(r12)
+ std r7, 24(r12)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+/* Note: takes only 6 input parameters, 5 output parameters at maximum */
+_GLOBAL(beat_hcall5)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ HCALL_INST_PRECALL
+
+ std r4,STK_PARM(r4)(r1) /* save ret buffer */
+
+ mr r11,r3
+ mr r3,r5
+ mr r4,r6
+ mr r5,r7
+ mr r6,r8
+ mr r7,r9
+ mr r8,r10
+
+ HVSC /* invoke the hypervisor */
+
+ HCALL_INST_POSTCALL
+
+ ld r12,STK_PARM(r4)(r1)
+ std r4, 0(r12)
+ std r5, 8(r12)
+ std r6, 16(r12)
+ std r7, 24(r12)
+ std r8, 32(r12)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+/* Note: takes only 6 input parameters, 6 output parameters at maximum */
+_GLOBAL(beat_hcall6)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ HCALL_INST_PRECALL
+
+ std r4,STK_PARM(r4)(r1) /* save ret buffer */
+
+ mr r11,r3
+ mr r3,r5
+ mr r4,r6
+ mr r5,r7
+ mr r6,r8
+ mr r7,r9
+ mr r8,r10
+
+ HVSC /* invoke the hypervisor */
+
+ HCALL_INST_POSTCALL
+
+ ld r12,STK_PARM(r4)(r1)
+ std r4, 0(r12)
+ std r5, 8(r12)
+ std r6, 16(r12)
+ std r7, 24(r12)
+ std r8, 32(r12)
+ std r9, 40(r12)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
new file mode 100644
index 00000000000..55015e1f693
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -0,0 +1,255 @@
+/*
+ * Celleb/Beat Interrupt controller
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+
+#include <asm/machdep.h>
+
+#include "beat_interrupt.h"
+#include "beat_wrapper.h"
+
+#define MAX_IRQS NR_IRQS
+static DEFINE_RAW_SPINLOCK(beatic_irq_mask_lock);
+static uint64_t beatic_irq_mask_enable[(MAX_IRQS+255)/64];
+static uint64_t beatic_irq_mask_ack[(MAX_IRQS+255)/64];
+
+static struct irq_host *beatic_host;
+
+/*
+ * In this implementation, "virq" == "IRQ plug number",
+ * "(irq_hw_number_t)hwirq" == "IRQ outlet number".
+ */
+
+/* assumption: locked */
+static inline void beatic_update_irq_mask(unsigned int irq_plug)
+{
+ int off;
+ unsigned long masks[4];
+
+ off = (irq_plug / 256) * 4;
+ masks[0] = beatic_irq_mask_enable[off + 0]
+ & beatic_irq_mask_ack[off + 0];
+ masks[1] = beatic_irq_mask_enable[off + 1]
+ & beatic_irq_mask_ack[off + 1];
+ masks[2] = beatic_irq_mask_enable[off + 2]
+ & beatic_irq_mask_ack[off + 2];
+ masks[3] = beatic_irq_mask_enable[off + 3]
+ & beatic_irq_mask_ack[off + 3];
+ if (beat_set_interrupt_mask(irq_plug&~255UL,
+ masks[0], masks[1], masks[2], masks[3]) != 0)
+ panic("Failed to set mask IRQ!");
+}
+
+static void beatic_mask_irq(struct irq_data *d)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
+ beatic_irq_mask_enable[d->irq/64] &= ~(1UL << (63 - (d->irq%64)));
+ beatic_update_irq_mask(d->irq);
+ raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
+}
+
+static void beatic_unmask_irq(struct irq_data *d)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
+ beatic_irq_mask_enable[d->irq/64] |= 1UL << (63 - (d->irq%64));
+ beatic_update_irq_mask(d->irq);
+ raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
+}
+
+static void beatic_ack_irq(struct irq_data *d)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
+ beatic_irq_mask_ack[d->irq/64] &= ~(1UL << (63 - (d->irq%64)));
+ beatic_update_irq_mask(d->irq);
+ raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
+}
+
+static void beatic_end_irq(struct irq_data *d)
+{
+ s64 err;
+ unsigned long flags;
+
+ err = beat_downcount_of_interrupt(d->irq);
+ if (err != 0) {
+ if ((err & 0xFFFFFFFF) != 0xFFFFFFF5) /* -11: wrong state */
+ panic("Failed to downcount IRQ! Error = %16llx", err);
+
+ printk(KERN_ERR "IRQ over-downcounted, plug %d\n", d->irq);
+ }
+ raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
+ beatic_irq_mask_ack[d->irq/64] |= 1UL << (63 - (d->irq%64));
+ beatic_update_irq_mask(d->irq);
+ raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
+}
+
+static struct irq_chip beatic_pic = {
+ .name = "CELL-BEAT",
+ .irq_unmask = beatic_unmask_irq,
+ .irq_mask = beatic_mask_irq,
+ .irq_eoi = beatic_end_irq,
+};
+
+/*
+ * Dispose binding hardware IRQ number (hw) and Virtuql IRQ number (virq),
+ * update flags.
+ *
+ * Note that the number (virq) is already assigned at upper layer.
+ */
+static void beatic_pic_host_unmap(struct irq_host *h, unsigned int virq)
+{
+ beat_destruct_irq_plug(virq);
+}
+
+/*
+ * Create or update binding hardware IRQ number (hw) and Virtuql
+ * IRQ number (virq). This is called only once for a given mapping.
+ *
+ * Note that the number (virq) is already assigned at upper layer.
+ */
+static int beatic_pic_host_map(struct irq_host *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ int64_t err;
+
+ err = beat_construct_and_connect_irq_plug(virq, hw);
+ if (err < 0)
+ return -EIO;
+
+ irq_set_status_flags(virq, IRQ_LEVEL);
+ irq_set_chip_and_handler(virq, &beatic_pic, handle_fasteoi_irq);
+ return 0;
+}
+
+/*
+ * Translate device-tree interrupt spec to irq_hw_number_t style (ulong),
+ * to pass away to irq_create_mapping().
+ *
+ * Called from irq_create_of_mapping() only.
+ * Note: We have only 1 entry to translate.
+ */
+static int beatic_pic_host_xlate(struct irq_host *h, struct device_node *ct,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq,
+ unsigned int *out_flags)
+{
+ const u64 *intspec2 = (const u64 *)intspec;
+
+ *out_hwirq = *intspec2;
+ *out_flags |= IRQ_TYPE_LEVEL_LOW;
+ return 0;
+}
+
+static int beatic_pic_host_match(struct irq_host *h, struct device_node *np)
+{
+ /* Match all */
+ return 1;
+}
+
+static struct irq_host_ops beatic_pic_host_ops = {
+ .map = beatic_pic_host_map,
+ .unmap = beatic_pic_host_unmap,
+ .xlate = beatic_pic_host_xlate,
+ .match = beatic_pic_host_match,
+};
+
+/*
+ * Get an IRQ number
+ * Note: returns VIRQ
+ */
+static inline unsigned int beatic_get_irq_plug(void)
+{
+ int i;
+ uint64_t pending[4], ub;
+
+ for (i = 0; i < MAX_IRQS; i += 256) {
+ beat_detect_pending_interrupts(i, pending);
+ __asm__ ("cntlzd %0,%1":"=r"(ub):
+ "r"(pending[0] & beatic_irq_mask_enable[i/64+0]
+ & beatic_irq_mask_ack[i/64+0]));
+ if (ub != 64)
+ return i + ub + 0;
+ __asm__ ("cntlzd %0,%1":"=r"(ub):
+ "r"(pending[1] & beatic_irq_mask_enable[i/64+1]
+ & beatic_irq_mask_ack[i/64+1]));
+ if (ub != 64)
+ return i + ub + 64;
+ __asm__ ("cntlzd %0,%1":"=r"(ub):
+ "r"(pending[2] & beatic_irq_mask_enable[i/64+2]
+ & beatic_irq_mask_ack[i/64+2]));
+ if (ub != 64)
+ return i + ub + 128;
+ __asm__ ("cntlzd %0,%1":"=r"(ub):
+ "r"(pending[3] & beatic_irq_mask_enable[i/64+3]
+ & beatic_irq_mask_ack[i/64+3]));
+ if (ub != 64)
+ return i + ub + 192;
+ }
+
+ return NO_IRQ;
+}
+unsigned int beatic_get_irq(void)
+{
+ unsigned int ret;
+
+ ret = beatic_get_irq_plug();
+ if (ret != NO_IRQ)
+ beatic_ack_irq(irq_get_irq_data(ret));
+ return ret;
+}
+
+/*
+ */
+void __init beatic_init_IRQ(void)
+{
+ int i;
+
+ memset(beatic_irq_mask_enable, 0, sizeof(beatic_irq_mask_enable));
+ memset(beatic_irq_mask_ack, 255, sizeof(beatic_irq_mask_ack));
+ for (i = 0; i < MAX_IRQS; i += 256)
+ beat_set_interrupt_mask(i, 0L, 0L, 0L, 0L);
+
+ /* Set out get_irq function */
+ ppc_md.get_irq = beatic_get_irq;
+
+ /* Allocate an irq host */
+ beatic_host = irq_alloc_host(NULL, IRQ_HOST_MAP_NOMAP, 0,
+ &beatic_pic_host_ops,
+ 0);
+ BUG_ON(beatic_host == NULL);
+ irq_set_default_host(beatic_host);
+}
+
+void beatic_deinit_IRQ(void)
+{
+ int i;
+
+ for (i = 1; i < NR_IRQS; i++)
+ beat_destruct_irq_plug(i);
+}
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h
new file mode 100644
index 00000000000..a7e52f91a07
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_interrupt.h
@@ -0,0 +1,30 @@
+/*
+ * Celleb/Beat Interrupt controller
+ *
+ * (C) Copyright 2006 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef ASM_BEAT_PIC_H
+#define ASM_BEAT_PIC_H
+#ifdef __KERNEL__
+
+extern void beatic_init_IRQ(void);
+extern unsigned int beatic_get_irq(void);
+extern void beatic_deinit_IRQ(void);
+
+#endif
+#endif /* ASM_BEAT_PIC_H */
diff --git a/arch/powerpc/platforms/cell/beat_iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c
new file mode 100644
index 00000000000..3ce68556893
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_iommu.c
@@ -0,0 +1,115 @@
+/*
+ * Support for IOMMU on Celleb platform.
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+
+#include "beat_wrapper.h"
+
+#define DMA_FLAGS 0xf800000000000000UL /* r/w permitted, coherency required,
+ strongest order */
+
+static int __init find_dma_window(u64 *io_space_id, u64 *ioid,
+ u64 *base, u64 *size, u64 *io_page_size)
+{
+ struct device_node *dn;
+ const unsigned long *dma_window;
+
+ for_each_node_by_type(dn, "ioif") {
+ dma_window = of_get_property(dn, "toshiba,dma-window", NULL);
+ if (dma_window) {
+ *io_space_id = (dma_window[0] >> 32) & 0xffffffffUL;
+ *ioid = dma_window[0] & 0x7ffUL;
+ *base = dma_window[1];
+ *size = dma_window[2];
+ *io_page_size = 1 << dma_window[3];
+ of_node_put(dn);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static unsigned long celleb_dma_direct_offset;
+
+static void __init celleb_init_direct_mapping(void)
+{
+ u64 lpar_addr, io_addr;
+ u64 io_space_id, ioid, dma_base, dma_size, io_page_size;
+
+ if (!find_dma_window(&io_space_id, &ioid, &dma_base, &dma_size,
+ &io_page_size)) {
+ pr_info("No dma window found !\n");
+ return;
+ }
+
+ for (lpar_addr = 0; lpar_addr < dma_size; lpar_addr += io_page_size) {
+ io_addr = lpar_addr + dma_base;
+ (void)beat_put_iopte(io_space_id, io_addr, lpar_addr,
+ ioid, DMA_FLAGS);
+ }
+
+ celleb_dma_direct_offset = dma_base;
+}
+
+static void celleb_dma_dev_setup(struct device *dev)
+{
+ set_dma_ops(dev, &dma_direct_ops);
+ set_dma_offset(dev, celleb_dma_direct_offset);
+}
+
+static void celleb_pci_dma_dev_setup(struct pci_dev *pdev)
+{
+ celleb_dma_dev_setup(&pdev->dev);
+}
+
+static int celleb_of_bus_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ /* We are only intereted in device addition */
+ if (action != BUS_NOTIFY_ADD_DEVICE)
+ return 0;
+
+ celleb_dma_dev_setup(dev);
+
+ return 0;
+}
+
+static struct notifier_block celleb_of_bus_notifier = {
+ .notifier_call = celleb_of_bus_notify
+};
+
+static int __init celleb_init_iommu(void)
+{
+ celleb_init_direct_mapping();
+ ppc_md.pci_dma_dev_setup = celleb_pci_dma_dev_setup;
+ bus_register_notifier(&platform_bus_type, &celleb_of_bus_notifier);
+
+ return 0;
+}
+
+machine_arch_initcall(celleb_beat, celleb_init_iommu);
diff --git a/arch/powerpc/platforms/cell/beat_spu_priv1.c b/arch/powerpc/platforms/cell/beat_spu_priv1.c
new file mode 100644
index 00000000000..13f52589d3a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_spu_priv1.c
@@ -0,0 +1,205 @@
+/*
+ * spu hypervisor abstraction for Beat
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <asm/types.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+
+#include "beat_wrapper.h"
+
+static inline void _int_mask_set(struct spu *spu, int class, u64 mask)
+{
+ spu->shadow_int_mask_RW[class] = mask;
+ beat_set_irq_mask_for_spe(spu->spe_id, class, mask);
+}
+
+static inline u64 _int_mask_get(struct spu *spu, int class)
+{
+ return spu->shadow_int_mask_RW[class];
+}
+
+static void int_mask_set(struct spu *spu, int class, u64 mask)
+{
+ _int_mask_set(spu, class, mask);
+}
+
+static u64 int_mask_get(struct spu *spu, int class)
+{
+ return _int_mask_get(spu, class);
+}
+
+static void int_mask_and(struct spu *spu, int class, u64 mask)
+{
+ u64 old_mask;
+ old_mask = _int_mask_get(spu, class);
+ _int_mask_set(spu, class, old_mask & mask);
+}
+
+static void int_mask_or(struct spu *spu, int class, u64 mask)
+{
+ u64 old_mask;
+ old_mask = _int_mask_get(spu, class);
+ _int_mask_set(spu, class, old_mask | mask);
+}
+
+static void int_stat_clear(struct spu *spu, int class, u64 stat)
+{
+ beat_clear_interrupt_status_of_spe(spu->spe_id, class, stat);
+}
+
+static u64 int_stat_get(struct spu *spu, int class)
+{
+ u64 int_stat;
+ beat_get_interrupt_status_of_spe(spu->spe_id, class, &int_stat);
+ return int_stat;
+}
+
+static void cpu_affinity_set(struct spu *spu, int cpu)
+{
+ return;
+}
+
+static u64 mfc_dar_get(struct spu *spu)
+{
+ u64 dar;
+ beat_get_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, mfc_dar_RW), &dar);
+ return dar;
+}
+
+static u64 mfc_dsisr_get(struct spu *spu)
+{
+ u64 dsisr;
+ beat_get_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, mfc_dsisr_RW), &dsisr);
+ return dsisr;
+}
+
+static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
+{
+ beat_set_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, mfc_dsisr_RW), dsisr);
+}
+
+static void mfc_sdr_setup(struct spu *spu)
+{
+ return;
+}
+
+static void mfc_sr1_set(struct spu *spu, u64 sr1)
+{
+ beat_set_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, mfc_sr1_RW), sr1);
+}
+
+static u64 mfc_sr1_get(struct spu *spu)
+{
+ u64 sr1;
+ beat_get_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, mfc_sr1_RW), &sr1);
+ return sr1;
+}
+
+static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
+{
+ beat_set_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, mfc_tclass_id_RW), tclass_id);
+}
+
+static u64 mfc_tclass_id_get(struct spu *spu)
+{
+ u64 tclass_id;
+ beat_get_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, mfc_tclass_id_RW), &tclass_id);
+ return tclass_id;
+}
+
+static void tlb_invalidate(struct spu *spu)
+{
+ beat_set_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, tlb_invalidate_entry_W), 0ul);
+}
+
+static void resource_allocation_groupID_set(struct spu *spu, u64 id)
+{
+ beat_set_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, resource_allocation_groupID_RW),
+ id);
+}
+
+static u64 resource_allocation_groupID_get(struct spu *spu)
+{
+ u64 id;
+ beat_get_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, resource_allocation_groupID_RW),
+ &id);
+ return id;
+}
+
+static void resource_allocation_enable_set(struct spu *spu, u64 enable)
+{
+ beat_set_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, resource_allocation_enable_RW),
+ enable);
+}
+
+static u64 resource_allocation_enable_get(struct spu *spu)
+{
+ u64 enable;
+ beat_get_spe_privileged_state_1_registers(
+ spu->spe_id,
+ offsetof(struct spu_priv1, resource_allocation_enable_RW),
+ &enable);
+ return enable;
+}
+
+const struct spu_priv1_ops spu_priv1_beat_ops = {
+ .int_mask_and = int_mask_and,
+ .int_mask_or = int_mask_or,
+ .int_mask_set = int_mask_set,
+ .int_mask_get = int_mask_get,
+ .int_stat_clear = int_stat_clear,
+ .int_stat_get = int_stat_get,
+ .cpu_affinity_set = cpu_affinity_set,
+ .mfc_dar_get = mfc_dar_get,
+ .mfc_dsisr_get = mfc_dsisr_get,
+ .mfc_dsisr_set = mfc_dsisr_set,
+ .mfc_sdr_setup = mfc_sdr_setup,
+ .mfc_sr1_set = mfc_sr1_set,
+ .mfc_sr1_get = mfc_sr1_get,
+ .mfc_tclass_id_set = mfc_tclass_id_set,
+ .mfc_tclass_id_get = mfc_tclass_id_get,
+ .tlb_invalidate = tlb_invalidate,
+ .resource_allocation_groupID_set = resource_allocation_groupID_set,
+ .resource_allocation_groupID_get = resource_allocation_groupID_get,
+ .resource_allocation_enable_set = resource_allocation_enable_set,
+ .resource_allocation_enable_get = resource_allocation_enable_get,
+};
diff --git a/arch/powerpc/platforms/cell/beat_syscall.h b/arch/powerpc/platforms/cell/beat_syscall.h
new file mode 100644
index 00000000000..8580dc7e179
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_syscall.h
@@ -0,0 +1,164 @@
+/*
+ * Beat hypervisor call numbers
+ *
+ * (C) Copyright 2004-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef BEAT_BEAT_syscall_H
+#define BEAT_BEAT_syscall_H
+
+#ifdef __ASSEMBLY__
+#define __BEAT_ADD_VENDOR_ID(__x, __v) ((__v)<<60|(__x))
+#else
+#define __BEAT_ADD_VENDOR_ID(__x, __v) ((u64)(__v)<<60|(__x))
+#endif
+#define HV_allocate_memory __BEAT_ADD_VENDOR_ID(0, 0)
+#define HV_construct_virtual_address_space __BEAT_ADD_VENDOR_ID(2, 0)
+#define HV_destruct_virtual_address_space __BEAT_ADD_VENDOR_ID(10, 0)
+#define HV_get_virtual_address_space_id_of_ppe __BEAT_ADD_VENDOR_ID(4, 0)
+#define HV_query_logical_partition_address_region_info \
+ __BEAT_ADD_VENDOR_ID(6, 0)
+#define HV_release_memory __BEAT_ADD_VENDOR_ID(13, 0)
+#define HV_select_virtual_address_space __BEAT_ADD_VENDOR_ID(7, 0)
+#define HV_load_range_registers __BEAT_ADD_VENDOR_ID(68, 0)
+#define HV_set_ppe_l2cache_rmt_entry __BEAT_ADD_VENDOR_ID(70, 0)
+#define HV_set_ppe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(71, 0)
+#define HV_set_spe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(72, 0)
+#define HV_get_io_address_translation_fault_info __BEAT_ADD_VENDOR_ID(14, 0)
+#define HV_get_iopte __BEAT_ADD_VENDOR_ID(16, 0)
+#define HV_preload_iopt_cache __BEAT_ADD_VENDOR_ID(17, 0)
+#define HV_put_iopte __BEAT_ADD_VENDOR_ID(15, 0)
+#define HV_connect_event_ports __BEAT_ADD_VENDOR_ID(21, 0)
+#define HV_construct_event_receive_port __BEAT_ADD_VENDOR_ID(18, 0)
+#define HV_destruct_event_receive_port __BEAT_ADD_VENDOR_ID(19, 0)
+#define HV_destruct_event_send_port __BEAT_ADD_VENDOR_ID(22, 0)
+#define HV_get_state_of_event_send_port __BEAT_ADD_VENDOR_ID(25, 0)
+#define HV_request_to_connect_event_ports __BEAT_ADD_VENDOR_ID(20, 0)
+#define HV_send_event_externally __BEAT_ADD_VENDOR_ID(23, 0)
+#define HV_send_event_locally __BEAT_ADD_VENDOR_ID(24, 0)
+#define HV_construct_and_connect_irq_plug __BEAT_ADD_VENDOR_ID(28, 0)
+#define HV_destruct_irq_plug __BEAT_ADD_VENDOR_ID(29, 0)
+#define HV_detect_pending_interrupts __BEAT_ADD_VENDOR_ID(26, 0)
+#define HV_end_of_interrupt __BEAT_ADD_VENDOR_ID(27, 0)
+#define HV_assign_control_signal_notification_port __BEAT_ADD_VENDOR_ID(45, 0)
+#define HV_end_of_control_signal_processing __BEAT_ADD_VENDOR_ID(48, 0)
+#define HV_get_control_signal __BEAT_ADD_VENDOR_ID(46, 0)
+#define HV_set_irq_mask_for_spe __BEAT_ADD_VENDOR_ID(61, 0)
+#define HV_shutdown_logical_partition __BEAT_ADD_VENDOR_ID(44, 0)
+#define HV_connect_message_ports __BEAT_ADD_VENDOR_ID(35, 0)
+#define HV_destruct_message_port __BEAT_ADD_VENDOR_ID(36, 0)
+#define HV_receive_message __BEAT_ADD_VENDOR_ID(37, 0)
+#define HV_get_message_port_info __BEAT_ADD_VENDOR_ID(34, 0)
+#define HV_request_to_connect_message_ports __BEAT_ADD_VENDOR_ID(33, 0)
+#define HV_send_message __BEAT_ADD_VENDOR_ID(32, 0)
+#define HV_get_logical_ppe_id __BEAT_ADD_VENDOR_ID(69, 0)
+#define HV_pause __BEAT_ADD_VENDOR_ID(9, 0)
+#define HV_destruct_shared_memory_handle __BEAT_ADD_VENDOR_ID(51, 0)
+#define HV_get_shared_memory_info __BEAT_ADD_VENDOR_ID(52, 0)
+#define HV_permit_sharing_memory __BEAT_ADD_VENDOR_ID(50, 0)
+#define HV_request_to_attach_shared_memory __BEAT_ADD_VENDOR_ID(49, 0)
+#define HV_enable_logical_spe_execution __BEAT_ADD_VENDOR_ID(55, 0)
+#define HV_construct_logical_spe __BEAT_ADD_VENDOR_ID(53, 0)
+#define HV_disable_logical_spe_execution __BEAT_ADD_VENDOR_ID(56, 0)
+#define HV_destruct_logical_spe __BEAT_ADD_VENDOR_ID(54, 0)
+#define HV_sense_spe_execution_status __BEAT_ADD_VENDOR_ID(58, 0)
+#define HV_insert_htab_entry __BEAT_ADD_VENDOR_ID(101, 0)
+#define HV_read_htab_entries __BEAT_ADD_VENDOR_ID(95, 0)
+#define HV_write_htab_entry __BEAT_ADD_VENDOR_ID(94, 0)
+#define HV_assign_io_address_translation_fault_port \
+ __BEAT_ADD_VENDOR_ID(100, 0)
+#define HV_set_interrupt_mask __BEAT_ADD_VENDOR_ID(73, 0)
+#define HV_get_logical_partition_id __BEAT_ADD_VENDOR_ID(74, 0)
+#define HV_create_repository_node2 __BEAT_ADD_VENDOR_ID(90, 0)
+#define HV_create_repository_node __BEAT_ADD_VENDOR_ID(90, 0) /* alias */
+#define HV_get_repository_node_value2 __BEAT_ADD_VENDOR_ID(91, 0)
+#define HV_get_repository_node_value __BEAT_ADD_VENDOR_ID(91, 0) /* alias */
+#define HV_modify_repository_node_value2 __BEAT_ADD_VENDOR_ID(92, 0)
+#define HV_modify_repository_node_value __BEAT_ADD_VENDOR_ID(92, 0) /* alias */
+#define HV_remove_repository_node2 __BEAT_ADD_VENDOR_ID(93, 0)
+#define HV_remove_repository_node __BEAT_ADD_VENDOR_ID(93, 0) /* alias */
+#define HV_cancel_shared_memory __BEAT_ADD_VENDOR_ID(104, 0)
+#define HV_clear_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(206, 0)
+#define HV_construct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(80, 0)
+#define HV_destruct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(81, 0)
+#define HV_disconnect_ipspc_service __BEAT_ADD_VENDOR_ID(88, 0)
+#define HV_execute_ipspc_command __BEAT_ADD_VENDOR_ID(86, 0)
+#define HV_get_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(205, 0)
+#define HV_get_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(208, 0)
+#define HV_permit_use_of_ipspc_service __BEAT_ADD_VENDOR_ID(85, 0)
+#define HV_reinitialize_logical_spe __BEAT_ADD_VENDOR_ID(82, 0)
+#define HV_request_ipspc_service __BEAT_ADD_VENDOR_ID(84, 0)
+#define HV_stop_ipspc_command __BEAT_ADD_VENDOR_ID(87, 0)
+#define HV_set_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(204, 0)
+#define HV_get_status_of_ipspc_service __BEAT_ADD_VENDOR_ID(203, 0)
+#define HV_put_characters_to_console __BEAT_ADD_VENDOR_ID(0x101, 1)
+#define HV_get_characters_from_console __BEAT_ADD_VENDOR_ID(0x102, 1)
+#define HV_get_base_clock __BEAT_ADD_VENDOR_ID(0x111, 1)
+#define HV_set_base_clock __BEAT_ADD_VENDOR_ID(0x112, 1)
+#define HV_get_frame_cycle __BEAT_ADD_VENDOR_ID(0x114, 1)
+#define HV_disable_console __BEAT_ADD_VENDOR_ID(0x115, 1)
+#define HV_disable_all_console __BEAT_ADD_VENDOR_ID(0x116, 1)
+#define HV_oneshot_timer __BEAT_ADD_VENDOR_ID(0x117, 1)
+#define HV_set_dabr __BEAT_ADD_VENDOR_ID(0x118, 1)
+#define HV_get_dabr __BEAT_ADD_VENDOR_ID(0x119, 1)
+#define HV_start_hv_stats __BEAT_ADD_VENDOR_ID(0x21c, 1)
+#define HV_stop_hv_stats __BEAT_ADD_VENDOR_ID(0x21d, 1)
+#define HV_get_hv_stats __BEAT_ADD_VENDOR_ID(0x21e, 1)
+#define HV_get_hv_error_stats __BEAT_ADD_VENDOR_ID(0x221, 1)
+#define HV_get_stats __BEAT_ADD_VENDOR_ID(0x224, 1)
+#define HV_get_heap_stats __BEAT_ADD_VENDOR_ID(0x225, 1)
+#define HV_get_memory_stats __BEAT_ADD_VENDOR_ID(0x227, 1)
+#define HV_get_memory_detail __BEAT_ADD_VENDOR_ID(0x228, 1)
+#define HV_set_priority_of_irq_outlet __BEAT_ADD_VENDOR_ID(0x122, 1)
+#define HV_get_physical_spe_by_reservation_id __BEAT_ADD_VENDOR_ID(0x128, 1)
+#define HV_get_spe_context __BEAT_ADD_VENDOR_ID(0x129, 1)
+#define HV_set_spe_context __BEAT_ADD_VENDOR_ID(0x12a, 1)
+#define HV_downcount_of_interrupt __BEAT_ADD_VENDOR_ID(0x12e, 1)
+#define HV_peek_spe_context __BEAT_ADD_VENDOR_ID(0x12f, 1)
+#define HV_read_bpa_register __BEAT_ADD_VENDOR_ID(0x131, 1)
+#define HV_write_bpa_register __BEAT_ADD_VENDOR_ID(0x132, 1)
+#define HV_map_context_table_of_spe __BEAT_ADD_VENDOR_ID(0x137, 1)
+#define HV_get_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x138, 1)
+#define HV_set_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x139, 1)
+#define HV_init_pm __BEAT_ADD_VENDOR_ID(0x150, 1)
+#define HV_set_pm_signal __BEAT_ADD_VENDOR_ID(0x151, 1)
+#define HV_get_pm_signal __BEAT_ADD_VENDOR_ID(0x152, 1)
+#define HV_set_pm_config __BEAT_ADD_VENDOR_ID(0x153, 1)
+#define HV_get_pm_config __BEAT_ADD_VENDOR_ID(0x154, 1)
+#define HV_get_inner_trace_data __BEAT_ADD_VENDOR_ID(0x155, 1)
+#define HV_set_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x156, 1)
+#define HV_get_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x157, 1)
+#define HV_set_pm_interrupt __BEAT_ADD_VENDOR_ID(0x158, 1)
+#define HV_get_pm_interrupt __BEAT_ADD_VENDOR_ID(0x159, 1)
+#define HV_kick_pm __BEAT_ADD_VENDOR_ID(0x160, 1)
+#define HV_construct_pm_context __BEAT_ADD_VENDOR_ID(0x164, 1)
+#define HV_destruct_pm_context __BEAT_ADD_VENDOR_ID(0x165, 1)
+#define HV_be_slow __BEAT_ADD_VENDOR_ID(0x170, 1)
+#define HV_assign_ipspc_server_connection_status_notification_port \
+ __BEAT_ADD_VENDOR_ID(0x173, 1)
+#define HV_get_raid_of_physical_spe __BEAT_ADD_VENDOR_ID(0x174, 1)
+#define HV_set_physical_spe_to_rag __BEAT_ADD_VENDOR_ID(0x175, 1)
+#define HV_release_physical_spe_from_rag __BEAT_ADD_VENDOR_ID(0x176, 1)
+#define HV_rtc_read __BEAT_ADD_VENDOR_ID(0x190, 1)
+#define HV_rtc_write __BEAT_ADD_VENDOR_ID(0x191, 1)
+#define HV_eeprom_read __BEAT_ADD_VENDOR_ID(0x192, 1)
+#define HV_eeprom_write __BEAT_ADD_VENDOR_ID(0x193, 1)
+#define HV_insert_htab_entry3 __BEAT_ADD_VENDOR_ID(0x104, 1)
+#define HV_invalidate_htab_entry3 __BEAT_ADD_VENDOR_ID(0x105, 1)
+#define HV_update_htab_permission3 __BEAT_ADD_VENDOR_ID(0x106, 1)
+#define HV_clear_htab3 __BEAT_ADD_VENDOR_ID(0x107, 1)
+#endif
diff --git a/arch/powerpc/platforms/cell/beat_udbg.c b/arch/powerpc/platforms/cell/beat_udbg.c
new file mode 100644
index 00000000000..350735bc888
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_udbg.c
@@ -0,0 +1,98 @@
+/*
+ * udbg function for Beat
+ *
+ * (C) Copyright 2006 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/console.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+
+#include "beat.h"
+
+#define celleb_vtermno 0
+
+static void udbg_putc_beat(char c)
+{
+ unsigned long rc;
+
+ if (c == '\n')
+ udbg_putc_beat('\r');
+
+ rc = beat_put_term_char(celleb_vtermno, 1, (uint64_t)c << 56, 0);
+}
+
+/* Buffered chars getc */
+static u64 inbuflen;
+static u64 inbuf[2]; /* must be 2 u64s */
+
+static int udbg_getc_poll_beat(void)
+{
+ /* The interface is tricky because it may return up to 16 chars.
+ * We save them statically for future calls to udbg_getc().
+ */
+ char ch, *buf = (char *)inbuf;
+ int i;
+ long rc;
+ if (inbuflen == 0) {
+ /* get some more chars. */
+ inbuflen = 0;
+ rc = beat_get_term_char(celleb_vtermno, &inbuflen,
+ inbuf+0, inbuf+1);
+ if (rc != 0)
+ inbuflen = 0; /* otherwise inbuflen is garbage */
+ }
+ if (inbuflen <= 0 || inbuflen > 16) {
+ /* Catch error case as well as other oddities (corruption) */
+ inbuflen = 0;
+ return -1;
+ }
+ ch = buf[0];
+ for (i = 1; i < inbuflen; i++) /* shuffle them down. */
+ buf[i-1] = buf[i];
+ inbuflen--;
+ return ch;
+}
+
+static int udbg_getc_beat(void)
+{
+ int ch;
+ for (;;) {
+ ch = udbg_getc_poll_beat();
+ if (ch == -1) {
+ /* This shouldn't be needed...but... */
+ volatile unsigned long delay;
+ for (delay = 0; delay < 2000000; delay++)
+ ;
+ } else {
+ return ch;
+ }
+ }
+}
+
+/* call this from early_init() for a working debug console on
+ * vterm capable LPAR machines
+ */
+void __init udbg_init_debug_beat(void)
+{
+ udbg_putc = udbg_putc_beat;
+ udbg_getc = udbg_getc_beat;
+ udbg_getc_poll = udbg_getc_poll_beat;
+}
diff --git a/arch/powerpc/platforms/cell/beat_wrapper.h b/arch/powerpc/platforms/cell/beat_wrapper.h
new file mode 100644
index 00000000000..c1109969f24
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_wrapper.h
@@ -0,0 +1,290 @@
+/*
+ * Beat hypervisor call I/F
+ *
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ *
+ * This code is based on arch/powerpc/platforms/pseries/plpar_wrapper.h.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef BEAT_HCALL
+#include <linux/string.h>
+#include "beat_syscall.h"
+
+/* defined in hvCall.S */
+extern s64 beat_hcall_norets(u64 opcode, ...);
+extern s64 beat_hcall_norets8(u64 opcode, u64 arg1, u64 arg2, u64 arg3,
+ u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8);
+extern s64 beat_hcall1(u64 opcode, u64 retbuf[1], ...);
+extern s64 beat_hcall2(u64 opcode, u64 retbuf[2], ...);
+extern s64 beat_hcall3(u64 opcode, u64 retbuf[3], ...);
+extern s64 beat_hcall4(u64 opcode, u64 retbuf[4], ...);
+extern s64 beat_hcall5(u64 opcode, u64 retbuf[5], ...);
+extern s64 beat_hcall6(u64 opcode, u64 retbuf[6], ...);
+
+static inline s64 beat_downcount_of_interrupt(u64 plug_id)
+{
+ return beat_hcall_norets(HV_downcount_of_interrupt, plug_id);
+}
+
+static inline s64 beat_set_interrupt_mask(u64 index,
+ u64 val0, u64 val1, u64 val2, u64 val3)
+{
+ return beat_hcall_norets(HV_set_interrupt_mask, index,
+ val0, val1, val2, val3);
+}
+
+static inline s64 beat_destruct_irq_plug(u64 plug_id)
+{
+ return beat_hcall_norets(HV_destruct_irq_plug, plug_id);
+}
+
+static inline s64 beat_construct_and_connect_irq_plug(u64 plug_id,
+ u64 outlet_id)
+{
+ return beat_hcall_norets(HV_construct_and_connect_irq_plug, plug_id,
+ outlet_id);
+}
+
+static inline s64 beat_detect_pending_interrupts(u64 index, u64 *retbuf)
+{
+ return beat_hcall4(HV_detect_pending_interrupts, retbuf, index);
+}
+
+static inline s64 beat_pause(u64 style)
+{
+ return beat_hcall_norets(HV_pause, style);
+}
+
+static inline s64 beat_read_htab_entries(u64 htab_id, u64 index, u64 *retbuf)
+{
+ return beat_hcall5(HV_read_htab_entries, retbuf, htab_id, index);
+}
+
+static inline s64 beat_insert_htab_entry(u64 htab_id, u64 group,
+ u64 bitmask, u64 hpte_v, u64 hpte_r, u64 *slot)
+{
+ u64 dummy[3];
+ s64 ret;
+
+ ret = beat_hcall3(HV_insert_htab_entry, dummy, htab_id, group,
+ bitmask, hpte_v, hpte_r);
+ *slot = dummy[0];
+ return ret;
+}
+
+static inline s64 beat_write_htab_entry(u64 htab_id, u64 slot,
+ u64 hpte_v, u64 hpte_r, u64 mask_v, u64 mask_r,
+ u64 *ret_v, u64 *ret_r)
+{
+ u64 dummy[2];
+ s64 ret;
+
+ ret = beat_hcall2(HV_write_htab_entry, dummy, htab_id, slot,
+ hpte_v, hpte_r, mask_v, mask_r);
+ *ret_v = dummy[0];
+ *ret_r = dummy[1];
+ return ret;
+}
+
+static inline s64 beat_insert_htab_entry3(u64 htab_id, u64 group,
+ u64 hpte_v, u64 hpte_r, u64 mask_v, u64 value_v, u64 *slot)
+{
+ u64 dummy[1];
+ s64 ret;
+
+ ret = beat_hcall1(HV_insert_htab_entry3, dummy, htab_id, group,
+ hpte_v, hpte_r, mask_v, value_v);
+ *slot = dummy[0];
+ return ret;
+}
+
+static inline s64 beat_invalidate_htab_entry3(u64 htab_id, u64 group,
+ u64 va, u64 pss)
+{
+ return beat_hcall_norets(HV_invalidate_htab_entry3,
+ htab_id, group, va, pss);
+}
+
+static inline s64 beat_update_htab_permission3(u64 htab_id, u64 group,
+ u64 va, u64 pss, u64 ptel_mask, u64 ptel_value)
+{
+ return beat_hcall_norets(HV_update_htab_permission3,
+ htab_id, group, va, pss, ptel_mask, ptel_value);
+}
+
+static inline s64 beat_clear_htab3(u64 htab_id)
+{
+ return beat_hcall_norets(HV_clear_htab3, htab_id);
+}
+
+static inline void beat_shutdown_logical_partition(u64 code)
+{
+ (void)beat_hcall_norets(HV_shutdown_logical_partition, code);
+}
+
+static inline s64 beat_rtc_write(u64 time_from_epoch)
+{
+ return beat_hcall_norets(HV_rtc_write, time_from_epoch);
+}
+
+static inline s64 beat_rtc_read(u64 *time_from_epoch)
+{
+ u64 dummy[1];
+ s64 ret;
+
+ ret = beat_hcall1(HV_rtc_read, dummy);
+ *time_from_epoch = dummy[0];
+ return ret;
+}
+
+#define BEAT_NVRW_CNT (sizeof(u64) * 6)
+
+static inline s64 beat_eeprom_write(u64 index, u64 length, u8 *buffer)
+{
+ u64 b[6];
+
+ if (length > BEAT_NVRW_CNT)
+ return -1;
+ memcpy(b, buffer, sizeof(b));
+ return beat_hcall_norets8(HV_eeprom_write, index, length,
+ b[0], b[1], b[2], b[3], b[4], b[5]);
+}
+
+static inline s64 beat_eeprom_read(u64 index, u64 length, u8 *buffer)
+{
+ u64 b[6];
+ s64 ret;
+
+ if (length > BEAT_NVRW_CNT)
+ return -1;
+ ret = beat_hcall6(HV_eeprom_read, b, index, length);
+ memcpy(buffer, b, length);
+ return ret;
+}
+
+static inline s64 beat_set_dabr(u64 value, u64 style)
+{
+ return beat_hcall_norets(HV_set_dabr, value, style);
+}
+
+static inline s64 beat_get_characters_from_console(u64 termno, u64 *len,
+ u8 *buffer)
+{
+ u64 dummy[3];
+ s64 ret;
+
+ ret = beat_hcall3(HV_get_characters_from_console, dummy, termno, len);
+ *len = dummy[0];
+ memcpy(buffer, dummy + 1, *len);
+ return ret;
+}
+
+static inline s64 beat_put_characters_to_console(u64 termno, u64 len,
+ u8 *buffer)
+{
+ u64 b[2];
+
+ memcpy(b, buffer, len);
+ return beat_hcall_norets(HV_put_characters_to_console, termno, len,
+ b[0], b[1]);
+}
+
+static inline s64 beat_get_spe_privileged_state_1_registers(
+ u64 id, u64 offsetof, u64 *value)
+{
+ u64 dummy[1];
+ s64 ret;
+
+ ret = beat_hcall1(HV_get_spe_privileged_state_1_registers, dummy, id,
+ offsetof);
+ *value = dummy[0];
+ return ret;
+}
+
+static inline s64 beat_set_irq_mask_for_spe(u64 id, u64 class, u64 mask)
+{
+ return beat_hcall_norets(HV_set_irq_mask_for_spe, id, class, mask);
+}
+
+static inline s64 beat_clear_interrupt_status_of_spe(u64 id, u64 class,
+ u64 mask)
+{
+ return beat_hcall_norets(HV_clear_interrupt_status_of_spe,
+ id, class, mask);
+}
+
+static inline s64 beat_set_spe_privileged_state_1_registers(
+ u64 id, u64 offsetof, u64 value)
+{
+ return beat_hcall_norets(HV_set_spe_privileged_state_1_registers,
+ id, offsetof, value);
+}
+
+static inline s64 beat_get_interrupt_status_of_spe(u64 id, u64 class, u64 *val)
+{
+ u64 dummy[1];
+ s64 ret;
+
+ ret = beat_hcall1(HV_get_interrupt_status_of_spe, dummy, id, class);
+ *val = dummy[0];
+ return ret;
+}
+
+static inline s64 beat_put_iopte(u64 ioas_id, u64 io_addr, u64 real_addr,
+ u64 ioid, u64 flags)
+{
+ return beat_hcall_norets(HV_put_iopte, ioas_id, io_addr, real_addr,
+ ioid, flags);
+}
+
+static inline s64 beat_construct_event_receive_port(u64 *port)
+{
+ u64 dummy[1];
+ s64 ret;
+
+ ret = beat_hcall1(HV_construct_event_receive_port, dummy);
+ *port = dummy[0];
+ return ret;
+}
+
+static inline s64 beat_destruct_event_receive_port(u64 port)
+{
+ s64 ret;
+
+ ret = beat_hcall_norets(HV_destruct_event_receive_port, port);
+ return ret;
+}
+
+static inline s64 beat_create_repository_node(u64 path[4], u64 data[2])
+{
+ s64 ret;
+
+ ret = beat_hcall_norets(HV_create_repository_node2,
+ path[0], path[1], path[2], path[3], data[0], data[1]);
+ return ret;
+}
+
+static inline s64 beat_get_repository_node_value(u64 lpid, u64 path[4],
+ u64 data[2])
+{
+ s64 ret;
+
+ ret = beat_hcall2(HV_get_repository_node_value2, data,
+ lpid, path[0], path[1], path[2], path[3]);
+ return ret;
+}
+
+#endif
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c
new file mode 100644
index 00000000000..d4c39e32f14
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c
@@ -0,0 +1,209 @@
+/*
+ * cpufreq driver for the cell processor
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/cell-regs.h>
+#include "cbe_cpufreq.h"
+
+static DEFINE_MUTEX(cbe_switch_mutex);
+
+
+/* the CBE supports an 8 step frequency scaling */
+static struct cpufreq_frequency_table cbe_freqs[] = {
+ {1, 0},
+ {2, 0},
+ {3, 0},
+ {4, 0},
+ {5, 0},
+ {6, 0},
+ {8, 0},
+ {10, 0},
+ {0, CPUFREQ_TABLE_END},
+};
+
+/*
+ * hardware specific functions
+ */
+
+static int set_pmode(unsigned int cpu, unsigned int slow_mode)
+{
+ int rc;
+
+ if (cbe_cpufreq_has_pmi)
+ rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode);
+ else
+ rc = cbe_cpufreq_set_pmode(cpu, slow_mode);
+
+ pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu));
+
+ return rc;
+}
+
+/*
+ * cpufreq functions
+ */
+
+static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+ const u32 *max_freqp;
+ u32 max_freq;
+ int i, cur_pmode;
+ struct device_node *cpu;
+
+ cpu = of_get_cpu_node(policy->cpu, NULL);
+
+ if (!cpu)
+ return -ENODEV;
+
+ pr_debug("init cpufreq on CPU %d\n", policy->cpu);
+
+ /*
+ * Let's check we can actually get to the CELL regs
+ */
+ if (!cbe_get_cpu_pmd_regs(policy->cpu) ||
+ !cbe_get_cpu_mic_tm_regs(policy->cpu)) {
+ pr_info("invalid CBE regs pointers for cpufreq\n");
+ return -EINVAL;
+ }
+
+ max_freqp = of_get_property(cpu, "clock-frequency", NULL);
+
+ of_node_put(cpu);
+
+ if (!max_freqp)
+ return -EINVAL;
+
+ /* we need the freq in kHz */
+ max_freq = *max_freqp / 1000;
+
+ pr_debug("max clock-frequency is at %u kHz\n", max_freq);
+ pr_debug("initializing frequency table\n");
+
+ /* initialize frequency table */
+ for (i=0; cbe_freqs[i].frequency!=CPUFREQ_TABLE_END; i++) {
+ cbe_freqs[i].frequency = max_freq / cbe_freqs[i].index;
+ pr_debug("%d: %d\n", i, cbe_freqs[i].frequency);
+ }
+
+ /* if DEBUG is enabled set_pmode() measures the latency
+ * of a transition */
+ policy->cpuinfo.transition_latency = 25000;
+
+ cur_pmode = cbe_cpufreq_get_pmode(policy->cpu);
+ pr_debug("current pmode is at %d\n",cur_pmode);
+
+ policy->cur = cbe_freqs[cur_pmode].frequency;
+
+#ifdef CONFIG_SMP
+ cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
+#endif
+
+ cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu);
+
+ /* this ensures that policy->cpuinfo_min
+ * and policy->cpuinfo_max are set correctly */
+ return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs);
+}
+
+static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+ cpufreq_frequency_table_put_attr(policy->cpu);
+ return 0;
+}
+
+static int cbe_cpufreq_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, cbe_freqs);
+}
+
+static int cbe_cpufreq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ int rc;
+ struct cpufreq_freqs freqs;
+ unsigned int cbe_pmode_new;
+
+ cpufreq_frequency_table_target(policy,
+ cbe_freqs,
+ target_freq,
+ relation,
+ &cbe_pmode_new);
+
+ freqs.old = policy->cur;
+ freqs.new = cbe_freqs[cbe_pmode_new].frequency;
+ freqs.cpu = policy->cpu;
+
+ mutex_lock(&cbe_switch_mutex);
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ pr_debug("setting frequency for cpu %d to %d kHz, " \
+ "1/%d of max frequency\n",
+ policy->cpu,
+ cbe_freqs[cbe_pmode_new].frequency,
+ cbe_freqs[cbe_pmode_new].index);
+
+ rc = set_pmode(policy->cpu, cbe_pmode_new);
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ mutex_unlock(&cbe_switch_mutex);
+
+ return rc;
+}
+
+static struct cpufreq_driver cbe_cpufreq_driver = {
+ .verify = cbe_cpufreq_verify,
+ .target = cbe_cpufreq_target,
+ .init = cbe_cpufreq_cpu_init,
+ .exit = cbe_cpufreq_cpu_exit,
+ .name = "cbe-cpufreq",
+ .owner = THIS_MODULE,
+ .flags = CPUFREQ_CONST_LOOPS,
+};
+
+/*
+ * module init and destoy
+ */
+
+static int __init cbe_cpufreq_init(void)
+{
+ if (!machine_is(cell))
+ return -ENODEV;
+
+ return cpufreq_register_driver(&cbe_cpufreq_driver);
+}
+
+static void __exit cbe_cpufreq_exit(void)
+{
+ cpufreq_unregister_driver(&cbe_cpufreq_driver);
+}
+
+module_init(cbe_cpufreq_init);
+module_exit(cbe_cpufreq_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.h b/arch/powerpc/platforms/cell/cbe_cpufreq.h
new file mode 100644
index 00000000000..c1d86bfa92f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq.h
@@ -0,0 +1,24 @@
+/*
+ * cbe_cpufreq.h
+ *
+ * This file contains the definitions used by the cbe_cpufreq driver.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/types.h>
+
+int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode);
+int cbe_cpufreq_get_pmode(int cpu);
+
+int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
+
+#if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE)
+extern bool cbe_cpufreq_has_pmi;
+#else
+#define cbe_cpufreq_has_pmi (0)
+#endif
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
new file mode 100644
index 00000000000..20472e487b6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
@@ -0,0 +1,115 @@
+/*
+ * pervasive backend for the cbe_cpufreq driver
+ *
+ * This driver makes use of the pervasive unit to
+ * engage the desired frequency.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <asm/machdep.h>
+#include <asm/hw_irq.h>
+#include <asm/cell-regs.h>
+
+#include "cbe_cpufreq.h"
+
+/* to write to MIC register */
+static u64 MIC_Slow_Fast_Timer_table[] = {
+ [0 ... 7] = 0x007fc00000000000ull,
+};
+
+/* more values for the MIC */
+static u64 MIC_Slow_Next_Timer_table[] = {
+ 0x0000240000000000ull,
+ 0x0000268000000000ull,
+ 0x000029C000000000ull,
+ 0x00002D0000000000ull,
+ 0x0000300000000000ull,
+ 0x0000334000000000ull,
+ 0x000039C000000000ull,
+ 0x00003FC000000000ull,
+};
+
+
+int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode)
+{
+ struct cbe_pmd_regs __iomem *pmd_regs;
+ struct cbe_mic_tm_regs __iomem *mic_tm_regs;
+ unsigned long flags;
+ u64 value;
+#ifdef DEBUG
+ long time;
+#endif
+
+ local_irq_save(flags);
+
+ mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
+ pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+
+#ifdef DEBUG
+ time = jiffies;
+#endif
+
+ out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
+ out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
+
+ out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
+ out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
+
+ value = in_be64(&pmd_regs->pmcr);
+ /* set bits to zero */
+ value &= 0xFFFFFFFFFFFFFFF8ull;
+ /* set bits to next pmode */
+ value |= pmode;
+
+ out_be64(&pmd_regs->pmcr, value);
+
+#ifdef DEBUG
+ /* wait until new pmode appears in status register */
+ value = in_be64(&pmd_regs->pmsr) & 0x07;
+ while (value != pmode) {
+ cpu_relax();
+ value = in_be64(&pmd_regs->pmsr) & 0x07;
+ }
+
+ time = jiffies - time;
+ time = jiffies_to_msecs(time);
+ pr_debug("had to wait %lu ms for a transition using " \
+ "pervasive unit\n", time);
+#endif
+ local_irq_restore(flags);
+
+ return 0;
+}
+
+
+int cbe_cpufreq_get_pmode(int cpu)
+{
+ int ret;
+ struct cbe_pmd_regs __iomem *pmd_regs;
+
+ pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+ ret = in_be64(&pmd_regs->pmsr) & 0x07;
+
+ return ret;
+}
+
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
new file mode 100644
index 00000000000..60a07a4f932
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
@@ -0,0 +1,156 @@
+/*
+ * pmi backend for the cbe_cpufreq driver
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <asm/processor.h>
+#include <asm/prom.h>
+#include <asm/pmi.h>
+#include <asm/cell-regs.h>
+
+#ifdef DEBUG
+#include <asm/time.h>
+#endif
+
+#include "cbe_cpufreq.h"
+
+static u8 pmi_slow_mode_limit[MAX_CBE];
+
+bool cbe_cpufreq_has_pmi = false;
+EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi);
+
+/*
+ * hardware specific functions
+ */
+
+int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode)
+{
+ int ret;
+ pmi_message_t pmi_msg;
+#ifdef DEBUG
+ long time;
+#endif
+ pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
+ pmi_msg.data1 = cbe_cpu_to_node(cpu);
+ pmi_msg.data2 = pmode;
+
+#ifdef DEBUG
+ time = jiffies;
+#endif
+ pmi_send_message(pmi_msg);
+
+#ifdef DEBUG
+ time = jiffies - time;
+ time = jiffies_to_msecs(time);
+ pr_debug("had to wait %lu ms for a transition using " \
+ "PMI\n", time);
+#endif
+ ret = pmi_msg.data2;
+ pr_debug("PMI returned slow mode %d\n", ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi);
+
+
+static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg)
+{
+ u8 node, slow_mode;
+
+ BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
+
+ node = pmi_msg.data1;
+ slow_mode = pmi_msg.data2;
+
+ pmi_slow_mode_limit[node] = slow_mode;
+
+ pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode);
+}
+
+static int pmi_notifier(struct notifier_block *nb,
+ unsigned long event, void *data)
+{
+ struct cpufreq_policy *policy = data;
+ struct cpufreq_frequency_table *cbe_freqs;
+ u8 node;
+
+ /* Should this really be called for CPUFREQ_ADJUST, CPUFREQ_INCOMPATIBLE
+ * and CPUFREQ_NOTIFY policy events?)
+ */
+ if (event == CPUFREQ_START)
+ return 0;
+
+ cbe_freqs = cpufreq_frequency_get_table(policy->cpu);
+ node = cbe_cpu_to_node(policy->cpu);
+
+ pr_debug("got notified, event=%lu, node=%u\n", event, node);
+
+ if (pmi_slow_mode_limit[node] != 0) {
+ pr_debug("limiting node %d to slow mode %d\n",
+ node, pmi_slow_mode_limit[node]);
+
+ cpufreq_verify_within_limits(policy, 0,
+
+ cbe_freqs[pmi_slow_mode_limit[node]].frequency);
+ }
+
+ return 0;
+}
+
+static struct notifier_block pmi_notifier_block = {
+ .notifier_call = pmi_notifier,
+};
+
+static struct pmi_handler cbe_pmi_handler = {
+ .type = PMI_TYPE_FREQ_CHANGE,
+ .handle_pmi_message = cbe_cpufreq_handle_pmi,
+};
+
+
+
+static int __init cbe_cpufreq_pmi_init(void)
+{
+ cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0;
+
+ if (!cbe_cpufreq_has_pmi)
+ return -ENODEV;
+
+ cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
+
+ return 0;
+}
+
+static void __exit cbe_cpufreq_pmi_exit(void)
+{
+ cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
+ pmi_unregister_handler(&cbe_pmi_handler);
+}
+
+module_init(cbe_cpufreq_pmi_init);
+module_exit(cbe_cpufreq_pmi_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/cbe_powerbutton.c b/arch/powerpc/platforms/cell/cbe_powerbutton.c
new file mode 100644
index 00000000000..2bb8031303f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_powerbutton.c
@@ -0,0 +1,118 @@
+/*
+ * driver for powerbutton on IBM cell blades
+ *
+ * (C) Copyright IBM Corp. 2005-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <asm/pmi.h>
+#include <asm/prom.h>
+
+static struct input_dev *button_dev;
+static struct platform_device *button_pdev;
+
+static void cbe_powerbutton_handle_pmi(pmi_message_t pmi_msg)
+{
+ BUG_ON(pmi_msg.type != PMI_TYPE_POWER_BUTTON);
+
+ input_report_key(button_dev, KEY_POWER, 1);
+ input_sync(button_dev);
+ input_report_key(button_dev, KEY_POWER, 0);
+ input_sync(button_dev);
+}
+
+static struct pmi_handler cbe_pmi_handler = {
+ .type = PMI_TYPE_POWER_BUTTON,
+ .handle_pmi_message = cbe_powerbutton_handle_pmi,
+};
+
+static int __init cbe_powerbutton_init(void)
+{
+ int ret = 0;
+ struct input_dev *dev;
+
+ if (!of_machine_is_compatible("IBM,CBPLUS-1.0")) {
+ printk(KERN_ERR "%s: Not a cell blade.\n", __func__);
+ ret = -ENODEV;
+ goto out;
+ }
+
+ dev = input_allocate_device();
+ if (!dev) {
+ ret = -ENOMEM;
+ printk(KERN_ERR "%s: Not enough memory.\n", __func__);
+ goto out;
+ }
+
+ set_bit(EV_KEY, dev->evbit);
+ set_bit(KEY_POWER, dev->keybit);
+
+ dev->name = "Power Button";
+ dev->id.bustype = BUS_HOST;
+
+ /* this makes the button look like an acpi power button
+ * no clue whether anyone relies on that though */
+ dev->id.product = 0x02;
+ dev->phys = "LNXPWRBN/button/input0";
+
+ button_pdev = platform_device_register_simple("power_button", 0, NULL, 0);
+ if (IS_ERR(button_pdev)) {
+ ret = PTR_ERR(button_pdev);
+ goto out_free_input;
+ }
+
+ dev->dev.parent = &button_pdev->dev;
+ ret = input_register_device(dev);
+ if (ret) {
+ printk(KERN_ERR "%s: Failed to register device\n", __func__);
+ goto out_free_pdev;
+ }
+
+ button_dev = dev;
+
+ ret = pmi_register_handler(&cbe_pmi_handler);
+ if (ret) {
+ printk(KERN_ERR "%s: Failed to register with pmi.\n", __func__);
+ goto out_free_pdev;
+ }
+
+ goto out;
+
+out_free_pdev:
+ platform_device_unregister(button_pdev);
+out_free_input:
+ input_free_device(dev);
+out:
+ return ret;
+}
+
+static void __exit cbe_powerbutton_exit(void)
+{
+ pmi_unregister_handler(&cbe_pmi_handler);
+ platform_device_unregister(button_pdev);
+ input_free_device(button_dev);
+}
+
+module_init(cbe_powerbutton_init);
+module_exit(cbe_powerbutton_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
new file mode 100644
index 00000000000..1428d583c23
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -0,0 +1,281 @@
+/*
+ * cbe_regs.c
+ *
+ * Accessor routines for the various MMIO register blocks of the CBE
+ *
+ * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ */
+
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/cell-regs.h>
+
+/*
+ * Current implementation uses "cpu" nodes. We build our own mapping
+ * array of cpu numbers to cpu nodes locally for now to allow interrupt
+ * time code to have a fast path rather than call of_get_cpu_node(). If
+ * we implement cpu hotplug, we'll have to install an appropriate norifier
+ * in order to release references to the cpu going away
+ */
+static struct cbe_regs_map
+{
+ struct device_node *cpu_node;
+ struct device_node *be_node;
+ struct cbe_pmd_regs __iomem *pmd_regs;
+ struct cbe_iic_regs __iomem *iic_regs;
+ struct cbe_mic_tm_regs __iomem *mic_tm_regs;
+ struct cbe_pmd_shadow_regs pmd_shadow_regs;
+} cbe_regs_maps[MAX_CBE];
+static int cbe_regs_map_count;
+
+static struct cbe_thread_map
+{
+ struct device_node *cpu_node;
+ struct device_node *be_node;
+ struct cbe_regs_map *regs;
+ unsigned int thread_id;
+ unsigned int cbe_id;
+} cbe_thread_map[NR_CPUS];
+
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} };
+static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE };
+
+static struct cbe_regs_map *cbe_find_map(struct device_node *np)
+{
+ int i;
+ struct device_node *tmp_np;
+
+ if (strcasecmp(np->type, "spe")) {
+ for (i = 0; i < cbe_regs_map_count; i++)
+ if (cbe_regs_maps[i].cpu_node == np ||
+ cbe_regs_maps[i].be_node == np)
+ return &cbe_regs_maps[i];
+ return NULL;
+ }
+
+ if (np->data)
+ return np->data;
+
+ /* walk up path until cpu or be node was found */
+ tmp_np = np;
+ do {
+ tmp_np = tmp_np->parent;
+ /* on a correct devicetree we wont get up to root */
+ BUG_ON(!tmp_np);
+ } while (strcasecmp(tmp_np->type, "cpu") &&
+ strcasecmp(tmp_np->type, "be"));
+
+ np->data = cbe_find_map(tmp_np);
+
+ return np->data;
+}
+
+struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np)
+{
+ struct cbe_regs_map *map = cbe_find_map(np);
+ if (map == NULL)
+ return NULL;
+ return map->pmd_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_pmd_regs);
+
+struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu)
+{
+ struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+ if (map == NULL)
+ return NULL;
+ return map->pmd_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs);
+
+struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np)
+{
+ struct cbe_regs_map *map = cbe_find_map(np);
+ if (map == NULL)
+ return NULL;
+ return &map->pmd_shadow_regs;
+}
+
+struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu)
+{
+ struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+ if (map == NULL)
+ return NULL;
+ return &map->pmd_shadow_regs;
+}
+
+struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np)
+{
+ struct cbe_regs_map *map = cbe_find_map(np);
+ if (map == NULL)
+ return NULL;
+ return map->iic_regs;
+}
+
+struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu)
+{
+ struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+ if (map == NULL)
+ return NULL;
+ return map->iic_regs;
+}
+
+struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np)
+{
+ struct cbe_regs_map *map = cbe_find_map(np);
+ if (map == NULL)
+ return NULL;
+ return map->mic_tm_regs;
+}
+
+struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
+{
+ struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+ if (map == NULL)
+ return NULL;
+ return map->mic_tm_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
+
+u32 cbe_get_hw_thread_id(int cpu)
+{
+ return cbe_thread_map[cpu].thread_id;
+}
+EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id);
+
+u32 cbe_cpu_to_node(int cpu)
+{
+ return cbe_thread_map[cpu].cbe_id;
+}
+EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
+
+u32 cbe_node_to_cpu(int node)
+{
+ return cpumask_first(&cbe_local_mask[node]);
+
+}
+EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
+
+static struct device_node *cbe_get_be_node(int cpu_id)
+{
+ struct device_node *np;
+
+ for_each_node_by_type (np, "be") {
+ int len,i;
+ const phandle *cpu_handle;
+
+ cpu_handle = of_get_property(np, "cpus", &len);
+
+ /*
+ * the CAB SLOF tree is non compliant, so we just assume
+ * there is only one node
+ */
+ if (WARN_ON_ONCE(!cpu_handle))
+ return np;
+
+ for (i=0; i<len; i++)
+ if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
+ return np;
+ }
+
+ return NULL;
+}
+
+void __init cbe_fill_regs_map(struct cbe_regs_map *map)
+{
+ if(map->be_node) {
+ struct device_node *be, *np;
+
+ be = map->be_node;
+
+ for_each_node_by_type(np, "pervasive")
+ if (of_get_parent(np) == be)
+ map->pmd_regs = of_iomap(np, 0);
+
+ for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller")
+ if (of_get_parent(np) == be)
+ map->iic_regs = of_iomap(np, 2);
+
+ for_each_node_by_type(np, "mic-tm")
+ if (of_get_parent(np) == be)
+ map->mic_tm_regs = of_iomap(np, 0);
+ } else {
+ struct device_node *cpu;
+ /* That hack must die die die ! */
+ const struct address_prop {
+ unsigned long address;
+ unsigned int len;
+ } __attribute__((packed)) *prop;
+
+ cpu = map->cpu_node;
+
+ prop = of_get_property(cpu, "pervasive", NULL);
+ if (prop != NULL)
+ map->pmd_regs = ioremap(prop->address, prop->len);
+
+ prop = of_get_property(cpu, "iic", NULL);
+ if (prop != NULL)
+ map->iic_regs = ioremap(prop->address, prop->len);
+
+ prop = of_get_property(cpu, "mic-tm", NULL);
+ if (prop != NULL)
+ map->mic_tm_regs = ioremap(prop->address, prop->len);
+ }
+}
+
+
+void __init cbe_regs_init(void)
+{
+ int i;
+ unsigned int thread_id;
+ struct device_node *cpu;
+
+ /* Build local fast map of CPUs */
+ for_each_possible_cpu(i) {
+ cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id);
+ cbe_thread_map[i].be_node = cbe_get_be_node(i);
+ cbe_thread_map[i].thread_id = thread_id;
+ }
+
+ /* Find maps for each device tree CPU */
+ for_each_node_by_type(cpu, "cpu") {
+ struct cbe_regs_map *map;
+ unsigned int cbe_id;
+
+ cbe_id = cbe_regs_map_count++;
+ map = &cbe_regs_maps[cbe_id];
+
+ if (cbe_regs_map_count > MAX_CBE) {
+ printk(KERN_ERR "cbe_regs: More BE chips than supported"
+ "!\n");
+ cbe_regs_map_count--;
+ of_node_put(cpu);
+ return;
+ }
+ map->cpu_node = cpu;
+
+ for_each_possible_cpu(i) {
+ struct cbe_thread_map *thread = &cbe_thread_map[i];
+
+ if (thread->cpu_node == cpu) {
+ thread->regs = map;
+ thread->cbe_id = cbe_id;
+ map->be_node = thread->be_node;
+ cpumask_set_cpu(i, &cbe_local_mask[cbe_id]);
+ if(thread->thread_id == 0)
+ cpumask_set_cpu(i, &cbe_first_online_cpu);
+ }
+ }
+
+ cbe_fill_regs_map(map);
+ }
+}
+
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
new file mode 100644
index 00000000000..94560db788b
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -0,0 +1,399 @@
+/*
+ * thermal support for the cell processor
+ *
+ * This module adds some sysfs attributes to cpu and spu nodes.
+ * Base for measurements are the digital thermal sensors (DTS)
+ * located on the chip.
+ * The accuracy is 2 degrees, starting from 65 up to 125 degrees celsius
+ * The attributes can be found under
+ * /sys/devices/system/cpu/cpuX/thermal
+ * /sys/devices/system/spu/spuX/thermal
+ *
+ * The following attributes are added for each node:
+ * temperature:
+ * contains the current temperature measured by the DTS
+ * throttle_begin:
+ * throttling begins when temperature is greater or equal to
+ * throttle_begin. Setting this value to 125 prevents throttling.
+ * throttle_end:
+ * throttling is being ceased, if the temperature is lower than
+ * throttle_end. Due to a delay between applying throttling and
+ * a reduced temperature this value should be less than throttle_begin.
+ * A value equal to throttle_begin provides only a very little hysteresis.
+ * throttle_full_stop:
+ * If the temperatrue is greater or equal to throttle_full_stop,
+ * full throttling is applied to the cpu or spu. This value should be
+ * greater than throttle_begin and throttle_end. Setting this value to
+ * 65 prevents the unit from running code at all.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <asm/spu.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/cell-regs.h>
+
+#include "spu_priv1_mmio.h"
+
+#define TEMP_MIN 65
+#define TEMP_MAX 125
+
+#define DEVICE_PREFIX_ATTR(_prefix,_name,_mode) \
+struct device_attribute attr_ ## _prefix ## _ ## _name = { \
+ .attr = { .name = __stringify(_name), .mode = _mode }, \
+ .show = _prefix ## _show_ ## _name, \
+ .store = _prefix ## _store_ ## _name, \
+};
+
+static inline u8 reg_to_temp(u8 reg_value)
+{
+ return ((reg_value & 0x3f) << 1) + TEMP_MIN;
+}
+
+static inline u8 temp_to_reg(u8 temp)
+{
+ return ((temp - TEMP_MIN) >> 1) & 0x3f;
+}
+
+static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev)
+{
+ struct spu *spu;
+
+ spu = container_of(dev, struct spu, dev);
+
+ return cbe_get_pmd_regs(spu_devnode(spu));
+}
+
+/* returns the value for a given spu in a given register */
+static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg)
+{
+ union spe_reg value;
+ struct spu *spu;
+
+ spu = container_of(dev, struct spu, dev);
+ value.val = in_be64(&reg->val);
+
+ return value.spe[spu->spe_id];
+}
+
+static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ u8 value;
+ struct cbe_pmd_regs __iomem *pmd_regs;
+
+ pmd_regs = get_pmd_regs(dev);
+
+ value = spu_read_register_value(dev, &pmd_regs->ts_ctsr1);
+
+ return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos)
+{
+ u64 value;
+
+ value = in_be64(&pmd_regs->tm_tpr.val);
+ /* access the corresponding byte */
+ value >>= pos;
+ value &= 0x3F;
+
+ return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos)
+{
+ u64 reg_value;
+ int temp;
+ u64 new_value;
+ int ret;
+
+ ret = sscanf(buf, "%u", &temp);
+
+ if (ret != 1 || temp < TEMP_MIN || temp > TEMP_MAX)
+ return -EINVAL;
+
+ new_value = temp_to_reg(temp);
+
+ reg_value = in_be64(&pmd_regs->tm_tpr.val);
+
+ /* zero out bits for new value */
+ reg_value &= ~(0xffull << pos);
+ /* set bits to new value */
+ reg_value |= new_value << pos;
+
+ out_be64(&pmd_regs->tm_tpr.val, reg_value);
+ return size;
+}
+
+static ssize_t spu_show_throttle_end(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(get_pmd_regs(dev), buf, 0);
+}
+
+static ssize_t spu_show_throttle_begin(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(get_pmd_regs(dev), buf, 8);
+}
+
+static ssize_t spu_show_throttle_full_stop(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(get_pmd_regs(dev), buf, 16);
+}
+
+static ssize_t spu_store_throttle_end(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(get_pmd_regs(dev), buf, size, 0);
+}
+
+static ssize_t spu_store_throttle_begin(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(get_pmd_regs(dev), buf, size, 8);
+}
+
+static ssize_t spu_store_throttle_full_stop(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(get_pmd_regs(dev), buf, size, 16);
+}
+
+static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos)
+{
+ struct cbe_pmd_regs __iomem *pmd_regs;
+ u64 value;
+
+ pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
+ value = in_be64(&pmd_regs->ts_ctsr2);
+
+ value = (value >> pos) & 0x3f;
+
+ return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+
+/* shows the temperature of the DTS on the PPE,
+ * located near the linear thermal sensor */
+static ssize_t ppe_show_temp0(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return ppe_show_temp(dev, buf, 32);
+}
+
+/* shows the temperature of the second DTS on the PPE */
+static ssize_t ppe_show_temp1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return ppe_show_temp(dev, buf, 0);
+}
+
+static ssize_t ppe_show_throttle_end(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 32);
+}
+
+static ssize_t ppe_show_throttle_begin(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 40);
+}
+
+static ssize_t ppe_show_throttle_full_stop(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 48);
+}
+
+static ssize_t ppe_store_throttle_end(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 32);
+}
+
+static ssize_t ppe_store_throttle_begin(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 40);
+}
+
+static ssize_t ppe_store_throttle_full_stop(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 48);
+}
+
+
+static struct device_attribute attr_spu_temperature = {
+ .attr = {.name = "temperature", .mode = 0400 },
+ .show = spu_show_temp,
+};
+
+static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600);
+static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600);
+static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600);
+
+
+static struct attribute *spu_attributes[] = {
+ &attr_spu_temperature.attr,
+ &attr_spu_throttle_end.attr,
+ &attr_spu_throttle_begin.attr,
+ &attr_spu_throttle_full_stop.attr,
+ NULL,
+};
+
+static struct attribute_group spu_attribute_group = {
+ .name = "thermal",
+ .attrs = spu_attributes,
+};
+
+static struct device_attribute attr_ppe_temperature0 = {
+ .attr = {.name = "temperature0", .mode = 0400 },
+ .show = ppe_show_temp0,
+};
+
+static struct device_attribute attr_ppe_temperature1 = {
+ .attr = {.name = "temperature1", .mode = 0400 },
+ .show = ppe_show_temp1,
+};
+
+static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600);
+static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600);
+static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600);
+
+static struct attribute *ppe_attributes[] = {
+ &attr_ppe_temperature0.attr,
+ &attr_ppe_temperature1.attr,
+ &attr_ppe_throttle_end.attr,
+ &attr_ppe_throttle_begin.attr,
+ &attr_ppe_throttle_full_stop.attr,
+ NULL,
+};
+
+static struct attribute_group ppe_attribute_group = {
+ .name = "thermal",
+ .attrs = ppe_attributes,
+};
+
+/*
+ * initialize throttling with default values
+ */
+static int __init init_default_values(void)
+{
+ int cpu;
+ struct cbe_pmd_regs __iomem *pmd_regs;
+ struct device *dev;
+ union ppe_spe_reg tpr;
+ union spe_reg str1;
+ u64 str2;
+ union spe_reg cr1;
+ u64 cr2;
+
+ /* TPR defaults */
+ /* ppe
+ * 1F - no full stop
+ * 08 - dynamic throttling starts if over 80 degrees
+ * 03 - dynamic throttling ceases if below 70 degrees */
+ tpr.ppe = 0x1F0803;
+ /* spe
+ * 10 - full stopped when over 96 degrees
+ * 08 - dynamic throttling starts if over 80 degrees
+ * 03 - dynamic throttling ceases if below 70 degrees
+ */
+ tpr.spe = 0x100803;
+
+ /* STR defaults */
+ /* str1
+ * 10 - stop 16 of 32 cycles
+ */
+ str1.val = 0x1010101010101010ull;
+ /* str2
+ * 10 - stop 16 of 32 cycles
+ */
+ str2 = 0x10;
+
+ /* CR defaults */
+ /* cr1
+ * 4 - normal operation
+ */
+ cr1.val = 0x0404040404040404ull;
+ /* cr2
+ * 4 - normal operation
+ */
+ cr2 = 0x04;
+
+ for_each_possible_cpu (cpu) {
+ pr_debug("processing cpu %d\n", cpu);
+ dev = get_cpu_device(cpu);
+
+ if (!dev) {
+ pr_info("invalid dev pointer for cbe_thermal\n");
+ return -EINVAL;
+ }
+
+ pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
+
+ if (!pmd_regs) {
+ pr_info("invalid CBE regs pointer for cbe_thermal\n");
+ return -EINVAL;
+ }
+
+ out_be64(&pmd_regs->tm_str2, str2);
+ out_be64(&pmd_regs->tm_str1.val, str1.val);
+ out_be64(&pmd_regs->tm_tpr.val, tpr.val);
+ out_be64(&pmd_regs->tm_cr1.val, cr1.val);
+ out_be64(&pmd_regs->tm_cr2, cr2);
+ }
+
+ return 0;
+}
+
+
+static int __init thermal_init(void)
+{
+ int rc = init_default_values();
+
+ if (rc == 0) {
+ spu_add_dev_attr_group(&spu_attribute_group);
+ cpu_add_dev_attr_group(&ppe_attribute_group);
+ }
+
+ return rc;
+}
+module_init(thermal_init);
+
+static void __exit thermal_exit(void)
+{
+ spu_remove_dev_attr_group(&spu_attribute_group);
+ cpu_remove_dev_attr_group(&ppe_attribute_group);
+}
+module_exit(thermal_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
new file mode 100644
index 00000000000..5822141aa63
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -0,0 +1,500 @@
+/*
+ * Support for PCI on Celleb platform.
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This code is based on arch/powerpc/kernel/rtas_pci.c:
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/pci_regs.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+#include "celleb_pci.h"
+
+#define MAX_PCI_DEVICES 32
+#define MAX_PCI_FUNCTIONS 8
+#define MAX_PCI_BASE_ADDRS 3 /* use 64 bit address */
+
+/* definition for fake pci configuration area for GbE, .... ,and etc. */
+
+struct celleb_pci_resource {
+ struct resource r[MAX_PCI_BASE_ADDRS];
+};
+
+struct celleb_pci_private {
+ unsigned char *fake_config[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS];
+ struct celleb_pci_resource *res[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS];
+};
+
+static inline u8 celleb_fake_config_readb(void *addr)
+{
+ u8 *p = addr;
+ return *p;
+}
+
+static inline u16 celleb_fake_config_readw(void *addr)
+{
+ __le16 *p = addr;
+ return le16_to_cpu(*p);
+}
+
+static inline u32 celleb_fake_config_readl(void *addr)
+{
+ __le32 *p = addr;
+ return le32_to_cpu(*p);
+}
+
+static inline void celleb_fake_config_writeb(u32 val, void *addr)
+{
+ u8 *p = addr;
+ *p = val;
+}
+
+static inline void celleb_fake_config_writew(u32 val, void *addr)
+{
+ __le16 val16;
+ __le16 *p = addr;
+ val16 = cpu_to_le16(val);
+ *p = val16;
+}
+
+static inline void celleb_fake_config_writel(u32 val, void *addr)
+{
+ __le32 val32;
+ __le32 *p = addr;
+ val32 = cpu_to_le32(val);
+ *p = val32;
+}
+
+static unsigned char *get_fake_config_start(struct pci_controller *hose,
+ int devno, int fn)
+{
+ struct celleb_pci_private *private = hose->private_data;
+
+ if (private == NULL)
+ return NULL;
+
+ return private->fake_config[devno][fn];
+}
+
+static struct celleb_pci_resource *get_resource_start(
+ struct pci_controller *hose,
+ int devno, int fn)
+{
+ struct celleb_pci_private *private = hose->private_data;
+
+ if (private == NULL)
+ return NULL;
+
+ return private->res[devno][fn];
+}
+
+
+static void celleb_config_read_fake(unsigned char *config, int where,
+ int size, u32 *val)
+{
+ char *p = config + where;
+
+ switch (size) {
+ case 1:
+ *val = celleb_fake_config_readb(p);
+ break;
+ case 2:
+ *val = celleb_fake_config_readw(p);
+ break;
+ case 4:
+ *val = celleb_fake_config_readl(p);
+ break;
+ }
+}
+
+static void celleb_config_write_fake(unsigned char *config, int where,
+ int size, u32 val)
+{
+ char *p = config + where;
+
+ switch (size) {
+ case 1:
+ celleb_fake_config_writeb(val, p);
+ break;
+ case 2:
+ celleb_fake_config_writew(val, p);
+ break;
+ case 4:
+ celleb_fake_config_writel(val, p);
+ break;
+ }
+}
+
+static int celleb_fake_pci_read_config(struct pci_bus *bus,
+ unsigned int devfn, int where, int size, u32 *val)
+{
+ char *config;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ unsigned int devno = devfn >> 3;
+ unsigned int fn = devfn & 0x7;
+
+ /* allignment check */
+ BUG_ON(where % size);
+
+ pr_debug(" fake read: bus=0x%x, ", bus->number);
+ config = get_fake_config_start(hose, devno, fn);
+
+ pr_debug("devno=0x%x, where=0x%x, size=0x%x, ", devno, where, size);
+ if (!config) {
+ pr_debug("failed\n");
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+
+ celleb_config_read_fake(config, where, size, val);
+ pr_debug("val=0x%x\n", *val);
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+
+static int celleb_fake_pci_write_config(struct pci_bus *bus,
+ unsigned int devfn, int where, int size, u32 val)
+{
+ char *config;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct celleb_pci_resource *res;
+ unsigned int devno = devfn >> 3;
+ unsigned int fn = devfn & 0x7;
+
+ /* allignment check */
+ BUG_ON(where % size);
+
+ config = get_fake_config_start(hose, devno, fn);
+
+ if (!config)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (val == ~0) {
+ int i = (where - PCI_BASE_ADDRESS_0) >> 3;
+
+ switch (where) {
+ case PCI_BASE_ADDRESS_0:
+ case PCI_BASE_ADDRESS_2:
+ if (size != 4)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ res = get_resource_start(hose, devno, fn);
+ if (!res)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ celleb_config_write_fake(config, where, size,
+ (res->r[i].end - res->r[i].start));
+ return PCIBIOS_SUCCESSFUL;
+ case PCI_BASE_ADDRESS_1:
+ case PCI_BASE_ADDRESS_3:
+ case PCI_BASE_ADDRESS_4:
+ case PCI_BASE_ADDRESS_5:
+ break;
+ default:
+ break;
+ }
+ }
+
+ celleb_config_write_fake(config, where, size, val);
+ pr_debug(" fake write: where=%x, size=%d, val=%x\n",
+ where, size, val);
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops celleb_fake_pci_ops = {
+ .read = celleb_fake_pci_read_config,
+ .write = celleb_fake_pci_write_config,
+};
+
+static inline void celleb_setup_pci_base_addrs(struct pci_controller *hose,
+ unsigned int devno, unsigned int fn,
+ unsigned int num_base_addr)
+{
+ u32 val;
+ unsigned char *config;
+ struct celleb_pci_resource *res;
+
+ config = get_fake_config_start(hose, devno, fn);
+ res = get_resource_start(hose, devno, fn);
+
+ if (!config || !res)
+ return;
+
+ switch (num_base_addr) {
+ case 3:
+ val = (res->r[2].start & 0xfffffff0)
+ | PCI_BASE_ADDRESS_MEM_TYPE_64;
+ celleb_config_write_fake(config, PCI_BASE_ADDRESS_4, 4, val);
+ val = res->r[2].start >> 32;
+ celleb_config_write_fake(config, PCI_BASE_ADDRESS_5, 4, val);
+ /* FALLTHROUGH */
+ case 2:
+ val = (res->r[1].start & 0xfffffff0)
+ | PCI_BASE_ADDRESS_MEM_TYPE_64;
+ celleb_config_write_fake(config, PCI_BASE_ADDRESS_2, 4, val);
+ val = res->r[1].start >> 32;
+ celleb_config_write_fake(config, PCI_BASE_ADDRESS_3, 4, val);
+ /* FALLTHROUGH */
+ case 1:
+ val = (res->r[0].start & 0xfffffff0)
+ | PCI_BASE_ADDRESS_MEM_TYPE_64;
+ celleb_config_write_fake(config, PCI_BASE_ADDRESS_0, 4, val);
+ val = res->r[0].start >> 32;
+ celleb_config_write_fake(config, PCI_BASE_ADDRESS_1, 4, val);
+ break;
+ }
+
+ val = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+ celleb_config_write_fake(config, PCI_COMMAND, 2, val);
+}
+
+static int __init celleb_setup_fake_pci_device(struct device_node *node,
+ struct pci_controller *hose)
+{
+ unsigned int rlen;
+ int num_base_addr = 0;
+ u32 val;
+ const u32 *wi0, *wi1, *wi2, *wi3, *wi4;
+ unsigned int devno, fn;
+ struct celleb_pci_private *private = hose->private_data;
+ unsigned char **config = NULL;
+ struct celleb_pci_resource **res = NULL;
+ const char *name;
+ const unsigned long *li;
+ int size, result;
+
+ if (private == NULL) {
+ printk(KERN_ERR "PCI: "
+ "memory space for pci controller is not assigned\n");
+ goto error;
+ }
+
+ name = of_get_property(node, "model", &rlen);
+ if (!name) {
+ printk(KERN_ERR "PCI: model property not found.\n");
+ goto error;
+ }
+
+ wi4 = of_get_property(node, "reg", &rlen);
+ if (wi4 == NULL)
+ goto error;
+
+ devno = ((wi4[0] >> 8) & 0xff) >> 3;
+ fn = (wi4[0] >> 8) & 0x7;
+
+ pr_debug("PCI: celleb_setup_fake_pci() %s devno=%x fn=%x\n", name,
+ devno, fn);
+
+ size = 256;
+ config = &private->fake_config[devno][fn];
+ *config = zalloc_maybe_bootmem(size, GFP_KERNEL);
+ if (*config == NULL) {
+ printk(KERN_ERR "PCI: "
+ "not enough memory for fake configuration space\n");
+ goto error;
+ }
+ pr_debug("PCI: fake config area assigned 0x%016lx\n",
+ (unsigned long)*config);
+
+ size = sizeof(struct celleb_pci_resource);
+ res = &private->res[devno][fn];
+ *res = zalloc_maybe_bootmem(size, GFP_KERNEL);
+ if (*res == NULL) {
+ printk(KERN_ERR
+ "PCI: not enough memory for resource data space\n");
+ goto error;
+ }
+ pr_debug("PCI: res assigned 0x%016lx\n", (unsigned long)*res);
+
+ wi0 = of_get_property(node, "device-id", NULL);
+ wi1 = of_get_property(node, "vendor-id", NULL);
+ wi2 = of_get_property(node, "class-code", NULL);
+ wi3 = of_get_property(node, "revision-id", NULL);
+ if (!wi0 || !wi1 || !wi2 || !wi3) {
+ printk(KERN_ERR "PCI: Missing device tree properties.\n");
+ goto error;
+ }
+
+ celleb_config_write_fake(*config, PCI_DEVICE_ID, 2, wi0[0] & 0xffff);
+ celleb_config_write_fake(*config, PCI_VENDOR_ID, 2, wi1[0] & 0xffff);
+ pr_debug("class-code = 0x%08x\n", wi2[0]);
+
+ celleb_config_write_fake(*config, PCI_CLASS_PROG, 1, wi2[0] & 0xff);
+ celleb_config_write_fake(*config, PCI_CLASS_DEVICE, 2,
+ (wi2[0] >> 8) & 0xffff);
+ celleb_config_write_fake(*config, PCI_REVISION_ID, 1, wi3[0]);
+
+ while (num_base_addr < MAX_PCI_BASE_ADDRS) {
+ result = of_address_to_resource(node,
+ num_base_addr, &(*res)->r[num_base_addr]);
+ if (result)
+ break;
+ num_base_addr++;
+ }
+
+ celleb_setup_pci_base_addrs(hose, devno, fn, num_base_addr);
+
+ li = of_get_property(node, "interrupts", &rlen);
+ if (!li) {
+ printk(KERN_ERR "PCI: interrupts not found.\n");
+ goto error;
+ }
+ val = li[0];
+ celleb_config_write_fake(*config, PCI_INTERRUPT_PIN, 1, 1);
+ celleb_config_write_fake(*config, PCI_INTERRUPT_LINE, 1, val);
+
+#ifdef DEBUG
+ pr_debug("PCI: %s irq=%ld\n", name, li[0]);
+ for (i = 0; i < 6; i++) {
+ celleb_config_read_fake(*config,
+ PCI_BASE_ADDRESS_0 + 0x4 * i, 4,
+ &val);
+ pr_debug("PCI: %s fn=%d base_address_%d=0x%x\n",
+ name, fn, i, val);
+ }
+#endif
+
+ celleb_config_write_fake(*config, PCI_HEADER_TYPE, 1,
+ PCI_HEADER_TYPE_NORMAL);
+
+ return 0;
+
+error:
+ if (mem_init_done) {
+ if (config && *config)
+ kfree(*config);
+ if (res && *res)
+ kfree(*res);
+
+ } else {
+ if (config && *config) {
+ size = 256;
+ free_bootmem((unsigned long)(*config), size);
+ }
+ if (res && *res) {
+ size = sizeof(struct celleb_pci_resource);
+ free_bootmem((unsigned long)(*res), size);
+ }
+ }
+
+ return 1;
+}
+
+static int __init phb_set_bus_ranges(struct device_node *dev,
+ struct pci_controller *phb)
+{
+ const int *bus_range;
+ unsigned int len;
+
+ bus_range = of_get_property(dev, "bus-range", &len);
+ if (bus_range == NULL || len < 2 * sizeof(int))
+ return 1;
+
+ phb->first_busno = bus_range[0];
+ phb->last_busno = bus_range[1];
+
+ return 0;
+}
+
+static void __init celleb_alloc_private_mem(struct pci_controller *hose)
+{
+ hose->private_data =
+ zalloc_maybe_bootmem(sizeof(struct celleb_pci_private),
+ GFP_KERNEL);
+}
+
+static int __init celleb_setup_fake_pci(struct device_node *dev,
+ struct pci_controller *phb)
+{
+ struct device_node *node;
+
+ phb->ops = &celleb_fake_pci_ops;
+ celleb_alloc_private_mem(phb);
+
+ for (node = of_get_next_child(dev, NULL);
+ node != NULL; node = of_get_next_child(dev, node))
+ celleb_setup_fake_pci_device(node, phb);
+
+ return 0;
+}
+
+static struct celleb_phb_spec celleb_fake_pci_spec __initdata = {
+ .setup = celleb_setup_fake_pci,
+};
+
+static struct of_device_id celleb_phb_match[] __initdata = {
+ {
+ .name = "pci-pseudo",
+ .data = &celleb_fake_pci_spec,
+ }, {
+ .name = "epci",
+ .data = &celleb_epci_spec,
+ }, {
+ .name = "pcie",
+ .data = &celleb_pciex_spec,
+ }, {
+ },
+};
+
+int __init celleb_setup_phb(struct pci_controller *phb)
+{
+ struct device_node *dev = phb->dn;
+ const struct of_device_id *match;
+ struct celleb_phb_spec *phb_spec;
+ int rc;
+
+ match = of_match_node(celleb_phb_match, dev);
+ if (!match)
+ return 1;
+
+ phb_set_bus_ranges(dev, phb);
+ phb->buid = 1;
+
+ phb_spec = match->data;
+ rc = (*phb_spec->setup)(dev, phb);
+ if (rc)
+ return 1;
+
+ if (phb_spec->ops)
+ iowa_register_bus(phb, phb_spec->ops,
+ phb_spec->iowa_init,
+ phb_spec->iowa_data);
+ return 0;
+}
+
+int celleb_pci_probe_mode(struct pci_bus *bus)
+{
+ return PCI_PROBE_DEVTREE;
+}
diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h
new file mode 100644
index 00000000000..a801fcc5f38
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_pci.h
@@ -0,0 +1,46 @@
+/*
+ * pci prototypes for Celleb platform
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _CELLEB_PCI_H
+#define _CELLEB_PCI_H
+
+#include <linux/pci.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/prom.h>
+#include <asm/ppc-pci.h>
+#include <asm/io-workarounds.h>
+
+struct iowa_bus;
+
+struct celleb_phb_spec {
+ int (*setup)(struct device_node *, struct pci_controller *);
+ struct ppc_pci_io *ops;
+ int (*iowa_init)(struct iowa_bus *, void *);
+ void *iowa_data;
+};
+
+extern int celleb_setup_phb(struct pci_controller *);
+extern int celleb_pci_probe_mode(struct pci_bus *);
+
+extern struct celleb_phb_spec celleb_epci_spec;
+extern struct celleb_phb_spec celleb_pciex_spec;
+
+#endif /* _CELLEB_PCI_H */
diff --git a/arch/powerpc/platforms/cell/celleb_scc.h b/arch/powerpc/platforms/cell/celleb_scc.h
new file mode 100644
index 00000000000..b596a711c34
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc.h
@@ -0,0 +1,232 @@
+/*
+ * SCC (Super Companion Chip) definitions
+ *
+ * (C) Copyright 2004-2006 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _CELLEB_SCC_H
+#define _CELLEB_SCC_H
+
+#define PCI_VENDOR_ID_TOSHIBA_2 0x102f
+#define PCI_DEVICE_ID_TOSHIBA_SCC_PCIEXC_BRIDGE 0x01b0
+#define PCI_DEVICE_ID_TOSHIBA_SCC_EPCI_BRIDGE 0x01b1
+#define PCI_DEVICE_ID_TOSHIBA_SCC_BRIDGE 0x01b2
+#define PCI_DEVICE_ID_TOSHIBA_SCC_GBE 0x01b3
+#define PCI_DEVICE_ID_TOSHIBA_SCC_ATA 0x01b4
+#define PCI_DEVICE_ID_TOSHIBA_SCC_USB2 0x01b5
+#define PCI_DEVICE_ID_TOSHIBA_SCC_USB 0x01b6
+#define PCI_DEVICE_ID_TOSHIBA_SCC_ENCDEC 0x01b7
+
+#define SCC_EPCI_REG 0x0000d000
+
+/* EPCI registers */
+#define SCC_EPCI_CNF10_REG 0x010
+#define SCC_EPCI_CNF14_REG 0x014
+#define SCC_EPCI_CNF18_REG 0x018
+#define SCC_EPCI_PVBAT 0x100
+#define SCC_EPCI_VPMBAT 0x104
+#define SCC_EPCI_VPIBAT 0x108
+#define SCC_EPCI_VCSR 0x110
+#define SCC_EPCI_VIENAB 0x114
+#define SCC_EPCI_VISTAT 0x118
+#define SCC_EPCI_VRDCOUNT 0x124
+#define SCC_EPCI_BAM0 0x12c
+#define SCC_EPCI_BAM1 0x134
+#define SCC_EPCI_BAM2 0x13c
+#define SCC_EPCI_IADR 0x164
+#define SCC_EPCI_CLKRST 0x800
+#define SCC_EPCI_INTSET 0x804
+#define SCC_EPCI_STATUS 0x808
+#define SCC_EPCI_ABTSET 0x80c
+#define SCC_EPCI_WATRP 0x810
+#define SCC_EPCI_DUMYRADR 0x814
+#define SCC_EPCI_SWRESP 0x818
+#define SCC_EPCI_CNTOPT 0x81c
+#define SCC_EPCI_ECMODE 0xf00
+#define SCC_EPCI_IOM_AC_NUM 5
+#define SCC_EPCI_IOM_ACTE(n) (0xf10 + (n) * 4)
+#define SCC_EPCI_IOT_AC_NUM 4
+#define SCC_EPCI_IOT_ACTE(n) (0xf30 + (n) * 4)
+#define SCC_EPCI_MAEA 0xf50
+#define SCC_EPCI_MAEC 0xf54
+#define SCC_EPCI_CKCTRL 0xff0
+
+/* bits for SCC_EPCI_VCSR */
+#define SCC_EPCI_VCSR_FRE 0x00020000
+#define SCC_EPCI_VCSR_FWE 0x00010000
+#define SCC_EPCI_VCSR_DR 0x00000400
+#define SCC_EPCI_VCSR_SR 0x00000008
+#define SCC_EPCI_VCSR_AT 0x00000004
+
+/* bits for SCC_EPCI_VIENAB/SCC_EPCI_VISTAT */
+#define SCC_EPCI_VISTAT_PMPE 0x00000008
+#define SCC_EPCI_VISTAT_PMFE 0x00000004
+#define SCC_EPCI_VISTAT_PRA 0x00000002
+#define SCC_EPCI_VISTAT_PRD 0x00000001
+#define SCC_EPCI_VISTAT_ALL 0x0000000f
+
+#define SCC_EPCI_VIENAB_PMPEE 0x00000008
+#define SCC_EPCI_VIENAB_PMFEE 0x00000004
+#define SCC_EPCI_VIENAB_PRA 0x00000002
+#define SCC_EPCI_VIENAB_PRD 0x00000001
+#define SCC_EPCI_VIENAB_ALL 0x0000000f
+
+/* bits for SCC_EPCI_CLKRST */
+#define SCC_EPCI_CLKRST_CKS_MASK 0x00030000
+#define SCC_EPCI_CLKRST_CKS_2 0x00000000
+#define SCC_EPCI_CLKRST_CKS_4 0x00010000
+#define SCC_EPCI_CLKRST_CKS_8 0x00020000
+#define SCC_EPCI_CLKRST_PCICRST 0x00000400
+#define SCC_EPCI_CLKRST_BC 0x00000200
+#define SCC_EPCI_CLKRST_PCIRST 0x00000100
+#define SCC_EPCI_CLKRST_PCKEN 0x00000001
+
+/* bits for SCC_EPCI_INTSET/SCC_EPCI_STATUS */
+#define SCC_EPCI_INT_2M 0x01000000
+#define SCC_EPCI_INT_RERR 0x00200000
+#define SCC_EPCI_INT_SERR 0x00100000
+#define SCC_EPCI_INT_PRTER 0x00080000
+#define SCC_EPCI_INT_SER 0x00040000
+#define SCC_EPCI_INT_PER 0x00020000
+#define SCC_EPCI_INT_PAI 0x00010000
+#define SCC_EPCI_INT_1M 0x00000100
+#define SCC_EPCI_INT_PME 0x00000010
+#define SCC_EPCI_INT_INTD 0x00000008
+#define SCC_EPCI_INT_INTC 0x00000004
+#define SCC_EPCI_INT_INTB 0x00000002
+#define SCC_EPCI_INT_INTA 0x00000001
+#define SCC_EPCI_INT_DEVINT 0x0000000f
+#define SCC_EPCI_INT_ALL 0x003f001f
+#define SCC_EPCI_INT_ALLERR 0x003f0000
+
+/* bits for SCC_EPCI_CKCTRL */
+#define SCC_EPCI_CKCTRL_CRST0 0x00010000
+#define SCC_EPCI_CKCTRL_CRST1 0x00020000
+#define SCC_EPCI_CKCTRL_OCLKEN 0x00000100
+#define SCC_EPCI_CKCTRL_LCLKEN 0x00000001
+
+#define SCC_EPCI_IDSEL_AD_TO_SLOT(ad) ((ad) - 10)
+#define SCC_EPCI_MAX_DEVNU SCC_EPCI_IDSEL_AD_TO_SLOT(32)
+
+/* bits for SCC_EPCI_CNTOPT */
+#define SCC_EPCI_CNTOPT_O2PMB 0x00000002
+
+/* SCC PCIEXC SMMIO registers */
+#define PEXCADRS 0x000
+#define PEXCWDATA 0x004
+#define PEXCRDATA 0x008
+#define PEXDADRS 0x010
+#define PEXDCMND 0x014
+#define PEXDWDATA 0x018
+#define PEXDRDATA 0x01c
+#define PEXREQID 0x020
+#define PEXTIDMAP 0x024
+#define PEXINTMASK 0x028
+#define PEXINTSTS 0x02c
+#define PEXAERRMASK 0x030
+#define PEXAERRSTS 0x034
+#define PEXPRERRMASK 0x040
+#define PEXPRERRSTS 0x044
+#define PEXPRERRID01 0x048
+#define PEXPRERRID23 0x04c
+#define PEXVDMASK 0x050
+#define PEXVDSTS 0x054
+#define PEXRCVCPLIDA 0x060
+#define PEXLENERRIDA 0x068
+#define PEXPHYPLLST 0x070
+#define PEXDMRDEN0 0x100
+#define PEXDMRDADR0 0x104
+#define PEXDMRDENX 0x110
+#define PEXDMRDADRX 0x114
+#define PEXECMODE 0xf00
+#define PEXMAEA(n) (0xf50 + (8 * n))
+#define PEXMAEC(n) (0xf54 + (8 * n))
+#define PEXCCRCTRL 0xff0
+
+/* SCC PCIEXC bits and shifts for PEXCADRS */
+#define PEXCADRS_BYTE_EN_SHIFT 20
+#define PEXCADRS_CMD_SHIFT 16
+#define PEXCADRS_CMD_READ (0xa << PEXCADRS_CMD_SHIFT)
+#define PEXCADRS_CMD_WRITE (0xb << PEXCADRS_CMD_SHIFT)
+
+/* SCC PCIEXC shifts for PEXDADRS */
+#define PEXDADRS_BUSNO_SHIFT 20
+#define PEXDADRS_DEVNO_SHIFT 15
+#define PEXDADRS_FUNCNO_SHIFT 12
+
+/* SCC PCIEXC bits and shifts for PEXDCMND */
+#define PEXDCMND_BYTE_EN_SHIFT 4
+#define PEXDCMND_IO_READ 0x2
+#define PEXDCMND_IO_WRITE 0x3
+#define PEXDCMND_CONFIG_READ 0xa
+#define PEXDCMND_CONFIG_WRITE 0xb
+
+/* SCC PCIEXC bits for PEXPHYPLLST */
+#define PEXPHYPLLST_PEXPHYAPLLST 0x00000001
+
+/* SCC PCIEXC bits for PEXECMODE */
+#define PEXECMODE_ALL_THROUGH 0x00000000
+#define PEXECMODE_ALL_8BIT 0x00550155
+#define PEXECMODE_ALL_16BIT 0x00aa02aa
+
+/* SCC PCIEXC bits for PEXCCRCTRL */
+#define PEXCCRCTRL_PEXIPCOREEN 0x00040000
+#define PEXCCRCTRL_PEXIPCONTEN 0x00020000
+#define PEXCCRCTRL_PEXPHYPLLEN 0x00010000
+#define PEXCCRCTRL_PCIEXCAOCKEN 0x00000100
+
+/* SCC PCIEXC port configuration registers */
+#define PEXTCERRCHK 0x21c
+#define PEXTAMAPB0 0x220
+#define PEXTAMAPL0 0x224
+#define PEXTAMAPB(n) (PEXTAMAPB0 + 8 * (n))
+#define PEXTAMAPL(n) (PEXTAMAPL0 + 8 * (n))
+#define PEXCHVC0P 0x500
+#define PEXCHVC0NP 0x504
+#define PEXCHVC0C 0x508
+#define PEXCDVC0P 0x50c
+#define PEXCDVC0NP 0x510
+#define PEXCDVC0C 0x514
+#define PEXCHVCXP 0x518
+#define PEXCHVCXNP 0x51c
+#define PEXCHVCXC 0x520
+#define PEXCDVCXP 0x524
+#define PEXCDVCXNP 0x528
+#define PEXCDVCXC 0x52c
+#define PEXCTTRG 0x530
+#define PEXTSCTRL 0x700
+#define PEXTSSTS 0x704
+#define PEXSKPCTRL 0x708
+
+/* UHC registers */
+#define SCC_UHC_CKRCTRL 0xff0
+#define SCC_UHC_ECMODE 0xf00
+
+/* bits for SCC_UHC_CKRCTRL */
+#define SCC_UHC_F48MCKLEN 0x00000001
+#define SCC_UHC_P_SUSPEND 0x00000002
+#define SCC_UHC_PHY_SUSPEND_SEL 0x00000004
+#define SCC_UHC_HCLKEN 0x00000100
+#define SCC_UHC_USBEN 0x00010000
+#define SCC_UHC_USBCEN 0x00020000
+#define SCC_UHC_PHYEN 0x00040000
+
+/* bits for SCC_UHC_ECMODE */
+#define SCC_UHC_ECMODE_BY_BYTE 0x00000555
+#define SCC_UHC_ECMODE_BY_WORD 0x00000aaa
+
+#endif /* _CELLEB_SCC_H */
diff --git a/arch/powerpc/platforms/cell/celleb_scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c
new file mode 100644
index 00000000000..844c0facb4f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc_epci.c
@@ -0,0 +1,429 @@
+/*
+ * Support for SCC external PCI
+ *
+ * (C) Copyright 2004-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/pci_regs.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+#include "celleb_scc.h"
+#include "celleb_pci.h"
+
+#define MAX_PCI_DEVICES 32
+#define MAX_PCI_FUNCTIONS 8
+
+#define iob() __asm__ __volatile__("eieio; sync":::"memory")
+
+static inline PCI_IO_ADDR celleb_epci_get_epci_base(
+ struct pci_controller *hose)
+{
+ /*
+ * Note:
+ * Celleb epci uses cfg_addr as a base address for
+ * epci control registers.
+ */
+
+ return hose->cfg_addr;
+}
+
+static inline PCI_IO_ADDR celleb_epci_get_epci_cfg(
+ struct pci_controller *hose)
+{
+ /*
+ * Note:
+ * Celleb epci uses cfg_data as a base address for
+ * configuration area for epci devices.
+ */
+
+ return hose->cfg_data;
+}
+
+static inline void clear_and_disable_master_abort_interrupt(
+ struct pci_controller *hose)
+{
+ PCI_IO_ADDR epci_base;
+ PCI_IO_ADDR reg;
+ epci_base = celleb_epci_get_epci_base(hose);
+ reg = epci_base + PCI_COMMAND;
+ out_be32(reg, in_be32(reg) | (PCI_STATUS_REC_MASTER_ABORT << 16));
+}
+
+static int celleb_epci_check_abort(struct pci_controller *hose,
+ PCI_IO_ADDR addr)
+{
+ PCI_IO_ADDR reg;
+ PCI_IO_ADDR epci_base;
+ u32 val;
+
+ iob();
+ epci_base = celleb_epci_get_epci_base(hose);
+
+ reg = epci_base + PCI_COMMAND;
+ val = in_be32(reg);
+
+ if (val & (PCI_STATUS_REC_MASTER_ABORT << 16)) {
+ out_be32(reg,
+ (val & 0xffff) | (PCI_STATUS_REC_MASTER_ABORT << 16));
+
+ /* clear PCI Controller error, FRE, PMFE */
+ reg = epci_base + SCC_EPCI_STATUS;
+ out_be32(reg, SCC_EPCI_INT_PAI);
+
+ reg = epci_base + SCC_EPCI_VCSR;
+ val = in_be32(reg) & 0xffff;
+ val |= SCC_EPCI_VCSR_FRE;
+ out_be32(reg, val);
+
+ reg = epci_base + SCC_EPCI_VISTAT;
+ out_be32(reg, SCC_EPCI_VISTAT_PMFE);
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static PCI_IO_ADDR celleb_epci_make_config_addr(struct pci_bus *bus,
+ struct pci_controller *hose, unsigned int devfn, int where)
+{
+ PCI_IO_ADDR addr;
+
+ if (bus != hose->bus)
+ addr = celleb_epci_get_epci_cfg(hose) +
+ (((bus->number & 0xff) << 16)
+ | ((devfn & 0xff) << 8)
+ | (where & 0xff)
+ | 0x01000000);
+ else
+ addr = celleb_epci_get_epci_cfg(hose) +
+ (((devfn & 0xff) << 8) | (where & 0xff));
+
+ pr_debug("EPCI: config_addr = 0x%p\n", addr);
+
+ return addr;
+}
+
+static int celleb_epci_read_config(struct pci_bus *bus,
+ unsigned int devfn, int where, int size, u32 *val)
+{
+ PCI_IO_ADDR epci_base;
+ PCI_IO_ADDR addr;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ /* allignment check */
+ BUG_ON(where % size);
+
+ if (!celleb_epci_get_epci_cfg(hose))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (bus->number == hose->first_busno && devfn == 0) {
+ /* EPCI controller self */
+
+ epci_base = celleb_epci_get_epci_base(hose);
+ addr = epci_base + where;
+
+ switch (size) {
+ case 1:
+ *val = in_8(addr);
+ break;
+ case 2:
+ *val = in_be16(addr);
+ break;
+ case 4:
+ *val = in_be32(addr);
+ break;
+ default:
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+
+ } else {
+
+ clear_and_disable_master_abort_interrupt(hose);
+ addr = celleb_epci_make_config_addr(bus, hose, devfn, where);
+
+ switch (size) {
+ case 1:
+ *val = in_8(addr);
+ break;
+ case 2:
+ *val = in_le16(addr);
+ break;
+ case 4:
+ *val = in_le32(addr);
+ break;
+ default:
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+ }
+
+ pr_debug("EPCI: "
+ "addr=0x%p, devfn=0x%x, where=0x%x, size=0x%x, val=0x%x\n",
+ addr, devfn, where, size, *val);
+
+ return celleb_epci_check_abort(hose, NULL);
+}
+
+static int celleb_epci_write_config(struct pci_bus *bus,
+ unsigned int devfn, int where, int size, u32 val)
+{
+ PCI_IO_ADDR epci_base;
+ PCI_IO_ADDR addr;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ /* allignment check */
+ BUG_ON(where % size);
+
+ if (!celleb_epci_get_epci_cfg(hose))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (bus->number == hose->first_busno && devfn == 0) {
+ /* EPCI controller self */
+
+ epci_base = celleb_epci_get_epci_base(hose);
+ addr = epci_base + where;
+
+ switch (size) {
+ case 1:
+ out_8(addr, val);
+ break;
+ case 2:
+ out_be16(addr, val);
+ break;
+ case 4:
+ out_be32(addr, val);
+ break;
+ default:
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+
+ } else {
+
+ clear_and_disable_master_abort_interrupt(hose);
+ addr = celleb_epci_make_config_addr(bus, hose, devfn, where);
+
+ switch (size) {
+ case 1:
+ out_8(addr, val);
+ break;
+ case 2:
+ out_le16(addr, val);
+ break;
+ case 4:
+ out_le32(addr, val);
+ break;
+ default:
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+ }
+
+ return celleb_epci_check_abort(hose, addr);
+}
+
+struct pci_ops celleb_epci_ops = {
+ .read = celleb_epci_read_config,
+ .write = celleb_epci_write_config,
+};
+
+/* to be moved in FW */
+static int __init celleb_epci_init(struct pci_controller *hose)
+{
+ u32 val;
+ PCI_IO_ADDR reg;
+ PCI_IO_ADDR epci_base;
+ int hwres = 0;
+
+ epci_base = celleb_epci_get_epci_base(hose);
+
+ /* PCI core reset(Internal bus and PCI clock) */
+ reg = epci_base + SCC_EPCI_CKCTRL;
+ val = in_be32(reg);
+ if (val == 0x00030101)
+ hwres = 1;
+ else {
+ val &= ~(SCC_EPCI_CKCTRL_CRST0 | SCC_EPCI_CKCTRL_CRST1);
+ out_be32(reg, val);
+
+ /* set PCI core clock */
+ val = in_be32(reg);
+ val |= (SCC_EPCI_CKCTRL_OCLKEN | SCC_EPCI_CKCTRL_LCLKEN);
+ out_be32(reg, val);
+
+ /* release PCI core reset (internal bus) */
+ val = in_be32(reg);
+ val |= SCC_EPCI_CKCTRL_CRST0;
+ out_be32(reg, val);
+
+ /* set PCI clock select */
+ reg = epci_base + SCC_EPCI_CLKRST;
+ val = in_be32(reg);
+ val &= ~SCC_EPCI_CLKRST_CKS_MASK;
+ val |= SCC_EPCI_CLKRST_CKS_2;
+ out_be32(reg, val);
+
+ /* set arbiter */
+ reg = epci_base + SCC_EPCI_ABTSET;
+ out_be32(reg, 0x0f1f001f); /* temporary value */
+
+ /* buffer on */
+ reg = epci_base + SCC_EPCI_CLKRST;
+ val = in_be32(reg);
+ val |= SCC_EPCI_CLKRST_BC;
+ out_be32(reg, val);
+
+ /* PCI clock enable */
+ val = in_be32(reg);
+ val |= SCC_EPCI_CLKRST_PCKEN;
+ out_be32(reg, val);
+
+ /* release PCI core reset (all) */
+ reg = epci_base + SCC_EPCI_CKCTRL;
+ val = in_be32(reg);
+ val |= (SCC_EPCI_CKCTRL_CRST0 | SCC_EPCI_CKCTRL_CRST1);
+ out_be32(reg, val);
+
+ /* set base translation registers. (already set by Beat) */
+
+ /* set base address masks. (already set by Beat) */
+ }
+
+ /* release interrupt masks and clear all interrupts */
+ reg = epci_base + SCC_EPCI_INTSET;
+ out_be32(reg, 0x013f011f); /* all interrupts enable */
+ reg = epci_base + SCC_EPCI_VIENAB;
+ val = SCC_EPCI_VIENAB_PMPEE | SCC_EPCI_VIENAB_PMFEE;
+ out_be32(reg, val);
+ reg = epci_base + SCC_EPCI_STATUS;
+ out_be32(reg, 0xffffffff);
+ reg = epci_base + SCC_EPCI_VISTAT;
+ out_be32(reg, 0xffffffff);
+
+ /* disable PCI->IB address translation */
+ reg = epci_base + SCC_EPCI_VCSR;
+ val = in_be32(reg);
+ val &= ~(SCC_EPCI_VCSR_DR | SCC_EPCI_VCSR_AT);
+ out_be32(reg, val);
+
+ /* set base addresses. (no need to set?) */
+
+ /* memory space, bus master enable */
+ reg = epci_base + PCI_COMMAND;
+ val = PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+ out_be32(reg, val);
+
+ /* endian mode setup */
+ reg = epci_base + SCC_EPCI_ECMODE;
+ val = 0x00550155;
+ out_be32(reg, val);
+
+ /* set control option */
+ reg = epci_base + SCC_EPCI_CNTOPT;
+ val = in_be32(reg);
+ val |= SCC_EPCI_CNTOPT_O2PMB;
+ out_be32(reg, val);
+
+ /* XXX: temporay: set registers for address conversion setup */
+ reg = epci_base + SCC_EPCI_CNF10_REG;
+ out_be32(reg, 0x80000008);
+ reg = epci_base + SCC_EPCI_CNF14_REG;
+ out_be32(reg, 0x40000008);
+
+ reg = epci_base + SCC_EPCI_BAM0;
+ out_be32(reg, 0x80000000);
+ reg = epci_base + SCC_EPCI_BAM1;
+ out_be32(reg, 0xe0000000);
+
+ reg = epci_base + SCC_EPCI_PVBAT;
+ out_be32(reg, 0x80000000);
+
+ if (!hwres) {
+ /* release external PCI reset */
+ reg = epci_base + SCC_EPCI_CLKRST;
+ val = in_be32(reg);
+ val |= SCC_EPCI_CLKRST_PCIRST;
+ out_be32(reg, val);
+ }
+
+ return 0;
+}
+
+static int __init celleb_setup_epci(struct device_node *node,
+ struct pci_controller *hose)
+{
+ struct resource r;
+
+ pr_debug("PCI: celleb_setup_epci()\n");
+
+ /*
+ * Note:
+ * Celleb epci uses cfg_addr and cfg_data member of
+ * pci_controller structure in irregular way.
+ *
+ * cfg_addr is used to map for control registers of
+ * celleb epci.
+ *
+ * cfg_data is used for configuration area of devices
+ * on Celleb epci buses.
+ */
+
+ if (of_address_to_resource(node, 0, &r))
+ goto error;
+ hose->cfg_addr = ioremap(r.start, resource_size(&r));
+ if (!hose->cfg_addr)
+ goto error;
+ pr_debug("EPCI: cfg_addr map 0x%016llx->0x%016lx + 0x%016llx\n",
+ r.start, (unsigned long)hose->cfg_addr, resource_size(&r));
+
+ if (of_address_to_resource(node, 2, &r))
+ goto error;
+ hose->cfg_data = ioremap(r.start, resource_size(&r));
+ if (!hose->cfg_data)
+ goto error;
+ pr_debug("EPCI: cfg_data map 0x%016llx->0x%016lx + 0x%016llx\n",
+ r.start, (unsigned long)hose->cfg_data, resource_size(&r));
+
+ hose->ops = &celleb_epci_ops;
+ celleb_epci_init(hose);
+
+ return 0;
+
+error:
+ if (hose->cfg_addr)
+ iounmap(hose->cfg_addr);
+
+ if (hose->cfg_data)
+ iounmap(hose->cfg_data);
+ return 1;
+}
+
+struct celleb_phb_spec celleb_epci_spec __initdata = {
+ .setup = celleb_setup_epci,
+ .ops = &spiderpci_ops,
+ .iowa_init = &spiderpci_iowa_init,
+ .iowa_data = (void *)0,
+};
diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
new file mode 100644
index 00000000000..14be2bd358b
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
@@ -0,0 +1,541 @@
+/*
+ * Support for Celleb PCI-Express.
+ *
+ * (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/iommu.h>
+#include <asm/byteorder.h>
+
+#include "celleb_scc.h"
+#include "celleb_pci.h"
+
+#define PEX_IN(base, off) in_be32((void __iomem *)(base) + (off))
+#define PEX_OUT(base, off, data) out_be32((void __iomem *)(base) + (off), (data))
+
+static void scc_pciex_io_flush(struct iowa_bus *bus)
+{
+ (void)PEX_IN(bus->phb->cfg_addr, PEXDMRDEN0);
+}
+
+/*
+ * Memory space access to device on PCIEX
+ */
+#define PCIEX_MMIO_READ(name, ret) \
+static ret scc_pciex_##name(const PCI_IO_ADDR addr) \
+{ \
+ ret val = __do_##name(addr); \
+ scc_pciex_io_flush(iowa_mem_find_bus(addr)); \
+ return val; \
+}
+
+#define PCIEX_MMIO_READ_STR(name) \
+static void scc_pciex_##name(const PCI_IO_ADDR addr, void *buf, \
+ unsigned long count) \
+{ \
+ __do_##name(addr, buf, count); \
+ scc_pciex_io_flush(iowa_mem_find_bus(addr)); \
+}
+
+PCIEX_MMIO_READ(readb, u8)
+PCIEX_MMIO_READ(readw, u16)
+PCIEX_MMIO_READ(readl, u32)
+PCIEX_MMIO_READ(readq, u64)
+PCIEX_MMIO_READ(readw_be, u16)
+PCIEX_MMIO_READ(readl_be, u32)
+PCIEX_MMIO_READ(readq_be, u64)
+PCIEX_MMIO_READ_STR(readsb)
+PCIEX_MMIO_READ_STR(readsw)
+PCIEX_MMIO_READ_STR(readsl)
+
+static void scc_pciex_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
+ unsigned long n)
+{
+ __do_memcpy_fromio(dest, src, n);
+ scc_pciex_io_flush(iowa_mem_find_bus(src));
+}
+
+/*
+ * I/O port access to devices on PCIEX.
+ */
+
+static inline unsigned long get_bus_address(struct pci_controller *phb,
+ unsigned long port)
+{
+ return port - ((unsigned long)(phb->io_base_virt) - _IO_BASE);
+}
+
+static u32 scc_pciex_read_port(struct pci_controller *phb,
+ unsigned long port, int size)
+{
+ unsigned int byte_enable;
+ unsigned int cmd, shift;
+ unsigned long addr;
+ u32 data, ret;
+
+ BUG_ON(((port & 0x3ul) + size) > 4);
+
+ addr = get_bus_address(phb, port);
+ shift = addr & 0x3ul;
+ byte_enable = ((1 << size) - 1) << shift;
+ cmd = PEXDCMND_IO_READ | (byte_enable << PEXDCMND_BYTE_EN_SHIFT);
+ PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul));
+ PEX_OUT(phb->cfg_addr, PEXDCMND, cmd);
+ data = PEX_IN(phb->cfg_addr, PEXDRDATA);
+ ret = (data >> (shift * 8)) & (0xFFFFFFFF >> ((4 - size) * 8));
+
+ pr_debug("PCIEX:PIO READ:port=0x%lx, addr=0x%lx, size=%d, be=%x,"
+ " cmd=%x, data=%x, ret=%x\n", port, addr, size, byte_enable,
+ cmd, data, ret);
+
+ return ret;
+}
+
+static void scc_pciex_write_port(struct pci_controller *phb,
+ unsigned long port, int size, u32 val)
+{
+ unsigned int byte_enable;
+ unsigned int cmd, shift;
+ unsigned long addr;
+ u32 data;
+
+ BUG_ON(((port & 0x3ul) + size) > 4);
+
+ addr = get_bus_address(phb, port);
+ shift = addr & 0x3ul;
+ byte_enable = ((1 << size) - 1) << shift;
+ cmd = PEXDCMND_IO_WRITE | (byte_enable << PEXDCMND_BYTE_EN_SHIFT);
+ data = (val & (0xFFFFFFFF >> (4 - size) * 8)) << (shift * 8);
+ PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul));
+ PEX_OUT(phb->cfg_addr, PEXDCMND, cmd);
+ PEX_OUT(phb->cfg_addr, PEXDWDATA, data);
+
+ pr_debug("PCIEX:PIO WRITE:port=0x%lx, addr=%lx, size=%d, val=%x,"
+ " be=%x, cmd=%x, data=%x\n", port, addr, size, val,
+ byte_enable, cmd, data);
+}
+
+static u8 __scc_pciex_inb(struct pci_controller *phb, unsigned long port)
+{
+ return (u8)scc_pciex_read_port(phb, port, 1);
+}
+
+static u16 __scc_pciex_inw(struct pci_controller *phb, unsigned long port)
+{
+ u32 data;
+ if ((port & 0x3ul) < 3)
+ data = scc_pciex_read_port(phb, port, 2);
+ else {
+ u32 d1 = scc_pciex_read_port(phb, port, 1);
+ u32 d2 = scc_pciex_read_port(phb, port + 1, 1);
+ data = d1 | (d2 << 8);
+ }
+ return (u16)data;
+}
+
+static u32 __scc_pciex_inl(struct pci_controller *phb, unsigned long port)
+{
+ unsigned int mod = port & 0x3ul;
+ u32 data;
+ if (mod == 0)
+ data = scc_pciex_read_port(phb, port, 4);
+ else {
+ u32 d1 = scc_pciex_read_port(phb, port, 4 - mod);
+ u32 d2 = scc_pciex_read_port(phb, port + 1, mod);
+ data = d1 | (d2 << (mod * 8));
+ }
+ return data;
+}
+
+static void __scc_pciex_outb(struct pci_controller *phb,
+ u8 val, unsigned long port)
+{
+ scc_pciex_write_port(phb, port, 1, (u32)val);
+}
+
+static void __scc_pciex_outw(struct pci_controller *phb,
+ u16 val, unsigned long port)
+{
+ if ((port & 0x3ul) < 3)
+ scc_pciex_write_port(phb, port, 2, (u32)val);
+ else {
+ u32 d1 = val & 0x000000FF;
+ u32 d2 = (val & 0x0000FF00) >> 8;
+ scc_pciex_write_port(phb, port, 1, d1);
+ scc_pciex_write_port(phb, port + 1, 1, d2);
+ }
+}
+
+static void __scc_pciex_outl(struct pci_controller *phb,
+ u32 val, unsigned long port)
+{
+ unsigned int mod = port & 0x3ul;
+ if (mod == 0)
+ scc_pciex_write_port(phb, port, 4, val);
+ else {
+ u32 d1 = val & (0xFFFFFFFFul >> (mod * 8));
+ u32 d2 = val >> ((4 - mod) * 8);
+ scc_pciex_write_port(phb, port, 4 - mod, d1);
+ scc_pciex_write_port(phb, port + 1, mod, d2);
+ }
+}
+
+#define PCIEX_PIO_FUNC(size, name) \
+static u##size scc_pciex_in##name(unsigned long port) \
+{ \
+ struct iowa_bus *bus = iowa_pio_find_bus(port); \
+ u##size data = __scc_pciex_in##name(bus->phb, port); \
+ scc_pciex_io_flush(bus); \
+ return data; \
+} \
+static void scc_pciex_ins##name(unsigned long p, void *b, unsigned long c) \
+{ \
+ struct iowa_bus *bus = iowa_pio_find_bus(p); \
+ __le##size *dst = b; \
+ for (; c != 0; c--, dst++) \
+ *dst = cpu_to_le##size(__scc_pciex_in##name(bus->phb, p)); \
+ scc_pciex_io_flush(bus); \
+} \
+static void scc_pciex_out##name(u##size val, unsigned long port) \
+{ \
+ struct iowa_bus *bus = iowa_pio_find_bus(port); \
+ __scc_pciex_out##name(bus->phb, val, port); \
+} \
+static void scc_pciex_outs##name(unsigned long p, const void *b, \
+ unsigned long c) \
+{ \
+ struct iowa_bus *bus = iowa_pio_find_bus(p); \
+ const __le##size *src = b; \
+ for (; c != 0; c--, src++) \
+ __scc_pciex_out##name(bus->phb, le##size##_to_cpu(*src), p); \
+}
+#define __le8 u8
+#define cpu_to_le8(x) (x)
+#define le8_to_cpu(x) (x)
+PCIEX_PIO_FUNC(8, b)
+PCIEX_PIO_FUNC(16, w)
+PCIEX_PIO_FUNC(32, l)
+
+static struct ppc_pci_io scc_pciex_ops = {
+ .readb = scc_pciex_readb,
+ .readw = scc_pciex_readw,
+ .readl = scc_pciex_readl,
+ .readq = scc_pciex_readq,
+ .readw_be = scc_pciex_readw_be,
+ .readl_be = scc_pciex_readl_be,
+ .readq_be = scc_pciex_readq_be,
+ .readsb = scc_pciex_readsb,
+ .readsw = scc_pciex_readsw,
+ .readsl = scc_pciex_readsl,
+ .memcpy_fromio = scc_pciex_memcpy_fromio,
+ .inb = scc_pciex_inb,
+ .inw = scc_pciex_inw,
+ .inl = scc_pciex_inl,
+ .outb = scc_pciex_outb,
+ .outw = scc_pciex_outw,
+ .outl = scc_pciex_outl,
+ .insb = scc_pciex_insb,
+ .insw = scc_pciex_insw,
+ .insl = scc_pciex_insl,
+ .outsb = scc_pciex_outsb,
+ .outsw = scc_pciex_outsw,
+ .outsl = scc_pciex_outsl,
+};
+
+static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data)
+{
+ dma_addr_t dummy_page_da;
+ void *dummy_page_va;
+
+ dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!dummy_page_va) {
+ pr_err("PCIEX:Alloc dummy_page_va failed\n");
+ return -1;
+ }
+
+ dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(bus->phb->parent, dummy_page_da)) {
+ pr_err("PCIEX:Map dummy page failed.\n");
+ kfree(dummy_page_va);
+ return -1;
+ }
+
+ PEX_OUT(bus->phb->cfg_addr, PEXDMRDADR0, dummy_page_da);
+
+ return 0;
+}
+
+/*
+ * config space access
+ */
+#define MK_PEXDADRS(bus_no, dev_no, func_no, addr) \
+ ((uint32_t)(((addr) & ~0x3UL) | \
+ ((bus_no) << PEXDADRS_BUSNO_SHIFT) | \
+ ((dev_no) << PEXDADRS_DEVNO_SHIFT) | \
+ ((func_no) << PEXDADRS_FUNCNO_SHIFT)))
+
+#define MK_PEXDCMND_BYTE_EN(addr, size) \
+ ((((0x1 << (size))-1) << ((addr) & 0x3)) << PEXDCMND_BYTE_EN_SHIFT)
+#define MK_PEXDCMND(cmd, addr, size) ((cmd) | MK_PEXDCMND_BYTE_EN(addr, size))
+
+static uint32_t config_read_pciex_dev(unsigned int __iomem *base,
+ uint64_t bus_no, uint64_t dev_no, uint64_t func_no,
+ uint64_t off, uint64_t size)
+{
+ uint32_t ret;
+ uint32_t addr, cmd;
+
+ addr = MK_PEXDADRS(bus_no, dev_no, func_no, off);
+ cmd = MK_PEXDCMND(PEXDCMND_CONFIG_READ, off, size);
+ PEX_OUT(base, PEXDADRS, addr);
+ PEX_OUT(base, PEXDCMND, cmd);
+ ret = (PEX_IN(base, PEXDRDATA)
+ >> ((off & (4-size)) * 8)) & ((0x1 << (size * 8)) - 1);
+ return ret;
+}
+
+static void config_write_pciex_dev(unsigned int __iomem *base, uint64_t bus_no,
+ uint64_t dev_no, uint64_t func_no, uint64_t off, uint64_t size,
+ uint32_t data)
+{
+ uint32_t addr, cmd;
+
+ addr = MK_PEXDADRS(bus_no, dev_no, func_no, off);
+ cmd = MK_PEXDCMND(PEXDCMND_CONFIG_WRITE, off, size);
+ PEX_OUT(base, PEXDADRS, addr);
+ PEX_OUT(base, PEXDCMND, cmd);
+ PEX_OUT(base, PEXDWDATA,
+ (data & ((0x1 << (size * 8)) - 1)) << ((off & (4-size)) * 8));
+}
+
+#define MK_PEXCADRS_BYTE_EN(off, len) \
+ ((((0x1 << (len)) - 1) << ((off) & 0x3)) << PEXCADRS_BYTE_EN_SHIFT)
+#define MK_PEXCADRS(cmd, addr, size) \
+ ((cmd) | MK_PEXCADRS_BYTE_EN(addr, size) | ((addr) & ~0x3))
+static uint32_t config_read_pciex_rc(unsigned int __iomem *base,
+ uint32_t where, uint32_t size)
+{
+ PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_READ, where, size));
+ return (PEX_IN(base, PEXCRDATA)
+ >> ((where & (4 - size)) * 8)) & ((0x1 << (size * 8)) - 1);
+}
+
+static void config_write_pciex_rc(unsigned int __iomem *base, uint32_t where,
+ uint32_t size, uint32_t val)
+{
+ uint32_t data;
+
+ data = (val & ((0x1 << (size * 8)) - 1)) << ((where & (4 - size)) * 8);
+ PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_WRITE, where, size));
+ PEX_OUT(base, PEXCWDATA, data);
+}
+
+/* Interfaces */
+/* Note: Work-around
+ * On SCC PCIEXC, one device is seen on all 32 dev_no.
+ * As SCC PCIEXC can have only one device on the bus, we look only one dev_no.
+ * (dev_no = 1)
+ */
+static int scc_pciex_read_config(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, unsigned int *val)
+{
+ struct pci_controller *phb = pci_bus_to_host(bus);
+
+ if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) {
+ *val = ~0;
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+
+ if (bus->number == 0 && PCI_SLOT(devfn) == 0)
+ *val = config_read_pciex_rc(phb->cfg_addr, where, size);
+ else
+ *val = config_read_pciex_dev(phb->cfg_addr, bus->number,
+ PCI_SLOT(devfn), PCI_FUNC(devfn), where, size);
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int scc_pciex_write_config(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, unsigned int val)
+{
+ struct pci_controller *phb = pci_bus_to_host(bus);
+
+ if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (bus->number == 0 && PCI_SLOT(devfn) == 0)
+ config_write_pciex_rc(phb->cfg_addr, where, size, val);
+ else
+ config_write_pciex_dev(phb->cfg_addr, bus->number,
+ PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops scc_pciex_pci_ops = {
+ scc_pciex_read_config,
+ scc_pciex_write_config,
+};
+
+static void pciex_clear_intr_all(unsigned int __iomem *base)
+{
+ PEX_OUT(base, PEXAERRSTS, 0xffffffff);
+ PEX_OUT(base, PEXPRERRSTS, 0xffffffff);
+ PEX_OUT(base, PEXINTSTS, 0xffffffff);
+}
+
+#if 0
+static void pciex_disable_intr_all(unsigned int *base)
+{
+ PEX_OUT(base, PEXINTMASK, 0x0);
+ PEX_OUT(base, PEXAERRMASK, 0x0);
+ PEX_OUT(base, PEXPRERRMASK, 0x0);
+ PEX_OUT(base, PEXVDMASK, 0x0);
+}
+#endif
+
+static void pciex_enable_intr_all(unsigned int __iomem *base)
+{
+ PEX_OUT(base, PEXINTMASK, 0x0000e7f1);
+ PEX_OUT(base, PEXAERRMASK, 0x03ff01ff);
+ PEX_OUT(base, PEXPRERRMASK, 0x0001010f);
+ PEX_OUT(base, PEXVDMASK, 0x00000001);
+}
+
+static void pciex_check_status(unsigned int __iomem *base)
+{
+ uint32_t err = 0;
+ uint32_t intsts, aerr, prerr, rcvcp, lenerr;
+ uint32_t maea, maec;
+
+ intsts = PEX_IN(base, PEXINTSTS);
+ aerr = PEX_IN(base, PEXAERRSTS);
+ prerr = PEX_IN(base, PEXPRERRSTS);
+ rcvcp = PEX_IN(base, PEXRCVCPLIDA);
+ lenerr = PEX_IN(base, PEXLENERRIDA);
+
+ if (intsts || aerr || prerr || rcvcp || lenerr)
+ err = 1;
+
+ pr_info("PCEXC interrupt!!\n");
+ pr_info("PEXINTSTS :0x%08x\n", intsts);
+ pr_info("PEXAERRSTS :0x%08x\n", aerr);
+ pr_info("PEXPRERRSTS :0x%08x\n", prerr);
+ pr_info("PEXRCVCPLIDA :0x%08x\n", rcvcp);
+ pr_info("PEXLENERRIDA :0x%08x\n", lenerr);
+
+ /* print detail of Protection Error */
+ if (intsts & 0x00004000) {
+ uint32_t i, n;
+ for (i = 0; i < 4; i++) {
+ n = 1 << i;
+ if (prerr & n) {
+ maea = PEX_IN(base, PEXMAEA(i));
+ maec = PEX_IN(base, PEXMAEC(i));
+ pr_info("PEXMAEC%d :0x%08x\n", i, maec);
+ pr_info("PEXMAEA%d :0x%08x\n", i, maea);
+ }
+ }
+ }
+
+ if (err)
+ pciex_clear_intr_all(base);
+}
+
+static irqreturn_t pciex_handle_internal_irq(int irq, void *dev_id)
+{
+ struct pci_controller *phb = dev_id;
+
+ pr_debug("PCIEX:pciex_handle_internal_irq(irq=%d)\n", irq);
+
+ BUG_ON(phb->cfg_addr == NULL);
+
+ pciex_check_status(phb->cfg_addr);
+
+ return IRQ_HANDLED;
+}
+
+static __init int celleb_setup_pciex(struct device_node *node,
+ struct pci_controller *phb)
+{
+ struct resource r;
+ struct of_irq oirq;
+ int virq;
+
+ /* SMMIO registers; used inside this file */
+ if (of_address_to_resource(node, 0, &r)) {
+ pr_err("PCIEXC:Failed to get config resource.\n");
+ return 1;
+ }
+ phb->cfg_addr = ioremap(r.start, resource_size(&r));
+ if (!phb->cfg_addr) {
+ pr_err("PCIEXC:Failed to remap SMMIO region.\n");
+ return 1;
+ }
+
+ /* Not use cfg_data, cmd and data regs are near address reg */
+ phb->cfg_data = NULL;
+
+ /* set pci_ops */
+ phb->ops = &scc_pciex_pci_ops;
+
+ /* internal interrupt handler */
+ if (of_irq_map_one(node, 1, &oirq)) {
+ pr_err("PCIEXC:Failed to map irq\n");
+ goto error;
+ }
+ virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
+ oirq.size);
+ if (request_irq(virq, pciex_handle_internal_irq,
+ 0, "pciex", (void *)phb)) {
+ pr_err("PCIEXC:Failed to request irq\n");
+ goto error;
+ }
+
+ /* enable all interrupts */
+ pciex_clear_intr_all(phb->cfg_addr);
+ pciex_enable_intr_all(phb->cfg_addr);
+ /* MSI: TBD */
+
+ return 0;
+
+error:
+ phb->cfg_data = NULL;
+ if (phb->cfg_addr)
+ iounmap(phb->cfg_addr);
+ phb->cfg_addr = NULL;
+ return 1;
+}
+
+struct celleb_phb_spec celleb_pciex_spec __initdata = {
+ .setup = celleb_setup_pciex,
+ .ops = &scc_pciex_ops,
+ .iowa_init = &scc_pciex_iowa_init,
+};
diff --git a/arch/powerpc/platforms/cell/celleb_scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c
new file mode 100644
index 00000000000..3a16c5b3c46
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc_sio.c
@@ -0,0 +1,101 @@
+/*
+ * setup serial port in SCC
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/tty.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/console.h>
+
+#include <asm/io.h>
+#include <asm/prom.h>
+
+/* sio irq0=0xb00010022 irq0=0xb00010023 irq2=0xb00010024
+ mmio=0xfff000-0x1000,0xff2000-0x1000 */
+static int txx9_serial_bitmap __initdata;
+
+static struct {
+ uint32_t offset;
+ uint32_t index;
+} txx9_scc_tab[3] __initdata = {
+ { 0x300, 0 }, /* 0xFFF300 */
+ { 0x400, 0 }, /* 0xFFF400 */
+ { 0x800, 1 } /* 0xFF2800 */
+};
+
+static int __init txx9_serial_init(void)
+{
+ extern int early_serial_txx9_setup(struct uart_port *port);
+ struct device_node *node = NULL;
+ int i;
+ struct uart_port req;
+ struct of_irq irq;
+ struct resource res;
+
+ while ((node = of_find_compatible_node(node,
+ "serial", "toshiba,sio-scc")) != NULL) {
+ for (i = 0; i < ARRAY_SIZE(txx9_scc_tab); i++) {
+ if (!(txx9_serial_bitmap & (1<<i)))
+ continue;
+
+ if (of_irq_map_one(node, i, &irq))
+ continue;
+ if (of_address_to_resource(node,
+ txx9_scc_tab[i].index, &res))
+ continue;
+
+ memset(&req, 0, sizeof(req));
+ req.line = i;
+ req.iotype = UPIO_MEM;
+ req.mapbase = res.start + txx9_scc_tab[i].offset;
+#ifdef CONFIG_SERIAL_TXX9_CONSOLE
+ req.membase = ioremap(req.mapbase, 0x24);
+#endif
+ req.irq = irq_create_of_mapping(irq.controller,
+ irq.specifier, irq.size);
+ req.flags |= UPF_IOREMAP | UPF_BUGGY_UART
+ /*HAVE_CTS_LINE*/;
+ req.uartclk = 83300000;
+ early_serial_txx9_setup(&req);
+ }
+ }
+
+ return 0;
+}
+
+static int __init txx9_serial_config(char *ptr)
+{
+ int i;
+
+ for (;;) {
+ switch (get_option(&ptr, &i)) {
+ default:
+ return 0;
+ case 2:
+ txx9_serial_bitmap |= 1 << i;
+ break;
+ case 1:
+ txx9_serial_bitmap |= 1 << i;
+ return 0;
+ }
+ }
+}
+__setup("txx9_serial=", txx9_serial_config);
+
+console_initcall(txx9_serial_init);
diff --git a/arch/powerpc/platforms/cell/celleb_scc_uhc.c b/arch/powerpc/platforms/cell/celleb_scc_uhc.c
new file mode 100644
index 00000000000..d63b720bfe3
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc_uhc.c
@@ -0,0 +1,95 @@
+/*
+ * SCC (Super Companion Chip) UHC setup
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+
+#include "celleb_scc.h"
+
+#define UHC_RESET_WAIT_MAX 10000
+
+static inline int uhc_clkctrl_ready(u32 val)
+{
+ const u32 mask = SCC_UHC_USBCEN | SCC_UHC_USBCEN;
+ return((val & mask) == mask);
+}
+
+/*
+ * UHC(usb host controller) enable function.
+ * affect to both of OHCI and EHCI core module.
+ */
+static void enable_scc_uhc(struct pci_dev *dev)
+{
+ void __iomem *uhc_base;
+ u32 __iomem *uhc_clkctrl;
+ u32 __iomem *uhc_ecmode;
+ u32 val = 0;
+ int i;
+
+ if (!machine_is(celleb_beat) &&
+ !machine_is(celleb_native))
+ return;
+
+ uhc_base = ioremap(pci_resource_start(dev, 0),
+ pci_resource_len(dev, 0));
+ if (!uhc_base) {
+ printk(KERN_ERR "failed to map UHC register base.\n");
+ return;
+ }
+ uhc_clkctrl = uhc_base + SCC_UHC_CKRCTRL;
+ uhc_ecmode = uhc_base + SCC_UHC_ECMODE;
+
+ /* setup for normal mode */
+ val |= SCC_UHC_F48MCKLEN;
+ out_be32(uhc_clkctrl, val);
+ val |= SCC_UHC_PHY_SUSPEND_SEL;
+ out_be32(uhc_clkctrl, val);
+ udelay(10);
+ val |= SCC_UHC_PHYEN;
+ out_be32(uhc_clkctrl, val);
+ udelay(50);
+
+ /* disable reset */
+ val |= SCC_UHC_HCLKEN;
+ out_be32(uhc_clkctrl, val);
+ val |= (SCC_UHC_USBCEN | SCC_UHC_USBEN);
+ out_be32(uhc_clkctrl, val);
+ i = 0;
+ while (!uhc_clkctrl_ready(in_be32(uhc_clkctrl))) {
+ udelay(10);
+ if (i++ > UHC_RESET_WAIT_MAX) {
+ printk(KERN_ERR "Failed to disable UHC reset %x\n",
+ in_be32(uhc_clkctrl));
+ break;
+ }
+ }
+
+ /* Endian Conversion Mode for Master ALL area */
+ out_be32(uhc_ecmode, SCC_UHC_ECMODE_BY_BYTE);
+
+ iounmap(uhc_base);
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TOSHIBA_2,
+ PCI_DEVICE_ID_TOSHIBA_SCC_USB, enable_scc_uhc);
diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c
new file mode 100644
index 00000000000..1d5a4d8ddad
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_setup.c
@@ -0,0 +1,243 @@
+/*
+ * Celleb setup code
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This code is based on arch/powerpc/platforms/cell/setup.c:
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * Modified by PPC64 Team, IBM Corp
+ * Modified by Cell Team, IBM Deutschland Entwicklung GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/of_platform.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/irq.h>
+#include <asm/time.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/cell-regs.h>
+
+#include "beat_interrupt.h"
+#include "beat_wrapper.h"
+#include "beat.h"
+#include "celleb_pci.h"
+#include "interrupt.h"
+#include "pervasive.h"
+#include "ras.h"
+
+static char celleb_machine_type[128] = "Celleb";
+
+static void celleb_show_cpuinfo(struct seq_file *m)
+{
+ struct device_node *root;
+ const char *model = "";
+
+ root = of_find_node_by_path("/");
+ if (root)
+ model = of_get_property(root, "model", NULL);
+ /* using "CHRP" is to trick anaconda into installing FCx into Celleb */
+ seq_printf(m, "machine\t\t: %s %s\n", celleb_machine_type, model);
+ of_node_put(root);
+}
+
+static int __init celleb_machine_type_hack(char *ptr)
+{
+ strlcpy(celleb_machine_type, ptr, sizeof(celleb_machine_type));
+ return 0;
+}
+
+__setup("celleb_machine_type_hack=", celleb_machine_type_hack);
+
+static void celleb_progress(char *s, unsigned short hex)
+{
+ printk("*** %04x : %s\n", hex, s ? s : "");
+}
+
+static void __init celleb_setup_arch_common(void)
+{
+ /* init to some ~sane value until calibrate_delay() runs */
+ loops_per_jiffy = 50000000;
+
+#ifdef CONFIG_DUMMY_CONSOLE
+ conswitchp = &dummy_con;
+#endif
+}
+
+static struct of_device_id celleb_bus_ids[] __initdata = {
+ { .type = "scc", },
+ { .type = "ioif", }, /* old style */
+ {},
+};
+
+static int __init celleb_publish_devices(void)
+{
+ /* Publish OF platform devices for southbridge IOs */
+ of_platform_bus_probe(NULL, celleb_bus_ids, NULL);
+
+ return 0;
+}
+machine_device_initcall(celleb_beat, celleb_publish_devices);
+machine_device_initcall(celleb_native, celleb_publish_devices);
+
+
+/*
+ * functions for Celleb-Beat
+ */
+static void __init celleb_setup_arch_beat(void)
+{
+#ifdef CONFIG_SPU_BASE
+ spu_priv1_ops = &spu_priv1_beat_ops;
+ spu_management_ops = &spu_management_of_ops;
+#endif
+
+ celleb_setup_arch_common();
+}
+
+static int __init celleb_probe_beat(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ if (!of_flat_dt_is_compatible(root, "Beat"))
+ return 0;
+
+ powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS
+ | FW_FEATURE_BEAT | FW_FEATURE_LPAR;
+ hpte_init_beat_v3();
+
+ return 1;
+}
+
+
+/*
+ * functions for Celleb-native
+ */
+static void __init celleb_init_IRQ_native(void)
+{
+ iic_init_IRQ();
+ spider_init_IRQ();
+}
+
+static void __init celleb_setup_arch_native(void)
+{
+#ifdef CONFIG_SPU_BASE
+ spu_priv1_ops = &spu_priv1_mmio_ops;
+ spu_management_ops = &spu_management_of_ops;
+#endif
+
+ cbe_regs_init();
+
+#ifdef CONFIG_CBE_RAS
+ cbe_ras_init();
+#endif
+
+#ifdef CONFIG_SMP
+ smp_init_cell();
+#endif
+
+ cbe_pervasive_init();
+
+ /* XXX: nvram initialization should be added */
+
+ celleb_setup_arch_common();
+}
+
+static int __init celleb_probe_native(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ if (of_flat_dt_is_compatible(root, "Beat") ||
+ !of_flat_dt_is_compatible(root, "TOSHIBA,Celleb"))
+ return 0;
+
+ powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS;
+ hpte_init_native();
+
+ return 1;
+}
+
+
+/*
+ * machine definitions
+ */
+define_machine(celleb_beat) {
+ .name = "Cell Reference Set (Beat)",
+ .probe = celleb_probe_beat,
+ .setup_arch = celleb_setup_arch_beat,
+ .show_cpuinfo = celleb_show_cpuinfo,
+ .restart = beat_restart,
+ .power_off = beat_power_off,
+ .halt = beat_halt,
+ .get_rtc_time = beat_get_rtc_time,
+ .set_rtc_time = beat_set_rtc_time,
+ .calibrate_decr = generic_calibrate_decr,
+ .progress = celleb_progress,
+ .power_save = beat_power_save,
+ .nvram_size = beat_nvram_get_size,
+ .nvram_read = beat_nvram_read,
+ .nvram_write = beat_nvram_write,
+ .set_dabr = beat_set_xdabr,
+ .init_IRQ = beatic_init_IRQ,
+ .get_irq = beatic_get_irq,
+ .pci_probe_mode = celleb_pci_probe_mode,
+ .pci_setup_phb = celleb_setup_phb,
+#ifdef CONFIG_KEXEC
+ .kexec_cpu_down = beat_kexec_cpu_down,
+#endif
+};
+
+define_machine(celleb_native) {
+ .name = "Cell Reference Set (native)",
+ .probe = celleb_probe_native,
+ .setup_arch = celleb_setup_arch_native,
+ .show_cpuinfo = celleb_show_cpuinfo,
+ .restart = rtas_restart,
+ .power_off = rtas_power_off,
+ .halt = rtas_halt,
+ .get_boot_time = rtas_get_boot_time,
+ .get_rtc_time = rtas_get_rtc_time,
+ .set_rtc_time = rtas_set_rtc_time,
+ .calibrate_decr = generic_calibrate_decr,
+ .progress = celleb_progress,
+ .pci_probe_mode = celleb_pci_probe_mode,
+ .pci_setup_phb = celleb_setup_phb,
+ .init_IRQ = celleb_init_IRQ_native,
+};
diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
new file mode 100644
index 00000000000..23bc9db4317
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -0,0 +1,171 @@
+/*
+ * spu aware cpufreq governor for the cell processor
+ *
+ * © Copyright IBM Corporation 2006-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/atomic.h>
+#include <asm/machdep.h>
+#include <asm/spu.h>
+
+#define POLL_TIME 100000 /* in µs */
+#define EXP 753 /* exp(-1) in fixed-point */
+
+struct spu_gov_info_struct {
+ unsigned long busy_spus; /* fixed-point */
+ struct cpufreq_policy *policy;
+ struct delayed_work work;
+ unsigned int poll_int; /* µs */
+};
+static DEFINE_PER_CPU(struct spu_gov_info_struct, spu_gov_info);
+
+static int calc_freq(struct spu_gov_info_struct *info)
+{
+ int cpu;
+ int busy_spus;
+
+ cpu = info->policy->cpu;
+ busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus);
+
+ CALC_LOAD(info->busy_spus, EXP, busy_spus * FIXED_1);
+ pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n",
+ cpu, busy_spus, info->busy_spus);
+
+ return info->policy->max * info->busy_spus / FIXED_1;
+}
+
+static void spu_gov_work(struct work_struct *work)
+{
+ struct spu_gov_info_struct *info;
+ int delay;
+ unsigned long target_freq;
+
+ info = container_of(work, struct spu_gov_info_struct, work.work);
+
+ /* after cancel_delayed_work_sync we unset info->policy */
+ BUG_ON(info->policy == NULL);
+
+ target_freq = calc_freq(info);
+ __cpufreq_driver_target(info->policy, target_freq, CPUFREQ_RELATION_H);
+
+ delay = usecs_to_jiffies(info->poll_int);
+ schedule_delayed_work_on(info->policy->cpu, &info->work, delay);
+}
+
+static void spu_gov_init_work(struct spu_gov_info_struct *info)
+{
+ int delay = usecs_to_jiffies(info->poll_int);
+ INIT_DELAYED_WORK_DEFERRABLE(&info->work, spu_gov_work);
+ schedule_delayed_work_on(info->policy->cpu, &info->work, delay);
+}
+
+static void spu_gov_cancel_work(struct spu_gov_info_struct *info)
+{
+ cancel_delayed_work_sync(&info->work);
+}
+
+static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event)
+{
+ unsigned int cpu = policy->cpu;
+ struct spu_gov_info_struct *info, *affected_info;
+ int i;
+ int ret = 0;
+
+ info = &per_cpu(spu_gov_info, cpu);
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ if (!cpu_online(cpu)) {
+ printk(KERN_ERR "cpu %d is not online\n", cpu);
+ ret = -EINVAL;
+ break;
+ }
+
+ if (!policy->cur) {
+ printk(KERN_ERR "no cpu specified in policy\n");
+ ret = -EINVAL;
+ break;
+ }
+
+ /* initialize spu_gov_info for all affected cpus */
+ for_each_cpu(i, policy->cpus) {
+ affected_info = &per_cpu(spu_gov_info, i);
+ affected_info->policy = policy;
+ }
+
+ info->poll_int = POLL_TIME;
+
+ /* setup timer */
+ spu_gov_init_work(info);
+
+ break;
+
+ case CPUFREQ_GOV_STOP:
+ /* cancel timer */
+ spu_gov_cancel_work(info);
+
+ /* clean spu_gov_info for all affected cpus */
+ for_each_cpu (i, policy->cpus) {
+ info = &per_cpu(spu_gov_info, i);
+ info->policy = NULL;
+ }
+
+ break;
+ }
+
+ return ret;
+}
+
+static struct cpufreq_governor spu_governor = {
+ .name = "spudemand",
+ .governor = spu_gov_govern,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * module init and destoy
+ */
+
+static int __init spu_gov_init(void)
+{
+ int ret;
+
+ ret = cpufreq_register_governor(&spu_governor);
+ if (ret)
+ printk(KERN_ERR "registration of governor failed\n");
+ return ret;
+}
+
+static void __exit spu_gov_exit(void)
+{
+ cpufreq_unregister_governor(&spu_governor);
+}
+
+
+module_init(spu_gov_init);
+module_exit(spu_gov_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
new file mode 100644
index 00000000000..96a433dd2d6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -0,0 +1,411 @@
+/*
+ * Cell Internal Interrupt Controller
+ *
+ * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ * IBM, Corp.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * TODO:
+ * - Fix various assumptions related to HW CPU numbers vs. linux CPU numbers
+ * vs node numbers in the setup code
+ * - Implement proper handling of maxcpus=1/2 (that is, routing of irqs from
+ * a non-active node to the active node)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+struct iic {
+ struct cbe_iic_thread_regs __iomem *regs;
+ u8 target_id;
+ u8 eoi_stack[16];
+ int eoi_ptr;
+ struct device_node *node;
+};
+
+static DEFINE_PER_CPU(struct iic, cpu_iic);
+#define IIC_NODE_COUNT 2
+static struct irq_host *iic_host;
+
+/* Convert between "pending" bits and hw irq number */
+static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits)
+{
+ unsigned char unit = bits.source & 0xf;
+ unsigned char node = bits.source >> 4;
+ unsigned char class = bits.class & 3;
+
+ /* Decode IPIs */
+ if (bits.flags & CBE_IIC_IRQ_IPI)
+ return IIC_IRQ_TYPE_IPI | (bits.prio >> 4);
+ else
+ return (node << IIC_IRQ_NODE_SHIFT) | (class << 4) | unit;
+}
+
+static void iic_mask(struct irq_data *d)
+{
+}
+
+static void iic_unmask(struct irq_data *d)
+{
+}
+
+static void iic_eoi(struct irq_data *d)
+{
+ struct iic *iic = &__get_cpu_var(cpu_iic);
+ out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]);
+ BUG_ON(iic->eoi_ptr < 0);
+}
+
+static struct irq_chip iic_chip = {
+ .name = "CELL-IIC",
+ .irq_mask = iic_mask,
+ .irq_unmask = iic_unmask,
+ .irq_eoi = iic_eoi,
+};
+
+
+static void iic_ioexc_eoi(struct irq_data *d)
+{
+}
+
+static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct cbe_iic_regs __iomem *node_iic =
+ (void __iomem *)irq_desc_get_handler_data(desc);
+ unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC;
+ unsigned long bits, ack;
+ int cascade;
+
+ for (;;) {
+ bits = in_be64(&node_iic->iic_is);
+ if (bits == 0)
+ break;
+ /* pre-ack edge interrupts */
+ ack = bits & IIC_ISR_EDGE_MASK;
+ if (ack)
+ out_be64(&node_iic->iic_is, ack);
+ /* handle them */
+ for (cascade = 63; cascade >= 0; cascade--)
+ if (bits & (0x8000000000000000UL >> cascade)) {
+ unsigned int cirq =
+ irq_linear_revmap(iic_host,
+ base | cascade);
+ if (cirq != NO_IRQ)
+ generic_handle_irq(cirq);
+ }
+ /* post-ack level interrupts */
+ ack = bits & ~IIC_ISR_EDGE_MASK;
+ if (ack)
+ out_be64(&node_iic->iic_is, ack);
+ }
+ chip->irq_eoi(&desc->irq_data);
+}
+
+
+static struct irq_chip iic_ioexc_chip = {
+ .name = "CELL-IOEX",
+ .irq_mask = iic_mask,
+ .irq_unmask = iic_unmask,
+ .irq_eoi = iic_ioexc_eoi,
+};
+
+/* Get an IRQ number from the pending state register of the IIC */
+static unsigned int iic_get_irq(void)
+{
+ struct cbe_iic_pending_bits pending;
+ struct iic *iic;
+ unsigned int virq;
+
+ iic = &__get_cpu_var(cpu_iic);
+ *(unsigned long *) &pending =
+ in_be64((u64 __iomem *) &iic->regs->pending_destr);
+ if (!(pending.flags & CBE_IIC_IRQ_VALID))
+ return NO_IRQ;
+ virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending));
+ if (virq == NO_IRQ)
+ return NO_IRQ;
+ iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
+ BUG_ON(iic->eoi_ptr > 15);
+ return virq;
+}
+
+void iic_setup_cpu(void)
+{
+ out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff);
+}
+
+u8 iic_get_target_id(int cpu)
+{
+ return per_cpu(cpu_iic, cpu).target_id;
+}
+
+EXPORT_SYMBOL_GPL(iic_get_target_id);
+
+#ifdef CONFIG_SMP
+
+/* Use the highest interrupt priorities for IPI */
+static inline int iic_msg_to_irq(int msg)
+{
+ return IIC_IRQ_TYPE_IPI + 0xf - msg;
+}
+
+void iic_message_pass(int cpu, int msg)
+{
+ out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - msg) << 4);
+}
+
+struct irq_host *iic_get_irq_host(int node)
+{
+ return iic_host;
+}
+EXPORT_SYMBOL_GPL(iic_get_irq_host);
+
+static void iic_request_ipi(int msg)
+{
+ int virq;
+
+ virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg));
+ if (virq == NO_IRQ) {
+ printk(KERN_ERR
+ "iic: failed to map IPI %s\n", smp_ipi_name[msg]);
+ return;
+ }
+
+ /*
+ * If smp_request_message_ipi encounters an error it will notify
+ * the error. If a message is not needed it will return non-zero.
+ */
+ if (smp_request_message_ipi(virq, msg))
+ irq_dispose_mapping(virq);
+}
+
+void iic_request_IPIs(void)
+{
+ iic_request_ipi(PPC_MSG_CALL_FUNCTION);
+ iic_request_ipi(PPC_MSG_RESCHEDULE);
+ iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE);
+ iic_request_ipi(PPC_MSG_DEBUGGER_BREAK);
+}
+
+#endif /* CONFIG_SMP */
+
+
+static int iic_host_match(struct irq_host *h, struct device_node *node)
+{
+ return of_device_is_compatible(node,
+ "IBM,CBEA-Internal-Interrupt-Controller");
+}
+
+static int iic_host_map(struct irq_host *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ switch (hw & IIC_IRQ_TYPE_MASK) {
+ case IIC_IRQ_TYPE_IPI:
+ irq_set_chip_and_handler(virq, &iic_chip, handle_percpu_irq);
+ break;
+ case IIC_IRQ_TYPE_IOEXC:
+ irq_set_chip_and_handler(virq, &iic_ioexc_chip,
+ handle_edge_eoi_irq);
+ break;
+ default:
+ irq_set_chip_and_handler(virq, &iic_chip, handle_edge_eoi_irq);
+ }
+ return 0;
+}
+
+static int iic_host_xlate(struct irq_host *h, struct device_node *ct,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+ unsigned int node, ext, unit, class;
+ const u32 *val;
+
+ if (!of_device_is_compatible(ct,
+ "IBM,CBEA-Internal-Interrupt-Controller"))
+ return -ENODEV;
+ if (intsize != 1)
+ return -ENODEV;
+ val = of_get_property(ct, "#interrupt-cells", NULL);
+ if (val == NULL || *val != 1)
+ return -ENODEV;
+
+ node = intspec[0] >> 24;
+ ext = (intspec[0] >> 16) & 0xff;
+ class = (intspec[0] >> 8) & 0xff;
+ unit = intspec[0] & 0xff;
+
+ /* Check if node is in supported range */
+ if (node > 1)
+ return -EINVAL;
+
+ /* Build up interrupt number, special case for IO exceptions */
+ *out_hwirq = (node << IIC_IRQ_NODE_SHIFT);
+ if (unit == IIC_UNIT_IIC && class == 1)
+ *out_hwirq |= IIC_IRQ_TYPE_IOEXC | ext;
+ else
+ *out_hwirq |= IIC_IRQ_TYPE_NORMAL |
+ (class << IIC_IRQ_CLASS_SHIFT) | unit;
+
+ /* Dummy flags, ignored by iic code */
+ *out_flags = IRQ_TYPE_EDGE_RISING;
+
+ return 0;
+}
+
+static struct irq_host_ops iic_host_ops = {
+ .match = iic_host_match,
+ .map = iic_host_map,
+ .xlate = iic_host_xlate,
+};
+
+static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr,
+ struct device_node *node)
+{
+ /* XXX FIXME: should locate the linux CPU number from the HW cpu
+ * number properly. We are lucky for now
+ */
+ struct iic *iic = &per_cpu(cpu_iic, hw_cpu);
+
+ iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs));
+ BUG_ON(iic->regs == NULL);
+
+ iic->target_id = ((hw_cpu & 2) << 3) | ((hw_cpu & 1) ? 0xf : 0xe);
+ iic->eoi_stack[0] = 0xff;
+ iic->node = of_node_get(node);
+ out_be64(&iic->regs->prio, 0);
+
+ printk(KERN_INFO "IIC for CPU %d target id 0x%x : %s\n",
+ hw_cpu, iic->target_id, node->full_name);
+}
+
+static int __init setup_iic(void)
+{
+ struct device_node *dn;
+ struct resource r0, r1;
+ unsigned int node, cascade, found = 0;
+ struct cbe_iic_regs __iomem *node_iic;
+ const u32 *np;
+
+ for (dn = NULL;
+ (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) {
+ if (!of_device_is_compatible(dn,
+ "IBM,CBEA-Internal-Interrupt-Controller"))
+ continue;
+ np = of_get_property(dn, "ibm,interrupt-server-ranges", NULL);
+ if (np == NULL) {
+ printk(KERN_WARNING "IIC: CPU association not found\n");
+ of_node_put(dn);
+ return -ENODEV;
+ }
+ if (of_address_to_resource(dn, 0, &r0) ||
+ of_address_to_resource(dn, 1, &r1)) {
+ printk(KERN_WARNING "IIC: Can't resolve addresses\n");
+ of_node_put(dn);
+ return -ENODEV;
+ }
+ found++;
+ init_one_iic(np[0], r0.start, dn);
+ init_one_iic(np[1], r1.start, dn);
+
+ /* Setup cascade for IO exceptions. XXX cleanup tricks to get
+ * node vs CPU etc...
+ * Note that we configure the IIC_IRR here with a hard coded
+ * priority of 1. We might want to improve that later.
+ */
+ node = np[0] >> 1;
+ node_iic = cbe_get_cpu_iic_regs(np[0]);
+ cascade = node << IIC_IRQ_NODE_SHIFT;
+ cascade |= 1 << IIC_IRQ_CLASS_SHIFT;
+ cascade |= IIC_UNIT_IIC;
+ cascade = irq_create_mapping(iic_host, cascade);
+ if (cascade == NO_IRQ)
+ continue;
+ /*
+ * irq_data is a generic pointer that gets passed back
+ * to us later, so the forced cast is fine.
+ */
+ irq_set_handler_data(cascade, (void __force *)node_iic);
+ irq_set_chained_handler(cascade, iic_ioexc_cascade);
+ out_be64(&node_iic->iic_ir,
+ (1 << 12) /* priority */ |
+ (node << 4) /* dest node */ |
+ IIC_UNIT_THREAD_0 /* route them to thread 0 */);
+ /* Flush pending (make sure it triggers if there is
+ * anything pending
+ */
+ out_be64(&node_iic->iic_is, 0xfffffffffffffffful);
+ }
+
+ if (found)
+ return 0;
+ else
+ return -ENODEV;
+}
+
+void __init iic_init_IRQ(void)
+{
+ /* Setup an irq host data structure */
+ iic_host = irq_alloc_host(NULL, IRQ_HOST_MAP_LINEAR, IIC_SOURCE_COUNT,
+ &iic_host_ops, IIC_IRQ_INVALID);
+ BUG_ON(iic_host == NULL);
+ irq_set_default_host(iic_host);
+
+ /* Discover and initialize iics */
+ if (setup_iic() < 0)
+ panic("IIC: Failed to initialize !\n");
+
+ /* Set master interrupt handling function */
+ ppc_md.get_irq = iic_get_irq;
+
+ /* Enable on current CPU */
+ iic_setup_cpu();
+}
+
+void iic_set_interrupt_routing(int cpu, int thread, int priority)
+{
+ struct cbe_iic_regs __iomem *iic_regs = cbe_get_cpu_iic_regs(cpu);
+ u64 iic_ir = 0;
+ int node = cpu >> 1;
+
+ /* Set which node and thread will handle the next interrupt */
+ iic_ir |= CBE_IIC_IR_PRIO(priority) |
+ CBE_IIC_IR_DEST_NODE(node);
+ if (thread == 0)
+ iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_0);
+ else
+ iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_1);
+ out_be64(&iic_regs->iic_ir, iic_ir);
+}
diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h
new file mode 100644
index 00000000000..4f60ae6ca35
--- /dev/null
+++ b/arch/powerpc/platforms/cell/interrupt.h
@@ -0,0 +1,89 @@
+#ifndef ASM_CELL_PIC_H
+#define ASM_CELL_PIC_H
+#ifdef __KERNEL__
+/*
+ * Mapping of IIC pending bits into per-node interrupt numbers.
+ *
+ * Interrupt numbers are in the range 0...0x1ff where the top bit
+ * (0x100) represent the source node. Only 2 nodes are supported with
+ * the current code though it's trivial to extend that if necessary using
+ * higher level bits
+ *
+ * The bottom 8 bits are split into 2 type bits and 6 data bits that
+ * depend on the type:
+ *
+ * 00 (0x00 | data) : normal interrupt. data is (class << 4) | source
+ * 01 (0x40 | data) : IO exception. data is the exception number as
+ * defined by bit numbers in IIC_SR
+ * 10 (0x80 | data) : IPI. data is the IPI number (obtained from the priority)
+ * and node is always 0 (IPIs are per-cpu, their source is
+ * not relevant)
+ * 11 (0xc0 | data) : reserved
+ *
+ * In addition, interrupt number 0x80000000 is defined as always invalid
+ * (that is the node field is expected to never extend to move than 23 bits)
+ *
+ */
+
+enum {
+ IIC_IRQ_INVALID = 0x80000000u,
+ IIC_IRQ_NODE_MASK = 0x100,
+ IIC_IRQ_NODE_SHIFT = 8,
+ IIC_IRQ_MAX = 0x1ff,
+ IIC_IRQ_TYPE_MASK = 0xc0,
+ IIC_IRQ_TYPE_NORMAL = 0x00,
+ IIC_IRQ_TYPE_IOEXC = 0x40,
+ IIC_IRQ_TYPE_IPI = 0x80,
+ IIC_IRQ_CLASS_SHIFT = 4,
+ IIC_IRQ_CLASS_0 = 0x00,
+ IIC_IRQ_CLASS_1 = 0x10,
+ IIC_IRQ_CLASS_2 = 0x20,
+ IIC_SOURCE_COUNT = 0x200,
+
+ /* Here are defined the various source/dest units. Avoid using those
+ * definitions if you can, they are mostly here for reference
+ */
+ IIC_UNIT_SPU_0 = 0x4,
+ IIC_UNIT_SPU_1 = 0x7,
+ IIC_UNIT_SPU_2 = 0x3,
+ IIC_UNIT_SPU_3 = 0x8,
+ IIC_UNIT_SPU_4 = 0x2,
+ IIC_UNIT_SPU_5 = 0x9,
+ IIC_UNIT_SPU_6 = 0x1,
+ IIC_UNIT_SPU_7 = 0xa,
+ IIC_UNIT_IOC_0 = 0x0,
+ IIC_UNIT_IOC_1 = 0xb,
+ IIC_UNIT_THREAD_0 = 0xe, /* target only */
+ IIC_UNIT_THREAD_1 = 0xf, /* target only */
+ IIC_UNIT_IIC = 0xe, /* source only (IO exceptions) */
+
+ /* Base numbers for the external interrupts */
+ IIC_IRQ_EXT_IOIF0 =
+ IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_0,
+ IIC_IRQ_EXT_IOIF1 =
+ IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_1,
+
+ /* Base numbers for the IIC_ISR interrupts */
+ IIC_IRQ_IOEX_TMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 63,
+ IIC_IRQ_IOEX_PMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 62,
+ IIC_IRQ_IOEX_ATI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 61,
+ IIC_IRQ_IOEX_MATBFI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 60,
+ IIC_IRQ_IOEX_ELDI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 59,
+
+ /* Which bits in IIC_ISR are edge sensitive */
+ IIC_ISR_EDGE_MASK = 0x4ul,
+};
+
+extern void iic_init_IRQ(void);
+extern void iic_message_pass(int cpu, int msg);
+extern void iic_request_IPIs(void);
+extern void iic_setup_cpu(void);
+
+extern u8 iic_get_target_id(int cpu);
+
+extern void spider_init_IRQ(void);
+
+extern void iic_set_interrupt_routing(int cpu, int thread, int priority);
+
+#endif
+#endif /* ASM_CELL_PIC_H */
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
new file mode 100644
index 00000000000..ae9fc7bc17d
--- /dev/null
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -0,0 +1,1235 @@
+/*
+ * IOMMU implementation for Cell Broadband Processor Architecture
+ *
+ * (C) Copyright IBM Corporation 2006-2008
+ *
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
+
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/udbg.h>
+#include <asm/firmware.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+/* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages
+ * instead of leaving them mapped to some dummy page. This can be
+ * enabled once the appropriate workarounds for spider bugs have
+ * been enabled
+ */
+#define CELL_IOMMU_REAL_UNMAP
+
+/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of
+ * IO PTEs based on the transfer direction. That can be enabled
+ * once spider-net has been fixed to pass the correct direction
+ * to the DMA mapping functions
+ */
+#define CELL_IOMMU_STRICT_PROTECTION
+
+
+#define NR_IOMMUS 2
+
+/* IOC mmap registers */
+#define IOC_Reg_Size 0x2000
+
+#define IOC_IOPT_CacheInvd 0x908
+#define IOC_IOPT_CacheInvd_NE_Mask 0xffe0000000000000ul
+#define IOC_IOPT_CacheInvd_IOPTE_Mask 0x000003fffffffff8ul
+#define IOC_IOPT_CacheInvd_Busy 0x0000000000000001ul
+
+#define IOC_IOST_Origin 0x918
+#define IOC_IOST_Origin_E 0x8000000000000000ul
+#define IOC_IOST_Origin_HW 0x0000000000000800ul
+#define IOC_IOST_Origin_HL 0x0000000000000400ul
+
+#define IOC_IO_ExcpStat 0x920
+#define IOC_IO_ExcpStat_V 0x8000000000000000ul
+#define IOC_IO_ExcpStat_SPF_Mask 0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_S 0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_P 0x2000000000000000ul
+#define IOC_IO_ExcpStat_ADDR_Mask 0x00000007fffff000ul
+#define IOC_IO_ExcpStat_RW_Mask 0x0000000000000800ul
+#define IOC_IO_ExcpStat_IOID_Mask 0x00000000000007fful
+
+#define IOC_IO_ExcpMask 0x928
+#define IOC_IO_ExcpMask_SFE 0x4000000000000000ul
+#define IOC_IO_ExcpMask_PFE 0x2000000000000000ul
+
+#define IOC_IOCmd_Offset 0x1000
+
+#define IOC_IOCmd_Cfg 0xc00
+#define IOC_IOCmd_Cfg_TE 0x0000800000000000ul
+
+
+/* Segment table entries */
+#define IOSTE_V 0x8000000000000000ul /* valid */
+#define IOSTE_H 0x4000000000000000ul /* cache hint */
+#define IOSTE_PT_Base_RPN_Mask 0x3ffffffffffff000ul /* base RPN of IOPT */
+#define IOSTE_NPPT_Mask 0x0000000000000fe0ul /* no. pages in IOPT */
+#define IOSTE_PS_Mask 0x0000000000000007ul /* page size */
+#define IOSTE_PS_4K 0x0000000000000001ul /* - 4kB */
+#define IOSTE_PS_64K 0x0000000000000003ul /* - 64kB */
+#define IOSTE_PS_1M 0x0000000000000005ul /* - 1MB */
+#define IOSTE_PS_16M 0x0000000000000007ul /* - 16MB */
+
+
+/* IOMMU sizing */
+#define IO_SEGMENT_SHIFT 28
+#define IO_PAGENO_BITS(shift) (IO_SEGMENT_SHIFT - (shift))
+
+/* The high bit needs to be set on every DMA address */
+#define SPIDER_DMA_OFFSET 0x80000000ul
+
+struct iommu_window {
+ struct list_head list;
+ struct cbe_iommu *iommu;
+ unsigned long offset;
+ unsigned long size;
+ unsigned int ioid;
+ struct iommu_table table;
+};
+
+#define NAMESIZE 8
+struct cbe_iommu {
+ int nid;
+ char name[NAMESIZE];
+ void __iomem *xlate_regs;
+ void __iomem *cmd_regs;
+ unsigned long *stab;
+ unsigned long *ptab;
+ void *pad_page;
+ struct list_head windows;
+};
+
+/* Static array of iommus, one per node
+ * each contains a list of windows, keyed from dma_window property
+ * - on bus setup, look for a matching window, or create one
+ * - on dev setup, assign iommu_table ptr
+ */
+static struct cbe_iommu iommus[NR_IOMMUS];
+static int cbe_nr_iommus;
+
+static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
+ long n_ptes)
+{
+ u64 __iomem *reg;
+ u64 val;
+ long n;
+
+ reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
+
+ while (n_ptes > 0) {
+ /* we can invalidate up to 1 << 11 PTEs at once */
+ n = min(n_ptes, 1l << 11);
+ val = (((n /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask)
+ | (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask)
+ | IOC_IOPT_CacheInvd_Busy;
+
+ out_be64(reg, val);
+ while (in_be64(reg) & IOC_IOPT_CacheInvd_Busy)
+ ;
+
+ n_ptes -= n;
+ pte += n;
+ }
+}
+
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
+ unsigned long uaddr, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ int i;
+ unsigned long *io_pte, base_pte;
+ struct iommu_window *window =
+ container_of(tbl, struct iommu_window, table);
+
+ /* implementing proper protection causes problems with the spidernet
+ * driver - check mapping directions later, but allow read & write by
+ * default for now.*/
+#ifdef CELL_IOMMU_STRICT_PROTECTION
+ /* to avoid referencing a global, we use a trick here to setup the
+ * protection bit. "prot" is setup to be 3 fields of 4 bits apprended
+ * together for each of the 3 supported direction values. It is then
+ * shifted left so that the fields matching the desired direction
+ * lands on the appropriate bits, and other bits are masked out.
+ */
+ const unsigned long prot = 0xc48;
+ base_pte =
+ ((prot << (52 + 4 * direction)) &
+ (CBE_IOPTE_PP_W | CBE_IOPTE_PP_R)) |
+ CBE_IOPTE_M | CBE_IOPTE_SO_RW |
+ (window->ioid & CBE_IOPTE_IOID_Mask);
+#else
+ base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M |
+ CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask);
+#endif
+ if (unlikely(dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)))
+ base_pte &= ~CBE_IOPTE_SO_RW;
+
+ io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+ for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE)
+ io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask);
+
+ mb();
+
+ invalidate_tce_cache(window->iommu, io_pte, npages);
+
+ pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
+ index, npages, direction, base_pte);
+ return 0;
+}
+
+static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
+{
+
+ int i;
+ unsigned long *io_pte, pte;
+ struct iommu_window *window =
+ container_of(tbl, struct iommu_window, table);
+
+ pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages);
+
+#ifdef CELL_IOMMU_REAL_UNMAP
+ pte = 0;
+#else
+ /* spider bridge does PCI reads after freeing - insert a mapping
+ * to a scratch page instead of an invalid entry */
+ pte = CBE_IOPTE_PP_R | CBE_IOPTE_M | CBE_IOPTE_SO_RW |
+ __pa(window->iommu->pad_page) |
+ (window->ioid & CBE_IOPTE_IOID_Mask);
+#endif
+
+ io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+ for (i = 0; i < npages; i++)
+ io_pte[i] = pte;
+
+ mb();
+
+ invalidate_tce_cache(window->iommu, io_pte, npages);
+}
+
+static irqreturn_t ioc_interrupt(int irq, void *data)
+{
+ unsigned long stat, spf;
+ struct cbe_iommu *iommu = data;
+
+ stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
+ spf = stat & IOC_IO_ExcpStat_SPF_Mask;
+
+ /* Might want to rate limit it */
+ printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat);
+ printk(KERN_ERR " V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n",
+ !!(stat & IOC_IO_ExcpStat_V),
+ (spf == IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ',
+ (spf == IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ',
+ (stat & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write",
+ (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask));
+ printk(KERN_ERR " page=0x%016lx\n",
+ stat & IOC_IO_ExcpStat_ADDR_Mask);
+
+ /* clear interrupt */
+ stat &= ~IOC_IO_ExcpStat_V;
+ out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat);
+
+ return IRQ_HANDLED;
+}
+
+static int cell_iommu_find_ioc(int nid, unsigned long *base)
+{
+ struct device_node *np;
+ struct resource r;
+
+ *base = 0;
+
+ /* First look for new style /be nodes */
+ for_each_node_by_name(np, "ioc") {
+ if (of_node_to_nid(np) != nid)
+ continue;
+ if (of_address_to_resource(np, 0, &r)) {
+ printk(KERN_ERR "iommu: can't get address for %s\n",
+ np->full_name);
+ continue;
+ }
+ *base = r.start;
+ of_node_put(np);
+ return 0;
+ }
+
+ /* Ok, let's try the old way */
+ for_each_node_by_type(np, "cpu") {
+ const unsigned int *nidp;
+ const unsigned long *tmp;
+
+ nidp = of_get_property(np, "node-id", NULL);
+ if (nidp && *nidp == nid) {
+ tmp = of_get_property(np, "ioc-translation", NULL);
+ if (tmp) {
+ *base = *tmp;
+ of_node_put(np);
+ return 0;
+ }
+ }
+ }
+
+ return -ENODEV;
+}
+
+static void cell_iommu_setup_stab(struct cbe_iommu *iommu,
+ unsigned long dbase, unsigned long dsize,
+ unsigned long fbase, unsigned long fsize)
+{
+ struct page *page;
+ unsigned long segments, stab_size;
+
+ segments = max(dbase + dsize, fbase + fsize) >> IO_SEGMENT_SHIFT;
+
+ pr_debug("%s: iommu[%d]: segments: %lu\n",
+ __func__, iommu->nid, segments);
+
+ /* set up the segment table */
+ stab_size = segments * sizeof(unsigned long);
+ page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(stab_size));
+ BUG_ON(!page);
+ iommu->stab = page_address(page);
+ memset(iommu->stab, 0, stab_size);
+}
+
+static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu,
+ unsigned long base, unsigned long size, unsigned long gap_base,
+ unsigned long gap_size, unsigned long page_shift)
+{
+ struct page *page;
+ int i;
+ unsigned long reg, segments, pages_per_segment, ptab_size,
+ n_pte_pages, start_seg, *ptab;
+
+ start_seg = base >> IO_SEGMENT_SHIFT;
+ segments = size >> IO_SEGMENT_SHIFT;
+ pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift);
+ /* PTEs for each segment must start on a 4K bounday */
+ pages_per_segment = max(pages_per_segment,
+ (1 << 12) / sizeof(unsigned long));
+
+ ptab_size = segments * pages_per_segment * sizeof(unsigned long);
+ pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __func__,
+ iommu->nid, ptab_size, get_order(ptab_size));
+ page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size));
+ BUG_ON(!page);
+
+ ptab = page_address(page);
+ memset(ptab, 0, ptab_size);
+
+ /* number of 4K pages needed for a page table */
+ n_pte_pages = (pages_per_segment * sizeof(unsigned long)) >> 12;
+
+ pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n",
+ __func__, iommu->nid, iommu->stab, ptab,
+ n_pte_pages);
+
+ /* initialise the STEs */
+ reg = IOSTE_V | ((n_pte_pages - 1) << 5);
+
+ switch (page_shift) {
+ case 12: reg |= IOSTE_PS_4K; break;
+ case 16: reg |= IOSTE_PS_64K; break;
+ case 20: reg |= IOSTE_PS_1M; break;
+ case 24: reg |= IOSTE_PS_16M; break;
+ default: BUG();
+ }
+
+ gap_base = gap_base >> IO_SEGMENT_SHIFT;
+ gap_size = gap_size >> IO_SEGMENT_SHIFT;
+
+ pr_debug("Setting up IOMMU stab:\n");
+ for (i = start_seg; i < (start_seg + segments); i++) {
+ if (i >= gap_base && i < (gap_base + gap_size)) {
+ pr_debug("\toverlap at %d, skipping\n", i);
+ continue;
+ }
+ iommu->stab[i] = reg | (__pa(ptab) + (n_pte_pages << 12) *
+ (i - start_seg));
+ pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]);
+ }
+
+ return ptab;
+}
+
+static void cell_iommu_enable_hardware(struct cbe_iommu *iommu)
+{
+ int ret;
+ unsigned long reg, xlate_base;
+ unsigned int virq;
+
+ if (cell_iommu_find_ioc(iommu->nid, &xlate_base))
+ panic("%s: missing IOC register mappings for node %d\n",
+ __func__, iommu->nid);
+
+ iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size);
+ iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset;
+
+ /* ensure that the STEs have updated */
+ mb();
+
+ /* setup interrupts for the iommu. */
+ reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
+ out_be64(iommu->xlate_regs + IOC_IO_ExcpStat,
+ reg & ~IOC_IO_ExcpStat_V);
+ out_be64(iommu->xlate_regs + IOC_IO_ExcpMask,
+ IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE);
+
+ virq = irq_create_mapping(NULL,
+ IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT));
+ BUG_ON(virq == NO_IRQ);
+
+ ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu);
+ BUG_ON(ret);
+
+ /* set the IOC segment table origin register (and turn on the iommu) */
+ reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW;
+ out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg);
+ in_be64(iommu->xlate_regs + IOC_IOST_Origin);
+
+ /* turn on IO translation */
+ reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE;
+ out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg);
+}
+
+static void cell_iommu_setup_hardware(struct cbe_iommu *iommu,
+ unsigned long base, unsigned long size)
+{
+ cell_iommu_setup_stab(iommu, base, size, 0, 0);
+ iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0,
+ IOMMU_PAGE_SHIFT);
+ cell_iommu_enable_hardware(iommu);
+}
+
+#if 0/* Unused for now */
+static struct iommu_window *find_window(struct cbe_iommu *iommu,
+ unsigned long offset, unsigned long size)
+{
+ struct iommu_window *window;
+
+ /* todo: check for overlapping (but not equal) windows) */
+
+ list_for_each_entry(window, &(iommu->windows), list) {
+ if (window->offset == offset && window->size == size)
+ return window;
+ }
+
+ return NULL;
+}
+#endif
+
+static inline u32 cell_iommu_get_ioid(struct device_node *np)
+{
+ const u32 *ioid;
+
+ ioid = of_get_property(np, "ioid", NULL);
+ if (ioid == NULL) {
+ printk(KERN_WARNING "iommu: missing ioid for %s using 0\n",
+ np->full_name);
+ return 0;
+ }
+
+ return *ioid;
+}
+
+static struct iommu_window * __init
+cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
+ unsigned long offset, unsigned long size,
+ unsigned long pte_offset)
+{
+ struct iommu_window *window;
+ struct page *page;
+ u32 ioid;
+
+ ioid = cell_iommu_get_ioid(np);
+
+ window = kzalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid);
+ BUG_ON(window == NULL);
+
+ window->offset = offset;
+ window->size = size;
+ window->ioid = ioid;
+ window->iommu = iommu;
+
+ window->table.it_blocksize = 16;
+ window->table.it_base = (unsigned long)iommu->ptab;
+ window->table.it_index = iommu->nid;
+ window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT) + pte_offset;
+ window->table.it_size = size >> IOMMU_PAGE_SHIFT;
+
+ iommu_init_table(&window->table, iommu->nid);
+
+ pr_debug("\tioid %d\n", window->ioid);
+ pr_debug("\tblocksize %ld\n", window->table.it_blocksize);
+ pr_debug("\tbase 0x%016lx\n", window->table.it_base);
+ pr_debug("\toffset 0x%lx\n", window->table.it_offset);
+ pr_debug("\tsize %ld\n", window->table.it_size);
+
+ list_add(&window->list, &iommu->windows);
+
+ if (offset != 0)
+ return window;
+
+ /* We need to map and reserve the first IOMMU page since it's used
+ * by the spider workaround. In theory, we only need to do that when
+ * running on spider but it doesn't really matter.
+ *
+ * This code also assumes that we have a window that starts at 0,
+ * which is the case on all spider based blades.
+ */
+ page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0);
+ BUG_ON(!page);
+ iommu->pad_page = page_address(page);
+ clear_page(iommu->pad_page);
+
+ __set_bit(0, window->table.it_map);
+ tce_build_cell(&window->table, window->table.it_offset, 1,
+ (unsigned long)iommu->pad_page, DMA_TO_DEVICE, NULL);
+ window->table.it_hint = window->table.it_blocksize;
+
+ return window;
+}
+
+static struct cbe_iommu *cell_iommu_for_node(int nid)
+{
+ int i;
+
+ for (i = 0; i < cbe_nr_iommus; i++)
+ if (iommus[i].nid == nid)
+ return &iommus[i];
+ return NULL;
+}
+
+static unsigned long cell_dma_direct_offset;
+
+static unsigned long dma_iommu_fixed_base;
+
+/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */
+static int iommu_fixed_is_weak;
+
+static struct iommu_table *cell_get_iommu_table(struct device *dev)
+{
+ struct iommu_window *window;
+ struct cbe_iommu *iommu;
+
+ /* Current implementation uses the first window available in that
+ * node's iommu. We -might- do something smarter later though it may
+ * never be necessary
+ */
+ iommu = cell_iommu_for_node(dev_to_node(dev));
+ if (iommu == NULL || list_empty(&iommu->windows)) {
+ printk(KERN_ERR "iommu: missing iommu for %s (node %d)\n",
+ dev->of_node ? dev->of_node->full_name : "?",
+ dev_to_node(dev));
+ return NULL;
+ }
+ window = list_entry(iommu->windows.next, struct iommu_window, list);
+
+ return &window->table;
+}
+
+/* A coherent allocation implies strong ordering */
+
+static void *dma_fixed_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ if (iommu_fixed_is_weak)
+ return iommu_alloc_coherent(dev, cell_get_iommu_table(dev),
+ size, dma_handle,
+ device_to_mask(dev), flag,
+ dev_to_node(dev));
+ else
+ return dma_direct_ops.alloc_coherent(dev, size, dma_handle,
+ flag);
+}
+
+static void dma_fixed_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ if (iommu_fixed_is_weak)
+ iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr,
+ dma_handle);
+ else
+ dma_direct_ops.free_coherent(dev, size, vaddr, dma_handle);
+}
+
+static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+ return dma_direct_ops.map_page(dev, page, offset, size,
+ direction, attrs);
+ else
+ return iommu_map_page(dev, cell_get_iommu_table(dev), page,
+ offset, size, device_to_mask(dev),
+ direction, attrs);
+}
+
+static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+ dma_direct_ops.unmap_page(dev, dma_addr, size, direction,
+ attrs);
+ else
+ iommu_unmap_page(cell_get_iommu_table(dev), dma_addr, size,
+ direction, attrs);
+}
+
+static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+ return dma_direct_ops.map_sg(dev, sg, nents, direction, attrs);
+ else
+ return iommu_map_sg(dev, cell_get_iommu_table(dev), sg, nents,
+ device_to_mask(dev), direction, attrs);
+}
+
+static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+ dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs);
+ else
+ iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, direction,
+ attrs);
+}
+
+static int dma_fixed_dma_supported(struct device *dev, u64 mask)
+{
+ return mask == DMA_BIT_MASK(64);
+}
+
+static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask);
+
+struct dma_map_ops dma_iommu_fixed_ops = {
+ .alloc_coherent = dma_fixed_alloc_coherent,
+ .free_coherent = dma_fixed_free_coherent,
+ .map_sg = dma_fixed_map_sg,
+ .unmap_sg = dma_fixed_unmap_sg,
+ .dma_supported = dma_fixed_dma_supported,
+ .set_dma_mask = dma_set_mask_and_switch,
+ .map_page = dma_fixed_map_page,
+ .unmap_page = dma_fixed_unmap_page,
+};
+
+static void cell_dma_dev_setup_fixed(struct device *dev);
+
+static void cell_dma_dev_setup(struct device *dev)
+{
+ /* Order is important here, these are not mutually exclusive */
+ if (get_dma_ops(dev) == &dma_iommu_fixed_ops)
+ cell_dma_dev_setup_fixed(dev);
+ else if (get_pci_dma_ops() == &dma_iommu_ops)
+ set_iommu_table_base(dev, cell_get_iommu_table(dev));
+ else if (get_pci_dma_ops() == &dma_direct_ops)
+ set_dma_offset(dev, cell_dma_direct_offset);
+ else
+ BUG();
+}
+
+static void cell_pci_dma_dev_setup(struct pci_dev *dev)
+{
+ cell_dma_dev_setup(&dev->dev);
+}
+
+static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct device *dev = data;
+
+ /* We are only intereted in device addition */
+ if (action != BUS_NOTIFY_ADD_DEVICE)
+ return 0;
+
+ /* We use the PCI DMA ops */
+ dev->archdata.dma_ops = get_pci_dma_ops();
+
+ cell_dma_dev_setup(dev);
+
+ return 0;
+}
+
+static struct notifier_block cell_of_bus_notifier = {
+ .notifier_call = cell_of_bus_notify
+};
+
+static int __init cell_iommu_get_window(struct device_node *np,
+ unsigned long *base,
+ unsigned long *size)
+{
+ const void *dma_window;
+ unsigned long index;
+
+ /* Use ibm,dma-window if available, else, hard code ! */
+ dma_window = of_get_property(np, "ibm,dma-window", NULL);
+ if (dma_window == NULL) {
+ *base = 0;
+ *size = 0x80000000u;
+ return -ENODEV;
+ }
+
+ of_parse_dma_window(np, dma_window, &index, base, size);
+ return 0;
+}
+
+static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
+{
+ struct cbe_iommu *iommu;
+ int nid, i;
+
+ /* Get node ID */
+ nid = of_node_to_nid(np);
+ if (nid < 0) {
+ printk(KERN_ERR "iommu: failed to get node for %s\n",
+ np->full_name);
+ return NULL;
+ }
+ pr_debug("iommu: setting up iommu for node %d (%s)\n",
+ nid, np->full_name);
+
+ /* XXX todo: If we can have multiple windows on the same IOMMU, which
+ * isn't the case today, we probably want here to check wether the
+ * iommu for that node is already setup.
+ * However, there might be issue with getting the size right so let's
+ * ignore that for now. We might want to completely get rid of the
+ * multiple window support since the cell iommu supports per-page ioids
+ */
+
+ if (cbe_nr_iommus >= NR_IOMMUS) {
+ printk(KERN_ERR "iommu: too many IOMMUs detected ! (%s)\n",
+ np->full_name);
+ return NULL;
+ }
+
+ /* Init base fields */
+ i = cbe_nr_iommus++;
+ iommu = &iommus[i];
+ iommu->stab = NULL;
+ iommu->nid = nid;
+ snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i);
+ INIT_LIST_HEAD(&iommu->windows);
+
+ return iommu;
+}
+
+static void __init cell_iommu_init_one(struct device_node *np,
+ unsigned long offset)
+{
+ struct cbe_iommu *iommu;
+ unsigned long base, size;
+
+ iommu = cell_iommu_alloc(np);
+ if (!iommu)
+ return;
+
+ /* Obtain a window for it */
+ cell_iommu_get_window(np, &base, &size);
+
+ pr_debug("\ttranslating window 0x%lx...0x%lx\n",
+ base, base + size - 1);
+
+ /* Initialize the hardware */
+ cell_iommu_setup_hardware(iommu, base, size);
+
+ /* Setup the iommu_table */
+ cell_iommu_setup_window(iommu, np, base, size,
+ offset >> IOMMU_PAGE_SHIFT);
+}
+
+static void __init cell_disable_iommus(void)
+{
+ int node;
+ unsigned long base, val;
+ void __iomem *xregs, *cregs;
+
+ /* Make sure IOC translation is disabled on all nodes */
+ for_each_online_node(node) {
+ if (cell_iommu_find_ioc(node, &base))
+ continue;
+ xregs = ioremap(base, IOC_Reg_Size);
+ if (xregs == NULL)
+ continue;
+ cregs = xregs + IOC_IOCmd_Offset;
+
+ pr_debug("iommu: cleaning up iommu on node %d\n", node);
+
+ out_be64(xregs + IOC_IOST_Origin, 0);
+ (void)in_be64(xregs + IOC_IOST_Origin);
+ val = in_be64(cregs + IOC_IOCmd_Cfg);
+ val &= ~IOC_IOCmd_Cfg_TE;
+ out_be64(cregs + IOC_IOCmd_Cfg, val);
+ (void)in_be64(cregs + IOC_IOCmd_Cfg);
+
+ iounmap(xregs);
+ }
+}
+
+static int __init cell_iommu_init_disabled(void)
+{
+ struct device_node *np = NULL;
+ unsigned long base = 0, size;
+
+ /* When no iommu is present, we use direct DMA ops */
+ set_pci_dma_ops(&dma_direct_ops);
+
+ /* First make sure all IOC translation is turned off */
+ cell_disable_iommus();
+
+ /* If we have no Axon, we set up the spider DMA magic offset */
+ if (of_find_node_by_name(NULL, "axon") == NULL)
+ cell_dma_direct_offset = SPIDER_DMA_OFFSET;
+
+ /* Now we need to check to see where the memory is mapped
+ * in PCI space. We assume that all busses use the same dma
+ * window which is always the case so far on Cell, thus we
+ * pick up the first pci-internal node we can find and check
+ * the DMA window from there.
+ */
+ for_each_node_by_name(np, "axon") {
+ if (np->parent == NULL || np->parent->parent != NULL)
+ continue;
+ if (cell_iommu_get_window(np, &base, &size) == 0)
+ break;
+ }
+ if (np == NULL) {
+ for_each_node_by_name(np, "pci-internal") {
+ if (np->parent == NULL || np->parent->parent != NULL)
+ continue;
+ if (cell_iommu_get_window(np, &base, &size) == 0)
+ break;
+ }
+ }
+ of_node_put(np);
+
+ /* If we found a DMA window, we check if it's big enough to enclose
+ * all of physical memory. If not, we force enable IOMMU
+ */
+ if (np && size < memblock_end_of_DRAM()) {
+ printk(KERN_WARNING "iommu: force-enabled, dma window"
+ " (%ldMB) smaller than total memory (%lldMB)\n",
+ size >> 20, memblock_end_of_DRAM() >> 20);
+ return -ENODEV;
+ }
+
+ cell_dma_direct_offset += base;
+
+ if (cell_dma_direct_offset != 0)
+ ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup;
+
+ printk("iommu: disabled, direct DMA offset is 0x%lx\n",
+ cell_dma_direct_offset);
+
+ return 0;
+}
+
+/*
+ * Fixed IOMMU mapping support
+ *
+ * This code adds support for setting up a fixed IOMMU mapping on certain
+ * cell machines. For 64-bit devices this avoids the performance overhead of
+ * mapping and unmapping pages at runtime. 32-bit devices are unable to use
+ * the fixed mapping.
+ *
+ * The fixed mapping is established at boot, and maps all of physical memory
+ * 1:1 into device space at some offset. On machines with < 30 GB of memory
+ * we setup the fixed mapping immediately above the normal IOMMU window.
+ *
+ * For example a machine with 4GB of memory would end up with the normal
+ * IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In
+ * this case a 64-bit device wishing to DMA to 1GB would be told to DMA to
+ * 3GB, plus any offset required by firmware. The firmware offset is encoded
+ * in the "dma-ranges" property.
+ *
+ * On machines with 30GB or more of memory, we are unable to place the fixed
+ * mapping above the normal IOMMU window as we would run out of address space.
+ * Instead we move the normal IOMMU window to coincide with the hash page
+ * table, this region does not need to be part of the fixed mapping as no
+ * device should ever be DMA'ing to it. We then setup the fixed mapping
+ * from 0 to 32GB.
+ */
+
+static u64 cell_iommu_get_fixed_address(struct device *dev)
+{
+ u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR;
+ struct device_node *np;
+ const u32 *ranges = NULL;
+ int i, len, best, naddr, nsize, pna, range_size;
+
+ np = of_node_get(dev->of_node);
+ while (1) {
+ naddr = of_n_addr_cells(np);
+ nsize = of_n_size_cells(np);
+ np = of_get_next_parent(np);
+ if (!np)
+ break;
+
+ ranges = of_get_property(np, "dma-ranges", &len);
+
+ /* Ignore empty ranges, they imply no translation required */
+ if (ranges && len > 0)
+ break;
+ }
+
+ if (!ranges) {
+ dev_dbg(dev, "iommu: no dma-ranges found\n");
+ goto out;
+ }
+
+ len /= sizeof(u32);
+
+ pna = of_n_addr_cells(np);
+ range_size = naddr + nsize + pna;
+
+ /* dma-ranges format:
+ * child addr : naddr cells
+ * parent addr : pna cells
+ * size : nsize cells
+ */
+ for (i = 0, best = -1, best_size = 0; i < len; i += range_size) {
+ cpu_addr = of_translate_dma_address(np, ranges + i + naddr);
+ size = of_read_number(ranges + i + naddr + pna, nsize);
+
+ if (cpu_addr == 0 && size > best_size) {
+ best = i;
+ best_size = size;
+ }
+ }
+
+ if (best >= 0) {
+ dev_addr = of_read_number(ranges + best, naddr);
+ } else
+ dev_dbg(dev, "iommu: no suitable range found!\n");
+
+out:
+ of_node_put(np);
+
+ return dev_addr;
+}
+
+static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask)
+{
+ if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+ return -EIO;
+
+ if (dma_mask == DMA_BIT_MASK(64) &&
+ cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
+ {
+ dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+ set_dma_ops(dev, &dma_iommu_fixed_ops);
+ } else {
+ dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
+ set_dma_ops(dev, get_pci_dma_ops());
+ }
+
+ cell_dma_dev_setup(dev);
+
+ *dev->dma_mask = dma_mask;
+
+ return 0;
+}
+
+static void cell_dma_dev_setup_fixed(struct device *dev)
+{
+ u64 addr;
+
+ addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base;
+ set_dma_offset(dev, addr);
+
+ dev_dbg(dev, "iommu: fixed addr = %llx\n", addr);
+}
+
+static void insert_16M_pte(unsigned long addr, unsigned long *ptab,
+ unsigned long base_pte)
+{
+ unsigned long segment, offset;
+
+ segment = addr >> IO_SEGMENT_SHIFT;
+ offset = (addr >> 24) - (segment << IO_PAGENO_BITS(24));
+ ptab = ptab + (segment * (1 << 12) / sizeof(unsigned long));
+
+ pr_debug("iommu: addr %lx ptab %p segment %lx offset %lx\n",
+ addr, ptab, segment, offset);
+
+ ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask);
+}
+
+static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu,
+ struct device_node *np, unsigned long dbase, unsigned long dsize,
+ unsigned long fbase, unsigned long fsize)
+{
+ unsigned long base_pte, uaddr, ioaddr, *ptab;
+
+ ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize, 24);
+
+ dma_iommu_fixed_base = fbase;
+
+ pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase);
+
+ base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M |
+ (cell_iommu_get_ioid(np) & CBE_IOPTE_IOID_Mask);
+
+ if (iommu_fixed_is_weak)
+ pr_info("IOMMU: Using weak ordering for fixed mapping\n");
+ else {
+ pr_info("IOMMU: Using strong ordering for fixed mapping\n");
+ base_pte |= CBE_IOPTE_SO_RW;
+ }
+
+ for (uaddr = 0; uaddr < fsize; uaddr += (1 << 24)) {
+ /* Don't touch the dynamic region */
+ ioaddr = uaddr + fbase;
+ if (ioaddr >= dbase && ioaddr < (dbase + dsize)) {
+ pr_debug("iommu: fixed/dynamic overlap, skipping\n");
+ continue;
+ }
+
+ insert_16M_pte(uaddr, ptab, base_pte);
+ }
+
+ mb();
+}
+
+static int __init cell_iommu_fixed_mapping_init(void)
+{
+ unsigned long dbase, dsize, fbase, fsize, hbase, hend;
+ struct cbe_iommu *iommu;
+ struct device_node *np;
+
+ /* The fixed mapping is only supported on axon machines */
+ np = of_find_node_by_name(NULL, "axon");
+ of_node_put(np);
+
+ if (!np) {
+ pr_debug("iommu: fixed mapping disabled, no axons found\n");
+ return -1;
+ }
+
+ /* We must have dma-ranges properties for fixed mapping to work */
+ np = of_find_node_with_property(NULL, "dma-ranges");
+ of_node_put(np);
+
+ if (!np) {
+ pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
+ return -1;
+ }
+
+ /* The default setup is to have the fixed mapping sit after the
+ * dynamic region, so find the top of the largest IOMMU window
+ * on any axon, then add the size of RAM and that's our max value.
+ * If that is > 32GB we have to do other shennanigans.
+ */
+ fbase = 0;
+ for_each_node_by_name(np, "axon") {
+ cell_iommu_get_window(np, &dbase, &dsize);
+ fbase = max(fbase, dbase + dsize);
+ }
+
+ fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT);
+ fsize = memblock_phys_mem_size();
+
+ if ((fbase + fsize) <= 0x800000000ul)
+ hbase = 0; /* use the device tree window */
+ else {
+ /* If we're over 32 GB we need to cheat. We can't map all of
+ * RAM with the fixed mapping, and also fit the dynamic
+ * region. So try to place the dynamic region where the hash
+ * table sits, drivers never need to DMA to it, we don't
+ * need a fixed mapping for that area.
+ */
+ if (!htab_address) {
+ pr_debug("iommu: htab is NULL, on LPAR? Huh?\n");
+ return -1;
+ }
+ hbase = __pa(htab_address);
+ hend = hbase + htab_size_bytes;
+
+ /* The window must start and end on a segment boundary */
+ if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) ||
+ (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) {
+ pr_debug("iommu: hash window not segment aligned\n");
+ return -1;
+ }
+
+ /* Check the hash window fits inside the real DMA window */
+ for_each_node_by_name(np, "axon") {
+ cell_iommu_get_window(np, &dbase, &dsize);
+
+ if (hbase < dbase || (hend > (dbase + dsize))) {
+ pr_debug("iommu: hash window doesn't fit in"
+ "real DMA window\n");
+ return -1;
+ }
+ }
+
+ fbase = 0;
+ }
+
+ /* Setup the dynamic regions */
+ for_each_node_by_name(np, "axon") {
+ iommu = cell_iommu_alloc(np);
+ BUG_ON(!iommu);
+
+ if (hbase == 0)
+ cell_iommu_get_window(np, &dbase, &dsize);
+ else {
+ dbase = hbase;
+ dsize = htab_size_bytes;
+ }
+
+ printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx "
+ "fixed window 0x%lx-0x%lx\n", iommu->nid, dbase,
+ dbase + dsize, fbase, fbase + fsize);
+
+ cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize);
+ iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0,
+ IOMMU_PAGE_SHIFT);
+ cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize,
+ fbase, fsize);
+ cell_iommu_enable_hardware(iommu);
+ cell_iommu_setup_window(iommu, np, dbase, dsize, 0);
+ }
+
+ dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch;
+ set_pci_dma_ops(&dma_iommu_ops);
+
+ return 0;
+}
+
+static int iommu_fixed_disabled;
+
+static int __init setup_iommu_fixed(char *str)
+{
+ struct device_node *pciep;
+
+ if (strcmp(str, "off") == 0)
+ iommu_fixed_disabled = 1;
+
+ /* If we can find a pcie-endpoint in the device tree assume that
+ * we're on a triblade or a CAB so by default the fixed mapping
+ * should be set to be weakly ordered; but only if the boot
+ * option WASN'T set for strong ordering
+ */
+ pciep = of_find_node_by_type(NULL, "pcie-endpoint");
+
+ if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
+ iommu_fixed_is_weak = 1;
+
+ of_node_put(pciep);
+
+ return 1;
+}
+__setup("iommu_fixed=", setup_iommu_fixed);
+
+static u64 cell_dma_get_required_mask(struct device *dev)
+{
+ struct dma_map_ops *dma_ops;
+
+ if (!dev->dma_mask)
+ return 0;
+
+ if (!iommu_fixed_disabled &&
+ cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
+ return DMA_BIT_MASK(64);
+
+ dma_ops = get_dma_ops(dev);
+ if (dma_ops->get_required_mask)
+ return dma_ops->get_required_mask(dev);
+
+ WARN_ONCE(1, "no get_required_mask in %p ops", dma_ops);
+
+ return DMA_BIT_MASK(64);
+}
+
+static int __init cell_iommu_init(void)
+{
+ struct device_node *np;
+
+ /* If IOMMU is disabled or we have little enough RAM to not need
+ * to enable it, we setup a direct mapping.
+ *
+ * Note: should we make sure we have the IOMMU actually disabled ?
+ */
+ if (iommu_is_off ||
+ (!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull))
+ if (cell_iommu_init_disabled() == 0)
+ goto bail;
+
+ /* Setup various ppc_md. callbacks */
+ ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup;
+ ppc_md.dma_get_required_mask = cell_dma_get_required_mask;
+ ppc_md.tce_build = tce_build_cell;
+ ppc_md.tce_free = tce_free_cell;
+
+ if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0)
+ goto bail;
+
+ /* Create an iommu for each /axon node. */
+ for_each_node_by_name(np, "axon") {
+ if (np->parent == NULL || np->parent->parent != NULL)
+ continue;
+ cell_iommu_init_one(np, 0);
+ }
+
+ /* Create an iommu for each toplevel /pci-internal node for
+ * old hardware/firmware
+ */
+ for_each_node_by_name(np, "pci-internal") {
+ if (np->parent == NULL || np->parent->parent != NULL)
+ continue;
+ cell_iommu_init_one(np, SPIDER_DMA_OFFSET);
+ }
+
+ /* Setup default PCI iommu ops */
+ set_pci_dma_ops(&dma_iommu_ops);
+
+ bail:
+ /* Register callbacks on OF platform device addition/removal
+ * to handle linking them to the right DMA operations
+ */
+ bus_register_notifier(&platform_bus_type, &cell_of_bus_notifier);
+
+ return 0;
+}
+machine_arch_initcall(cell, cell_iommu_init);
+machine_arch_initcall(celleb_native, cell_iommu_init);
+
diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
new file mode 100644
index 00000000000..efdacc82957
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pervasive.c
@@ -0,0 +1,132 @@
+/*
+ * CBE Pervasive Monitor and Debug
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ * Michael N. Day (mnday@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/pgtable.h>
+#include <asm/reg.h>
+#include <asm/cell-regs.h>
+
+#include "pervasive.h"
+
+static void cbe_power_save(void)
+{
+ unsigned long ctrl, thread_switch_control;
+
+ /*
+ * We need to hard disable interrupts, the local_irq_enable() done by
+ * our caller upon return will hard re-enable.
+ */
+ hard_irq_disable();
+
+ ctrl = mfspr(SPRN_CTRLF);
+
+ /* Enable DEC and EE interrupt request */
+ thread_switch_control = mfspr(SPRN_TSC_CELL);
+ thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST;
+
+ switch (ctrl & CTRL_CT) {
+ case CTRL_CT0:
+ thread_switch_control |= TSC_CELL_DEC_ENABLE_0;
+ break;
+ case CTRL_CT1:
+ thread_switch_control |= TSC_CELL_DEC_ENABLE_1;
+ break;
+ default:
+ printk(KERN_WARNING "%s: unknown configuration\n",
+ __func__);
+ break;
+ }
+ mtspr(SPRN_TSC_CELL, thread_switch_control);
+
+ /*
+ * go into low thread priority, medium priority will be
+ * restored for us after wake-up.
+ */
+ HMT_low();
+
+ /*
+ * atomically disable thread execution and runlatch.
+ * External and Decrementer exceptions are still handled when the
+ * thread is disabled but now enter in cbe_system_reset_exception()
+ */
+ ctrl &= ~(CTRL_RUNLATCH | CTRL_TE);
+ mtspr(SPRN_CTRLT, ctrl);
+}
+
+static int cbe_system_reset_exception(struct pt_regs *regs)
+{
+ switch (regs->msr & SRR1_WAKEMASK) {
+ case SRR1_WAKEEE:
+ do_IRQ(regs);
+ break;
+ case SRR1_WAKEDEC:
+ timer_interrupt(regs);
+ break;
+ case SRR1_WAKEMT:
+ return cbe_sysreset_hack();
+#ifdef CONFIG_CBE_RAS
+ case SRR1_WAKESYSERR:
+ cbe_system_error_exception(regs);
+ break;
+ case SRR1_WAKETHERM:
+ cbe_thermal_exception(regs);
+ break;
+#endif /* CONFIG_CBE_RAS */
+ default:
+ /* do system reset */
+ return 0;
+ }
+ /* everything handled */
+ return 1;
+}
+
+void __init cbe_pervasive_init(void)
+{
+ int cpu;
+
+ if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(cpu);
+ if (!regs)
+ continue;
+
+ /* Enable Pause(0) control bit */
+ out_be64(&regs->pmcr, in_be64(&regs->pmcr) |
+ CBE_PMD_PAUSE_ZERO_CONTROL);
+ }
+
+ ppc_md.power_save = cbe_power_save;
+ ppc_md.system_reset_exception = cbe_system_reset_exception;
+}
diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h
new file mode 100644
index 00000000000..fd4d7b7092b
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pervasive.h
@@ -0,0 +1,42 @@
+/*
+ * Cell Pervasive Monitor and Debug interface and HW structures
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ * David J. Erb (djerb@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#ifndef PERVASIVE_H
+#define PERVASIVE_H
+
+extern void cbe_pervasive_init(void);
+extern void cbe_system_error_exception(struct pt_regs *regs);
+extern void cbe_maintenance_exception(struct pt_regs *regs);
+extern void cbe_thermal_exception(struct pt_regs *regs);
+
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+extern int cbe_sysreset_hack(void);
+#else
+static inline int cbe_sysreset_hack(void)
+{
+ return 1;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+#endif
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
new file mode 100644
index 00000000000..59c1a169410
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -0,0 +1,424 @@
+/*
+ * Cell Broadband Engine Performance Monitor
+ *
+ * (C) Copyright IBM Corporation 2001,2006
+ *
+ * Author:
+ * David Erb (djerb@us.ibm.com)
+ * Kevin Corry (kevcorry@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <asm/io.h>
+#include <asm/irq_regs.h>
+#include <asm/machdep.h>
+#include <asm/pmc.h>
+#include <asm/reg.h>
+#include <asm/spu.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+/*
+ * When writing to write-only mmio addresses, save a shadow copy. All of the
+ * registers are 32-bit, but stored in the upper-half of a 64-bit field in
+ * pmd_regs.
+ */
+
+#define WRITE_WO_MMIO(reg, x) \
+ do { \
+ u32 _x = (x); \
+ struct cbe_pmd_regs __iomem *pmd_regs; \
+ struct cbe_pmd_shadow_regs *shadow_regs; \
+ pmd_regs = cbe_get_cpu_pmd_regs(cpu); \
+ shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \
+ out_be64(&(pmd_regs->reg), (((u64)_x) << 32)); \
+ shadow_regs->reg = _x; \
+ } while (0)
+
+#define READ_SHADOW_REG(val, reg) \
+ do { \
+ struct cbe_pmd_shadow_regs *shadow_regs; \
+ shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \
+ (val) = shadow_regs->reg; \
+ } while (0)
+
+#define READ_MMIO_UPPER32(val, reg) \
+ do { \
+ struct cbe_pmd_regs __iomem *pmd_regs; \
+ pmd_regs = cbe_get_cpu_pmd_regs(cpu); \
+ (val) = (u32)(in_be64(&pmd_regs->reg) >> 32); \
+ } while (0)
+
+/*
+ * Physical counter registers.
+ * Each physical counter can act as one 32-bit counter or two 16-bit counters.
+ */
+
+u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr)
+{
+ u32 val_in_latch, val = 0;
+
+ if (phys_ctr < NR_PHYS_CTRS) {
+ READ_SHADOW_REG(val_in_latch, counter_value_in_latch);
+
+ /* Read the latch or the actual counter, whichever is newer. */
+ if (val_in_latch & (1 << phys_ctr)) {
+ READ_SHADOW_REG(val, pm_ctr[phys_ctr]);
+ } else {
+ READ_MMIO_UPPER32(val, pm_ctr[phys_ctr]);
+ }
+ }
+
+ return val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_phys_ctr);
+
+void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val)
+{
+ struct cbe_pmd_shadow_regs *shadow_regs;
+ u32 pm_ctrl;
+
+ if (phys_ctr < NR_PHYS_CTRS) {
+ /* Writing to a counter only writes to a hardware latch.
+ * The new value is not propagated to the actual counter
+ * until the performance monitor is enabled.
+ */
+ WRITE_WO_MMIO(pm_ctr[phys_ctr], val);
+
+ pm_ctrl = cbe_read_pm(cpu, pm_control);
+ if (pm_ctrl & CBE_PM_ENABLE_PERF_MON) {
+ /* The counters are already active, so we need to
+ * rewrite the pm_control register to "re-enable"
+ * the PMU.
+ */
+ cbe_write_pm(cpu, pm_control, pm_ctrl);
+ } else {
+ shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);
+ shadow_regs->counter_value_in_latch |= (1 << phys_ctr);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(cbe_write_phys_ctr);
+
+/*
+ * "Logical" counter registers.
+ * These will read/write 16-bits or 32-bits depending on the
+ * current size of the counter. Counters 4 - 7 are always 16-bit.
+ */
+
+u32 cbe_read_ctr(u32 cpu, u32 ctr)
+{
+ u32 val;
+ u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1);
+
+ val = cbe_read_phys_ctr(cpu, phys_ctr);
+
+ if (cbe_get_ctr_size(cpu, phys_ctr) == 16)
+ val = (ctr < NR_PHYS_CTRS) ? (val >> 16) : (val & 0xffff);
+
+ return val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_ctr);
+
+void cbe_write_ctr(u32 cpu, u32 ctr, u32 val)
+{
+ u32 phys_ctr;
+ u32 phys_val;
+
+ phys_ctr = ctr & (NR_PHYS_CTRS - 1);
+
+ if (cbe_get_ctr_size(cpu, phys_ctr) == 16) {
+ phys_val = cbe_read_phys_ctr(cpu, phys_ctr);
+
+ if (ctr < NR_PHYS_CTRS)
+ val = (val << 16) | (phys_val & 0xffff);
+ else
+ val = (val & 0xffff) | (phys_val & 0xffff0000);
+ }
+
+ cbe_write_phys_ctr(cpu, phys_ctr, val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_ctr);
+
+/*
+ * Counter-control registers.
+ * Each "logical" counter has a corresponding control register.
+ */
+
+u32 cbe_read_pm07_control(u32 cpu, u32 ctr)
+{
+ u32 pm07_control = 0;
+
+ if (ctr < NR_CTRS)
+ READ_SHADOW_REG(pm07_control, pm07_control[ctr]);
+
+ return pm07_control;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm07_control);
+
+void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val)
+{
+ if (ctr < NR_CTRS)
+ WRITE_WO_MMIO(pm07_control[ctr], val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm07_control);
+
+/*
+ * Other PMU control registers. Most of these are write-only.
+ */
+
+u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg)
+{
+ u32 val = 0;
+
+ switch (reg) {
+ case group_control:
+ READ_SHADOW_REG(val, group_control);
+ break;
+
+ case debug_bus_control:
+ READ_SHADOW_REG(val, debug_bus_control);
+ break;
+
+ case trace_address:
+ READ_MMIO_UPPER32(val, trace_address);
+ break;
+
+ case ext_tr_timer:
+ READ_SHADOW_REG(val, ext_tr_timer);
+ break;
+
+ case pm_status:
+ READ_MMIO_UPPER32(val, pm_status);
+ break;
+
+ case pm_control:
+ READ_SHADOW_REG(val, pm_control);
+ break;
+
+ case pm_interval:
+ READ_MMIO_UPPER32(val, pm_interval);
+ break;
+
+ case pm_start_stop:
+ READ_SHADOW_REG(val, pm_start_stop);
+ break;
+ }
+
+ return val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm);
+
+void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val)
+{
+ switch (reg) {
+ case group_control:
+ WRITE_WO_MMIO(group_control, val);
+ break;
+
+ case debug_bus_control:
+ WRITE_WO_MMIO(debug_bus_control, val);
+ break;
+
+ case trace_address:
+ WRITE_WO_MMIO(trace_address, val);
+ break;
+
+ case ext_tr_timer:
+ WRITE_WO_MMIO(ext_tr_timer, val);
+ break;
+
+ case pm_status:
+ WRITE_WO_MMIO(pm_status, val);
+ break;
+
+ case pm_control:
+ WRITE_WO_MMIO(pm_control, val);
+ break;
+
+ case pm_interval:
+ WRITE_WO_MMIO(pm_interval, val);
+ break;
+
+ case pm_start_stop:
+ WRITE_WO_MMIO(pm_start_stop, val);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm);
+
+/*
+ * Get/set the size of a physical counter to either 16 or 32 bits.
+ */
+
+u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr)
+{
+ u32 pm_ctrl, size = 0;
+
+ if (phys_ctr < NR_PHYS_CTRS) {
+ pm_ctrl = cbe_read_pm(cpu, pm_control);
+ size = (pm_ctrl & CBE_PM_16BIT_CTR(phys_ctr)) ? 16 : 32;
+ }
+
+ return size;
+}
+EXPORT_SYMBOL_GPL(cbe_get_ctr_size);
+
+void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size)
+{
+ u32 pm_ctrl;
+
+ if (phys_ctr < NR_PHYS_CTRS) {
+ pm_ctrl = cbe_read_pm(cpu, pm_control);
+ switch (ctr_size) {
+ case 16:
+ pm_ctrl |= CBE_PM_16BIT_CTR(phys_ctr);
+ break;
+
+ case 32:
+ pm_ctrl &= ~CBE_PM_16BIT_CTR(phys_ctr);
+ break;
+ }
+ cbe_write_pm(cpu, pm_control, pm_ctrl);
+ }
+}
+EXPORT_SYMBOL_GPL(cbe_set_ctr_size);
+
+/*
+ * Enable/disable the entire performance monitoring unit.
+ * When we enable the PMU, all pending writes to counters get committed.
+ */
+
+void cbe_enable_pm(u32 cpu)
+{
+ struct cbe_pmd_shadow_regs *shadow_regs;
+ u32 pm_ctrl;
+
+ shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);
+ shadow_regs->counter_value_in_latch = 0;
+
+ pm_ctrl = cbe_read_pm(cpu, pm_control) | CBE_PM_ENABLE_PERF_MON;
+ cbe_write_pm(cpu, pm_control, pm_ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm);
+
+void cbe_disable_pm(u32 cpu)
+{
+ u32 pm_ctrl;
+ pm_ctrl = cbe_read_pm(cpu, pm_control) & ~CBE_PM_ENABLE_PERF_MON;
+ cbe_write_pm(cpu, pm_control, pm_ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm);
+
+/*
+ * Reading from the trace_buffer.
+ * The trace buffer is two 64-bit registers. Reading from
+ * the second half automatically increments the trace_address.
+ */
+
+void cbe_read_trace_buffer(u32 cpu, u64 *buf)
+{
+ struct cbe_pmd_regs __iomem *pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+
+ *buf++ = in_be64(&pmd_regs->trace_buffer_0_63);
+ *buf++ = in_be64(&pmd_regs->trace_buffer_64_127);
+}
+EXPORT_SYMBOL_GPL(cbe_read_trace_buffer);
+
+/*
+ * Enabling/disabling interrupts for the entire performance monitoring unit.
+ */
+
+u32 cbe_get_and_clear_pm_interrupts(u32 cpu)
+{
+ /* Reading pm_status clears the interrupt bits. */
+ return cbe_read_pm(cpu, pm_status);
+}
+EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts);
+
+void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask)
+{
+ /* Set which node and thread will handle the next interrupt. */
+ iic_set_interrupt_routing(cpu, thread, 0);
+
+ /* Enable the interrupt bits in the pm_status register. */
+ if (mask)
+ cbe_write_pm(cpu, pm_status, mask);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts);
+
+void cbe_disable_pm_interrupts(u32 cpu)
+{
+ cbe_get_and_clear_pm_interrupts(cpu);
+ cbe_write_pm(cpu, pm_status, 0);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts);
+
+static irqreturn_t cbe_pm_irq(int irq, void *dev_id)
+{
+ perf_irq(get_irq_regs());
+ return IRQ_HANDLED;
+}
+
+static int __init cbe_init_pm_irq(void)
+{
+ unsigned int irq;
+ int rc, node;
+
+ for_each_node(node) {
+ irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI |
+ (node << IIC_IRQ_NODE_SHIFT));
+ if (irq == NO_IRQ) {
+ printk("ERROR: Unable to allocate irq for node %d\n",
+ node);
+ return -EINVAL;
+ }
+
+ rc = request_irq(irq, cbe_pm_irq,
+ 0, "cbe-pmu-0", NULL);
+ if (rc) {
+ printk("ERROR: Request for irq on node %d failed\n",
+ node);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+machine_arch_initcall(cell, cbe_init_pm_irq);
+
+void cbe_sync_irq(int node)
+{
+ unsigned int irq;
+
+ irq = irq_find_mapping(NULL,
+ IIC_IRQ_IOEX_PMI
+ | (node << IIC_IRQ_NODE_SHIFT));
+
+ if (irq == NO_IRQ) {
+ printk(KERN_WARNING "ERROR, unable to get existing irq %d " \
+ "for node %d\n", irq, node);
+ return;
+ }
+
+ synchronize_irq(irq);
+}
+EXPORT_SYMBOL_GPL(cbe_sync_irq);
+
diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c
new file mode 100644
index 00000000000..7f9b6742f8b
--- /dev/null
+++ b/arch/powerpc/platforms/cell/qpace_setup.c
@@ -0,0 +1,148 @@
+/*
+ * linux/arch/powerpc/platforms/cell/qpace_setup.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * Modified by PPC64 Team, IBM Corp
+ * Modified by Cell Team, IBM Deutschland Entwicklung GmbH
+ * Modified by Benjamin Krill <ben@codiert.org>, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/console.h>
+#include <linux/of_platform.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/kexec.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/cputable.h>
+#include <asm/irq.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/udbg.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+#include "pervasive.h"
+#include "ras.h"
+
+static void qpace_show_cpuinfo(struct seq_file *m)
+{
+ struct device_node *root;
+ const char *model = "";
+
+ root = of_find_node_by_path("/");
+ if (root)
+ model = of_get_property(root, "model", NULL);
+ seq_printf(m, "machine\t\t: CHRP %s\n", model);
+ of_node_put(root);
+}
+
+static void qpace_progress(char *s, unsigned short hex)
+{
+ printk("*** %04x : %s\n", hex, s ? s : "");
+}
+
+static const struct of_device_id qpace_bus_ids[] __initdata = {
+ { .type = "soc", },
+ { .compatible = "soc", },
+ { .type = "spider", },
+ { .type = "axon", },
+ { .type = "plb5", },
+ { .type = "plb4", },
+ { .type = "opb", },
+ { .type = "ebc", },
+ {},
+};
+
+static int __init qpace_publish_devices(void)
+{
+ int node;
+
+ /* Publish OF platform devices for southbridge IOs */
+ of_platform_bus_probe(NULL, qpace_bus_ids, NULL);
+
+ /* There is no device for the MIC memory controller, thus we create
+ * a platform device for it to attach the EDAC driver to.
+ */
+ for_each_online_node(node) {
+ if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL)
+ continue;
+ platform_device_register_simple("cbe-mic", node, NULL, 0);
+ }
+
+ return 0;
+}
+machine_subsys_initcall(qpace, qpace_publish_devices);
+
+static void __init qpace_setup_arch(void)
+{
+#ifdef CONFIG_SPU_BASE
+ spu_priv1_ops = &spu_priv1_mmio_ops;
+ spu_management_ops = &spu_management_of_ops;
+#endif
+
+ cbe_regs_init();
+
+#ifdef CONFIG_CBE_RAS
+ cbe_ras_init();
+#endif
+
+#ifdef CONFIG_SMP
+ smp_init_cell();
+#endif
+
+ /* init to some ~sane value until calibrate_delay() runs */
+ loops_per_jiffy = 50000000;
+
+ cbe_pervasive_init();
+#ifdef CONFIG_DUMMY_CONSOLE
+ conswitchp = &dummy_con;
+#endif
+}
+
+static int __init qpace_probe(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ if (!of_flat_dt_is_compatible(root, "IBM,QPACE"))
+ return 0;
+
+ hpte_init_native();
+
+ return 1;
+}
+
+define_machine(qpace) {
+ .name = "QPACE",
+ .probe = qpace_probe,
+ .setup_arch = qpace_setup_arch,
+ .show_cpuinfo = qpace_show_cpuinfo,
+ .restart = rtas_restart,
+ .power_off = rtas_power_off,
+ .halt = rtas_halt,
+ .get_boot_time = rtas_get_boot_time,
+ .get_rtc_time = rtas_get_rtc_time,
+ .set_rtc_time = rtas_set_rtc_time,
+ .calibrate_decr = generic_calibrate_decr,
+ .progress = qpace_progress,
+ .init_IRQ = iic_init_IRQ,
+};
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
new file mode 100644
index 00000000000..5ec1e47a0d7
--- /dev/null
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2006-2008, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+
+#include <asm/kexec.h>
+#include <asm/reg.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/cell-regs.h>
+
+#include "ras.h"
+
+
+static void dump_fir(int cpu)
+{
+ struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu);
+ struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu);
+
+ if (pregs == NULL)
+ return;
+
+ /* Todo: do some nicer parsing of bits and based on them go down
+ * to other sub-units FIRs and not only IIC
+ */
+ printk(KERN_ERR "Global Checkstop FIR : 0x%016llx\n",
+ in_be64(&pregs->checkstop_fir));
+ printk(KERN_ERR "Global Recoverable FIR : 0x%016llx\n",
+ in_be64(&pregs->checkstop_fir));
+ printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n",
+ in_be64(&pregs->spec_att_mchk_fir));
+
+ if (iregs == NULL)
+ return;
+ printk(KERN_ERR "IOC FIR : 0x%016llx\n",
+ in_be64(&iregs->ioc_fir));
+
+}
+
+void cbe_system_error_exception(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu);
+ dump_fir(cpu);
+ dump_stack();
+}
+
+void cbe_maintenance_exception(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * Nothing implemented for the maintenance interrupt at this point
+ */
+
+ printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu);
+ dump_stack();
+}
+
+void cbe_thermal_exception(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * Nothing implemented for the thermal interrupt at this point
+ */
+
+ printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu);
+ dump_stack();
+}
+
+static int cbe_machine_check_handler(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu);
+ dump_fir(cpu);
+
+ /* No recovery from this code now, lets continue */
+ return 0;
+}
+
+struct ptcal_area {
+ struct list_head list;
+ int nid;
+ int order;
+ struct page *pages;
+};
+
+static LIST_HEAD(ptcal_list);
+
+static int ptcal_start_tok, ptcal_stop_tok;
+
+static int __init cbe_ptcal_enable_on_node(int nid, int order)
+{
+ struct ptcal_area *area;
+ int ret = -ENOMEM;
+ unsigned long addr;
+
+ if (is_kdump_kernel())
+ rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
+
+ area = kmalloc(sizeof(*area), GFP_KERNEL);
+ if (!area)
+ goto out_err;
+
+ area->nid = nid;
+ area->order = order;
+ area->pages = alloc_pages_exact_node(area->nid, GFP_KERNEL|GFP_THISNODE,
+ area->order);
+
+ if (!area->pages) {
+ printk(KERN_WARNING "%s: no page on node %d\n",
+ __func__, area->nid);
+ goto out_free_area;
+ }
+
+ /*
+ * We move the ptcal area to the middle of the allocated
+ * page, in order to avoid prefetches in memcpy and similar
+ * functions stepping on it.
+ */
+ addr = __pa(page_address(area->pages)) + (PAGE_SIZE >> 1);
+ printk(KERN_DEBUG "%s: enabling PTCAL on node %d address=0x%016lx\n",
+ __func__, area->nid, addr);
+
+ ret = -EIO;
+ if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid,
+ (unsigned int)(addr >> 32),
+ (unsigned int)(addr & 0xffffffff))) {
+ printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n",
+ __func__, nid);
+ goto out_free_pages;
+ }
+
+ list_add(&area->list, &ptcal_list);
+
+ return 0;
+
+out_free_pages:
+ __free_pages(area->pages, area->order);
+out_free_area:
+ kfree(area);
+out_err:
+ return ret;
+}
+
+static int __init cbe_ptcal_enable(void)
+{
+ const u32 *size;
+ struct device_node *np;
+ int order, found_mic = 0;
+
+ np = of_find_node_by_path("/rtas");
+ if (!np)
+ return -ENODEV;
+
+ size = of_get_property(np, "ibm,cbe-ptcal-size", NULL);
+ if (!size) {
+ of_node_put(np);
+ return -ENODEV;
+ }
+
+ pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size);
+ order = get_order(*size);
+ of_node_put(np);
+
+ /* support for malta device trees, with be@/mic@ nodes */
+ for_each_node_by_type(np, "mic-tm") {
+ cbe_ptcal_enable_on_node(of_node_to_nid(np), order);
+ found_mic = 1;
+ }
+
+ if (found_mic)
+ return 0;
+
+ /* support for older device tree - use cpu nodes */
+ for_each_node_by_type(np, "cpu") {
+ const u32 *nid = of_get_property(np, "node-id", NULL);
+ if (!nid) {
+ printk(KERN_ERR "%s: node %s is missing node-id?\n",
+ __func__, np->full_name);
+ continue;
+ }
+ cbe_ptcal_enable_on_node(*nid, order);
+ found_mic = 1;
+ }
+
+ return found_mic ? 0 : -ENODEV;
+}
+
+static int cbe_ptcal_disable(void)
+{
+ struct ptcal_area *area, *tmp;
+ int ret = 0;
+
+ pr_debug("%s: disabling PTCAL\n", __func__);
+
+ list_for_each_entry_safe(area, tmp, &ptcal_list, list) {
+ /* disable ptcal on this node */
+ if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) {
+ printk(KERN_ERR "%s: error disabling PTCAL "
+ "on node %d!\n", __func__,
+ area->nid);
+ ret = -EIO;
+ continue;
+ }
+
+ /* ensure we can access the PTCAL area */
+ memset(page_address(area->pages), 0,
+ 1 << (area->order + PAGE_SHIFT));
+
+ /* clean up */
+ list_del(&area->list);
+ __free_pages(area->pages, area->order);
+ kfree(area);
+ }
+
+ return ret;
+}
+
+static int cbe_ptcal_notify_reboot(struct notifier_block *nb,
+ unsigned long code, void *data)
+{
+ return cbe_ptcal_disable();
+}
+
+static void cbe_ptcal_crash_shutdown(void)
+{
+ cbe_ptcal_disable();
+}
+
+static struct notifier_block cbe_ptcal_reboot_notifier = {
+ .notifier_call = cbe_ptcal_notify_reboot
+};
+
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+static int sysreset_hack;
+
+static int __init cbe_sysreset_init(void)
+{
+ struct cbe_pmd_regs __iomem *regs;
+
+ sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0");
+ if (!sysreset_hack)
+ return 0;
+
+ regs = cbe_get_cpu_pmd_regs(0);
+ if (!regs)
+ return 0;
+
+ /* Enable JTAG system-reset hack */
+ out_be32(&regs->fir_mode_reg,
+ in_be32(&regs->fir_mode_reg) |
+ CBE_PMD_FIR_MODE_M8);
+
+ return 0;
+}
+device_initcall(cbe_sysreset_init);
+
+int cbe_sysreset_hack(void)
+{
+ struct cbe_pmd_regs __iomem *regs;
+
+ /*
+ * The BMC can inject user triggered system reset exceptions,
+ * but cannot set the system reset reason in srr1,
+ * so check an extra register here.
+ */
+ if (sysreset_hack && (smp_processor_id() == 0)) {
+ regs = cbe_get_cpu_pmd_regs(0);
+ if (!regs)
+ return 0;
+ if (in_be64(&regs->ras_esc_0) & 0x0000ffff) {
+ out_be64(&regs->ras_esc_0, 0);
+ return 0;
+ }
+ }
+ return 1;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+int __init cbe_ptcal_init(void)
+{
+ int ret;
+ ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal");
+ ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal");
+
+ if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE
+ || ptcal_stop_tok == RTAS_UNKNOWN_SERVICE)
+ return -ENODEV;
+
+ ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier);
+ if (ret)
+ goto out1;
+
+ ret = crash_shutdown_register(&cbe_ptcal_crash_shutdown);
+ if (ret)
+ goto out2;
+
+ return cbe_ptcal_enable();
+
+out2:
+ unregister_reboot_notifier(&cbe_ptcal_reboot_notifier);
+out1:
+ printk(KERN_ERR "Can't disable PTCAL, so not enabling\n");
+ return ret;
+}
+
+arch_initcall(cbe_ptcal_init);
+
+void __init cbe_ras_init(void)
+{
+ unsigned long hid0;
+
+ /*
+ * Enable System Error & thermal interrupts and wakeup conditions
+ */
+
+ hid0 = mfspr(SPRN_HID0);
+ hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP |
+ HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP;
+ mtspr(SPRN_HID0, hid0);
+ mb();
+
+ /*
+ * Install machine check handler. Leave setting of precise mode to
+ * what the firmware did for now
+ */
+ ppc_md.machine_check_exception = cbe_machine_check_handler;
+ mb();
+
+ /*
+ * For now, we assume that IOC_FIR is already set to forward some
+ * error conditions to the System Error handler. If that is not true
+ * then it will have to be fixed up here.
+ */
+}
diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h
new file mode 100644
index 00000000000..eb7ee54c82a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/ras.h
@@ -0,0 +1,9 @@
+#ifndef RAS_H
+#define RAS_H
+
+extern void cbe_system_error_exception(struct pt_regs *regs);
+extern void cbe_maintenance_exception(struct pt_regs *regs);
+extern void cbe_thermal_exception(struct pt_regs *regs);
+extern void cbe_ras_init(void);
+
+#endif /* RAS_H */
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
new file mode 100644
index 00000000000..62002a7edfe
--- /dev/null
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -0,0 +1,280 @@
+/*
+ * linux/arch/powerpc/platforms/cell/cell_setup.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * Modified by PPC64 Team, IBM Corp
+ * Modified by Cell Team, IBM Deutschland Entwicklung GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#undef DEBUG
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/export.h>
+#include <linux/unistd.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/mutex.h>
+#include <linux/memory_hotplug.h>
+#include <linux/of_platform.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+#include <asm/cputable.h>
+#include <asm/ppc-pci.h>
+#include <asm/irq.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/cell-regs.h>
+#include <asm/io-workarounds.h>
+
+#include "interrupt.h"
+#include "pervasive.h"
+#include "ras.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+static void cell_show_cpuinfo(struct seq_file *m)
+{
+ struct device_node *root;
+ const char *model = "";
+
+ root = of_find_node_by_path("/");
+ if (root)
+ model = of_get_property(root, "model", NULL);
+ seq_printf(m, "machine\t\t: CHRP %s\n", model);
+ of_node_put(root);
+}
+
+static void cell_progress(char *s, unsigned short hex)
+{
+ printk("*** %04x : %s\n", hex, s ? s : "");
+}
+
+static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev)
+{
+ struct pci_controller *hose;
+ const char *s;
+ int i;
+
+ if (!machine_is(cell))
+ return;
+
+ /* We're searching for a direct child of the PHB */
+ if (dev->bus->self != NULL || dev->devfn != 0)
+ return;
+
+ hose = pci_bus_to_host(dev->bus);
+ if (hose == NULL)
+ return;
+
+ /* Only on PCIE */
+ if (!of_device_is_compatible(hose->dn, "pciex"))
+ return;
+
+ /* And only on axon */
+ s = of_get_property(hose->dn, "model", NULL);
+ if (!s || strcmp(s, "Axon") != 0)
+ return;
+
+ for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) {
+ dev->resource[i].start = dev->resource[i].end = 0;
+ dev->resource[i].flags = 0;
+ }
+
+ printk(KERN_DEBUG "PCI: Hiding resources on Axon PCIE RC %s\n",
+ pci_name(dev));
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex);
+
+static int __devinit cell_setup_phb(struct pci_controller *phb)
+{
+ const char *model;
+ struct device_node *np;
+
+ int rc = rtas_setup_phb(phb);
+ if (rc)
+ return rc;
+
+ np = phb->dn;
+ model = of_get_property(np, "model", NULL);
+ if (model == NULL || strcmp(np->name, "pci"))
+ return 0;
+
+ /* Setup workarounds for spider */
+ if (strcmp(model, "Spider"))
+ return 0;
+
+ iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
+ (void *)SPIDER_PCI_REG_BASE);
+ return 0;
+}
+
+static const struct of_device_id cell_bus_ids[] __initdata = {
+ { .type = "soc", },
+ { .compatible = "soc", },
+ { .type = "spider", },
+ { .type = "axon", },
+ { .type = "plb5", },
+ { .type = "plb4", },
+ { .type = "opb", },
+ { .type = "ebc", },
+ {},
+};
+
+static int __init cell_publish_devices(void)
+{
+ struct device_node *root = of_find_node_by_path("/");
+ struct device_node *np;
+ int node;
+
+ /* Publish OF platform devices for southbridge IOs */
+ of_platform_bus_probe(NULL, cell_bus_ids, NULL);
+
+ /* On spider based blades, we need to manually create the OF
+ * platform devices for the PCI host bridges
+ */
+ for_each_child_of_node(root, np) {
+ if (np->type == NULL || (strcmp(np->type, "pci") != 0 &&
+ strcmp(np->type, "pciex") != 0))
+ continue;
+ of_platform_device_create(np, NULL, NULL);
+ }
+
+ /* There is no device for the MIC memory controller, thus we create
+ * a platform device for it to attach the EDAC driver to.
+ */
+ for_each_online_node(node) {
+ if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL)
+ continue;
+ platform_device_register_simple("cbe-mic", node, NULL, 0);
+ }
+
+ return 0;
+}
+machine_subsys_initcall(cell, cell_publish_devices);
+
+static void __init mpic_init_IRQ(void)
+{
+ struct device_node *dn;
+ struct mpic *mpic;
+
+ for (dn = NULL;
+ (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+ if (!of_device_is_compatible(dn, "CBEA,platform-open-pic"))
+ continue;
+
+ /* The MPIC driver will get everything it needs from the
+ * device-tree, just pass 0 to all arguments
+ */
+ mpic = mpic_alloc(dn, 0, MPIC_SECONDARY, 0, 0, " MPIC ");
+ if (mpic == NULL)
+ continue;
+ mpic_init(mpic);
+ }
+}
+
+
+static void __init cell_init_irq(void)
+{
+ iic_init_IRQ();
+ spider_init_IRQ();
+ mpic_init_IRQ();
+}
+
+static void __init cell_set_dabrx(void)
+{
+ mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
+}
+
+static void __init cell_setup_arch(void)
+{
+#ifdef CONFIG_SPU_BASE
+ spu_priv1_ops = &spu_priv1_mmio_ops;
+ spu_management_ops = &spu_management_of_ops;
+#endif
+
+ cbe_regs_init();
+
+ cell_set_dabrx();
+
+#ifdef CONFIG_CBE_RAS
+ cbe_ras_init();
+#endif
+
+#ifdef CONFIG_SMP
+ smp_init_cell();
+#endif
+ /* init to some ~sane value until calibrate_delay() runs */
+ loops_per_jiffy = 50000000;
+
+ /* Find and initialize PCI host bridges */
+ init_pci_config_tokens();
+
+ cbe_pervasive_init();
+#ifdef CONFIG_DUMMY_CONSOLE
+ conswitchp = &dummy_con;
+#endif
+
+ mmio_nvram_init();
+}
+
+static int __init cell_probe(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ if (!of_flat_dt_is_compatible(root, "IBM,CBEA") &&
+ !of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
+ return 0;
+
+ hpte_init_native();
+
+ return 1;
+}
+
+define_machine(cell) {
+ .name = "Cell",
+ .probe = cell_probe,
+ .setup_arch = cell_setup_arch,
+ .show_cpuinfo = cell_show_cpuinfo,
+ .restart = rtas_restart,
+ .power_off = rtas_power_off,
+ .halt = rtas_halt,
+ .get_boot_time = rtas_get_boot_time,
+ .get_rtc_time = rtas_get_rtc_time,
+ .set_rtc_time = rtas_set_rtc_time,
+ .calibrate_decr = generic_calibrate_decr,
+ .progress = cell_progress,
+ .init_IRQ = cell_init_irq,
+ .pci_setup_phb = cell_setup_phb,
+};
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
new file mode 100644
index 00000000000..4a255cf8cd1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -0,0 +1,188 @@
+/*
+ * SMP support for BPA machines.
+ *
+ * Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ *
+ * Plus various changes from other IBM teams...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/paca.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/system.h>
+#include <asm/rtas.h>
+#include <asm/cputhreads.h>
+
+#include "interrupt.h"
+#include <asm/udbg.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * The Primary thread of each non-boot processor was started from the OF client
+ * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop.
+ */
+static cpumask_t of_spin_map;
+
+/**
+ * smp_startup_cpu() - start the given cpu
+ *
+ * At boot time, there is nothing to do for primary threads which were
+ * started from Open Firmware. For anything else, call RTAS with the
+ * appropriate start location.
+ *
+ * Returns:
+ * 0 - failure
+ * 1 - success
+ */
+static inline int __devinit smp_startup_cpu(unsigned int lcpu)
+{
+ int status;
+ unsigned long start_here = __pa((u32)*((unsigned long *)
+ generic_secondary_smp_init));
+ unsigned int pcpu;
+ int start_cpu;
+
+ if (cpumask_test_cpu(lcpu, &of_spin_map))
+ /* Already started by OF and sitting in spin loop */
+ return 1;
+
+ pcpu = get_hard_smp_processor_id(lcpu);
+
+ /* Fixup atomic count: it exited inside IRQ handler. */
+ task_thread_info(paca[lcpu].__current)->preempt_count = 0;
+
+ /*
+ * If the RTAS start-cpu token does not exist then presume the
+ * cpu is already spinning.
+ */
+ start_cpu = rtas_token("start-cpu");
+ if (start_cpu == RTAS_UNKNOWN_SERVICE)
+ return 1;
+
+ status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu);
+ if (status != 0) {
+ printk(KERN_ERR "start-cpu failed: %i\n", status);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int __init smp_iic_probe(void)
+{
+ iic_request_IPIs();
+
+ return cpumask_weight(cpu_possible_mask);
+}
+
+static void __devinit smp_cell_setup_cpu(int cpu)
+{
+ if (cpu != boot_cpuid)
+ iic_setup_cpu();
+
+ /*
+ * change default DABRX to allow user watchpoints
+ */
+ mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
+}
+
+static int __devinit smp_cell_kick_cpu(int nr)
+{
+ BUG_ON(nr < 0 || nr >= NR_CPUS);
+
+ if (!smp_startup_cpu(nr))
+ return -ENOENT;
+
+ /*
+ * The processor is currently spinning, waiting for the
+ * cpu_start field to become non-zero After we set cpu_start,
+ * the processor will continue on to secondary_start
+ */
+ paca[nr].cpu_start = 1;
+
+ return 0;
+}
+
+static int smp_cell_cpu_bootable(unsigned int nr)
+{
+ /* Special case - we inhibit secondary thread startup
+ * during boot if the user requests it. Odd-numbered
+ * cpus are assumed to be secondary threads.
+ */
+ if (system_state < SYSTEM_RUNNING &&
+ cpu_has_feature(CPU_FTR_SMT) &&
+ !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
+ return 0;
+
+ return 1;
+}
+static struct smp_ops_t bpa_iic_smp_ops = {
+ .message_pass = iic_message_pass,
+ .probe = smp_iic_probe,
+ .kick_cpu = smp_cell_kick_cpu,
+ .setup_cpu = smp_cell_setup_cpu,
+ .cpu_bootable = smp_cell_cpu_bootable,
+};
+
+/* This is called very early */
+void __init smp_init_cell(void)
+{
+ int i;
+
+ DBG(" -> smp_init_cell()\n");
+
+ smp_ops = &bpa_iic_smp_ops;
+
+ /* Mark threads which are still spinning in hold loops. */
+ if (cpu_has_feature(CPU_FTR_SMT)) {
+ for_each_present_cpu(i) {
+ if (cpu_thread_in_core(i) == 0)
+ cpumask_set_cpu(i, &of_spin_map);
+ }
+ } else
+ cpumask_copy(&of_spin_map, cpu_present_mask);
+
+ cpumask_clear_cpu(boot_cpuid, &of_spin_map);
+
+ /* Non-lpar has additional take/give timebase */
+ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
+ smp_ops->give_timebase = rtas_give_timebase;
+ smp_ops->take_timebase = rtas_take_timebase;
+ }
+
+ DBG(" <- smp_init_cell()\n");
+}
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
new file mode 100644
index 00000000000..f1f7878893f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -0,0 +1,184 @@
+/*
+ * IO workarounds for PCI on Celleb/Cell platform
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+#include <asm/io-workarounds.h>
+
+#define SPIDER_PCI_DISABLE_PREFETCH
+
+struct spiderpci_iowa_private {
+ void __iomem *regs;
+};
+
+static void spiderpci_io_flush(struct iowa_bus *bus)
+{
+ struct spiderpci_iowa_private *priv;
+ u32 val;
+
+ priv = bus->private;
+ val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
+ iosync();
+}
+
+#define SPIDER_PCI_MMIO_READ(name, ret) \
+static ret spiderpci_##name(const PCI_IO_ADDR addr) \
+{ \
+ ret val = __do_##name(addr); \
+ spiderpci_io_flush(iowa_mem_find_bus(addr)); \
+ return val; \
+}
+
+#define SPIDER_PCI_MMIO_READ_STR(name) \
+static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, \
+ unsigned long count) \
+{ \
+ __do_##name(addr, buf, count); \
+ spiderpci_io_flush(iowa_mem_find_bus(addr)); \
+}
+
+SPIDER_PCI_MMIO_READ(readb, u8)
+SPIDER_PCI_MMIO_READ(readw, u16)
+SPIDER_PCI_MMIO_READ(readl, u32)
+SPIDER_PCI_MMIO_READ(readq, u64)
+SPIDER_PCI_MMIO_READ(readw_be, u16)
+SPIDER_PCI_MMIO_READ(readl_be, u32)
+SPIDER_PCI_MMIO_READ(readq_be, u64)
+SPIDER_PCI_MMIO_READ_STR(readsb)
+SPIDER_PCI_MMIO_READ_STR(readsw)
+SPIDER_PCI_MMIO_READ_STR(readsl)
+
+static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
+ unsigned long n)
+{
+ __do_memcpy_fromio(dest, src, n);
+ spiderpci_io_flush(iowa_mem_find_bus(src));
+}
+
+static int __init spiderpci_pci_setup_chip(struct pci_controller *phb,
+ void __iomem *regs)
+{
+ void *dummy_page_va;
+ dma_addr_t dummy_page_da;
+
+#ifdef SPIDER_PCI_DISABLE_PREFETCH
+ u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT);
+ pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val);
+ out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8);
+#endif /* SPIDER_PCI_DISABLE_PREFETCH */
+
+ /* setup dummy read */
+ /*
+ * On CellBlade, we can't know that which XDR memory is used by
+ * kmalloc() to allocate dummy_page_va.
+ * In order to imporve the performance, the XDR which is used to
+ * allocate dummy_page_va is the nearest the spider-pci.
+ * We have to select the CBE which is the nearest the spider-pci
+ * to allocate memory from the best XDR, but I don't know that
+ * how to do.
+ *
+ * Celleb does not have this problem, because it has only one XDR.
+ */
+ dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!dummy_page_va) {
+ pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n");
+ return -1;
+ }
+
+ dummy_page_da = dma_map_single(phb->parent, dummy_page_va,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(phb->parent, dummy_page_da)) {
+ pr_err("SPIDER-IOWA:Map dummy page filed.\n");
+ kfree(dummy_page_va);
+ return -1;
+ }
+
+ out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da);
+
+ return 0;
+}
+
+int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
+{
+ void __iomem *regs = NULL;
+ struct spiderpci_iowa_private *priv;
+ struct device_node *np = bus->phb->dn;
+ struct resource r;
+ unsigned long offset = (unsigned long)data;
+
+ pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n",
+ np->full_name);
+
+ priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL);
+ if (!priv) {
+ pr_err("SPIDERPCI-IOWA:"
+ "Can't allocate struct spiderpci_iowa_private");
+ return -1;
+ }
+
+ if (of_address_to_resource(np, 0, &r)) {
+ pr_err("SPIDERPCI-IOWA:Can't get resource.\n");
+ goto error;
+ }
+
+ regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE);
+ if (!regs) {
+ pr_err("SPIDERPCI-IOWA:ioremap failed.\n");
+ goto error;
+ }
+ priv->regs = regs;
+ bus->private = priv;
+
+ if (spiderpci_pci_setup_chip(bus->phb, regs))
+ goto error;
+
+ return 0;
+
+error:
+ kfree(priv);
+ bus->private = NULL;
+
+ if (regs)
+ iounmap(regs);
+
+ return -1;
+}
+
+struct ppc_pci_io spiderpci_ops = {
+ .readb = spiderpci_readb,
+ .readw = spiderpci_readw,
+ .readl = spiderpci_readl,
+ .readq = spiderpci_readq,
+ .readw_be = spiderpci_readw_be,
+ .readl_be = spiderpci_readl_be,
+ .readq_be = spiderpci_readq_be,
+ .readsb = spiderpci_readsb,
+ .readsw = spiderpci_readsw,
+ .readsl = spiderpci_readsl,
+ .memcpy_fromio = spiderpci_memcpy_fromio,
+};
+
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
new file mode 100644
index 00000000000..442c28c00f8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -0,0 +1,364 @@
+/*
+ * External Interrupt Controller on Spider South Bridge
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/ioport.h>
+
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+
+#include "interrupt.h"
+
+/* register layout taken from Spider spec, table 7.4-4 */
+enum {
+ TIR_DEN = 0x004, /* Detection Enable Register */
+ TIR_MSK = 0x084, /* Mask Level Register */
+ TIR_EDC = 0x0c0, /* Edge Detection Clear Register */
+ TIR_PNDA = 0x100, /* Pending Register A */
+ TIR_PNDB = 0x104, /* Pending Register B */
+ TIR_CS = 0x144, /* Current Status Register */
+ TIR_LCSA = 0x150, /* Level Current Status Register A */
+ TIR_LCSB = 0x154, /* Level Current Status Register B */
+ TIR_LCSC = 0x158, /* Level Current Status Register C */
+ TIR_LCSD = 0x15c, /* Level Current Status Register D */
+ TIR_CFGA = 0x200, /* Setting Register A0 */
+ TIR_CFGB = 0x204, /* Setting Register B0 */
+ /* 0x208 ... 0x3ff Setting Register An/Bn */
+ TIR_PPNDA = 0x400, /* Packet Pending Register A */
+ TIR_PPNDB = 0x404, /* Packet Pending Register B */
+ TIR_PIERA = 0x408, /* Packet Output Error Register A */
+ TIR_PIERB = 0x40c, /* Packet Output Error Register B */
+ TIR_PIEN = 0x444, /* Packet Output Enable Register */
+ TIR_PIPND = 0x454, /* Packet Output Pending Register */
+ TIRDID = 0x484, /* Spider Device ID Register */
+ REISTIM = 0x500, /* Reissue Command Timeout Time Setting */
+ REISTIMEN = 0x504, /* Reissue Command Timeout Setting */
+ REISWAITEN = 0x508, /* Reissue Wait Control*/
+};
+
+#define SPIDER_CHIP_COUNT 4
+#define SPIDER_SRC_COUNT 64
+#define SPIDER_IRQ_INVALID 63
+
+struct spider_pic {
+ struct irq_host *host;
+ void __iomem *regs;
+ unsigned int node_id;
+};
+static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
+
+static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d)
+{
+ return irq_data_get_irq_chip_data(d);
+}
+
+static void __iomem *spider_get_irq_config(struct spider_pic *pic,
+ unsigned int src)
+{
+ return pic->regs + TIR_CFGA + 8 * src;
+}
+
+static void spider_unmask_irq(struct irq_data *d)
+{
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
+
+ out_be32(cfg, in_be32(cfg) | 0x30000000u);
+}
+
+static void spider_mask_irq(struct irq_data *d)
+{
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
+
+ out_be32(cfg, in_be32(cfg) & ~0x30000000u);
+}
+
+static void spider_ack_irq(struct irq_data *d)
+{
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ unsigned int src = irqd_to_hwirq(d);
+
+ /* Reset edge detection logic if necessary
+ */
+ if (irqd_is_level_type(d))
+ return;
+
+ /* Only interrupts 47 to 50 can be set to edge */
+ if (src < 47 || src > 50)
+ return;
+
+ /* Perform the clear of the edge logic */
+ out_be32(pic->regs + TIR_EDC, 0x100 | (src & 0xf));
+}
+
+static int spider_set_irq_type(struct irq_data *d, unsigned int type)
+{
+ unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ unsigned int hw = irqd_to_hwirq(d);
+ void __iomem *cfg = spider_get_irq_config(pic, hw);
+ u32 old_mask;
+ u32 ic;
+
+ /* Note that only level high is supported for most interrupts */
+ if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH &&
+ (hw < 47 || hw > 50))
+ return -EINVAL;
+
+ /* Decode sense type */
+ switch(sense) {
+ case IRQ_TYPE_EDGE_RISING:
+ ic = 0x3;
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+ ic = 0x2;
+ break;
+ case IRQ_TYPE_LEVEL_LOW:
+ ic = 0x0;
+ break;
+ case IRQ_TYPE_LEVEL_HIGH:
+ case IRQ_TYPE_NONE:
+ ic = 0x1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Configure the source. One gross hack that was there before and
+ * that I've kept around is the priority to the BE which I set to
+ * be the same as the interrupt source number. I don't know wether
+ * that's supposed to make any kind of sense however, we'll have to
+ * decide that, but for now, I'm not changing the behaviour.
+ */
+ old_mask = in_be32(cfg) & 0x30000000u;
+ out_be32(cfg, old_mask | (ic << 24) | (0x7 << 16) |
+ (pic->node_id << 4) | 0xe);
+ out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff));
+
+ return 0;
+}
+
+static struct irq_chip spider_pic = {
+ .name = "SPIDER",
+ .irq_unmask = spider_unmask_irq,
+ .irq_mask = spider_mask_irq,
+ .irq_ack = spider_ack_irq,
+ .irq_set_type = spider_set_irq_type,
+};
+
+static int spider_host_map(struct irq_host *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ irq_set_chip_data(virq, h->host_data);
+ irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq);
+
+ /* Set default irq type */
+ irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+ return 0;
+}
+
+static int spider_host_xlate(struct irq_host *h, struct device_node *ct,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+ /* Spider interrupts have 2 cells, first is the interrupt source,
+ * second, well, I don't know for sure yet ... We mask the top bits
+ * because old device-trees encode a node number in there
+ */
+ *out_hwirq = intspec[0] & 0x3f;
+ *out_flags = IRQ_TYPE_LEVEL_HIGH;
+ return 0;
+}
+
+static struct irq_host_ops spider_host_ops = {
+ .map = spider_host_map,
+ .xlate = spider_host_xlate,
+};
+
+static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct spider_pic *pic = irq_desc_get_handler_data(desc);
+ unsigned int cs, virq;
+
+ cs = in_be32(pic->regs + TIR_CS) >> 24;
+ if (cs == SPIDER_IRQ_INVALID)
+ virq = NO_IRQ;
+ else
+ virq = irq_linear_revmap(pic->host, cs);
+
+ if (virq != NO_IRQ)
+ generic_handle_irq(virq);
+
+ chip->irq_eoi(&desc->irq_data);
+}
+
+/* For hooking up the cascace we have a problem. Our device-tree is
+ * crap and we don't know on which BE iic interrupt we are hooked on at
+ * least not the "standard" way. We can reconstitute it based on two
+ * informations though: which BE node we are connected to and wether
+ * we are connected to IOIF0 or IOIF1. Right now, we really only care
+ * about the IBM cell blade and we know that its firmware gives us an
+ * interrupt-map property which is pretty strange.
+ */
+static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
+{
+ unsigned int virq;
+ const u32 *imap, *tmp;
+ int imaplen, intsize, unit;
+ struct device_node *iic;
+
+ /* First, we check wether we have a real "interrupts" in the device
+ * tree in case the device-tree is ever fixed
+ */
+ struct of_irq oirq;
+ if (of_irq_map_one(pic->host->of_node, 0, &oirq) == 0) {
+ virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
+ oirq.size);
+ return virq;
+ }
+
+ /* Now do the horrible hacks */
+ tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL);
+ if (tmp == NULL)
+ return NO_IRQ;
+ intsize = *tmp;
+ imap = of_get_property(pic->host->of_node, "interrupt-map", &imaplen);
+ if (imap == NULL || imaplen < (intsize + 1))
+ return NO_IRQ;
+ iic = of_find_node_by_phandle(imap[intsize]);
+ if (iic == NULL)
+ return NO_IRQ;
+ imap += intsize + 1;
+ tmp = of_get_property(iic, "#interrupt-cells", NULL);
+ if (tmp == NULL) {
+ of_node_put(iic);
+ return NO_IRQ;
+ }
+ intsize = *tmp;
+ /* Assume unit is last entry of interrupt specifier */
+ unit = imap[intsize - 1];
+ /* Ok, we have a unit, now let's try to get the node */
+ tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL);
+ if (tmp == NULL) {
+ of_node_put(iic);
+ return NO_IRQ;
+ }
+ /* ugly as hell but works for now */
+ pic->node_id = (*tmp) >> 1;
+ of_node_put(iic);
+
+ /* Ok, now let's get cracking. You may ask me why I just didn't match
+ * the iic host from the iic OF node, but that way I'm still compatible
+ * with really really old old firmwares for which we don't have a node
+ */
+ /* Manufacture an IIC interrupt number of class 2 */
+ virq = irq_create_mapping(NULL,
+ (pic->node_id << IIC_IRQ_NODE_SHIFT) |
+ (2 << IIC_IRQ_CLASS_SHIFT) |
+ unit);
+ if (virq == NO_IRQ)
+ printk(KERN_ERR "spider_pic: failed to map cascade !");
+ return virq;
+}
+
+
+static void __init spider_init_one(struct device_node *of_node, int chip,
+ unsigned long addr)
+{
+ struct spider_pic *pic = &spider_pics[chip];
+ int i, virq;
+
+ /* Map registers */
+ pic->regs = ioremap(addr, 0x1000);
+ if (pic->regs == NULL)
+ panic("spider_pic: can't map registers !");
+
+ /* Allocate a host */
+ pic->host = irq_alloc_host(of_node, IRQ_HOST_MAP_LINEAR,
+ SPIDER_SRC_COUNT, &spider_host_ops,
+ SPIDER_IRQ_INVALID);
+ if (pic->host == NULL)
+ panic("spider_pic: can't allocate irq host !");
+ pic->host->host_data = pic;
+
+ /* Go through all sources and disable them */
+ for (i = 0; i < SPIDER_SRC_COUNT; i++) {
+ void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i;
+ out_be32(cfg, in_be32(cfg) & ~0x30000000u);
+ }
+
+ /* do not mask any interrupts because of level */
+ out_be32(pic->regs + TIR_MSK, 0x0);
+
+ /* enable interrupt packets to be output */
+ out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1);
+
+ /* Hook up the cascade interrupt to the iic and nodeid */
+ virq = spider_find_cascade_and_node(pic);
+ if (virq == NO_IRQ)
+ return;
+ irq_set_handler_data(virq, pic);
+ irq_set_chained_handler(virq, spider_irq_cascade);
+
+ printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %s\n",
+ pic->node_id, addr, of_node->full_name);
+
+ /* Enable the interrupt detection enable bit. Do this last! */
+ out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1);
+}
+
+void __init spider_init_IRQ(void)
+{
+ struct resource r;
+ struct device_node *dn;
+ int chip = 0;
+
+ /* XXX node numbers are totally bogus. We _hope_ we get the device
+ * nodes in the right order here but that's definitely not guaranteed,
+ * we need to get the node from the device tree instead.
+ * There is currently no proper property for it (but our whole
+ * device-tree is bogus anyway) so all we can do is pray or maybe test
+ * the address and deduce the node-id
+ */
+ for (dn = NULL;
+ (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+ if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) {
+ if (of_address_to_resource(dn, 0, &r)) {
+ printk(KERN_WARNING "spider-pic: Failed\n");
+ continue;
+ }
+ } else if (of_device_is_compatible(dn, "sti,platform-spider-pic")
+ && (chip < 2)) {
+ static long hard_coded_pics[] =
+ { 0x24000008000ul, 0x34000008000ul};
+ r.start = hard_coded_pics[chip];
+ } else
+ continue;
+ spider_init_one(dn, chip++, r.start);
+ }
+}
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
new file mode 100644
index 00000000000..8b1213993b1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -0,0 +1,852 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/linux_logo.h>
+#include <linux/syscore_ops.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/xmon.h>
+#include <asm/prom.h>
+#include <asm/kexec.h>
+
+const struct spu_management_ops *spu_management_ops;
+EXPORT_SYMBOL_GPL(spu_management_ops);
+
+const struct spu_priv1_ops *spu_priv1_ops;
+EXPORT_SYMBOL_GPL(spu_priv1_ops);
+
+struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];
+EXPORT_SYMBOL_GPL(cbe_spu_info);
+
+/*
+ * The spufs fault-handling code needs to call force_sig_info to raise signals
+ * on DMA errors. Export it here to avoid general kernel-wide access to this
+ * function
+ */
+EXPORT_SYMBOL_GPL(force_sig_info);
+
+/*
+ * Protects cbe_spu_info and spu->number.
+ */
+static DEFINE_SPINLOCK(spu_lock);
+
+/*
+ * List of all spus in the system.
+ *
+ * This list is iterated by callers from irq context and callers that
+ * want to sleep. Thus modifications need to be done with both
+ * spu_full_list_lock and spu_full_list_mutex held, while iterating
+ * through it requires either of these locks.
+ *
+ * In addition spu_full_list_lock protects all assignmens to
+ * spu->mm.
+ */
+static LIST_HEAD(spu_full_list);
+static DEFINE_SPINLOCK(spu_full_list_lock);
+static DEFINE_MUTEX(spu_full_list_mutex);
+
+struct spu_slb {
+ u64 esid, vsid;
+};
+
+void spu_invalidate_slbs(struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ unsigned long flags;
+
+ spin_lock_irqsave(&spu->register_lock, flags);
+ if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)
+ out_be64(&priv2->slb_invalidate_all_W, 0UL);
+ spin_unlock_irqrestore(&spu->register_lock, flags);
+}
+EXPORT_SYMBOL_GPL(spu_invalidate_slbs);
+
+/* This is called by the MM core when a segment size is changed, to
+ * request a flush of all the SPEs using a given mm
+ */
+void spu_flush_all_slbs(struct mm_struct *mm)
+{
+ struct spu *spu;
+ unsigned long flags;
+
+ spin_lock_irqsave(&spu_full_list_lock, flags);
+ list_for_each_entry(spu, &spu_full_list, full_list) {
+ if (spu->mm == mm)
+ spu_invalidate_slbs(spu);
+ }
+ spin_unlock_irqrestore(&spu_full_list_lock, flags);
+}
+
+/* The hack below stinks... try to do something better one of
+ * these days... Does it even work properly with NR_CPUS == 1 ?
+ */
+static inline void mm_needs_global_tlbie(struct mm_struct *mm)
+{
+ int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
+
+ /* Global TLBIE broadcast required with SPEs. */
+ bitmap_fill(cpumask_bits(mm_cpumask(mm)), nr);
+}
+
+void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&spu_full_list_lock, flags);
+ spu->mm = mm;
+ spin_unlock_irqrestore(&spu_full_list_lock, flags);
+ if (mm)
+ mm_needs_global_tlbie(mm);
+}
+EXPORT_SYMBOL_GPL(spu_associate_mm);
+
+int spu_64k_pages_available(void)
+{
+ return mmu_psize_defs[MMU_PAGE_64K].shift != 0;
+}
+EXPORT_SYMBOL_GPL(spu_64k_pages_available);
+
+static void spu_restart_dma(struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags))
+ out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+ else {
+ set_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);
+ mb();
+ }
+}
+
+static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ pr_debug("%s: adding SLB[%d] 0x%016llx 0x%016llx\n",
+ __func__, slbe, slb->vsid, slb->esid);
+
+ out_be64(&priv2->slb_index_W, slbe);
+ /* set invalid before writing vsid */
+ out_be64(&priv2->slb_esid_RW, 0);
+ /* now it's safe to write the vsid */
+ out_be64(&priv2->slb_vsid_RW, slb->vsid);
+ /* setting the new esid makes the entry valid again */
+ out_be64(&priv2->slb_esid_RW, slb->esid);
+}
+
+static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
+{
+ struct mm_struct *mm = spu->mm;
+ struct spu_slb slb;
+ int psize;
+
+ pr_debug("%s\n", __func__);
+
+ slb.esid = (ea & ESID_MASK) | SLB_ESID_V;
+
+ switch(REGION_ID(ea)) {
+ case USER_REGION_ID:
+#ifdef CONFIG_PPC_MM_SLICES
+ psize = get_slice_psize(mm, ea);
+#else
+ psize = mm->context.user_psize;
+#endif
+ slb.vsid = (get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M)
+ << SLB_VSID_SHIFT) | SLB_VSID_USER;
+ break;
+ case VMALLOC_REGION_ID:
+ if (ea < VMALLOC_END)
+ psize = mmu_vmalloc_psize;
+ else
+ psize = mmu_io_psize;
+ slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M)
+ << SLB_VSID_SHIFT) | SLB_VSID_KERNEL;
+ break;
+ case KERNEL_REGION_ID:
+ psize = mmu_linear_psize;
+ slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M)
+ << SLB_VSID_SHIFT) | SLB_VSID_KERNEL;
+ break;
+ default:
+ /* Future: support kernel segments so that drivers
+ * can use SPUs.
+ */
+ pr_debug("invalid region access at %016lx\n", ea);
+ return 1;
+ }
+ slb.vsid |= mmu_psize_defs[psize].sllp;
+
+ spu_load_slb(spu, spu->slb_replace, &slb);
+
+ spu->slb_replace++;
+ if (spu->slb_replace >= 8)
+ spu->slb_replace = 0;
+
+ spu_restart_dma(spu);
+ spu->stats.slb_flt++;
+ return 0;
+}
+
+extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX
+static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
+{
+ int ret;
+
+ pr_debug("%s, %llx, %lx\n", __func__, dsisr, ea);
+
+ /*
+ * Handle kernel space hash faults immediately. User hash
+ * faults need to be deferred to process context.
+ */
+ if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
+ (REGION_ID(ea) != USER_REGION_ID)) {
+
+ spin_unlock(&spu->register_lock);
+ ret = hash_page(ea, _PAGE_PRESENT, 0x300);
+ spin_lock(&spu->register_lock);
+
+ if (!ret) {
+ spu_restart_dma(spu);
+ return 0;
+ }
+ }
+
+ spu->class_1_dar = ea;
+ spu->class_1_dsisr = dsisr;
+
+ spu->stop_callback(spu, 1);
+
+ spu->class_1_dar = 0;
+ spu->class_1_dsisr = 0;
+
+ return 0;
+}
+
+static void __spu_kernel_slb(void *addr, struct spu_slb *slb)
+{
+ unsigned long ea = (unsigned long)addr;
+ u64 llp;
+
+ if (REGION_ID(ea) == KERNEL_REGION_ID)
+ llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ else
+ llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+
+ slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) |
+ SLB_VSID_KERNEL | llp;
+ slb->esid = (ea & ESID_MASK) | SLB_ESID_V;
+}
+
+/**
+ * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the
+ * address @new_addr is present.
+ */
+static inline int __slb_present(struct spu_slb *slbs, int nr_slbs,
+ void *new_addr)
+{
+ unsigned long ea = (unsigned long)new_addr;
+ int i;
+
+ for (i = 0; i < nr_slbs; i++)
+ if (!((slbs[i].esid ^ ea) & ESID_MASK))
+ return 1;
+
+ return 0;
+}
+
+/**
+ * Setup the SPU kernel SLBs, in preparation for a context save/restore. We
+ * need to map both the context save area, and the save/restore code.
+ *
+ * Because the lscsa and code may cross segment boundaires, we check to see
+ * if mappings are required for the start and end of each range. We currently
+ * assume that the mappings are smaller that one segment - if not, something
+ * is seriously wrong.
+ */
+void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa,
+ void *code, int code_size)
+{
+ struct spu_slb slbs[4];
+ int i, nr_slbs = 0;
+ /* start and end addresses of both mappings */
+ void *addrs[] = {
+ lscsa, (void *)lscsa + sizeof(*lscsa) - 1,
+ code, code + code_size - 1
+ };
+
+ /* check the set of addresses, and create a new entry in the slbs array
+ * if there isn't already a SLB for that address */
+ for (i = 0; i < ARRAY_SIZE(addrs); i++) {
+ if (__slb_present(slbs, nr_slbs, addrs[i]))
+ continue;
+
+ __spu_kernel_slb(addrs[i], &slbs[nr_slbs]);
+ nr_slbs++;
+ }
+
+ spin_lock_irq(&spu->register_lock);
+ /* Add the set of SLBs */
+ for (i = 0; i < nr_slbs; i++)
+ spu_load_slb(spu, i, &slbs[i]);
+ spin_unlock_irq(&spu->register_lock);
+}
+EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs);
+
+static irqreturn_t
+spu_irq_class_0(int irq, void *data)
+{
+ struct spu *spu;
+ unsigned long stat, mask;
+
+ spu = data;
+
+ spin_lock(&spu->register_lock);
+ mask = spu_int_mask_get(spu, 0);
+ stat = spu_int_stat_get(spu, 0) & mask;
+
+ spu->class_0_pending |= stat;
+ spu->class_0_dar = spu_mfc_dar_get(spu);
+ spu->stop_callback(spu, 0);
+ spu->class_0_pending = 0;
+ spu->class_0_dar = 0;
+
+ spu_int_stat_clear(spu, 0, stat);
+ spin_unlock(&spu->register_lock);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t
+spu_irq_class_1(int irq, void *data)
+{
+ struct spu *spu;
+ unsigned long stat, mask, dar, dsisr;
+
+ spu = data;
+
+ /* atomically read & clear class1 status. */
+ spin_lock(&spu->register_lock);
+ mask = spu_int_mask_get(spu, 1);
+ stat = spu_int_stat_get(spu, 1) & mask;
+ dar = spu_mfc_dar_get(spu);
+ dsisr = spu_mfc_dsisr_get(spu);
+ if (stat & CLASS1_STORAGE_FAULT_INTR)
+ spu_mfc_dsisr_set(spu, 0ul);
+ spu_int_stat_clear(spu, 1, stat);
+
+ pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat,
+ dar, dsisr);
+
+ if (stat & CLASS1_SEGMENT_FAULT_INTR)
+ __spu_trap_data_seg(spu, dar);
+
+ if (stat & CLASS1_STORAGE_FAULT_INTR)
+ __spu_trap_data_map(spu, dar, dsisr);
+
+ if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_GET_INTR)
+ ;
+
+ if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_PUT_INTR)
+ ;
+
+ spu->class_1_dsisr = 0;
+ spu->class_1_dar = 0;
+
+ spin_unlock(&spu->register_lock);
+
+ return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static irqreturn_t
+spu_irq_class_2(int irq, void *data)
+{
+ struct spu *spu;
+ unsigned long stat;
+ unsigned long mask;
+ const int mailbox_intrs =
+ CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR;
+
+ spu = data;
+ spin_lock(&spu->register_lock);
+ stat = spu_int_stat_get(spu, 2);
+ mask = spu_int_mask_get(spu, 2);
+ /* ignore interrupts we're not waiting for */
+ stat &= mask;
+ /* mailbox interrupts are level triggered. mask them now before
+ * acknowledging */
+ if (stat & mailbox_intrs)
+ spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs));
+ /* acknowledge all interrupts before the callbacks */
+ spu_int_stat_clear(spu, 2, stat);
+
+ pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask);
+
+ if (stat & CLASS2_MAILBOX_INTR)
+ spu->ibox_callback(spu);
+
+ if (stat & CLASS2_SPU_STOP_INTR)
+ spu->stop_callback(spu, 2);
+
+ if (stat & CLASS2_SPU_HALT_INTR)
+ spu->stop_callback(spu, 2);
+
+ if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR)
+ spu->mfc_callback(spu);
+
+ if (stat & CLASS2_MAILBOX_THRESHOLD_INTR)
+ spu->wbox_callback(spu);
+
+ spu->stats.class2_intr++;
+
+ spin_unlock(&spu->register_lock);
+
+ return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int spu_request_irqs(struct spu *spu)
+{
+ int ret = 0;
+
+ if (spu->irqs[0] != NO_IRQ) {
+ snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0",
+ spu->number);
+ ret = request_irq(spu->irqs[0], spu_irq_class_0,
+ 0, spu->irq_c0, spu);
+ if (ret)
+ goto bail0;
+ }
+ if (spu->irqs[1] != NO_IRQ) {
+ snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1",
+ spu->number);
+ ret = request_irq(spu->irqs[1], spu_irq_class_1,
+ 0, spu->irq_c1, spu);
+ if (ret)
+ goto bail1;
+ }
+ if (spu->irqs[2] != NO_IRQ) {
+ snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2",
+ spu->number);
+ ret = request_irq(spu->irqs[2], spu_irq_class_2,
+ 0, spu->irq_c2, spu);
+ if (ret)
+ goto bail2;
+ }
+ return 0;
+
+bail2:
+ if (spu->irqs[1] != NO_IRQ)
+ free_irq(spu->irqs[1], spu);
+bail1:
+ if (spu->irqs[0] != NO_IRQ)
+ free_irq(spu->irqs[0], spu);
+bail0:
+ return ret;
+}
+
+static void spu_free_irqs(struct spu *spu)
+{
+ if (spu->irqs[0] != NO_IRQ)
+ free_irq(spu->irqs[0], spu);
+ if (spu->irqs[1] != NO_IRQ)
+ free_irq(spu->irqs[1], spu);
+ if (spu->irqs[2] != NO_IRQ)
+ free_irq(spu->irqs[2], spu);
+}
+
+void spu_init_channels(struct spu *spu)
+{
+ static const struct {
+ unsigned channel;
+ unsigned count;
+ } zero_list[] = {
+ { 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, },
+ { 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, },
+ }, count_list[] = {
+ { 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, },
+ { 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, },
+ { 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, },
+ };
+ struct spu_priv2 __iomem *priv2;
+ int i;
+
+ priv2 = spu->priv2;
+
+ /* initialize all channel data to zero */
+ for (i = 0; i < ARRAY_SIZE(zero_list); i++) {
+ int count;
+
+ out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel);
+ for (count = 0; count < zero_list[i].count; count++)
+ out_be64(&priv2->spu_chnldata_RW, 0);
+ }
+
+ /* initialize channel counts to meaningful values */
+ for (i = 0; i < ARRAY_SIZE(count_list); i++) {
+ out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel);
+ out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
+ }
+}
+EXPORT_SYMBOL_GPL(spu_init_channels);
+
+static struct bus_type spu_subsys = {
+ .name = "spu",
+ .dev_name = "spu",
+};
+
+int spu_add_dev_attr(struct device_attribute *attr)
+{
+ struct spu *spu;
+
+ mutex_lock(&spu_full_list_mutex);
+ list_for_each_entry(spu, &spu_full_list, full_list)
+ device_create_file(&spu->dev, attr);
+ mutex_unlock(&spu_full_list_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(spu_add_dev_attr);
+
+int spu_add_dev_attr_group(struct attribute_group *attrs)
+{
+ struct spu *spu;
+ int rc = 0;
+
+ mutex_lock(&spu_full_list_mutex);
+ list_for_each_entry(spu, &spu_full_list, full_list) {
+ rc = sysfs_create_group(&spu->dev.kobj, attrs);
+
+ /* we're in trouble here, but try unwinding anyway */
+ if (rc) {
+ printk(KERN_ERR "%s: can't create sysfs group '%s'\n",
+ __func__, attrs->name);
+
+ list_for_each_entry_continue_reverse(spu,
+ &spu_full_list, full_list)
+ sysfs_remove_group(&spu->dev.kobj, attrs);
+ break;
+ }
+ }
+
+ mutex_unlock(&spu_full_list_mutex);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(spu_add_dev_attr_group);
+
+
+void spu_remove_dev_attr(struct device_attribute *attr)
+{
+ struct spu *spu;
+
+ mutex_lock(&spu_full_list_mutex);
+ list_for_each_entry(spu, &spu_full_list, full_list)
+ device_remove_file(&spu->dev, attr);
+ mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr);
+
+void spu_remove_dev_attr_group(struct attribute_group *attrs)
+{
+ struct spu *spu;
+
+ mutex_lock(&spu_full_list_mutex);
+ list_for_each_entry(spu, &spu_full_list, full_list)
+ sysfs_remove_group(&spu->dev.kobj, attrs);
+ mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group);
+
+static int spu_create_dev(struct spu *spu)
+{
+ int ret;
+
+ spu->dev.id = spu->number;
+ spu->dev.bus = &spu_subsys;
+ ret = device_register(&spu->dev);
+ if (ret) {
+ printk(KERN_ERR "Can't register SPU %d with sysfs\n",
+ spu->number);
+ return ret;
+ }
+
+ sysfs_add_device_to_node(&spu->dev, spu->node);
+
+ return 0;
+}
+
+static int __init create_spu(void *data)
+{
+ struct spu *spu;
+ int ret;
+ static int number;
+ unsigned long flags;
+ struct timespec ts;
+
+ ret = -ENOMEM;
+ spu = kzalloc(sizeof (*spu), GFP_KERNEL);
+ if (!spu)
+ goto out;
+
+ spu->alloc_state = SPU_FREE;
+
+ spin_lock_init(&spu->register_lock);
+ spin_lock(&spu_lock);
+ spu->number = number++;
+ spin_unlock(&spu_lock);
+
+ ret = spu_create_spu(spu, data);
+
+ if (ret)
+ goto out_free;
+
+ spu_mfc_sdr_setup(spu);
+ spu_mfc_sr1_set(spu, 0x33);
+ ret = spu_request_irqs(spu);
+ if (ret)
+ goto out_destroy;
+
+ ret = spu_create_dev(spu);
+ if (ret)
+ goto out_free_irqs;
+
+ mutex_lock(&cbe_spu_info[spu->node].list_mutex);
+ list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
+ cbe_spu_info[spu->node].n_spus++;
+ mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
+
+ mutex_lock(&spu_full_list_mutex);
+ spin_lock_irqsave(&spu_full_list_lock, flags);
+ list_add(&spu->full_list, &spu_full_list);
+ spin_unlock_irqrestore(&spu_full_list_lock, flags);
+ mutex_unlock(&spu_full_list_mutex);
+
+ spu->stats.util_state = SPU_UTIL_IDLE_LOADED;
+ ktime_get_ts(&ts);
+ spu->stats.tstamp = timespec_to_ns(&ts);
+
+ INIT_LIST_HEAD(&spu->aff_list);
+
+ goto out;
+
+out_free_irqs:
+ spu_free_irqs(spu);
+out_destroy:
+ spu_destroy_spu(spu);
+out_free:
+ kfree(spu);
+out:
+ return ret;
+}
+
+static const char *spu_state_names[] = {
+ "user", "system", "iowait", "idle"
+};
+
+static unsigned long long spu_acct_time(struct spu *spu,
+ enum spu_utilization_state state)
+{
+ struct timespec ts;
+ unsigned long long time = spu->stats.times[state];
+
+ /*
+ * If the spu is idle or the context is stopped, utilization
+ * statistics are not updated. Apply the time delta from the
+ * last recorded state of the spu.
+ */
+ if (spu->stats.util_state == state) {
+ ktime_get_ts(&ts);
+ time += timespec_to_ns(&ts) - spu->stats.tstamp;
+ }
+
+ return time / NSEC_PER_MSEC;
+}
+
+
+static ssize_t spu_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct spu *spu = container_of(dev, struct spu, dev);
+
+ return sprintf(buf, "%s %llu %llu %llu %llu "
+ "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+ spu_state_names[spu->stats.util_state],
+ spu_acct_time(spu, SPU_UTIL_USER),
+ spu_acct_time(spu, SPU_UTIL_SYSTEM),
+ spu_acct_time(spu, SPU_UTIL_IOWAIT),
+ spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),
+ spu->stats.vol_ctx_switch,
+ spu->stats.invol_ctx_switch,
+ spu->stats.slb_flt,
+ spu->stats.hash_flt,
+ spu->stats.min_flt,
+ spu->stats.maj_flt,
+ spu->stats.class2_intr,
+ spu->stats.libassist);
+}
+
+static DEVICE_ATTR(stat, 0644, spu_stat_show, NULL);
+
+#ifdef CONFIG_KEXEC
+
+struct crash_spu_info {
+ struct spu *spu;
+ u32 saved_spu_runcntl_RW;
+ u32 saved_spu_status_R;
+ u32 saved_spu_npc_RW;
+ u64 saved_mfc_sr1_RW;
+ u64 saved_mfc_dar;
+ u64 saved_mfc_dsisr;
+};
+
+#define CRASH_NUM_SPUS 16 /* Enough for current hardware */
+static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
+
+static void crash_kexec_stop_spus(void)
+{
+ struct spu *spu;
+ int i;
+ u64 tmp;
+
+ for (i = 0; i < CRASH_NUM_SPUS; i++) {
+ if (!crash_spu_info[i].spu)
+ continue;
+
+ spu = crash_spu_info[i].spu;
+
+ crash_spu_info[i].saved_spu_runcntl_RW =
+ in_be32(&spu->problem->spu_runcntl_RW);
+ crash_spu_info[i].saved_spu_status_R =
+ in_be32(&spu->problem->spu_status_R);
+ crash_spu_info[i].saved_spu_npc_RW =
+ in_be32(&spu->problem->spu_npc_RW);
+
+ crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu);
+ crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu);
+ tmp = spu_mfc_sr1_get(spu);
+ crash_spu_info[i].saved_mfc_sr1_RW = tmp;
+
+ tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+ spu_mfc_sr1_set(spu, tmp);
+
+ __delay(200);
+ }
+}
+
+static void crash_register_spus(struct list_head *list)
+{
+ struct spu *spu;
+ int ret;
+
+ list_for_each_entry(spu, list, full_list) {
+ if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
+ continue;
+
+ crash_spu_info[spu->number].spu = spu;
+ }
+
+ ret = crash_shutdown_register(&crash_kexec_stop_spus);
+ if (ret)
+ printk(KERN_ERR "Could not register SPU crash handler");
+}
+
+#else
+static inline void crash_register_spus(struct list_head *list)
+{
+}
+#endif
+
+static void spu_shutdown(void)
+{
+ struct spu *spu;
+
+ mutex_lock(&spu_full_list_mutex);
+ list_for_each_entry(spu, &spu_full_list, full_list) {
+ spu_free_irqs(spu);
+ spu_destroy_spu(spu);
+ }
+ mutex_unlock(&spu_full_list_mutex);
+}
+
+static struct syscore_ops spu_syscore_ops = {
+ .shutdown = spu_shutdown,
+};
+
+static int __init init_spu_base(void)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ mutex_init(&cbe_spu_info[i].list_mutex);
+ INIT_LIST_HEAD(&cbe_spu_info[i].spus);
+ }
+
+ if (!spu_management_ops)
+ goto out;
+
+ /* create system subsystem for spus */
+ ret = subsys_system_register(&spu_subsys, NULL);
+ if (ret)
+ goto out;
+
+ ret = spu_enumerate_spus(create_spu);
+
+ if (ret < 0) {
+ printk(KERN_WARNING "%s: Error initializing spus\n",
+ __func__);
+ goto out_unregister_subsys;
+ }
+
+ if (ret > 0)
+ fb_append_extra_logo(&logo_spe_clut224, ret);
+
+ mutex_lock(&spu_full_list_mutex);
+ xmon_register_spus(&spu_full_list);
+ crash_register_spus(&spu_full_list);
+ mutex_unlock(&spu_full_list_mutex);
+ spu_add_dev_attr(&dev_attr_stat);
+ register_syscore_ops(&spu_syscore_ops);
+
+ spu_init_affinity();
+
+ return 0;
+
+ out_unregister_subsys:
+ bus_unregister(&spu_subsys);
+ out:
+ return ret;
+}
+module_init(init_spu_base);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
new file mode 100644
index 00000000000..75d613313f1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -0,0 +1,74 @@
+/*
+ * System call callback functions for SPUs
+ */
+
+#undef DEBUG
+
+#include <linux/kallsyms.h>
+#include <linux/export.h>
+#include <linux/syscalls.h>
+
+#include <asm/spu.h>
+#include <asm/syscalls.h>
+#include <asm/unistd.h>
+
+/*
+ * This table defines the system calls that an SPU can call.
+ * It is currently a subset of the 64 bit powerpc system calls,
+ * with the exact semantics.
+ *
+ * The reasons for disabling some of the system calls are:
+ * 1. They interact with the way SPU syscalls are handled
+ * and we can't let them execute ever:
+ * restart_syscall, exit, for, execve, ptrace, ...
+ * 2. They are deprecated and replaced by other means:
+ * uselib, pciconfig_*, sysfs, ...
+ * 3. They are somewhat interacting with the system in a way
+ * we don't want an SPU to:
+ * reboot, init_module, mount, kexec_load
+ * 4. They are optional and we can't rely on them being
+ * linked into the kernel. Unfortunately, the cond_syscall
+ * helper does not work here as it does not add the necessary
+ * opd symbols:
+ * mbind, mq_open, ipc, ...
+ */
+
+static void *spu_syscall_table[] = {
+#define SYSCALL(func) sys_ni_syscall,
+#define COMPAT_SYS(func) sys_ni_syscall,
+#define PPC_SYS(func) sys_ni_syscall,
+#define OLDSYS(func) sys_ni_syscall,
+#define SYS32ONLY(func) sys_ni_syscall,
+#define SYSX(f, f3264, f32) sys_ni_syscall,
+
+#define SYSCALL_SPU(func) sys_##func,
+#define COMPAT_SYS_SPU(func) sys_##func,
+#define PPC_SYS_SPU(func) ppc_##func,
+#define SYSX_SPU(f, f3264, f32) f,
+
+#include <asm/systbl.h>
+};
+
+long spu_sys_callback(struct spu_syscall_block *s)
+{
+ long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6);
+
+ if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) {
+ pr_debug("%s: invalid syscall #%lld", __func__, s->nr_ret);
+ return -ENOSYS;
+ }
+
+ syscall = spu_syscall_table[s->nr_ret];
+
+#ifdef DEBUG
+ print_symbol(KERN_DEBUG "SPU-syscall %s:", (unsigned long)syscall);
+ printk("syscall%ld(%lx, %lx, %lx, %lx, %lx, %lx)\n",
+ s->nr_ret,
+ s->parm[0], s->parm[1], s->parm[2],
+ s->parm[3], s->parm[4], s->parm[5]);
+#endif
+
+ return syscall(s->parm[0], s->parm[1], s->parm[2],
+ s->parm[3], s->parm[4], s->parm[5]);
+}
+EXPORT_SYMBOL_GPL(spu_sys_callback);
diff --git a/arch/powerpc/platforms/cell/spu_fault.c b/arch/powerpc/platforms/cell/spu_fault.c
new file mode 100644
index 00000000000..641e7273d75
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_fault.c
@@ -0,0 +1,94 @@
+/*
+ * SPU mm fault handler
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2007
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+/*
+ * This ought to be kept in sync with the powerpc specific do_page_fault
+ * function. Currently, there are a few corner cases that we haven't had
+ * to handle fortunately.
+ */
+int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
+ unsigned long dsisr, unsigned *flt)
+{
+ struct vm_area_struct *vma;
+ unsigned long is_write;
+ int ret;
+
+ if (mm == NULL)
+ return -EFAULT;
+
+ if (mm->pgd == NULL)
+ return -EFAULT;
+
+ down_read(&mm->mmap_sem);
+ ret = -EFAULT;
+ vma = find_vma(mm, ea);
+ if (!vma)
+ goto out_unlock;
+
+ if (ea < vma->vm_start) {
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto out_unlock;
+ if (expand_stack(vma, ea))
+ goto out_unlock;
+ }
+
+ is_write = dsisr & MFC_DSISR_ACCESS_PUT;
+ if (is_write) {
+ if (!(vma->vm_flags & VM_WRITE))
+ goto out_unlock;
+ } else {
+ if (dsisr & MFC_DSISR_ACCESS_DENIED)
+ goto out_unlock;
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ goto out_unlock;
+ }
+
+ ret = 0;
+ *flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
+ if (unlikely(*flt & VM_FAULT_ERROR)) {
+ if (*flt & VM_FAULT_OOM) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ } else if (*flt & VM_FAULT_SIGBUS) {
+ ret = -EFAULT;
+ goto out_unlock;
+ }
+ BUG();
+ }
+
+ if (*flt & VM_FAULT_MAJOR)
+ current->maj_flt++;
+ else
+ current->min_flt++;
+
+out_unlock:
+ up_read(&mm->mmap_sem);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(spu_handle_mm_fault);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
new file mode 100644
index 00000000000..2bb6977c0a5
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -0,0 +1,556 @@
+/*
+ * spu management operations for of based platforms
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * Copyright 2006 Sony Corp.
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/export.h>
+#include <linux/ptrace.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+
+#include "spufs/spufs.h"
+#include "interrupt.h"
+
+struct device_node *spu_devnode(struct spu *spu)
+{
+ return spu->devnode;
+}
+
+EXPORT_SYMBOL_GPL(spu_devnode);
+
+static u64 __init find_spu_unit_number(struct device_node *spe)
+{
+ const unsigned int *prop;
+ int proplen;
+
+ /* new device trees should provide the physical-id attribute */
+ prop = of_get_property(spe, "physical-id", &proplen);
+ if (proplen == 4)
+ return (u64)*prop;
+
+ /* celleb device tree provides the unit-id */
+ prop = of_get_property(spe, "unit-id", &proplen);
+ if (proplen == 4)
+ return (u64)*prop;
+
+ /* legacy device trees provide the id in the reg attribute */
+ prop = of_get_property(spe, "reg", &proplen);
+ if (proplen == 4)
+ return (u64)*prop;
+
+ return 0;
+}
+
+static void spu_unmap(struct spu *spu)
+{
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ iounmap(spu->priv1);
+ iounmap(spu->priv2);
+ iounmap(spu->problem);
+ iounmap((__force u8 __iomem *)spu->local_store);
+}
+
+static int __init spu_map_interrupts_old(struct spu *spu,
+ struct device_node *np)
+{
+ unsigned int isrc;
+ const u32 *tmp;
+ int nid;
+
+ /* Get the interrupt source unit from the device-tree */
+ tmp = of_get_property(np, "isrc", NULL);
+ if (!tmp)
+ return -ENODEV;
+ isrc = tmp[0];
+
+ tmp = of_get_property(np->parent->parent, "node-id", NULL);
+ if (!tmp) {
+ printk(KERN_WARNING "%s: can't find node-id\n", __func__);
+ nid = spu->node;
+ } else
+ nid = tmp[0];
+
+ /* Add the node number */
+ isrc |= nid << IIC_IRQ_NODE_SHIFT;
+
+ /* Now map interrupts of all 3 classes */
+ spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc);
+ spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc);
+ spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc);
+
+ /* Right now, we only fail if class 2 failed */
+ return spu->irqs[2] == NO_IRQ ? -EINVAL : 0;
+}
+
+static void __iomem * __init spu_map_prop_old(struct spu *spu,
+ struct device_node *n,
+ const char *name)
+{
+ const struct address_prop {
+ unsigned long address;
+ unsigned int len;
+ } __attribute__((packed)) *prop;
+ int proplen;
+
+ prop = of_get_property(n, name, &proplen);
+ if (prop == NULL || proplen != sizeof (struct address_prop))
+ return NULL;
+
+ return ioremap(prop->address, prop->len);
+}
+
+static int __init spu_map_device_old(struct spu *spu)
+{
+ struct device_node *node = spu->devnode;
+ const char *prop;
+ int ret;
+
+ ret = -ENODEV;
+ spu->name = of_get_property(node, "name", NULL);
+ if (!spu->name)
+ goto out;
+
+ prop = of_get_property(node, "local-store", NULL);
+ if (!prop)
+ goto out;
+ spu->local_store_phys = *(unsigned long *)prop;
+
+ /* we use local store as ram, not io memory */
+ spu->local_store = (void __force *)
+ spu_map_prop_old(spu, node, "local-store");
+ if (!spu->local_store)
+ goto out;
+
+ prop = of_get_property(node, "problem", NULL);
+ if (!prop)
+ goto out_unmap;
+ spu->problem_phys = *(unsigned long *)prop;
+
+ spu->problem = spu_map_prop_old(spu, node, "problem");
+ if (!spu->problem)
+ goto out_unmap;
+
+ spu->priv2 = spu_map_prop_old(spu, node, "priv2");
+ if (!spu->priv2)
+ goto out_unmap;
+
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ spu->priv1 = spu_map_prop_old(spu, node, "priv1");
+ if (!spu->priv1)
+ goto out_unmap;
+ }
+
+ ret = 0;
+ goto out;
+
+out_unmap:
+ spu_unmap(spu);
+out:
+ return ret;
+}
+
+static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
+{
+ struct of_irq oirq;
+ int ret;
+ int i;
+
+ for (i=0; i < 3; i++) {
+ ret = of_irq_map_one(np, i, &oirq);
+ if (ret) {
+ pr_debug("spu_new: failed to get irq %d\n", i);
+ goto err;
+ }
+ ret = -EINVAL;
+ pr_debug(" irq %d no 0x%x on %s\n", i, oirq.specifier[0],
+ oirq.controller->full_name);
+ spu->irqs[i] = irq_create_of_mapping(oirq.controller,
+ oirq.specifier, oirq.size);
+ if (spu->irqs[i] == NO_IRQ) {
+ pr_debug("spu_new: failed to map it !\n");
+ goto err;
+ }
+ }
+ return 0;
+
+err:
+ pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier,
+ spu->name);
+ for (; i >= 0; i--) {
+ if (spu->irqs[i] != NO_IRQ)
+ irq_dispose_mapping(spu->irqs[i]);
+ }
+ return ret;
+}
+
+static int spu_map_resource(struct spu *spu, int nr,
+ void __iomem** virt, unsigned long *phys)
+{
+ struct device_node *np = spu->devnode;
+ struct resource resource = { };
+ unsigned long len;
+ int ret;
+
+ ret = of_address_to_resource(np, nr, &resource);
+ if (ret)
+ return ret;
+ if (phys)
+ *phys = resource.start;
+ len = resource_size(&resource);
+ *virt = ioremap(resource.start, len);
+ if (!*virt)
+ return -EINVAL;
+ return 0;
+}
+
+static int __init spu_map_device(struct spu *spu)
+{
+ struct device_node *np = spu->devnode;
+ int ret = -ENODEV;
+
+ spu->name = of_get_property(np, "name", NULL);
+ if (!spu->name)
+ goto out;
+
+ ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store,
+ &spu->local_store_phys);
+ if (ret) {
+ pr_debug("spu_new: failed to map %s resource 0\n",
+ np->full_name);
+ goto out;
+ }
+ ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem,
+ &spu->problem_phys);
+ if (ret) {
+ pr_debug("spu_new: failed to map %s resource 1\n",
+ np->full_name);
+ goto out_unmap;
+ }
+ ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL);
+ if (ret) {
+ pr_debug("spu_new: failed to map %s resource 2\n",
+ np->full_name);
+ goto out_unmap;
+ }
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ ret = spu_map_resource(spu, 3,
+ (void __iomem**)&spu->priv1, NULL);
+ if (ret) {
+ pr_debug("spu_new: failed to map %s resource 3\n",
+ np->full_name);
+ goto out_unmap;
+ }
+ pr_debug("spu_new: %s maps:\n", np->full_name);
+ pr_debug(" local store : 0x%016lx -> 0x%p\n",
+ spu->local_store_phys, spu->local_store);
+ pr_debug(" problem state : 0x%016lx -> 0x%p\n",
+ spu->problem_phys, spu->problem);
+ pr_debug(" priv2 : 0x%p\n", spu->priv2);
+ pr_debug(" priv1 : 0x%p\n", spu->priv1);
+
+ return 0;
+
+out_unmap:
+ spu_unmap(spu);
+out:
+ pr_debug("failed to map spe %s: %d\n", spu->name, ret);
+ return ret;
+}
+
+static int __init of_enumerate_spus(int (*fn)(void *data))
+{
+ int ret;
+ struct device_node *node;
+ unsigned int n = 0;
+
+ ret = -ENODEV;
+ for (node = of_find_node_by_type(NULL, "spe");
+ node; node = of_find_node_by_type(node, "spe")) {
+ ret = fn(node);
+ if (ret) {
+ printk(KERN_WARNING "%s: Error initializing %s\n",
+ __func__, node->name);
+ break;
+ }
+ n++;
+ }
+ return ret ? ret : n;
+}
+
+static int __init of_create_spu(struct spu *spu, void *data)
+{
+ int ret;
+ struct device_node *spe = (struct device_node *)data;
+ static int legacy_map = 0, legacy_irq = 0;
+
+ spu->devnode = of_node_get(spe);
+ spu->spe_id = find_spu_unit_number(spe);
+
+ spu->node = of_node_to_nid(spe);
+ if (spu->node >= MAX_NUMNODES) {
+ printk(KERN_WARNING "SPE %s on node %d ignored,"
+ " node number too big\n", spe->full_name, spu->node);
+ printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n");
+ ret = -ENODEV;
+ goto out;
+ }
+
+ ret = spu_map_device(spu);
+ if (ret) {
+ if (!legacy_map) {
+ legacy_map = 1;
+ printk(KERN_WARNING "%s: Legacy device tree found, "
+ "trying to map old style\n", __func__);
+ }
+ ret = spu_map_device_old(spu);
+ if (ret) {
+ printk(KERN_ERR "Unable to map %s\n",
+ spu->name);
+ goto out;
+ }
+ }
+
+ ret = spu_map_interrupts(spu, spe);
+ if (ret) {
+ if (!legacy_irq) {
+ legacy_irq = 1;
+ printk(KERN_WARNING "%s: Legacy device tree found, "
+ "trying old style irq\n", __func__);
+ }
+ ret = spu_map_interrupts_old(spu, spe);
+ if (ret) {
+ printk(KERN_ERR "%s: could not map interrupts\n",
+ spu->name);
+ goto out_unmap;
+ }
+ }
+
+ pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name,
+ spu->local_store, spu->problem, spu->priv1,
+ spu->priv2, spu->number);
+ goto out;
+
+out_unmap:
+ spu_unmap(spu);
+out:
+ return ret;
+}
+
+static int of_destroy_spu(struct spu *spu)
+{
+ spu_unmap(spu);
+ of_node_put(spu->devnode);
+ return 0;
+}
+
+static void enable_spu_by_master_run(struct spu_context *ctx)
+{
+ ctx->ops->master_start(ctx);
+}
+
+static void disable_spu_by_master_run(struct spu_context *ctx)
+{
+ ctx->ops->master_stop(ctx);
+}
+
+/* Hardcoded affinity idxs for qs20 */
+#define QS20_SPES_PER_BE 8
+static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 };
+static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };
+
+static struct spu *spu_lookup_reg(int node, u32 reg)
+{
+ struct spu *spu;
+ const u32 *spu_reg;
+
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ spu_reg = of_get_property(spu_devnode(spu), "reg", NULL);
+ if (*spu_reg == reg)
+ return spu;
+ }
+ return NULL;
+}
+
+static void init_affinity_qs20_harcoded(void)
+{
+ int node, i;
+ struct spu *last_spu, *spu;
+ u32 reg;
+
+ for (node = 0; node < MAX_NUMNODES; node++) {
+ last_spu = NULL;
+ for (i = 0; i < QS20_SPES_PER_BE; i++) {
+ reg = qs20_reg_idxs[i];
+ spu = spu_lookup_reg(node, reg);
+ if (!spu)
+ continue;
+ spu->has_mem_affinity = qs20_reg_memory[reg];
+ if (last_spu)
+ list_add_tail(&spu->aff_list,
+ &last_spu->aff_list);
+ last_spu = spu;
+ }
+ }
+}
+
+static int of_has_vicinity(void)
+{
+ struct device_node *dn;
+
+ for_each_node_by_type(dn, "spe") {
+ if (of_find_property(dn, "vicinity", NULL)) {
+ of_node_put(dn);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static struct spu *devnode_spu(int cbe, struct device_node *dn)
+{
+ struct spu *spu;
+
+ list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list)
+ if (spu_devnode(spu) == dn)
+ return spu;
+ return NULL;
+}
+
+static struct spu *
+neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid)
+{
+ struct spu *spu;
+ struct device_node *spu_dn;
+ const phandle *vic_handles;
+ int lenp, i;
+
+ list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) {
+ spu_dn = spu_devnode(spu);
+ if (spu_dn == avoid)
+ continue;
+ vic_handles = of_get_property(spu_dn, "vicinity", &lenp);
+ for (i=0; i < (lenp / sizeof(phandle)); i++) {
+ if (vic_handles[i] == target->phandle)
+ return spu;
+ }
+ }
+ return NULL;
+}
+
+static void init_affinity_node(int cbe)
+{
+ struct spu *spu, *last_spu;
+ struct device_node *vic_dn, *last_spu_dn;
+ phandle avoid_ph;
+ const phandle *vic_handles;
+ const char *name;
+ int lenp, i, added;
+
+ last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu,
+ cbe_list);
+ avoid_ph = 0;
+ for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) {
+ last_spu_dn = spu_devnode(last_spu);
+ vic_handles = of_get_property(last_spu_dn, "vicinity", &lenp);
+
+ /*
+ * Walk through each phandle in vicinity property of the spu
+ * (tipically two vicinity phandles per spe node)
+ */
+ for (i = 0; i < (lenp / sizeof(phandle)); i++) {
+ if (vic_handles[i] == avoid_ph)
+ continue;
+
+ vic_dn = of_find_node_by_phandle(vic_handles[i]);
+ if (!vic_dn)
+ continue;
+
+ /* a neighbour might be spe, mic-tm, or bif0 */
+ name = of_get_property(vic_dn, "name", NULL);
+ if (!name)
+ continue;
+
+ if (strcmp(name, "spe") == 0) {
+ spu = devnode_spu(cbe, vic_dn);
+ avoid_ph = last_spu_dn->phandle;
+ } else {
+ /*
+ * "mic-tm" and "bif0" nodes do not have
+ * vicinity property. So we need to find the
+ * spe which has vic_dn as neighbour, but
+ * skipping the one we came from (last_spu_dn)
+ */
+ spu = neighbour_spu(cbe, vic_dn, last_spu_dn);
+ if (!spu)
+ continue;
+ if (!strcmp(name, "mic-tm")) {
+ last_spu->has_mem_affinity = 1;
+ spu->has_mem_affinity = 1;
+ }
+ avoid_ph = vic_dn->phandle;
+ }
+
+ list_add_tail(&spu->aff_list, &last_spu->aff_list);
+ last_spu = spu;
+ break;
+ }
+ }
+}
+
+static void init_affinity_fw(void)
+{
+ int cbe;
+
+ for (cbe = 0; cbe < MAX_NUMNODES; cbe++)
+ init_affinity_node(cbe);
+}
+
+static int __init init_affinity(void)
+{
+ if (of_has_vicinity()) {
+ init_affinity_fw();
+ } else {
+ long root = of_get_flat_dt_root();
+ if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
+ init_affinity_qs20_harcoded();
+ else
+ printk("No affinity configuration found\n");
+ }
+
+ return 0;
+}
+
+const struct spu_management_ops spu_management_of_ops = {
+ .enumerate_spus = of_enumerate_spus,
+ .create_spu = of_create_spu,
+ .destroy_spu = of_destroy_spu,
+ .enable_spu = enable_spu_by_master_run,
+ .disable_spu = disable_spu_by_master_run,
+ .init_affinity = init_affinity,
+};
diff --git a/arch/powerpc/platforms/cell/spu_notify.c b/arch/powerpc/platforms/cell/spu_notify.c
new file mode 100644
index 00000000000..afdf857c318
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_notify.c
@@ -0,0 +1,68 @@
+/*
+ * Move OProfile dependencies from spufs module to the kernel so it
+ * can run on non-cell PPC.
+ *
+ * Copyright (C) IBM 2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/export.h>
+#include <linux/notifier.h>
+#include <asm/spu.h>
+#include "spufs/spufs.h"
+
+static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
+
+void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
+{
+ blocking_notifier_call_chain(&spu_switch_notifier,
+ ctx ? ctx->object_id : 0, spu);
+}
+EXPORT_SYMBOL_GPL(spu_switch_notify);
+
+int spu_switch_event_register(struct notifier_block *n)
+{
+ int ret;
+ ret = blocking_notifier_chain_register(&spu_switch_notifier, n);
+ if (!ret)
+ notify_spus_active();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(spu_switch_event_register);
+
+int spu_switch_event_unregister(struct notifier_block *n)
+{
+ return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
+}
+EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
+
+void spu_set_profile_private_kref(struct spu_context *ctx,
+ struct kref *prof_info_kref,
+ void (* prof_info_release) (struct kref *kref))
+{
+ ctx->prof_priv_kref = prof_info_kref;
+ ctx->prof_priv_release = prof_info_release;
+}
+EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
+
+void *spu_get_profile_private_kref(struct spu_context *ctx)
+{
+ return ctx->prof_priv_kref;
+}
+EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
+
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
new file mode 100644
index 00000000000..66d33724f16
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -0,0 +1,180 @@
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * Copyright 2006 Sony Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/ptrace.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/sched.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+
+#include "interrupt.h"
+#include "spu_priv1_mmio.h"
+
+static void int_mask_and(struct spu *spu, int class, u64 mask)
+{
+ u64 old_mask;
+
+ old_mask = in_be64(&spu->priv1->int_mask_RW[class]);
+ out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask);
+}
+
+static void int_mask_or(struct spu *spu, int class, u64 mask)
+{
+ u64 old_mask;
+
+ old_mask = in_be64(&spu->priv1->int_mask_RW[class]);
+ out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask);
+}
+
+static void int_mask_set(struct spu *spu, int class, u64 mask)
+{
+ out_be64(&spu->priv1->int_mask_RW[class], mask);
+}
+
+static u64 int_mask_get(struct spu *spu, int class)
+{
+ return in_be64(&spu->priv1->int_mask_RW[class]);
+}
+
+static void int_stat_clear(struct spu *spu, int class, u64 stat)
+{
+ out_be64(&spu->priv1->int_stat_RW[class], stat);
+}
+
+static u64 int_stat_get(struct spu *spu, int class)
+{
+ return in_be64(&spu->priv1->int_stat_RW[class]);
+}
+
+static void cpu_affinity_set(struct spu *spu, int cpu)
+{
+ u64 target;
+ u64 route;
+
+ if (nr_cpus_node(spu->node)) {
+ const struct cpumask *spumask = cpumask_of_node(spu->node),
+ *cpumask = cpumask_of_node(cpu_to_node(cpu));
+
+ if (!cpumask_intersects(spumask, cpumask))
+ return;
+ }
+
+ target = iic_get_target_id(cpu);
+ route = target << 48 | target << 32 | target << 16;
+ out_be64(&spu->priv1->int_route_RW, route);
+}
+
+static u64 mfc_dar_get(struct spu *spu)
+{
+ return in_be64(&spu->priv1->mfc_dar_RW);
+}
+
+static u64 mfc_dsisr_get(struct spu *spu)
+{
+ return in_be64(&spu->priv1->mfc_dsisr_RW);
+}
+
+static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
+{
+ out_be64(&spu->priv1->mfc_dsisr_RW, dsisr);
+}
+
+static void mfc_sdr_setup(struct spu *spu)
+{
+ out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1));
+}
+
+static void mfc_sr1_set(struct spu *spu, u64 sr1)
+{
+ out_be64(&spu->priv1->mfc_sr1_RW, sr1);
+}
+
+static u64 mfc_sr1_get(struct spu *spu)
+{
+ return in_be64(&spu->priv1->mfc_sr1_RW);
+}
+
+static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
+{
+ out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id);
+}
+
+static u64 mfc_tclass_id_get(struct spu *spu)
+{
+ return in_be64(&spu->priv1->mfc_tclass_id_RW);
+}
+
+static void tlb_invalidate(struct spu *spu)
+{
+ out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul);
+}
+
+static void resource_allocation_groupID_set(struct spu *spu, u64 id)
+{
+ out_be64(&spu->priv1->resource_allocation_groupID_RW, id);
+}
+
+static u64 resource_allocation_groupID_get(struct spu *spu)
+{
+ return in_be64(&spu->priv1->resource_allocation_groupID_RW);
+}
+
+static void resource_allocation_enable_set(struct spu *spu, u64 enable)
+{
+ out_be64(&spu->priv1->resource_allocation_enable_RW, enable);
+}
+
+static u64 resource_allocation_enable_get(struct spu *spu)
+{
+ return in_be64(&spu->priv1->resource_allocation_enable_RW);
+}
+
+const struct spu_priv1_ops spu_priv1_mmio_ops =
+{
+ .int_mask_and = int_mask_and,
+ .int_mask_or = int_mask_or,
+ .int_mask_set = int_mask_set,
+ .int_mask_get = int_mask_get,
+ .int_stat_clear = int_stat_clear,
+ .int_stat_get = int_stat_get,
+ .cpu_affinity_set = cpu_affinity_set,
+ .mfc_dar_get = mfc_dar_get,
+ .mfc_dsisr_get = mfc_dsisr_get,
+ .mfc_dsisr_set = mfc_dsisr_set,
+ .mfc_sdr_setup = mfc_sdr_setup,
+ .mfc_sr1_set = mfc_sr1_set,
+ .mfc_sr1_get = mfc_sr1_get,
+ .mfc_tclass_id_set = mfc_tclass_id_set,
+ .mfc_tclass_id_get = mfc_tclass_id_get,
+ .tlb_invalidate = tlb_invalidate,
+ .resource_allocation_groupID_set = resource_allocation_groupID_set,
+ .resource_allocation_groupID_get = resource_allocation_groupID_get,
+ .resource_allocation_enable_set = resource_allocation_enable_set,
+ .resource_allocation_enable_get = resource_allocation_enable_get,
+};
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.h b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
new file mode 100644
index 00000000000..7b62bd1cc25
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
@@ -0,0 +1,26 @@
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef SPU_PRIV1_MMIO_H
+#define SPU_PRIV1_MMIO_H
+
+struct device_node *spu_devnode(struct spu *spu);
+
+#endif /* SPU_PRIV1_MMIO_H */
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
new file mode 100644
index 00000000000..714bbfc3162
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -0,0 +1,178 @@
+/*
+ * SPU file system -- system call stubs
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * (C) Copyright 2006-2007, IBM Corporation
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/rcupdate.h>
+
+#include <asm/spu.h>
+
+/* protected by rcu */
+static struct spufs_calls *spufs_calls;
+
+#ifdef CONFIG_SPU_FS_MODULE
+
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+ struct spufs_calls *calls = NULL;
+
+ rcu_read_lock();
+ calls = rcu_dereference(spufs_calls);
+ if (calls && !try_module_get(calls->owner))
+ calls = NULL;
+ rcu_read_unlock();
+
+ return calls;
+}
+
+static inline void spufs_calls_put(struct spufs_calls *calls)
+{
+ BUG_ON(calls != spufs_calls);
+
+ /* we don't need to rcu this, as we hold a reference to the module */
+ module_put(spufs_calls->owner);
+}
+
+#else /* !defined CONFIG_SPU_FS_MODULE */
+
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+ return spufs_calls;
+}
+
+static inline void spufs_calls_put(struct spufs_calls *calls) { }
+
+#endif /* CONFIG_SPU_FS_MODULE */
+
+SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
+ umode_t, mode, int, neighbor_fd)
+{
+ long ret;
+ struct file *neighbor;
+ int fput_needed;
+ struct spufs_calls *calls;
+
+ calls = spufs_calls_get();
+ if (!calls)
+ return -ENOSYS;
+
+ if (flags & SPU_CREATE_AFFINITY_SPU) {
+ ret = -EBADF;
+ neighbor = fget_light(neighbor_fd, &fput_needed);
+ if (neighbor) {
+ ret = calls->create_thread(name, flags, mode, neighbor);
+ fput_light(neighbor, fput_needed);
+ }
+ } else
+ ret = calls->create_thread(name, flags, mode, NULL);
+
+ spufs_calls_put(calls);
+ return ret;
+}
+
+asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
+{
+ long ret;
+ struct file *filp;
+ int fput_needed;
+ struct spufs_calls *calls;
+
+ calls = spufs_calls_get();
+ if (!calls)
+ return -ENOSYS;
+
+ ret = -EBADF;
+ filp = fget_light(fd, &fput_needed);
+ if (filp) {
+ ret = calls->spu_run(filp, unpc, ustatus);
+ fput_light(filp, fput_needed);
+ }
+
+ spufs_calls_put(calls);
+ return ret;
+}
+
+int elf_coredump_extra_notes_size(void)
+{
+ struct spufs_calls *calls;
+ int ret;
+
+ calls = spufs_calls_get();
+ if (!calls)
+ return 0;
+
+ ret = calls->coredump_extra_notes_size();
+
+ spufs_calls_put(calls);
+
+ return ret;
+}
+
+int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+{
+ struct spufs_calls *calls;
+ int ret;
+
+ calls = spufs_calls_get();
+ if (!calls)
+ return 0;
+
+ ret = calls->coredump_extra_notes_write(file, foffset);
+
+ spufs_calls_put(calls);
+
+ return ret;
+}
+
+void notify_spus_active(void)
+{
+ struct spufs_calls *calls;
+
+ calls = spufs_calls_get();
+ if (!calls)
+ return;
+
+ calls->notify_spus_active();
+ spufs_calls_put(calls);
+
+ return;
+}
+
+int register_spu_syscalls(struct spufs_calls *calls)
+{
+ if (spufs_calls)
+ return -EBUSY;
+
+ rcu_assign_pointer(spufs_calls, calls);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(register_spu_syscalls);
+
+void unregister_spu_syscalls(struct spufs_calls *calls)
+{
+ BUG_ON(spufs_calls->owner != calls->owner);
+ rcu_assign_pointer(spufs_calls, NULL);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(unregister_spu_syscalls);
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
new file mode 100644
index 00000000000..b9d5d678aa4
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -0,0 +1,61 @@
+
+obj-$(CONFIG_SPU_FS) += spufs.o
+spufs-y += inode.o file.o context.o syscalls.o coredump.o
+spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
+spufs-y += switch.o fault.o lscsa_alloc.o
+
+# magic for the trace events
+CFLAGS_sched.o := -I$(src)
+
+# Rules to build switch.o with the help of SPU tool chain
+SPU_CROSS := spu-
+SPU_CC := $(SPU_CROSS)gcc
+SPU_AS := $(SPU_CROSS)gcc
+SPU_LD := $(SPU_CROSS)ld
+SPU_OBJCOPY := $(SPU_CROSS)objcopy
+SPU_CFLAGS := -O2 -Wall -I$(srctree)/include -D__KERNEL__
+SPU_AFLAGS := -c -D__ASSEMBLY__ -I$(srctree)/include -D__KERNEL__
+SPU_LDFLAGS := -N -Ttext=0x0
+
+$(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h
+clean-files := spu_save_dump.h spu_restore_dump.h
+
+# Compile SPU files
+ cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $<
+quiet_cmd_spu_cc = SPU_CC $@
+$(obj)/spu_%.o: $(src)/spu_%.c
+ $(call if_changed,spu_cc)
+
+# Assemble SPU files
+ cmd_spu_as = $(SPU_AS) $(SPU_AFLAGS) -o $@ $<
+quiet_cmd_spu_as = SPU_AS $@
+$(obj)/spu_%.o: $(src)/spu_%.S
+ $(call if_changed,spu_as)
+
+# Link SPU Executables
+ cmd_spu_ld = $(SPU_LD) $(SPU_LDFLAGS) -o $@ $^
+quiet_cmd_spu_ld = SPU_LD $@
+$(obj)/spu_%: $(obj)/spu_%_crt0.o $(obj)/spu_%.o
+ $(call if_changed,spu_ld)
+
+# Copy into binary format
+ cmd_spu_objcopy = $(SPU_OBJCOPY) -O binary $< $@
+quiet_cmd_spu_objcopy = OBJCOPY $@
+$(obj)/spu_%.bin: $(src)/spu_%
+ $(call if_changed,spu_objcopy)
+
+# create C code from ELF executable
+cmd_hexdump = ( \
+ echo "/*" ; \
+ echo " * $*_dump.h: Copyright (C) 2005 IBM." ; \
+ echo " * Hex-dump auto generated from $*.c." ; \
+ echo " * Do not edit!" ; \
+ echo " */" ; \
+ echo "static unsigned int $*_code[] " \
+ "__attribute__((__aligned__(128))) = {" ; \
+ hexdump -v -e '"0x" 4/1 "%02x" "," "\n"' $< ; \
+ echo "};" ; \
+ ) > $@
+quiet_cmd_hexdump = HEXDUMP $@
+$(obj)/%_dump.h: $(obj)/%.bin
+ $(call if_changed,hexdump)
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
new file mode 100644
index 00000000000..6e8a9ef8590
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -0,0 +1,413 @@
+/* backing_ops.c - query/set operations on saved SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * These register operations allow SPUFS to operate on saved
+ * SPU contexts rather than hardware.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/poll.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_info.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+/*
+ * Reads/writes to various problem and priv2 registers require
+ * state changes, i.e. generate SPU events, modify channel
+ * counts, etc.
+ */
+
+static void gen_spu_event(struct spu_context *ctx, u32 event)
+{
+ u64 ch0_cnt;
+ u64 ch0_data;
+ u64 ch1_data;
+
+ ch0_cnt = ctx->csa.spu_chnlcnt_RW[0];
+ ch0_data = ctx->csa.spu_chnldata_RW[0];
+ ch1_data = ctx->csa.spu_chnldata_RW[1];
+ ctx->csa.spu_chnldata_RW[0] |= event;
+ if ((ch0_cnt == 0) && !(ch0_data & event) && (ch1_data & event)) {
+ ctx->csa.spu_chnlcnt_RW[0] = 1;
+ }
+}
+
+static int spu_backing_mbox_read(struct spu_context *ctx, u32 * data)
+{
+ u32 mbox_stat;
+ int ret = 0;
+
+ spin_lock(&ctx->csa.register_lock);
+ mbox_stat = ctx->csa.prob.mb_stat_R;
+ if (mbox_stat & 0x0000ff) {
+ /* Read the first available word.
+ * Implementation note: the depth
+ * of pu_mb_R is currently 1.
+ */
+ *data = ctx->csa.prob.pu_mb_R;
+ ctx->csa.prob.mb_stat_R &= ~(0x0000ff);
+ ctx->csa.spu_chnlcnt_RW[28] = 1;
+ gen_spu_event(ctx, MFC_PU_MAILBOX_AVAILABLE_EVENT);
+ ret = 4;
+ }
+ spin_unlock(&ctx->csa.register_lock);
+ return ret;
+}
+
+static u32 spu_backing_mbox_stat_read(struct spu_context *ctx)
+{
+ return ctx->csa.prob.mb_stat_R;
+}
+
+static unsigned int spu_backing_mbox_stat_poll(struct spu_context *ctx,
+ unsigned int events)
+{
+ int ret;
+ u32 stat;
+
+ ret = 0;
+ spin_lock_irq(&ctx->csa.register_lock);
+ stat = ctx->csa.prob.mb_stat_R;
+
+ /* if the requested event is there, return the poll
+ mask, otherwise enable the interrupt to get notified,
+ but first mark any pending interrupts as done so
+ we don't get woken up unnecessarily */
+
+ if (events & (POLLIN | POLLRDNORM)) {
+ if (stat & 0xff0000)
+ ret |= POLLIN | POLLRDNORM;
+ else {
+ ctx->csa.priv1.int_stat_class2_RW &=
+ ~CLASS2_MAILBOX_INTR;
+ ctx->csa.priv1.int_mask_class2_RW |=
+ CLASS2_ENABLE_MAILBOX_INTR;
+ }
+ }
+ if (events & (POLLOUT | POLLWRNORM)) {
+ if (stat & 0x00ff00)
+ ret = POLLOUT | POLLWRNORM;
+ else {
+ ctx->csa.priv1.int_stat_class2_RW &=
+ ~CLASS2_MAILBOX_THRESHOLD_INTR;
+ ctx->csa.priv1.int_mask_class2_RW |=
+ CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+ }
+ }
+ spin_unlock_irq(&ctx->csa.register_lock);
+ return ret;
+}
+
+static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data)
+{
+ int ret;
+
+ spin_lock(&ctx->csa.register_lock);
+ if (ctx->csa.prob.mb_stat_R & 0xff0000) {
+ /* Read the first available word.
+ * Implementation note: the depth
+ * of puint_mb_R is currently 1.
+ */
+ *data = ctx->csa.priv2.puint_mb_R;
+ ctx->csa.prob.mb_stat_R &= ~(0xff0000);
+ ctx->csa.spu_chnlcnt_RW[30] = 1;
+ gen_spu_event(ctx, MFC_PU_INT_MAILBOX_AVAILABLE_EVENT);
+ ret = 4;
+ } else {
+ /* make sure we get woken up by the interrupt */
+ ctx->csa.priv1.int_mask_class2_RW |= CLASS2_ENABLE_MAILBOX_INTR;
+ ret = 0;
+ }
+ spin_unlock(&ctx->csa.register_lock);
+ return ret;
+}
+
+static int spu_backing_wbox_write(struct spu_context *ctx, u32 data)
+{
+ int ret;
+
+ spin_lock(&ctx->csa.register_lock);
+ if ((ctx->csa.prob.mb_stat_R) & 0x00ff00) {
+ int slot = ctx->csa.spu_chnlcnt_RW[29];
+ int avail = (ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8;
+
+ /* We have space to write wbox_data.
+ * Implementation note: the depth
+ * of spu_mb_W is currently 4.
+ */
+ BUG_ON(avail != (4 - slot));
+ ctx->csa.spu_mailbox_data[slot] = data;
+ ctx->csa.spu_chnlcnt_RW[29] = ++slot;
+ ctx->csa.prob.mb_stat_R &= ~(0x00ff00);
+ ctx->csa.prob.mb_stat_R |= (((4 - slot) & 0xff) << 8);
+ gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT);
+ ret = 4;
+ } else {
+ /* make sure we get woken up by the interrupt when space
+ becomes available */
+ ctx->csa.priv1.int_mask_class2_RW |=
+ CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+ ret = 0;
+ }
+ spin_unlock(&ctx->csa.register_lock);
+ return ret;
+}
+
+static u32 spu_backing_signal1_read(struct spu_context *ctx)
+{
+ return ctx->csa.spu_chnldata_RW[3];
+}
+
+static void spu_backing_signal1_write(struct spu_context *ctx, u32 data)
+{
+ spin_lock(&ctx->csa.register_lock);
+ if (ctx->csa.priv2.spu_cfg_RW & 0x1)
+ ctx->csa.spu_chnldata_RW[3] |= data;
+ else
+ ctx->csa.spu_chnldata_RW[3] = data;
+ ctx->csa.spu_chnlcnt_RW[3] = 1;
+ gen_spu_event(ctx, MFC_SIGNAL_1_EVENT);
+ spin_unlock(&ctx->csa.register_lock);
+}
+
+static u32 spu_backing_signal2_read(struct spu_context *ctx)
+{
+ return ctx->csa.spu_chnldata_RW[4];
+}
+
+static void spu_backing_signal2_write(struct spu_context *ctx, u32 data)
+{
+ spin_lock(&ctx->csa.register_lock);
+ if (ctx->csa.priv2.spu_cfg_RW & 0x2)
+ ctx->csa.spu_chnldata_RW[4] |= data;
+ else
+ ctx->csa.spu_chnldata_RW[4] = data;
+ ctx->csa.spu_chnlcnt_RW[4] = 1;
+ gen_spu_event(ctx, MFC_SIGNAL_2_EVENT);
+ spin_unlock(&ctx->csa.register_lock);
+}
+
+static void spu_backing_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+ u64 tmp;
+
+ spin_lock(&ctx->csa.register_lock);
+ tmp = ctx->csa.priv2.spu_cfg_RW;
+ if (val)
+ tmp |= 1;
+ else
+ tmp &= ~1;
+ ctx->csa.priv2.spu_cfg_RW = tmp;
+ spin_unlock(&ctx->csa.register_lock);
+}
+
+static u64 spu_backing_signal1_type_get(struct spu_context *ctx)
+{
+ return ((ctx->csa.priv2.spu_cfg_RW & 1) != 0);
+}
+
+static void spu_backing_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+ u64 tmp;
+
+ spin_lock(&ctx->csa.register_lock);
+ tmp = ctx->csa.priv2.spu_cfg_RW;
+ if (val)
+ tmp |= 2;
+ else
+ tmp &= ~2;
+ ctx->csa.priv2.spu_cfg_RW = tmp;
+ spin_unlock(&ctx->csa.register_lock);
+}
+
+static u64 spu_backing_signal2_type_get(struct spu_context *ctx)
+{
+ return ((ctx->csa.priv2.spu_cfg_RW & 2) != 0);
+}
+
+static u32 spu_backing_npc_read(struct spu_context *ctx)
+{
+ return ctx->csa.prob.spu_npc_RW;
+}
+
+static void spu_backing_npc_write(struct spu_context *ctx, u32 val)
+{
+ ctx->csa.prob.spu_npc_RW = val;
+}
+
+static u32 spu_backing_status_read(struct spu_context *ctx)
+{
+ return ctx->csa.prob.spu_status_R;
+}
+
+static char *spu_backing_get_ls(struct spu_context *ctx)
+{
+ return ctx->csa.lscsa->ls;
+}
+
+static void spu_backing_privcntl_write(struct spu_context *ctx, u64 val)
+{
+ ctx->csa.priv2.spu_privcntl_RW = val;
+}
+
+static u32 spu_backing_runcntl_read(struct spu_context *ctx)
+{
+ return ctx->csa.prob.spu_runcntl_RW;
+}
+
+static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val)
+{
+ spin_lock(&ctx->csa.register_lock);
+ ctx->csa.prob.spu_runcntl_RW = val;
+ if (val & SPU_RUNCNTL_RUNNABLE) {
+ ctx->csa.prob.spu_status_R &=
+ ~SPU_STATUS_STOPPED_BY_STOP &
+ ~SPU_STATUS_STOPPED_BY_HALT &
+ ~SPU_STATUS_SINGLE_STEP &
+ ~SPU_STATUS_INVALID_INSTR &
+ ~SPU_STATUS_INVALID_CH;
+ ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING;
+ } else {
+ ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING;
+ }
+ spin_unlock(&ctx->csa.register_lock);
+}
+
+static void spu_backing_runcntl_stop(struct spu_context *ctx)
+{
+ spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP);
+}
+
+static void spu_backing_master_start(struct spu_context *ctx)
+{
+ struct spu_state *csa = &ctx->csa;
+ u64 sr1;
+
+ spin_lock(&csa->register_lock);
+ sr1 = csa->priv1.mfc_sr1_RW | MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+ csa->priv1.mfc_sr1_RW = sr1;
+ spin_unlock(&csa->register_lock);
+}
+
+static void spu_backing_master_stop(struct spu_context *ctx)
+{
+ struct spu_state *csa = &ctx->csa;
+ u64 sr1;
+
+ spin_lock(&csa->register_lock);
+ sr1 = csa->priv1.mfc_sr1_RW & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+ csa->priv1.mfc_sr1_RW = sr1;
+ spin_unlock(&csa->register_lock);
+}
+
+static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask,
+ u32 mode)
+{
+ struct spu_problem_collapsed *prob = &ctx->csa.prob;
+ int ret;
+
+ spin_lock(&ctx->csa.register_lock);
+ ret = -EAGAIN;
+ if (prob->dma_querytype_RW)
+ goto out;
+ ret = 0;
+ /* FIXME: what are the side-effects of this? */
+ prob->dma_querymask_RW = mask;
+ prob->dma_querytype_RW = mode;
+ /* In the current implementation, the SPU context is always
+ * acquired in runnable state when new bits are added to the
+ * mask (tagwait), so it's sufficient just to mask
+ * dma_tagstatus_R with the 'mask' parameter here.
+ */
+ ctx->csa.prob.dma_tagstatus_R &= mask;
+out:
+ spin_unlock(&ctx->csa.register_lock);
+
+ return ret;
+}
+
+static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx)
+{
+ return ctx->csa.prob.dma_tagstatus_R;
+}
+
+static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx)
+{
+ return ctx->csa.prob.dma_qstatus_R;
+}
+
+static int spu_backing_send_mfc_command(struct spu_context *ctx,
+ struct mfc_dma_command *cmd)
+{
+ int ret;
+
+ spin_lock(&ctx->csa.register_lock);
+ ret = -EAGAIN;
+ /* FIXME: set up priv2->puq */
+ spin_unlock(&ctx->csa.register_lock);
+
+ return ret;
+}
+
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+ ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+}
+
+struct spu_context_ops spu_backing_ops = {
+ .mbox_read = spu_backing_mbox_read,
+ .mbox_stat_read = spu_backing_mbox_stat_read,
+ .mbox_stat_poll = spu_backing_mbox_stat_poll,
+ .ibox_read = spu_backing_ibox_read,
+ .wbox_write = spu_backing_wbox_write,
+ .signal1_read = spu_backing_signal1_read,
+ .signal1_write = spu_backing_signal1_write,
+ .signal2_read = spu_backing_signal2_read,
+ .signal2_write = spu_backing_signal2_write,
+ .signal1_type_set = spu_backing_signal1_type_set,
+ .signal1_type_get = spu_backing_signal1_type_get,
+ .signal2_type_set = spu_backing_signal2_type_set,
+ .signal2_type_get = spu_backing_signal2_type_get,
+ .npc_read = spu_backing_npc_read,
+ .npc_write = spu_backing_npc_write,
+ .status_read = spu_backing_status_read,
+ .get_ls = spu_backing_get_ls,
+ .privcntl_write = spu_backing_privcntl_write,
+ .runcntl_read = spu_backing_runcntl_read,
+ .runcntl_write = spu_backing_runcntl_write,
+ .runcntl_stop = spu_backing_runcntl_stop,
+ .master_start = spu_backing_master_start,
+ .master_stop = spu_backing_master_stop,
+ .set_mfc_query = spu_backing_set_mfc_query,
+ .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
+ .get_mfc_free_elements = spu_backing_get_mfc_free_elements,
+ .send_mfc_command = spu_backing_send_mfc_command,
+ .restart_dma = spu_backing_restart_dma,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
new file mode 100644
index 00000000000..9c6790d17ed
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -0,0 +1,188 @@
+/*
+ * SPU file system -- SPU context management
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include "spufs.h"
+#include "sputrace.h"
+
+
+atomic_t nr_spu_contexts = ATOMIC_INIT(0);
+
+struct spu_context *alloc_spu_context(struct spu_gang *gang)
+{
+ struct spu_context *ctx;
+ struct timespec ts;
+
+ ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
+ if (!ctx)
+ goto out;
+ /* Binding to physical processor deferred
+ * until spu_activate().
+ */
+ if (spu_init_csa(&ctx->csa))
+ goto out_free;
+ spin_lock_init(&ctx->mmio_lock);
+ mutex_init(&ctx->mapping_lock);
+ kref_init(&ctx->kref);
+ mutex_init(&ctx->state_mutex);
+ mutex_init(&ctx->run_mutex);
+ init_waitqueue_head(&ctx->ibox_wq);
+ init_waitqueue_head(&ctx->wbox_wq);
+ init_waitqueue_head(&ctx->stop_wq);
+ init_waitqueue_head(&ctx->mfc_wq);
+ init_waitqueue_head(&ctx->run_wq);
+ ctx->state = SPU_STATE_SAVED;
+ ctx->ops = &spu_backing_ops;
+ ctx->owner = get_task_mm(current);
+ INIT_LIST_HEAD(&ctx->rq);
+ INIT_LIST_HEAD(&ctx->aff_list);
+ if (gang)
+ spu_gang_add_ctx(gang, ctx);
+
+ __spu_update_sched_info(ctx);
+ spu_set_timeslice(ctx);
+ ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
+ ktime_get_ts(&ts);
+ ctx->stats.tstamp = timespec_to_ns(&ts);
+
+ atomic_inc(&nr_spu_contexts);
+ goto out;
+out_free:
+ kfree(ctx);
+ ctx = NULL;
+out:
+ return ctx;
+}
+
+void destroy_spu_context(struct kref *kref)
+{
+ struct spu_context *ctx;
+ ctx = container_of(kref, struct spu_context, kref);
+ spu_context_nospu_trace(destroy_spu_context__enter, ctx);
+ mutex_lock(&ctx->state_mutex);
+ spu_deactivate(ctx);
+ mutex_unlock(&ctx->state_mutex);
+ spu_fini_csa(&ctx->csa);
+ if (ctx->gang)
+ spu_gang_remove_ctx(ctx->gang, ctx);
+ if (ctx->prof_priv_kref)
+ kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
+ BUG_ON(!list_empty(&ctx->rq));
+ atomic_dec(&nr_spu_contexts);
+ kfree(ctx->switch_log);
+ kfree(ctx);
+}
+
+struct spu_context * get_spu_context(struct spu_context *ctx)
+{
+ kref_get(&ctx->kref);
+ return ctx;
+}
+
+int put_spu_context(struct spu_context *ctx)
+{
+ return kref_put(&ctx->kref, &destroy_spu_context);
+}
+
+/* give up the mm reference when the context is about to be destroyed */
+void spu_forget(struct spu_context *ctx)
+{
+ struct mm_struct *mm;
+
+ /*
+ * This is basically an open-coded spu_acquire_saved, except that
+ * we don't acquire the state mutex interruptible, and we don't
+ * want this context to be rescheduled on release.
+ */
+ mutex_lock(&ctx->state_mutex);
+ if (ctx->state != SPU_STATE_SAVED)
+ spu_deactivate(ctx);
+
+ mm = ctx->owner;
+ ctx->owner = NULL;
+ mmput(mm);
+ spu_release(ctx);
+}
+
+void spu_unmap_mappings(struct spu_context *ctx)
+{
+ mutex_lock(&ctx->mapping_lock);
+ if (ctx->local_store)
+ unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
+ if (ctx->mfc)
+ unmap_mapping_range(ctx->mfc, 0, SPUFS_MFC_MAP_SIZE, 1);
+ if (ctx->cntl)
+ unmap_mapping_range(ctx->cntl, 0, SPUFS_CNTL_MAP_SIZE, 1);
+ if (ctx->signal1)
+ unmap_mapping_range(ctx->signal1, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
+ if (ctx->signal2)
+ unmap_mapping_range(ctx->signal2, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
+ if (ctx->mss)
+ unmap_mapping_range(ctx->mss, 0, SPUFS_MSS_MAP_SIZE, 1);
+ if (ctx->psmap)
+ unmap_mapping_range(ctx->psmap, 0, SPUFS_PS_MAP_SIZE, 1);
+ mutex_unlock(&ctx->mapping_lock);
+}
+
+/**
+ * spu_acquire_saved - lock spu contex and make sure it is in saved state
+ * @ctx: spu contex to lock
+ */
+int spu_acquire_saved(struct spu_context *ctx)
+{
+ int ret;
+
+ spu_context_nospu_trace(spu_acquire_saved__enter, ctx);
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+
+ if (ctx->state != SPU_STATE_SAVED) {
+ set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
+ spu_deactivate(ctx);
+ }
+
+ return 0;
+}
+
+/**
+ * spu_release_saved - unlock spu context and return it to the runqueue
+ * @ctx: context to unlock
+ */
+void spu_release_saved(struct spu_context *ctx)
+{
+ BUG_ON(ctx->state != SPU_STATE_SAVED);
+
+ if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags) &&
+ test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+ spu_activate(ctx, 0);
+
+ spu_release(ctx);
+}
+
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
new file mode 100644
index 00000000000..03c5fce2a5b
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -0,0 +1,250 @@
+/*
+ * SPU core dump code
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/elf.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+
+static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
+ size_t size, loff_t *off)
+{
+ u64 data;
+ int ret;
+
+ if (spufs_coredump_read[num].read)
+ return spufs_coredump_read[num].read(ctx, buffer, size, off);
+
+ data = spufs_coredump_read[num].get(ctx);
+ ret = snprintf(buffer, size, "0x%.16llx", data);
+ if (ret >= size)
+ return size;
+ return ++ret; /* count trailing NULL */
+}
+
+/*
+ * These are the only things you should do on a core-file: use only these
+ * functions to write out all the necessary info.
+ */
+static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset)
+{
+ unsigned long limit = rlimit(RLIMIT_CORE);
+ ssize_t written;
+
+ if (*foffset + nr > limit)
+ return -EIO;
+
+ written = file->f_op->write(file, addr, nr, &file->f_pos);
+ *foffset += written;
+
+ if (written != nr)
+ return -EIO;
+
+ return 0;
+}
+
+static int spufs_dump_align(struct file *file, char *buf, loff_t new_off,
+ loff_t *foffset)
+{
+ int rc, size;
+
+ size = min((loff_t)PAGE_SIZE, new_off - *foffset);
+ memset(buf, 0, size);
+
+ rc = 0;
+ while (rc == 0 && new_off > *foffset) {
+ size = min((loff_t)PAGE_SIZE, new_off - *foffset);
+ rc = spufs_dump_write(file, buf, size, foffset);
+ }
+
+ return rc;
+}
+
+static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
+{
+ int i, sz, total = 0;
+ char *name;
+ char fullname[80];
+
+ for (i = 0; spufs_coredump_read[i].name != NULL; i++) {
+ name = spufs_coredump_read[i].name;
+ sz = spufs_coredump_read[i].size;
+
+ sprintf(fullname, "SPU/%d/%s", dfd, name);
+
+ total += sizeof(struct elf_note);
+ total += roundup(strlen(fullname) + 1, 4);
+ total += roundup(sz, 4);
+ }
+
+ return total;
+}
+
+/*
+ * The additional architecture-specific notes for Cell are various
+ * context files in the spu context.
+ *
+ * This function iterates over all open file descriptors and sees
+ * if they are a directory in spufs. In that case we use spufs
+ * internal functionality to dump them without needing to actually
+ * open the files.
+ */
+static struct spu_context *coredump_next_context(int *fd)
+{
+ struct fdtable *fdt = files_fdtable(current->files);
+ struct file *file;
+ struct spu_context *ctx = NULL;
+
+ for (; *fd < fdt->max_fds; (*fd)++) {
+ if (!FD_ISSET(*fd, fdt->open_fds))
+ continue;
+
+ file = fcheck(*fd);
+
+ if (!file || file->f_op != &spufs_context_fops)
+ continue;
+
+ ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx;
+ if (ctx->flags & SPU_CREATE_NOSCHED)
+ continue;
+
+ break;
+ }
+
+ return ctx;
+}
+
+int spufs_coredump_extra_notes_size(void)
+{
+ struct spu_context *ctx;
+ int size = 0, rc, fd;
+
+ fd = 0;
+ while ((ctx = coredump_next_context(&fd)) != NULL) {
+ rc = spu_acquire_saved(ctx);
+ if (rc)
+ break;
+ rc = spufs_ctx_note_size(ctx, fd);
+ spu_release_saved(ctx);
+ if (rc < 0)
+ break;
+
+ size += rc;
+
+ /* start searching the next fd next time */
+ fd++;
+ }
+
+ return size;
+}
+
+static int spufs_arch_write_note(struct spu_context *ctx, int i,
+ struct file *file, int dfd, loff_t *foffset)
+{
+ loff_t pos = 0;
+ int sz, rc, nread, total = 0;
+ const int bufsz = PAGE_SIZE;
+ char *name;
+ char fullname[80], *buf;
+ struct elf_note en;
+
+ buf = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ name = spufs_coredump_read[i].name;
+ sz = spufs_coredump_read[i].size;
+
+ sprintf(fullname, "SPU/%d/%s", dfd, name);
+ en.n_namesz = strlen(fullname) + 1;
+ en.n_descsz = sz;
+ en.n_type = NT_SPU;
+
+ rc = spufs_dump_write(file, &en, sizeof(en), foffset);
+ if (rc)
+ goto out;
+
+ rc = spufs_dump_write(file, fullname, en.n_namesz, foffset);
+ if (rc)
+ goto out;
+
+ rc = spufs_dump_align(file, buf, roundup(*foffset, 4), foffset);
+ if (rc)
+ goto out;
+
+ do {
+ nread = do_coredump_read(i, ctx, buf, bufsz, &pos);
+ if (nread > 0) {
+ rc = spufs_dump_write(file, buf, nread, foffset);
+ if (rc)
+ goto out;
+ total += nread;
+ }
+ } while (nread == bufsz && total < sz);
+
+ if (nread < 0) {
+ rc = nread;
+ goto out;
+ }
+
+ rc = spufs_dump_align(file, buf, roundup(*foffset - total + sz, 4),
+ foffset);
+
+out:
+ free_page((unsigned long)buf);
+ return rc;
+}
+
+int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+{
+ struct spu_context *ctx;
+ int fd, j, rc;
+
+ fd = 0;
+ while ((ctx = coredump_next_context(&fd)) != NULL) {
+ rc = spu_acquire_saved(ctx);
+ if (rc)
+ return rc;
+
+ for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
+ rc = spufs_arch_write_note(ctx, j, file, fd, foffset);
+ if (rc) {
+ spu_release_saved(ctx);
+ return rc;
+ }
+ }
+
+ spu_release_saved(ctx);
+
+ /* start searching the next fd next time */
+ fd++;
+ }
+
+ return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
new file mode 100644
index 00000000000..8cb6260cc80
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -0,0 +1,191 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/**
+ * Handle an SPE event, depending on context SPU_CREATE_EVENTS_ENABLED flag.
+ *
+ * If the context was created with events, we just set the return event.
+ * Otherwise, send an appropriate signal to the process.
+ */
+static void spufs_handle_event(struct spu_context *ctx,
+ unsigned long ea, int type)
+{
+ siginfo_t info;
+
+ if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+ ctx->event_return |= type;
+ wake_up_all(&ctx->stop_wq);
+ return;
+ }
+
+ memset(&info, 0, sizeof(info));
+
+ switch (type) {
+ case SPE_EVENT_INVALID_DMA:
+ info.si_signo = SIGBUS;
+ info.si_code = BUS_OBJERR;
+ break;
+ case SPE_EVENT_SPE_DATA_STORAGE:
+ info.si_signo = SIGSEGV;
+ info.si_addr = (void __user *)ea;
+ info.si_code = SEGV_ACCERR;
+ ctx->ops->restart_dma(ctx);
+ break;
+ case SPE_EVENT_DMA_ALIGNMENT:
+ info.si_signo = SIGBUS;
+ /* DAR isn't set for an alignment fault :( */
+ info.si_code = BUS_ADRALN;
+ break;
+ case SPE_EVENT_SPE_ERROR:
+ info.si_signo = SIGILL;
+ info.si_addr = (void __user *)(unsigned long)
+ ctx->ops->npc_read(ctx) - 4;
+ info.si_code = ILL_ILLOPC;
+ break;
+ }
+
+ if (info.si_signo)
+ force_sig_info(info.si_signo, &info, current);
+}
+
+int spufs_handle_class0(struct spu_context *ctx)
+{
+ unsigned long stat = ctx->csa.class_0_pending & CLASS0_INTR_MASK;
+
+ if (likely(!stat))
+ return 0;
+
+ if (stat & CLASS0_DMA_ALIGNMENT_INTR)
+ spufs_handle_event(ctx, ctx->csa.class_0_dar,
+ SPE_EVENT_DMA_ALIGNMENT);
+
+ if (stat & CLASS0_INVALID_DMA_COMMAND_INTR)
+ spufs_handle_event(ctx, ctx->csa.class_0_dar,
+ SPE_EVENT_INVALID_DMA);
+
+ if (stat & CLASS0_SPU_ERROR_INTR)
+ spufs_handle_event(ctx, ctx->csa.class_0_dar,
+ SPE_EVENT_SPE_ERROR);
+
+ ctx->csa.class_0_pending = 0;
+
+ return -EIO;
+}
+
+/*
+ * bottom half handler for page faults, we can't do this from
+ * interrupt context, since we might need to sleep.
+ * we also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ * in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+ u64 ea, dsisr, access;
+ unsigned long flags;
+ unsigned flt = 0;
+ int ret;
+
+ /*
+ * dar and dsisr get passed from the registers
+ * to the spu_context, to this function, but not
+ * back to the spu if it gets scheduled again.
+ *
+ * if we don't handle the fault for a saved context
+ * in time, we can still expect to get the same fault
+ * the immediately after the context restore.
+ */
+ ea = ctx->csa.class_1_dar;
+ dsisr = ctx->csa.class_1_dsisr;
+
+ if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+ return 0;
+
+ spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
+
+ pr_debug("ctx %p: ea %016llx, dsisr %016llx state %d\n", ctx, ea,
+ dsisr, ctx->state);
+
+ ctx->stats.hash_flt++;
+ if (ctx->state == SPU_STATE_RUNNABLE)
+ ctx->spu->stats.hash_flt++;
+
+ /* we must not hold the lock when entering spu_handle_mm_fault */
+ spu_release(ctx);
+
+ access = (_PAGE_PRESENT | _PAGE_USER);
+ access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+ local_irq_save(flags);
+ ret = hash_page(ea, access, 0x300);
+ local_irq_restore(flags);
+
+ /* hashing failed, so try the actual fault handler */
+ if (ret)
+ ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt);
+
+ /*
+ * This is nasty: we need the state_mutex for all the bookkeeping even
+ * if the syscall was interrupted by a signal. ewww.
+ */
+ mutex_lock(&ctx->state_mutex);
+
+ /*
+ * Clear dsisr under ctxt lock after handling the fault, so that
+ * time slicing will not preempt the context while the page fault
+ * handler is running. Context switch code removes mappings.
+ */
+ ctx->csa.class_1_dar = ctx->csa.class_1_dsisr = 0;
+
+ /*
+ * If we handled the fault successfully and are in runnable
+ * state, restart the DMA.
+ * In case of unhandled error report the problem to user space.
+ */
+ if (!ret) {
+ if (flt & VM_FAULT_MAJOR)
+ ctx->stats.maj_flt++;
+ else
+ ctx->stats.min_flt++;
+ if (ctx->state == SPU_STATE_RUNNABLE) {
+ if (flt & VM_FAULT_MAJOR)
+ ctx->spu->stats.maj_flt++;
+ else
+ ctx->spu->stats.min_flt++;
+ }
+
+ if (ctx->spu)
+ ctx->ops->restart_dma(ctx);
+ } else
+ spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+ return ret;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
new file mode 100644
index 00000000000..0cfece4cf6e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -0,0 +1,2775 @@
+/*
+ * SPU file system -- file contents
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/export.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/ptrace.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <asm/io.h>
+#include <asm/time.h>
+#include <asm/spu.h>
+#include <asm/spu_info.h>
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+#include "sputrace.h"
+
+#define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000)
+
+/* Simple attribute files */
+struct spufs_attr {
+ int (*get)(void *, u64 *);
+ int (*set)(void *, u64);
+ char get_buf[24]; /* enough to store a u64 and "\n\0" */
+ char set_buf[24];
+ void *data;
+ const char *fmt; /* format for read operation */
+ struct mutex mutex; /* protects access to these buffers */
+};
+
+static int spufs_attr_open(struct inode *inode, struct file *file,
+ int (*get)(void *, u64 *), int (*set)(void *, u64),
+ const char *fmt)
+{
+ struct spufs_attr *attr;
+
+ attr = kmalloc(sizeof(*attr), GFP_KERNEL);
+ if (!attr)
+ return -ENOMEM;
+
+ attr->get = get;
+ attr->set = set;
+ attr->data = inode->i_private;
+ attr->fmt = fmt;
+ mutex_init(&attr->mutex);
+ file->private_data = attr;
+
+ return nonseekable_open(inode, file);
+}
+
+static int spufs_attr_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
+static ssize_t spufs_attr_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct spufs_attr *attr;
+ size_t size;
+ ssize_t ret;
+
+ attr = file->private_data;
+ if (!attr->get)
+ return -EACCES;
+
+ ret = mutex_lock_interruptible(&attr->mutex);
+ if (ret)
+ return ret;
+
+ if (*ppos) { /* continued read */
+ size = strlen(attr->get_buf);
+ } else { /* first read */
+ u64 val;
+ ret = attr->get(attr->data, &val);
+ if (ret)
+ goto out;
+
+ size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
+ attr->fmt, (unsigned long long)val);
+ }
+
+ ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
+out:
+ mutex_unlock(&attr->mutex);
+ return ret;
+}
+
+static ssize_t spufs_attr_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct spufs_attr *attr;
+ u64 val;
+ size_t size;
+ ssize_t ret;
+
+ attr = file->private_data;
+ if (!attr->set)
+ return -EACCES;
+
+ ret = mutex_lock_interruptible(&attr->mutex);
+ if (ret)
+ return ret;
+
+ ret = -EFAULT;
+ size = min(sizeof(attr->set_buf) - 1, len);
+ if (copy_from_user(attr->set_buf, buf, size))
+ goto out;
+
+ ret = len; /* claim we got the whole input */
+ attr->set_buf[size] = '\0';
+ val = simple_strtol(attr->set_buf, NULL, 0);
+ attr->set(attr->data, val);
+out:
+ mutex_unlock(&attr->mutex);
+ return ret;
+}
+
+#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+ __simple_attr_check_format(__fmt, 0ull); \
+ return spufs_attr_open(inode, file, __get, __set, __fmt); \
+} \
+static const struct file_operations __fops = { \
+ .owner = THIS_MODULE, \
+ .open = __fops ## _open, \
+ .release = spufs_attr_release, \
+ .read = spufs_attr_read, \
+ .write = spufs_attr_write, \
+ .llseek = generic_file_llseek, \
+};
+
+
+static int
+spufs_mem_open(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ file->private_data = ctx;
+ if (!i->i_openers++)
+ ctx->local_store = inode->i_mapping;
+ mutex_unlock(&ctx->mapping_lock);
+ return 0;
+}
+
+static int
+spufs_mem_release(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ if (!--i->i_openers)
+ ctx->local_store = NULL;
+ mutex_unlock(&ctx->mapping_lock);
+ return 0;
+}
+
+static ssize_t
+__spufs_mem_read(struct spu_context *ctx, char __user *buffer,
+ size_t size, loff_t *pos)
+{
+ char *local_store = ctx->ops->get_ls(ctx);
+ return simple_read_from_buffer(buffer, size, pos, local_store,
+ LS_SIZE);
+}
+
+static ssize_t
+spufs_mem_read(struct file *file, char __user *buffer,
+ size_t size, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ ssize_t ret;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+ ret = __spufs_mem_read(ctx, buffer, size, pos);
+ spu_release(ctx);
+
+ return ret;
+}
+
+static ssize_t
+spufs_mem_write(struct file *file, const char __user *buffer,
+ size_t size, loff_t *ppos)
+{
+ struct spu_context *ctx = file->private_data;
+ char *local_store;
+ loff_t pos = *ppos;
+ int ret;
+
+ if (pos > LS_SIZE)
+ return -EFBIG;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+
+ local_store = ctx->ops->get_ls(ctx);
+ size = simple_write_to_buffer(local_store, LS_SIZE, ppos, buffer, size);
+ spu_release(ctx);
+
+ return size;
+}
+
+static int
+spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct spu_context *ctx = vma->vm_file->private_data;
+ unsigned long address = (unsigned long)vmf->virtual_address;
+ unsigned long pfn, offset;
+
+#ifdef CONFIG_SPU_FS_64K_LS
+ struct spu_state *csa = &ctx->csa;
+ int psize;
+
+ /* Check what page size we are using */
+ psize = get_slice_psize(vma->vm_mm, address);
+
+ /* Some sanity checking */
+ BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K));
+
+ /* Wow, 64K, cool, we need to align the address though */
+ if (csa->use_big_pages) {
+ BUG_ON(vma->vm_start & 0xffff);
+ address &= ~0xfffful;
+ }
+#endif /* CONFIG_SPU_FS_64K_LS */
+
+ offset = vmf->pgoff << PAGE_SHIFT;
+ if (offset >= LS_SIZE)
+ return VM_FAULT_SIGBUS;
+
+ pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n",
+ address, offset);
+
+ if (spu_acquire(ctx))
+ return VM_FAULT_NOPAGE;
+
+ if (ctx->state == SPU_STATE_SAVED) {
+ vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+ pfn = vmalloc_to_pfn(ctx->csa.lscsa->ls + offset);
+ } else {
+ vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+ pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT;
+ }
+ vm_insert_pfn(vma, address, pfn);
+
+ spu_release(ctx);
+
+ return VM_FAULT_NOPAGE;
+}
+
+static int spufs_mem_mmap_access(struct vm_area_struct *vma,
+ unsigned long address,
+ void *buf, int len, int write)
+{
+ struct spu_context *ctx = vma->vm_file->private_data;
+ unsigned long offset = address - vma->vm_start;
+ char *local_store;
+
+ if (write && !(vma->vm_flags & VM_WRITE))
+ return -EACCES;
+ if (spu_acquire(ctx))
+ return -EINTR;
+ if ((offset + len) > vma->vm_end)
+ len = vma->vm_end - offset;
+ local_store = ctx->ops->get_ls(ctx);
+ if (write)
+ memcpy_toio(local_store + offset, buf, len);
+ else
+ memcpy_fromio(buf, local_store + offset, len);
+ spu_release(ctx);
+ return len;
+}
+
+static const struct vm_operations_struct spufs_mem_mmap_vmops = {
+ .fault = spufs_mem_mmap_fault,
+ .access = spufs_mem_mmap_access,
+};
+
+static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+#ifdef CONFIG_SPU_FS_64K_LS
+ struct spu_context *ctx = file->private_data;
+ struct spu_state *csa = &ctx->csa;
+
+ /* Sanity check VMA alignment */
+ if (csa->use_big_pages) {
+ pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx,"
+ " pgoff=0x%lx\n", vma->vm_start, vma->vm_end,
+ vma->vm_pgoff);
+ if (vma->vm_start & 0xffff)
+ return -EINVAL;
+ if (vma->vm_pgoff & 0xf)
+ return -EINVAL;
+ }
+#endif /* CONFIG_SPU_FS_64K_LS */
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+
+ vma->vm_ops = &spufs_mem_mmap_vmops;
+ return 0;
+}
+
+#ifdef CONFIG_SPU_FS_64K_LS
+static unsigned long spufs_get_unmapped_area(struct file *file,
+ unsigned long addr, unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct spu_context *ctx = file->private_data;
+ struct spu_state *csa = &ctx->csa;
+
+ /* If not using big pages, fallback to normal MM g_u_a */
+ if (!csa->use_big_pages)
+ return current->mm->get_unmapped_area(file, addr, len,
+ pgoff, flags);
+
+ /* Else, try to obtain a 64K pages slice */
+ return slice_get_unmapped_area(addr, len, flags,
+ MMU_PAGE_64K, 1, 0);
+}
+#endif /* CONFIG_SPU_FS_64K_LS */
+
+static const struct file_operations spufs_mem_fops = {
+ .open = spufs_mem_open,
+ .release = spufs_mem_release,
+ .read = spufs_mem_read,
+ .write = spufs_mem_write,
+ .llseek = generic_file_llseek,
+ .mmap = spufs_mem_mmap,
+#ifdef CONFIG_SPU_FS_64K_LS
+ .get_unmapped_area = spufs_get_unmapped_area,
+#endif
+};
+
+static int spufs_ps_fault(struct vm_area_struct *vma,
+ struct vm_fault *vmf,
+ unsigned long ps_offs,
+ unsigned long ps_size)
+{
+ struct spu_context *ctx = vma->vm_file->private_data;
+ unsigned long area, offset = vmf->pgoff << PAGE_SHIFT;
+ int ret = 0;
+
+ spu_context_nospu_trace(spufs_ps_fault__enter, ctx);
+
+ if (offset >= ps_size)
+ return VM_FAULT_SIGBUS;
+
+ if (fatal_signal_pending(current))
+ return VM_FAULT_SIGBUS;
+
+ /*
+ * Because we release the mmap_sem, the context may be destroyed while
+ * we're in spu_wait. Grab an extra reference so it isn't destroyed
+ * in the meantime.
+ */
+ get_spu_context(ctx);
+
+ /*
+ * We have to wait for context to be loaded before we have
+ * pages to hand out to the user, but we don't want to wait
+ * with the mmap_sem held.
+ * It is possible to drop the mmap_sem here, but then we need
+ * to return VM_FAULT_NOPAGE because the mappings may have
+ * hanged.
+ */
+ if (spu_acquire(ctx))
+ goto refault;
+
+ if (ctx->state == SPU_STATE_SAVED) {
+ up_read(&current->mm->mmap_sem);
+ spu_context_nospu_trace(spufs_ps_fault__sleep, ctx);
+ ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+ spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu);
+ down_read(&current->mm->mmap_sem);
+ } else {
+ area = ctx->spu->problem_phys + ps_offs;
+ vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+ (area + offset) >> PAGE_SHIFT);
+ spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu);
+ }
+
+ if (!ret)
+ spu_release(ctx);
+
+refault:
+ put_spu_context(ctx);
+ return VM_FAULT_NOPAGE;
+}
+
+#if SPUFS_MMAP_4K
+static int spufs_cntl_mmap_fault(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ return spufs_ps_fault(vma, vmf, 0x4000, SPUFS_CNTL_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_cntl_mmap_vmops = {
+ .fault = spufs_cntl_mmap_fault,
+};
+
+/*
+ * mmap support for problem state control area [0x4000 - 0x4fff].
+ */
+static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ vma->vm_ops = &spufs_cntl_mmap_vmops;
+ return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_cntl_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static int spufs_cntl_get(void *data, u64 *val)
+{
+ struct spu_context *ctx = data;
+ int ret;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+ *val = ctx->ops->status_read(ctx);
+ spu_release(ctx);
+
+ return 0;
+}
+
+static int spufs_cntl_set(void *data, u64 val)
+{
+ struct spu_context *ctx = data;
+ int ret;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+ ctx->ops->runcntl_write(ctx, val);
+ spu_release(ctx);
+
+ return 0;
+}
+
+static int spufs_cntl_open(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ file->private_data = ctx;
+ if (!i->i_openers++)
+ ctx->cntl = inode->i_mapping;
+ mutex_unlock(&ctx->mapping_lock);
+ return simple_attr_open(inode, file, spufs_cntl_get,
+ spufs_cntl_set, "0x%08lx");
+}
+
+static int
+spufs_cntl_release(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ simple_attr_release(inode, file);
+
+ mutex_lock(&ctx->mapping_lock);
+ if (!--i->i_openers)
+ ctx->cntl = NULL;
+ mutex_unlock(&ctx->mapping_lock);
+ return 0;
+}
+
+static const struct file_operations spufs_cntl_fops = {
+ .open = spufs_cntl_open,
+ .release = spufs_cntl_release,
+ .read = simple_attr_read,
+ .write = simple_attr_write,
+ .llseek = generic_file_llseek,
+ .mmap = spufs_cntl_mmap,
+};
+
+static int
+spufs_regs_open(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ file->private_data = i->i_ctx;
+ return 0;
+}
+
+static ssize_t
+__spufs_regs_read(struct spu_context *ctx, char __user *buffer,
+ size_t size, loff_t *pos)
+{
+ struct spu_lscsa *lscsa = ctx->csa.lscsa;
+ return simple_read_from_buffer(buffer, size, pos,
+ lscsa->gprs, sizeof lscsa->gprs);
+}
+
+static ssize_t
+spufs_regs_read(struct file *file, char __user *buffer,
+ size_t size, loff_t *pos)
+{
+ int ret;
+ struct spu_context *ctx = file->private_data;
+
+ /* pre-check for file position: if we'd return EOF, there's no point
+ * causing a deschedule */
+ if (*pos >= sizeof(ctx->csa.lscsa->gprs))
+ return 0;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ ret = __spufs_regs_read(ctx, buffer, size, pos);
+ spu_release_saved(ctx);
+ return ret;
+}
+
+static ssize_t
+spufs_regs_write(struct file *file, const char __user *buffer,
+ size_t size, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ struct spu_lscsa *lscsa = ctx->csa.lscsa;
+ int ret;
+
+ if (*pos >= sizeof(lscsa->gprs))
+ return -EFBIG;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+
+ size = simple_write_to_buffer(lscsa->gprs, sizeof(lscsa->gprs), pos,
+ buffer, size);
+
+ spu_release_saved(ctx);
+ return size;
+}
+
+static const struct file_operations spufs_regs_fops = {
+ .open = spufs_regs_open,
+ .read = spufs_regs_read,
+ .write = spufs_regs_write,
+ .llseek = generic_file_llseek,
+};
+
+static ssize_t
+__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer,
+ size_t size, loff_t * pos)
+{
+ struct spu_lscsa *lscsa = ctx->csa.lscsa;
+ return simple_read_from_buffer(buffer, size, pos,
+ &lscsa->fpcr, sizeof(lscsa->fpcr));
+}
+
+static ssize_t
+spufs_fpcr_read(struct file *file, char __user * buffer,
+ size_t size, loff_t * pos)
+{
+ int ret;
+ struct spu_context *ctx = file->private_data;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ ret = __spufs_fpcr_read(ctx, buffer, size, pos);
+ spu_release_saved(ctx);
+ return ret;
+}
+
+static ssize_t
+spufs_fpcr_write(struct file *file, const char __user * buffer,
+ size_t size, loff_t * pos)
+{
+ struct spu_context *ctx = file->private_data;
+ struct spu_lscsa *lscsa = ctx->csa.lscsa;
+ int ret;
+
+ if (*pos >= sizeof(lscsa->fpcr))
+ return -EFBIG;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+
+ size = simple_write_to_buffer(&lscsa->fpcr, sizeof(lscsa->fpcr), pos,
+ buffer, size);
+
+ spu_release_saved(ctx);
+ return size;
+}
+
+static const struct file_operations spufs_fpcr_fops = {
+ .open = spufs_regs_open,
+ .read = spufs_fpcr_read,
+ .write = spufs_fpcr_write,
+ .llseek = generic_file_llseek,
+};
+
+/* generic open function for all pipe-like files */
+static int spufs_pipe_open(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ file->private_data = i->i_ctx;
+
+ return nonseekable_open(inode, file);
+}
+
+/*
+ * Read as many bytes from the mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ */
+static ssize_t spufs_mbox_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ u32 mbox_data, __user *udata;
+ ssize_t count;
+
+ if (len < 4)
+ return -EINVAL;
+
+ if (!access_ok(VERIFY_WRITE, buf, len))
+ return -EFAULT;
+
+ udata = (void __user *)buf;
+
+ count = spu_acquire(ctx);
+ if (count)
+ return count;
+
+ for (count = 0; (count + 4) <= len; count += 4, udata++) {
+ int ret;
+ ret = ctx->ops->mbox_read(ctx, &mbox_data);
+ if (ret == 0)
+ break;
+
+ /*
+ * at the end of the mapped area, we can fault
+ * but still need to return the data we have
+ * read successfully so far.
+ */
+ ret = __put_user(mbox_data, udata);
+ if (ret) {
+ if (!count)
+ count = -EFAULT;
+ break;
+ }
+ }
+ spu_release(ctx);
+
+ if (!count)
+ count = -EAGAIN;
+
+ return count;
+}
+
+static const struct file_operations spufs_mbox_fops = {
+ .open = spufs_pipe_open,
+ .read = spufs_mbox_read,
+ .llseek = no_llseek,
+};
+
+static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ ssize_t ret;
+ u32 mbox_stat;
+
+ if (len < 4)
+ return -EINVAL;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+
+ mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff;
+
+ spu_release(ctx);
+
+ if (copy_to_user(buf, &mbox_stat, sizeof mbox_stat))
+ return -EFAULT;
+
+ return 4;
+}
+
+static const struct file_operations spufs_mbox_stat_fops = {
+ .open = spufs_pipe_open,
+ .read = spufs_mbox_stat_read,
+ .llseek = no_llseek,
+};
+
+/* low-level ibox access function */
+size_t spu_ibox_read(struct spu_context *ctx, u32 *data)
+{
+ return ctx->ops->ibox_read(ctx, data);
+}
+
+static int spufs_ibox_fasync(int fd, struct file *file, int on)
+{
+ struct spu_context *ctx = file->private_data;
+
+ return fasync_helper(fd, file, on, &ctx->ibox_fasync);
+}
+
+/* interrupt-level ibox callback function. */
+void spufs_ibox_callback(struct spu *spu)
+{
+ struct spu_context *ctx = spu->ctx;
+
+ if (!ctx)
+ return;
+
+ wake_up_all(&ctx->ibox_wq);
+ kill_fasync(&ctx->ibox_fasync, SIGIO, POLLIN);
+}
+
+/*
+ * Read as many bytes from the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * any data is available, but return when we have been able to
+ * read something.
+ */
+static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ u32 ibox_data, __user *udata;
+ ssize_t count;
+
+ if (len < 4)
+ return -EINVAL;
+
+ if (!access_ok(VERIFY_WRITE, buf, len))
+ return -EFAULT;
+
+ udata = (void __user *)buf;
+
+ count = spu_acquire(ctx);
+ if (count)
+ goto out;
+
+ /* wait only for the first element */
+ count = 0;
+ if (file->f_flags & O_NONBLOCK) {
+ if (!spu_ibox_read(ctx, &ibox_data)) {
+ count = -EAGAIN;
+ goto out_unlock;
+ }
+ } else {
+ count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
+ if (count)
+ goto out;
+ }
+
+ /* if we can't write at all, return -EFAULT */
+ count = __put_user(ibox_data, udata);
+ if (count)
+ goto out_unlock;
+
+ for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+ int ret;
+ ret = ctx->ops->ibox_read(ctx, &ibox_data);
+ if (ret == 0)
+ break;
+ /*
+ * at the end of the mapped area, we can fault
+ * but still need to return the data we have
+ * read successfully so far.
+ */
+ ret = __put_user(ibox_data, udata);
+ if (ret)
+ break;
+ }
+
+out_unlock:
+ spu_release(ctx);
+out:
+ return count;
+}
+
+static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait)
+{
+ struct spu_context *ctx = file->private_data;
+ unsigned int mask;
+
+ poll_wait(file, &ctx->ibox_wq, wait);
+
+ /*
+ * For now keep this uninterruptible and also ignore the rule
+ * that poll should not sleep. Will be fixed later.
+ */
+ mutex_lock(&ctx->state_mutex);
+ mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM);
+ spu_release(ctx);
+
+ return mask;
+}
+
+static const struct file_operations spufs_ibox_fops = {
+ .open = spufs_pipe_open,
+ .read = spufs_ibox_read,
+ .poll = spufs_ibox_poll,
+ .fasync = spufs_ibox_fasync,
+ .llseek = no_llseek,
+};
+
+static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ ssize_t ret;
+ u32 ibox_stat;
+
+ if (len < 4)
+ return -EINVAL;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+ ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff;
+ spu_release(ctx);
+
+ if (copy_to_user(buf, &ibox_stat, sizeof ibox_stat))
+ return -EFAULT;
+
+ return 4;
+}
+
+static const struct file_operations spufs_ibox_stat_fops = {
+ .open = spufs_pipe_open,
+ .read = spufs_ibox_stat_read,
+ .llseek = no_llseek,
+};
+
+/* low-level mailbox write */
+size_t spu_wbox_write(struct spu_context *ctx, u32 data)
+{
+ return ctx->ops->wbox_write(ctx, data);
+}
+
+static int spufs_wbox_fasync(int fd, struct file *file, int on)
+{
+ struct spu_context *ctx = file->private_data;
+ int ret;
+
+ ret = fasync_helper(fd, file, on, &ctx->wbox_fasync);
+
+ return ret;
+}
+
+/* interrupt-level wbox callback function. */
+void spufs_wbox_callback(struct spu *spu)
+{
+ struct spu_context *ctx = spu->ctx;
+
+ if (!ctx)
+ return;
+
+ wake_up_all(&ctx->wbox_wq);
+ kill_fasync(&ctx->wbox_fasync, SIGIO, POLLOUT);
+}
+
+/*
+ * Write as many bytes to the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - the mailbox is full
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * space is availabyl, but return when we have been able to
+ * write something.
+ */
+static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ u32 wbox_data, __user *udata;
+ ssize_t count;
+
+ if (len < 4)
+ return -EINVAL;
+
+ udata = (void __user *)buf;
+ if (!access_ok(VERIFY_READ, buf, len))
+ return -EFAULT;
+
+ if (__get_user(wbox_data, udata))
+ return -EFAULT;
+
+ count = spu_acquire(ctx);
+ if (count)
+ goto out;
+
+ /*
+ * make sure we can at least write one element, by waiting
+ * in case of !O_NONBLOCK
+ */
+ count = 0;
+ if (file->f_flags & O_NONBLOCK) {
+ if (!spu_wbox_write(ctx, wbox_data)) {
+ count = -EAGAIN;
+ goto out_unlock;
+ }
+ } else {
+ count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
+ if (count)
+ goto out;
+ }
+
+
+ /* write as much as possible */
+ for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+ int ret;
+ ret = __get_user(wbox_data, udata);
+ if (ret)
+ break;
+
+ ret = spu_wbox_write(ctx, wbox_data);
+ if (ret == 0)
+ break;
+ }
+
+out_unlock:
+ spu_release(ctx);
+out:
+ return count;
+}
+
+static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait)
+{
+ struct spu_context *ctx = file->private_data;
+ unsigned int mask;
+
+ poll_wait(file, &ctx->wbox_wq, wait);
+
+ /*
+ * For now keep this uninterruptible and also ignore the rule
+ * that poll should not sleep. Will be fixed later.
+ */
+ mutex_lock(&ctx->state_mutex);
+ mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM);
+ spu_release(ctx);
+
+ return mask;
+}
+
+static const struct file_operations spufs_wbox_fops = {
+ .open = spufs_pipe_open,
+ .write = spufs_wbox_write,
+ .poll = spufs_wbox_poll,
+ .fasync = spufs_wbox_fasync,
+ .llseek = no_llseek,
+};
+
+static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ ssize_t ret;
+ u32 wbox_stat;
+
+ if (len < 4)
+ return -EINVAL;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+ wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff;
+ spu_release(ctx);
+
+ if (copy_to_user(buf, &wbox_stat, sizeof wbox_stat))
+ return -EFAULT;
+
+ return 4;
+}
+
+static const struct file_operations spufs_wbox_stat_fops = {
+ .open = spufs_pipe_open,
+ .read = spufs_wbox_stat_read,
+ .llseek = no_llseek,
+};
+
+static int spufs_signal1_open(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ file->private_data = ctx;
+ if (!i->i_openers++)
+ ctx->signal1 = inode->i_mapping;
+ mutex_unlock(&ctx->mapping_lock);
+ return nonseekable_open(inode, file);
+}
+
+static int
+spufs_signal1_release(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ if (!--i->i_openers)
+ ctx->signal1 = NULL;
+ mutex_unlock(&ctx->mapping_lock);
+ return 0;
+}
+
+static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ int ret = 0;
+ u32 data;
+
+ if (len < 4)
+ return -EINVAL;
+
+ if (ctx->csa.spu_chnlcnt_RW[3]) {
+ data = ctx->csa.spu_chnldata_RW[3];
+ ret = 4;
+ }
+
+ if (!ret)
+ goto out;
+
+ if (copy_to_user(buf, &data, 4))
+ return -EFAULT;
+
+out:
+ return ret;
+}
+
+static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ int ret;
+ struct spu_context *ctx = file->private_data;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ ret = __spufs_signal1_read(ctx, buf, len, pos);
+ spu_release_saved(ctx);
+
+ return ret;
+}
+
+static ssize_t spufs_signal1_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx;
+ ssize_t ret;
+ u32 data;
+
+ ctx = file->private_data;
+
+ if (len < 4)
+ return -EINVAL;
+
+ if (copy_from_user(&data, buf, 4))
+ return -EFAULT;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+ ctx->ops->signal1_write(ctx, data);
+ spu_release(ctx);
+
+ return 4;
+}
+
+static int
+spufs_signal1_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+ return spufs_ps_fault(vma, vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE);
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+ /* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+ * signal 1 and 2 area
+ */
+ return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+#else
+#error unsupported page size
+#endif
+}
+
+static const struct vm_operations_struct spufs_signal1_mmap_vmops = {
+ .fault = spufs_signal1_mmap_fault,
+};
+
+static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ vma->vm_ops = &spufs_signal1_mmap_vmops;
+ return 0;
+}
+
+static const struct file_operations spufs_signal1_fops = {
+ .open = spufs_signal1_open,
+ .release = spufs_signal1_release,
+ .read = spufs_signal1_read,
+ .write = spufs_signal1_write,
+ .mmap = spufs_signal1_mmap,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations spufs_signal1_nosched_fops = {
+ .open = spufs_signal1_open,
+ .release = spufs_signal1_release,
+ .write = spufs_signal1_write,
+ .mmap = spufs_signal1_mmap,
+ .llseek = no_llseek,
+};
+
+static int spufs_signal2_open(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ file->private_data = ctx;
+ if (!i->i_openers++)
+ ctx->signal2 = inode->i_mapping;
+ mutex_unlock(&ctx->mapping_lock);
+ return nonseekable_open(inode, file);
+}
+
+static int
+spufs_signal2_release(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ if (!--i->i_openers)
+ ctx->signal2 = NULL;
+ mutex_unlock(&ctx->mapping_lock);
+ return 0;
+}
+
+static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ int ret = 0;
+ u32 data;
+
+ if (len < 4)
+ return -EINVAL;
+
+ if (ctx->csa.spu_chnlcnt_RW[4]) {
+ data = ctx->csa.spu_chnldata_RW[4];
+ ret = 4;
+ }
+
+ if (!ret)
+ goto out;
+
+ if (copy_to_user(buf, &data, 4))
+ return -EFAULT;
+
+out:
+ return ret;
+}
+
+static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ int ret;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ ret = __spufs_signal2_read(ctx, buf, len, pos);
+ spu_release_saved(ctx);
+
+ return ret;
+}
+
+static ssize_t spufs_signal2_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx;
+ ssize_t ret;
+ u32 data;
+
+ ctx = file->private_data;
+
+ if (len < 4)
+ return -EINVAL;
+
+ if (copy_from_user(&data, buf, 4))
+ return -EFAULT;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+ ctx->ops->signal2_write(ctx, data);
+ spu_release(ctx);
+
+ return 4;
+}
+
+#if SPUFS_MMAP_4K
+static int
+spufs_signal2_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+ return spufs_ps_fault(vma, vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE);
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+ /* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+ * signal 1 and 2 area
+ */
+ return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+#else
+#error unsupported page size
+#endif
+}
+
+static const struct vm_operations_struct spufs_signal2_mmap_vmops = {
+ .fault = spufs_signal2_mmap_fault,
+};
+
+static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ vma->vm_ops = &spufs_signal2_mmap_vmops;
+ return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_signal2_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static const struct file_operations spufs_signal2_fops = {
+ .open = spufs_signal2_open,
+ .release = spufs_signal2_release,
+ .read = spufs_signal2_read,
+ .write = spufs_signal2_write,
+ .mmap = spufs_signal2_mmap,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations spufs_signal2_nosched_fops = {
+ .open = spufs_signal2_open,
+ .release = spufs_signal2_release,
+ .write = spufs_signal2_write,
+ .mmap = spufs_signal2_mmap,
+ .llseek = no_llseek,
+};
+
+/*
+ * This is a wrapper around DEFINE_SIMPLE_ATTRIBUTE which does the
+ * work of acquiring (or not) the SPU context before calling through
+ * to the actual get routine. The set routine is called directly.
+ */
+#define SPU_ATTR_NOACQUIRE 0
+#define SPU_ATTR_ACQUIRE 1
+#define SPU_ATTR_ACQUIRE_SAVED 2
+
+#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire) \
+static int __##__get(void *data, u64 *val) \
+{ \
+ struct spu_context *ctx = data; \
+ int ret = 0; \
+ \
+ if (__acquire == SPU_ATTR_ACQUIRE) { \
+ ret = spu_acquire(ctx); \
+ if (ret) \
+ return ret; \
+ *val = __get(ctx); \
+ spu_release(ctx); \
+ } else if (__acquire == SPU_ATTR_ACQUIRE_SAVED) { \
+ ret = spu_acquire_saved(ctx); \
+ if (ret) \
+ return ret; \
+ *val = __get(ctx); \
+ spu_release_saved(ctx); \
+ } else \
+ *val = __get(ctx); \
+ \
+ return 0; \
+} \
+DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt);
+
+static int spufs_signal1_type_set(void *data, u64 val)
+{
+ struct spu_context *ctx = data;
+ int ret;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+ ctx->ops->signal1_type_set(ctx, val);
+ spu_release(ctx);
+
+ return 0;
+}
+
+static u64 spufs_signal1_type_get(struct spu_context *ctx)
+{
+ return ctx->ops->signal1_type_get(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get,
+ spufs_signal1_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+
+static int spufs_signal2_type_set(void *data, u64 val)
+{
+ struct spu_context *ctx = data;
+ int ret;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+ ctx->ops->signal2_type_set(ctx, val);
+ spu_release(ctx);
+
+ return 0;
+}
+
+static u64 spufs_signal2_type_get(struct spu_context *ctx)
+{
+ return ctx->ops->signal2_type_get(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get,
+ spufs_signal2_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+#if SPUFS_MMAP_4K
+static int
+spufs_mss_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_MSS_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_mss_mmap_vmops = {
+ .fault = spufs_mss_mmap_fault,
+};
+
+/*
+ * mmap support for problem state MFC DMA area [0x0000 - 0x0fff].
+ */
+static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ vma->vm_ops = &spufs_mss_mmap_vmops;
+ return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mss_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static int spufs_mss_open(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ file->private_data = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ if (!i->i_openers++)
+ ctx->mss = inode->i_mapping;
+ mutex_unlock(&ctx->mapping_lock);
+ return nonseekable_open(inode, file);
+}
+
+static int
+spufs_mss_release(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ if (!--i->i_openers)
+ ctx->mss = NULL;
+ mutex_unlock(&ctx->mapping_lock);
+ return 0;
+}
+
+static const struct file_operations spufs_mss_fops = {
+ .open = spufs_mss_open,
+ .release = spufs_mss_release,
+ .mmap = spufs_mss_mmap,
+ .llseek = no_llseek,
+};
+
+static int
+spufs_psmap_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_PS_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_psmap_mmap_vmops = {
+ .fault = spufs_psmap_mmap_fault,
+};
+
+/*
+ * mmap support for full problem state area [0x00000 - 0x1ffff].
+ */
+static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ vma->vm_ops = &spufs_psmap_mmap_vmops;
+ return 0;
+}
+
+static int spufs_psmap_open(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ file->private_data = i->i_ctx;
+ if (!i->i_openers++)
+ ctx->psmap = inode->i_mapping;
+ mutex_unlock(&ctx->mapping_lock);
+ return nonseekable_open(inode, file);
+}
+
+static int
+spufs_psmap_release(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ if (!--i->i_openers)
+ ctx->psmap = NULL;
+ mutex_unlock(&ctx->mapping_lock);
+ return 0;
+}
+
+static const struct file_operations spufs_psmap_fops = {
+ .open = spufs_psmap_open,
+ .release = spufs_psmap_release,
+ .mmap = spufs_psmap_mmap,
+ .llseek = no_llseek,
+};
+
+
+#if SPUFS_MMAP_4K
+static int
+spufs_mfc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return spufs_ps_fault(vma, vmf, 0x3000, SPUFS_MFC_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_mfc_mmap_vmops = {
+ .fault = spufs_mfc_mmap_fault,
+};
+
+/*
+ * mmap support for problem state MFC DMA area [0x0000 - 0x0fff].
+ */
+static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ vma->vm_ops = &spufs_mfc_mmap_vmops;
+ return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mfc_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static int spufs_mfc_open(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ /* we don't want to deal with DMA into other processes */
+ if (ctx->owner != current->mm)
+ return -EINVAL;
+
+ if (atomic_read(&inode->i_count) != 1)
+ return -EBUSY;
+
+ mutex_lock(&ctx->mapping_lock);
+ file->private_data = ctx;
+ if (!i->i_openers++)
+ ctx->mfc = inode->i_mapping;
+ mutex_unlock(&ctx->mapping_lock);
+ return nonseekable_open(inode, file);
+}
+
+static int
+spufs_mfc_release(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+
+ mutex_lock(&ctx->mapping_lock);
+ if (!--i->i_openers)
+ ctx->mfc = NULL;
+ mutex_unlock(&ctx->mapping_lock);
+ return 0;
+}
+
+/* interrupt-level mfc callback function. */
+void spufs_mfc_callback(struct spu *spu)
+{
+ struct spu_context *ctx = spu->ctx;
+
+ if (!ctx)
+ return;
+
+ wake_up_all(&ctx->mfc_wq);
+
+ pr_debug("%s %s\n", __func__, spu->name);
+ if (ctx->mfc_fasync) {
+ u32 free_elements, tagstatus;
+ unsigned int mask;
+
+ /* no need for spu_acquire in interrupt context */
+ free_elements = ctx->ops->get_mfc_free_elements(ctx);
+ tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+
+ mask = 0;
+ if (free_elements & 0xffff)
+ mask |= POLLOUT;
+ if (tagstatus & ctx->tagwait)
+ mask |= POLLIN;
+
+ kill_fasync(&ctx->mfc_fasync, SIGIO, mask);
+ }
+}
+
+static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status)
+{
+ /* See if there is one tag group is complete */
+ /* FIXME we need locking around tagwait */
+ *status = ctx->ops->read_mfc_tagstatus(ctx) & ctx->tagwait;
+ ctx->tagwait &= ~*status;
+ if (*status)
+ return 1;
+
+ /* enable interrupt waiting for any tag group,
+ may silently fail if interrupts are already enabled */
+ ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+ return 0;
+}
+
+static ssize_t spufs_mfc_read(struct file *file, char __user *buffer,
+ size_t size, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ int ret = -EINVAL;
+ u32 status;
+
+ if (size != 4)
+ goto out;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+
+ ret = -EINVAL;
+ if (file->f_flags & O_NONBLOCK) {
+ status = ctx->ops->read_mfc_tagstatus(ctx);
+ if (!(status & ctx->tagwait))
+ ret = -EAGAIN;
+ else
+ /* XXX(hch): shouldn't we clear ret here? */
+ ctx->tagwait &= ~status;
+ } else {
+ ret = spufs_wait(ctx->mfc_wq,
+ spufs_read_mfc_tagstatus(ctx, &status));
+ if (ret)
+ goto out;
+ }
+ spu_release(ctx);
+
+ ret = 4;
+ if (copy_to_user(buffer, &status, 4))
+ ret = -EFAULT;
+
+out:
+ return ret;
+}
+
+static int spufs_check_valid_dma(struct mfc_dma_command *cmd)
+{
+ pr_debug("queueing DMA %x %llx %x %x %x\n", cmd->lsa,
+ cmd->ea, cmd->size, cmd->tag, cmd->cmd);
+
+ switch (cmd->cmd) {
+ case MFC_PUT_CMD:
+ case MFC_PUTF_CMD:
+ case MFC_PUTB_CMD:
+ case MFC_GET_CMD:
+ case MFC_GETF_CMD:
+ case MFC_GETB_CMD:
+ break;
+ default:
+ pr_debug("invalid DMA opcode %x\n", cmd->cmd);
+ return -EIO;
+ }
+
+ if ((cmd->lsa & 0xf) != (cmd->ea &0xf)) {
+ pr_debug("invalid DMA alignment, ea %llx lsa %x\n",
+ cmd->ea, cmd->lsa);
+ return -EIO;
+ }
+
+ switch (cmd->size & 0xf) {
+ case 1:
+ break;
+ case 2:
+ if (cmd->lsa & 1)
+ goto error;
+ break;
+ case 4:
+ if (cmd->lsa & 3)
+ goto error;
+ break;
+ case 8:
+ if (cmd->lsa & 7)
+ goto error;
+ break;
+ case 0:
+ if (cmd->lsa & 15)
+ goto error;
+ break;
+ error:
+ default:
+ pr_debug("invalid DMA alignment %x for size %x\n",
+ cmd->lsa & 0xf, cmd->size);
+ return -EIO;
+ }
+
+ if (cmd->size > 16 * 1024) {
+ pr_debug("invalid DMA size %x\n", cmd->size);
+ return -EIO;
+ }
+
+ if (cmd->tag & 0xfff0) {
+ /* we reserve the higher tag numbers for kernel use */
+ pr_debug("invalid DMA tag\n");
+ return -EIO;
+ }
+
+ if (cmd->class) {
+ /* not supported in this version */
+ pr_debug("invalid DMA class\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int spu_send_mfc_command(struct spu_context *ctx,
+ struct mfc_dma_command cmd,
+ int *error)
+{
+ *error = ctx->ops->send_mfc_command(ctx, &cmd);
+ if (*error == -EAGAIN) {
+ /* wait for any tag group to complete
+ so we have space for the new command */
+ ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+ /* try again, because the queue might be
+ empty again */
+ *error = ctx->ops->send_mfc_command(ctx, &cmd);
+ if (*error == -EAGAIN)
+ return 0;
+ }
+ return 1;
+}
+
+static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
+ size_t size, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ struct mfc_dma_command cmd;
+ int ret = -EINVAL;
+
+ if (size != sizeof cmd)
+ goto out;
+
+ ret = -EFAULT;
+ if (copy_from_user(&cmd, buffer, sizeof cmd))
+ goto out;
+
+ ret = spufs_check_valid_dma(&cmd);
+ if (ret)
+ goto out;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ goto out;
+
+ ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+ if (ret)
+ goto out;
+
+ if (file->f_flags & O_NONBLOCK) {
+ ret = ctx->ops->send_mfc_command(ctx, &cmd);
+ } else {
+ int status;
+ ret = spufs_wait(ctx->mfc_wq,
+ spu_send_mfc_command(ctx, cmd, &status));
+ if (ret)
+ goto out;
+ if (status)
+ ret = status;
+ }
+
+ if (ret)
+ goto out_unlock;
+
+ ctx->tagwait |= 1 << cmd.tag;
+ ret = size;
+
+out_unlock:
+ spu_release(ctx);
+out:
+ return ret;
+}
+
+static unsigned int spufs_mfc_poll(struct file *file,poll_table *wait)
+{
+ struct spu_context *ctx = file->private_data;
+ u32 free_elements, tagstatus;
+ unsigned int mask;
+
+ poll_wait(file, &ctx->mfc_wq, wait);
+
+ /*
+ * For now keep this uninterruptible and also ignore the rule
+ * that poll should not sleep. Will be fixed later.
+ */
+ mutex_lock(&ctx->state_mutex);
+ ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2);
+ free_elements = ctx->ops->get_mfc_free_elements(ctx);
+ tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+ spu_release(ctx);
+
+ mask = 0;
+ if (free_elements & 0xffff)
+ mask |= POLLOUT | POLLWRNORM;
+ if (tagstatus & ctx->tagwait)
+ mask |= POLLIN | POLLRDNORM;
+
+ pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__,
+ free_elements, tagstatus, ctx->tagwait);
+
+ return mask;
+}
+
+static int spufs_mfc_flush(struct file *file, fl_owner_t id)
+{
+ struct spu_context *ctx = file->private_data;
+ int ret;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ goto out;
+#if 0
+/* this currently hangs */
+ ret = spufs_wait(ctx->mfc_wq,
+ ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2));
+ if (ret)
+ goto out;
+ ret = spufs_wait(ctx->mfc_wq,
+ ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
+ if (ret)
+ goto out;
+#else
+ ret = 0;
+#endif
+ spu_release(ctx);
+out:
+ return ret;
+}
+
+static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (!err) {
+ mutex_lock(&inode->i_mutex);
+ err = spufs_mfc_flush(file, NULL);
+ mutex_unlock(&inode->i_mutex);
+ }
+ return err;
+}
+
+static int spufs_mfc_fasync(int fd, struct file *file, int on)
+{
+ struct spu_context *ctx = file->private_data;
+
+ return fasync_helper(fd, file, on, &ctx->mfc_fasync);
+}
+
+static const struct file_operations spufs_mfc_fops = {
+ .open = spufs_mfc_open,
+ .release = spufs_mfc_release,
+ .read = spufs_mfc_read,
+ .write = spufs_mfc_write,
+ .poll = spufs_mfc_poll,
+ .flush = spufs_mfc_flush,
+ .fsync = spufs_mfc_fsync,
+ .fasync = spufs_mfc_fasync,
+ .mmap = spufs_mfc_mmap,
+ .llseek = no_llseek,
+};
+
+static int spufs_npc_set(void *data, u64 val)
+{
+ struct spu_context *ctx = data;
+ int ret;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+ ctx->ops->npc_write(ctx, val);
+ spu_release(ctx);
+
+ return 0;
+}
+
+static u64 spufs_npc_get(struct spu_context *ctx)
+{
+ return ctx->ops->npc_read(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set,
+ "0x%llx\n", SPU_ATTR_ACQUIRE);
+
+static int spufs_decr_set(void *data, u64 val)
+{
+ struct spu_context *ctx = data;
+ struct spu_lscsa *lscsa = ctx->csa.lscsa;
+ int ret;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ lscsa->decr.slot[0] = (u32) val;
+ spu_release_saved(ctx);
+
+ return 0;
+}
+
+static u64 spufs_decr_get(struct spu_context *ctx)
+{
+ struct spu_lscsa *lscsa = ctx->csa.lscsa;
+ return lscsa->decr.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
+ "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED);
+
+static int spufs_decr_status_set(void *data, u64 val)
+{
+ struct spu_context *ctx = data;
+ int ret;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ if (val)
+ ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
+ else
+ ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
+ spu_release_saved(ctx);
+
+ return 0;
+}
+
+static u64 spufs_decr_status_get(struct spu_context *ctx)
+{
+ if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING)
+ return SPU_DECR_STATUS_RUNNING;
+ else
+ return 0;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
+ spufs_decr_status_set, "0x%llx\n",
+ SPU_ATTR_ACQUIRE_SAVED);
+
+static int spufs_event_mask_set(void *data, u64 val)
+{
+ struct spu_context *ctx = data;
+ struct spu_lscsa *lscsa = ctx->csa.lscsa;
+ int ret;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ lscsa->event_mask.slot[0] = (u32) val;
+ spu_release_saved(ctx);
+
+ return 0;
+}
+
+static u64 spufs_event_mask_get(struct spu_context *ctx)
+{
+ struct spu_lscsa *lscsa = ctx->csa.lscsa;
+ return lscsa->event_mask.slot[0];
+}
+
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
+ spufs_event_mask_set, "0x%llx\n",
+ SPU_ATTR_ACQUIRE_SAVED);
+
+static u64 spufs_event_status_get(struct spu_context *ctx)
+{
+ struct spu_state *state = &ctx->csa;
+ u64 stat;
+ stat = state->spu_chnlcnt_RW[0];
+ if (stat)
+ return state->spu_chnldata_RW[0];
+ return 0;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
+ NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+static int spufs_srr0_set(void *data, u64 val)
+{
+ struct spu_context *ctx = data;
+ struct spu_lscsa *lscsa = ctx->csa.lscsa;
+ int ret;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ lscsa->srr0.slot[0] = (u32) val;
+ spu_release_saved(ctx);
+
+ return 0;
+}
+
+static u64 spufs_srr0_get(struct spu_context *ctx)
+{
+ struct spu_lscsa *lscsa = ctx->csa.lscsa;
+ return lscsa->srr0.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
+ "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+static u64 spufs_id_get(struct spu_context *ctx)
+{
+ u64 num;
+
+ if (ctx->state == SPU_STATE_RUNNABLE)
+ num = ctx->spu->number;
+ else
+ num = (unsigned int)-1;
+
+ return num;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n",
+ SPU_ATTR_ACQUIRE)
+
+static u64 spufs_object_id_get(struct spu_context *ctx)
+{
+ /* FIXME: Should there really be no locking here? */
+ return ctx->object_id;
+}
+
+static int spufs_object_id_set(void *data, u64 id)
+{
+ struct spu_context *ctx = data;
+ ctx->object_id = id;
+
+ return 0;
+}
+
+DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get,
+ spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE);
+
+static u64 spufs_lslr_get(struct spu_context *ctx)
+{
+ return ctx->csa.priv2.spu_lslr_RW;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n",
+ SPU_ATTR_ACQUIRE_SAVED);
+
+static int spufs_info_open(struct inode *inode, struct file *file)
+{
+ struct spufs_inode_info *i = SPUFS_I(inode);
+ struct spu_context *ctx = i->i_ctx;
+ file->private_data = ctx;
+ return 0;
+}
+
+static int spufs_caps_show(struct seq_file *s, void *private)
+{
+ struct spu_context *ctx = s->private;
+
+ if (!(ctx->flags & SPU_CREATE_NOSCHED))
+ seq_puts(s, "sched\n");
+ if (!(ctx->flags & SPU_CREATE_ISOLATE))
+ seq_puts(s, "step\n");
+ return 0;
+}
+
+static int spufs_caps_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_caps_fops = {
+ .open = spufs_caps_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static ssize_t __spufs_mbox_info_read(struct spu_context *ctx,
+ char __user *buf, size_t len, loff_t *pos)
+{
+ u32 data;
+
+ /* EOF if there's no entry in the mbox */
+ if (!(ctx->csa.prob.mb_stat_R & 0x0000ff))
+ return 0;
+
+ data = ctx->csa.prob.pu_mb_R;
+
+ return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+}
+
+static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ int ret;
+ struct spu_context *ctx = file->private_data;
+
+ if (!access_ok(VERIFY_WRITE, buf, len))
+ return -EFAULT;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ spin_lock(&ctx->csa.register_lock);
+ ret = __spufs_mbox_info_read(ctx, buf, len, pos);
+ spin_unlock(&ctx->csa.register_lock);
+ spu_release_saved(ctx);
+
+ return ret;
+}
+
+static const struct file_operations spufs_mbox_info_fops = {
+ .open = spufs_info_open,
+ .read = spufs_mbox_info_read,
+ .llseek = generic_file_llseek,
+};
+
+static ssize_t __spufs_ibox_info_read(struct spu_context *ctx,
+ char __user *buf, size_t len, loff_t *pos)
+{
+ u32 data;
+
+ /* EOF if there's no entry in the ibox */
+ if (!(ctx->csa.prob.mb_stat_R & 0xff0000))
+ return 0;
+
+ data = ctx->csa.priv2.puint_mb_R;
+
+ return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+}
+
+static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ int ret;
+
+ if (!access_ok(VERIFY_WRITE, buf, len))
+ return -EFAULT;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ spin_lock(&ctx->csa.register_lock);
+ ret = __spufs_ibox_info_read(ctx, buf, len, pos);
+ spin_unlock(&ctx->csa.register_lock);
+ spu_release_saved(ctx);
+
+ return ret;
+}
+
+static const struct file_operations spufs_ibox_info_fops = {
+ .open = spufs_info_open,
+ .read = spufs_ibox_info_read,
+ .llseek = generic_file_llseek,
+};
+
+static ssize_t __spufs_wbox_info_read(struct spu_context *ctx,
+ char __user *buf, size_t len, loff_t *pos)
+{
+ int i, cnt;
+ u32 data[4];
+ u32 wbox_stat;
+
+ wbox_stat = ctx->csa.prob.mb_stat_R;
+ cnt = 4 - ((wbox_stat & 0x00ff00) >> 8);
+ for (i = 0; i < cnt; i++) {
+ data[i] = ctx->csa.spu_mailbox_data[i];
+ }
+
+ return simple_read_from_buffer(buf, len, pos, &data,
+ cnt * sizeof(u32));
+}
+
+static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ int ret;
+
+ if (!access_ok(VERIFY_WRITE, buf, len))
+ return -EFAULT;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ spin_lock(&ctx->csa.register_lock);
+ ret = __spufs_wbox_info_read(ctx, buf, len, pos);
+ spin_unlock(&ctx->csa.register_lock);
+ spu_release_saved(ctx);
+
+ return ret;
+}
+
+static const struct file_operations spufs_wbox_info_fops = {
+ .open = spufs_info_open,
+ .read = spufs_wbox_info_read,
+ .llseek = generic_file_llseek,
+};
+
+static ssize_t __spufs_dma_info_read(struct spu_context *ctx,
+ char __user *buf, size_t len, loff_t *pos)
+{
+ struct spu_dma_info info;
+ struct mfc_cq_sr *qp, *spuqp;
+ int i;
+
+ info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW;
+ info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0];
+ info.dma_info_status = ctx->csa.spu_chnldata_RW[24];
+ info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25];
+ info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27];
+ for (i = 0; i < 16; i++) {
+ qp = &info.dma_info_command_data[i];
+ spuqp = &ctx->csa.priv2.spuq[i];
+
+ qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW;
+ qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW;
+ qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW;
+ qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW;
+ }
+
+ return simple_read_from_buffer(buf, len, pos, &info,
+ sizeof info);
+}
+
+static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ int ret;
+
+ if (!access_ok(VERIFY_WRITE, buf, len))
+ return -EFAULT;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ spin_lock(&ctx->csa.register_lock);
+ ret = __spufs_dma_info_read(ctx, buf, len, pos);
+ spin_unlock(&ctx->csa.register_lock);
+ spu_release_saved(ctx);
+
+ return ret;
+}
+
+static const struct file_operations spufs_dma_info_fops = {
+ .open = spufs_info_open,
+ .read = spufs_dma_info_read,
+ .llseek = no_llseek,
+};
+
+static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx,
+ char __user *buf, size_t len, loff_t *pos)
+{
+ struct spu_proxydma_info info;
+ struct mfc_cq_sr *qp, *puqp;
+ int ret = sizeof info;
+ int i;
+
+ if (len < ret)
+ return -EINVAL;
+
+ if (!access_ok(VERIFY_WRITE, buf, len))
+ return -EFAULT;
+
+ info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW;
+ info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW;
+ info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R;
+ for (i = 0; i < 8; i++) {
+ qp = &info.proxydma_info_command_data[i];
+ puqp = &ctx->csa.priv2.puq[i];
+
+ qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW;
+ qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW;
+ qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW;
+ qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW;
+ }
+
+ return simple_read_from_buffer(buf, len, pos, &info,
+ sizeof info);
+}
+
+static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ int ret;
+
+ ret = spu_acquire_saved(ctx);
+ if (ret)
+ return ret;
+ spin_lock(&ctx->csa.register_lock);
+ ret = __spufs_proxydma_info_read(ctx, buf, len, pos);
+ spin_unlock(&ctx->csa.register_lock);
+ spu_release_saved(ctx);
+
+ return ret;
+}
+
+static const struct file_operations spufs_proxydma_info_fops = {
+ .open = spufs_info_open,
+ .read = spufs_proxydma_info_read,
+ .llseek = no_llseek,
+};
+
+static int spufs_show_tid(struct seq_file *s, void *private)
+{
+ struct spu_context *ctx = s->private;
+
+ seq_printf(s, "%d\n", ctx->tid);
+ return 0;
+}
+
+static int spufs_tid_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, spufs_show_tid, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_tid_fops = {
+ .open = spufs_tid_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const char *ctx_state_names[] = {
+ "user", "system", "iowait", "loaded"
+};
+
+static unsigned long long spufs_acct_time(struct spu_context *ctx,
+ enum spu_utilization_state state)
+{
+ struct timespec ts;
+ unsigned long long time = ctx->stats.times[state];
+
+ /*
+ * In general, utilization statistics are updated by the controlling
+ * thread as the spu context moves through various well defined
+ * state transitions, but if the context is lazily loaded its
+ * utilization statistics are not updated as the controlling thread
+ * is not tightly coupled with the execution of the spu context. We
+ * calculate and apply the time delta from the last recorded state
+ * of the spu context.
+ */
+ if (ctx->spu && ctx->stats.util_state == state) {
+ ktime_get_ts(&ts);
+ time += timespec_to_ns(&ts) - ctx->stats.tstamp;
+ }
+
+ return time / NSEC_PER_MSEC;
+}
+
+static unsigned long long spufs_slb_flts(struct spu_context *ctx)
+{
+ unsigned long long slb_flts = ctx->stats.slb_flt;
+
+ if (ctx->state == SPU_STATE_RUNNABLE) {
+ slb_flts += (ctx->spu->stats.slb_flt -
+ ctx->stats.slb_flt_base);
+ }
+
+ return slb_flts;
+}
+
+static unsigned long long spufs_class2_intrs(struct spu_context *ctx)
+{
+ unsigned long long class2_intrs = ctx->stats.class2_intr;
+
+ if (ctx->state == SPU_STATE_RUNNABLE) {
+ class2_intrs += (ctx->spu->stats.class2_intr -
+ ctx->stats.class2_intr_base);
+ }
+
+ return class2_intrs;
+}
+
+
+static int spufs_show_stat(struct seq_file *s, void *private)
+{
+ struct spu_context *ctx = s->private;
+ int ret;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ return ret;
+
+ seq_printf(s, "%s %llu %llu %llu %llu "
+ "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+ ctx_state_names[ctx->stats.util_state],
+ spufs_acct_time(ctx, SPU_UTIL_USER),
+ spufs_acct_time(ctx, SPU_UTIL_SYSTEM),
+ spufs_acct_time(ctx, SPU_UTIL_IOWAIT),
+ spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED),
+ ctx->stats.vol_ctx_switch,
+ ctx->stats.invol_ctx_switch,
+ spufs_slb_flts(ctx),
+ ctx->stats.hash_flt,
+ ctx->stats.min_flt,
+ ctx->stats.maj_flt,
+ spufs_class2_intrs(ctx),
+ ctx->stats.libassist);
+ spu_release(ctx);
+ return 0;
+}
+
+static int spufs_stat_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, spufs_show_stat, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_stat_fops = {
+ .open = spufs_stat_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static inline int spufs_switch_log_used(struct spu_context *ctx)
+{
+ return (ctx->switch_log->head - ctx->switch_log->tail) %
+ SWITCH_LOG_BUFSIZE;
+}
+
+static inline int spufs_switch_log_avail(struct spu_context *ctx)
+{
+ return SWITCH_LOG_BUFSIZE - spufs_switch_log_used(ctx);
+}
+
+static int spufs_switch_log_open(struct inode *inode, struct file *file)
+{
+ struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+ int rc;
+
+ rc = spu_acquire(ctx);
+ if (rc)
+ return rc;
+
+ if (ctx->switch_log) {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ ctx->switch_log = kmalloc(sizeof(struct switch_log) +
+ SWITCH_LOG_BUFSIZE * sizeof(struct switch_log_entry),
+ GFP_KERNEL);
+
+ if (!ctx->switch_log) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ ctx->switch_log->head = ctx->switch_log->tail = 0;
+ init_waitqueue_head(&ctx->switch_log->wait);
+ rc = 0;
+
+out:
+ spu_release(ctx);
+ return rc;
+}
+
+static int spufs_switch_log_release(struct inode *inode, struct file *file)
+{
+ struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+ int rc;
+
+ rc = spu_acquire(ctx);
+ if (rc)
+ return rc;
+
+ kfree(ctx->switch_log);
+ ctx->switch_log = NULL;
+ spu_release(ctx);
+
+ return 0;
+}
+
+static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n)
+{
+ struct switch_log_entry *p;
+
+ p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE;
+
+ return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n",
+ (unsigned int) p->tstamp.tv_sec,
+ (unsigned int) p->tstamp.tv_nsec,
+ p->spu_id,
+ (unsigned int) p->type,
+ (unsigned int) p->val,
+ (unsigned long long) p->timebase);
+}
+
+static ssize_t spufs_switch_log_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+ int error = 0, cnt = 0;
+
+ if (!buf)
+ return -EINVAL;
+
+ error = spu_acquire(ctx);
+ if (error)
+ return error;
+
+ while (cnt < len) {
+ char tbuf[128];
+ int width;
+
+ if (spufs_switch_log_used(ctx) == 0) {
+ if (cnt > 0) {
+ /* If there's data ready to go, we can
+ * just return straight away */
+ break;
+
+ } else if (file->f_flags & O_NONBLOCK) {
+ error = -EAGAIN;
+ break;
+
+ } else {
+ /* spufs_wait will drop the mutex and
+ * re-acquire, but since we're in read(), the
+ * file cannot be _released (and so
+ * ctx->switch_log is stable).
+ */
+ error = spufs_wait(ctx->switch_log->wait,
+ spufs_switch_log_used(ctx) > 0);
+
+ /* On error, spufs_wait returns without the
+ * state mutex held */
+ if (error)
+ return error;
+
+ /* We may have had entries read from underneath
+ * us while we dropped the mutex in spufs_wait,
+ * so re-check */
+ if (spufs_switch_log_used(ctx) == 0)
+ continue;
+ }
+ }
+
+ width = switch_log_sprint(ctx, tbuf, sizeof(tbuf));
+ if (width < len)
+ ctx->switch_log->tail =
+ (ctx->switch_log->tail + 1) %
+ SWITCH_LOG_BUFSIZE;
+ else
+ /* If the record is greater than space available return
+ * partial buffer (so far) */
+ break;
+
+ error = copy_to_user(buf + cnt, tbuf, width);
+ if (error)
+ break;
+ cnt += width;
+ }
+
+ spu_release(ctx);
+
+ return cnt == 0 ? error : cnt;
+}
+
+static unsigned int spufs_switch_log_poll(struct file *file, poll_table *wait)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+ unsigned int mask = 0;
+ int rc;
+
+ poll_wait(file, &ctx->switch_log->wait, wait);
+
+ rc = spu_acquire(ctx);
+ if (rc)
+ return rc;
+
+ if (spufs_switch_log_used(ctx) > 0)
+ mask |= POLLIN;
+
+ spu_release(ctx);
+
+ return mask;
+}
+
+static const struct file_operations spufs_switch_log_fops = {
+ .owner = THIS_MODULE,
+ .open = spufs_switch_log_open,
+ .read = spufs_switch_log_read,
+ .poll = spufs_switch_log_poll,
+ .release = spufs_switch_log_release,
+ .llseek = no_llseek,
+};
+
+/**
+ * Log a context switch event to a switch log reader.
+ *
+ * Must be called with ctx->state_mutex held.
+ */
+void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
+ u32 type, u32 val)
+{
+ if (!ctx->switch_log)
+ return;
+
+ if (spufs_switch_log_avail(ctx) > 1) {
+ struct switch_log_entry *p;
+
+ p = ctx->switch_log->log + ctx->switch_log->head;
+ ktime_get_ts(&p->tstamp);
+ p->timebase = get_tb();
+ p->spu_id = spu ? spu->number : -1;
+ p->type = type;
+ p->val = val;
+
+ ctx->switch_log->head =
+ (ctx->switch_log->head + 1) % SWITCH_LOG_BUFSIZE;
+ }
+
+ wake_up(&ctx->switch_log->wait);
+}
+
+static int spufs_show_ctx(struct seq_file *s, void *private)
+{
+ struct spu_context *ctx = s->private;
+ u64 mfc_control_RW;
+
+ mutex_lock(&ctx->state_mutex);
+ if (ctx->spu) {
+ struct spu *spu = ctx->spu;
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ spin_lock_irq(&spu->register_lock);
+ mfc_control_RW = in_be64(&priv2->mfc_control_RW);
+ spin_unlock_irq(&spu->register_lock);
+ } else {
+ struct spu_state *csa = &ctx->csa;
+
+ mfc_control_RW = csa->priv2.mfc_control_RW;
+ }
+
+ seq_printf(s, "%c flgs(%lx) sflgs(%lx) pri(%d) ts(%d) spu(%02d)"
+ " %c %llx %llx %llx %llx %x %x\n",
+ ctx->state == SPU_STATE_SAVED ? 'S' : 'R',
+ ctx->flags,
+ ctx->sched_flags,
+ ctx->prio,
+ ctx->time_slice,
+ ctx->spu ? ctx->spu->number : -1,
+ !list_empty(&ctx->rq) ? 'q' : ' ',
+ ctx->csa.class_0_pending,
+ ctx->csa.class_0_dar,
+ ctx->csa.class_1_dsisr,
+ mfc_control_RW,
+ ctx->ops->runcntl_read(ctx),
+ ctx->ops->status_read(ctx));
+
+ mutex_unlock(&ctx->state_mutex);
+
+ return 0;
+}
+
+static int spufs_ctx_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, spufs_show_ctx, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_ctx_fops = {
+ .open = spufs_ctx_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+const struct spufs_tree_descr spufs_dir_contents[] = {
+ { "capabilities", &spufs_caps_fops, 0444, },
+ { "mem", &spufs_mem_fops, 0666, LS_SIZE, },
+ { "regs", &spufs_regs_fops, 0666, sizeof(struct spu_reg128[128]), },
+ { "mbox", &spufs_mbox_fops, 0444, },
+ { "ibox", &spufs_ibox_fops, 0444, },
+ { "wbox", &spufs_wbox_fops, 0222, },
+ { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), },
+ { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), },
+ { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), },
+ { "signal1", &spufs_signal1_fops, 0666, },
+ { "signal2", &spufs_signal2_fops, 0666, },
+ { "signal1_type", &spufs_signal1_type, 0666, },
+ { "signal2_type", &spufs_signal2_type, 0666, },
+ { "cntl", &spufs_cntl_fops, 0666, },
+ { "fpcr", &spufs_fpcr_fops, 0666, sizeof(struct spu_reg128), },
+ { "lslr", &spufs_lslr_ops, 0444, },
+ { "mfc", &spufs_mfc_fops, 0666, },
+ { "mss", &spufs_mss_fops, 0666, },
+ { "npc", &spufs_npc_ops, 0666, },
+ { "srr0", &spufs_srr0_ops, 0666, },
+ { "decr", &spufs_decr_ops, 0666, },
+ { "decr_status", &spufs_decr_status_ops, 0666, },
+ { "event_mask", &spufs_event_mask_ops, 0666, },
+ { "event_status", &spufs_event_status_ops, 0444, },
+ { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, },
+ { "phys-id", &spufs_id_ops, 0666, },
+ { "object-id", &spufs_object_id_ops, 0666, },
+ { "mbox_info", &spufs_mbox_info_fops, 0444, sizeof(u32), },
+ { "ibox_info", &spufs_ibox_info_fops, 0444, sizeof(u32), },
+ { "wbox_info", &spufs_wbox_info_fops, 0444, sizeof(u32), },
+ { "dma_info", &spufs_dma_info_fops, 0444,
+ sizeof(struct spu_dma_info), },
+ { "proxydma_info", &spufs_proxydma_info_fops, 0444,
+ sizeof(struct spu_proxydma_info)},
+ { "tid", &spufs_tid_fops, 0444, },
+ { "stat", &spufs_stat_fops, 0444, },
+ { "switch_log", &spufs_switch_log_fops, 0444 },
+ {},
+};
+
+const struct spufs_tree_descr spufs_dir_nosched_contents[] = {
+ { "capabilities", &spufs_caps_fops, 0444, },
+ { "mem", &spufs_mem_fops, 0666, LS_SIZE, },
+ { "mbox", &spufs_mbox_fops, 0444, },
+ { "ibox", &spufs_ibox_fops, 0444, },
+ { "wbox", &spufs_wbox_fops, 0222, },
+ { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), },
+ { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), },
+ { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), },
+ { "signal1", &spufs_signal1_nosched_fops, 0222, },
+ { "signal2", &spufs_signal2_nosched_fops, 0222, },
+ { "signal1_type", &spufs_signal1_type, 0666, },
+ { "signal2_type", &spufs_signal2_type, 0666, },
+ { "mss", &spufs_mss_fops, 0666, },
+ { "mfc", &spufs_mfc_fops, 0666, },
+ { "cntl", &spufs_cntl_fops, 0666, },
+ { "npc", &spufs_npc_ops, 0666, },
+ { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, },
+ { "phys-id", &spufs_id_ops, 0666, },
+ { "object-id", &spufs_object_id_ops, 0666, },
+ { "tid", &spufs_tid_fops, 0444, },
+ { "stat", &spufs_stat_fops, 0444, },
+ {},
+};
+
+const struct spufs_tree_descr spufs_dir_debug_contents[] = {
+ { ".ctx", &spufs_ctx_fops, 0444, },
+ {},
+};
+
+const struct spufs_coredump_reader spufs_coredump_read[] = {
+ { "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])},
+ { "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) },
+ { "lslr", NULL, spufs_lslr_get, 19 },
+ { "decr", NULL, spufs_decr_get, 19 },
+ { "decr_status", NULL, spufs_decr_status_get, 19 },
+ { "mem", __spufs_mem_read, NULL, LS_SIZE, },
+ { "signal1", __spufs_signal1_read, NULL, sizeof(u32) },
+ { "signal1_type", NULL, spufs_signal1_type_get, 19 },
+ { "signal2", __spufs_signal2_read, NULL, sizeof(u32) },
+ { "signal2_type", NULL, spufs_signal2_type_get, 19 },
+ { "event_mask", NULL, spufs_event_mask_get, 19 },
+ { "event_status", NULL, spufs_event_status_get, 19 },
+ { "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) },
+ { "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) },
+ { "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)},
+ { "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)},
+ { "proxydma_info", __spufs_proxydma_info_read,
+ NULL, sizeof(struct spu_proxydma_info)},
+ { "object-id", NULL, spufs_object_id_get, 19 },
+ { "npc", NULL, spufs_npc_get, 19 },
+ { NULL },
+};
diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c
new file mode 100644
index 00000000000..71a44325302
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/gang.c
@@ -0,0 +1,87 @@
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include "spufs.h"
+
+struct spu_gang *alloc_spu_gang(void)
+{
+ struct spu_gang *gang;
+
+ gang = kzalloc(sizeof *gang, GFP_KERNEL);
+ if (!gang)
+ goto out;
+
+ kref_init(&gang->kref);
+ mutex_init(&gang->mutex);
+ mutex_init(&gang->aff_mutex);
+ INIT_LIST_HEAD(&gang->list);
+ INIT_LIST_HEAD(&gang->aff_list_head);
+
+out:
+ return gang;
+}
+
+static void destroy_spu_gang(struct kref *kref)
+{
+ struct spu_gang *gang;
+ gang = container_of(kref, struct spu_gang, kref);
+ WARN_ON(gang->contexts || !list_empty(&gang->list));
+ kfree(gang);
+}
+
+struct spu_gang *get_spu_gang(struct spu_gang *gang)
+{
+ kref_get(&gang->kref);
+ return gang;
+}
+
+int put_spu_gang(struct spu_gang *gang)
+{
+ return kref_put(&gang->kref, &destroy_spu_gang);
+}
+
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+ mutex_lock(&gang->mutex);
+ ctx->gang = get_spu_gang(gang);
+ list_add(&ctx->gang_list, &gang->list);
+ gang->contexts++;
+ mutex_unlock(&gang->mutex);
+}
+
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+ mutex_lock(&gang->mutex);
+ WARN_ON(ctx->gang != gang);
+ if (!list_empty(&ctx->aff_list)) {
+ list_del_init(&ctx->aff_list);
+ gang->aff_flags &= ~AFF_OFFSETS_SET;
+ }
+ list_del_init(&ctx->gang_list);
+ gang->contexts--;
+ mutex_unlock(&gang->mutex);
+
+ put_spu_gang(gang);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c
new file mode 100644
index 00000000000..8655c4cbefc
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -0,0 +1,349 @@
+/* hw_ops.c - query/set operations on active SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+static int spu_hw_mbox_read(struct spu_context *ctx, u32 * data)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_problem __iomem *prob = spu->problem;
+ u32 mbox_stat;
+ int ret = 0;
+
+ spin_lock_irq(&spu->register_lock);
+ mbox_stat = in_be32(&prob->mb_stat_R);
+ if (mbox_stat & 0x0000ff) {
+ *data = in_be32(&prob->pu_mb_R);
+ ret = 4;
+ }
+ spin_unlock_irq(&spu->register_lock);
+ return ret;
+}
+
+static u32 spu_hw_mbox_stat_read(struct spu_context *ctx)
+{
+ return in_be32(&ctx->spu->problem->mb_stat_R);
+}
+
+static unsigned int spu_hw_mbox_stat_poll(struct spu_context *ctx,
+ unsigned int events)
+{
+ struct spu *spu = ctx->spu;
+ int ret = 0;
+ u32 stat;
+
+ spin_lock_irq(&spu->register_lock);
+ stat = in_be32(&spu->problem->mb_stat_R);
+
+ /* if the requested event is there, return the poll
+ mask, otherwise enable the interrupt to get notified,
+ but first mark any pending interrupts as done so
+ we don't get woken up unnecessarily */
+
+ if (events & (POLLIN | POLLRDNORM)) {
+ if (stat & 0xff0000)
+ ret |= POLLIN | POLLRDNORM;
+ else {
+ spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR);
+ spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+ }
+ }
+ if (events & (POLLOUT | POLLWRNORM)) {
+ if (stat & 0x00ff00)
+ ret = POLLOUT | POLLWRNORM;
+ else {
+ spu_int_stat_clear(spu, 2,
+ CLASS2_MAILBOX_THRESHOLD_INTR);
+ spu_int_mask_or(spu, 2,
+ CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+ }
+ }
+ spin_unlock_irq(&spu->register_lock);
+ return ret;
+}
+
+static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_problem __iomem *prob = spu->problem;
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ int ret;
+
+ spin_lock_irq(&spu->register_lock);
+ if (in_be32(&prob->mb_stat_R) & 0xff0000) {
+ /* read the first available word */
+ *data = in_be64(&priv2->puint_mb_R);
+ ret = 4;
+ } else {
+ /* make sure we get woken up by the interrupt */
+ spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+ ret = 0;
+ }
+ spin_unlock_irq(&spu->register_lock);
+ return ret;
+}
+
+static int spu_hw_wbox_write(struct spu_context *ctx, u32 data)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_problem __iomem *prob = spu->problem;
+ int ret;
+
+ spin_lock_irq(&spu->register_lock);
+ if (in_be32(&prob->mb_stat_R) & 0x00ff00) {
+ /* we have space to write wbox_data to */
+ out_be32(&prob->spu_mb_W, data);
+ ret = 4;
+ } else {
+ /* make sure we get woken up by the interrupt when space
+ becomes available */
+ spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+ ret = 0;
+ }
+ spin_unlock_irq(&spu->register_lock);
+ return ret;
+}
+
+static void spu_hw_signal1_write(struct spu_context *ctx, u32 data)
+{
+ out_be32(&ctx->spu->problem->signal_notify1, data);
+}
+
+static void spu_hw_signal2_write(struct spu_context *ctx, u32 data)
+{
+ out_be32(&ctx->spu->problem->signal_notify2, data);
+}
+
+static void spu_hw_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ u64 tmp;
+
+ spin_lock_irq(&spu->register_lock);
+ tmp = in_be64(&priv2->spu_cfg_RW);
+ if (val)
+ tmp |= 1;
+ else
+ tmp &= ~1;
+ out_be64(&priv2->spu_cfg_RW, tmp);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+static u64 spu_hw_signal1_type_get(struct spu_context *ctx)
+{
+ return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 1) != 0);
+}
+
+static void spu_hw_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ u64 tmp;
+
+ spin_lock_irq(&spu->register_lock);
+ tmp = in_be64(&priv2->spu_cfg_RW);
+ if (val)
+ tmp |= 2;
+ else
+ tmp &= ~2;
+ out_be64(&priv2->spu_cfg_RW, tmp);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+static u64 spu_hw_signal2_type_get(struct spu_context *ctx)
+{
+ return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 2) != 0);
+}
+
+static u32 spu_hw_npc_read(struct spu_context *ctx)
+{
+ return in_be32(&ctx->spu->problem->spu_npc_RW);
+}
+
+static void spu_hw_npc_write(struct spu_context *ctx, u32 val)
+{
+ out_be32(&ctx->spu->problem->spu_npc_RW, val);
+}
+
+static u32 spu_hw_status_read(struct spu_context *ctx)
+{
+ return in_be32(&ctx->spu->problem->spu_status_R);
+}
+
+static char *spu_hw_get_ls(struct spu_context *ctx)
+{
+ return ctx->spu->local_store;
+}
+
+static void spu_hw_privcntl_write(struct spu_context *ctx, u64 val)
+{
+ out_be64(&ctx->spu->priv2->spu_privcntl_RW, val);
+}
+
+static u32 spu_hw_runcntl_read(struct spu_context *ctx)
+{
+ return in_be32(&ctx->spu->problem->spu_runcntl_RW);
+}
+
+static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val)
+{
+ spin_lock_irq(&ctx->spu->register_lock);
+ if (val & SPU_RUNCNTL_ISOLATE)
+ spu_hw_privcntl_write(ctx,
+ SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK);
+ out_be32(&ctx->spu->problem->spu_runcntl_RW, val);
+ spin_unlock_irq(&ctx->spu->register_lock);
+}
+
+static void spu_hw_runcntl_stop(struct spu_context *ctx)
+{
+ spin_lock_irq(&ctx->spu->register_lock);
+ out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+ while (in_be32(&ctx->spu->problem->spu_status_R) & SPU_STATUS_RUNNING)
+ cpu_relax();
+ spin_unlock_irq(&ctx->spu->register_lock);
+}
+
+static void spu_hw_master_start(struct spu_context *ctx)
+{
+ struct spu *spu = ctx->spu;
+ u64 sr1;
+
+ spin_lock_irq(&spu->register_lock);
+ sr1 = spu_mfc_sr1_get(spu) | MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+ spu_mfc_sr1_set(spu, sr1);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+static void spu_hw_master_stop(struct spu_context *ctx)
+{
+ struct spu *spu = ctx->spu;
+ u64 sr1;
+
+ spin_lock_irq(&spu->register_lock);
+ sr1 = spu_mfc_sr1_get(spu) & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+ spu_mfc_sr1_set(spu, sr1);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode)
+{
+ struct spu_problem __iomem *prob = ctx->spu->problem;
+ int ret;
+
+ spin_lock_irq(&ctx->spu->register_lock);
+ ret = -EAGAIN;
+ if (in_be32(&prob->dma_querytype_RW))
+ goto out;
+ ret = 0;
+ out_be32(&prob->dma_querymask_RW, mask);
+ out_be32(&prob->dma_querytype_RW, mode);
+out:
+ spin_unlock_irq(&ctx->spu->register_lock);
+ return ret;
+}
+
+static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx)
+{
+ return in_be32(&ctx->spu->problem->dma_tagstatus_R);
+}
+
+static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx)
+{
+ return in_be32(&ctx->spu->problem->dma_qstatus_R);
+}
+
+static int spu_hw_send_mfc_command(struct spu_context *ctx,
+ struct mfc_dma_command *cmd)
+{
+ u32 status;
+ struct spu_problem __iomem *prob = ctx->spu->problem;
+
+ spin_lock_irq(&ctx->spu->register_lock);
+ out_be32(&prob->mfc_lsa_W, cmd->lsa);
+ out_be64(&prob->mfc_ea_W, cmd->ea);
+ out_be32(&prob->mfc_union_W.by32.mfc_size_tag32,
+ cmd->size << 16 | cmd->tag);
+ out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32,
+ cmd->class << 16 | cmd->cmd);
+ status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
+ spin_unlock_irq(&ctx->spu->register_lock);
+
+ switch (status & 0xffff) {
+ case 0:
+ return 0;
+ case 2:
+ return -EAGAIN;
+ default:
+ return -EINVAL;
+ }
+}
+
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+ struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+ if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+ out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
+struct spu_context_ops spu_hw_ops = {
+ .mbox_read = spu_hw_mbox_read,
+ .mbox_stat_read = spu_hw_mbox_stat_read,
+ .mbox_stat_poll = spu_hw_mbox_stat_poll,
+ .ibox_read = spu_hw_ibox_read,
+ .wbox_write = spu_hw_wbox_write,
+ .signal1_write = spu_hw_signal1_write,
+ .signal2_write = spu_hw_signal2_write,
+ .signal1_type_set = spu_hw_signal1_type_set,
+ .signal1_type_get = spu_hw_signal1_type_get,
+ .signal2_type_set = spu_hw_signal2_type_set,
+ .signal2_type_get = spu_hw_signal2_type_get,
+ .npc_read = spu_hw_npc_read,
+ .npc_write = spu_hw_npc_write,
+ .status_read = spu_hw_status_read,
+ .get_ls = spu_hw_get_ls,
+ .privcntl_write = spu_hw_privcntl_write,
+ .runcntl_read = spu_hw_runcntl_read,
+ .runcntl_write = spu_hw_runcntl_write,
+ .runcntl_stop = spu_hw_runcntl_stop,
+ .master_start = spu_hw_master_start,
+ .master_stop = spu_hw_master_stop,
+ .set_mfc_query = spu_hw_set_mfc_query,
+ .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
+ .get_mfc_free_elements = spu_hw_get_mfc_free_elements,
+ .send_mfc_command = spu_hw_send_mfc_command,
+ .restart_dma = spu_hw_restart_dma,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
new file mode 100644
index 00000000000..d4a094ca96f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -0,0 +1,865 @@
+
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/backing-dev.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/parser.h>
+
+#include <asm/prom.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+
+struct spufs_sb_info {
+ int debug;
+};
+
+static struct kmem_cache *spufs_inode_cache;
+char *isolated_loader;
+static int isolated_loader_size;
+
+static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
+static struct inode *
+spufs_alloc_inode(struct super_block *sb)
+{
+ struct spufs_inode_info *ei;
+
+ ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL);
+ if (!ei)
+ return NULL;
+
+ ei->i_gang = NULL;
+ ei->i_ctx = NULL;
+ ei->i_openers = 0;
+
+ return &ei->vfs_inode;
+}
+
+static void spufs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ kmem_cache_free(spufs_inode_cache, SPUFS_I(inode));
+}
+
+static void spufs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, spufs_i_callback);
+}
+
+static void
+spufs_init_once(void *p)
+{
+ struct spufs_inode_info *ei = p;
+
+ inode_init_once(&ei->vfs_inode);
+}
+
+static struct inode *
+spufs_new_inode(struct super_block *sb, umode_t mode)
+{
+ struct inode *inode;
+
+ inode = new_inode(sb);
+ if (!inode)
+ goto out;
+
+ inode->i_mode = mode;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+out:
+ return inode;
+}
+
+static int
+spufs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ struct inode *inode = dentry->d_inode;
+
+ if ((attr->ia_valid & ATTR_SIZE) &&
+ (attr->ia_size != inode->i_size))
+ return -EINVAL;
+ setattr_copy(inode, attr);
+ mark_inode_dirty(inode);
+ return 0;
+}
+
+
+static int
+spufs_new_file(struct super_block *sb, struct dentry *dentry,
+ const struct file_operations *fops, umode_t mode,
+ size_t size, struct spu_context *ctx)
+{
+ static const struct inode_operations spufs_file_iops = {
+ .setattr = spufs_setattr,
+ };
+ struct inode *inode;
+ int ret;
+
+ ret = -ENOSPC;
+ inode = spufs_new_inode(sb, S_IFREG | mode);
+ if (!inode)
+ goto out;
+
+ ret = 0;
+ inode->i_op = &spufs_file_iops;
+ inode->i_fop = fops;
+ inode->i_size = size;
+ inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx);
+ d_add(dentry, inode);
+out:
+ return ret;
+}
+
+static void
+spufs_evict_inode(struct inode *inode)
+{
+ struct spufs_inode_info *ei = SPUFS_I(inode);
+ end_writeback(inode);
+ if (ei->i_ctx)
+ put_spu_context(ei->i_ctx);
+ if (ei->i_gang)
+ put_spu_gang(ei->i_gang);
+}
+
+static void spufs_prune_dir(struct dentry *dir)
+{
+ struct dentry *dentry, *tmp;
+
+ mutex_lock(&dir->d_inode->i_mutex);
+ list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
+ spin_lock(&dentry->d_lock);
+ if (!(d_unhashed(dentry)) && dentry->d_inode) {
+ dget_dlock(dentry);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+ simple_unlink(dir->d_inode, dentry);
+ /* XXX: what was dcache_lock protecting here? Other
+ * filesystems (IB, configfs) release dcache_lock
+ * before unlink */
+ dput(dentry);
+ } else {
+ spin_unlock(&dentry->d_lock);
+ }
+ }
+ shrink_dcache_parent(dir);
+ mutex_unlock(&dir->d_inode->i_mutex);
+}
+
+/* Caller must hold parent->i_mutex */
+static int spufs_rmdir(struct inode *parent, struct dentry *dir)
+{
+ /* remove all entries */
+ spufs_prune_dir(dir);
+ d_drop(dir);
+
+ return simple_rmdir(parent, dir);
+}
+
+static int spufs_fill_dir(struct dentry *dir,
+ const struct spufs_tree_descr *files, umode_t mode,
+ struct spu_context *ctx)
+{
+ struct dentry *dentry, *tmp;
+ int ret;
+
+ while (files->name && files->name[0]) {
+ ret = -ENOMEM;
+ dentry = d_alloc_name(dir, files->name);
+ if (!dentry)
+ goto out;
+ ret = spufs_new_file(dir->d_sb, dentry, files->ops,
+ files->mode & mode, files->size, ctx);
+ if (ret)
+ goto out;
+ files++;
+ }
+ return 0;
+out:
+ /*
+ * remove all children from dir. dir->inode is not set so don't
+ * just simply use spufs_prune_dir() and panic afterwards :)
+ * dput() looks like it will do the right thing:
+ * - dec parent's ref counter
+ * - remove child from parent's child list
+ * - free child's inode if possible
+ * - free child
+ */
+ list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
+ dput(dentry);
+ }
+
+ shrink_dcache_parent(dir);
+ return ret;
+}
+
+static int spufs_dir_close(struct inode *inode, struct file *file)
+{
+ struct spu_context *ctx;
+ struct inode *parent;
+ struct dentry *dir;
+ int ret;
+
+ dir = file->f_path.dentry;
+ parent = dir->d_parent->d_inode;
+ ctx = SPUFS_I(dir->d_inode)->i_ctx;
+
+ mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
+ ret = spufs_rmdir(parent, dir);
+ mutex_unlock(&parent->i_mutex);
+ WARN_ON(ret);
+
+ /* We have to give up the mm_struct */
+ spu_forget(ctx);
+
+ return dcache_dir_close(inode, file);
+}
+
+const struct file_operations spufs_context_fops = {
+ .open = dcache_dir_open,
+ .release = spufs_dir_close,
+ .llseek = dcache_dir_lseek,
+ .read = generic_read_dir,
+ .readdir = dcache_readdir,
+ .fsync = noop_fsync,
+};
+EXPORT_SYMBOL_GPL(spufs_context_fops);
+
+static int
+spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
+ umode_t mode)
+{
+ int ret;
+ struct inode *inode;
+ struct spu_context *ctx;
+
+ ret = -ENOSPC;
+ inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+ if (!inode)
+ goto out;
+
+ if (dir->i_mode & S_ISGID) {
+ inode->i_gid = dir->i_gid;
+ inode->i_mode &= S_ISGID;
+ }
+ ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */
+ SPUFS_I(inode)->i_ctx = ctx;
+ if (!ctx)
+ goto out_iput;
+
+ ctx->flags = flags;
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ if (flags & SPU_CREATE_NOSCHED)
+ ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents,
+ mode, ctx);
+ else
+ ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx);
+
+ if (ret)
+ goto out_free_ctx;
+
+ if (spufs_get_sb_info(dir->i_sb)->debug)
+ ret = spufs_fill_dir(dentry, spufs_dir_debug_contents,
+ mode, ctx);
+
+ if (ret)
+ goto out_free_ctx;
+
+ d_instantiate(dentry, inode);
+ dget(dentry);
+ inc_nlink(dir);
+ inc_nlink(dentry->d_inode);
+ goto out;
+
+out_free_ctx:
+ spu_forget(ctx);
+ put_spu_context(ctx);
+out_iput:
+ iput(inode);
+out:
+ return ret;
+}
+
+static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt)
+{
+ int ret;
+ struct file *filp;
+
+ ret = get_unused_fd();
+ if (ret < 0) {
+ dput(dentry);
+ mntput(mnt);
+ goto out;
+ }
+
+ filp = dentry_open(dentry, mnt, O_RDONLY, current_cred());
+ if (IS_ERR(filp)) {
+ put_unused_fd(ret);
+ ret = PTR_ERR(filp);
+ goto out;
+ }
+
+ filp->f_op = &spufs_context_fops;
+ fd_install(ret, filp);
+out:
+ return ret;
+}
+
+static struct spu_context *
+spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
+ struct file *filp)
+{
+ struct spu_context *tmp, *neighbor, *err;
+ int count, node;
+ int aff_supp;
+
+ aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
+ struct spu, cbe_list))->aff_list);
+
+ if (!aff_supp)
+ return ERR_PTR(-EINVAL);
+
+ if (flags & SPU_CREATE_GANG)
+ return ERR_PTR(-EINVAL);
+
+ if (flags & SPU_CREATE_AFFINITY_MEM &&
+ gang->aff_ref_ctx &&
+ gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
+ return ERR_PTR(-EEXIST);
+
+ if (gang->aff_flags & AFF_MERGED)
+ return ERR_PTR(-EBUSY);
+
+ neighbor = NULL;
+ if (flags & SPU_CREATE_AFFINITY_SPU) {
+ if (!filp || filp->f_op != &spufs_context_fops)
+ return ERR_PTR(-EINVAL);
+
+ neighbor = get_spu_context(
+ SPUFS_I(filp->f_dentry->d_inode)->i_ctx);
+
+ if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
+ !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
+ !list_entry(neighbor->aff_list.next, struct spu_context,
+ aff_list)->aff_head) {
+ err = ERR_PTR(-EEXIST);
+ goto out_put_neighbor;
+ }
+
+ if (gang != neighbor->gang) {
+ err = ERR_PTR(-EINVAL);
+ goto out_put_neighbor;
+ }
+
+ count = 1;
+ list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+ count++;
+ if (list_empty(&neighbor->aff_list))
+ count++;
+
+ for (node = 0; node < MAX_NUMNODES; node++) {
+ if ((cbe_spu_info[node].n_spus - atomic_read(
+ &cbe_spu_info[node].reserved_spus)) >= count)
+ break;
+ }
+
+ if (node == MAX_NUMNODES) {
+ err = ERR_PTR(-EEXIST);
+ goto out_put_neighbor;
+ }
+ }
+
+ return neighbor;
+
+out_put_neighbor:
+ put_spu_context(neighbor);
+ return err;
+}
+
+static void
+spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
+ struct spu_context *neighbor)
+{
+ if (flags & SPU_CREATE_AFFINITY_MEM)
+ ctx->gang->aff_ref_ctx = ctx;
+
+ if (flags & SPU_CREATE_AFFINITY_SPU) {
+ if (list_empty(&neighbor->aff_list)) {
+ list_add_tail(&neighbor->aff_list,
+ &ctx->gang->aff_list_head);
+ neighbor->aff_head = 1;
+ }
+
+ if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
+ || list_entry(neighbor->aff_list.next, struct spu_context,
+ aff_list)->aff_head) {
+ list_add(&ctx->aff_list, &neighbor->aff_list);
+ } else {
+ list_add_tail(&ctx->aff_list, &neighbor->aff_list);
+ if (neighbor->aff_head) {
+ neighbor->aff_head = 0;
+ ctx->aff_head = 1;
+ }
+ }
+
+ if (!ctx->gang->aff_ref_ctx)
+ ctx->gang->aff_ref_ctx = ctx;
+ }
+}
+
+static int
+spufs_create_context(struct inode *inode, struct dentry *dentry,
+ struct vfsmount *mnt, int flags, umode_t mode,
+ struct file *aff_filp)
+{
+ int ret;
+ int affinity;
+ struct spu_gang *gang;
+ struct spu_context *neighbor;
+
+ ret = -EPERM;
+ if ((flags & SPU_CREATE_NOSCHED) &&
+ !capable(CAP_SYS_NICE))
+ goto out_unlock;
+
+ ret = -EINVAL;
+ if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE))
+ == SPU_CREATE_ISOLATE)
+ goto out_unlock;
+
+ ret = -ENODEV;
+ if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
+ goto out_unlock;
+
+ gang = NULL;
+ neighbor = NULL;
+ affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
+ if (affinity) {
+ gang = SPUFS_I(inode)->i_gang;
+ ret = -EINVAL;
+ if (!gang)
+ goto out_unlock;
+ mutex_lock(&gang->aff_mutex);
+ neighbor = spufs_assert_affinity(flags, gang, aff_filp);
+ if (IS_ERR(neighbor)) {
+ ret = PTR_ERR(neighbor);
+ goto out_aff_unlock;
+ }
+ }
+
+ ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
+ if (ret)
+ goto out_aff_unlock;
+
+ if (affinity) {
+ spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
+ neighbor);
+ if (neighbor)
+ put_spu_context(neighbor);
+ }
+
+ /*
+ * get references for dget and mntget, will be released
+ * in error path of *_open().
+ */
+ ret = spufs_context_open(dget(dentry), mntget(mnt));
+ if (ret < 0) {
+ WARN_ON(spufs_rmdir(inode, dentry));
+ if (affinity)
+ mutex_unlock(&gang->aff_mutex);
+ mutex_unlock(&inode->i_mutex);
+ spu_forget(SPUFS_I(dentry->d_inode)->i_ctx);
+ goto out;
+ }
+
+out_aff_unlock:
+ if (affinity)
+ mutex_unlock(&gang->aff_mutex);
+out_unlock:
+ mutex_unlock(&inode->i_mutex);
+out:
+ dput(dentry);
+ return ret;
+}
+
+static int
+spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ int ret;
+ struct inode *inode;
+ struct spu_gang *gang;
+
+ ret = -ENOSPC;
+ inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+ if (!inode)
+ goto out;
+
+ ret = 0;
+ if (dir->i_mode & S_ISGID) {
+ inode->i_gid = dir->i_gid;
+ inode->i_mode &= S_ISGID;
+ }
+ gang = alloc_spu_gang();
+ SPUFS_I(inode)->i_ctx = NULL;
+ SPUFS_I(inode)->i_gang = gang;
+ if (!gang)
+ goto out_iput;
+
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+
+ d_instantiate(dentry, inode);
+ inc_nlink(dir);
+ inc_nlink(dentry->d_inode);
+ return ret;
+
+out_iput:
+ iput(inode);
+out:
+ return ret;
+}
+
+static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt)
+{
+ int ret;
+ struct file *filp;
+
+ ret = get_unused_fd();
+ if (ret < 0) {
+ dput(dentry);
+ mntput(mnt);
+ goto out;
+ }
+
+ filp = dentry_open(dentry, mnt, O_RDONLY, current_cred());
+ if (IS_ERR(filp)) {
+ put_unused_fd(ret);
+ ret = PTR_ERR(filp);
+ goto out;
+ }
+
+ filp->f_op = &simple_dir_operations;
+ fd_install(ret, filp);
+out:
+ return ret;
+}
+
+static int spufs_create_gang(struct inode *inode,
+ struct dentry *dentry,
+ struct vfsmount *mnt, umode_t mode)
+{
+ int ret;
+
+ ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
+ if (ret)
+ goto out;
+
+ /*
+ * get references for dget and mntget, will be released
+ * in error path of *_open().
+ */
+ ret = spufs_gang_open(dget(dentry), mntget(mnt));
+ if (ret < 0) {
+ int err = simple_rmdir(inode, dentry);
+ WARN_ON(err);
+ }
+
+out:
+ mutex_unlock(&inode->i_mutex);
+ dput(dentry);
+ return ret;
+}
+
+
+static struct file_system_type spufs_type;
+
+long spufs_create(struct path *path, struct dentry *dentry,
+ unsigned int flags, umode_t mode, struct file *filp)
+{
+ int ret;
+
+ ret = -EINVAL;
+ /* check if we are on spufs */
+ if (path->dentry->d_sb->s_type != &spufs_type)
+ goto out;
+
+ /* don't accept undefined flags */
+ if (flags & (~SPU_CREATE_FLAG_ALL))
+ goto out;
+
+ /* only threads can be underneath a gang */
+ if (path->dentry != path->dentry->d_sb->s_root) {
+ if ((flags & SPU_CREATE_GANG) ||
+ !SPUFS_I(path->dentry->d_inode)->i_gang)
+ goto out;
+ }
+
+ mode &= ~current_umask();
+
+ if (flags & SPU_CREATE_GANG)
+ ret = spufs_create_gang(path->dentry->d_inode,
+ dentry, path->mnt, mode);
+ else
+ ret = spufs_create_context(path->dentry->d_inode,
+ dentry, path->mnt, flags, mode,
+ filp);
+ if (ret >= 0)
+ fsnotify_mkdir(path->dentry->d_inode, dentry);
+ return ret;
+
+out:
+ mutex_unlock(&path->dentry->d_inode->i_mutex);
+ return ret;
+}
+
+/* File system initialization */
+enum {
+ Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
+};
+
+static const match_table_t spufs_tokens = {
+ { Opt_uid, "uid=%d" },
+ { Opt_gid, "gid=%d" },
+ { Opt_mode, "mode=%o" },
+ { Opt_debug, "debug" },
+ { Opt_err, NULL },
+};
+
+static int
+spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
+{
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token, option;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, spufs_tokens, args);
+ switch (token) {
+ case Opt_uid:
+ if (match_int(&args[0], &option))
+ return 0;
+ root->i_uid = option;
+ break;
+ case Opt_gid:
+ if (match_int(&args[0], &option))
+ return 0;
+ root->i_gid = option;
+ break;
+ case Opt_mode:
+ if (match_octal(&args[0], &option))
+ return 0;
+ root->i_mode = option | S_IFDIR;
+ break;
+ case Opt_debug:
+ spufs_get_sb_info(sb)->debug = 1;
+ break;
+ default:
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static void spufs_exit_isolated_loader(void)
+{
+ free_pages((unsigned long) isolated_loader,
+ get_order(isolated_loader_size));
+}
+
+static void
+spufs_init_isolated_loader(void)
+{
+ struct device_node *dn;
+ const char *loader;
+ int size;
+
+ dn = of_find_node_by_path("/spu-isolation");
+ if (!dn)
+ return;
+
+ loader = of_get_property(dn, "loader", &size);
+ if (!loader)
+ return;
+
+ /* the loader must be align on a 16 byte boundary */
+ isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size));
+ if (!isolated_loader)
+ return;
+
+ isolated_loader_size = size;
+ memcpy(isolated_loader, loader, size);
+ printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
+}
+
+static int
+spufs_create_root(struct super_block *sb, void *data)
+{
+ struct inode *inode;
+ int ret;
+
+ ret = -ENODEV;
+ if (!spu_management_ops)
+ goto out;
+
+ ret = -ENOMEM;
+ inode = spufs_new_inode(sb, S_IFDIR | 0775);
+ if (!inode)
+ goto out;
+
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ SPUFS_I(inode)->i_ctx = NULL;
+ inc_nlink(inode);
+
+ ret = -EINVAL;
+ if (!spufs_parse_options(sb, data, inode))
+ goto out_iput;
+
+ ret = -ENOMEM;
+ sb->s_root = d_alloc_root(inode);
+ if (!sb->s_root)
+ goto out_iput;
+
+ return 0;
+out_iput:
+ iput(inode);
+out:
+ return ret;
+}
+
+static int
+spufs_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct spufs_sb_info *info;
+ static const struct super_operations s_ops = {
+ .alloc_inode = spufs_alloc_inode,
+ .destroy_inode = spufs_destroy_inode,
+ .statfs = simple_statfs,
+ .evict_inode = spufs_evict_inode,
+ .show_options = generic_show_options,
+ };
+
+ save_mount_options(sb, data);
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_magic = SPUFS_MAGIC;
+ sb->s_op = &s_ops;
+ sb->s_fs_info = info;
+
+ return spufs_create_root(sb, data);
+}
+
+static struct dentry *
+spufs_mount(struct file_system_type *fstype, int flags,
+ const char *name, void *data)
+{
+ return mount_single(fstype, flags, data, spufs_fill_super);
+}
+
+static struct file_system_type spufs_type = {
+ .owner = THIS_MODULE,
+ .name = "spufs",
+ .mount = spufs_mount,
+ .kill_sb = kill_litter_super,
+};
+
+static int __init spufs_init(void)
+{
+ int ret;
+
+ ret = -ENODEV;
+ if (!spu_management_ops)
+ goto out;
+
+ ret = -ENOMEM;
+ spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
+ sizeof(struct spufs_inode_info), 0,
+ SLAB_HWCACHE_ALIGN, spufs_init_once);
+
+ if (!spufs_inode_cache)
+ goto out;
+ ret = spu_sched_init();
+ if (ret)
+ goto out_cache;
+ ret = register_filesystem(&spufs_type);
+ if (ret)
+ goto out_sched;
+ ret = register_spu_syscalls(&spufs_calls);
+ if (ret)
+ goto out_fs;
+
+ spufs_init_isolated_loader();
+
+ return 0;
+
+out_fs:
+ unregister_filesystem(&spufs_type);
+out_sched:
+ spu_sched_exit();
+out_cache:
+ kmem_cache_destroy(spufs_inode_cache);
+out:
+ return ret;
+}
+module_init(spufs_init);
+
+static void __exit spufs_exit(void)
+{
+ spu_sched_exit();
+ spufs_exit_isolated_loader();
+ unregister_spu_syscalls(&spufs_calls);
+ unregister_filesystem(&spufs_type);
+ kmem_cache_destroy(spufs_inode_cache);
+}
+module_exit(spufs_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
new file mode 100644
index 00000000000..147069938cf
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -0,0 +1,183 @@
+/*
+ * SPU local store allocation routines
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu.h>
+
+#include "spufs.h"
+
+static int spu_alloc_lscsa_std(struct spu_state *csa)
+{
+ struct spu_lscsa *lscsa;
+ unsigned char *p;
+
+ lscsa = vzalloc(sizeof(struct spu_lscsa));
+ if (!lscsa)
+ return -ENOMEM;
+ csa->lscsa = lscsa;
+
+ /* Set LS pages reserved to allow for user-space mapping. */
+ for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+ SetPageReserved(vmalloc_to_page(p));
+
+ return 0;
+}
+
+static void spu_free_lscsa_std(struct spu_state *csa)
+{
+ /* Clear reserved bit before vfree. */
+ unsigned char *p;
+
+ if (csa->lscsa == NULL)
+ return;
+
+ for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+ ClearPageReserved(vmalloc_to_page(p));
+
+ vfree(csa->lscsa);
+}
+
+#ifdef CONFIG_SPU_FS_64K_LS
+
+#define SPU_64K_PAGE_SHIFT 16
+#define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT)
+#define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER)
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+ struct page **pgarray;
+ unsigned char *p;
+ int i, j, n_4k;
+
+ /* Check availability of 64K pages */
+ if (!spu_64k_pages_available())
+ goto fail;
+
+ csa->use_big_pages = 1;
+
+ pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n",
+ csa);
+
+ /* First try to allocate our 64K pages. We need 5 of them
+ * with the current implementation. In the future, we should try
+ * to separate the lscsa with the actual local store image, thus
+ * allowing us to require only 4 64K pages per context
+ */
+ for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) {
+ /* XXX This is likely to fail, we should use a special pool
+ * similar to what hugetlbfs does.
+ */
+ csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL,
+ SPU_64K_PAGE_ORDER);
+ if (csa->lscsa_pages[i] == NULL)
+ goto fail;
+ }
+
+ pr_debug(" success ! creating vmap...\n");
+
+ /* Now we need to create a vmalloc mapping of these for the kernel
+ * and SPU context switch code to use. Currently, we stick to a
+ * normal kernel vmalloc mapping, which in our case will be 4K
+ */
+ n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES;
+ pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL);
+ if (pgarray == NULL)
+ goto fail;
+ for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
+ for (j = 0; j < SPU_64K_PAGE_COUNT; j++)
+ /* We assume all the struct page's are contiguous
+ * which should be hopefully the case for an order 4
+ * allocation..
+ */
+ pgarray[i * SPU_64K_PAGE_COUNT + j] =
+ csa->lscsa_pages[i] + j;
+ csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL);
+ kfree(pgarray);
+ if (csa->lscsa == NULL)
+ goto fail;
+
+ memset(csa->lscsa, 0, sizeof(struct spu_lscsa));
+
+ /* Set LS pages reserved to allow for user-space mapping.
+ *
+ * XXX isn't that a bit obsolete ? I think we should just
+ * make sure the page count is high enough. Anyway, won't harm
+ * for now
+ */
+ for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+ SetPageReserved(vmalloc_to_page(p));
+
+ pr_debug(" all good !\n");
+
+ return 0;
+fail:
+ pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n");
+ spu_free_lscsa(csa);
+ return spu_alloc_lscsa_std(csa);
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+ unsigned char *p;
+ int i;
+
+ if (!csa->use_big_pages) {
+ spu_free_lscsa_std(csa);
+ return;
+ }
+ csa->use_big_pages = 0;
+
+ if (csa->lscsa == NULL)
+ goto free_pages;
+
+ for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+ ClearPageReserved(vmalloc_to_page(p));
+
+ vunmap(csa->lscsa);
+ csa->lscsa = NULL;
+
+ free_pages:
+
+ for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
+ if (csa->lscsa_pages[i])
+ __free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER);
+}
+
+#else /* CONFIG_SPU_FS_64K_LS */
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+ return spu_alloc_lscsa_std(csa);
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+ spu_free_lscsa_std(csa);
+}
+
+#endif /* !defined(CONFIG_SPU_FS_64K_LS) */
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
new file mode 100644
index 00000000000..4ddf769a64e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -0,0 +1,454 @@
+#define DEBUG
+
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include "spufs.h"
+
+/* interrupt-level stop callback function. */
+void spufs_stop_callback(struct spu *spu, int irq)
+{
+ struct spu_context *ctx = spu->ctx;
+
+ /*
+ * It should be impossible to preempt a context while an exception
+ * is being processed, since the context switch code is specially
+ * coded to deal with interrupts ... But, just in case, sanity check
+ * the context pointer. It is OK to return doing nothing since
+ * the exception will be regenerated when the context is resumed.
+ */
+ if (ctx) {
+ /* Copy exception arguments into module specific structure */
+ switch(irq) {
+ case 0 :
+ ctx->csa.class_0_pending = spu->class_0_pending;
+ ctx->csa.class_0_dar = spu->class_0_dar;
+ break;
+ case 1 :
+ ctx->csa.class_1_dsisr = spu->class_1_dsisr;
+ ctx->csa.class_1_dar = spu->class_1_dar;
+ break;
+ case 2 :
+ break;
+ }
+
+ /* ensure that the exception status has hit memory before a
+ * thread waiting on the context's stop queue is woken */
+ smp_wmb();
+
+ wake_up_all(&ctx->stop_wq);
+ }
+}
+
+int spu_stopped(struct spu_context *ctx, u32 *stat)
+{
+ u64 dsisr;
+ u32 stopped;
+
+ stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
+ SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+
+top:
+ *stat = ctx->ops->status_read(ctx);
+ if (*stat & stopped) {
+ /*
+ * If the spu hasn't finished stopping, we need to
+ * re-read the register to get the stopped value.
+ */
+ if (*stat & SPU_STATUS_RUNNING)
+ goto top;
+ return 1;
+ }
+
+ if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
+ return 1;
+
+ dsisr = ctx->csa.class_1_dsisr;
+ if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))
+ return 1;
+
+ if (ctx->csa.class_0_pending)
+ return 1;
+
+ return 0;
+}
+
+static int spu_setup_isolated(struct spu_context *ctx)
+{
+ int ret;
+ u64 __iomem *mfc_cntl;
+ u64 sr1;
+ u32 status;
+ unsigned long timeout;
+ const u32 status_loading = SPU_STATUS_RUNNING
+ | SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS;
+
+ ret = -ENODEV;
+ if (!isolated_loader)
+ goto out;
+
+ /*
+ * We need to exclude userspace access to the context.
+ *
+ * To protect against memory access we invalidate all ptes
+ * and make sure the pagefault handlers block on the mutex.
+ */
+ spu_unmap_mappings(ctx);
+
+ mfc_cntl = &ctx->spu->priv2->mfc_control_RW;
+
+ /* purge the MFC DMA queue to ensure no spurious accesses before we
+ * enter kernel mode */
+ timeout = jiffies + HZ;
+ out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST);
+ while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK)
+ != MFC_CNTL_PURGE_DMA_COMPLETE) {
+ if (time_after(jiffies, timeout)) {
+ printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+ cond_resched();
+ }
+
+ /* clear purge status */
+ out_be64(mfc_cntl, 0);
+
+ /* put the SPE in kernel mode to allow access to the loader */
+ sr1 = spu_mfc_sr1_get(ctx->spu);
+ sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK;
+ spu_mfc_sr1_set(ctx->spu, sr1);
+
+ /* start the loader */
+ ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32);
+ ctx->ops->signal2_write(ctx,
+ (unsigned long)isolated_loader & 0xffffffff);
+
+ ctx->ops->runcntl_write(ctx,
+ SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+
+ ret = 0;
+ timeout = jiffies + HZ;
+ while (((status = ctx->ops->status_read(ctx)) & status_loading) ==
+ status_loading) {
+ if (time_after(jiffies, timeout)) {
+ printk(KERN_ERR "%s: timeout waiting for loader\n",
+ __func__);
+ ret = -EIO;
+ goto out_drop_priv;
+ }
+ cond_resched();
+ }
+
+ if (!(status & SPU_STATUS_RUNNING)) {
+ /* If isolated LOAD has failed: run SPU, we will get a stop-and
+ * signal later. */
+ pr_debug("%s: isolated LOAD failed\n", __func__);
+ ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+ ret = -EACCES;
+ goto out_drop_priv;
+ }
+
+ if (!(status & SPU_STATUS_ISOLATED_STATE)) {
+ /* This isn't allowed by the CBEA, but check anyway */
+ pr_debug("%s: SPU fell out of isolated mode?\n", __func__);
+ ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP);
+ ret = -EINVAL;
+ goto out_drop_priv;
+ }
+
+out_drop_priv:
+ /* Finished accessing the loader. Drop kernel mode */
+ sr1 |= MFC_STATE1_PROBLEM_STATE_MASK;
+ spu_mfc_sr1_set(ctx->spu, sr1);
+
+out:
+ return ret;
+}
+
+static int spu_run_init(struct spu_context *ctx, u32 *npc)
+{
+ unsigned long runcntl = SPU_RUNCNTL_RUNNABLE;
+ int ret;
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ /*
+ * NOSCHED is synchronous scheduling with respect to the caller.
+ * The caller waits for the context to be loaded.
+ */
+ if (ctx->flags & SPU_CREATE_NOSCHED) {
+ if (ctx->state == SPU_STATE_SAVED) {
+ ret = spu_activate(ctx, 0);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /*
+ * Apply special setup as required.
+ */
+ if (ctx->flags & SPU_CREATE_ISOLATE) {
+ if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) {
+ ret = spu_setup_isolated(ctx);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * If userspace has set the runcntrl register (eg, to
+ * issue an isolated exit), we need to re-set it here
+ */
+ runcntl = ctx->ops->runcntl_read(ctx) &
+ (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+ if (runcntl == 0)
+ runcntl = SPU_RUNCNTL_RUNNABLE;
+ } else {
+ unsigned long privcntl;
+
+ if (test_thread_flag(TIF_SINGLESTEP))
+ privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP;
+ else
+ privcntl = SPU_PRIVCNTL_MODE_NORMAL;
+
+ ctx->ops->privcntl_write(ctx, privcntl);
+ ctx->ops->npc_write(ctx, *npc);
+ }
+
+ ctx->ops->runcntl_write(ctx, runcntl);
+
+ if (ctx->flags & SPU_CREATE_NOSCHED) {
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+ } else {
+
+ if (ctx->state == SPU_STATE_SAVED) {
+ ret = spu_activate(ctx, 0);
+ if (ret)
+ return ret;
+ } else {
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+ }
+ }
+
+ set_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+ return 0;
+}
+
+static int spu_run_fini(struct spu_context *ctx, u32 *npc,
+ u32 *status)
+{
+ int ret = 0;
+
+ spu_del_from_rq(ctx);
+
+ *status = ctx->ops->status_read(ctx);
+ *npc = ctx->ops->npc_read(ctx);
+
+ spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+ clear_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+ spu_switch_log_notify(NULL, ctx, SWITCH_LOG_EXIT, *status);
+ spu_release(ctx);
+
+ if (signal_pending(current))
+ ret = -ERESTARTSYS;
+
+ return ret;
+}
+
+/*
+ * SPU syscall restarting is tricky because we violate the basic
+ * assumption that the signal handler is running on the interrupted
+ * thread. Here instead, the handler runs on PowerPC user space code,
+ * while the syscall was called from the SPU.
+ * This means we can only do a very rough approximation of POSIX
+ * signal semantics.
+ */
+static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret,
+ unsigned int *npc)
+{
+ int ret;
+
+ switch (*spu_ret) {
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ /*
+ * Enter the regular syscall restarting for
+ * sys_spu_run, then restart the SPU syscall
+ * callback.
+ */
+ *npc -= 8;
+ ret = -ERESTARTSYS;
+ break;
+ case -ERESTARTNOHAND:
+ case -ERESTART_RESTARTBLOCK:
+ /*
+ * Restart block is too hard for now, just return -EINTR
+ * to the SPU.
+ * ERESTARTNOHAND comes from sys_pause, we also return
+ * -EINTR from there.
+ * Assume that we need to be restarted ourselves though.
+ */
+ *spu_ret = -EINTR;
+ ret = -ERESTARTSYS;
+ break;
+ default:
+ printk(KERN_WARNING "%s: unexpected return code %ld\n",
+ __func__, *spu_ret);
+ ret = 0;
+ }
+ return ret;
+}
+
+static int spu_process_callback(struct spu_context *ctx)
+{
+ struct spu_syscall_block s;
+ u32 ls_pointer, npc;
+ void __iomem *ls;
+ long spu_ret;
+ int ret;
+
+ /* get syscall block from local store */
+ npc = ctx->ops->npc_read(ctx) & ~3;
+ ls = (void __iomem *)ctx->ops->get_ls(ctx);
+ ls_pointer = in_be32(ls + npc);
+ if (ls_pointer > (LS_SIZE - sizeof(s)))
+ return -EFAULT;
+ memcpy_fromio(&s, ls + ls_pointer, sizeof(s));
+
+ /* do actual syscall without pinning the spu */
+ ret = 0;
+ spu_ret = -ENOSYS;
+ npc += 4;
+
+ if (s.nr_ret < __NR_syscalls) {
+ spu_release(ctx);
+ /* do actual system call from here */
+ spu_ret = spu_sys_callback(&s);
+ if (spu_ret <= -ERESTARTSYS) {
+ ret = spu_handle_restartsys(ctx, &spu_ret, &npc);
+ }
+ mutex_lock(&ctx->state_mutex);
+ if (ret == -ERESTARTSYS)
+ return ret;
+ }
+
+ /* need to re-get the ls, as it may have changed when we released the
+ * spu */
+ ls = (void __iomem *)ctx->ops->get_ls(ctx);
+
+ /* write result, jump over indirect pointer */
+ memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret));
+ ctx->ops->npc_write(ctx, npc);
+ ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+ return ret;
+}
+
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
+{
+ int ret;
+ struct spu *spu;
+ u32 status;
+
+ if (mutex_lock_interruptible(&ctx->run_mutex))
+ return -ERESTARTSYS;
+
+ ctx->event_return = 0;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ goto out_unlock;
+
+ spu_enable_spu(ctx);
+
+ spu_update_sched_info(ctx);
+
+ ret = spu_run_init(ctx, npc);
+ if (ret) {
+ spu_release(ctx);
+ goto out;
+ }
+
+ do {
+ ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
+ if (unlikely(ret)) {
+ /*
+ * This is nasty: we need the state_mutex for all the
+ * bookkeeping even if the syscall was interrupted by
+ * a signal. ewww.
+ */
+ mutex_lock(&ctx->state_mutex);
+ break;
+ }
+ spu = ctx->spu;
+ if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
+ &ctx->sched_flags))) {
+ if (!(status & SPU_STATUS_STOPPED_BY_STOP)) {
+ spu_switch_notify(spu, ctx);
+ continue;
+ }
+ }
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
+ ret = spu_process_callback(ctx);
+ if (ret)
+ break;
+ status &= ~SPU_STATUS_STOPPED_BY_STOP;
+ }
+ ret = spufs_handle_class1(ctx);
+ if (ret)
+ break;
+
+ ret = spufs_handle_class0(ctx);
+ if (ret)
+ break;
+
+ if (signal_pending(current))
+ ret = -ERESTARTSYS;
+ } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP |
+ SPU_STATUS_STOPPED_BY_HALT |
+ SPU_STATUS_SINGLE_STEP)));
+
+ spu_disable_spu(ctx);
+ ret = spu_run_fini(ctx, npc, &status);
+ spu_yield(ctx);
+
+ if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100))
+ ctx->stats.libassist++;
+
+ if ((ret == 0) ||
+ ((ret == -ERESTARTSYS) &&
+ ((status & SPU_STATUS_STOPPED_BY_HALT) ||
+ (status & SPU_STATUS_SINGLE_STEP) ||
+ ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (status >> SPU_STOP_STATUS_SHIFT != 0x2104)))))
+ ret = status;
+
+ /* Note: we don't need to force_sig SIGTRAP on single-step
+ * since we have TIF_SINGLESTEP set, thus the kernel will do
+ * it upon return from the syscall anyawy
+ */
+ if (unlikely(status & SPU_STATUS_SINGLE_STEP))
+ ret = -ERESTARTSYS;
+
+ else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP)
+ && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) {
+ force_sig(SIGTRAP, current);
+ ret = -ERESTARTSYS;
+ }
+
+out:
+ *event = ctx->event_return;
+out_unlock:
+ mutex_unlock(&ctx->run_mutex);
+ return ret;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
new file mode 100644
index 00000000000..965d381abd7
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -0,0 +1,1174 @@
+/* sched.c - SPU scheduler.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * 2006-03-31 NUMA domains added.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/numa.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/pid_namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_priv1.h>
+#include "spufs.h"
+#define CREATE_TRACE_POINTS
+#include "sputrace.h"
+
+struct spu_prio_array {
+ DECLARE_BITMAP(bitmap, MAX_PRIO);
+ struct list_head runq[MAX_PRIO];
+ spinlock_t runq_lock;
+ int nr_waiting;
+};
+
+static unsigned long spu_avenrun[3];
+static struct spu_prio_array *spu_prio;
+static struct task_struct *spusched_task;
+static struct timer_list spusched_timer;
+static struct timer_list spuloadavg_timer;
+
+/*
+ * Priority of a normal, non-rt, non-niced'd process (aka nice level 0).
+ */
+#define NORMAL_PRIO 120
+
+/*
+ * Frequency of the spu scheduler tick. By default we do one SPU scheduler
+ * tick for every 10 CPU scheduler ticks.
+ */
+#define SPUSCHED_TICK (10)
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is
+ * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs.
+ */
+#define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1)
+#define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK))
+
+#define MAX_USER_PRIO (MAX_PRIO - MAX_RT_PRIO)
+#define SCALE_PRIO(x, prio) \
+ max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE)
+
+/*
+ * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values:
+ * [800ms ... 100ms ... 5ms]
+ *
+ * The higher a thread's priority, the bigger timeslices
+ * it gets during one round of execution. But even the lowest
+ * priority thread gets MIN_TIMESLICE worth of execution time.
+ */
+void spu_set_timeslice(struct spu_context *ctx)
+{
+ if (ctx->prio < NORMAL_PRIO)
+ ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio);
+ else
+ ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio);
+}
+
+/*
+ * Update scheduling information from the owning thread.
+ */
+void __spu_update_sched_info(struct spu_context *ctx)
+{
+ /*
+ * assert that the context is not on the runqueue, so it is safe
+ * to change its scheduling parameters.
+ */
+ BUG_ON(!list_empty(&ctx->rq));
+
+ /*
+ * 32-Bit assignments are atomic on powerpc, and we don't care about
+ * memory ordering here because retrieving the controlling thread is
+ * per definition racy.
+ */
+ ctx->tid = current->pid;
+
+ /*
+ * We do our own priority calculations, so we normally want
+ * ->static_prio to start with. Unfortunately this field
+ * contains junk for threads with a realtime scheduling
+ * policy so we have to look at ->prio in this case.
+ */
+ if (rt_prio(current->prio))
+ ctx->prio = current->prio;
+ else
+ ctx->prio = current->static_prio;
+ ctx->policy = current->policy;
+
+ /*
+ * TO DO: the context may be loaded, so we may need to activate
+ * it again on a different node. But it shouldn't hurt anything
+ * to update its parameters, because we know that the scheduler
+ * is not actively looking at this field, since it is not on the
+ * runqueue. The context will be rescheduled on the proper node
+ * if it is timesliced or preempted.
+ */
+ cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current));
+
+ /* Save the current cpu id for spu interrupt routing. */
+ ctx->last_ran = raw_smp_processor_id();
+}
+
+void spu_update_sched_info(struct spu_context *ctx)
+{
+ int node;
+
+ if (ctx->state == SPU_STATE_RUNNABLE) {
+ node = ctx->spu->node;
+
+ /*
+ * Take list_mutex to sync with find_victim().
+ */
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ __spu_update_sched_info(ctx);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ } else {
+ __spu_update_sched_info(ctx);
+ }
+}
+
+static int __node_allowed(struct spu_context *ctx, int node)
+{
+ if (nr_cpus_node(node)) {
+ const struct cpumask *mask = cpumask_of_node(node);
+
+ if (cpumask_intersects(mask, &ctx->cpus_allowed))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int node_allowed(struct spu_context *ctx, int node)
+{
+ int rval;
+
+ spin_lock(&spu_prio->runq_lock);
+ rval = __node_allowed(ctx, node);
+ spin_unlock(&spu_prio->runq_lock);
+
+ return rval;
+}
+
+void do_notify_spus_active(void)
+{
+ int node;
+
+ /*
+ * Wake up the active spu_contexts.
+ *
+ * When the awakened processes see their "notify_active" flag is set,
+ * they will call spu_switch_notify().
+ */
+ for_each_online_node(node) {
+ struct spu *spu;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if (spu->alloc_state != SPU_FREE) {
+ struct spu_context *ctx = spu->ctx;
+ set_bit(SPU_SCHED_NOTIFY_ACTIVE,
+ &ctx->sched_flags);
+ mb();
+ wake_up_all(&ctx->stop_wq);
+ }
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ }
+}
+
+/**
+ * spu_bind_context - bind spu context to physical spu
+ * @spu: physical spu to bind to
+ * @ctx: context to bind
+ */
+static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
+{
+ spu_context_trace(spu_bind_context__enter, ctx, spu);
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ if (ctx->flags & SPU_CREATE_NOSCHED)
+ atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
+
+ ctx->stats.slb_flt_base = spu->stats.slb_flt;
+ ctx->stats.class2_intr_base = spu->stats.class2_intr;
+
+ spu_associate_mm(spu, ctx->owner);
+
+ spin_lock_irq(&spu->register_lock);
+ spu->ctx = ctx;
+ spu->flags = 0;
+ ctx->spu = spu;
+ ctx->ops = &spu_hw_ops;
+ spu->pid = current->pid;
+ spu->tgid = current->tgid;
+ spu->ibox_callback = spufs_ibox_callback;
+ spu->wbox_callback = spufs_wbox_callback;
+ spu->stop_callback = spufs_stop_callback;
+ spu->mfc_callback = spufs_mfc_callback;
+ spin_unlock_irq(&spu->register_lock);
+
+ spu_unmap_mappings(ctx);
+
+ spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0);
+ spu_restore(&ctx->csa, spu);
+ spu->timestamp = jiffies;
+ spu_switch_notify(spu, ctx);
+ ctx->state = SPU_STATE_RUNNABLE;
+
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+}
+
+/*
+ * Must be used with the list_mutex held.
+ */
+static inline int sched_spu(struct spu *spu)
+{
+ BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+
+ return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
+}
+
+static void aff_merge_remaining_ctxs(struct spu_gang *gang)
+{
+ struct spu_context *ctx;
+
+ list_for_each_entry(ctx, &gang->aff_list_head, aff_list) {
+ if (list_empty(&ctx->aff_list))
+ list_add(&ctx->aff_list, &gang->aff_list_head);
+ }
+ gang->aff_flags |= AFF_MERGED;
+}
+
+static void aff_set_offsets(struct spu_gang *gang)
+{
+ struct spu_context *ctx;
+ int offset;
+
+ offset = -1;
+ list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+ aff_list) {
+ if (&ctx->aff_list == &gang->aff_list_head)
+ break;
+ ctx->aff_offset = offset--;
+ }
+
+ offset = 0;
+ list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) {
+ if (&ctx->aff_list == &gang->aff_list_head)
+ break;
+ ctx->aff_offset = offset++;
+ }
+
+ gang->aff_flags |= AFF_OFFSETS_SET;
+}
+
+static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
+ int group_size, int lowest_offset)
+{
+ struct spu *spu;
+ int node, n;
+
+ /*
+ * TODO: A better algorithm could be used to find a good spu to be
+ * used as reference location for the ctxs chain.
+ */
+ node = cpu_to_node(raw_smp_processor_id());
+ for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ /*
+ * "available_spus" counts how many spus are not potentially
+ * going to be used by other affinity gangs whose reference
+ * context is already in place. Although this code seeks to
+ * avoid having affinity gangs with a summed amount of
+ * contexts bigger than the amount of spus in the node,
+ * this may happen sporadically. In this case, available_spus
+ * becomes negative, which is harmless.
+ */
+ int available_spus;
+
+ node = (node < MAX_NUMNODES) ? node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ available_spus = 0;
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if (spu->ctx && spu->ctx->gang && !spu->ctx->aff_offset
+ && spu->ctx->gang->aff_ref_spu)
+ available_spus -= spu->ctx->gang->contexts;
+ available_spus++;
+ }
+ if (available_spus < ctx->gang->contexts) {
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ continue;
+ }
+
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if ((!mem_aff || spu->has_mem_affinity) &&
+ sched_spu(spu)) {
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ return spu;
+ }
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ }
+ return NULL;
+}
+
+static void aff_set_ref_point_location(struct spu_gang *gang)
+{
+ int mem_aff, gs, lowest_offset;
+ struct spu_context *ctx;
+ struct spu *tmp;
+
+ mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
+ lowest_offset = 0;
+ gs = 0;
+
+ list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+ gs++;
+
+ list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+ aff_list) {
+ if (&ctx->aff_list == &gang->aff_list_head)
+ break;
+ lowest_offset = ctx->aff_offset;
+ }
+
+ gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs,
+ lowest_offset);
+}
+
+static struct spu *ctx_location(struct spu *ref, int offset, int node)
+{
+ struct spu *spu;
+
+ spu = NULL;
+ if (offset >= 0) {
+ list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+ BUG_ON(spu->node != node);
+ if (offset == 0)
+ break;
+ if (sched_spu(spu))
+ offset--;
+ }
+ } else {
+ list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+ BUG_ON(spu->node != node);
+ if (offset == 0)
+ break;
+ if (sched_spu(spu))
+ offset++;
+ }
+ }
+
+ return spu;
+}
+
+/*
+ * affinity_check is called each time a context is going to be scheduled.
+ * It returns the spu ptr on which the context must run.
+ */
+static int has_affinity(struct spu_context *ctx)
+{
+ struct spu_gang *gang = ctx->gang;
+
+ if (list_empty(&ctx->aff_list))
+ return 0;
+
+ if (atomic_read(&ctx->gang->aff_sched_count) == 0)
+ ctx->gang->aff_ref_spu = NULL;
+
+ if (!gang->aff_ref_spu) {
+ if (!(gang->aff_flags & AFF_MERGED))
+ aff_merge_remaining_ctxs(gang);
+ if (!(gang->aff_flags & AFF_OFFSETS_SET))
+ aff_set_offsets(gang);
+ aff_set_ref_point_location(gang);
+ }
+
+ return gang->aff_ref_spu != NULL;
+}
+
+/**
+ * spu_unbind_context - unbind spu context from physical spu
+ * @spu: physical spu to unbind from
+ * @ctx: context to unbind
+ */
+static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
+{
+ u32 status;
+
+ spu_context_trace(spu_unbind_context__enter, ctx, spu);
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ if (spu->ctx->flags & SPU_CREATE_NOSCHED)
+ atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
+
+ if (ctx->gang)
+ /*
+ * If ctx->gang->aff_sched_count is positive, SPU affinity is
+ * being considered in this gang. Using atomic_dec_if_positive
+ * allow us to skip an explicit check for affinity in this gang
+ */
+ atomic_dec_if_positive(&ctx->gang->aff_sched_count);
+
+ spu_switch_notify(spu, NULL);
+ spu_unmap_mappings(ctx);
+ spu_save(&ctx->csa, spu);
+ spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0);
+
+ spin_lock_irq(&spu->register_lock);
+ spu->timestamp = jiffies;
+ ctx->state = SPU_STATE_SAVED;
+ spu->ibox_callback = NULL;
+ spu->wbox_callback = NULL;
+ spu->stop_callback = NULL;
+ spu->mfc_callback = NULL;
+ spu->pid = 0;
+ spu->tgid = 0;
+ ctx->ops = &spu_backing_ops;
+ spu->flags = 0;
+ spu->ctx = NULL;
+ spin_unlock_irq(&spu->register_lock);
+
+ spu_associate_mm(spu, NULL);
+
+ ctx->stats.slb_flt +=
+ (spu->stats.slb_flt - ctx->stats.slb_flt_base);
+ ctx->stats.class2_intr +=
+ (spu->stats.class2_intr - ctx->stats.class2_intr_base);
+
+ /* This maps the underlying spu state to idle */
+ spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+ ctx->spu = NULL;
+
+ if (spu_stopped(ctx, &status))
+ wake_up_all(&ctx->stop_wq);
+}
+
+/**
+ * spu_add_to_rq - add a context to the runqueue
+ * @ctx: context to add
+ */
+static void __spu_add_to_rq(struct spu_context *ctx)
+{
+ /*
+ * Unfortunately this code path can be called from multiple threads
+ * on behalf of a single context due to the way the problem state
+ * mmap support works.
+ *
+ * Fortunately we need to wake up all these threads at the same time
+ * and can simply skip the runqueue addition for every but the first
+ * thread getting into this codepath.
+ *
+ * It's still quite hacky, and long-term we should proxy all other
+ * threads through the owner thread so that spu_run is in control
+ * of all the scheduling activity for a given context.
+ */
+ if (list_empty(&ctx->rq)) {
+ list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
+ set_bit(ctx->prio, spu_prio->bitmap);
+ if (!spu_prio->nr_waiting++)
+ mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+ }
+}
+
+static void spu_add_to_rq(struct spu_context *ctx)
+{
+ spin_lock(&spu_prio->runq_lock);
+ __spu_add_to_rq(ctx);
+ spin_unlock(&spu_prio->runq_lock);
+}
+
+static void __spu_del_from_rq(struct spu_context *ctx)
+{
+ int prio = ctx->prio;
+
+ if (!list_empty(&ctx->rq)) {
+ if (!--spu_prio->nr_waiting)
+ del_timer(&spusched_timer);
+ list_del_init(&ctx->rq);
+
+ if (list_empty(&spu_prio->runq[prio]))
+ clear_bit(prio, spu_prio->bitmap);
+ }
+}
+
+void spu_del_from_rq(struct spu_context *ctx)
+{
+ spin_lock(&spu_prio->runq_lock);
+ __spu_del_from_rq(ctx);
+ spin_unlock(&spu_prio->runq_lock);
+}
+
+static void spu_prio_wait(struct spu_context *ctx)
+{
+ DEFINE_WAIT(wait);
+
+ /*
+ * The caller must explicitly wait for a context to be loaded
+ * if the nosched flag is set. If NOSCHED is not set, the caller
+ * queues the context and waits for an spu event or error.
+ */
+ BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED));
+
+ spin_lock(&spu_prio->runq_lock);
+ prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
+ if (!signal_pending(current)) {
+ __spu_add_to_rq(ctx);
+ spin_unlock(&spu_prio->runq_lock);
+ mutex_unlock(&ctx->state_mutex);
+ schedule();
+ mutex_lock(&ctx->state_mutex);
+ spin_lock(&spu_prio->runq_lock);
+ __spu_del_from_rq(ctx);
+ }
+ spin_unlock(&spu_prio->runq_lock);
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&ctx->stop_wq, &wait);
+}
+
+static struct spu *spu_get_idle(struct spu_context *ctx)
+{
+ struct spu *spu, *aff_ref_spu;
+ int node, n;
+
+ spu_context_nospu_trace(spu_get_idle__enter, ctx);
+
+ if (ctx->gang) {
+ mutex_lock(&ctx->gang->aff_mutex);
+ if (has_affinity(ctx)) {
+ aff_ref_spu = ctx->gang->aff_ref_spu;
+ atomic_inc(&ctx->gang->aff_sched_count);
+ mutex_unlock(&ctx->gang->aff_mutex);
+ node = aff_ref_spu->node;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ spu = ctx_location(aff_ref_spu, ctx->aff_offset, node);
+ if (spu && spu->alloc_state == SPU_FREE)
+ goto found;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ atomic_dec(&ctx->gang->aff_sched_count);
+ goto not_found;
+ }
+ mutex_unlock(&ctx->gang->aff_mutex);
+ }
+ node = cpu_to_node(raw_smp_processor_id());
+ for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ node = (node < MAX_NUMNODES) ? node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if (spu->alloc_state == SPU_FREE)
+ goto found;
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ }
+
+ not_found:
+ spu_context_nospu_trace(spu_get_idle__not_found, ctx);
+ return NULL;
+
+ found:
+ spu->alloc_state = SPU_USED;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ spu_context_trace(spu_get_idle__found, ctx, spu);
+ spu_init_channels(spu);
+ return spu;
+}
+
+/**
+ * find_victim - find a lower priority context to preempt
+ * @ctx: canidate context for running
+ *
+ * Returns the freed physical spu to run the new context on.
+ */
+static struct spu *find_victim(struct spu_context *ctx)
+{
+ struct spu_context *victim = NULL;
+ struct spu *spu;
+ int node, n;
+
+ spu_context_nospu_trace(spu_find_victim__enter, ctx);
+
+ /*
+ * Look for a possible preemption candidate on the local node first.
+ * If there is no candidate look at the other nodes. This isn't
+ * exactly fair, but so far the whole spu scheduler tries to keep
+ * a strong node affinity. We might want to fine-tune this in
+ * the future.
+ */
+ restart:
+ node = cpu_to_node(raw_smp_processor_id());
+ for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ node = (node < MAX_NUMNODES) ? node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ struct spu_context *tmp = spu->ctx;
+
+ if (tmp && tmp->prio > ctx->prio &&
+ !(tmp->flags & SPU_CREATE_NOSCHED) &&
+ (!victim || tmp->prio > victim->prio)) {
+ victim = spu->ctx;
+ }
+ }
+ if (victim)
+ get_spu_context(victim);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ if (victim) {
+ /*
+ * This nests ctx->state_mutex, but we always lock
+ * higher priority contexts before lower priority
+ * ones, so this is safe until we introduce
+ * priority inheritance schemes.
+ *
+ * XXX if the highest priority context is locked,
+ * this can loop a long time. Might be better to
+ * look at another context or give up after X retries.
+ */
+ if (!mutex_trylock(&victim->state_mutex)) {
+ put_spu_context(victim);
+ victim = NULL;
+ goto restart;
+ }
+
+ spu = victim->spu;
+ if (!spu || victim->prio <= ctx->prio) {
+ /*
+ * This race can happen because we've dropped
+ * the active list mutex. Not a problem, just
+ * restart the search.
+ */
+ mutex_unlock(&victim->state_mutex);
+ put_spu_context(victim);
+ victim = NULL;
+ goto restart;
+ }
+
+ spu_context_trace(__spu_deactivate__unload, ctx, spu);
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ cbe_spu_info[node].nr_active--;
+ spu_unbind_context(spu, victim);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ victim->stats.invol_ctx_switch++;
+ spu->stats.invol_ctx_switch++;
+ if (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags))
+ spu_add_to_rq(victim);
+
+ mutex_unlock(&victim->state_mutex);
+ put_spu_context(victim);
+
+ return spu;
+ }
+ }
+
+ return NULL;
+}
+
+static void __spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+ int node = spu->node;
+ int success = 0;
+
+ spu_set_timeslice(ctx);
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ if (spu->ctx == NULL) {
+ spu_bind_context(spu, ctx);
+ cbe_spu_info[node].nr_active++;
+ spu->alloc_state = SPU_USED;
+ success = 1;
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ if (success)
+ wake_up_all(&ctx->run_wq);
+ else
+ spu_add_to_rq(ctx);
+}
+
+static void spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+ /* not a candidate for interruptible because it's called either
+ from the scheduler thread or from spu_deactivate */
+ mutex_lock(&ctx->state_mutex);
+ if (ctx->state == SPU_STATE_SAVED)
+ __spu_schedule(spu, ctx);
+ spu_release(ctx);
+}
+
+/**
+ * spu_unschedule - remove a context from a spu, and possibly release it.
+ * @spu: The SPU to unschedule from
+ * @ctx: The context currently scheduled on the SPU
+ * @free_spu Whether to free the SPU for other contexts
+ *
+ * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the
+ * SPU is made available for other contexts (ie, may be returned by
+ * spu_get_idle). If this is zero, the caller is expected to schedule another
+ * context to this spu.
+ *
+ * Should be called with ctx->state_mutex held.
+ */
+static void spu_unschedule(struct spu *spu, struct spu_context *ctx,
+ int free_spu)
+{
+ int node = spu->node;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ cbe_spu_info[node].nr_active--;
+ if (free_spu)
+ spu->alloc_state = SPU_FREE;
+ spu_unbind_context(spu, ctx);
+ ctx->stats.invol_ctx_switch++;
+ spu->stats.invol_ctx_switch++;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+}
+
+/**
+ * spu_activate - find a free spu for a context and execute it
+ * @ctx: spu context to schedule
+ * @flags: flags (currently ignored)
+ *
+ * Tries to find a free spu to run @ctx. If no free spu is available
+ * add the context to the runqueue so it gets woken up once an spu
+ * is available.
+ */
+int spu_activate(struct spu_context *ctx, unsigned long flags)
+{
+ struct spu *spu;
+
+ /*
+ * If there are multiple threads waiting for a single context
+ * only one actually binds the context while the others will
+ * only be able to acquire the state_mutex once the context
+ * already is in runnable state.
+ */
+ if (ctx->spu)
+ return 0;
+
+spu_activate_top:
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
+ spu = spu_get_idle(ctx);
+ /*
+ * If this is a realtime thread we try to get it running by
+ * preempting a lower priority thread.
+ */
+ if (!spu && rt_prio(ctx->prio))
+ spu = find_victim(ctx);
+ if (spu) {
+ unsigned long runcntl;
+
+ runcntl = ctx->ops->runcntl_read(ctx);
+ __spu_schedule(spu, ctx);
+ if (runcntl & SPU_RUNCNTL_RUNNABLE)
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+
+ return 0;
+ }
+
+ if (ctx->flags & SPU_CREATE_NOSCHED) {
+ spu_prio_wait(ctx);
+ goto spu_activate_top;
+ }
+
+ spu_add_to_rq(ctx);
+
+ return 0;
+}
+
+/**
+ * grab_runnable_context - try to find a runnable context
+ *
+ * Remove the highest priority context on the runqueue and return it
+ * to the caller. Returns %NULL if no runnable context was found.
+ */
+static struct spu_context *grab_runnable_context(int prio, int node)
+{
+ struct spu_context *ctx;
+ int best;
+
+ spin_lock(&spu_prio->runq_lock);
+ best = find_first_bit(spu_prio->bitmap, prio);
+ while (best < prio) {
+ struct list_head *rq = &spu_prio->runq[best];
+
+ list_for_each_entry(ctx, rq, rq) {
+ /* XXX(hch): check for affinity here as well */
+ if (__node_allowed(ctx, node)) {
+ __spu_del_from_rq(ctx);
+ goto found;
+ }
+ }
+ best++;
+ }
+ ctx = NULL;
+ found:
+ spin_unlock(&spu_prio->runq_lock);
+ return ctx;
+}
+
+static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_context *new = NULL;
+
+ if (spu) {
+ new = grab_runnable_context(max_prio, spu->node);
+ if (new || force) {
+ spu_unschedule(spu, ctx, new == NULL);
+ if (new) {
+ if (new->flags & SPU_CREATE_NOSCHED)
+ wake_up(&new->stop_wq);
+ else {
+ spu_release(ctx);
+ spu_schedule(spu, new);
+ /* this one can't easily be made
+ interruptible */
+ mutex_lock(&ctx->state_mutex);
+ }
+ }
+ }
+ }
+
+ return new != NULL;
+}
+
+/**
+ * spu_deactivate - unbind a context from it's physical spu
+ * @ctx: spu context to unbind
+ *
+ * Unbind @ctx from the physical spu it is running on and schedule
+ * the highest priority context to run on the freed physical spu.
+ */
+void spu_deactivate(struct spu_context *ctx)
+{
+ spu_context_nospu_trace(spu_deactivate__enter, ctx);
+ __spu_deactivate(ctx, 1, MAX_PRIO);
+}
+
+/**
+ * spu_yield - yield a physical spu if others are waiting
+ * @ctx: spu context to yield
+ *
+ * Check if there is a higher priority context waiting and if yes
+ * unbind @ctx from the physical spu and schedule the highest
+ * priority context to run on the freed physical spu instead.
+ */
+void spu_yield(struct spu_context *ctx)
+{
+ spu_context_nospu_trace(spu_yield__enter, ctx);
+ if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
+ mutex_lock(&ctx->state_mutex);
+ __spu_deactivate(ctx, 0, MAX_PRIO);
+ mutex_unlock(&ctx->state_mutex);
+ }
+}
+
+static noinline void spusched_tick(struct spu_context *ctx)
+{
+ struct spu_context *new = NULL;
+ struct spu *spu = NULL;
+
+ if (spu_acquire(ctx))
+ BUG(); /* a kernel thread never has signals pending */
+
+ if (ctx->state != SPU_STATE_RUNNABLE)
+ goto out;
+ if (ctx->flags & SPU_CREATE_NOSCHED)
+ goto out;
+ if (ctx->policy == SCHED_FIFO)
+ goto out;
+
+ if (--ctx->time_slice && test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+ goto out;
+
+ spu = ctx->spu;
+
+ spu_context_trace(spusched_tick__preempt, ctx, spu);
+
+ new = grab_runnable_context(ctx->prio + 1, spu->node);
+ if (new) {
+ spu_unschedule(spu, ctx, 0);
+ if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+ spu_add_to_rq(ctx);
+ } else {
+ spu_context_nospu_trace(spusched_tick__newslice, ctx);
+ if (!ctx->time_slice)
+ ctx->time_slice++;
+ }
+out:
+ spu_release(ctx);
+
+ if (new)
+ spu_schedule(spu, new);
+}
+
+/**
+ * count_active_contexts - count nr of active tasks
+ *
+ * Return the number of tasks currently running or waiting to run.
+ *
+ * Note that we don't take runq_lock / list_mutex here. Reading
+ * a single 32bit value is atomic on powerpc, and we don't care
+ * about memory ordering issues here.
+ */
+static unsigned long count_active_contexts(void)
+{
+ int nr_active = 0, node;
+
+ for (node = 0; node < MAX_NUMNODES; node++)
+ nr_active += cbe_spu_info[node].nr_active;
+ nr_active += spu_prio->nr_waiting;
+
+ return nr_active;
+}
+
+/**
+ * spu_calc_load - update the avenrun load estimates.
+ *
+ * No locking against reading these values from userspace, as for
+ * the CPU loadavg code.
+ */
+static void spu_calc_load(void)
+{
+ unsigned long active_tasks; /* fixed-point */
+
+ active_tasks = count_active_contexts() * FIXED_1;
+ CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks);
+ CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks);
+ CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks);
+}
+
+static void spusched_wake(unsigned long data)
+{
+ mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+ wake_up_process(spusched_task);
+}
+
+static void spuloadavg_wake(unsigned long data)
+{
+ mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ);
+ spu_calc_load();
+}
+
+static int spusched_thread(void *unused)
+{
+ struct spu *spu;
+ int node;
+
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ for (node = 0; node < MAX_NUMNODES; node++) {
+ struct mutex *mtx = &cbe_spu_info[node].list_mutex;
+
+ mutex_lock(mtx);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus,
+ cbe_list) {
+ struct spu_context *ctx = spu->ctx;
+
+ if (ctx) {
+ get_spu_context(ctx);
+ mutex_unlock(mtx);
+ spusched_tick(ctx);
+ mutex_lock(mtx);
+ put_spu_context(ctx);
+ }
+ }
+ mutex_unlock(mtx);
+ }
+ }
+
+ return 0;
+}
+
+void spuctx_switch_state(struct spu_context *ctx,
+ enum spu_utilization_state new_state)
+{
+ unsigned long long curtime;
+ signed long long delta;
+ struct timespec ts;
+ struct spu *spu;
+ enum spu_utilization_state old_state;
+ int node;
+
+ ktime_get_ts(&ts);
+ curtime = timespec_to_ns(&ts);
+ delta = curtime - ctx->stats.tstamp;
+
+ WARN_ON(!mutex_is_locked(&ctx->state_mutex));
+ WARN_ON(delta < 0);
+
+ spu = ctx->spu;
+ old_state = ctx->stats.util_state;
+ ctx->stats.util_state = new_state;
+ ctx->stats.tstamp = curtime;
+
+ /*
+ * Update the physical SPU utilization statistics.
+ */
+ if (spu) {
+ ctx->stats.times[old_state] += delta;
+ spu->stats.times[old_state] += delta;
+ spu->stats.util_state = new_state;
+ spu->stats.tstamp = curtime;
+ node = spu->node;
+ if (old_state == SPU_UTIL_USER)
+ atomic_dec(&cbe_spu_info[node].busy_spus);
+ if (new_state == SPU_UTIL_USER)
+ atomic_inc(&cbe_spu_info[node].busy_spus);
+ }
+}
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+static int show_spu_loadavg(struct seq_file *s, void *private)
+{
+ int a, b, c;
+
+ a = spu_avenrun[0] + (FIXED_1/200);
+ b = spu_avenrun[1] + (FIXED_1/200);
+ c = spu_avenrun[2] + (FIXED_1/200);
+
+ /*
+ * Note that last_pid doesn't really make much sense for the
+ * SPU loadavg (it even seems very odd on the CPU side...),
+ * but we include it here to have a 100% compatible interface.
+ */
+ seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
+ LOAD_INT(a), LOAD_FRAC(a),
+ LOAD_INT(b), LOAD_FRAC(b),
+ LOAD_INT(c), LOAD_FRAC(c),
+ count_active_contexts(),
+ atomic_read(&nr_spu_contexts),
+ current->nsproxy->pid_ns->last_pid);
+ return 0;
+}
+
+static int spu_loadavg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, show_spu_loadavg, NULL);
+}
+
+static const struct file_operations spu_loadavg_fops = {
+ .open = spu_loadavg_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+int __init spu_sched_init(void)
+{
+ struct proc_dir_entry *entry;
+ int err = -ENOMEM, i;
+
+ spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
+ if (!spu_prio)
+ goto out;
+
+ for (i = 0; i < MAX_PRIO; i++) {
+ INIT_LIST_HEAD(&spu_prio->runq[i]);
+ __clear_bit(i, spu_prio->bitmap);
+ }
+ spin_lock_init(&spu_prio->runq_lock);
+
+ setup_timer(&spusched_timer, spusched_wake, 0);
+ setup_timer(&spuloadavg_timer, spuloadavg_wake, 0);
+
+ spusched_task = kthread_run(spusched_thread, NULL, "spusched");
+ if (IS_ERR(spusched_task)) {
+ err = PTR_ERR(spusched_task);
+ goto out_free_spu_prio;
+ }
+
+ mod_timer(&spuloadavg_timer, 0);
+
+ entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops);
+ if (!entry)
+ goto out_stop_kthread;
+
+ pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n",
+ SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE);
+ return 0;
+
+ out_stop_kthread:
+ kthread_stop(spusched_task);
+ out_free_spu_prio:
+ kfree(spu_prio);
+ out:
+ return err;
+}
+
+void spu_sched_exit(void)
+{
+ struct spu *spu;
+ int node;
+
+ remove_proc_entry("spu_loadavg", NULL);
+
+ del_timer_sync(&spusched_timer);
+ del_timer_sync(&spuloadavg_timer);
+ kthread_stop(spusched_task);
+
+ for (node = 0; node < MAX_NUMNODES; node++) {
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+ if (spu->alloc_state != SPU_FREE)
+ spu->alloc_state = SPU_FREE;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ }
+ kfree(spu_prio);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c
new file mode 100644
index 00000000000..72c905f1ee7
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c
@@ -0,0 +1,336 @@
+/*
+ * spu_restore.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * SPU-side context restore sequence outlined in
+ * Synergistic Processor Element Book IV
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+
+#ifndef LS_SIZE
+#define LS_SIZE 0x40000 /* 256K (in bytes) */
+#endif
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#include <spu_intrinsics.h>
+#include <asm/spu_csa.h>
+#include "spu_utils.h"
+
+#define BR_INSTR 0x327fff80 /* br -4 */
+#define NOP_INSTR 0x40200000 /* nop */
+#define HEQ_INSTR 0x7b000000 /* heq $0, $0 */
+#define STOP_INSTR 0x00000000 /* stop 0x0 */
+#define ILLEGAL_INSTR 0x00800000 /* illegal instr */
+#define RESTORE_COMPLETE 0x00003ffc /* stop 0x3ffc */
+
+static inline void fetch_regs_from_mem(addr64 lscsa_ea)
+{
+ unsigned int ls = (unsigned int)&regs_spill[0];
+ unsigned int size = sizeof(regs_spill);
+ unsigned int tag_id = 0;
+ unsigned int cmd = 0x40; /* GET */
+
+ spu_writech(MFC_LSA, ls);
+ spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+ spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+ spu_writech(MFC_Size, size);
+ spu_writech(MFC_TagID, tag_id);
+ spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void restore_upper_240kb(addr64 lscsa_ea)
+{
+ unsigned int ls = 16384;
+ unsigned int list = (unsigned int)&dma_list[0];
+ unsigned int size = sizeof(dma_list);
+ unsigned int tag_id = 0;
+ unsigned int cmd = 0x44; /* GETL */
+
+ /* Restore, Step 4:
+ * Enqueue the GETL command (tag 0) to the MFC SPU command
+ * queue to transfer the upper 240 kb of LS from CSA.
+ */
+ spu_writech(MFC_LSA, ls);
+ spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+ spu_writech(MFC_EAL, list);
+ spu_writech(MFC_Size, size);
+ spu_writech(MFC_TagID, tag_id);
+ spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void restore_decr(void)
+{
+ unsigned int offset;
+ unsigned int decr_running;
+ unsigned int decr;
+
+ /* Restore, Step 6(moved):
+ * If the LSCSA "decrementer running" flag is set
+ * then write the SPU_WrDec channel with the
+ * decrementer value from LSCSA.
+ */
+ offset = LSCSA_QW_OFFSET(decr_status);
+ decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING;
+ if (decr_running) {
+ offset = LSCSA_QW_OFFSET(decr);
+ decr = regs_spill[offset].slot[0];
+ spu_writech(SPU_WrDec, decr);
+ }
+}
+
+static inline void write_ppu_mb(void)
+{
+ unsigned int offset;
+ unsigned int data;
+
+ /* Restore, Step 11:
+ * Write the MFC_WrOut_MB channel with the PPU_MB
+ * data from LSCSA.
+ */
+ offset = LSCSA_QW_OFFSET(ppu_mb);
+ data = regs_spill[offset].slot[0];
+ spu_writech(SPU_WrOutMbox, data);
+}
+
+static inline void write_ppuint_mb(void)
+{
+ unsigned int offset;
+ unsigned int data;
+
+ /* Restore, Step 12:
+ * Write the MFC_WrInt_MB channel with the PPUINT_MB
+ * data from LSCSA.
+ */
+ offset = LSCSA_QW_OFFSET(ppuint_mb);
+ data = regs_spill[offset].slot[0];
+ spu_writech(SPU_WrOutIntrMbox, data);
+}
+
+static inline void restore_fpcr(void)
+{
+ unsigned int offset;
+ vector unsigned int fpcr;
+
+ /* Restore, Step 13:
+ * Restore the floating-point status and control
+ * register from the LSCSA.
+ */
+ offset = LSCSA_QW_OFFSET(fpcr);
+ fpcr = regs_spill[offset].v;
+ spu_mtfpscr(fpcr);
+}
+
+static inline void restore_srr0(void)
+{
+ unsigned int offset;
+ unsigned int srr0;
+
+ /* Restore, Step 14:
+ * Restore the SPU SRR0 data from the LSCSA.
+ */
+ offset = LSCSA_QW_OFFSET(srr0);
+ srr0 = regs_spill[offset].slot[0];
+ spu_writech(SPU_WrSRR0, srr0);
+}
+
+static inline void restore_event_mask(void)
+{
+ unsigned int offset;
+ unsigned int event_mask;
+
+ /* Restore, Step 15:
+ * Restore the SPU_RdEventMsk data from the LSCSA.
+ */
+ offset = LSCSA_QW_OFFSET(event_mask);
+ event_mask = regs_spill[offset].slot[0];
+ spu_writech(SPU_WrEventMask, event_mask);
+}
+
+static inline void restore_tag_mask(void)
+{
+ unsigned int offset;
+ unsigned int tag_mask;
+
+ /* Restore, Step 16:
+ * Restore the SPU_RdTagMsk data from the LSCSA.
+ */
+ offset = LSCSA_QW_OFFSET(tag_mask);
+ tag_mask = regs_spill[offset].slot[0];
+ spu_writech(MFC_WrTagMask, tag_mask);
+}
+
+static inline void restore_complete(void)
+{
+ extern void exit_fini(void);
+ unsigned int *exit_instrs = (unsigned int *)exit_fini;
+ unsigned int offset;
+ unsigned int stopped_status;
+ unsigned int stopped_code;
+
+ /* Restore, Step 18:
+ * Issue a stop-and-signal instruction with
+ * "good context restore" signal value.
+ *
+ * Restore, Step 19:
+ * There may be additional instructions placed
+ * here by the PPE Sequence for SPU Context
+ * Restore in order to restore the correct
+ * "stopped state".
+ *
+ * This step is handled here by analyzing the
+ * LSCSA.stopped_status and then modifying the
+ * exit() function to behave appropriately.
+ */
+
+ offset = LSCSA_QW_OFFSET(stopped_status);
+ stopped_status = regs_spill[offset].slot[0];
+ stopped_code = regs_spill[offset].slot[1];
+
+ switch (stopped_status) {
+ case SPU_STOPPED_STATUS_P_I:
+ /* SPU_Status[P,I]=1. Add illegal instruction
+ * followed by stop-and-signal instruction after
+ * end of restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = ILLEGAL_INSTR;
+ exit_instrs[2] = STOP_INSTR | stopped_code;
+ break;
+ case SPU_STOPPED_STATUS_P_H:
+ /* SPU_Status[P,H]=1. Add 'heq $0, $0' followed
+ * by stop-and-signal instruction after end of
+ * restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = HEQ_INSTR;
+ exit_instrs[2] = STOP_INSTR | stopped_code;
+ break;
+ case SPU_STOPPED_STATUS_S_P:
+ /* SPU_Status[S,P]=1. Add nop instruction
+ * followed by 'br -4' after end of restore
+ * code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = STOP_INSTR | stopped_code;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ case SPU_STOPPED_STATUS_S_I:
+ /* SPU_Status[S,I]=1. Add illegal instruction
+ * followed by 'br -4' after end of restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = ILLEGAL_INSTR;
+ exit_instrs[2] = NOP_INSTR;
<