94 files changed, 34950 insertions, 0 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
new file mode 100644
index 00000000000..e3551d20464
--- /dev/null
+++ b/drivers/misc/Kconfig
@@ -0,0 +1,300 @@
+#
+# Misc strange devices
+#
+
+menuconfig MISC_DEVICES
+	bool "Misc devices"
+	default y
+	---help---
+	  Say Y here to get to see options for device drivers from various
+	  different categories. This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if MISC_DEVICES
+
+config AD525X_DPOT
+	tristate "Analog Devices AD525x Digital Potentiometers"
+	depends on I2C && SYSFS
+	help
+	  If you say yes here, you get support for the Analog Devices
+	  AD5258, AD5259, AD5251, AD5252, AD5253, AD5254 and AD5255
+	  digital potentiometer chips.
+
+	  See Documentation/misc-devices/ad525x_dpot.txt for the
+	  userspace interface.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called ad525x_dpot.
+
+config ATMEL_PWM
+	tristate "Atmel AT32/AT91 PWM support"
+	depends on AVR32 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_AT91CAP9
+	help
+	  This option enables device driver support for the PWM channels
+	  on certain Atmel processors.  Pulse Width Modulation is used for
+	  purposes including software controlled power-efficient backlights
+	  on LCD displays, motor control, and waveform generation.
+
+config ATMEL_TCLIB
+	bool "Atmel AT32/AT91 Timer/Counter Library"
+	depends on (AVR32 || ARCH_AT91)
+	help
+	  Select this if you want a library to allocate the Timer/Counter
+	  blocks found on many Atmel processors.  This facilitates using
+	  these blocks by different drivers despite processor differences.
+
+config ATMEL_TCB_CLKSRC
+	bool "TC Block Clocksource"
+	depends on ATMEL_TCLIB && GENERIC_TIME
+	default y
+	help
+	  Select this to get a high precision clocksource based on a
+	  TC block with a 5+ MHz base clock rate.  Two timer channels
+	  are combined to make a single 32-bit timer.
+
+	  When GENERIC_CLOCKEVENTS is defined, the third timer channel
+	  may be used as a clock event device supporting oneshot mode
+	  (delays of up to two seconds) based on the 32 KiHz clock.
+
+config ATMEL_TCB_CLKSRC_BLOCK
+	int
+	depends on ATMEL_TCB_CLKSRC
+	prompt "TC Block" if ARCH_AT91RM9200 || ARCH_AT91SAM9260 || CPU_AT32AP700X
+	default 0
+	range 0 1
+	help
+	  Some chips provide more than one TC block, so you have the
+	  choice of which one to use for the clock framework.  The other
+	  TC can be used for other purposes, such as PWM generation and
+	  interval timing.
+
+config IBM_ASM
+	tristate "Device driver for IBM RSA service processor"
+	depends on X86 && PCI && INPUT && EXPERIMENTAL
+	---help---
+	  This option enables device driver support for in-band access to the
+	  IBM RSA (Condor) service processor in eServer xSeries systems.
+	  The ibmasm device driver allows user space application to access
+	  ASM (Advanced Systems Management) functions on the service
+	  processor. The driver is meant to be used in conjunction with
+	  a user space API.
+	  The ibmasm driver also enables the OS to use the UART on the
+	  service processor board as a regular serial port. To make use of
+	  this feature serial driver support (CONFIG_SERIAL_8250) must be
+	  enabled.
+
+	  WARNING: This software may not be supported or function
+	  correctly on your IBM server. Please consult the IBM ServerProven
+	  website <http://www.pc.ibm.com/ww/eserver/xseries/serverproven> for
+	  information on the specific driver level and support statement
+	  for your IBM server.
+
+config PHANTOM
+	tristate "Sensable PHANToM (PCI)"
+	depends on PCI
+	help
+	  Say Y here if you want to build a driver for Sensable PHANToM device.
+
+	  This driver is only for PCI PHANToMs.
+
+	  If you choose to build module, its name will be phantom. If unsure,
+	  say N here.
+
+config SGI_IOC4
+	tristate "SGI IOC4 Base IO support"
+	depends on PCI
+	---help---
+	  This option enables basic support for the IOC4 chip on certain
+	  SGI IO controller cards (IO9, IO10, and PCI-RT).  This option
+	  does not enable any specific functions on such a card, but provides
+	  necessary infrastructure for other drivers to utilize.
+
+	  If you have an SGI Altix with an IOC4-based card say Y.
+	  Otherwise say N.
+
+config TIFM_CORE
+	tristate "TI Flash Media interface support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && PCI
+	help
+	  If you want support for Texas Instruments(R) Flash Media adapters
+	  you should select this option and then also choose an appropriate
+	  host adapter, such as 'TI Flash Media PCI74xx/PCI76xx host adapter
+	  support', if you have a TI PCI74xx compatible card reader, for
+	  example.
+	  You will also have to select some flash card format drivers. MMC/SD
+	  cards are supported via 'MMC/SD Card support: TI Flash Media MMC/SD
+	  Interface support (MMC_TIFM_SD)'.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called tifm_core.
+
+config TIFM_7XX1
+	tristate "TI Flash Media PCI74xx/PCI76xx host adapter support (EXPERIMENTAL)"
+	depends on PCI && TIFM_CORE && EXPERIMENTAL
+	default TIFM_CORE
+	help
+	  This option enables support for Texas Instruments(R) PCI74xx and
+	  PCI76xx families of Flash Media adapters, found in many laptops.
+	  To make actual use of the device, you will have to select some
+	  flash card format drivers, as outlined in the TIFM_CORE Help.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called tifm_7xx1.
+
+config ICS932S401
+	tristate "Integrated Circuits ICS932S401"
+	depends on I2C && EXPERIMENTAL
+	help
+	  If you say yes here you get support for the Integrated Circuits
+	  ICS932S401 clock control chips.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called ics932s401.
+
+config ATMEL_SSC
+	tristate "Device driver for Atmel SSC peripheral"
+	depends on AVR32 || ARCH_AT91
+	---help---
+	  This option enables device driver support for Atmel Synchronized
+	  Serial Communication peripheral (SSC).
+
+	  The SSC peripheral supports a wide variety of serial frame based
+	  communications, i.e. I2S, SPI, etc.
+
+	  If unsure, say N.
+
+config ENCLOSURE_SERVICES
+	tristate "Enclosure Services"
+	default n
+	help
+	  Provides support for intelligent enclosures (bays which
+	  contain storage devices).  You also need either a host
+	  driver (SCSI/ATA) which supports enclosures
+	  or a SCSI enclosure device (SES) to use these services.
+
+config SGI_XP
+	tristate "Support communication between SGI SSIs"
+	depends on NET
+	depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
+	select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
+	select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
+	select SGI_GRU if X86_64 && SMP
+	---help---
+	  An SGI machine can be divided into multiple Single System
+	  Images which act independently of each other and have
+	  hardware based memory protection from the others.  Enabling
+	  this feature will allow for direct communication between SSIs
+	  based on a network adapter and DMA messaging.
+
+config CS5535_MFGPT
+	tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support"
+	depends on PCI
+	depends on X86
+	default n
+	help
+	  This driver provides access to MFGPT functionality for other
+	  drivers that need timers.  MFGPTs are available in the CS5535 and
+	  CS5536 companion chips that are found in AMD Geode and several
+	  other platforms.  They have a better resolution and max interval
+	  than the generic PIT, and are suitable for use as high-res timers.
+	  You probably don't want to enable this manually; other drivers that
+	  make use of it should enable it.
+
+config CS5535_MFGPT_DEFAULT_IRQ
+	int
+	depends on CS5535_MFGPT
+	default 7
+	help
+	  MFGPTs on the CS5535 require an interrupt.  The selected IRQ
+	  can be overridden as a module option as well as by driver that
+	  use the cs5535_mfgpt_ API; however, different architectures might
+	  want to use a different IRQ by default.  This is here for
+	  architectures to set as necessary.
+
+config HP_ILO
+	tristate "Channel interface driver for HP iLO/iLO2 processor"
+	depends on PCI
+	default n
+	help
+	  The channel interface driver allows applications to communicate
+	  with iLO/iLO2 management processors present on HP ProLiant
+	  servers.  Upon loading, the driver creates /dev/hpilo/dXccbN files,
+	  which can be used to gather data from the management processor,
+	  via read and write system calls.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called hpilo.
+
+config SGI_GRU
+	tristate "SGI GRU driver"
+	depends on X86_UV && SMP
+	default n
+	select MMU_NOTIFIER
+	---help---
+	The GRU is a hardware resource located in the system chipset. The GRU
+	contains memory that can be mmapped into the user address space. This memory is
+	used to communicate with the GRU to perform functions such as load/store,
+	scatter/gather, bcopy, AMOs, etc.  The GRU is directly accessed by user
+	instructions using user virtual addresses. GRU instructions (ex., bcopy) use
+	user virtual addresses for operands.
+
+	If you are not running on a SGI UV system, say N.
+
+config SGI_GRU_DEBUG
+	bool  "SGI GRU driver debug"
+	depends on SGI_GRU
+	default n
+	---help---
+	This option enables addition debugging code for the SGI GRU driver. If
+	you are unsure, say N.
+
+config ISL29003
+	tristate "Intersil ISL29003 ambient light sensor"
+	depends on I2C && SYSFS
+	help
+	  If you say yes here you get support for the Intersil ISL29003
+	  ambient light sensor.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called isl29003.
+
+config EP93XX_PWM
+	tristate "EP93xx PWM support"
+	depends on ARCH_EP93XX
+	help
+	  This option enables device driver support for the PWM channels
+	  on the Cirrus EP93xx processors.  The EP9307 chip only has one
+	  PWM channel all the others have two, the second channel is an
+	  alternate function of the EGPIO14 pin.  A sysfs interface is
+	  provided to control the PWM channels.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called ep93xx_pwm.
+
+config DS1682
+	tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm"
+	depends on I2C && EXPERIMENTAL
+	help
+	  If you say yes here you get support for Dallas Semiconductor
+	  DS1682 Total Elapsed Time Recorder.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called ds1682.
+
+config TI_DAC7512
+	tristate "Texas Instruments DAC7512"
+	depends on SPI && SYSFS
+	help
+	  If you say yes here you get support for the Texas Instruments
+	  DAC7512 16-bit digital-to-analog converter.
+
+	  This driver can also be built as a module. If so, the module
+	  will be calles ti_dac7512.
+
+source "drivers/misc/c2port/Kconfig"
+source "drivers/misc/eeprom/Kconfig"
+source "drivers/misc/cb710/Kconfig"
+source "drivers/misc/iwmc3200top/Kconfig"
+
+endif # MISC_DEVICES
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
new file mode 100644
index 00000000000..049ff2482f3
--- /dev/null
+++ b/drivers/misc/Makefile
@@ -0,0 +1,30 @@
+#
+# Makefile for misc devices that really don't fit anywhere else.
+#
+
+obj-$(CONFIG_IBM_ASM)		+= ibmasm/
+obj-$(CONFIG_HDPU_FEATURES)	+= hdpuftrs/
+obj-$(CONFIG_AD525X_DPOT)	+= ad525x_dpot.o
+obj-$(CONFIG_ATMEL_PWM)		+= atmel_pwm.o
+obj-$(CONFIG_ATMEL_SSC)		+= atmel-ssc.o
+obj-$(CONFIG_ATMEL_TCLIB)	+= atmel_tclib.o
+obj-$(CONFIG_ICS932S401)	+= ics932s401.o
+obj-$(CONFIG_LKDTM)		+= lkdtm.o
+obj-$(CONFIG_TIFM_CORE)       	+= tifm_core.o
+obj-$(CONFIG_TIFM_7XX1)       	+= tifm_7xx1.o
+obj-$(CONFIG_PHANTOM)		+= phantom.o
+obj-$(CONFIG_SGI_IOC4)		+= ioc4.o
+obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
+obj-$(CONFIG_KGDB_TESTS)	+= kgdbts.o
+obj-$(CONFIG_SGI_XP)		+= sgi-xp/
+obj-$(CONFIG_SGI_GRU)		+= sgi-gru/
+obj-$(CONFIG_CS5535_MFGPT)	+= cs5535-mfgpt.o
+obj-$(CONFIG_HP_ILO)		+= hpilo.o
+obj-$(CONFIG_ISL29003)		+= isl29003.o
+obj-$(CONFIG_EP93XX_PWM)	+= ep93xx_pwm.o
+obj-$(CONFIG_DS1682)		+= ds1682.o
+obj-$(CONFIG_TI_DAC7512)	+= ti_dac7512.o
+obj-$(CONFIG_C2PORT)		+= c2port/
+obj-$(CONFIG_IWMC3200TOP)      += iwmc3200top/
+obj-y				+= eeprom/
+obj-y				+= cb710/
diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c
new file mode 100644
index 00000000000..30a59f2bacd
--- /dev/null
+++ b/drivers/misc/ad525x_dpot.c
@@ -0,0 +1,666 @@
+/*
+ * ad525x_dpot: Driver for the Analog Devices AD525x digital potentiometers
+ * Copyright (c) 2009 Analog Devices, Inc.
+ * Author: Michael Hennerich <hennerich@blackfin.uclinux.org>
+ *
+ * DEVID		#Wipers		#Positions 	Resistor Options (kOhm)
+ * AD5258		1		64		1, 10, 50, 100
+ * AD5259		1		256		5, 10, 50, 100
+ * AD5251		2		64		1, 10, 50, 100
+ * AD5252		2		256		1, 10, 50, 100
+ * AD5255		3		512		25, 250
+ * AD5253		4		64		1, 10, 50, 100
+ * AD5254		4		256		1, 10, 50, 100
+ *
+ * See Documentation/misc-devices/ad525x_dpot.txt for more info.
+ *
+ * derived from ad5258.c
+ * Copyright (c) 2009 Cyber Switching, Inc.
+ * Author: Chris Verges <chrisv@cyberswitching.com>
+ *
+ * derived from ad5252.c
+ * Copyright (c) 2006 Michael Hennerich <hennerich@blackfin.uclinux.org>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/delay.h>
+
+#define DRIVER_NAME			"ad525x_dpot"
+#define DRIVER_VERSION			"0.1"
+
+enum dpot_devid {
+	AD5258_ID,
+	AD5259_ID,
+	AD5251_ID,
+	AD5252_ID,
+	AD5253_ID,
+	AD5254_ID,
+	AD5255_ID,
+};
+
+#define AD5258_MAX_POSITION		64
+#define AD5259_MAX_POSITION		256
+#define AD5251_MAX_POSITION		64
+#define AD5252_MAX_POSITION		256
+#define AD5253_MAX_POSITION		64
+#define AD5254_MAX_POSITION		256
+#define AD5255_MAX_POSITION		512
+
+#define AD525X_RDAC0		0
+#define AD525X_RDAC1		1
+#define AD525X_RDAC2		2
+#define AD525X_RDAC3		3
+
+#define AD525X_REG_TOL		0x18
+#define AD525X_TOL_RDAC0	(AD525X_REG_TOL | AD525X_RDAC0)
+#define AD525X_TOL_RDAC1	(AD525X_REG_TOL | AD525X_RDAC1)
+#define AD525X_TOL_RDAC2	(AD525X_REG_TOL | AD525X_RDAC2)
+#define AD525X_TOL_RDAC3	(AD525X_REG_TOL | AD525X_RDAC3)
+
+/* RDAC-to-EEPROM Interface Commands */
+#define AD525X_I2C_RDAC		(0x00 << 5)
+#define AD525X_I2C_EEPROM	(0x01 << 5)
+#define AD525X_I2C_CMD		(0x80)
+
+#define AD525X_DEC_ALL_6DB	(AD525X_I2C_CMD | (0x4 << 3))
+#define AD525X_INC_ALL_6DB	(AD525X_I2C_CMD | (0x9 << 3))
+#define AD525X_DEC_ALL		(AD525X_I2C_CMD | (0x6 << 3))
+#define AD525X_INC_ALL		(AD525X_I2C_CMD | (0xB << 3))
+
+static s32 ad525x_read(struct i2c_client *client, u8 reg);
+static s32 ad525x_write(struct i2c_client *client, u8 reg, u8 value);
+
+/*
+ * Client data (each client gets its own)
+ */
+
+struct dpot_data {
+	struct mutex update_lock;
+	unsigned rdac_mask;
+	unsigned max_pos;
+	unsigned devid;
+};
+
+/* sysfs functions */
+
+static ssize_t sysfs_show_reg(struct device *dev,
+			      struct device_attribute *attr, char *buf, u32 reg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct dpot_data *data = i2c_get_clientdata(client);
+	s32 value;
+
+	mutex_lock(&data->update_lock);
+	value = ad525x_read(client, reg);
+	mutex_unlock(&data->update_lock);
+
+	if (value < 0)
+		return -EINVAL;
+	/*
+	 * Let someone else deal with converting this ...
+	 * the tolerance is a two-byte value where the MSB
+	 * is a sign + integer value, and the LSB is a
+	 * decimal value.  See page 18 of the AD5258
+	 * datasheet (Rev. A) for more details.
+	 */
+
+	if (reg & AD525X_REG_TOL)
+		return sprintf(buf, "0x%04x\n", value & 0xFFFF);
+	else
+		return sprintf(buf, "%u\n", value & data->rdac_mask);
+}
+
+static ssize_t sysfs_set_reg(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t count, u32 reg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct dpot_data *data = i2c_get_clientdata(client);
+	unsigned long value;
+	int err;
+
+	err = strict_strtoul(buf, 10, &value);
+	if (err)
+		return err;
+
+	if (value > data->rdac_mask)
+		value = data->rdac_mask;
+
+	mutex_lock(&data->update_lock);
+	ad525x_write(client, reg, value);
+	if (reg & AD525X_I2C_EEPROM)
+		msleep(26);	/* Sleep while the EEPROM updates */
+	mutex_unlock(&data->update_lock);
+
+	return count;
+}
+
+static ssize_t sysfs_do_cmd(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf, size_t count, u32 reg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct dpot_data *data = i2c_get_clientdata(client);
+
+	mutex_lock(&data->update_lock);
+	ad525x_write(client, reg, 0);
+	mutex_unlock(&data->update_lock);
+
+	return count;
+}
+
+/* ------------------------------------------------------------------------- */
+
+static ssize_t show_rdac0(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf, AD525X_I2C_RDAC | AD525X_RDAC0);
+}
+
+static ssize_t set_rdac0(struct device *dev,
+			 struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	return sysfs_set_reg(dev, attr, buf, count,
+			     AD525X_I2C_RDAC | AD525X_RDAC0);
+}
+
+static DEVICE_ATTR(rdac0, S_IWUSR | S_IRUGO, show_rdac0, set_rdac0);
+
+static ssize_t show_eeprom0(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf, AD525X_I2C_EEPROM | AD525X_RDAC0);
+}
+
+static ssize_t set_eeprom0(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	return sysfs_set_reg(dev, attr, buf, count,
+			     AD525X_I2C_EEPROM | AD525X_RDAC0);
+}
+
+static DEVICE_ATTR(eeprom0, S_IWUSR | S_IRUGO, show_eeprom0, set_eeprom0);
+
+static ssize_t show_tolerance0(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf,
+			      AD525X_I2C_EEPROM | AD525X_TOL_RDAC0);
+}
+
+static DEVICE_ATTR(tolerance0, S_IRUGO, show_tolerance0, NULL);
+
+/* ------------------------------------------------------------------------- */
+
+static ssize_t show_rdac1(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf, AD525X_I2C_RDAC | AD525X_RDAC1);
+}
+
+static ssize_t set_rdac1(struct device *dev,
+			 struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	return sysfs_set_reg(dev, attr, buf, count,
+			     AD525X_I2C_RDAC | AD525X_RDAC1);
+}
+
+static DEVICE_ATTR(rdac1, S_IWUSR | S_IRUGO, show_rdac1, set_rdac1);
+
+static ssize_t show_eeprom1(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf, AD525X_I2C_EEPROM | AD525X_RDAC1);
+}
+
+static ssize_t set_eeprom1(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	return sysfs_set_reg(dev, attr, buf, count,
+			     AD525X_I2C_EEPROM | AD525X_RDAC1);
+}
+
+static DEVICE_ATTR(eeprom1, S_IWUSR | S_IRUGO, show_eeprom1, set_eeprom1);
+
+static ssize_t show_tolerance1(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf,
+			      AD525X_I2C_EEPROM | AD525X_TOL_RDAC1);
+}
+
+static DEVICE_ATTR(tolerance1, S_IRUGO, show_tolerance1, NULL);
+
+/* ------------------------------------------------------------------------- */
+
+static ssize_t show_rdac2(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf, AD525X_I2C_RDAC | AD525X_RDAC2);
+}
+
+static ssize_t set_rdac2(struct device *dev,
+			 struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	return sysfs_set_reg(dev, attr, buf, count,
+			     AD525X_I2C_RDAC | AD525X_RDAC2);
+}
+
+static DEVICE_ATTR(rdac2, S_IWUSR | S_IRUGO, show_rdac2, set_rdac2);
+
+static ssize_t show_eeprom2(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf, AD525X_I2C_EEPROM | AD525X_RDAC2);
+}
+
+static ssize_t set_eeprom2(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	return sysfs_set_reg(dev, attr, buf, count,
+			     AD525X_I2C_EEPROM | AD525X_RDAC2);
+}
+
+static DEVICE_ATTR(eeprom2, S_IWUSR | S_IRUGO, show_eeprom2, set_eeprom2);
+
+static ssize_t show_tolerance2(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf,
+			      AD525X_I2C_EEPROM | AD525X_TOL_RDAC2);
+}
+
+static DEVICE_ATTR(tolerance2, S_IRUGO, show_tolerance2, NULL);
+
+/* ------------------------------------------------------------------------- */
+
+static ssize_t show_rdac3(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf, AD525X_I2C_RDAC | AD525X_RDAC3);
+}
+
+static ssize_t set_rdac3(struct device *dev,
+			 struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	return sysfs_set_reg(dev, attr, buf, count,
+			     AD525X_I2C_RDAC | AD525X_RDAC3);
+}
+
+static DEVICE_ATTR(rdac3, S_IWUSR | S_IRUGO, show_rdac3, set_rdac3);
+
+static ssize_t show_eeprom3(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf, AD525X_I2C_EEPROM | AD525X_RDAC3);
+}
+
+static ssize_t set_eeprom3(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	return sysfs_set_reg(dev, attr, buf, count,
+			     AD525X_I2C_EEPROM | AD525X_RDAC3);
+}
+
+static DEVICE_ATTR(eeprom3, S_IWUSR | S_IRUGO, show_eeprom3, set_eeprom3);
+
+static ssize_t show_tolerance3(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	return sysfs_show_reg(dev, attr, buf,
+			      AD525X_I2C_EEPROM | AD525X_TOL_RDAC3);
+}
+
+static DEVICE_ATTR(tolerance3, S_IRUGO, show_tolerance3, NULL);
+
+static struct attribute *ad525x_attributes_wipers[4][4] = {
+	{
+		&dev_attr_rdac0.attr,
+		&dev_attr_eeprom0.attr,
+		&dev_attr_tolerance0.attr,
+		NULL
+	}, {
+		&dev_attr_rdac1.attr,
+		&dev_attr_eeprom1.attr,
+		&dev_attr_tolerance1.attr,
+		NULL
+	}, {
+		&dev_attr_rdac2.attr,
+		&dev_attr_eeprom2.attr,
+		&dev_attr_tolerance2.attr,
+		NULL
+	}, {
+		&dev_attr_rdac3.attr,
+		&dev_attr_eeprom3.attr,
+		&dev_attr_tolerance3.attr,
+		NULL
+	}
+};
+
+static const struct attribute_group ad525x_group_wipers[] = {
+	{.attrs = ad525x_attributes_wipers[AD525X_RDAC0]},
+	{.attrs = ad525x_attributes_wipers[AD525X_RDAC1]},
+	{.attrs = ad525x_attributes_wipers[AD525X_RDAC2]},
+	{.attrs = ad525x_attributes_wipers[AD525X_RDAC3]},
+};
+
+/* ------------------------------------------------------------------------- */
+
+static ssize_t set_inc_all(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	return sysfs_do_cmd(dev, attr, buf, count, AD525X_INC_ALL);
+}
+
+static DEVICE_ATTR(inc_all, S_IWUSR, NULL, set_inc_all);
+
+static ssize_t set_dec_all(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	return sysfs_do_cmd(dev, attr, buf, count, AD525X_DEC_ALL);
+}
+
+static DEVICE_ATTR(dec_all, S_IWUSR, NULL, set_dec_all);
+
+static ssize_t set_inc_all_6db(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	return sysfs_do_cmd(dev, attr, buf, count, AD525X_INC_ALL_6DB);
+}
+
+static DEVICE_ATTR(inc_all_6db, S_IWUSR, NULL, set_inc_all_6db);
+
+static ssize_t set_dec_all_6db(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	return sysfs_do_cmd(dev, attr, buf, count, AD525X_DEC_ALL_6DB);
+}
+
+static DEVICE_ATTR(dec_all_6db, S_IWUSR, NULL, set_dec_all_6db);
+
+static struct attribute *ad525x_attributes_commands[] = {
+	&dev_attr_inc_all.attr,
+	&dev_attr_dec_all.attr,
+	&dev_attr_inc_all_6db.attr,
+	&dev_attr_dec_all_6db.attr,
+	NULL
+};
+
+static const struct attribute_group ad525x_group_commands = {
+	.attrs = ad525x_attributes_commands,
+};
+
+/* ------------------------------------------------------------------------- */
+
+/* i2c device functions */
+
+/**
+ * ad525x_read - return the value contained in the specified register
+ * on the AD5258 device.
+ * @client: value returned from i2c_new_device()
+ * @reg: the register to read
+ *
+ * If the tolerance register is specified, 2 bytes are returned.
+ * Otherwise, 1 byte is returned.  A negative value indicates an error
+ * occurred while reading the register.
+ */
+static s32 ad525x_read(struct i2c_client *client, u8 reg)
+{
+	struct dpot_data *data = i2c_get_clientdata(client);
+
+	if ((reg & AD525X_REG_TOL) || (data->max_pos > 256))
+		return i2c_smbus_read_word_data(client, (reg & 0xF8) |
+						((reg & 0x7) << 1));
+	else
+		return i2c_smbus_read_byte_data(client, reg);
+}
+
+/**
+ * ad525x_write - store the given value in the specified register on
+ * the AD5258 device.
+ * @client: value returned from i2c_new_device()
+ * @reg: the register to write
+ * @value: the byte to store in the register
+ *
+ * For certain instructions that do not require a data byte, "NULL"
+ * should be specified for the "value" parameter.  These instructions
+ * include NOP, RESTORE_FROM_EEPROM, and STORE_TO_EEPROM.
+ *
+ * A negative return value indicates an error occurred while reading
+ * the register.
+ */
+static s32 ad525x_write(struct i2c_client *client, u8 reg, u8 value)
+{
+	struct dpot_data *data = i2c_get_clientdata(client);
+
+	/* Only write the instruction byte for certain commands */
+	if (reg & AD525X_I2C_CMD)
+		return i2c_smbus_write_byte(client, reg);
+
+	if (data->max_pos > 256)
+		return i2c_smbus_write_word_data(client, (reg & 0xF8) |
+						((reg & 0x7) << 1), value);
+	else
+		/* All other registers require instruction + data bytes */
+		return i2c_smbus_write_byte_data(client, reg, value);
+}
+
+static int ad525x_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	struct device *dev = &client->dev;
+	struct dpot_data *data;
+	int err = 0;
+
+	dev_dbg(dev, "%s\n", __func__);
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE)) {
+		dev_err(dev, "missing I2C functionality for this driver\n");
+		goto exit;
+	}
+
+	data = kzalloc(sizeof(struct dpot_data), GFP_KERNEL);
+	if (!data) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->update_lock);
+
+	switch (id->driver_data) {
+	case AD5258_ID:
+		data->max_pos = AD5258_MAX_POSITION;
+		err = sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC0]);
+		break;
+	case AD5259_ID:
+		data->max_pos = AD5259_MAX_POSITION;
+		err = sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC0]);
+		break;
+	case AD5251_ID:
+		data->max_pos = AD5251_MAX_POSITION;
+		err = sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC1]);
+		err |= sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC3]);
+		err |= sysfs_create_group(&dev->kobj, &ad525x_group_commands);
+		break;
+	case AD5252_ID:
+		data->max_pos = AD5252_MAX_POSITION;
+		err = sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC1]);
+		err |= sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC3]);
+		err |= sysfs_create_group(&dev->kobj, &ad525x_group_commands);
+		break;
+	case AD5253_ID:
+		data->max_pos = AD5253_MAX_POSITION;
+		err = sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC0]);
+		err |= sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC1]);
+		err |= sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC2]);
+		err |= sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC3]);
+		err |= sysfs_create_group(&dev->kobj, &ad525x_group_commands);
+		break;
+	case AD5254_ID:
+		data->max_pos = AD5254_MAX_POSITION;
+		err = sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC0]);
+		err |= sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC1]);
+		err |= sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC2]);
+		err |= sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC3]);
+		err |= sysfs_create_group(&dev->kobj, &ad525x_group_commands);
+		break;
+	case AD5255_ID:
+		data->max_pos = AD5255_MAX_POSITION;
+		err = sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC0]);
+		err |= sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC1]);
+		err |= sysfs_create_group(&dev->kobj,
+				       &ad525x_group_wipers[AD525X_RDAC2]);
+		err |= sysfs_create_group(&dev->kobj, &ad525x_group_commands);
+		break;
+	default:
+		err = -ENODEV;
+		goto exit_free;
+	}
+
+	if (err) {
+		dev_err(dev, "failed to register sysfs hooks\n");
+		goto exit_free;
+	}
+
+	data->devid = id->driver_data;
+	data->rdac_mask = data->max_pos - 1;
+
+	dev_info(dev, "%s %d-Position Digital Potentiometer registered\n",
+		 id->name, data->max_pos);
+
+	return 0;
+
+exit_free:
+	kfree(data);
+	i2c_set_clientdata(client, NULL);
+exit:
+	dev_err(dev, "failed to create client\n");
+	return err;
+}
+
+static int __devexit ad525x_remove(struct i2c_client *client)
+{
+	struct dpot_data *data = i2c_get_clientdata(client);
+	struct device *dev = &client->dev;
+
+	switch (data->devid) {
+	case AD5258_ID:
+	case AD5259_ID:
+		sysfs_remove_group(&dev->kobj,
+				   &ad525x_group_wipers[AD525X_RDAC0]);
+		break;
+	case AD5251_ID:
+	case AD5252_ID:
+		sysfs_remove_group(&dev->kobj,
+				   &ad525x_group_wipers[AD525X_RDAC1]);
+		sysfs_remove_group(&dev->kobj,
+				   &ad525x_group_wipers[AD525X_RDAC3]);
+		sysfs_remove_group(&dev->kobj, &ad525x_group_commands);
+		break;
+	case AD5253_ID:
+	case AD5254_ID:
+		sysfs_remove_group(&dev->kobj,
+				   &ad525x_group_wipers[AD525X_RDAC0]);
+		sysfs_remove_group(&dev->kobj,
+				   &ad525x_group_wipers[AD525X_RDAC1]);
+		sysfs_remove_group(&dev->kobj,
+				   &ad525x_group_wipers[AD525X_RDAC2]);
+		sysfs_remove_group(&dev->kobj,
+				   &ad525x_group_wipers[AD525X_RDAC3]);
+		sysfs_remove_group(&dev->kobj, &ad525x_group_commands);
+		break;
+	case AD5255_ID:
+		sysfs_remove_group(&dev->kobj,
+				   &ad525x_group_wipers[AD525X_RDAC0]);
+		sysfs_remove_group(&dev->kobj,
+				   &ad525x_group_wipers[AD525X_RDAC1]);
+		sysfs_remove_group(&dev->kobj,
+				   &ad525x_group_wipers[AD525X_RDAC2]);
+		sysfs_remove_group(&dev->kobj, &ad525x_group_commands);
+		break;
+	}
+
+	i2c_set_clientdata(client, NULL);
+	kfree(data);
+
+	return 0;
+}
+
+static const struct i2c_device_id ad525x_idtable[] = {
+	{"ad5258", AD5258_ID},
+	{"ad5259", AD5259_ID},
+	{"ad5251", AD5251_ID},
+	{"ad5252", AD5252_ID},
+	{"ad5253", AD5253_ID},
+	{"ad5254", AD5254_ID},
+	{"ad5255", AD5255_ID},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, ad525x_idtable);
+
+static struct i2c_driver ad525x_driver = {
+	.driver = {
+		   .owner = THIS_MODULE,
+		   .name = DRIVER_NAME,
+		   },
+	.id_table = ad525x_idtable,
+	.probe = ad525x_probe,
+	.remove = __devexit_p(ad525x_remove),
+};
+
+static int __init ad525x_init(void)
+{
+	return i2c_add_driver(&ad525x_driver);
+}
+
+module_init(ad525x_init);
+
+static void __exit ad525x_exit(void)
+{
+	i2c_del_driver(&ad525x_driver);
+}
+
+module_exit(ad525x_exit);
+
+MODULE_AUTHOR("Chris Verges <chrisv@cyberswitching.com>, "
+	      "Michael Hennerich <hennerich@blackfin.uclinux.org>, ");
+MODULE_DESCRIPTION("AD5258/9 digital potentiometer driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
new file mode 100644
index 00000000000..558bf3f2c27
--- /dev/null
+++ b/drivers/misc/atmel-ssc.c
@@ -0,0 +1,175 @@
+/*
+ * Atmel SSC driver
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/list.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/atmel-ssc.h>
+
+/* Serialize access to ssc_list and user count */
+static DEFINE_SPINLOCK(user_lock);
+static LIST_HEAD(ssc_list);
+
+struct ssc_device *ssc_request(unsigned int ssc_num)
+{
+	int ssc_valid = 0;
+	struct ssc_device *ssc;
+
+	spin_lock(&user_lock);
+	list_for_each_entry(ssc, &ssc_list, list) {
+		if (ssc->pdev->id == ssc_num) {
+			ssc_valid = 1;
+			break;
+		}
+	}
+
+	if (!ssc_valid) {
+		spin_unlock(&user_lock);
+		pr_err("ssc: ssc%d platform device is missing\n", ssc_num);
+		return ERR_PTR(-ENODEV);
+	}
+
+	if (ssc->user) {
+		spin_unlock(&user_lock);
+		dev_dbg(&ssc->pdev->dev, "module busy\n");
+		return ERR_PTR(-EBUSY);
+	}
+	ssc->user++;
+	spin_unlock(&user_lock);
+
+	clk_enable(ssc->clk);
+
+	return ssc;
+}
+EXPORT_SYMBOL(ssc_request);
+
+void ssc_free(struct ssc_device *ssc)
+{
+	spin_lock(&user_lock);
+	if (ssc->user) {
+		ssc->user--;
+		clk_disable(ssc->clk);
+	} else {
+		dev_dbg(&ssc->pdev->dev, "device already free\n");
+	}
+	spin_unlock(&user_lock);
+}
+EXPORT_SYMBOL(ssc_free);
+
+static int __init ssc_probe(struct platform_device *pdev)
+{
+	int retval = 0;
+	struct resource *regs;
+	struct ssc_device *ssc;
+
+	ssc = kzalloc(sizeof(struct ssc_device), GFP_KERNEL);
+	if (!ssc) {
+		dev_dbg(&pdev->dev, "out of memory\n");
+		retval = -ENOMEM;
+		goto out;
+	}
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs) {
+		dev_dbg(&pdev->dev, "no mmio resource defined\n");
+		retval = -ENXIO;
+		goto out_free;
+	}
+
+	ssc->clk = clk_get(&pdev->dev, "pclk");
+	if (IS_ERR(ssc->clk)) {
+		dev_dbg(&pdev->dev, "no pclk clock defined\n");
+		retval = -ENXIO;
+		goto out_free;
+	}
+
+	ssc->pdev = pdev;
+	ssc->regs = ioremap(regs->start, regs->end - regs->start + 1);
+	if (!ssc->regs) {
+		dev_dbg(&pdev->dev, "ioremap failed\n");
+		retval = -EINVAL;
+		goto out_clk;
+	}
+
+	/* disable all interrupts */
+	clk_enable(ssc->clk);
+	ssc_writel(ssc->regs, IDR, ~0UL);
+	ssc_readl(ssc->regs, SR);
+	clk_disable(ssc->clk);
+
+	ssc->irq = platform_get_irq(pdev, 0);
+	if (!ssc->irq) {
+		dev_dbg(&pdev->dev, "could not get irq\n");
+		retval = -ENXIO;
+		goto out_unmap;
+	}
+
+	spin_lock(&user_lock);
+	list_add_tail(&ssc->list, &ssc_list);
+	spin_unlock(&user_lock);
+
+	platform_set_drvdata(pdev, ssc);
+
+	dev_info(&pdev->dev, "Atmel SSC device at 0x%p (irq %d)\n",
+			ssc->regs, ssc->irq);
+
+	goto out;
+
+out_unmap:
+	iounmap(ssc->regs);
+out_clk:
+	clk_put(ssc->clk);
+out_free:
+	kfree(ssc);
+out:
+	return retval;
+}
+
+static int __devexit ssc_remove(struct platform_device *pdev)
+{
+	struct ssc_device *ssc = platform_get_drvdata(pdev);
+
+	spin_lock(&user_lock);
+	iounmap(ssc->regs);
+	clk_put(ssc->clk);
+	list_del(&ssc->list);
+	kfree(ssc);
+	spin_unlock(&user_lock);
+
+	return 0;
+}
+
+static struct platform_driver ssc_driver = {
+	.remove		= __devexit_p(ssc_remove),
+	.driver		= {
+		.name		= "ssc",
+		.owner		= THIS_MODULE,
+	},
+};
+
+static int __init ssc_init(void)
+{
+	return platform_driver_probe(&ssc_driver, ssc_probe);
+}
+module_init(ssc_init);
+
+static void __exit ssc_exit(void)
+{
+	platform_driver_unregister(&ssc_driver);
+}
+module_exit(ssc_exit);
+
+MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>");
+MODULE_DESCRIPTION("SSC driver for Atmel AVR32 and AT91");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:ssc");
diff --git a/drivers/misc/atmel_pwm.c b/drivers/misc/atmel_pwm.c
new file mode 100644
index 00000000000..6aa5294dfec
--- /dev/null
+++ b/drivers/misc/atmel_pwm.c
@@ -0,0 +1,409 @@
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/atmel_pwm.h>
+
+
+/*
+ * This is a simple driver for the PWM controller found in various newer
+ * Atmel SOCs, including the AVR32 series and the AT91sam9263.
+ *
+ * Chips with current Linux ports have only 4 PWM channels, out of max 32.
+ * AT32UC3A and AT32UC3B chips have 7 channels (but currently no Linux).
+ * Docs are inconsistent about the width of the channel counter registers;
+ * it's at least 16 bits, but several places say 20 bits.
+ */
+#define	PWM_NCHAN	4		/* max 32 */
+
+struct pwm {
+	spinlock_t		lock;
+	struct platform_device	*pdev;
+	u32			mask;
+	int			irq;
+	void __iomem		*base;
+	struct clk		*clk;
+	struct pwm_channel	*channel[PWM_NCHAN];
+	void			(*handler[PWM_NCHAN])(struct pwm_channel *);
+};
+
+
+/* global PWM controller registers */
+#define PWM_MR		0x00
+#define PWM_ENA		0x04
+#define PWM_DIS		0x08
+#define PWM_SR		0x0c
+#define PWM_IER		0x10
+#define PWM_IDR		0x14
+#define PWM_IMR		0x18
+#define PWM_ISR		0x1c
+
+static inline void pwm_writel(const struct pwm *p, unsigned offset, u32 val)
+{
+	__raw_writel(val, p->base + offset);
+}
+
+static inline u32 pwm_readl(const struct pwm *p, unsigned offset)
+{
+	return __raw_readl(p->base + offset);
+}
+
+static inline void __iomem *pwmc_regs(const struct pwm *p, int index)
+{
+	return p->base + 0x200 + index * 0x20;
+}
+
+static struct pwm *pwm;
+
+static void pwm_dumpregs(struct pwm_channel *ch, char *tag)
+{
+	struct device	*dev = &pwm->pdev->dev;
+
+	dev_dbg(dev, "%s: mr %08x, sr %08x, imr %08x\n",
+		tag,
+		pwm_readl(pwm, PWM_MR),
+		pwm_readl(pwm, PWM_SR),
+		pwm_readl(pwm, PWM_IMR));
+	dev_dbg(dev,
+		"pwm ch%d - mr %08x, dty %u, prd %u, cnt %u\n",
+		ch->index,
+		pwm_channel_readl(ch, PWM_CMR),
+		pwm_channel_readl(ch, PWM_CDTY),
+		pwm_channel_readl(ch, PWM_CPRD),
+		pwm_channel_readl(ch, PWM_CCNT));
+}
+
+
+/**
+ * pwm_channel_alloc - allocate an unused PWM channel
+ * @index: identifies the channel
+ * @ch: structure to be initialized
+ *
+ * Drivers allocate PWM channels according to the board's wiring, and
+ * matching board-specific setup code.  Returns zero or negative errno.
+ */
+int pwm_channel_alloc(int index, struct pwm_channel *ch)
+{
+	unsigned long	flags;
+	int		status = 0;
+
+	/* insist on PWM init, with this signal pinned out */
+	if (!pwm || !(pwm->mask & 1 << index))
+		return -ENODEV;
+
+	if (index < 0 || index >= PWM_NCHAN || !ch)
+		return -EINVAL;
+	memset(ch, 0, sizeof *ch);
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	if (pwm->channel[index])
+		status = -EBUSY;
+	else {
+		clk_enable(pwm->clk);
+
+		ch->regs = pwmc_regs(pwm, index);
+		ch->index = index;
+
+		/* REVISIT: ap7000 seems to go 2x as fast as we expect!! */
+		ch->mck = clk_get_rate(pwm->clk);
+
+		pwm->channel[index] = ch;
+		pwm->handler[index] = NULL;
+
+		/* channel and irq are always disabled when we return */
+		pwm_writel(pwm, PWM_DIS, 1 << index);
+		pwm_writel(pwm, PWM_IDR, 1 << index);
+	}
+	spin_unlock_irqrestore(&pwm->lock, flags);
+	return status;
+}
+EXPORT_SYMBOL(pwm_channel_alloc);
+
+static int pwmcheck(struct pwm_channel *ch)
+{
+	int		index;
+
+	if (!pwm)
+		return -ENODEV;
+	if (!ch)
+		return -EINVAL;
+	index = ch->index;
+	if (index < 0 || index >= PWM_NCHAN || pwm->channel[index] != ch)
+		return -EINVAL;
+
+	return index;
+}
+
+/**
+ * pwm_channel_free - release a previously allocated channel
+ * @ch: the channel being released
+ *
+ * The channel is completely shut down (counter and IRQ disabled),
+ * and made available for re-use.  Returns zero, or negative errno.
+ */
+int pwm_channel_free(struct pwm_channel *ch)
+{
+	unsigned long	flags;
+	int		t;
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	t = pwmcheck(ch);
+	if (t >= 0) {
+		pwm->channel[t] = NULL;
+		pwm->handler[t] = NULL;
+
+		/* channel and irq are always disabled when we return */
+		pwm_writel(pwm, PWM_DIS, 1 << t);
+		pwm_writel(pwm, PWM_IDR, 1 << t);
+
+		clk_disable(pwm->clk);
+		t = 0;
+	}
+	spin_unlock_irqrestore(&pwm->lock, flags);
+	return t;
+}
+EXPORT_SYMBOL(pwm_channel_free);
+
+int __pwm_channel_onoff(struct pwm_channel *ch, int enabled)
+{
+	unsigned long	flags;
+	int		t;
+
+	/* OMITTED FUNCTIONALITY:  starting several channels in synch */
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	t = pwmcheck(ch);
+	if (t >= 0) {
+		pwm_writel(pwm, enabled ? PWM_ENA : PWM_DIS, 1 << t);
+		t = 0;
+		pwm_dumpregs(ch, enabled ? "enable" : "disable");
+	}
+	spin_unlock_irqrestore(&pwm->lock, flags);
+
+	return t;
+}
+EXPORT_SYMBOL(__pwm_channel_onoff);
+
+/**
+ * pwm_clk_alloc - allocate and configure CLKA or CLKB
+ * @prescale: from 0..10, the power of two used to divide MCK
+ * @div: from 1..255, the linear divisor to use
+ *
+ * Returns PWM_CPR_CLKA, PWM_CPR_CLKB, or negative errno.  The allocated
+ * clock will run with a period of (2^prescale * div) / MCK, or twice as
+ * long if center aligned PWM output is used.  The clock must later be
+ * deconfigured using pwm_clk_free().
+ */
+int pwm_clk_alloc(unsigned prescale, unsigned div)
+{
+	unsigned long	flags;
+	u32		mr;
+	u32		val = (prescale << 8) | div;
+	int		ret = -EBUSY;
+
+	if (prescale >= 10 || div == 0 || div > 255)
+		return -EINVAL;
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	mr = pwm_readl(pwm, PWM_MR);
+	if ((mr & 0xffff) == 0) {
+		mr |= val;
+		ret = PWM_CPR_CLKA;
+	} else if ((mr & (0xffff << 16)) == 0) {
+		mr |= val << 16;
+		ret = PWM_CPR_CLKB;
+	}
+	if (ret > 0)
+		pwm_writel(pwm, PWM_MR, mr);
+	spin_unlock_irqrestore(&pwm->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(pwm_clk_alloc);
+
+/**
+ * pwm_clk_free - deconfigure and release CLKA or CLKB
+ *
+ * Reverses the effect of pwm_clk_alloc().
+ */
+void pwm_clk_free(unsigned clk)
+{
+	unsigned long	flags;
+	u32		mr;
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	mr = pwm_readl(pwm, PWM_MR);
+	if (clk == PWM_CPR_CLKA)
+		pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 0));
+	if (clk == PWM_CPR_CLKB)
+		pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 16));
+	spin_unlock_irqrestore(&pwm->lock, flags);
+}
+EXPORT_SYMBOL(pwm_clk_free);
+
+/**
+ * pwm_channel_handler - manage channel's IRQ handler
+ * @ch: the channel
+ * @handler: the handler to use, possibly NULL
+ *
+ * If the handler is non-null, the handler will be called after every
+ * period of this PWM channel.  If the handler is null, this channel
+ * won't generate an IRQ.
+ */
+int pwm_channel_handler(struct pwm_channel *ch,
+		void (*handler)(struct pwm_channel *ch))
+{
+	unsigned long	flags;
+	int		t;
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	t = pwmcheck(ch);
+	if (t >= 0) {
+		pwm->handler[t] = handler;
+		pwm_writel(pwm, handler ? PWM_IER : PWM_IDR, 1 << t);
+		t = 0;
+	}
+	spin_unlock_irqrestore(&pwm->lock, flags);
+
+	return t;
+}
+EXPORT_SYMBOL(pwm_channel_handler);
+
+static irqreturn_t pwm_irq(int id, void *_pwm)
+{
+	struct pwm	*p = _pwm;
+	irqreturn_t	handled = IRQ_NONE;
+	u32		irqstat;
+	int		index;
+
+	spin_lock(&p->lock);
+
+	/* ack irqs, then handle them */
+	irqstat = pwm_readl(pwm, PWM_ISR);
+
+	while (irqstat) {
+		struct pwm_channel *ch;
+		void (*handler)(struct pwm_channel *ch);
+
+		index = ffs(irqstat) - 1;
+		irqstat &= ~(1 << index);
+		ch = pwm->channel[index];
+		handler = pwm->handler[index];
+		if (handler && ch) {
+			spin_unlock(&p->lock);
+			handler(ch);
+			spin_lock(&p->lock);
+			handled = IRQ_HANDLED;
+		}
+	}
+
+	spin_unlock(&p->lock);
+	return handled;
+}
+
+static int __init pwm_probe(struct platform_device *pdev)
+{
+	struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	int irq = platform_get_irq(pdev, 0);
+	u32 *mp = pdev->dev.platform_data;
+	struct pwm *p;
+	int status = -EIO;
+
+	if (pwm)
+		return -EBUSY;
+	if (!r || irq < 0 || !mp || !*mp)
+		return -ENODEV;
+	if (*mp & ~((1<<PWM_NCHAN)-1)) {
+		dev_warn(&pdev->dev, "mask 0x%x ... more than %d channels\n",
+			*mp, PWM_NCHAN);
+		return -EINVAL;
+	}
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	spin_lock_init(&p->lock);
+	p->pdev = pdev;
+	p->mask = *mp;
+	p->irq = irq;
+	p->base = ioremap(r->start, r->end - r->start + 1);
+	if (!p->base)
+		goto fail;
+	p->clk = clk_get(&pdev->dev, "pwm_clk");
+	if (IS_ERR(p->clk)) {
+		status = PTR_ERR(p->clk);
+		p->clk = NULL;
+		goto fail;
+	}
+
+	status = request_irq(irq, pwm_irq, 0, pdev->name, p);
+	if (status < 0)
+		goto fail;
+
+	pwm = p;
+	platform_set_drvdata(pdev, p);
+
+	return 0;
+
+fail:
+	if (p->clk)
+		clk_put(p->clk);
+	if (p->base)
+		iounmap(p->base);
+
+	kfree(p);
+	return status;
+}
+
+static int __exit pwm_remove(struct platform_device *pdev)
+{
+	struct pwm *p = platform_get_drvdata(pdev);
+
+	if (p != pwm)
+		return -EINVAL;
+
+	clk_enable(pwm->clk);
+	pwm_writel(pwm, PWM_DIS, (1 << PWM_NCHAN) - 1);
+	pwm_writel(pwm, PWM_IDR, (1 << PWM_NCHAN) - 1);
+	clk_disable(pwm->clk);
+
+	pwm = NULL;
+
+	free_irq(p->irq, p);
+	clk_put(p->clk);
+	iounmap(p->base);
+	kfree(p);
+
+	return 0;
+}
+
+static struct platform_driver atmel_pwm_driver = {
+	.driver = {
+		.name = "atmel_pwm",
+		.owner = THIS_MODULE,
+	},
+	.remove = __exit_p(pwm_remove),
+
+	/* NOTE: PWM can keep running in AVR32 "idle" and "frozen" states;
+	 * and all AT91sam9263 states, albeit at reduced clock rate if
+	 * MCK becomes the slow clock (i.e. what Linux labels STR).
+	 */
+};
+
+static int __init pwm_init(void)
+{
+	return platform_driver_probe(&atmel_pwm_driver, pwm_probe);
+}
+module_init(pwm_init);
+
+static void __exit pwm_exit(void)
+{
+	platform_driver_unregister(&atmel_pwm_driver);
+}
+module_exit(pwm_exit);
+
+MODULE_DESCRIPTION("Driver for AT32/AT91 PWM module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:atmel_pwm");
diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
new file mode 100644
index 00000000000..05dc8a31f28
--- /dev/null
+++ b/drivers/misc/atmel_tclib.c
@@ -0,0 +1,161 @@
+#include <linux/atmel_tc.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+/* Number of bytes to reserve for the iomem resource */
+#define ATMEL_TC_IOMEM_SIZE	256
+
+
+/*
+ * This is a thin library to solve the problem of how to portably allocate
+ * one of the TC blocks.  For simplicity, it doesn't currently expect to
+ * share individual timers between different drivers.
+ */
+
+#if defined(CONFIG_AVR32)
+/* AVR32 has these divide PBB */
+const u8 atmel_tc_divisors[5] = { 0, 4, 8, 16, 32, };
+EXPORT_SYMBOL(atmel_tc_divisors);
+
+#elif defined(CONFIG_ARCH_AT91)
+/* AT91 has these divide MCK */
+const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
+EXPORT_SYMBOL(atmel_tc_divisors);
+
+#endif
+
+static DEFINE_SPINLOCK(tc_list_lock);
+static LIST_HEAD(tc_list);
+
+/**
+ * atmel_tc_alloc - allocate a specified TC block
+ * @block: which block to allocate
+ * @name: name to be associated with the iomem resource
+ *
+ * Caller allocates a block.  If it is available, a pointer to a
+ * pre-initialized struct atmel_tc is returned. The caller can access
+ * the registers directly through the "regs" field.
+ */
+struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name)
+{
+	struct atmel_tc		*tc;
+	struct platform_device	*pdev = NULL;
+	struct resource		*r;
+
+	spin_lock(&tc_list_lock);
+	list_for_each_entry(tc, &tc_list, node) {
+		if (tc->pdev->id == block) {
+			pdev = tc->pdev;
+			break;
+		}
+	}
+
+	if (!pdev || tc->iomem)
+		goto fail;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	r = request_mem_region(r->start, ATMEL_TC_IOMEM_SIZE, name);
+	if (!r)
+		goto fail;
+
+	tc->regs = ioremap(r->start, ATMEL_TC_IOMEM_SIZE);
+	if (!tc->regs)
+		goto fail_ioremap;
+
+	tc->iomem = r;
+
+out:
+	spin_unlock(&tc_list_lock);
+	return tc;
+
+fail_ioremap:
+	release_resource(r);
+fail:
+	tc = NULL;
+	goto out;
+}
+EXPORT_SYMBOL_GPL(atmel_tc_alloc);
+
+/**
+ * atmel_tc_free - release a specified TC block
+ * @tc: Timer/counter block that was returned by atmel_tc_alloc()
+ *
+ * This reverses the effect of atmel_tc_alloc(), unmapping the I/O
+ * registers, invalidating the resource returned by that routine and
+ * making the TC available to other drivers.
+ */
+void atmel_tc_free(struct atmel_tc *tc)
+{
+	spin_lock(&tc_list_lock);
+	if (tc->regs) {
+		iounmap(tc->regs);
+		release_resource(tc->iomem);
+		tc->regs = NULL;
+		tc->iomem = NULL;
+	}
+	spin_unlock(&tc_list_lock);
+}
+EXPORT_SYMBOL_GPL(atmel_tc_free);
+
+static int __init tc_probe(struct platform_device *pdev)
+{
+	struct atmel_tc *tc;
+	struct clk	*clk;
+	int		irq;
+
+	if (!platform_get_resource(pdev, IORESOURCE_MEM, 0))
+		return -EINVAL;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -EINVAL;
+
+	tc = kzalloc(sizeof(struct atmel_tc), GFP_KERNEL);
+	if (!tc)
+		return -ENOMEM;
+
+	tc->pdev = pdev;
+
+	clk = clk_get(&pdev->dev, "t0_clk");
+	if (IS_ERR(clk)) {
+		kfree(tc);
+		return -EINVAL;
+	}
+
+	tc->clk[0] = clk;
+	tc->clk[1] = clk_get(&pdev->dev, "t1_clk");
+	if (IS_ERR(tc->clk[1]))
+		tc->clk[1] = clk;
+	tc->clk[2] = clk_get(&pdev->dev, "t2_clk");
+	if (IS_ERR(tc->clk[2]))
+		tc->clk[2] = clk;
+
+	tc->irq[0] = irq;
+	tc->irq[1] = platform_get_irq(pdev, 1);
+	if (tc->irq[1] < 0)
+		tc->irq[1] = irq;
+	tc->irq[2] = platform_get_irq(pdev, 2);
+	if (tc->irq[2] < 0)
+		tc->irq[2] = irq;
+
+	spin_lock(&tc_list_lock);
+	list_add_tail(&tc->node, &tc_list);
+	spin_unlock(&tc_list_lock);
+
+	return 0;
+}
+
+static struct platform_driver tc_driver = {
+	.driver.name	= "atmel_tcb",
+};
+
+static int __init tc_init(void)
+{
+	return platform_driver_probe(&tc_driver, tc_probe);
+}
+arch_initcall(tc_init);
diff --git a/drivers/misc/c2port/Kconfig b/drivers/misc/c2port/Kconfig
new file mode 100644
index 00000000000..e46af9a5810
--- /dev/null
+++ b/drivers/misc/c2port/Kconfig
@@ -0,0 +1,35 @@
+#
+# C2 port devices
+#
+
+menuconfig C2PORT
+	tristate "Silicon Labs C2 port support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	default no
+	help
+	  This option enables support for Silicon Labs C2 port used to
+	  program Silicon micro controller chips (and other 8051 compatible).
+
+	  If your board have no such micro controllers you don't need this
+	  interface at all.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called c2port_core. Note that you also need a client module
+	  usually called c2port-*.
+
+	  If you are not sure, say N here.
+
+if C2PORT
+
+config C2PORT_DURAMAR_2150
+	tristate "C2 port support for Eurotech's Duramar 2150 (EXPERIMENTAL)"
+	depends on X86 && C2PORT
+	default no
+	help
+	  This option enables C2 support for the Eurotech's Duramar 2150
+	  on board micro controller.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called c2port-duramar2150.
+
+endif # C2PORT
diff --git a/drivers/misc/c2port/Makefile b/drivers/misc/c2port/Makefile
new file mode 100644
index 00000000000..3b2cf43d60f
--- /dev/null
+++ b/drivers/misc/c2port/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_C2PORT)		+= core.o
+
+obj-$(CONFIG_C2PORT_DURAMAR_2150)	+= c2port-duramar2150.o
diff --git a/drivers/misc/c2port/c2port-duramar2150.c b/drivers/misc/c2port/c2port-duramar2150.c
new file mode 100644
index 00000000000..338dcc12150
--- /dev/null
+++ b/drivers/misc/c2port/c2port-duramar2150.c
@@ -0,0 +1,158 @@
+/*
+ *  Silicon Labs C2 port Linux support for Eurotech Duramar 2150
+ *
+ *  Copyright (c) 2008 Rodolfo Giometti <giometti@linux.it>
+ *  Copyright (c) 2008 Eurotech S.p.A. <info@eurotech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/c2port.h>
+
+#define DATA_PORT	0x325
+#define DIR_PORT	0x326
+#define    C2D		   (1 << 0)
+#define    C2CK		   (1 << 1)
+
+static DEFINE_MUTEX(update_lock);
+
+/*
+ * C2 port operations
+ */
+
+static void duramar2150_c2port_access(struct c2port_device *dev, int status)
+{
+	u8 v;
+
+	mutex_lock(&update_lock);
+
+	v = inb(DIR_PORT);
+
+	/* 0 = input, 1 = output */
+	if (status)
+		outb(v | (C2D | C2CK), DIR_PORT);
+	else
+		/* When access is "off" is important that both lines are set
+		 * as inputs or hi-impedence */
+		outb(v & ~(C2D | C2CK), DIR_PORT);
+
+	mutex_unlock(&update_lock);
+}
+
+static void duramar2150_c2port_c2d_dir(struct c2port_device *dev, int dir)
+{
+	u8 v;
+
+	mutex_lock(&update_lock);
+
+	v = inb(DIR_PORT);
+
+	if (dir)
+		outb(v & ~C2D, DIR_PORT);
+	else
+		outb(v | C2D, DIR_PORT);
+
+	mutex_unlock(&update_lock);
+}
+
+static int duramar2150_c2port_c2d_get(struct c2port_device *dev)
+{
+	return inb(DATA_PORT) & C2D;
+}
+
+static void duramar2150_c2port_c2d_set(struct c2port_device *dev, int status)
+{
+	u8 v;
+
+	mutex_lock(&update_lock);
+
+	v = inb(DATA_PORT);
+
+	if (status)
+		outb(v | C2D, DATA_PORT);
+	else
+		outb(v & ~C2D, DATA_PORT);
+
+	mutex_unlock(&update_lock);
+}
+
+static void duramar2150_c2port_c2ck_set(struct c2port_device *dev, int status)
+{
+	u8 v;
+
+	mutex_lock(&update_lock);
+
+	v = inb(DATA_PORT);
+
+	if (status)
+		outb(v | C2CK, DATA_PORT);
+	else
+		outb(v & ~C2CK, DATA_PORT);
+
+	mutex_unlock(&update_lock);
+}
+
+static struct c2port_ops duramar2150_c2port_ops = {
+	.block_size	= 512,	/* bytes */
+	.blocks_num	= 30,	/* total flash size: 15360 bytes */
+
+	.access		= duramar2150_c2port_access,
+	.c2d_dir	= duramar2150_c2port_c2d_dir,
+	.c2d_get	= duramar2150_c2port_c2d_get,
+	.c2d_set	= duramar2150_c2port_c2d_set,
+	.c2ck_set	= duramar2150_c2port_c2ck_set,
+};
+
+static struct c2port_device *duramar2150_c2port_dev;
+
+/*
+ * Module stuff
+ */
+
+static int __init duramar2150_c2port_init(void)
+{
+	struct resource *res;
+	int ret = 0;
+
+	res = request_region(0x325, 2, "c2port");
+	if (!res)
+		return -EBUSY;
+
+	duramar2150_c2port_dev = c2port_device_register("uc",
+					&duramar2150_c2port_ops, NULL);
+	if (!duramar2150_c2port_dev) {
+		ret = -ENODEV;
+		goto free_region;
+	}
+
+	return 0;
+
+free_region:
+	release_region(0x325, 2);
+	return ret;
+}
+
+static void __exit duramar2150_c2port_exit(void)
+{
+	/* Setup the GPIOs as input by default (access = 0) */
+	duramar2150_c2port_access(duramar2150_c2port_dev, 0);
+
+	c2port_device_unregister(duramar2150_c2port_dev);
+
+	release_region(0x325, 2);
+}
+
+module_init(duramar2150_c2port_init);
+module_exit(duramar2150_c2port_exit);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("Silicon Labs C2 port Linux support for Duramar 2150");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c
new file mode 100644
index 00000000000..b5346b4db91
--- /dev/null
+++ b/drivers/misc/c2port/core.c
@@ -0,0 +1,1005 @@
+/*
+ *  Silicon Labs C2 port core Linux support
+ *
+ *  Copyright (c) 2007 Rodolfo Giometti <giometti@linux.it>
+ *  Copyright (c) 2007 Eurotech S.p.A. <info@eurotech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/kmemcheck.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/sched.h>
+
+#include <linux/c2port.h>
+
+#define DRIVER_NAME             "c2port"
+#define DRIVER_VERSION          "0.51.0"
+
+static DEFINE_SPINLOCK(c2port_idr_lock);
+static DEFINE_IDR(c2port_idr);
+
+/*
+ * Local variables
+ */
+
+static struct class *c2port_class;
+
+/*
+ * C2 registers & commands defines
+ */
+
+/* C2 registers */
+#define C2PORT_DEVICEID		0x00
+#define C2PORT_REVID		0x01
+#define C2PORT_FPCTL		0x02
+#define C2PORT_FPDAT		0xB4
+
+/* C2 interface commands */
+#define C2PORT_GET_VERSION	0x01
+#define C2PORT_DEVICE_ERASE	0x03
+#define C2PORT_BLOCK_READ	0x06
+#define C2PORT_BLOCK_WRITE	0x07
+#define C2PORT_PAGE_ERASE	0x08
+
+/* C2 status return codes */
+#define C2PORT_INVALID_COMMAND	0x00
+#define C2PORT_COMMAND_FAILED	0x02
+#define C2PORT_COMMAND_OK	0x0d
+
+/*
+ * C2 port low level signal managements
+ */
+
+static void c2port_reset(struct c2port_device *dev)
+{
+	struct c2port_ops *ops = dev->ops;
+
+	/* To reset the device we have to keep clock line low for at least
+	 * 20us.
+	 */
+	local_irq_disable();
+	ops->c2ck_set(dev, 0);
+	udelay(25);
+	ops->c2ck_set(dev, 1);
+	local_irq_enable();
+
+	udelay(1);
+}
+
+static void c2port_strobe_ck(struct c2port_device *dev)
+{
+	struct c2port_ops *ops = dev->ops;
+
+	/* During hi-low-hi transition we disable local IRQs to avoid
+	 * interructions since C2 port specification says that it must be
+	 * shorter than 5us, otherwise the microcontroller may consider
+	 * it as a reset signal!
+	 */
+	local_irq_disable();
+	ops->c2ck_set(dev, 0);
+	udelay(1);
+	ops->c2ck_set(dev, 1);
+	local_irq_enable();
+
+	udelay(1);
+}
+
+/*
+ * C2 port basic functions
+ */
+
+static void c2port_write_ar(struct c2port_device *dev, u8 addr)
+{
+	struct c2port_ops *ops = dev->ops;
+	int i;
+
+	/* START field */
+	c2port_strobe_ck(dev);
+
+	/* INS field (11b, LSB first) */
+	ops->c2d_dir(dev, 0);
+	ops->c2d_set(dev, 1);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 1);
+	c2port_strobe_ck(dev);
+
+	/* ADDRESS field */
+	for (i = 0; i < 8; i++) {
+		ops->c2d_set(dev, addr & 0x01);
+		c2port_strobe_ck(dev);
+
+		addr >>= 1;
+	}
+
+	/* STOP field */
+	ops->c2d_dir(dev, 1);
+	c2port_strobe_ck(dev);
+}
+
+static int c2port_read_ar(struct c2port_device *dev, u8 *addr)
+{
+	struct c2port_ops *ops = dev->ops;
+	int i;
+
+	/* START field */
+	c2port_strobe_ck(dev);
+
+	/* INS field (10b, LSB first) */
+	ops->c2d_dir(dev, 0);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 1);
+	c2port_strobe_ck(dev);
+
+	/* ADDRESS field */
+	ops->c2d_dir(dev, 1);
+	*addr = 0;
+	for (i = 0; i < 8; i++) {
+		*addr >>= 1;	/* shift in 8-bit ADDRESS field LSB first */
+
+		c2port_strobe_ck(dev);
+		if (ops->c2d_get(dev))
+			*addr |= 0x80;
+	}
+
+	/* STOP field */
+	c2port_strobe_ck(dev);
+
+	return 0;
+}
+
+static int c2port_write_dr(struct c2port_device *dev, u8 data)
+{
+	struct c2port_ops *ops = dev->ops;
+	int timeout, i;
+
+	/* START field */
+	c2port_strobe_ck(dev);
+
+	/* INS field (01b, LSB first) */
+	ops->c2d_dir(dev, 0);
+	ops->c2d_set(dev, 1);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+
+	/* LENGTH field (00b, LSB first -> 1 byte) */
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+
+	/* DATA field */
+	for (i = 0; i < 8; i++) {
+		ops->c2d_set(dev, data & 0x01);
+		c2port_strobe_ck(dev);
+
+		data >>= 1;
+	}
+
+	/* WAIT field */
+	ops->c2d_dir(dev, 1);
+	timeout = 20;
+	do {
+		c2port_strobe_ck(dev);
+		if (ops->c2d_get(dev))
+			break;
+
+		udelay(1);
+	} while (--timeout > 0);
+	if (timeout == 0)
+		return -EIO;
+
+	/* STOP field */
+	c2port_strobe_ck(dev);
+
+	return 0;
+}
+
+static int c2port_read_dr(struct c2port_device *dev, u8 *data)
+{
+	struct c2port_ops *ops = dev->ops;
+	int timeout, i;
+
+	/* START field */
+	c2port_strobe_ck(dev);
+
+	/* INS field (00b, LSB first) */
+	ops->c2d_dir(dev, 0);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+
+	/* LENGTH field (00b, LSB first -> 1 byte) */
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+
+	/* WAIT field */
+	ops->c2d_dir(dev, 1);
+	timeout = 20;
+	do {
+		c2port_strobe_ck(dev);
+		if (ops->c2d_get(dev))
+			break;
+
+		udelay(1);
+	} while (--timeout > 0);
+	if (timeout == 0)
+		return -EIO;
+
+	/* DATA field */
+	*data = 0;
+	for (i = 0; i < 8; i++) {
+		*data >>= 1;	/* shift in 8-bit DATA field LSB first */
+
+		c2port_strobe_ck(dev);
+		if (ops->c2d_get(dev))
+			*data |= 0x80;
+	}
+
+	/* STOP field */
+	c2port_strobe_ck(dev);
+
+	return 0;
+}
+
+static int c2port_poll_in_busy(struct c2port_device *dev)
+{
+	u8 addr;
+	int ret, timeout = 20;
+
+	do {
+		ret = (c2port_read_ar(dev, &addr));
+		if (ret < 0)
+			return -EIO;
+
+		if (!(addr & 0x02))
+			break;
+
+		udelay(1);
+	} while (--timeout > 0);
+	if (timeout == 0)
+		return -EIO;
+
+	return 0;
+}
+
+static int c2port_poll_out_ready(struct c2port_device *dev)
+{
+	u8 addr;
+	int ret, timeout = 10000; /* erase flash needs long time... */
+
+	do {
+		ret = (c2port_read_ar(dev, &addr));
+		if (ret < 0)
+			return -EIO;
+
+		if (addr & 0x01)
+			break;
+
+		udelay(1);
+	} while (--timeout > 0);
+	if (timeout == 0)
+		return -EIO;
+
+	return 0;
+}
+
+/*
+ * sysfs methods
+ */
+
+static ssize_t c2port_show_name(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", c2dev->name);
+}
+
+static ssize_t c2port_show_flash_blocks_num(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	struct c2port_ops *ops = c2dev->ops;
+
+	return sprintf(buf, "%d\n", ops->blocks_num);
+}
+
+static ssize_t c2port_show_flash_block_size(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	struct c2port_ops *ops = c2dev->ops;
+
+	return sprintf(buf, "%d\n", ops->block_size);
+}
+
+static ssize_t c2port_show_flash_size(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	struct c2port_ops *ops = c2dev->ops;
+
+	return sprintf(buf, "%d\n", ops->blocks_num * ops->block_size);
+}
+
+static ssize_t c2port_show_access(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", c2dev->access);
+}
+
+static ssize_t c2port_store_access(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	struct c2port_ops *ops = c2dev->ops;
+	int status, ret;
+
+	ret = sscanf(buf, "%d", &status);
+	if (ret != 1)
+		return -EINVAL;
+
+	mutex_lock(&c2dev->mutex);
+
+	c2dev->access = !!status;
+
+	/* If access is "on" clock should be HIGH _before_ setting the line
+	 * as output and data line should be set as INPUT anyway */
+	if (c2dev->access)
+		ops->c2ck_set(c2dev, 1);
+	ops->access(c2dev, c2dev->access);
+	if (c2dev->access)
+		ops->c2d_dir(c2dev, 1);
+
+	mutex_unlock(&c2dev->mutex);
+
+	return count;
+}
+
+static ssize_t c2port_store_reset(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+	/* Check the device access status */
+	if (!c2dev->access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+
+	c2port_reset(c2dev);
+	c2dev->flash_access = 0;
+
+	mutex_unlock(&c2dev->mutex);
+
+	return count;
+}
+
+static ssize_t __c2port_show_dev_id(struct c2port_device *dev, char *buf)
+{
+	u8 data;
+	int ret;
+
+	/* Select DEVICEID register for C2 data register accesses */
+	c2port_write_ar(dev, C2PORT_DEVICEID);
+
+	/* Read and return the device ID register */
+	ret = c2port_read_dr(dev, &data);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%d\n", data);
+}
+
+static ssize_t c2port_show_dev_id(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	ssize_t ret;
+
+	/* Check the device access status */
+	if (!c2dev->access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_show_dev_id(c2dev, buf);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0)
+		dev_err(dev, "cannot read from %s\n", c2dev->name);
+
+	return ret;
+}
+
+static ssize_t __c2port_show_rev_id(struct c2port_device *dev, char *buf)
+{
+	u8 data;
+	int ret;
+
+	/* Select REVID register for C2 data register accesses */
+	c2port_write_ar(dev, C2PORT_REVID);
+
+	/* Read and return the revision ID register */
+	ret = c2port_read_dr(dev, &data);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%d\n", data);
+}
+
+static ssize_t c2port_show_rev_id(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	ssize_t ret;
+
+	/* Check the device access status */
+	if (!c2dev->access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_show_rev_id(c2dev, buf);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0)
+		dev_err(c2dev->dev, "cannot read from %s\n", c2dev->name);
+
+	return ret;
+}
+
+static ssize_t c2port_show_flash_access(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", c2dev->flash_access);
+}
+
+static ssize_t __c2port_store_flash_access(struct c2port_device *dev,
+						int status)
+{
+	int ret;
+
+	/* Check the device access status */
+	if (!dev->access)
+		return -EBUSY;
+
+	dev->flash_access = !!status;
+
+	/* If flash_access is off we have nothing to do... */
+	if (dev->flash_access == 0)
+		return 0;
+
+	/* Target the C2 flash programming control register for C2 data
+	 * register access */
+	c2port_write_ar(dev, C2PORT_FPCTL);
+
+	/* Write the first keycode to enable C2 Flash programming */
+	ret = c2port_write_dr(dev, 0x02);
+	if (ret < 0)
+		return ret;
+
+	/* Write the second keycode to enable C2 Flash programming */
+	ret = c2port_write_dr(dev, 0x01);
+	if (ret < 0)
+		return ret;
+
+	/* Delay for at least 20ms to ensure the target is ready for
+	 * C2 flash programming */
+	mdelay(25);
+
+	return 0;
+}
+
+static ssize_t c2port_store_flash_access(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	int status;
+	ssize_t ret;
+
+	ret = sscanf(buf, "%d", &status);
+	if (ret != 1)
+		return -EINVAL;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_store_flash_access(c2dev, status);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0) {
+		dev_err(c2dev->dev, "cannot enable %s flash programming\n",
+			c2dev->name);
+		return ret;
+	}
+
+	return count;
+}
+
+static ssize_t __c2port_write_flash_erase(struct c2port_device *dev)
+{
+	u8 status;
+	int ret;
+
+	/* Target the C2 flash programming data register for C2 data register
+	 * access.
+	 */
+	c2port_write_ar(dev, C2PORT_FPDAT);
+
+	/* Send device erase command */
+	c2port_write_dr(dev, C2PORT_DEVICE_ERASE);
+
+	/* Wait for input acknowledge */
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Should check status before starting FLASH access sequence */
+
+	/* Wait for status information */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Read flash programming interface status */
+	ret = c2port_read_dr(dev, &status);
+	if (ret < 0)
+		return ret;
+	if (status != C2PORT_COMMAND_OK)
+		return -EBUSY;
+
+	/* Send a three-byte arming sequence to enable the device erase.
+	 * If the sequence is not received correctly, the command will be
+	 * ignored.
+	 * Sequence is: 0xde, 0xad, 0xa5.
+	 */
+	c2port_write_dr(dev, 0xde);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+	c2port_write_dr(dev, 0xad);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+	c2port_write_dr(dev, 0xa5);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static ssize_t c2port_store_flash_erase(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	int ret;
+
+	/* Check the device and flash access status */
+	if (!c2dev->access || !c2dev->flash_access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_write_flash_erase(c2dev);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0) {
+		dev_err(c2dev->dev, "cannot erase %s flash\n", c2dev->name);
+		return ret;
+	}
+
+	return count;
+}
+
+static ssize_t __c2port_read_flash_data(struct c2port_device *dev,
+				char *buffer, loff_t offset, size_t count)
+{
+	struct c2port_ops *ops = dev->ops;
+	u8 status, nread = 128;
+	int i, ret;
+
+	/* Check for flash end */
+	if (offset >= ops->block_size * ops->blocks_num)
+		return 0;
+
+	if (ops->block_size * ops->blocks_num - offset < nread)
+		nread = ops->block_size * ops->blocks_num - offset;
+	if (count < nread)
+		nread = count;
+	if (nread == 0)
+		return nread;
+
+	/* Target the C2 flash programming data register for C2 data register
+	 * access */
+	c2port_write_ar(dev, C2PORT_FPDAT);
+
+	/* Send flash block read command */
+	c2port_write_dr(dev, C2PORT_BLOCK_READ);
+
+	/* Wait for input acknowledge */
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Should check status before starting FLASH access sequence */
+
+	/* Wait for status information */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Read flash programming interface status */
+	ret = c2port_read_dr(dev, &status);
+	if (ret < 0)
+		return ret;
+	if (status != C2PORT_COMMAND_OK)
+		return -EBUSY;
+
+	/* Send address high byte */
+	c2port_write_dr(dev, offset >> 8);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Send address low byte */
+	c2port_write_dr(dev, offset & 0x00ff);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Send address block size */
+	c2port_write_dr(dev, nread);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Should check status before reading FLASH block */
+
+	/* Wait for status information */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Read flash programming interface status */
+	ret = c2port_read_dr(dev, &status);
+	if (ret < 0)
+		return ret;
+	if (status != C2PORT_COMMAND_OK)
+		return -EBUSY;
+
+	/* Read flash block */
+	for (i = 0; i < nread; i++) {
+		ret = c2port_poll_out_ready(dev);
+		if (ret < 0)
+			return ret;
+
+		ret = c2port_read_dr(dev, buffer+i);
+		if (ret < 0)
+			return ret;
+	}
+
+	return nread;
+}
+
+static ssize_t c2port_read_flash_data(struct kobject *kobj,
+				struct bin_attribute *attr,
+				char *buffer, loff_t offset, size_t count)
+{
+	struct c2port_device *c2dev =
+			dev_get_drvdata(container_of(kobj,
+						struct device, kobj));
+	ssize_t ret;
+
+	/* Check the device and flash access status */
+	if (!c2dev->access || !c2dev->flash_access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_read_flash_data(c2dev, buffer, offset, count);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0)
+		dev_err(c2dev->dev, "cannot read %s flash\n", c2dev->name);
+
+	return ret;
+}
+
+static ssize_t __c2port_write_flash_data(struct c2port_device *dev,
+				char *buffer, loff_t offset, size_t count)
+{
+	struct c2port_ops *ops = dev->ops;
+	u8 status, nwrite = 128;
+	int i, ret;
+
+	if (nwrite > count)
+		nwrite = count;
+	if (ops->block_size * ops->blocks_num - offset < nwrite)
+		nwrite = ops->block_size * ops->blocks_num - offset;
+
+	/* Check for flash end */
+	if (offset >= ops->block_size * ops->blocks_num)
+		return -EINVAL;
+
+	/* Target the C2 flash programming data register for C2 data register
+	 * access */
+	c2port_write_ar(dev, C2PORT_FPDAT);
+
+	/* Send flash block write command */
+	c2port_write_dr(dev, C2PORT_BLOCK_WRITE);
+
+	/* Wait for input acknowledge */
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Should check status before starting FLASH access sequence */
+
+	/* Wait for status information */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Read flash programming interface status */
+	ret = c2port_read_dr(dev, &status);
+	if (ret < 0)
+		return ret;
+	if (status != C2PORT_COMMAND_OK)
+		return -EBUSY;
+
+	/* Send address high byte */
+	c2port_write_dr(dev, offset >> 8);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Send address low byte */
+	c2port_write_dr(dev, offset & 0x00ff);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Send address block size */
+	c2port_write_dr(dev, nwrite);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Should check status before writing FLASH block */
+
+	/* Wait for status information */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Read flash programming interface status */
+	ret = c2port_read_dr(dev, &status);
+	if (ret < 0)
+		return ret;
+	if (status != C2PORT_COMMAND_OK)
+		return -EBUSY;
+
+	/* Write flash block */
+	for (i = 0; i < nwrite; i++) {
+		ret = c2port_write_dr(dev, *(buffer+i));
+		if (ret < 0)
+			return ret;
+
+		ret = c2port_poll_in_busy(dev);
+		if (ret < 0)
+			return ret;
+
+	}
+
+	/* Wait for last flash write to complete */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	return nwrite;
+}
+
+static ssize_t c2port_write_flash_data(struct kobject *kobj,
+				struct bin_attribute *attr,
+				char *buffer, loff_t offset, size_t count)
+{
+	struct c2port_device *c2dev =
+			dev_get_drvdata(container_of(kobj,
+						struct device, kobj));
+	int ret;
+
+	/* Check the device access status */
+	if (!c2dev->access || !c2dev->flash_access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_write_flash_data(c2dev, buffer, offset, count);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0)
+		dev_err(c2dev->dev, "cannot write %s flash\n", c2dev->name);
+
+	return ret;
+}
+
+/*
+ * Class attributes
+ */
+
+static struct device_attribute c2port_attrs[] = {
+	__ATTR(name, 0444, c2port_show_name, NULL),
+	__ATTR(flash_blocks_num, 0444, c2port_show_flash_blocks_num, NULL),
+	__ATTR(flash_block_size, 0444, c2port_show_flash_block_size, NULL),
+	__ATTR(flash_size, 0444, c2port_show_flash_size, NULL),
+	__ATTR(access, 0644, c2port_show_access, c2port_store_access),
+	__ATTR(reset, 0200, NULL, c2port_store_reset),
+	__ATTR(dev_id, 0444, c2port_show_dev_id, NULL),
+	__ATTR(rev_id, 0444, c2port_show_rev_id, NULL),
+
+	__ATTR(flash_access, 0644, c2port_show_flash_access,
+					c2port_store_flash_access),
+	__ATTR(flash_erase, 0200, NULL, c2port_store_flash_erase),
+	__ATTR_NULL,
+};
+
+static struct bin_attribute c2port_bin_attrs = {
+	.attr	= {
+		.name	= "flash_data",
+		.mode	= 0644
+	},
+	.read	= c2port_read_flash_data,
+	.write	= c2port_write_flash_data,
+	/* .size is computed at run-time */
+};
+
+/*
+ * Exported functions
+ */
+
+struct c2port_device *c2port_device_register(char *name,
+					struct c2port_ops *ops, void *devdata)
+{
+	struct c2port_device *c2dev;
+	int id, ret;
+
+	if (unlikely(!ops) || unlikely(!ops->access) || \
+		unlikely(!ops->c2d_dir) || unlikely(!ops->c2ck_set) || \
+		unlikely(!ops->c2d_get) || unlikely(!ops->c2d_set))
+		return ERR_PTR(-EINVAL);
+
+	c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL);
+	kmemcheck_annotate_bitfield(c2dev, flags);
+	if (unlikely(!c2dev))
+		return ERR_PTR(-ENOMEM);
+
+	ret = idr_pre_get(&c2port_idr, GFP_KERNEL);
+	if (!ret) {
+		ret = -ENOMEM;
+		goto error_idr_get_new;
+	}
+
+	spin_lock_irq(&c2port_idr_lock);
+	ret = idr_get_new(&c2port_idr, c2dev, &id);
+	spin_unlock_irq(&c2port_idr_lock);
+
+	if (ret < 0)
+		goto error_idr_get_new;
+	c2dev->id = id;
+
+	c2dev->dev = device_create(c2port_class, NULL, 0, c2dev,
+					"c2port%d", id);
+	if (unlikely(!c2dev->dev)) {
+		ret = -ENOMEM;
+		goto error_device_create;
+	}
+	dev_set_drvdata(c2dev->dev, c2dev);
+
+	strncpy(c2dev->name, name, C2PORT_NAME_LEN);
+	c2dev->ops = ops;
+	mutex_init(&c2dev->mutex);
+
+	/* Create binary file */
+	c2port_bin_attrs.size = ops->blocks_num * ops->block_size;
+	ret = device_create_bin_file(c2dev->dev, &c2port_bin_attrs);
+	if (unlikely(ret))
+		goto error_device_create_bin_file;
+
+	/* By default C2 port access is off */
+	c2dev->access = c2dev->flash_access = 0;
+	ops->access(c2dev, 0);
+
+	dev_info(c2dev->dev, "C2 port %s added\n", name);
+	dev_info(c2dev->dev, "%s flash has %d blocks x %d bytes "
+				"(%d bytes total)\n",
+				name, ops->blocks_num, ops->block_size,
+				ops->blocks_num * ops->block_size);
+
+	return c2dev;
+
+error_device_create_bin_file:
+	device_destroy(c2port_class, 0);
+
+error_device_create:
+	spin_lock_irq(&c2port_idr_lock);
+	idr_remove(&c2port_idr, id);
+	spin_unlock_irq(&c2port_idr_lock);
+
+error_idr_get_new:
+	kfree(c2dev);
+
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(c2port_device_register);
+
+void c2port_device_unregister(struct c2port_device *c2dev)
+{
+	if (!c2dev)
+		return;
+
+	dev_info(c2dev->dev, "C2 port %s removed\n", c2dev->name);
+
+	device_remove_bin_file(c2dev->dev, &c2port_bin_attrs);
+	spin_lock_irq(&c2port_idr_lock);
+	idr_remove(&c2port_idr, c2dev->id);
+	spin_unlock_irq(&c2port_idr_lock);
+
+	device_destroy(c2port_class, c2dev->id);
+
+	kfree(c2dev);
+}
+EXPORT_SYMBOL(c2port_device_unregister);
+
+/*
+ * Module stuff
+ */
+
+static int __init c2port_init(void)
+{
+	printk(KERN_INFO "Silicon Labs C2 port support v. " DRIVER_VERSION
+		" - (C) 2007 Rodolfo Giometti\n");
+
+	c2port_class = class_create(THIS_MODULE, "c2port");
+	if (!c2port_class) {
+		printk(KERN_ERR "c2port: failed to allocate class\n");
+		return -ENOMEM;
+	}
+	c2port_class->dev_attrs = c2port_attrs;
+
+	return 0;
+}
+
+static void __exit c2port_exit(void)
+{
+	class_destroy(c2port_class);
+}
+
+module_init(c2port_init);
+module_exit(c2port_exit);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("Silicon Labs C2 port support v. " DRIVER_VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig
new file mode 100644
index 00000000000..22429b8b106
--- /dev/null
+++ b/drivers/misc/cb710/Kconfig
@@ -0,0 +1,25 @@
+config CB710_CORE
+	tristate "ENE CB710/720 Flash memory card reader support"
+	depends on PCI
+	help
+	  This option enables support for PCI ENE CB710/720 Flash memory card
+	  reader found in some laptops (ie. some versions of HP Compaq nx9500).
+
+	  You will also have to select some flash card format drivers (MMC/SD,
+	  MemoryStick).
+
+	  This driver can also be built as a module. If so, the module
+	  will be called cb710.
+
+config CB710_DEBUG
+	bool "Enable driver debugging"
+	depends on CB710_CORE != n
+	default n
+	help
+	  This is an option for use by developers; most people should
+	  say N here.  This adds a lot of debugging output to dmesg.
+
+config CB710_DEBUG_ASSUMPTIONS
+	bool
+	depends on CB710_CORE != n
+	default y
diff --git a/drivers/misc/cb710/Makefile b/drivers/misc/cb710/Makefile
new file mode 100644
index 00000000000..7b80cbf1a60
--- /dev/null
+++ b/drivers/misc/cb710/Makefile
@@ -0,0 +1,8 @@
+ifeq ($(CONFIG_CB710_DEBUG),y)
+	EXTRA_CFLAGS		+= -DDEBUG
+endif
+
+obj-$(CONFIG_CB710_CORE)	+= cb710.o
+
+cb710-y				:= core.o sgbuf2.o
+cb710-$(CONFIG_CB710_DEBUG)	+= debug.o
diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c
new file mode 100644
index 00000000000..b14eab0f2ba
--- /dev/null
+++ b/drivers/misc/cb710/core.c
@@ -0,0 +1,357 @@
+/*
+ *  cb710/core.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/cb710.h>
+
+static DEFINE_IDA(cb710_ida);
+static DEFINE_SPINLOCK(cb710_ida_lock);
+
+void cb710_pci_update_config_reg(struct pci_dev *pdev,
+	int reg, uint32_t mask, uint32_t xor)
+{
+	u32 rval;
+
+	pci_read_config_dword(pdev, reg, &rval);
+	rval = (rval & mask) ^ xor;
+	pci_write_config_dword(pdev, reg, rval);
+}
+EXPORT_SYMBOL_GPL(cb710_pci_update_config_reg);
+
+/* Some magic writes based on Windows driver init code */
+static int __devinit cb710_pci_configure(struct pci_dev *pdev)
+{
+	unsigned int devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+	struct pci_dev *pdev0 = pci_get_slot(pdev->bus, devfn);
+	u32 val;
+
+	cb710_pci_update_config_reg(pdev, 0x48,
+		~0x000000FF, 0x0000003F);
+
+	pci_read_config_dword(pdev, 0x48, &val);
+	if (val & 0x80000000)
+		return 0;
+
+	if (!pdev0)
+		return -ENODEV;
+
+	if (pdev0->vendor == PCI_VENDOR_ID_ENE
+	    && pdev0->device == PCI_DEVICE_ID_ENE_720) {
+		cb710_pci_update_config_reg(pdev0, 0x8C,
+			~0x00F00000, 0x00100000);
+		cb710_pci_update_config_reg(pdev0, 0xB0,
+			~0x08000000, 0x08000000);
+	}
+
+	cb710_pci_update_config_reg(pdev0, 0x8C,
+		~0x00000F00, 0x00000200);
+	cb710_pci_update_config_reg(pdev0, 0x90,
+		~0x00060000, 0x00040000);
+
+	pci_dev_put(pdev0);
+
+	return 0;
+}
+
+static irqreturn_t cb710_irq_handler(int irq, void *data)
+{
+	struct cb710_chip *chip = data;
+	struct cb710_slot *slot = &chip->slot[0];
+	irqreturn_t handled = IRQ_NONE;
+	unsigned nr;
+
+	spin_lock(&chip->irq_lock); /* incl. smp_rmb() */
+
+	for (nr = chip->slots; nr; ++slot, --nr) {
+		cb710_irq_handler_t handler_func = slot->irq_handler;
+		if (handler_func && handler_func(slot))
+			handled = IRQ_HANDLED;
+	}
+
+	spin_unlock(&chip->irq_lock);
+
+	return handled;
+}
+
+static void cb710_release_slot(struct device *dev)
+{
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	struct cb710_slot *slot = cb710_pdev_to_slot(to_platform_device(dev));
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+
+	/* slot struct can be freed now */
+	atomic_dec(&chip->slot_refs_count);
+#endif
+}
+
+static int __devinit cb710_register_slot(struct cb710_chip *chip,
+	unsigned slot_mask, unsigned io_offset, const char *name)
+{
+	int nr = chip->slots;
+	struct cb710_slot *slot = &chip->slot[nr];
+	int err;
+
+	dev_dbg(cb710_chip_dev(chip),
+		"register: %s.%d; slot %d; mask %d; IO offset: 0x%02X\n",
+		name, chip->platform_id, nr, slot_mask, io_offset);
+
+	/* slot->irq_handler == NULL here; this needs to be
+	 * seen before platform_device_register() */
+	++chip->slots;
+	smp_wmb();
+
+	slot->iobase = chip->iobase + io_offset;
+	slot->pdev.name = name;
+	slot->pdev.id = chip->platform_id;
+	slot->pdev.dev.parent = &chip->pdev->dev;
+	slot->pdev.dev.release = cb710_release_slot;
+
+	err = platform_device_register(&slot->pdev);
+
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	atomic_inc(&chip->slot_refs_count);
+#endif
+
+	if (err) {
+		/* device_initialize() called from platform_device_register()
+		 * wants this on error path */
+		platform_device_put(&slot->pdev);
+
+		/* slot->irq_handler == NULL here anyway, so no lock needed */
+		--chip->slots;
+		return err;
+	}
+
+	chip->slot_mask |= slot_mask;
+
+	return 0;
+}
+
+static void cb710_unregister_slot(struct cb710_chip *chip,
+	unsigned slot_mask)
+{
+	int nr = chip->slots - 1;
+
+	if (!(chip->slot_mask & slot_mask))
+		return;
+
+	platform_device_unregister(&chip->slot[nr].pdev);
+
+	/* complementary to spin_unlock() in cb710_set_irq_handler() */
+	smp_rmb();
+	BUG_ON(chip->slot[nr].irq_handler != NULL);
+
+	/* slot->irq_handler == NULL here, so no lock needed */
+	--chip->slots;
+	chip->slot_mask &= ~slot_mask;
+}
+
+void cb710_set_irq_handler(struct cb710_slot *slot,
+	cb710_irq_handler_t handler)
+{
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->irq_lock, flags);
+	slot->irq_handler = handler;
+	spin_unlock_irqrestore(&chip->irq_lock, flags);
+}
+EXPORT_SYMBOL_GPL(cb710_set_irq_handler);
+
+#ifdef CONFIG_PM
+
+static int cb710_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+
+	free_irq(pdev->irq, chip);
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	if (state.event & PM_EVENT_SLEEP)
+		pci_set_power_state(pdev, PCI_D3cold);
+	return 0;
+}
+
+static int cb710_resume(struct pci_dev *pdev)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+	int err;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	return devm_request_irq(&pdev->dev, pdev->irq,
+		cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip);
+}
+
+#endif /* CONFIG_PM */
+
+static int __devinit cb710_probe(struct pci_dev *pdev,
+	const struct pci_device_id *ent)
+{
+	struct cb710_chip *chip;
+	unsigned long flags;
+	u32 val;
+	int err;
+	int n = 0;
+
+	err = cb710_pci_configure(pdev);
+	if (err)
+		return err;
+
+	/* this is actually magic... */
+	pci_read_config_dword(pdev, 0x48, &val);
+	if (!(val & 0x80000000)) {
+		pci_write_config_dword(pdev, 0x48, val|0x71000000);
+		pci_read_config_dword(pdev, 0x48, &val);
+	}
+
+	dev_dbg(&pdev->dev, "PCI config[0x48] = 0x%08X\n", val);
+	if (!(val & 0x70000000))
+		return -ENODEV;
+	val = (val >> 28) & 7;
+	if (val & CB710_SLOT_MMC)
+		++n;
+	if (val & CB710_SLOT_MS)
+		++n;
+	if (val & CB710_SLOT_SM)
+		++n;
+
+	chip = devm_kzalloc(&pdev->dev,
+		sizeof(*chip) + n * sizeof(*chip->slot), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, 0x0001, KBUILD_MODNAME);
+	if (err)
+		return err;
+
+	chip->pdev = pdev;
+	chip->iobase = pcim_iomap_table(pdev)[0];
+
+	pci_set_drvdata(pdev, chip);
+
+	err = devm_request_irq(&pdev->dev, pdev->irq,
+		cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip);
+	if (err)
+		return err;
+
+	do {
+		if (!ida_pre_get(&cb710_ida, GFP_KERNEL))
+			return -ENOMEM;
+
+		spin_lock_irqsave(&cb710_ida_lock, flags);
+		err = ida_get_new(&cb710_ida, &chip->platform_id);
+		spin_unlock_irqrestore(&cb710_ida_lock, flags);
+
+		if (err && err != -EAGAIN)
+			return err;
+	} while (err);
+
+
+	dev_info(&pdev->dev, "id %d, IO 0x%p, IRQ %d\n",
+		chip->platform_id, chip->iobase, pdev->irq);
+
+	if (val & CB710_SLOT_MMC) {	/* MMC/SD slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_MMC, 0x00, "cb710-mmc");
+		if (err)
+			return err;
+	}
+
+	if (val & CB710_SLOT_MS) {	/* MemoryStick slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_MS, 0x40, "cb710-ms");
+		if (err)
+			goto unreg_mmc;
+	}
+
+	if (val & CB710_SLOT_SM) {	/* SmartMedia slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_SM, 0x60, "cb710-sm");
+		if (err)
+			goto unreg_ms;
+	}
+
+	return 0;
+unreg_ms:
+	cb710_unregister_slot(chip, CB710_SLOT_MS);
+unreg_mmc:
+	cb710_unregister_slot(chip, CB710_SLOT_MMC);
+
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	BUG_ON(atomic_read(&chip->slot_refs_count) != 0);
+#endif
+	return err;
+}
+
+static void __devexit cb710_remove_one(struct pci_dev *pdev)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+	unsigned long flags;
+
+	cb710_unregister_slot(chip, CB710_SLOT_SM);
+	cb710_unregister_slot(chip, CB710_SLOT_MS);
+	cb710_unregister_slot(chip, CB710_SLOT_MMC);
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	BUG_ON(atomic_read(&chip->slot_refs_count) != 0);
+#endif
+
+	spin_lock_irqsave(&cb710_ida_lock, flags);
+	ida_remove(&cb710_ida, chip->platform_id);
+	spin_unlock_irqrestore(&cb710_ida_lock, flags);
+}
+
+static const struct pci_device_id cb710_pci_tbl[] = {
+	{ PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_CB710_FLASH,
+		PCI_ANY_ID, PCI_ANY_ID, },
+	{ 0, }
+};
+
+static struct pci_driver cb710_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = cb710_pci_tbl,
+	.probe = cb710_probe,
+	.remove = __devexit_p(cb710_remove_one),
+#ifdef CONFIG_PM
+	.suspend = cb710_suspend,
+	.resume = cb710_resume,
+#endif
+};
+
+static int __init cb710_init_module(void)
+{
+	return pci_register_driver(&cb710_driver);
+}
+
+static void __exit cb710_cleanup_module(void)
+{
+	pci_unregister_driver(&cb710_driver);
+	ida_destroy(&cb710_ida);
+}
+
+module_init(cb710_init_module);
+module_exit(cb710_cleanup_module);
+
+MODULE_AUTHOR("Michał Mirosław <mirq-linux@rere.qmqm.pl>");
+MODULE_DESCRIPTION("ENE CB710 memory card reader driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, cb710_pci_tbl);
diff --git a/drivers/misc/cb710/debug.c b/drivers/misc/cb710/debug.c
new file mode 100644
index 00000000000..02358d086e0
--- /dev/null
+++ b/drivers/misc/cb710/debug.c
@@ -0,0 +1,119 @@
+/*
+ *  cb710/debug.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/cb710.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#define CB710_REG_COUNT		0x80
+
+static const u16 allow[CB710_REG_COUNT/16] = {
+	0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF,
+	0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF,
+};
+static const char *const prefix[ARRAY_SIZE(allow)] = {
+	"MMC", "MMC", "MMC", "MMC",
+	"MS?", "MS?", "SM?", "SM?"
+};
+
+static inline int allow_reg_read(unsigned block, unsigned offset, unsigned bits)
+{
+	unsigned mask = (1 << bits/8) - 1;
+	offset *= bits/8;
+	return ((allow[block] >> offset) & mask) == mask;
+}
+
+#define CB710_READ_REGS_TEMPLATE(t)					\
+static void cb710_read_regs_##t(void __iomem *iobase,			\
+	u##t *reg, unsigned select)					\
+{									\
+	unsigned i, j;							\
+									\
+	for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) {	\
+		if (!(select & (1 << i)))					\
+			continue;					\
+									\
+		for (j = 0; j < 0x10/(t/8); ++j) {			\
+			if (!allow_reg_read(i, j, t))			\
+				continue;				\
+			reg[j] = ioread##t(iobase			\
+				+ (i << 4) + (j * (t/8)));		\
+		}							\
+	}								\
+}
+
+static const char cb710_regf_8[] = "%02X";
+static const char cb710_regf_16[] = "%04X";
+static const char cb710_regf_32[] = "%08X";
+static const char cb710_xes[] = "xxxxxxxx";
+
+#define CB710_DUMP_REGS_TEMPLATE(t)					\
+static void cb710_dump_regs_##t(struct device *dev,			\
+	const u##t *reg, unsigned select)				\
+{									\
+	const char *const xp = &cb710_xes[8 - t/4];			\
+	const char *const format = cb710_regf_##t;			\
+									\
+	char msg[100], *p;						\
+	unsigned i, j;							\
+									\
+	for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) {	\
+		if (!(select & (1 << i)))				\
+			continue;					\
+		p = msg;						\
+		for (j = 0; j < 0x10/(t/8); ++j) {			\
+			*p++ = ' ';					\
+			if (j == 8/(t/8))				\
+				*p++ = ' ';				\
+			if (allow_reg_read(i, j, t))			\
+				p += sprintf(p, format, reg[j]);	\
+			else						\
+				p += sprintf(p, "%s", xp);		\
+		}							\
+		dev_dbg(dev, "%s 0x%02X %s\n", prefix[i], i << 4, msg);	\
+	}								\
+}
+
+#define CB710_READ_AND_DUMP_REGS_TEMPLATE(t)				\
+static void cb710_read_and_dump_regs_##t(struct cb710_chip *chip,	\
+	unsigned select)						\
+{									\
+	u##t regs[CB710_REG_COUNT/sizeof(u##t)];			\
+									\
+	memset(&regs, 0, sizeof(regs));					\
+	cb710_read_regs_##t(chip->iobase, regs, select);		\
+	cb710_dump_regs_##t(cb710_chip_dev(chip), regs, select);	\
+}
+
+#define CB710_REG_ACCESS_TEMPLATES(t)		\
+  CB710_READ_REGS_TEMPLATE(t)			\
+  CB710_DUMP_REGS_TEMPLATE(t)			\
+  CB710_READ_AND_DUMP_REGS_TEMPLATE(t)
+
+CB710_REG_ACCESS_TEMPLATES(8)
+CB710_REG_ACCESS_TEMPLATES(16)
+CB710_REG_ACCESS_TEMPLATES(32)
+
+void cb710_dump_regs(struct cb710_chip *chip, unsigned select)
+{
+	if (!(select & CB710_DUMP_REGS_MASK))
+		select = CB710_DUMP_REGS_ALL;
+	if (!(select & CB710_DUMP_ACCESS_MASK))
+		select |= CB710_DUMP_ACCESS_8;
+
+	if (select & CB710_DUMP_ACCESS_32)
+		cb710_read_and_dump_regs_32(chip, select);
+	if (select & CB710_DUMP_ACCESS_16)
+		cb710_read_and_dump_regs_16(chip, select);
+	if (select & CB710_DUMP_ACCESS_8)
+		cb710_read_and_dump_regs_8(chip, select);
+}
+EXPORT_SYMBOL_GPL(cb710_dump_regs);
+
diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c
new file mode 100644
index 00000000000..d019746551f
--- /dev/null
+++ b/drivers/misc/cb710/sgbuf2.c
@@ -0,0 +1,146 @@
+/*
+ *  cb710/sgbuf2.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cb710.h>
+
+static bool sg_dwiter_next(struct sg_mapping_iter *miter)
+{
+	if (sg_miter_next(miter)) {
+		miter->consumed = 0;
+		return true;
+	} else
+		return false;
+}
+
+static bool sg_dwiter_is_at_end(struct sg_mapping_iter *miter)
+{
+	return miter->length == miter->consumed && !sg_dwiter_next(miter);
+}
+
+static uint32_t sg_dwiter_read_buffer(struct sg_mapping_iter *miter)
+{
+	size_t len, left = 4;
+	uint32_t data;
+	void *addr = &data;
+
+	do {
+		len = min(miter->length - miter->consumed, left);
+		memcpy(addr, miter->addr + miter->consumed, len);
+		miter->consumed += len;
+		left -= len;
+		if (!left)
+			return data;
+		addr += len;
+	} while (sg_dwiter_next(miter));
+
+	memset(addr, 0, left);
+	return data;
+}
+
+static inline bool needs_unaligned_copy(const void *ptr)
+{
+#ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS
+	return false;
+#else
+	return ((ptr - NULL) & 3) != 0;
+#endif
+}
+
+static bool sg_dwiter_get_next_block(struct sg_mapping_iter *miter, uint32_t **ptr)
+{
+	size_t len;
+
+	if (sg_dwiter_is_at_end(miter))
+		return true;
+
+	len = miter->length - miter->consumed;
+
+	if (likely(len >= 4 && !needs_unaligned_copy(
+			miter->addr + miter->consumed))) {
+		*ptr = miter->addr + miter->consumed;
+		miter->consumed += 4;
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * cb710_sg_dwiter_read_next_block() - get next 32-bit word from sg buffer
+ * @miter: sg mapping iterator used for reading
+ *
+ * Description:
+ *   Returns 32-bit word starting at byte pointed to by @miter@
+ *   handling any alignment issues.  Bytes past the buffer's end
+ *   are not accessed (read) but are returned as zeroes.  @miter@
+ *   is advanced by 4 bytes or to the end of buffer whichever is
+ *   closer.
+ *
+ * Context:
+ *   Same requirements as in sg_miter_next().
+ *
+ * Returns:
+ *   32-bit word just read.
+ */
+uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter)
+{
+	uint32_t *ptr = NULL;
+
+	if (likely(sg_dwiter_get_next_block(miter, &ptr)))
+		return ptr ? *ptr : 0;
+
+	return sg_dwiter_read_buffer(miter);
+}
+EXPORT_SYMBOL_GPL(cb710_sg_dwiter_read_next_block);
+
+static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data)
+{
+	size_t len, left = 4;
+	void *addr = &data;
+
+	do {
+		len = min(miter->length - miter->consumed, left);
+		memcpy(miter->addr, addr, len);
+		miter->consumed += len;
+		left -= len;
+		if (!left)
+			return;
+		addr += len;
+	} while (sg_dwiter_next(miter));
+}
+
+/**
+ * cb710_sg_dwiter_write_next_block() - write next 32-bit word to sg buffer
+ * @miter: sg mapping iterator used for writing
+ *
+ * Description:
+ *   Writes 32-bit word starting at byte pointed to by @miter@
+ *   handling any alignment issues.  Bytes which would be written
+ *   past the buffer's end are silently discarded. @miter@ is
+ *   advanced by 4 bytes or to the end of buffer whichever is closer.
+ *
+ * Context:
+ *   Same requirements as in sg_miter_next().
+ */
+void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data)
+{
+	uint32_t *ptr = NULL;
+
+	if (likely(sg_dwiter_get_next_block(miter, &ptr))) {
+		if (ptr)
+			*ptr = data;
+		else
+			return;
+	} else
+		sg_dwiter_write_slow(miter, data);
+}
+EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block);
+
diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c
new file mode 100644
index 00000000000..8110460558f
--- /dev/null
+++ b/drivers/misc/cs5535-mfgpt.c
@@ -0,0 +1,370 @@
+/*
+ * Driver for the CS5535/CS5536 Multi-Function General Purpose Timers (MFGPT)
+ *
+ * Copyright (C) 2006, Advanced Micro Devices, Inc.
+ * Copyright (C) 2007  Andres Salomon <dilinger@debian.org>
+ * Copyright (C) 2009  Andres Salomon <dilinger@collabora.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book.
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/cs5535.h>
+
+#define DRV_NAME "cs5535-mfgpt"
+#define MFGPT_BAR 2
+
+static int mfgpt_reset_timers;
+module_param_named(mfgptfix, mfgpt_reset_timers, int, 0644);
+MODULE_PARM_DESC(mfgptfix, "Reset the MFGPT timers during init; "
+		"required by some broken BIOSes (ie, TinyBIOS < 0.99).");
+
+struct cs5535_mfgpt_timer {
+	struct cs5535_mfgpt_chip *chip;
+	int nr;
+};
+
+static struct cs5535_mfgpt_chip {
+	DECLARE_BITMAP(avail, MFGPT_MAX_TIMERS);
+	resource_size_t base;
+
+	struct pci_dev *pdev;
+	spinlock_t lock;
+	int initialized;
+} cs5535_mfgpt_chip;
+
+int cs5535_mfgpt_toggle_event(struct cs5535_mfgpt_timer *timer, int cmp,
+		int event, int enable)
+{
+	uint32_t msr, mask, value, dummy;
+	int shift = (cmp == MFGPT_CMP1) ? 0 : 8;
+
+	if (!timer) {
+		WARN_ON(1);
+		return -EIO;
+	}
+
+	/*
+	 * The register maps for these are described in sections 6.17.1.x of
+	 * the AMD Geode CS5536 Companion Device Data Book.
+	 */
+	switch (event) {
+	case MFGPT_EVENT_RESET:
+		/*
+		 * XXX: According to the docs, we cannot reset timers above
+		 * 6; that is, resets for 7 and 8 will be ignored.  Is this
+		 * a problem?   -dilinger
+		 */
+		msr = MSR_MFGPT_NR;
+		mask = 1 << (timer->nr + 24);
+		break;
+
+	case MFGPT_EVENT_NMI:
+		msr = MSR_MFGPT_NR;
+		mask = 1 << (timer->nr + shift);
+		break;
+
+	case MFGPT_EVENT_IRQ:
+		msr = MSR_MFGPT_IRQ;
+		mask = 1 << (timer->nr + shift);
+		break;
+
+	default:
+		return -EIO;
+	}
+
+	rdmsr(msr, value, dummy);
+
+	if (enable)
+		value |= mask;
+	else
+		value &= ~mask;
+
+	wrmsr(msr, value, dummy);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_toggle_event);
+
+int cs5535_mfgpt_set_irq(struct cs5535_mfgpt_timer *timer, int cmp, int *irq,
+		int enable)
+{
+	uint32_t zsel, lpc, dummy;
+	int shift;
+
+	if (!timer) {
+		WARN_ON(1);
+		return -EIO;
+	}
+
+	/*
+	 * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
+	 * is using the same CMP of the timer's Siamese twin, the IRQ is set to
+	 * 2, and we mustn't use nor change it.
+	 * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
+	 * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
+	 * with *irq==0 is safe. Currently there _are_ no 2 drivers.
+	 */
+	rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+	shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer->nr % 4) * 4;
+	if (((zsel >> shift) & 0xF) == 2)
+		return -EIO;
+
+	/* Choose IRQ: if none supplied, keep IRQ already set or use default */
+	if (!*irq)
+		*irq = (zsel >> shift) & 0xF;
+	if (!*irq)
+		*irq = CONFIG_CS5535_MFGPT_DEFAULT_IRQ;
+
+	/* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
+	if (*irq < 1 || *irq == 2 || *irq > 15)
+		return -EIO;
+	rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
+	if (lpc & (1 << *irq))
+		return -EIO;
+
+	/* All chosen and checked - go for it */
+	if (cs5535_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+		return -EIO;
+	if (enable) {
+		zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
+		wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_set_irq);
+
+struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer_nr, int domain)
+{
+	struct cs5535_mfgpt_chip *mfgpt = &cs5535_mfgpt_chip;
+	struct cs5535_mfgpt_timer *timer = NULL;
+	unsigned long flags;
+	int max;
+
+	if (!mfgpt->initialized)
+		goto done;
+
+	/* only allocate timers from the working domain if requested */
+	if (domain == MFGPT_DOMAIN_WORKING)
+		max = 6;
+	else
+		max = MFGPT_MAX_TIMERS;
+
+	if (timer_nr >= max) {
+		/* programmer error.  silly programmers! */
+		WARN_ON(1);
+		goto done;
+	}
+
+	spin_lock_irqsave(&mfgpt->lock, flags);
+	if (timer_nr < 0) {
+		unsigned long t;
+
+		/* try to find any available timer */
+		t = find_first_bit(mfgpt->avail, max);
+		/* set timer_nr to -1 if no timers available */
+		timer_nr = t < max ? (int) t : -1;
+	} else {
+		/* check if the requested timer's available */
+		if (test_bit(timer_nr, mfgpt->avail))
+			timer_nr = -1;
+	}
+
+	if (timer_nr >= 0)
+		/* if timer_nr is not -1, it's an available timer */
+		__clear_bit(timer_nr, mfgpt->avail);
+	spin_unlock_irqrestore(&mfgpt->lock, flags);
+
+	if (timer_nr < 0)
+		goto done;
+
+	timer = kmalloc(sizeof(*timer), GFP_KERNEL);
+	if (!timer) {
+		/* aw hell */
+		spin_lock_irqsave(&mfgpt->lock, flags);
+		__set_bit(timer_nr, mfgpt->avail);
+		spin_unlock_irqrestore(&mfgpt->lock, flags);
+		goto done;
+	}
+	timer->chip = mfgpt;
+	timer->nr = timer_nr;
+	dev_info(&mfgpt->pdev->dev, "registered timer %d\n", timer_nr);
+
+done:
+	return timer;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_alloc_timer);
+
+/*
+ * XXX: This frees the timer memory, but never resets the actual hardware
+ * timer.  The old geode_mfgpt code did this; it would be good to figure
+ * out a way to actually release the hardware timer.  See comments below.
+ */
+void cs5535_mfgpt_free_timer(struct cs5535_mfgpt_timer *timer)
+{
+	kfree(timer);
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_free_timer);
+
+uint16_t cs5535_mfgpt_read(struct cs5535_mfgpt_timer *timer, uint16_t reg)
+{
+	return inw(timer->chip->base + reg + (timer->nr * 8));
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_read);
+
+void cs5535_mfgpt_write(struct cs5535_mfgpt_timer *timer, uint16_t reg,
+		uint16_t value)
+{
+	outw(value, timer->chip->base + reg + (timer->nr * 8));
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_write);
+
+/*
+ * This is a sledgehammer that resets all MFGPT timers. This is required by
+ * some broken BIOSes which leave the system in an unstable state
+ * (TinyBIOS 0.98, for example; fixed in 0.99).  It's uncertain as to
+ * whether or not this secret MSR can be used to release individual timers.
+ * Jordan tells me that he and Mitch once played w/ it, but it's unclear
+ * what the results of that were (and they experienced some instability).
+ */
+static void __init reset_all_timers(void)
+{
+	uint32_t val, dummy;
+
+	/* The following undocumented bit resets the MFGPT timers */
+	val = 0xFF; dummy = 0;
+	wrmsr(MSR_MFGPT_SETUP, val, dummy);
+}
+
+/*
+ * Check whether any MFGPTs are available for the kernel to use.  In most
+ * cases, firmware that uses AMD's VSA code will claim all timers during
+ * bootup; we certainly don't want to take them if they're already in use.
+ * In other cases (such as with VSAless OpenFirmware), the system firmware
+ * leaves timers available for us to use.
+ */
+static int __init scan_timers(struct cs5535_mfgpt_chip *mfgpt)
+{
+	struct cs5535_mfgpt_timer timer = { .chip = mfgpt };
+	unsigned long flags;
+	int timers = 0;
+	uint16_t val;
+	int i;
+
+	/* bios workaround */
+	if (mfgpt_reset_timers)
+		reset_all_timers();
+
+	/* just to be safe, protect this section w/ lock */
+	spin_lock_irqsave(&mfgpt->lock, flags);
+	for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
+		timer.nr = i;
+		val = cs5535_mfgpt_read(&timer, MFGPT_REG_SETUP);
+		if (!(val & MFGPT_SETUP_SETUP)) {
+			__set_bit(i, mfgpt->avail);
+			timers++;
+		}
+	}
+	spin_unlock_irqrestore(&mfgpt->lock, flags);
+
+	return timers;
+}
+
+static int __init cs5535_mfgpt_probe(struct pci_dev *pdev,
+		const struct pci_device_id *pci_id)
+{
+	int err, t;
+
+	/* There are two ways to get the MFGPT base address; one is by
+	 * fetching it from MSR_LBAR_MFGPT, the other is by reading the
+	 * PCI BAR info.  The latter method is easier (especially across
+	 * different architectures), so we'll stick with that for now.  If
+	 * it turns out to be unreliable in the face of crappy BIOSes, we
+	 * can always go back to using MSRs.. */
+
+	err = pci_enable_device_io(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "can't enable device IO\n");
+		goto done;
+	}
+
+	err = pci_request_region(pdev, MFGPT_BAR, DRV_NAME);
+	if (err) {
+		dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", MFGPT_BAR);
+		goto done;
+	}
+
+	/* set up the driver-specific struct */
+	cs5535_mfgpt_chip.base = pci_resource_start(pdev, MFGPT_BAR);
+	cs5535_mfgpt_chip.pdev = pdev;
+	spin_lock_init(&cs5535_mfgpt_chip.lock);
+
+	dev_info(&pdev->dev, "allocated PCI BAR #%d: base 0x%llx\n", MFGPT_BAR,
+			(unsigned long long) cs5535_mfgpt_chip.base);
+
+	/* detect the available timers */
+	t = scan_timers(&cs5535_mfgpt_chip);
+	dev_info(&pdev->dev, DRV_NAME ": %d MFGPT timers available\n", t);
+	cs5535_mfgpt_chip.initialized = 1;
+	return 0;
+
+done:
+	return err;
+}
+
+static struct pci_device_id cs5535_mfgpt_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_ISA) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA) },
+	{ 0, },
+};
+MODULE_DEVICE_TABLE(pci, cs5535_mfgpt_pci_tbl);
+
+/*
+ * Just like with the cs5535-gpio driver, we can't use the standard PCI driver
+ * registration stuff.  It only allows only one driver to bind to each PCI
+ * device, and we want the GPIO and MFGPT drivers to be able to share a PCI
+ * device.  Instead, we manually scan for the PCI device, request a single
+ * region, and keep track of the devices that we're using.
+ */
+
+static int __init cs5535_mfgpt_scan_pci(void)
+{
+	struct pci_dev *pdev;
+	int err = -ENODEV;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cs5535_mfgpt_pci_tbl); i++) {
+		pdev = pci_get_device(cs5535_mfgpt_pci_tbl[i].vendor,
+				cs5535_mfgpt_pci_tbl[i].device, NULL);
+		if (pdev) {
+			err = cs5535_mfgpt_probe(pdev,
+					&cs5535_mfgpt_pci_tbl[i]);
+			if (err)
+				pci_dev_put(pdev);
+
+			/* we only support a single CS5535/6 southbridge */
+			break;
+		}
+	}
+
+	return err;
+}
+
+static int __init cs5535_mfgpt_init(void)
+{
+	return cs5535_mfgpt_scan_pci();
+}
+
+module_init(cs5535_mfgpt_init);
+
+MODULE_AUTHOR("Andres Salomon <dilinger@collabora.co.uk>");
+MODULE_DESCRIPTION("CS5535/CS5536 MFGPT timer driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c
new file mode 100644
index 00000000000..f3ee4a1abb7
--- /dev/null
+++ b/drivers/misc/ds1682.c
@@ -0,0 +1,267 @@
+/*
+ * Dallas Semiconductor DS1682 Elapsed Time Recorder device driver
+ *
+ * Written by: Grant Likely <grant.likely@secretlab.ca>
+ *
+ * Copyright (C) 2007 Secret Lab Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * The DS1682 elapsed timer recorder is a simple device that implements
+ * one elapsed time counter, one event counter, an alarm signal and 10
+ * bytes of general purpose EEPROM.
+ *
+ * This driver provides access to the DS1682 counters and user data via
+ * the sysfs.  The following attributes are added to the device node:
+ *     elapsed_time (u32): Total elapsed event time in ms resolution
+ *     alarm_time (u32): When elapsed time exceeds the value in alarm_time,
+ *                       then the alarm pin is asserted.
+ *     event_count (u16): number of times the event pin has gone low.
+ *     eeprom (u8[10]): general purpose EEPROM
+ *
+ * Counter registers and user data are both read/write unless the device
+ * has been write protected.  This driver does not support turning off write
+ * protection.  Once write protection is turned on, it is impossible to
+ * turn it off again, so I have left the feature out of this driver to avoid
+ * accidental enabling, but it is trivial to add write protect support.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/hwmon-sysfs.h>
+
+/* Device registers */
+#define DS1682_REG_CONFIG		0x00
+#define DS1682_REG_ALARM		0x01
+#define DS1682_REG_ELAPSED		0x05
+#define DS1682_REG_EVT_CNTR		0x09
+#define DS1682_REG_EEPROM		0x0b
+#define DS1682_REG_RESET		0x1d
+#define DS1682_REG_WRITE_DISABLE	0x1e
+#define DS1682_REG_WRITE_MEM_DISABLE	0x1f
+
+#define DS1682_EEPROM_SIZE		10
+
+/*
+ * Generic counter attributes
+ */
+static ssize_t ds1682_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+	struct i2c_client *client = to_i2c_client(dev);
+	__le32 val = 0;
+	int rc;
+
+	dev_dbg(dev, "ds1682_show() called on %s\n", attr->attr.name);
+
+	/* Read the register */
+	rc = i2c_smbus_read_i2c_block_data(client, sattr->index, sattr->nr,
+					   (u8 *) & val);
+	if (rc < 0)
+		return -EIO;
+
+	/* Special case: the 32 bit regs are time values with 1/4s
+	 * resolution, scale them up to milliseconds */
+	if (sattr->nr == 4)
+		return sprintf(buf, "%llu\n",
+			((unsigned long long)le32_to_cpu(val)) * 250);
+
+	/* Format the output string and return # of bytes */
+	return sprintf(buf, "%li\n", (long)le32_to_cpu(val));
+}
+
+static ssize_t ds1682_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+	struct i2c_client *client = to_i2c_client(dev);
+	char *endp;
+	u64 val;
+	__le32 val_le;
+	int rc;
+
+	dev_dbg(dev, "ds1682_store() called on %s\n", attr->attr.name);
+
+	/* Decode input */
+	val = simple_strtoull(buf, &endp, 0);
+	if (buf == endp) {
+		dev_dbg(dev, "input string not a number\n");
+		return -EINVAL;
+	}
+
+	/* Special case: the 32 bit regs are time values with 1/4s
+	 * resolution, scale input down to quarter-seconds */
+	if (sattr->nr == 4)
+		do_div(val, 250);
+
+	/* write out the value */
+	val_le = cpu_to_le32(val);
+	rc = i2c_smbus_write_i2c_block_data(client, sattr->index, sattr->nr,
+					    (u8 *) & val_le);
+	if (rc < 0) {
+		dev_err(dev, "register write failed; reg=0x%x, size=%i\n",
+			sattr->index, sattr->nr);
+		return -EIO;
+	}
+
+	return count;
+}
+
+/*
+ * Simple register attributes
+ */
+static SENSOR_DEVICE_ATTR_2(elapsed_time, S_IRUGO | S_IWUSR, ds1682_show,
+			    ds1682_store, 4, DS1682_REG_ELAPSED);
+static SENSOR_DEVICE_ATTR_2(alarm_time, S_IRUGO | S_IWUSR, ds1682_show,
+			    ds1682_store, 4, DS1682_REG_ALARM);
+static SENSOR_DEVICE_ATTR_2(event_count, S_IRUGO | S_IWUSR, ds1682_show,
+			    ds1682_store, 2, DS1682_REG_EVT_CNTR);
+
+static const struct attribute_group ds1682_group = {
+	.attrs = (struct attribute *[]) {
+		&sensor_dev_attr_elapsed_time.dev_attr.attr,
+		&sensor_dev_attr_alarm_time.dev_attr.attr,
+		&sensor_dev_attr_event_count.dev_attr.attr,
+		NULL,
+	},
+};
+
+/*
+ * User data attribute
+ */
+static ssize_t ds1682_eeprom_read(struct kobject *kobj, struct bin_attribute *attr,
+				  char *buf, loff_t off, size_t count)
+{
+	struct i2c_client *client = kobj_to_i2c_client(kobj);
+	int rc;
+
+	dev_dbg(&client->dev, "ds1682_eeprom_read(p=%p, off=%lli, c=%zi)\n",
+		buf, off, count);
+
+	if (off >= DS1682_EEPROM_SIZE)
+		return 0;
+
+	if (off + count > DS1682_EEPROM_SIZE)
+		count = DS1682_EEPROM_SIZE - off;
+
+	rc = i2c_smbus_read_i2c_block_data(client, DS1682_REG_EEPROM + off,
+					   count, buf);
+	if (rc < 0)
+		return -EIO;
+
+	return count;
+}
+
+static ssize_t ds1682_eeprom_write(struct kobject *kobj, struct bin_attribute *attr,
+				   char *buf, loff_t off, size_t count)
+{
+	struct i2c_client *client = kobj_to_i2c_client(kobj);
+
+	dev_dbg(&client->dev, "ds1682_eeprom_write(p=%p, off=%lli, c=%zi)\n",
+		buf, off, count);
+
+	if (off >= DS1682_EEPROM_SIZE)
+		return -ENOSPC;
+
+	if (off + count > DS1682_EEPROM_SIZE)
+		count = DS1682_EEPROM_SIZE - off;
+
+	/* Write out to the device */
+	if (i2c_smbus_write_i2c_block_data(client, DS1682_REG_EEPROM + off,
+					   count, buf) < 0)
+		return -EIO;
+
+	return count;
+}
+
+static struct bin_attribute ds1682_eeprom_attr = {
+	.attr = {
+		.name = "eeprom",
+		.mode = S_IRUGO | S_IWUSR,
+	},
+	.size = DS1682_EEPROM_SIZE,
+	.read = ds1682_eeprom_read,
+	.write = ds1682_eeprom_write,
+};
+
+/*
+ * Called when a ds1682 device is matched with this driver
+ */
+static int ds1682_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	int rc;
+
+	if (!i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_I2C_BLOCK)) {
+		dev_err(&client->dev, "i2c bus does not support the ds1682\n");
+		rc = -ENODEV;
+		goto exit;
+	}
+
+	rc = sysfs_create_group(&client->dev.kobj, &ds1682_group);
+	if (rc)
+		goto exit;
+
+	rc = sysfs_create_bin_file(&client->dev.kobj, &ds1682_eeprom_attr);
+	if (rc)
+		goto exit_bin_attr;
+
+	return 0;
+
+ exit_bin_attr:
+	sysfs_remove_group(&client->dev.kobj, &ds1682_group);
+ exit:
+	return rc;
+}
+
+static int ds1682_remove(struct i2c_client *client)
+{
+	sysfs_remove_bin_file(&client->dev.kobj, &ds1682_eeprom_attr);
+	sysfs_remove_group(&client->dev.kobj, &ds1682_group);
+	return 0;
+}
+
+static const struct i2c_device_id ds1682_id[] = {
+	{ "ds1682", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, ds1682_id);
+
+static struct i2c_driver ds1682_driver = {
+	.driver = {
+		.name = "ds1682",
+	},
+	.probe = ds1682_probe,
+	.remove = ds1682_remove,
+	.id_table = ds1682_id,
+};
+
+static int __init ds1682_init(void)
+{
+	return i2c_add_driver(&ds1682_driver);
+}
+
+static void __exit ds1682_exit(void)
+{
+	i2c_del_driver(&ds1682_driver);
+}
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("DS1682 Elapsed Time Indicator driver");
+MODULE_LICENSE("GPL");
+
+module_init(ds1682_init);
+module_exit(ds1682_exit);
diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
new file mode 100644
index 00000000000..9118613af32
--- /dev/null
+++ b/drivers/misc/eeprom/Kconfig
@@ -0,0 +1,73 @@
+menu "EEPROM support"
+
+config EEPROM_AT24
+	tristate "I2C EEPROMs from most vendors"
+	depends on I2C && SYSFS
+	help
+	  Enable this driver to get read/write support to most I2C EEPROMs,
+	  after you configure the driver to know about each EEPROM on
+	  your target board.  Use these generic chip names, instead of
+	  vendor-specific ones like at24c64 or 24lc02:
+
+	     24c00, 24c01, 24c02, spd (readonly 24c02), 24c04, 24c08,
+	     24c16, 24c32, 24c64, 24c128, 24c256, 24c512, 24c1024
+
+	  Unless you like data loss puzzles, always be sure that any chip
+	  you configure as a 24c32 (32 kbit) or larger is NOT really a
+	  24c16 (16 kbit) or smaller, and vice versa. Marking the chip
+	  as read-only won't help recover from this. Also, if your chip
+	  has any software write-protect mechanism you may want to review the
+	  code to make sure this driver won't turn it on by accident.
+
+	  If you use this with an SMBus adapter instead of an I2C adapter,
+	  full functionality is not available.  Only smaller devices are
+	  supported (24c16 and below, max 4 kByte).
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called at24.
+
+config EEPROM_AT25
+	tristate "SPI EEPROMs from most vendors"
+	depends on SPI && SYSFS
+	help
+	  Enable this driver to get read/write support to most SPI EEPROMs,
+	  after you configure the board init code to know about each eeprom
+	  on your target board.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called at25.
+
+config EEPROM_LEGACY
+	tristate "Old I2C EEPROM reader"
+	depends on I2C && SYSFS
+	help
+	  If you say yes here you get read-only access to the EEPROM data
+	  available on modern memory DIMMs and Sony Vaio laptops via I2C. Such
+	  EEPROMs could theoretically be available on other devices as well.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called eeprom.
+
+config EEPROM_MAX6875
+	tristate "Maxim MAX6874/5 power supply supervisor"
+	depends on I2C && EXPERIMENTAL
+	help
+	  If you say yes here you get read-only support for the user EEPROM of
+	  the Maxim MAX6874/5 EEPROM-programmable, quad power-supply
+	  sequencer/supervisor.
+
+	  All other features of this chip should be accessed via i2c-dev.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called max6875.
+
+
+config EEPROM_93CX6
+	tristate "EEPROM 93CX6 support"
+	help
+	  This is a driver for the EEPROM chipsets 93c46 and 93c66.
+	  The driver supports both read as well as write commands.
+
+	  If unsure, say N.
+
+endmenu
diff --git a/drivers/misc/eeprom/Makefile b/drivers/misc/eeprom/Makefile
new file mode 100644
index 00000000000..df3d68ffa9d
--- /dev/null
+++ b/drivers/misc/eeprom/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_EEPROM_AT24)	+= at24.o
+obj-$(CONFIG_EEPROM_AT25)	+= at25.o
+obj-$(CONFIG_EEPROM_LEGACY)	+= eeprom.o
+obj-$(CONFIG_EEPROM_MAX6875)	+= max6875.o
+obj-$(CONFIG_EEPROM_93CX6)	+= eeprom_93cx6.o
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
new file mode 100644
index 00000000000..2cb2736d65a
--- /dev/null
+++ b/drivers/misc/eeprom/at24.c
@@ -0,0 +1,637 @@
+/*
+ * at24.c - handle most I2C EEPROMs
+ *
+ * Copyright (C) 2005-2007 David Brownell
+ * Copyright (C) 2008 Wolfram Sang, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+#include <linux/mod_devicetable.h>
+#include <linux/log2.h>
+#include <linux/bitops.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/i2c/at24.h>
+
+/*
+ * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
+ * Differences between different vendor product lines (like Atmel AT24C or
+ * MicroChip 24LC, etc) won't much matter for typical read/write access.
+ * There are also I2C RAM chips, likewise interchangeable. One example
+ * would be the PCF8570, which acts like a 24c02 EEPROM (256 bytes).
+ *
+ * However, misconfiguration can lose data. "Set 16-bit memory address"
+ * to a part with 8-bit addressing will overwrite data. Writing with too
+ * big a page size also loses data. And it's not safe to assume that the
+ * conventional addresses 0x50..0x57 only hold eeproms; a PCF8563 RTC
+ * uses 0x51, for just one example.
+ *
+ * Accordingly, explicit board-specific configuration data should be used
+ * in almost all cases. (One partial exception is an SMBus used to access
+ * "SPD" data for DRAM sticks. Those only use 24c02 EEPROMs.)
+ *
+ * So this driver uses "new style" I2C driver binding, expecting to be
+ * told what devices exist. That may be in arch/X/mach-Y/board-Z.c or
+ * similar kernel-resident tables; or, configuration data coming from
+ * a bootloader.
+ *
+ * Other than binding model, current differences from "eeprom" driver are
+ * that this one handles write access and isn't restricted to 24c02 devices.
+ * It also handles larger devices (32 kbit and up) with two-byte addresses,
+ * which won't work on pure SMBus systems.
+ */
+
+struct at24_data {
+	struct at24_platform_data chip;
+	struct memory_accessor macc;
+	bool use_smbus;
+
+	/*
+	 * Lock protects against activities from other Linux tasks,
+	 * but not from changes by other I2C masters.
+	 */
+	struct mutex lock;
+	struct bin_attribute bin;
+
+	u8 *writebuf;
+	unsigned write_max;
+	unsigned num_addresses;
+
+	/*
+	 * Some chips tie up multiple I2C addresses; dummy devices reserve
+	 * them for us, and we'll use them with SMBus calls.
+	 */
+	struct i2c_client *client[];
+};
+
+/*
+ * This parameter is to help this driver avoid blocking other drivers out
+ * of I2C for potentially troublesome amounts of time. With a 100 kHz I2C
+ * clock, one 256 byte read takes about 1/43 second which is excessive;
+ * but the 1/170 second it takes at 400 kHz may be quite reasonable; and
+ * at 1 MHz (Fm+) a 1/430 second delay could easily be invisible.
+ *
+ * This value is forced to be a power of two so that writes align on pages.
+ */
+static unsigned io_limit = 128;
+module_param(io_limit, uint, 0);
+MODULE_PARM_DESC(io_limit, "Maximum bytes per I/O (default 128)");
+
+/*
+ * Specs often allow 5 msec for a page write, sometimes 20 msec;
+ * it's important to recover from write timeouts.
+ */
+static unsigned write_timeout = 25;
+module_param(write_timeout, uint, 0);
+MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)");
+
+#define AT24_SIZE_BYTELEN 5
+#define AT24_SIZE_FLAGS 8
+
+#define AT24_BITMASK(x) (BIT(x) - 1)
+
+/* create non-zero magic value for given eeprom parameters */
+#define AT24_DEVICE_MAGIC(_len, _flags) 		\
+	((1 << AT24_SIZE_FLAGS | (_flags)) 		\
+	    << AT24_SIZE_BYTELEN | ilog2(_len))
+
+static const struct i2c_device_id at24_ids[] = {
+	/* needs 8 addresses as A0-A2 are ignored */
+	{ "24c00", AT24_DEVICE_MAGIC(128 / 8, AT24_FLAG_TAKE8ADDR) },
+	/* old variants can't be handled with this generic entry! */
+	{ "24c01", AT24_DEVICE_MAGIC(1024 / 8, 0) },
+	{ "24c02", AT24_DEVICE_MAGIC(2048 / 8, 0) },
+	/* spd is a 24c02 in memory DIMMs */
+	{ "spd", AT24_DEVICE_MAGIC(2048 / 8,
+		AT24_FLAG_READONLY | AT24_FLAG_IRUGO) },
+	{ "24c04", AT24_DEVICE_MAGIC(4096 / 8, 0) },
+	/* 24rf08 quirk is handled at i2c-core */
+	{ "24c08", AT24_DEVICE_MAGIC(8192 / 8, 0) },
+	{ "24c16", AT24_DEVICE_MAGIC(16384 / 8, 0) },
+	{ "24c32", AT24_DEVICE_MAGIC(32768 / 8, AT24_FLAG_ADDR16) },
+	{ "24c64", AT24_DEVICE_MAGIC(65536 / 8, AT24_FLAG_ADDR16) },
+	{ "24c128", AT24_DEVICE_MAGIC(131072 / 8, AT24_FLAG_ADDR16) },
+	{ "24c256", AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16) },
+	{ "24c512", AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16) },
+	{ "24c1024", AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16) },
+	{ "at24", 0 },
+	{ /* END OF LIST */ }
+};
+MODULE_DEVICE_TABLE(i2c, at24_ids);
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * This routine supports chips which consume multiple I2C addresses. It
+ * computes the addressing information to be used for a given r/w request.
+ * Assumes that sanity checks for offset happened at sysfs-layer.
+ */
+static struct i2c_client *at24_translate_offset(struct at24_data *at24,
+		unsigned *offset)
+{
+	unsigned i;
+
+	if (at24->chip.flags & AT24_FLAG_ADDR16) {
+		i = *offset >> 16;
+		*offset &= 0xffff;
+	} else {
+		i = *offset >> 8;
+		*offset &= 0xff;
+	}
+
+	return at24->client[i];
+}
+
+static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf,
+		unsigned offset, size_t count)
+{
+	struct i2c_msg msg[2];
+	u8 msgbuf[2];
+	struct i2c_client *client;
+	unsigned long timeout, read_time;
+	int status, i;
+
+	memset(msg, 0, sizeof(msg));
+
+	/*
+	 * REVISIT some multi-address chips don't rollover page reads to
+	 * the next slave address, so we may need to truncate the count.
+	 * Those chips might need another quirk flag.
+	 *
+	 * If the real hardware used four adjacent 24c02 chips and that
+	 * were misconfigured as one 24c08, that would be a similar effect:
+	 * one "eeprom" file not four, but larger reads would fail when
+	 * they crossed certain pages.
+	 */
+
+	/*
+	 * Slave address and byte offset derive from the offset. Always
+	 * set the byte address; on a multi-master board, another master
+	 * may have changed the chip's "current" address pointer.
+	 */
+	client = at24_translate_offset(at24, &offset);
+
+	if (count > io_limit)
+		count = io_limit;
+
+	if (at24->use_smbus) {
+		/* Smaller eeproms can work given some SMBus extension calls */
+		if (count > I2C_SMBUS_BLOCK_MAX)
+			count = I2C_SMBUS_BLOCK_MAX;
+	} else {
+		/*
+		 * When we have a better choice than SMBus calls, use a
+		 * combined I2C message. Write address; then read up to
+		 * io_limit data bytes. Note that read page rollover helps us
+		 * here (unlike writes). msgbuf is u8 and will cast to our
+		 * needs.
+		 */
+		i = 0;
+		if (at24->chip.flags & AT24_FLAG_ADDR16)
+			msgbuf[i++] = offset >> 8;
+		msgbuf[i++] = offset;
+
+		msg[0].addr = client->addr;
+		msg[0].buf = msgbuf;
+		msg[0].len = i;
+
+		msg[1].addr = client->addr;
+		msg[1].flags = I2C_M_RD;
+		msg[1].buf = buf;
+		msg[1].len = count;
+	}
+
+	/*
+	 * Reads fail if the previous write didn't complete yet. We may
+	 * loop a few times until this one succeeds, waiting at least
+	 * long enough for one entire page write to work.
+	 */
+	timeout = jiffies + msecs_to_jiffies(write_timeout);
+	do {
+		read_time = jiffies;
+		if (at24->use_smbus) {
+			status = i2c_smbus_read_i2c_block_data(client, offset,
+					count, buf);
+		} else {
+			status = i2c_transfer(client->adapter, msg, 2);
+			if (status == 2)
+				status = count;
+		}
+		dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n",
+				count, offset, status, jiffies);
+
+		if (status == count)
+			return count;
+
+		/* REVISIT: at HZ=100, this is sloooow */
+		msleep(1);
+	} while (time_before(read_time, timeout));
+
+	return -ETIMEDOUT;
+}
+
+static ssize_t at24_read(struct at24_data *at24,
+		char *buf, loff_t off, size_t count)
+{
+	ssize_t retval = 0;
+
+	if (unlikely(!count))
+		return count;
+
+	/*
+	 * Read data from chip, protecting against concurrent updates
+	 * from this host, but not from other I2C masters.
+	 */
+	mutex_lock(&at24->lock);
+
+	while (count) {
+		ssize_t	status;
+
+		status = at24_eeprom_read(at24, buf, off, count);
+		if (status <= 0) {
+			if (retval == 0)
+				retval = status;
+			break;
+		}
+		buf += status;
+		off += status;
+		count -= status;
+		retval += status;
+	}
+
+	mutex_unlock(&at24->lock);
+
+	return retval;
+}
+
+static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr,
+		char *buf, loff_t off, size_t count)
+{
+	struct at24_data *at24;
+
+	at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
+	return at24_read(at24, buf, off, count);
+}
+
+
+/*
+ * Note that if the hardware write-protect pin is pulled high, the whole
+ * chip is normally write protected. But there are plenty of product
+ * variants here, including OTP fuses and partial chip protect.
+ *
+ * We only use page mode writes; the alternative is sloooow. This routine
+ * writes at most one page.
+ */
+static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf,
+		unsigned offset, size_t count)
+{
+	struct i2c_client *client;
+	struct i2c_msg msg;
+	ssize_t status;
+	unsigned long timeout, write_time;
+	unsigned next_page;
+
+	/* Get corresponding I2C address and adjust offset */
+	client = at24_translate_offset(at24, &offset);
+
+	/* write_max is at most a page */
+	if (count > at24->write_max)
+		count = at24->write_max;
+
+	/* Never roll over backwards, to the start of this page */
+	next_page = roundup(offset + 1, at24->chip.page_size);
+	if (offset + count > next_page)
+		count = next_page - offset;
+
+	/* If we'll use I2C calls for I/O, set up the message */
+	if (!at24->use_smbus) {
+		int i = 0;
+
+		msg.addr = client->addr;
+		msg.flags = 0;
+
+		/* msg.buf is u8 and casts will mask the values */
+		msg.buf = at24->writebuf;
+		if (at24->chip.flags & AT24_FLAG_ADDR16)
+			msg.buf[i++] = offset >> 8;
+
+		msg.buf[i++] = offset;
+		memcpy(&msg.buf[i], buf, count);
+		msg.len = i + count;
+	}
+
+	/*
+	 * Writes fail if the previous one didn't complete yet. We may
+	 * loop a few times until this one succeeds, waiting at least
+	 * long enough for one entire page write to work.
+	 */
+	timeout = jiffies + msecs_to_jiffies(write_timeout);
+	do {
+		write_time = jiffies;
+		if (at24->use_smbus) {
+			status = i2c_smbus_write_i2c_block_data(client,
+					offset, count, buf);
+			if (status == 0)
+				status = count;
+		} else {
+			status = i2c_transfer(client->adapter, &msg, 1);
+			if (status == 1)
+				status = count;
+		}
+		dev_dbg(&client->dev, "write %zu@%d --> %zd (%ld)\n",
+				count, offset, status, jiffies);
+
+		if (status == count)
+			return count;
+
+		/* REVISIT: at HZ=100, this is sloooow */
+		msleep(1);
+	} while (time_before(write_time, timeout));
+
+	return -ETIMEDOUT;
+}
+
+static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off,
+			  size_t count)
+{
+	ssize_t retval = 0;
+
+	if (unlikely(!count))
+		return count;
+
+	/*
+	 * Write data to chip, protecting against concurrent updates
+	 * from this host, but not from other I2C masters.
+	 */
+	mutex_lock(&at24->lock);
+
+	while (count) {
+		ssize_t	status;
+
+		status = at24_eeprom_write(at24, buf, off, count);
+		if (status <= 0) {
+			if (retval == 0)
+				retval = status;
+			break;
+		}
+		buf += status;
+		off += status;
+		count -= status;
+		retval += status;
+	}
+
+	mutex_unlock(&at24->lock);
+
+	return retval;
+}
+
+static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr,
+		char *buf, loff_t off, size_t count)
+{
+	struct at24_data *at24;
+
+	at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
+	return at24_write(at24, buf, off, count);
+}
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * This lets other kernel code access the eeprom data. For example, it
+ * might hold a board's Ethernet address, or board-specific calibration
+ * data generated on the manufacturing floor.
+ */
+
+static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf,
+			 off_t offset, size_t count)
+{
+	struct at24_data *at24 = container_of(macc, struct at24_data, macc);
+
+	return at24_read(at24, buf, offset, count);
+}
+
+static ssize_t at24_macc_write(struct memory_accessor *macc, const char *buf,
+			  off_t offset, size_t count)
+{
+	struct at24_data *at24 = container_of(macc, struct at24_data, macc);
+
+	return at24_write(at24, buf, offset, count);
+}
+
+/*-------------------------------------------------------------------------*/
+
+static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	struct at24_platform_data chip;
+	bool writable;
+	bool use_smbus = false;
+	struct at24_data *at24;
+	int err;
+	unsigned i, num_addresses;
+	kernel_ulong_t magic;
+
+	if (client->dev.platform_data) {
+		chip = *(struct at24_platform_data *)client->dev.platform_data;
+	} else {
+		if (!id->driver_data) {
+			err = -ENODEV;
+			goto err_out;
+		}
+		magic = id->driver_data;
+		chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN));
+		magic >>= AT24_SIZE_BYTELEN;
+		chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS);
+		/*
+		 * This is slow, but we can't know all eeproms, so we better
+		 * play safe. Specifying custom eeprom-types via platform_data
+		 * is recommended anyhow.
+		 */
+		chip.page_size = 1;
+
+		chip.setup = NULL;
+		chip.context = NULL;
+	}
+
+	if (!is_power_of_2(chip.byte_len))
+		dev_warn(&client->dev,
+			"byte_len looks suspicious (no power of 2)!\n");
+	if (!is_power_of_2(chip.page_size))
+		dev_warn(&client->dev,
+			"page_size looks suspicious (no power of 2)!\n");
+
+	/* Use I2C operations unless we're stuck with SMBus extensions. */
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		if (chip.flags & AT24_FLAG_ADDR16) {
+			err = -EPFNOSUPPORT;
+			goto err_out;
+		}
+		if (!i2c_check_functionality(client->adapter,
+				I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
+			err = -EPFNOSUPPORT;
+			goto err_out;
+		}
+		use_smbus = true;
+	}
+
+	if (chip.flags & AT24_FLAG_TAKE8ADDR)
+		num_addresses = 8;
+	else
+		num_addresses =	DIV_ROUND_UP(chip.byte_len,
+			(chip.flags & AT24_FLAG_ADDR16) ? 65536 : 256);
+
+	at24 = kzalloc(sizeof(struct at24_data) +
+		num_addresses * sizeof(struct i2c_client *), GFP_KERNEL);
+	if (!at24) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	mutex_init(&at24->lock);
+	at24->use_smbus = use_smbus;
+	at24->chip = chip;
+	at24->num_addresses = num_addresses;
+
+	/*
+	 * Export the EEPROM bytes through sysfs, since that's convenient.
+	 * By default, only root should see the data (maybe passwords etc)
+	 */
+	at24->bin.attr.name = "eeprom";
+	at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR;
+	at24->bin.read = at24_bin_read;
+	at24->bin.size = chip.byte_len;
+
+	at24->macc.read = at24_macc_read;
+
+	writable = !(chip.flags & AT24_FLAG_READONLY);
+	if (writable) {
+		if (!use_smbus || i2c_check_functionality(client->adapter,
+				I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
+
+			unsigned write_max = chip.page_size;
+
+			at24->macc.write = at24_macc_write;
+
+			at24->bin.write = at24_bin_write;
+			at24->bin.attr.mode |= S_IWUSR;
+
+			if (write_max > io_limit)
+				write_max = io_limit;
+			if (use_smbus && write_max > I2C_SMBUS_BLOCK_MAX)
+				write_max = I2C_SMBUS_BLOCK_MAX;
+			at24->write_max = write_max;
+
+			/* buffer (data + address at the beginning) */
+			at24->writebuf = kmalloc(write_max + 2, GFP_KERNEL);
+			if (!at24->writebuf) {
+				err = -ENOMEM;
+				goto err_struct;
+			}
+		} else {
+			dev_warn(&client->dev,
+				"cannot write due to controller restrictions.");
+		}
+	}
+
+	at24->client[0] = client;
+
+	/* use dummy devices for multiple-address chips */
+	for (i = 1; i < num_addresses; i++) {
+		at24->client[i] = i2c_new_dummy(client->adapter,
+					client->addr + i);
+		if (!at24->client[i]) {
+			dev_err(&client->dev, "address 0x%02x unavailable\n",
+					client->addr + i);
+			err = -EADDRINUSE;
+			goto err_clients;
+		}
+	}
+
+	err = sysfs_create_bin_file(&client->dev.kobj, &at24->bin);
+	if (err)
+		goto err_clients;
+
+	i2c_set_clientdata(client, at24);
+
+	dev_info(&client->dev, "%zu byte %s EEPROM %s\n",
+		at24->bin.size, client->name,
+		writable ? "(writable)" : "(read-only)");
+	dev_dbg(&client->dev,
+		"page_size %d, num_addresses %d, write_max %d%s\n",
+		chip.page_size, num_addresses,
+		at24->write_max,
+		use_smbus ? ", use_smbus" : "");
+
+	/* export data to kernel code */
+	if (chip.setup)
+		chip.setup(&at24->macc, chip.context);
+
+	return 0;
+
+err_clients:
+	for (i = 1; i < num_addresses; i++)
+		if (at24->client[i])
+			i2c_unregister_device(at24->client[i]);
+
+	kfree(at24->writebuf);
+err_struct:
+	kfree(at24);
+err_out:
+	dev_dbg(&client->dev, "probe error %d\n", err);
+	return err;
+}
+
+static int __devexit at24_remove(struct i2c_client *client)
+{
+	struct at24_data *at24;
+	int i;
+
+	at24 = i2c_get_clientdata(client);
+	sysfs_remove_bin_file(&client->dev.kobj, &at24->bin);
+
+	for (i = 1; i < at24->num_addresses; i++)
+		i2c_unregister_device(at24->client[i]);
+
+	kfree(at24->writebuf);
+	kfree(at24);
+	i2c_set_clientdata(client, NULL);
+	return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+
+static struct i2c_driver at24_driver = {
+	.driver = {
+		.name = "at24",
+		.owner = THIS_MODULE,
+	},
+	.probe = at24_probe,
+	.remove = __devexit_p(at24_remove),
+	.id_table = at24_ids,
+};
+
+static int __init at24_init(void)
+{
+	io_limit = rounddown_pow_of_two(io_limit);
+	return i2c_add_driver(&at24_driver);
+}
+module_init(at24_init);
+
+static void __exit at24_exit(void)
+{
+	i2c_del_driver(&at24_driver);
+}
+module_exit(at24_exit);
+
+MODULE_DESCRIPTION("Driver for most I2C EEPROMs");
+MODULE_AUTHOR("David Brownell and Wolfram Sang");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
new file mode 100644
index 00000000000..d902d81dde3
--- /dev/null
+++ b/drivers/misc/eeprom/at25.c
@@ -0,0 +1,420 @@
+/*
+ * at25.c -- support most SPI EEPROMs, such as Atmel AT25 models
+ *
+ * Copyright (C) 2006 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/sched.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/eeprom.h>
+
+
+/*
+ * NOTE: this is an *EEPROM* driver.  The vagaries of product naming
+ * mean that some AT25 products are EEPROMs, and others are FLASH.
+ * Handle FLASH chips with the drivers/mtd/devices/m25p80.c driver,
+ * not this one!
+ */
+
+struct at25_data {
+	struct spi_device	*spi;
+	struct memory_accessor	mem;
+	struct mutex		lock;
+	struct spi_eeprom	chip;
+	struct bin_attribute	bin;
+	unsigned		addrlen;
+};
+
+#define	AT25_WREN	0x06		/* latch the write enable */
+#define	AT25_WRDI	0x04		/* reset the write enable */
+#define	AT25_RDSR	0x05		/* read status register */
+#define	AT25_WRSR	0x01		/* write status register */
+#define	AT25_READ	0x03		/* read byte(s) */
+#define	AT25_WRITE	0x02		/* write byte(s)/sector */
+
+#define	AT25_SR_nRDY	0x01		/* nRDY = write-in-progress */
+#define	AT25_SR_WEN	0x02		/* write enable (latched) */
+#define	AT25_SR_BP0	0x04		/* BP for software writeprotect */
+#define	AT25_SR_BP1	0x08
+#define	AT25_SR_WPEN	0x80		/* writeprotect enable */
+
+
+#define EE_MAXADDRLEN	3		/* 24 bit addresses, up to 2 MBytes */
+
+/* Specs often allow 5 msec for a page write, sometimes 20 msec;
+ * it's important to recover from write timeouts.
+ */
+#define	EE_TIMEOUT	25
+
+/*-------------------------------------------------------------------------*/
+
+#define	io_limit	PAGE_SIZE	/* bytes */
+
+static ssize_t
+at25_ee_read(
+	struct at25_data	*at25,
+	char			*buf,
+	unsigned		offset,
+	size_t			count
+)
+{
+	u8			command[EE_MAXADDRLEN + 1];
+	u8			*cp;
+	ssize_t			status;
+	struct spi_transfer	t[2];
+	struct spi_message	m;
+
+	if (unlikely(offset >= at25->bin.size))
+		return 0;
+	if ((offset + count) > at25->bin.size)
+		count = at25->bin.size - offset;
+	if (unlikely(!count))
+		return count;
+
+	cp = command;
+	*cp++ = AT25_READ;
+
+	/* 8/16/24-bit address is written MSB first */
+	switch (at25->addrlen) {
+	default:	/* case 3 */
+		*cp++ = offset >> 16;
+	case 2:
+		*cp++ = offset >> 8;
+	case 1:
+	case 0:	/* can't happen: for better codegen */
+		*cp++ = offset >> 0;
+	}
+
+	spi_message_init(&m);
+	memset(t, 0, sizeof t);
+
+	t[0].tx_buf = command;
+	t[0].len = at25->addrlen + 1;
+	spi_message_add_tail(&t[0], &m);
+
+	t[1].rx_buf = buf;
+	t[1].len = count;
+	spi_message_add_tail(&t[1], &m);
+
+	mutex_lock(&at25->lock);
+
+	/* Read it all at once.
+	 *
+	 * REVISIT that's potentially a problem with large chips, if
+	 * other devices on the bus need to be accessed regularly or
+	 * this chip is clocked very slowly
+	 */
+	status = spi_sync(at25->spi, &m);
+	dev_dbg(&at25->spi->dev,
+		"read %Zd bytes at %d --> %d\n",
+		count, offset, (int) status);
+
+	mutex_unlock(&at25->lock);
+	return status ? status : count;
+}
+
+static ssize_t
+at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+	      char *buf, loff_t off, size_t count)
+{
+	struct device		*dev;
+	struct at25_data	*at25;
+
+	dev = container_of(kobj, struct device, kobj);
+	at25 = dev_get_drvdata(dev);
+
+	return at25_ee_read(at25, buf, off, count);
+}
+
+
+static ssize_t
+at25_ee_write(struct at25_data *at25, const char *buf, loff_t off,
+	      size_t count)
+{
+	ssize_t			status = 0;
+	unsigned		written = 0;
+	unsigned		buf_size;
+	u8			*bounce;
+
+	if (unlikely(off >= at25->bin.size))
+		return -EFBIG;
+	if ((off + count) > at25->bin.size)
+		count = at25->bin.size - off;
+	if (unlikely(!count))
+		return count;
+
+	/* Temp buffer starts with command and address */
+	buf_size = at25->chip.page_size;
+	if (buf_size > io_limit)
+		buf_size = io_limit;
+	bounce = kmalloc(buf_size + at25->addrlen + 1, GFP_KERNEL);
+	if (!bounce)
+		return -ENOMEM;
+
+	/* For write, rollover is within the page ... so we write at
+	 * most one page, then manually roll over to the next page.
+	 */
+	bounce[0] = AT25_WRITE;
+	mutex_lock(&at25->lock);
+	do {
+		unsigned long	timeout, retries;
+		unsigned	segment;
+		unsigned	offset = (unsigned) off;
+		u8		*cp = bounce + 1;
+		int		sr;
+
+		*cp = AT25_WREN;
+		status = spi_write(at25->spi, cp, 1);
+		if (status < 0) {
+			dev_dbg(&at25->spi->dev, "WREN --> %d\n",
+					(int) status);
+			break;
+		}
+
+		/* 8/16/24-bit address is written MSB first */
+		switch (at25->addrlen) {
+		default:	/* case 3 */
+			*cp++ = offset >> 16;
+		case 2:
+			*cp++ = offset >> 8;
+		case 1:
+		case 0:	/* can't happen: for better codegen */
+			*cp++ = offset >> 0;
+		}
+
+		/* Write as much of a page as we can */
+		segment = buf_size - (offset % buf_size);
+		if (segment > count)
+			segment = count;
+		memcpy(cp, buf, segment);
+		status = spi_write(at25->spi, bounce,
+				segment + at25->addrlen + 1);
+		dev_dbg(&at25->spi->dev,
+				"write %u bytes at %u --> %d\n",
+				segment, offset, (int) status);
+		if (status < 0)
+			break;
+
+		/* REVISIT this should detect (or prevent) failed writes
+		 * to readonly sections of the EEPROM...
+		 */
+
+		/* Wait for non-busy status */
+		timeout = jiffies + msecs_to_jiffies(EE_TIMEOUT);
+		retries = 0;
+		do {
+
+			sr = spi_w8r8(at25->spi, AT25_RDSR);
+			if (sr < 0 || (sr & AT25_SR_nRDY)) {
+				dev_dbg(&at25->spi->dev,
+					"rdsr --> %d (%02x)\n", sr, sr);
+				/* at HZ=100, this is sloooow */
+				msleep(1);
+				continue;
+			}
+			if (!(sr & AT25_SR_nRDY))
+				break;
+		} while (retries++ < 3 || time_before_eq(jiffies, timeout));
+
+		if ((sr < 0) || (sr & AT25_SR_nRDY)) {
+			dev_err(&at25->spi->dev,
+				"write %d bytes offset %d, "
+				"timeout after %u msecs\n",
+				segment, offset,
+				jiffies_to_msecs(jiffies -
+					(timeout - EE_TIMEOUT)));
+			status = -ETIMEDOUT;
+			break;
+		}
+
+		off += segment;
+		buf += segment;
+		count -= segment;
+		written += segment;
+
+	} while (count > 0);
+
+	mutex_unlock(&at25->lock);
+
+	kfree(bounce);
+	return written ? written : status;
+}
+
+static ssize_t
+at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr,
+	       char *buf, loff_t off, size_t count)
+{
+	struct device		*dev;
+	struct at25_data	*at25;
+
+	dev = container_of(kobj, struct device, kobj);
+	at25 = dev_get_drvdata(dev);
+
+	return at25_ee_write(at25, buf, off, count);
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* Let in-kernel code access the eeprom data. */
+
+static ssize_t at25_mem_read(struct memory_accessor *mem, char *buf,
+			 off_t offset, size_t count)
+{
+	struct at25_data *at25 = container_of(mem, struct at25_data, mem);
+
+	return at25_ee_read(at25, buf, offset, count);
+}
+
+static ssize_t at25_mem_write(struct memory_accessor *mem, const char *buf,
+			  off_t offset, size_t count)
+{
+	struct at25_data *at25 = container_of(mem, struct at25_data, mem);
+
+	return at25_ee_write(at25, buf, offset, count);
+}
+
+/*-------------------------------------------------------------------------*/
+
+static int at25_probe(struct spi_device *spi)
+{
+	struct at25_data	*at25 = NULL;
+	const struct spi_eeprom *chip;
+	int			err;
+	int			sr;
+	int			addrlen;
+
+	/* Chip description */
+	chip = spi->dev.platform_data;
+	if (!chip) {
+		dev_dbg(&spi->dev, "no chip description\n");
+		err = -ENODEV;
+		goto fail;
+	}
+
+	/* For now we only support 8/16/24 bit addressing */
+	if (chip->flags & EE_ADDR1)
+		addrlen = 1;
+	else if (chip->flags & EE_ADDR2)
+		addrlen = 2;
+	else if (chip->flags & EE_ADDR3)
+		addrlen = 3;
+	else {
+		dev_dbg(&spi->dev, "unsupported address type\n");
+		err = -EINVAL;
+		goto fail;
+	}
+
+	/* Ping the chip ... the status register is pretty portable,
+	 * unlike probing manufacturer IDs.  We do expect that system
+	 * firmware didn't write it in the past few milliseconds!
+	 */
+	sr = spi_w8r8(spi, AT25_RDSR);
+	if (sr < 0 || sr & AT25_SR_nRDY) {
+		dev_dbg(&spi->dev, "rdsr --> %d (%02x)\n", sr, sr);
+		err = -ENXIO;
+		goto fail;
+	}
+
+	if (!(at25 = kzalloc(sizeof *at25, GFP_KERNEL))) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	mutex_init(&at25->lock);
+	at25->chip = *chip;
+	at25->spi = spi_dev_get(spi);
+	dev_set_drvdata(&spi->dev, at25);
+	at25->addrlen = addrlen;
+
+	/* Export the EEPROM bytes through sysfs, since that's convenient.
+	 * And maybe to other kernel code; it might hold a board's Ethernet
+	 * address, or board-specific calibration data generated on the
+	 * manufacturing floor.
+	 *
+	 * Default to root-only access to the data; EEPROMs often hold data
+	 * that's sensitive for read and/or write, like ethernet addresses,
+	 * security codes, board-specific manufacturing calibrations, etc.
+	 */
+	at25->bin.attr.name = "eeprom";
+	at25->bin.attr.mode = S_IRUSR;
+	at25->bin.read = at25_bin_read;
+	at25->mem.read = at25_mem_read;
+
+	at25->bin.size = at25->chip.byte_len;
+	if (!(chip->flags & EE_READONLY)) {
+		at25->bin.write = at25_bin_write;
+		at25->bin.attr.mode |= S_IWUSR;
+		at25->mem.write = at25_mem_write;
+	}
+
+	err = sysfs_create_bin_file(&spi->dev.kobj, &at25->bin);
+	if (err)
+		goto fail;
+
+	if (chip->setup)
+		chip->setup(&at25->mem, chip->context);
+
+	dev_info(&spi->dev, "%Zd %s %s eeprom%s, pagesize %u\n",
+		(at25->bin.size < 1024)
+			? at25->bin.size
+			: (at25->bin.size / 1024),
+		(at25->bin.size < 1024) ? "Byte" : "KByte",
+		at25->chip.name,
+		(chip->flags & EE_READONLY) ? " (readonly)" : "",
+		at25->chip.page_size);
+	return 0;
+fail:
+	dev_dbg(&spi->dev, "probe err %d\n", err);
+	kfree(at25);
+	return err;
+}
+
+static int __devexit at25_remove(struct spi_device *spi)
+{
+	struct at25_data	*at25;
+
+	at25 = dev_get_drvdata(&spi->dev);
+	sysfs_remove_bin_file(&spi->dev.kobj, &at25->bin);
+	kfree(at25);
+	return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+
+static struct spi_driver at25_driver = {
+	.driver = {
+		.name		= "at25",
+		.owner		= THIS_MODULE,
+	},
+	.probe		= at25_probe,
+	.remove		= __devexit_p(at25_remove),
+};
+
+static int __init at25_init(void)
+{
+	return spi_register_driver(&at25_driver);
+}
+module_init(at25_init);
+
+static void __exit at25_exit(void)
+{
+	spi_unregister_driver(&at25_driver);
+}
+module_exit(at25_exit);
+
+MODULE_DESCRIPTION("Driver for most SPI EEPROMs");
+MODULE_AUTHOR("David Brownell");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:at25");
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
new file mode 100644
index 00000000000..f939ebc2507
--- /dev/null
+++ b/drivers/misc/eeprom/eeprom.c
@@ -0,0 +1,253 @@
+/*
+    Copyright (C) 1998, 1999  Frodo Looijaard <frodol@dds.nl> and
+			       Philip Edelbrock <phil@netroedge.com>
+    Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com>
+    Copyright (C) 2003 IBM Corp.
+    Copyright (C) 2004 Jean Delvare <khali@linux-fr.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+
+/* Addresses to scan */
+static const unsigned short normal_i2c[] = { 0x50, 0x51, 0x52, 0x53, 0x54,
+					0x55, 0x56, 0x57, I2C_CLIENT_END };
+
+
+/* Size of EEPROM in bytes */
+#define EEPROM_SIZE		256
+
+/* possible types of eeprom devices */
+enum eeprom_nature {
+	UNKNOWN,
+	VAIO,
+};
+
+/* Each client has this additional data */
+struct eeprom_data {
+	struct mutex update_lock;
+	u8 valid;			/* bitfield, bit!=0 if slice is valid */
+	unsigned long last_updated[8];	/* In jiffies, 8 slices */
+	u8 data[EEPROM_SIZE];		/* Register values */
+	enum eeprom_nature nature;
+};
+
+
+static void eeprom_update_client(struct i2c_client *client, u8 slice)
+{
+	struct eeprom_data *data = i2c_get_clientdata(client);
+	int i;
+
+	mutex_lock(&data->update_lock);
+
+	if (!(data->valid & (1 << slice)) ||
+	    time_after(jiffies, data->last_updated[slice] + 300 * HZ)) {
+		dev_dbg(&client->dev, "Starting eeprom update, slice %u\n", slice);
+
+		if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
+			for (i = slice << 5; i < (slice + 1) << 5; i += 32)
+				if (i2c_smbus_read_i2c_block_data(client, i,
+							32, data->data + i)
+							!= 32)
+					goto exit;
+		} else {
+			for (i = slice << 5; i < (slice + 1) << 5; i += 2) {
+				int word = i2c_smbus_read_word_data(client, i);
+				if (word < 0)
+					goto exit;
+				data->data[i] = word & 0xff;
+				data->data[i + 1] = word >> 8;
+			}
+		}
+		data->last_updated[slice] = jiffies;
+		data->valid |= (1 << slice);
+	}
+exit:
+	mutex_unlock(&data->update_lock);
+}
+
+static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+			   char *buf, loff_t off, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj));
+	struct eeprom_data *data = i2c_get_clientdata(client);
+	u8 slice;
+
+	if (off > EEPROM_SIZE)
+		return 0;
+	if (off + count > EEPROM_SIZE)
+		count = EEPROM_SIZE - off;
+
+	/* Only refresh slices which contain requested bytes */
+	for (slice = off >> 5; slice <= (off + count - 1) >> 5; slice++)
+		eeprom_update_client(client, slice);
+
+	/* Hide Vaio private settings to regular users:
+	   - BIOS passwords: bytes 0x00 to 0x0f
+	   - UUID: bytes 0x10 to 0x1f
+	   - Serial number: 0xc0 to 0xdf */
+	if (data->nature == VAIO && !capable(CAP_SYS_ADMIN)) {
+		int i;
+
+		for (i = 0; i < count; i++) {
+			if ((off + i <= 0x1f) ||
+			    (off + i >= 0xc0 && off + i <= 0xdf))
+				buf[i] = 0;
+			else
+				buf[i] = data->data[off + i];
+		}
+	} else {
+		memcpy(buf, &data->data[off], count);
+	}
+
+	return count;
+}
+
+static struct bin_attribute eeprom_attr = {
+	.attr = {
+		.name = "eeprom",
+		.mode = S_IRUGO,
+	},
+	.size = EEPROM_SIZE,
+	.read = eeprom_read,
+};
+
+/* Return 0 if detection is successful, -ENODEV otherwise */
+static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info)
+{
+	struct i2c_adapter *adapter = client->adapter;
+
+	/* EDID EEPROMs are often 24C00 EEPROMs, which answer to all
+	   addresses 0x50-0x57, but we only care about 0x50. So decline
+	   attaching to addresses >= 0x51 on DDC buses */
+	if (!(adapter->class & I2C_CLASS_SPD) && client->addr >= 0x51)
+		return -ENODEV;
+
+	/* There are four ways we can read the EEPROM data:
+	   (1) I2C block reads (faster, but unsupported by most adapters)
+	   (2) Word reads (128% overhead)
+	   (3) Consecutive byte reads (88% overhead, unsafe)
+	   (4) Regular byte data reads (265% overhead)
+	   The third and fourth methods are not implemented by this driver
+	   because all known adapters support one of the first two. */
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_WORD_DATA)
+	 && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
+		return -ENODEV;
+
+	strlcpy(info->type, "eeprom", I2C_NAME_SIZE);
+
+	return 0;
+}
+
+static int eeprom_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = client->adapter;
+	struct eeprom_data *data;
+	int err;
+
+	if (!(data = kzalloc(sizeof(struct eeprom_data), GFP_KERNEL))) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	memset(data->data, 0xff, EEPROM_SIZE);
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->update_lock);
+	data->nature = UNKNOWN;
+
+	/* Detect the Vaio nature of EEPROMs.
+	   We use the "PCG-" or "VGN-" prefix as the signature. */
+	if (client->addr == 0x57
+	 && i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
+		char name[4];
+
+		name[0] = i2c_smbus_read_byte_data(client, 0x80);
+		name[1] = i2c_smbus_read_byte_data(client, 0x81);
+		name[2] = i2c_smbus_read_byte_data(client, 0x82);
+		name[3] = i2c_smbus_read_byte_data(client, 0x83);
+
+		if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) {
+			dev_info(&client->dev, "Vaio EEPROM detected, "
+				 "enabling privacy protection\n");
+			data->nature = VAIO;
+		}
+	}
+
+	/* create the sysfs eeprom file */
+	err = sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr);
+	if (err)
+		goto exit_kfree;
+
+	return 0;
+
+exit_kfree:
+	kfree(data);
+exit:
+	return err;
+}
+
+static int eeprom_remove(struct i2c_client *client)
+{
+	sysfs_remove_bin_file(&client->dev.kobj, &eeprom_attr);
+	kfree(i2c_get_clientdata(client));
+
+	return 0;
+}
+
+static const struct i2c_device_id eeprom_id[] = {
+	{ "eeprom", 0 },
+	{ }
+};
+
+static struct i2c_driver eeprom_driver = {
+	.driver = {
+		.name	= "eeprom",
+	},
+	.probe		= eeprom_probe,
+	.remove		= eeprom_remove,
+	.id_table	= eeprom_id,
+
+	.class		= I2C_CLASS_DDC | I2C_CLASS_SPD,
+	.detect		= eeprom_detect,
+	.address_list	= normal_i2c,
+};
+
+static int __init eeprom_init(void)
+{
+	return i2c_add_driver(&eeprom_driver);
+}
+
+static void __exit eeprom_exit(void)
+{
+	i2c_del_driver(&eeprom_driver);
+}
+
+
+MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl> and "
+		"Philip Edelbrock <phil@netroedge.com> and "
+		"Greg Kroah-Hartman <greg@kroah.com>");
+MODULE_DESCRIPTION("I2C EEPROM driver");
+MODULE_LICENSE("GPL");
+
+module_init(eeprom_init);
+module_exit(eeprom_exit);
diff --git a/drivers/misc/eeprom/eeprom_93cx6.c b/drivers/misc/eeprom/eeprom_93cx6.c
new file mode 100644
index 00000000000..15b1780025c
--- /dev/null
+++ b/drivers/misc/eeprom/eeprom_93cx6.c
@@ -0,0 +1,240 @@
+/*
+	Copyright (C) 2004 - 2006 rt2x00 SourceForge Project
+	<http://rt2x00.serialmonkey.com>
+
+	This program is free software; you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation; either version 2 of the License, or
+	(at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with this program; if not, write to the
+	Free Software Foundation, Inc.,
+	59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/*
+	Module: eeprom_93cx6
+	Abstract: EEPROM reader routines for 93cx6 chipsets.
+	Supported chipsets: 93c46 & 93c66.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/eeprom_93cx6.h>
+
+MODULE_AUTHOR("http://rt2x00.serialmonkey.com");
+MODULE_VERSION("1.0");
+MODULE_DESCRIPTION("EEPROM 93cx6 chip driver");
+MODULE_LICENSE("GPL");
+
+static inline void eeprom_93cx6_pulse_high(struct eeprom_93cx6 *eeprom)
+{
+	eeprom->reg_data_clock = 1;
+	eeprom->register_write(eeprom);
+
+	/*
+	 * Add a short delay for the pulse to work.
+	 * According to the specifications the "maximum minimum"
+	 * time should be 450ns.
+	 */
+	ndelay(450);
+}
+
+static inline void eeprom_93cx6_pulse_low(struct eeprom_93cx6 *eeprom)
+{
+	eeprom->reg_data_clock = 0;
+	eeprom->register_write(eeprom);
+
+	/*
+	 * Add a short delay for the pulse to work.
+	 * According to the specifications the "maximum minimum"
+	 * time should be 450ns.
+	 */
+	ndelay(450);
+}
+
+static void eeprom_93cx6_startup(struct eeprom_93cx6 *eeprom)
+{
+	/*
+	 * Clear all flags, and enable chip select.
+	 */
+	eeprom->register_read(eeprom);
+	eeprom->reg_data_in = 0;
+	eeprom->reg_data_out = 0;
+	eeprom->reg_data_clock = 0;
+	eeprom->reg_chip_select = 1;
+	eeprom->register_write(eeprom);
+
+	/*
+	 * kick a pulse.
+	 */
+	eeprom_93cx6_pulse_high(eeprom);
+	eeprom_93cx6_pulse_low(eeprom);
+}
+
+static void eeprom_93cx6_cleanup(struct eeprom_93cx6 *eeprom)
+{
+	/*
+	 * Clear chip_select and data_in flags.
+	 */
+	eeprom->register_read(eeprom);
+	eeprom->reg_data_in = 0;
+	eeprom->reg_chip_select = 0;
+	eeprom->register_write(eeprom);
+
+	/*
+	 * kick a pulse.
+	 */
+	eeprom_93cx6_pulse_high(eeprom);
+	eeprom_93cx6_pulse_low(eeprom);
+}
+
+static void eeprom_93cx6_write_bits(struct eeprom_93cx6 *eeprom,
+	const u16 data, const u16 count)
+{
+	unsigned int i;
+
+	eeprom->register_read(eeprom);
+
+	/*
+	 * Clear data flags.
+	 */
+	eeprom->reg_data_in = 0;
+	eeprom->reg_data_out = 0;
+
+	/*
+	 * Start writing all bits.
+	 */
+	for (i = count; i > 0; i--) {
+		/*
+		 * Check if this bit needs to be set.
+		 */
+		eeprom->reg_data_in = !!(data & (1 << (i - 1)));
+
+		/*
+		 * Write the bit to the eeprom register.
+		 */
+		eeprom->register_write(eeprom);
+
+		/*
+		 * Kick a pulse.
+		 */
+		eeprom_93cx6_pulse_high(eeprom);
+		eeprom_93cx6_pulse_low(eeprom);
+	}
+
+	eeprom->reg_data_in = 0;
+	eeprom->register_write(eeprom);
+}
+
+static void eeprom_93cx6_read_bits(struct eeprom_93cx6 *eeprom,
+	u16 *data, const u16 count)
+{
+	unsigned int i;
+	u16 buf = 0;
+
+	eeprom->register_read(eeprom);
+
+	/*
+	 * Clear data flags.
+	 */
+	eeprom->reg_data_in = 0;
+	eeprom->reg_data_out = 0;
+
+	/*
+	 * Start reading all bits.
+	 */
+	for (i = count; i > 0; i--) {
+		eeprom_93cx6_pulse_high(eeprom);
+
+		eeprom->register_read(eeprom);
+
+		/*
+		 * Clear data_in flag.
+		 */
+		eeprom->reg_data_in = 0;
+
+		/*
+		 * Read if the bit has been set.
+		 */
+		if (eeprom->reg_data_out)
+			buf |= (1 << (i - 1));
+
+		eeprom_93cx6_pulse_low(eeprom);
+	}
+
+	*data = buf;
+}
+
+/**
+ * eeprom_93cx6_read - Read multiple words from eeprom
+ * @eeprom: Pointer to eeprom structure
+ * @word: Word index from where we should start reading
+ * @data: target pointer where the information will have to be stored
+ *
+ * This function will read the eeprom data as host-endian word
+ * into the given data pointer.
+ */
+void eeprom_93cx6_read(struct eeprom_93cx6 *eeprom, const u8 word,
+	u16 *data)
+{
+	u16 command;
+
+	/*
+	 * Initialize the eeprom register
+	 */
+	eeprom_93cx6_startup(eeprom);
+
+	/*
+	 * Select the read opcode and the word to be read.
+	 */
+	command = (PCI_EEPROM_READ_OPCODE << eeprom->width) | word;
+	eeprom_93cx6_write_bits(eeprom, command,
+		PCI_EEPROM_WIDTH_OPCODE + eeprom->width);
+
+	/*
+	 * Read the requested 16 bits.
+	 */
+	eeprom_93cx6_read_bits(eeprom, data, 16);
+
+	/*
+	 * Cleanup eeprom register.
+	 */
+	eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_read);
+
+/**
+ * eeprom_93cx6_multiread - Read multiple words from eeprom
+ * @eeprom: Pointer to eeprom structure
+ * @word: Word index from where we should start reading
+ * @data: target pointer where the information will have to be stored
+ * @words: Number of words that should be read.
+ *
+ * This function will read all requested words from the eeprom,
+ * this is done by calling eeprom_93cx6_read() multiple times.
+ * But with the additional change that while the eeprom_93cx6_read
+ * will return host ordered bytes, this method will return little
+ * endian words.
+ */
+void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom, const u8 word,
+	__le16 *data, const u16 words)
+{
+	unsigned int i;
+	u16 tmp;
+
+	for (i = 0; i < words; i++) {
+		tmp = 0;
+		eeprom_93cx6_read(eeprom, word + i, &tmp);
+		data[i] = cpu_to_le16(tmp);
+	}
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_multiread);
+
diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c
new file mode 100644
index 00000000000..5a6b2bce8ad
--- /dev/null
+++ b/drivers/misc/eeprom/max6875.c
@@ -0,0 +1,227 @@
+/*
+    max6875.c - driver for MAX6874/MAX6875
+
+    Copyright (C) 2005 Ben Gardner <bgardner@wabtec.com>
+
+    Based on eeprom.c
+
+    The MAX6875 has a bank of registers and two banks of EEPROM.
+    Address ranges are defined as follows:
+     * 0x0000 - 0x0046 = configuration registers
+     * 0x8000 - 0x8046 = configuration EEPROM
+     * 0x8100 - 0x82FF = user EEPROM
+
+    This driver makes the user EEPROM available for read.
+
+    The registers & config EEPROM should be accessed via i2c-dev.
+
+    The MAX6875 ignores the lowest address bit, so each chip responds to
+    two addresses - 0x50/0x51 and 0x52/0x53.
+
+    Note that the MAX6875 uses i2c_smbus_write_byte_data() to set the read
+    address, so this driver is destructive if loaded for the wrong EEPROM chip.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; version 2 of the License.
+*/
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+
+/* The MAX6875 can only read/write 16 bytes at a time */
+#define SLICE_SIZE			16
+#define SLICE_BITS			4
+
+/* USER EEPROM is at addresses 0x8100 - 0x82FF */
+#define USER_EEPROM_BASE		0x8100
+#define USER_EEPROM_SIZE		0x0200
+#define USER_EEPROM_SLICES		32
+
+/* MAX6875 commands */
+#define MAX6875_CMD_BLK_READ		0x84
+
+/* Each client has this additional data */
+struct max6875_data {
+	struct i2c_client	*fake_client;
+	struct mutex		update_lock;
+
+	u32			valid;
+	u8			data[USER_EEPROM_SIZE];
+	unsigned long		last_updated[USER_EEPROM_SLICES];
+};
+
+static void max6875_update_slice(struct i2c_client *client, int slice)
+{
+	struct max6875_data *data = i2c_get_clientdata(client);
+	int i, j, addr;
+	u8 *buf;
+
+	if (slice >= USER_EEPROM_SLICES)
+		return;
+
+	mutex_lock(&data->update_lock);
+
+	buf = &data->data[slice << SLICE_BITS];
+
+	if (!(data->valid & (1 << slice)) ||
+	    time_after(jiffies, data->last_updated[slice])) {
+
+		dev_dbg(&client->dev, "Starting update of slice %u\n", slice);
+
+		data->valid &= ~(1 << slice);
+
+		addr = USER_EEPROM_BASE + (slice << SLICE_BITS);
+
+		/* select the eeprom address */
+		if (i2c_smbus_write_byte_data(client, addr >> 8, addr & 0xFF)) {
+			dev_err(&client->dev, "address set failed\n");
+			goto exit_up;
+		}
+
+		if (i2c_check_functionality(client->adapter,
+					    I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
+			if (i2c_smbus_read_i2c_block_data(client,
+							  MAX6875_CMD_BLK_READ,
+							  SLICE_SIZE,
+							  buf) != SLICE_SIZE) {
+				goto exit_up;
+			}
+		} else {
+			for (i = 0; i < SLICE_SIZE; i++) {
+				j = i2c_smbus_read_byte(client);
+				if (j < 0) {
+					goto exit_up;
+				}
+				buf[i] = j;
+			}
+		}
+		data->last_updated[slice] = jiffies;
+		data->valid |= (1 << slice);
+	}
+exit_up:
+	mutex_unlock(&data->update_lock);
+}
+
+static ssize_t max6875_read(struct kobject *kobj,
+			    struct bin_attribute *bin_attr,
+			    char *buf, loff_t off, size_t count)
+{
+	struct i2c_client *client = kobj_to_i2c_client(kobj);
+	struct max6875_data *data = i2c_get_clientdata(client);
+	int slice, max_slice;
+
+	if (off > USER_EEPROM_SIZE)
+		return 0;
+
+	if (off + count > USER_EEPROM_SIZE)
+		count = USER_EEPROM_SIZE - off;
+
+	/* refresh slices which contain requested bytes */
+	max_slice = (off + count - 1) >> SLICE_BITS;
+	for (slice = (off >> SLICE_BITS); slice <= max_slice; slice++)
+		max6875_update_slice(client, slice);
+
+	memcpy(buf, &data->data[off], count);
+
+	return count;
+}
+
+static struct bin_attribute user_eeprom_attr = {
+	.attr = {
+		.name = "eeprom",
+		.mode = S_IRUGO,
+	},
+	.size = USER_EEPROM_SIZE,
+	.read = max6875_read,
+};
+
+static int max6875_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = client->adapter;
+	struct max6875_data *data;
+	int err;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA
+				     | I2C_FUNC_SMBUS_READ_BYTE))
+		return -ENODEV;
+
+	/* Only bind to even addresses */
+	if (client->addr & 1)
+		return -ENODEV;
+
+	if (!(data = kzalloc(sizeof(struct max6875_data), GFP_KERNEL)))
+		return -ENOMEM;
+
+	/* A fake client is created on the odd address */
+	data->fake_client = i2c_new_dummy(client->adapter, client->addr + 1);
+	if (!data->fake_client) {
+		err = -ENOMEM;
+		goto exit_kfree;
+	}
+
+	/* Init real i2c_client */
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->update_lock);
+
+	err = sysfs_create_bin_file(&client->dev.kobj, &user_eeprom_attr);
+	if (err)
+		goto exit_remove_fake;
+
+	return 0;
+
+exit_remove_fake:
+	i2c_unregister_device(data->fake_client);
+exit_kfree:
+	kfree(data);
+	return err;
+}
+
+static int max6875_remove(struct i2c_client *client)
+{
+	struct max6875_data *data = i2c_get_clientdata(client);
+
+	i2c_unregister_device(data->fake_client);
+
+	sysfs_remove_bin_file(&client->dev.kobj, &user_eeprom_attr);
+	kfree(data);
+
+	return 0;
+}
+
+static const struct i2c_device_id max6875_id[] = {
+	{ "max6875", 0 },
+	{ }
+};
+
+static struct i2c_driver max6875_driver = {
+	.driver = {
+		.name	= "max6875",
+	},
+	.probe		= max6875_probe,
+	.remove		= max6875_remove,
+	.id_table	= max6875_id,
+};
+
+static int __init max6875_init(void)
+{
+	return i2c_add_driver(&max6875_driver);
+}
+
+static void __exit max6875_exit(void)
+{
+	i2c_del_driver(&max6875_driver);
+}
+
+
+MODULE_AUTHOR("Ben Gardner <bgardner@wabtec.com>");
+MODULE_DESCRIPTION("MAX6875 driver");
+MODULE_LICENSE("GPL");
+
+module_init(max6875_init);
+module_exit(max6875_exit);
diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
new file mode 100644
index 00000000000..1eac626e710
--- /dev/null
+++ b/drivers/misc/enclosure.c
@@ -0,0 +1,566 @@
+/*
+ * Enclosure Services
+ *
+ * Copyright (C) 2008 James Bottomley <James.Bottomley@HansenPartnership.com>
+ *
+**-----------------------------------------------------------------------------
+**
+**  This program is free software; you can redistribute it and/or
+**  modify it under the terms of the GNU General Public License
+**  version 2 as published by the Free Software Foundation.
+**
+**  This program is distributed in the hope that it will be useful,
+**  but WITHOUT ANY WARRANTY; without even the implied warranty of
+**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+**  GNU General Public License for more details.
+**
+**  You should have received a copy of the GNU General Public License
+**  along with this program; if not, write to the Free Software
+**  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+**
+**-----------------------------------------------------------------------------
+*/
+#include <linux/device.h>
+#include <linux/enclosure.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+
+static LIST_HEAD(container_list);
+static DEFINE_MUTEX(container_list_lock);
+static struct class enclosure_class;
+
+/**
+ * enclosure_find - find an enclosure given a parent device
+ * @dev:	the parent to match against
+ * @start:	Optional enclosure device to start from (NULL if none)
+ *
+ * Looks through the list of registered enclosures to find all those
+ * with @dev as a parent.  Returns NULL if no enclosure is
+ * found. @start can be used as a starting point to obtain multiple
+ * enclosures per parent (should begin with NULL and then be set to
+ * each returned enclosure device). Obtains a reference to the
+ * enclosure class device which must be released with device_put().
+ * If @start is not NULL, a reference must be taken on it which is
+ * released before returning (this allows a loop through all
+ * enclosures to exit with only the reference on the enclosure of
+ * interest held).  Note that the @dev may correspond to the actual
+ * device housing the enclosure, in which case no iteration via @start
+ * is required.
+ */
+struct enclosure_device *enclosure_find(struct device *dev,
+					struct enclosure_device *start)
+{
+	struct enclosure_device *edev;
+
+	mutex_lock(&container_list_lock);
+	edev = list_prepare_entry(start, &container_list, node);
+	if (start)
+		put_device(&start->edev);
+
+	list_for_each_entry_continue(edev, &container_list, node) {
+		struct device *parent = edev->edev.parent;
+		/* parent might not be immediate, so iterate up to
+		 * the root of the tree if necessary */
+		while (parent) {
+			if (parent == dev) {
+				get_device(&edev->edev);
+				mutex_unlock(&container_list_lock);
+				return edev;
+			}
+			parent = parent->parent;
+		}
+	}
+	mutex_unlock(&container_list_lock);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(enclosure_find);
+
+/**
+ * enclosure_for_each_device - calls a function for each enclosure
+ * @fn:		the function to call
+ * @data:	the data to pass to each call
+ *
+ * Loops over all the enclosures calling the function.
+ *
+ * Note, this function uses a mutex which will be held across calls to
+ * @fn, so it must have non atomic context, and @fn may (although it
+ * should not) sleep or otherwise cause the mutex to be held for
+ * indefinite periods
+ */
+int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *),
+			      void *data)
+{
+	int error = 0;
+	struct enclosure_device *edev;
+
+	mutex_lock(&container_list_lock);
+	list_for_each_entry(edev, &container_list, node) {
+		error = fn(edev, data);
+		if (error)
+			break;
+	}
+	mutex_unlock(&container_list_lock);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(enclosure_for_each_device);
+
+/**
+ * enclosure_register - register device as an enclosure
+ *
+ * @dev:	device containing the enclosure
+ * @components:	number of components in the enclosure
+ *
+ * This sets up the device for being an enclosure.  Note that @dev does
+ * not have to be a dedicated enclosure device.  It may be some other type
+ * of device that additionally responds to enclosure services
+ */
+struct enclosure_device *
+enclosure_register(struct device *dev, const char *name, int components,
+		   struct enclosure_component_callbacks *cb)
+{
+	struct enclosure_device *edev =
+		kzalloc(sizeof(struct enclosure_device) +
+			sizeof(struct enclosure_component)*components,
+			GFP_KERNEL);
+	int err, i;
+
+	BUG_ON(!cb);
+
+	if (!edev)
+		return ERR_PTR(-ENOMEM);
+
+	edev->components = components;
+
+	edev->edev.class = &enclosure_class;
+	edev->edev.parent = get_device(dev);
+	edev->cb = cb;
+	dev_set_name(&edev->edev, "%s", name);
+	err = device_register(&edev->edev);
+	if (err)
+		goto err;
+
+	for (i = 0; i < components; i++)
+		edev->component[i].number = -1;
+
+	mutex_lock(&container_list_lock);
+	list_add_tail(&edev->node, &container_list);
+	mutex_unlock(&container_list_lock);
+
+	return edev;
+
+ err:
+	put_device(edev->edev.parent);
+	kfree(edev);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(enclosure_register);
+
+static struct enclosure_component_callbacks enclosure_null_callbacks;
+
+/**
+ * enclosure_unregister - remove an enclosure
+ *
+ * @edev:	the registered enclosure to remove;
+ */
+void enclosure_unregister(struct enclosure_device *edev)
+{
+	int i;
+
+	mutex_lock(&container_list_lock);
+	list_del(&edev->node);
+	mutex_unlock(&container_list_lock);
+
+	for (i = 0; i < edev->components; i++)
+		if (edev->component[i].number != -1)
+			device_unregister(&edev->component[i].cdev);
+
+	/* prevent any callbacks into service user */
+	edev->cb = &enclosure_null_callbacks;
+	device_unregister(&edev->edev);
+}
+EXPORT_SYMBOL_GPL(enclosure_unregister);
+
+#define ENCLOSURE_NAME_SIZE	64
+
+static void enclosure_link_name(struct enclosure_component *cdev, char *name)
+{
+	strcpy(name, "enclosure_device:");
+	strcat(name, dev_name(&cdev->cdev));
+}
+
+static void enclosure_remove_links(struct enclosure_component *cdev)
+{
+	char name[ENCLOSURE_NAME_SIZE];
+
+	enclosure_link_name(cdev, name);
+	sysfs_remove_link(&cdev->dev->kobj, name);
+	sysfs_remove_link(&cdev->cdev.kobj, "device");
+}
+
+static int enclosure_add_links(struct enclosure_component *cdev)
+{
+	int error;
+	char name[ENCLOSURE_NAME_SIZE];
+
+	error = sysfs_create_link(&cdev->cdev.kobj, &cdev->dev->kobj, "device");
+	if (error)
+		return error;
+
+	enclosure_link_name(cdev, name);
+	error = sysfs_create_link(&cdev->dev->kobj, &cdev->cdev.kobj, name);
+	if (error)
+		sysfs_remove_link(&cdev->cdev.kobj, "device");
+
+	return error;
+}
+
+static void enclosure_release(struct device *cdev)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev);
+
+	put_device(cdev->parent);
+	kfree(edev);
+}
+
+static void enclosure_component_release(struct device *dev)
+{
+	struct enclosure_component *cdev = to_enclosure_component(dev);
+
+	if (cdev->dev) {
+		enclosure_remove_links(cdev);
+		put_device(cdev->dev);
+	}
+	put_device(dev->parent);
+}
+
+static const struct attribute_group *enclosure_groups[];
+
+/**
+ * enclosure_component_register - add a particular component to an enclosure
+ * @edev:	the enclosure to add the component
+ * @num:	the device number
+ * @type:	the type of component being added
+ * @name:	an optional name to appear in sysfs (leave NULL if none)
+ *
+ * Registers the component.  The name is optional for enclosures that
+ * give their components a unique name.  If not, leave the field NULL
+ * and a name will be assigned.
+ *
+ * Returns a pointer to the enclosure component or an error.
+ */
+struct enclosure_component *
+enclosure_component_register(struct enclosure_device *edev,
+			     unsigned int number,
+			     enum enclosure_component_type type,
+			     const char *name)
+{
+	struct enclosure_component *ecomp;
+	struct device *cdev;
+	int err;
+
+	if (number >= edev->components)
+		return ERR_PTR(-EINVAL);
+
+	ecomp = &edev->component[number];
+
+	if (ecomp->number != -1)
+		return ERR_PTR(-EINVAL);
+
+	ecomp->type = type;
+	ecomp->number = number;
+	cdev = &ecomp->cdev;
+	cdev->parent = get_device(&edev->edev);
+	if (name && name[0])
+		dev_set_name(cdev, "%s", name);
+	else
+		dev_set_name(cdev, "%u", number);
+
+	cdev->release = enclosure_component_release;
+	cdev->groups = enclosure_groups;
+
+	err = device_register(cdev);
+	if (err)
+		ERR_PTR(err);
+
+	return ecomp;
+}
+EXPORT_SYMBOL_GPL(enclosure_component_register);
+
+/**
+ * enclosure_add_device - add a device as being part of an enclosure
+ * @edev:	the enclosure device being added to.
+ * @num:	the number of the component
+ * @dev:	the device being added
+ *
+ * Declares a real device to reside in slot (or identifier) @num of an
+ * enclosure.  This will cause the relevant sysfs links to appear.
+ * This function may also be used to change a device associated with
+ * an enclosure without having to call enclosure_remove_device() in
+ * between.
+ *
+ * Returns zero on success or an error.
+ */
+int enclosure_add_device(struct enclosure_device *edev, int component,
+			 struct device *dev)
+{
+	struct enclosure_component *cdev;
+
+	if (!edev || component >= edev->components)
+		return -EINVAL;
+
+	cdev = &edev->component[component];
+
+	if (cdev->dev == dev)
+		return -EEXIST;
+
+	if (cdev->dev)
+		enclosure_remove_links(cdev);
+
+	put_device(cdev->dev);
+	cdev->dev = get_device(dev);
+	return enclosure_add_links(cdev);
+}
+EXPORT_SYMBOL_GPL(enclosure_add_device);
+
+/**
+ * enclosure_remove_device - remove a device from an enclosure
+ * @edev:	the enclosure device
+ * @num:	the number of the component to remove
+ *
+ * Returns zero on success or an error.
+ *
+ */
+int enclosure_remove_device(struct enclosure_device *edev, struct device *dev)
+{
+	struct enclosure_component *cdev;
+	int i;
+
+	if (!edev || !dev)
+		return -EINVAL;
+
+	for (i = 0; i < edev->components; i++) {
+		cdev = &edev->component[i];
+		if (cdev->dev == dev) {
+			enclosure_remove_links(cdev);
+			device_del(&cdev->cdev);
+			put_device(dev);
+			cdev->dev = NULL;
+			return device_add(&cdev->cdev);
+		}
+	}
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(enclosure_remove_device);
+
+/*
+ * sysfs pieces below
+ */
+
+static ssize_t enclosure_show_components(struct device *cdev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev);
+
+	return snprintf(buf, 40, "%d\n", edev->components);
+}
+
+static struct device_attribute enclosure_attrs[] = {
+	__ATTR(components, S_IRUGO, enclosure_show_components, NULL),
+	__ATTR_NULL
+};
+
+static struct class enclosure_class = {
+	.name			= "enclosure",
+	.owner			= THIS_MODULE,
+	.dev_release		= enclosure_release,
+	.dev_attrs		= enclosure_attrs,
+};
+
+static const char *const enclosure_status [] = {
+	[ENCLOSURE_STATUS_UNSUPPORTED] = "unsupported",
+	[ENCLOSURE_STATUS_OK] = "OK",
+	[ENCLOSURE_STATUS_CRITICAL] = "critical",
+	[ENCLOSURE_STATUS_NON_CRITICAL] = "non-critical",
+	[ENCLOSURE_STATUS_UNRECOVERABLE] = "unrecoverable",
+	[ENCLOSURE_STATUS_NOT_INSTALLED] = "not installed",
+	[ENCLOSURE_STATUS_UNKNOWN] = "unknown",
+	[ENCLOSURE_STATUS_UNAVAILABLE] = "unavailable",
+	[ENCLOSURE_STATUS_MAX] = NULL,
+};
+
+static const char *const enclosure_type [] = {
+	[ENCLOSURE_COMPONENT_DEVICE] = "device",
+	[ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device",
+};
+
+static ssize_t get_component_fault(struct device *cdev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_fault)
+		edev->cb->get_fault(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->fault);
+}
+
+static ssize_t set_component_fault(struct device *cdev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_fault)
+		edev->cb->set_fault(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_status(struct device *cdev,
+				    struct device_attribute *attr,char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_status)
+		edev->cb->get_status(edev, ecomp);
+	return snprintf(buf, 40, "%s\n", enclosure_status[ecomp->status]);
+}
+
+static ssize_t set_component_status(struct device *cdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int i;
+
+	for (i = 0; enclosure_status[i]; i++) {
+		if (strncmp(buf, enclosure_status[i],
+			    strlen(enclosure_status[i])) == 0 &&
+		    (buf[strlen(enclosure_status[i])] == '\n' ||
+		     buf[strlen(enclosure_status[i])] == '\0'))
+			break;
+	}
+
+	if (enclosure_status[i] && edev->cb->set_status) {
+		edev->cb->set_status(edev, ecomp, i);
+		return count;
+	} else
+		return -EINVAL;
+}
+
+static ssize_t get_component_active(struct device *cdev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_active)
+		edev->cb->get_active(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->active);
+}
+
+static ssize_t set_component_active(struct device *cdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_active)
+		edev->cb->set_active(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_locate(struct device *cdev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_locate)
+		edev->cb->get_locate(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->locate);
+}
+
+static ssize_t set_component_locate(struct device *cdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_locate)
+		edev->cb->set_locate(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_type(struct device *cdev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	return snprintf(buf, 40, "%s\n", enclosure_type[ecomp->type]);
+}
+
+
+static DEVICE_ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault,
+		    set_component_fault);
+static DEVICE_ATTR(status, S_IRUGO | S_IWUSR, get_component_status,
+		   set_component_status);
+static DEVICE_ATTR(active, S_IRUGO | S_IWUSR, get_component_active,
+		   set_component_active);
+static DEVICE_ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate,
+		   set_component_locate);
+static DEVICE_ATTR(type, S_IRUGO, get_component_type, NULL);
+
+static struct attribute *enclosure_component_attrs[] = {
+	&dev_attr_fault.attr,
+	&dev_attr_status.attr,
+	&dev_attr_active.attr,
+	&dev_attr_locate.attr,
+	&dev_attr_type.attr,
+	NULL
+};
+
+static struct attribute_group enclosure_group = {
+	.attrs = enclosure_component_attrs,
+};
+
+static const struct attribute_group *enclosure_groups[] = {
+	&enclosure_group,
+	NULL
+};
+
+static int __init enclosure_init(void)
+{
+	int err;
+
+	err = class_register(&enclosure_class);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void __exit enclosure_exit(void)
+{
+	class_unregister(&enclosure_class);
+}
+
+module_init(enclosure_init);
+module_exit(enclosure_exit);
+
+MODULE_AUTHOR("James Bottomley");
+MODULE_DESCRIPTION("Enclosure Services");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/ep93xx_pwm.c b/drivers/misc/ep93xx_pwm.c
new file mode 100644
index 00000000000..ba4694169d7
--- /dev/null
+++ b/drivers/misc/ep93xx_pwm.c
@@ -0,0 +1,384 @@
+/*
+ *  Simple PWM driver for EP93XX
+ *
+ *	(c) Copyright 2009  Matthieu Crapet <mcrapet@gmail.com>
+ *	(c) Copyright 2009  H Hartley Sweeten <hsweeten@visionengravers.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *  EP9307 has only one channel:
+ *    - PWMOUT
+ *
+ *  EP9301/02/12/15 have two channels:
+ *    - PWMOUT
+ *    - PWMOUT1 (alternate function for EGPIO14)
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+
+#include <mach/platform.h>
+
+#define EP93XX_PWMx_TERM_COUNT	0x00
+#define EP93XX_PWMx_DUTY_CYCLE	0x04
+#define EP93XX_PWMx_ENABLE	0x08
+#define EP93XX_PWMx_INVERT	0x0C
+
+#define EP93XX_PWM_MAX_COUNT	0xFFFF
+
+struct ep93xx_pwm {
+	void __iomem	*mmio_base;
+	struct clk	*clk;
+	u32		duty_percent;
+};
+
+static inline void ep93xx_pwm_writel(struct ep93xx_pwm *pwm,
+		unsigned int val, unsigned int off)
+{
+	__raw_writel(val, pwm->mmio_base + off);
+}
+
+static inline unsigned int ep93xx_pwm_readl(struct ep93xx_pwm *pwm,
+		unsigned int off)
+{
+	return __raw_readl(pwm->mmio_base + off);
+}
+
+static inline void ep93xx_pwm_write_tc(struct ep93xx_pwm *pwm, u16 value)
+{
+	ep93xx_pwm_writel(pwm, value, EP93XX_PWMx_TERM_COUNT);
+}
+
+static inline u16 ep93xx_pwm_read_tc(struct ep93xx_pwm *pwm)
+{
+	return ep93xx_pwm_readl(pwm, EP93XX_PWMx_TERM_COUNT);
+}
+
+static inline void ep93xx_pwm_write_dc(struct ep93xx_pwm *pwm, u16 value)
+{
+	ep93xx_pwm_writel(pwm, value, EP93XX_PWMx_DUTY_CYCLE);
+}
+
+static inline void ep93xx_pwm_enable(struct ep93xx_pwm *pwm)
+{
+	ep93xx_pwm_writel(pwm, 0x1, EP93XX_PWMx_ENABLE);
+}
+
+static inline void ep93xx_pwm_disable(struct ep93xx_pwm *pwm)
+{
+	ep93xx_pwm_writel(pwm, 0x0, EP93XX_PWMx_ENABLE);
+}
+
+static inline int ep93xx_pwm_is_enabled(struct ep93xx_pwm *pwm)
+{
+	return ep93xx_pwm_readl(pwm, EP93XX_PWMx_ENABLE) & 0x1;
+}
+
+static inline void ep93xx_pwm_invert(struct ep93xx_pwm *pwm)
+{
+	ep93xx_pwm_writel(pwm, 0x1, EP93XX_PWMx_INVERT);
+}
+
+static inline void ep93xx_pwm_normal(struct ep93xx_pwm *pwm)
+{
+	ep93xx_pwm_writel(pwm, 0x0, EP93XX_PWMx_INVERT);
+}
+
+static inline int ep93xx_pwm_is_inverted(struct ep93xx_pwm *pwm)
+{
+	return ep93xx_pwm_readl(pwm, EP93XX_PWMx_INVERT) & 0x1;
+}
+
+/*
+ * /sys/devices/platform/ep93xx-pwm.N
+ *   /min_freq      read-only   minimum pwm output frequency
+ *   /max_req       read-only   maximum pwm output frequency
+ *   /freq          read-write  pwm output frequency (0 = disable output)
+ *   /duty_percent  read-write  pwm duty cycle percent (1..99)
+ *   /invert        read-write  invert pwm output
+ */
+
+static ssize_t ep93xx_pwm_get_min_freq(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	unsigned long rate = clk_get_rate(pwm->clk);
+
+	return sprintf(buf, "%ld\n", rate / (EP93XX_PWM_MAX_COUNT + 1));
+}
+
+static ssize_t ep93xx_pwm_get_max_freq(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	unsigned long rate = clk_get_rate(pwm->clk);
+
+	return sprintf(buf, "%ld\n", rate / 2);
+}
+
+static ssize_t ep93xx_pwm_get_freq(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+
+	if (ep93xx_pwm_is_enabled(pwm)) {
+		unsigned long rate = clk_get_rate(pwm->clk);
+		u16 term = ep93xx_pwm_read_tc(pwm);
+
+		return sprintf(buf, "%ld\n", rate / (term + 1));
+	} else {
+		return sprintf(buf, "disabled\n");
+	}
+}
+
+static ssize_t ep93xx_pwm_set_freq(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	long val;
+	int err;
+
+	err = strict_strtol(buf, 10, &val);
+	if (err)
+		return -EINVAL;
+
+	if (val == 0) {
+		ep93xx_pwm_disable(pwm);
+	} else if (val <= (clk_get_rate(pwm->clk) / 2)) {
+		u32 term, duty;
+
+		val = (clk_get_rate(pwm->clk) / val) - 1;
+		if (val > EP93XX_PWM_MAX_COUNT)
+			val = EP93XX_PWM_MAX_COUNT;
+		if (val < 1)
+			val = 1;
+
+		term = ep93xx_pwm_read_tc(pwm);
+		duty = ((val + 1) * pwm->duty_percent / 100) - 1;
+
+		/* If pwm is running, order is important */
+		if (val > term) {
+			ep93xx_pwm_write_tc(pwm, val);
+			ep93xx_pwm_write_dc(pwm, duty);
+		} else {
+			ep93xx_pwm_write_dc(pwm, duty);
+			ep93xx_pwm_write_tc(pwm, val);
+		}
+
+		if (!ep93xx_pwm_is_enabled(pwm))
+			ep93xx_pwm_enable(pwm);
+	} else {
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+static ssize_t ep93xx_pwm_get_duty_percent(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+
+	return sprintf(buf, "%d\n", pwm->duty_percent);
+}
+
+static ssize_t ep93xx_pwm_set_duty_percent(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	long val;
+	int err;
+
+	err = strict_strtol(buf, 10, &val);
+	if (err)
+		return -EINVAL;
+
+	if (val > 0 && val < 100) {
+		u32 term = ep93xx_pwm_read_tc(pwm);
+		ep93xx_pwm_write_dc(pwm, ((term + 1) * val / 100) - 1);
+		pwm->duty_percent = val;
+		return count;
+	}
+
+	return -EINVAL;
+}
+
+static ssize_t ep93xx_pwm_get_invert(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+
+	return sprintf(buf, "%d\n", ep93xx_pwm_is_inverted(pwm));
+}
+
+static ssize_t ep93xx_pwm_set_invert(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	long val;
+	int err;
+
+	err = strict_strtol(buf, 10, &val);
+	if (err)
+		return -EINVAL;
+
+	if (val == 0)
+		ep93xx_pwm_normal(pwm);
+	else if (val == 1)
+		ep93xx_pwm_invert(pwm);
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+static DEVICE_ATTR(min_freq, S_IRUGO, ep93xx_pwm_get_min_freq, NULL);
+static DEVICE_ATTR(max_freq, S_IRUGO, ep93xx_pwm_get_max_freq, NULL);
+static DEVICE_ATTR(freq, S_IWUGO | S_IRUGO,
+		   ep93xx_pwm_get_freq, ep93xx_pwm_set_freq);
+static DEVICE_ATTR(duty_percent, S_IWUGO | S_IRUGO,
+		   ep93xx_pwm_get_duty_percent, ep93xx_pwm_set_duty_percent);
+static DEVICE_ATTR(invert, S_IWUGO | S_IRUGO,
+		   ep93xx_pwm_get_invert, ep93xx_pwm_set_invert);
+
+static struct attribute *ep93xx_pwm_attrs[] = {
+	&dev_attr_min_freq.attr,
+	&dev_attr_max_freq.attr,
+	&dev_attr_freq.attr,
+	&dev_attr_duty_percent.attr,
+	&dev_attr_invert.attr,
+	NULL
+};
+
+static const struct attribute_group ep93xx_pwm_sysfs_files = {
+	.attrs	= ep93xx_pwm_attrs,
+};
+
+static int __init ep93xx_pwm_probe(struct platform_device *pdev)
+{
+	struct ep93xx_pwm *pwm;
+	struct resource *res;
+	int err;
+
+	err = ep93xx_pwm_acquire_gpio(pdev);
+	if (err)
+		return err;
+
+	pwm = kzalloc(sizeof(struct ep93xx_pwm), GFP_KERNEL);
+	if (!pwm) {
+		err = -ENOMEM;
+		goto fail_no_mem;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL) {
+		err = -ENXIO;
+		goto fail_no_mem_resource;
+	}
+
+	res = request_mem_region(res->start, resource_size(res), pdev->name);
+	if (res == NULL) {
+		err = -EBUSY;
+		goto fail_no_mem_resource;
+	}
+
+	pwm->mmio_base = ioremap(res->start, resource_size(res));
+	if (pwm->mmio_base == NULL) {
+		err = -ENXIO;
+		goto fail_no_ioremap;
+	}
+
+	err = sysfs_create_group(&pdev->dev.kobj, &ep93xx_pwm_sysfs_files);
+	if (err)
+		goto fail_no_sysfs;
+
+	pwm->clk = clk_get(&pdev->dev, "pwm_clk");
+	if (IS_ERR(pwm->clk)) {
+		err = PTR_ERR(pwm->clk);
+		goto fail_no_clk;
+	}
+
+	pwm->duty_percent = 50;
+
+	platform_set_drvdata(pdev, pwm);
+
+	/* disable pwm at startup. Avoids zero value. */
+	ep93xx_pwm_disable(pwm);
+	ep93xx_pwm_write_tc(pwm, EP93XX_PWM_MAX_COUNT);
+	ep93xx_pwm_write_dc(pwm, EP93XX_PWM_MAX_COUNT / 2);
+
+	clk_enable(pwm->clk);
+
+	return 0;
+
+fail_no_clk:
+	sysfs_remove_group(&pdev->dev.kobj, &ep93xx_pwm_sysfs_files);
+fail_no_sysfs:
+	iounmap(pwm->mmio_base);
+fail_no_ioremap:
+	release_mem_region(res->start, resource_size(res));
+fail_no_mem_resource:
+	kfree(pwm);
+fail_no_mem:
+	ep93xx_pwm_release_gpio(pdev);
+	return err;
+}
+
+static int __exit ep93xx_pwm_remove(struct platform_device *pdev)
+{
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	ep93xx_pwm_disable(pwm);
+	clk_disable(pwm->clk);
+	clk_put(pwm->clk);
+	platform_set_drvdata(pdev, NULL);
+	sysfs_remove_group(&pdev->dev.kobj, &ep93xx_pwm_sysfs_files);
+	iounmap(pwm->mmio_base);
+	release_mem_region(res->start, resource_size(res));
+	kfree(pwm);
+	ep93xx_pwm_release_gpio(pdev);
+
+	return 0;
+}
+
+static struct platform_driver ep93xx_pwm_driver = {
+	.driver		= {
+		.name	= "ep93xx-pwm",
+		.owner	= THIS_MODULE,
+	},
+	.remove		= __exit_p(ep93xx_pwm_remove),
+};
+
+static int __init ep93xx_pwm_init(void)
+{
+	return platform_driver_probe(&ep93xx_pwm_driver, ep93xx_pwm_probe);
+}
+
+static void __exit ep93xx_pwm_exit(void)
+{
+	platform_driver_unregister(&ep93xx_pwm_driver);
+}
+
+module_init(ep93xx_pwm_init);
+module_exit(ep93xx_pwm_exit);
+
+MODULE_AUTHOR("Matthieu Crapet <mcrapet@gmail.com>, "
+	      "H Hartley Sweeten <hsweeten@visionengravers.com>");
+MODULE_DESCRIPTION("EP93xx PWM driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:ep93xx-pwm");
diff --git a/drivers/misc/hdpuftrs/Makefile b/drivers/misc/hdpuftrs/Makefile
new file mode 100644
index 00000000000..ac74ae67923
--- /dev/null
+++ b/drivers/misc/hdpuftrs/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_HDPU_FEATURES) := hdpu_cpustate.o hdpu_nexus.o
diff --git a/drivers/misc/hdpuftrs/hdpu_cpustate.c b/drivers/misc/hdpuftrs/hdpu_cpustate.c
new file mode 100644
index 00000000000..176fe4e09d3
--- /dev/null
+++ b/drivers/misc/hdpuftrs/hdpu_cpustate.c
@@ -0,0 +1,256 @@
+/*
+ *	Sky CPU State Driver
+ *
+ *	Copyright (C) 2002 Brian Waite
+ *
+ *	This driver allows use of the CPU state bits
+ *	It exports the /dev/sky_cpustate and also
+ *	/proc/sky_cpustate pseudo-file for status information.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
+#include <linux/miscdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/hdpu_features.h>
+#include <linux/platform_device.h>
+#include <asm/uaccess.h>
+#include <linux/seq_file.h>
+#include <asm/io.h>
+
+#define SKY_CPUSTATE_VERSION		"1.1"
+
+static int hdpu_cpustate_probe(struct platform_device *pdev);
+static int hdpu_cpustate_remove(struct platform_device *pdev);
+
+static unsigned char cpustate_get_state(void);
+static int cpustate_proc_open(struct inode *inode, struct file *file);
+static int cpustate_proc_read(struct seq_file *seq, void *offset);
+
+static struct cpustate_t cpustate;
+
+static const struct file_operations proc_cpustate = {
+	.open = cpustate_proc_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
+static int cpustate_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, cpustate_proc_read, NULL);
+}
+
+static int cpustate_proc_read(struct seq_file *seq, void *offset)
+{
+	seq_printf(seq, "CPU State: %04x\n", cpustate_get_state());
+	return 0;
+}
+
+static int cpustate_get_ref(int excl)
+{
+
+	int retval = -EBUSY;
+
+	spin_lock(&cpustate.lock);
+
+	if (cpustate.excl)
+		goto out_busy;
+
+	if (excl) {
+		if (cpustate.open_count)
+			goto out_busy;
+		cpustate.excl = 1;
+	}
+
+	cpustate.open_count++;
+	retval = 0;
+
+      out_busy:
+	spin_unlock(&cpustate.lock);
+	return retval;
+}
+
+static int cpustate_free_ref(void)
+{
+
+	spin_lock(&cpustate.lock);
+
+	cpustate.excl = 0;
+	cpustate.open_count--;
+
+	spin_unlock(&cpustate.lock);
+	return 0;
+}
+
+static unsigned char cpustate_get_state(void)
+{
+
+	return cpustate.cached_val;
+}
+
+static void cpustate_set_state(unsigned char new_state)
+{
+	unsigned int state = (new_state << 21);
+
+#ifdef DEBUG_CPUSTATE
+	printk("CPUSTATE -> 0x%x\n", new_state);
+#endif
+	spin_lock(&cpustate.lock);
+	cpustate.cached_val = new_state;
+	writel((0xff << 21), cpustate.clr_addr);
+	writel(state, cpustate.set_addr);
+	spin_unlock(&cpustate.lock);
+}
+
+/*
+ *	Now all the various file operations that we export.
+ */
+
+static ssize_t cpustate_read(struct file *file, char *buf,
+			     size_t count, loff_t * ppos)
+{
+	unsigned char data;
+
+	if (count < 0)
+		return -EFAULT;
+	if (count == 0)
+		return 0;
+
+	data = cpustate_get_state();
+	if (copy_to_user(buf, &data, sizeof(unsigned char)))
+		return -EFAULT;
+	return sizeof(unsigned char);
+}
+
+static ssize_t cpustate_write(struct file *file, const char *buf,
+			      size_t count, loff_t * ppos)
+{
+	unsigned char data;
+
+	if (count < 0)
+		return -EFAULT;
+
+	if (count == 0)
+		return 0;
+
+	if (copy_from_user((unsigned char *)&data, buf, sizeof(unsigned char)))
+		return -EFAULT;
+
+	cpustate_set_state(data);
+	return sizeof(unsigned char);
+}
+
+static int cpustate_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	lock_kernel();
+	ret = cpustate_get_ref((file->f_flags & O_EXCL));
+	unlock_kernel();
+
+	return ret;
+}
+
+static int cpustate_release(struct inode *inode, struct file *file)
+{
+	return cpustate_free_ref();
+}
+
+static struct platform_driver hdpu_cpustate_driver = {
+	.probe = hdpu_cpustate_probe,
+	.remove = hdpu_cpustate_remove,
+	.driver = {
+		.name = HDPU_CPUSTATE_NAME,
+		.owner = THIS_MODULE,
+	},
+};
+
+/*
+ *	The various file operations we support.
+ */
+static const struct file_operations cpustate_fops = {
+      .owner	= THIS_MODULE,
+      .open	= cpustate_open,
+      .release	= cpustate_release,
+      .read	= cpustate_read,
+      .write	= cpustate_write,
+      .llseek	= no_llseek,
+};
+
+static struct miscdevice cpustate_dev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "sky_cpustate",
+	.fops	= &cpustate_fops,
+};
+
+static int hdpu_cpustate_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct proc_dir_entry *proc_de;
+	int ret;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		printk(KERN_ERR "sky_cpustate: "
+		       "Invalid memory resource.\n");
+		return -EINVAL;
+	}
+	cpustate.set_addr = (unsigned long *)res->start;
+	cpustate.clr_addr = (unsigned long *)res->end - 1;
+
+	ret = misc_register(&cpustate_dev);
+	if (ret) {
+		printk(KERN_WARNING "sky_cpustate: "
+		       "Unable to register misc device.\n");
+		cpustate.set_addr = NULL;
+		cpustate.clr_addr = NULL;
+		return ret;
+	}
+
+	proc_de = proc_create("sky_cpustate", 0666, NULL, &proc_cpustate);
+	if (!proc_de) {
+		printk(KERN_WARNING "sky_cpustate: "
+		       "Unable to create proc entry\n");
+	}
+
+	printk(KERN_INFO "Sky CPU State Driver v" SKY_CPUSTATE_VERSION "\n");
+	return 0;
+}
+
+static int hdpu_cpustate_remove(struct platform_device *pdev)
+{
+	cpustate.set_addr = NULL;
+	cpustate.clr_addr = NULL;
+
+	remove_proc_entry("sky_cpustate", NULL);
+	misc_deregister(&cpustate_dev);
+
+	return 0;
+}
+
+static int __init cpustate_init(void)
+{
+	return platform_driver_register(&hdpu_cpustate_driver);
+}
+
+static void __exit cpustate_exit(void)
+{
+	platform_driver_unregister(&hdpu_cpustate_driver);
+}
+
+module_init(cpustate_init);
+module_exit(cpustate_exit);
+
+MODULE_AUTHOR("Brian Waite");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" HDPU_CPUSTATE_NAME);
diff --git a/drivers/misc/hdpuftrs/hdpu_nexus.c b/drivers/misc/hdpuftrs/hdpu_nexus.c
new file mode 100644
index 00000000000..ce39fa54949
--- /dev/null
+++ b/drivers/misc/hdpuftrs/hdpu_nexus.c
@@ -0,0 +1,149 @@
+/*
+ *	Sky Nexus Register Driver
+ *
+ *	Copyright (C) 2002 Brian Waite
+ *
+ *	This driver allows reading the Nexus register
+ *	It exports the /proc/sky_chassis_id and also
+ *	/proc/sky_slot_id pseudo-file for status information.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/hdpu_features.h>
+#include <linux/platform_device.h>
+#include <linux/seq_file.h>
+#include <asm/io.h>
+
+static int hdpu_nexus_probe(struct platform_device *pdev);
+static int hdpu_nexus_remove(struct platform_device *pdev);
+static int hdpu_slot_id_open(struct inode *inode, struct file *file);
+static int hdpu_slot_id_read(struct seq_file *seq, void *offset);
+static int hdpu_chassis_id_open(struct inode *inode, struct file *file);
+static int hdpu_chassis_id_read(struct seq_file *seq, void *offset);
+
+static struct proc_dir_entry *hdpu_slot_id;
+static struct proc_dir_entry *hdpu_chassis_id;
+static int slot_id = -1;
+static int chassis_id = -1;
+
+static const struct file_operations proc_slot_id = {
+	.open = hdpu_slot_id_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
+static const struct file_operations proc_chassis_id = {
+	.open = hdpu_chassis_id_open,
+	.read = seq_read,
+	.llseek	= seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
+static struct platform_driver hdpu_nexus_driver = {
+	.probe = hdpu_nexus_probe,
+	.remove = hdpu_nexus_remove,
+	.driver = {
+		.name = HDPU_NEXUS_NAME,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int hdpu_slot_id_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hdpu_slot_id_read, NULL);
+}
+
+static int hdpu_slot_id_read(struct seq_file *seq, void *offset)
+{
+	seq_printf(seq, "%d\n", slot_id);
+	return 0;
+}
+
+static int hdpu_chassis_id_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hdpu_chassis_id_read, NULL);
+}
+
+static int hdpu_chassis_id_read(struct seq_file *seq, void *offset)
+{
+	seq_printf(seq, "%d\n", chassis_id);
+	return 0;
+}
+
+static int hdpu_nexus_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int *nexus_id_addr;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		printk(KERN_ERR "sky_nexus: "
+		       "Invalid memory resource.\n");
+		return -EINVAL;
+	}
+	nexus_id_addr = ioremap(res->start,
+				(unsigned long)(res->end - res->start));
+	if (nexus_id_addr) {
+		slot_id = (*nexus_id_addr >> 8) & 0x1f;
+		chassis_id = *nexus_id_addr & 0xff;
+		iounmap(nexus_id_addr);
+	} else {
+		printk(KERN_ERR "sky_nexus: Could not map slot id\n");
+	}
+
+	hdpu_slot_id = proc_create("sky_slot_id", 0666, NULL, &proc_slot_id);
+	if (!hdpu_slot_id) {
+		printk(KERN_WARNING "sky_nexus: "
+		       "Unable to create proc dir entry: sky_slot_id\n");
+	}
+
+	hdpu_chassis_id = proc_create("sky_chassis_id", 0666, NULL,
+				      &proc_chassis_id);
+	if (!hdpu_chassis_id)
+		printk(KERN_WARNING "sky_nexus: "
+		       "Unable to create proc dir entry: sky_chassis_id\n");
+
+	return 0;
+}
+
+static int hdpu_nexus_remove(struct platform_device *pdev)
+{
+	slot_id = -1;
+	chassis_id = -1;
+
+	remove_proc_entry("sky_slot_id", NULL);
+	remove_proc_entry("sky_chassis_id", NULL);
+
+	hdpu_slot_id = 0;
+	hdpu_chassis_id = 0;
+
+	return 0;
+}
+
+static int __init nexus_init(void)
+{
+	return platform_driver_register(&hdpu_nexus_driver);
+}
+
+static void __exit nexus_exit(void)
+{
+	platform_driver_unregister(&hdpu_nexus_driver);
+}
+
+module_init(nexus_init);
+module_exit(nexus_exit);
+
+MODULE_AUTHOR("Brian Waite");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" HDPU_NEXUS_NAME);
diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c
new file mode 100644
index 00000000000..a92a3a742b4
--- /dev/null
+++ b/drivers/misc/hpilo.c
@@ -0,0 +1,886 @@
+/*
+ * Driver for HP iLO/iLO2 management processor.
+ *
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *	David Altobelli <david.altobelli@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include "hpilo.h"
+
+static struct class *ilo_class;
+static unsigned int ilo_major;
+static char ilo_hwdev[MAX_ILO_DEV];
+
+static inline int get_entry_id(int entry)
+{
+	return (entry & ENTRY_MASK_DESCRIPTOR) >> ENTRY_BITPOS_DESCRIPTOR;
+}
+
+static inline int get_entry_len(int entry)
+{
+	return ((entry & ENTRY_MASK_QWORDS) >> ENTRY_BITPOS_QWORDS) << 3;
+}
+
+static inline int mk_entry(int id, int len)
+{
+	int qlen = len & 7 ? (len >> 3) + 1 : len >> 3;
+	return id << ENTRY_BITPOS_DESCRIPTOR | qlen << ENTRY_BITPOS_QWORDS;
+}
+
+static inline int desc_mem_sz(int nr_entry)
+{
+	return nr_entry << L2_QENTRY_SZ;
+}
+
+/*
+ * FIFO queues, shared with hardware.
+ *
+ * If a queue has empty slots, an entry is added to the queue tail,
+ * and that entry is marked as occupied.
+ * Entries can be dequeued from the head of the list, when the device
+ * has marked the entry as consumed.
+ *
+ * Returns true on successful queue/dequeue, false on failure.
+ */
+static int fifo_enqueue(struct ilo_hwinfo *hw, char *fifobar, int entry)
+{
+	struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar);
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&hw->fifo_lock, flags);
+	if (!(fifo_q->fifobar[(fifo_q->tail + 1) & fifo_q->imask]
+	      & ENTRY_MASK_O)) {
+		fifo_q->fifobar[fifo_q->tail & fifo_q->imask] |=
+				(entry & ENTRY_MASK_NOSTATE) | fifo_q->merge;
+		fifo_q->tail += 1;
+		ret = 1;
+	}
+	spin_unlock_irqrestore(&hw->fifo_lock, flags);
+
+	return ret;
+}
+
+static int fifo_dequeue(struct ilo_hwinfo *hw, char *fifobar, int *entry)
+{
+	struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar);
+	unsigned long flags;
+	int ret = 0;
+	u64 c;
+
+	spin_lock_irqsave(&hw->fifo_lock, flags);
+	c = fifo_q->fifobar[fifo_q->head & fifo_q->imask];
+	if (c & ENTRY_MASK_C) {
+		if (entry)
+			*entry = c & ENTRY_MASK_NOSTATE;
+
+		fifo_q->fifobar[fifo_q->head & fifo_q->imask] =
+							(c | ENTRY_MASK) + 1;
+		fifo_q->head += 1;
+		ret = 1;
+	}
+	spin_unlock_irqrestore(&hw->fifo_lock, flags);
+
+	return ret;
+}
+
+static int fifo_check_recv(struct ilo_hwinfo *hw, char *fifobar)
+{
+	struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar);
+	unsigned long flags;
+	int ret = 0;
+	u64 c;
+
+	spin_lock_irqsave(&hw->fifo_lock, flags);
+	c = fifo_q->fifobar[fifo_q->head & fifo_q->imask];
+	if (c & ENTRY_MASK_C)
+		ret = 1;
+	spin_unlock_irqrestore(&hw->fifo_lock, flags);
+
+	return ret;
+}
+
+static int ilo_pkt_enqueue(struct ilo_hwinfo *hw, struct ccb *ccb,
+			   int dir, int id, int len)
+{
+	char *fifobar;
+	int entry;
+
+	if (dir == SENDQ)
+		fifobar = ccb->ccb_u1.send_fifobar;
+	else
+		fifobar = ccb->ccb_u3.recv_fifobar;
+
+	entry = mk_entry(id, len);
+	return fifo_enqueue(hw, fifobar, entry);
+}
+
+static int ilo_pkt_dequeue(struct ilo_hwinfo *hw, struct ccb *ccb,
+			   int dir, int *id, int *len, void **pkt)
+{
+	char *fifobar, *desc;
+	int entry = 0, pkt_id = 0;
+	int ret;
+
+	if (dir == SENDQ) {
+		fifobar = ccb->ccb_u1.send_fifobar;
+		desc = ccb->ccb_u2.send_desc;
+	} else {
+		fifobar = ccb->ccb_u3.recv_fifobar;
+		desc = ccb->ccb_u4.recv_desc;
+	}
+
+	ret = fifo_dequeue(hw, fifobar, &entry);
+	if (ret) {
+		pkt_id = get_entry_id(entry);
+		if (id)
+			*id = pkt_id;
+		if (len)
+			*len = get_entry_len(entry);
+		if (pkt)
+			*pkt = (void *)(desc + desc_mem_sz(pkt_id));
+	}
+
+	return ret;
+}
+
+static int ilo_pkt_recv(struct ilo_hwinfo *hw, struct ccb *ccb)
+{
+	char *fifobar = ccb->ccb_u3.recv_fifobar;
+
+	return fifo_check_recv(hw, fifobar);
+}
+
+static inline void doorbell_set(struct ccb *ccb)
+{
+	iowrite8(1, ccb->ccb_u5.db_base);
+}
+
+static inline void doorbell_clr(struct ccb *ccb)
+{
+	iowrite8(2, ccb->ccb_u5.db_base);
+}
+
+static inline int ctrl_set(int l2sz, int idxmask, int desclim)
+{
+	int active = 0, go = 1;
+	return l2sz << CTRL_BITPOS_L2SZ |
+	       idxmask << CTRL_BITPOS_FIFOINDEXMASK |
+	       desclim << CTRL_BITPOS_DESCLIMIT |
+	       active << CTRL_BITPOS_A |
+	       go << CTRL_BITPOS_G;
+}
+
+static void ctrl_setup(struct ccb *ccb, int nr_desc, int l2desc_sz)
+{
+	/* for simplicity, use the same parameters for send and recv ctrls */
+	ccb->send_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1);
+	ccb->recv_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1);
+}
+
+static inline int fifo_sz(int nr_entry)
+{
+	/* size of a fifo is determined by the number of entries it contains */
+	return (nr_entry * sizeof(u64)) + FIFOHANDLESIZE;
+}
+
+static void fifo_setup(void *base_addr, int nr_entry)
+{
+	struct fifo *fifo_q = base_addr;
+	int i;
+
+	/* set up an empty fifo */
+	fifo_q->head = 0;
+	fifo_q->tail = 0;
+	fifo_q->reset = 0;
+	fifo_q->nrents = nr_entry;
+	fifo_q->imask = nr_entry - 1;
+	fifo_q->merge = ENTRY_MASK_O;
+
+	for (i = 0; i < nr_entry; i++)
+		fifo_q->fifobar[i] = 0;
+}
+
+static void ilo_ccb_close(struct pci_dev *pdev, struct ccb_data *data)
+{
+	struct ccb *driver_ccb = &data->driver_ccb;
+	struct ccb __iomem *device_ccb = data->mapped_ccb;
+	int retries;
+
+	/* complicated dance to tell the hw we are stopping */
+	doorbell_clr(driver_ccb);
+	iowrite32(ioread32(&device_ccb->send_ctrl) & ~(1 << CTRL_BITPOS_G),
+		  &device_ccb->send_ctrl);
+	iowrite32(ioread32(&device_ccb->recv_ctrl) & ~(1 << CTRL_BITPOS_G),
+		  &device_ccb->recv_ctrl);
+
+	/* give iLO some time to process stop request */
+	for (retries = MAX_WAIT; retries > 0; retries--) {
+		doorbell_set(driver_ccb);
+		udelay(WAIT_TIME);
+		if (!(ioread32(&device_ccb->send_ctrl) & (1 << CTRL_BITPOS_A))
+		    &&
+		    !(ioread32(&device_ccb->recv_ctrl) & (1 << CTRL_BITPOS_A)))
+			break;
+	}
+	if (retries == 0)
+		dev_err(&pdev->dev, "Closing, but controller still active\n");
+
+	/* clear the hw ccb */
+	memset_io(device_ccb, 0, sizeof(struct ccb));
+
+	/* free resources used to back send/recv queues */
+	pci_free_consistent(pdev, data->dma_size, data->dma_va, data->dma_pa);
+}
+
+static int ilo_ccb_setup(struct ilo_hwinfo *hw, struct ccb_data *data, int slot)
+{
+	char *dma_va, *dma_pa;
+	struct ccb *driver_ccb, *ilo_ccb;
+
+	driver_ccb = &data->driver_ccb;
+	ilo_ccb = &data->ilo_ccb;
+
+	data->dma_size = 2 * fifo_sz(NR_QENTRY) +
+			 2 * desc_mem_sz(NR_QENTRY) +
+			 ILO_START_ALIGN + ILO_CACHE_SZ;
+
+	data->dma_va = pci_alloc_consistent(hw->ilo_dev, data->dma_size,
+					    &data->dma_pa);
+	if (!data->dma_va)
+		return -ENOMEM;
+
+	dma_va = (char *)data->dma_va;
+	dma_pa = (char *)data->dma_pa;
+
+	memset(dma_va, 0, data->dma_size);
+
+	dma_va = (char *)roundup((unsigned long)dma_va, ILO_START_ALIGN);
+	dma_pa = (char *)roundup((unsigned long)dma_pa, ILO_START_ALIGN);
+
+	/*
+	 * Create two ccb's, one with virt addrs, one with phys addrs.
+	 * Copy the phys addr ccb to device shared mem.
+	 */
+	ctrl_setup(driver_ccb, NR_QENTRY, L2_QENTRY_SZ);
+	ctrl_setup(ilo_ccb, NR_QENTRY, L2_QENTRY_SZ);
+
+	fifo_setup(dma_va, NR_QENTRY);
+	driver_ccb->ccb_u1.send_fifobar = dma_va + FIFOHANDLESIZE;
+	ilo_ccb->ccb_u1.send_fifobar = dma_pa + FIFOHANDLESIZE;
+	dma_va += fifo_sz(NR_QENTRY);
+	dma_pa += fifo_sz(NR_QENTRY);
+
+	dma_va = (char *)roundup((unsigned long)dma_va, ILO_CACHE_SZ);
+	dma_pa = (char *)roundup((unsigned long)dma_pa, ILO_CACHE_SZ);
+
+	fifo_setup(dma_va, NR_QENTRY);
+	driver_ccb->ccb_u3.recv_fifobar = dma_va + FIFOHANDLESIZE;
+	ilo_ccb->ccb_u3.recv_fifobar = dma_pa + FIFOHANDLESIZE;
+	dma_va += fifo_sz(NR_QENTRY);
+	dma_pa += fifo_sz(NR_QENTRY);
+
+	driver_ccb->ccb_u2.send_desc = dma_va;
+	ilo_ccb->ccb_u2.send_desc = dma_pa;
+	dma_pa += desc_mem_sz(NR_QENTRY);
+	dma_va += desc_mem_sz(NR_QENTRY);
+
+	driver_ccb->ccb_u4.recv_desc = dma_va;
+	ilo_ccb->ccb_u4.recv_desc = dma_pa;
+
+	driver_ccb->channel = slot;
+	ilo_ccb->channel = slot;
+
+	driver_ccb->ccb_u5.db_base = hw->db_vaddr + (slot << L2_DB_SIZE);
+	ilo_ccb->ccb_u5.db_base = NULL; /* hw ccb's doorbell is not used */
+
+	return 0;
+}
+
+static void ilo_ccb_open(struct ilo_hwinfo *hw, struct ccb_data *data, int slot)
+{
+	int pkt_id, pkt_sz;
+	struct ccb *driver_ccb = &data->driver_ccb;
+
+	/* copy the ccb with physical addrs to device memory */
+	data->mapped_ccb = (struct ccb __iomem *)
+				(hw->ram_vaddr + (slot * ILOHW_CCB_SZ));
+	memcpy_toio(data->mapped_ccb, &data->ilo_ccb, sizeof(struct ccb));
+
+	/* put packets on the send and receive queues */
+	pkt_sz = 0;
+	for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++) {
+		ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, pkt_sz);
+		doorbell_set(driver_ccb);
+	}
+
+	pkt_sz = desc_mem_sz(1);
+	for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++)
+		ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, pkt_sz);
+
+	/* the ccb is ready to use */
+	doorbell_clr(driver_ccb);
+}
+
+static int ilo_ccb_verify(struct ilo_hwinfo *hw, struct ccb_data *data)
+{
+	int pkt_id, i;
+	struct ccb *driver_ccb = &data->driver_ccb;
+
+	/* make sure iLO is really handling requests */
+	for (i = MAX_WAIT; i > 0; i--) {
+		if (ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, NULL, NULL))
+			break;
+		udelay(WAIT_TIME);
+	}
+
+	if (i == 0) {
+		dev_err(&hw->ilo_dev->dev, "Open could not dequeue a packet\n");
+		return -EBUSY;
+	}
+
+	ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, 0);
+	doorbell_set(driver_ccb);
+	return 0;
+}
+
+static inline int is_channel_reset(struct ccb *ccb)
+{
+	/* check for this particular channel needing a reset */
+	return FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset;
+}
+
+static inline void set_channel_reset(struct ccb *ccb)
+{
+	/* set a flag indicating this channel needs a reset */
+	FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset = 1;
+}
+
+static inline int get_device_outbound(struct ilo_hwinfo *hw)
+{
+	return ioread32(&hw->mmio_vaddr[DB_OUT]);
+}
+
+static inline int is_db_reset(int db_out)
+{
+	return db_out & (1 << DB_RESET);
+}
+
+static inline int is_device_reset(struct ilo_hwinfo *hw)
+{
+	/* check for global reset condition */
+	return is_db_reset(get_device_outbound(hw));
+}
+
+static inline void clear_pending_db(struct ilo_hwinfo *hw, int clr)
+{
+	iowrite32(clr, &hw->mmio_vaddr[DB_OUT]);
+}
+
+static inline void clear_device(struct ilo_hwinfo *hw)
+{
+	/* clear the device (reset bits, pending channel entries) */
+	clear_pending_db(hw, -1);
+}
+
+static inline void ilo_enable_interrupts(struct ilo_hwinfo *hw)
+{
+	iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) | 1, &hw->mmio_vaddr[DB_IRQ]);
+}
+
+static inline void ilo_disable_interrupts(struct ilo_hwinfo *hw)
+{
+	iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) & ~1,
+		 &hw->mmio_vaddr[DB_IRQ]);
+}
+
+static void ilo_set_reset(struct ilo_hwinfo *hw)
+{
+	int slot;
+
+	/*
+	 * Mapped memory is zeroed on ilo reset, so set a per ccb flag
+	 * to indicate that this ccb needs to be closed and reopened.
+	 */
+	for (slot = 0; slot < MAX_CCB; slot++) {
+		if (!hw->ccb_alloc[slot])
+			continue;
+		set_channel_reset(&hw->ccb_alloc[slot]->driver_ccb);
+	}
+}
+
+static ssize_t ilo_read(struct file *fp, char __user *buf,
+			size_t len, loff_t *off)
+{
+	int err, found, cnt, pkt_id, pkt_len;
+	struct ccb_data *data = fp->private_data;
+	struct ccb *driver_ccb = &data->driver_ccb;
+	struct ilo_hwinfo *hw = data->ilo_hw;
+	void *pkt;
+
+	if (is_channel_reset(driver_ccb)) {
+		/*
+		 * If the device has been reset, applications
+		 * need to close and reopen all ccbs.
+		 */
+		return -ENODEV;
+	}
+
+	/*
+	 * This function is to be called when data is expected
+	 * in the channel, and will return an error if no packet is found
+	 * during the loop below.  The sleep/retry logic is to allow
+	 * applications to call read() immediately post write(),
+	 * and give iLO some time to process the sent packet.
+	 */
+	cnt = 20;
+	do {
+		/* look for a received packet */
+		found = ilo_pkt_dequeue(hw, driver_ccb, RECVQ, &pkt_id,
+					&pkt_len, &pkt);
+		if (found)
+			break;
+		cnt--;
+		msleep(100);
+	} while (!found && cnt);
+
+	if (!found)
+		return -EAGAIN;
+
+	/* only copy the length of the received packet */
+	if (pkt_len < len)
+		len = pkt_len;
+
+	err = copy_to_user(buf, pkt, len);
+
+	/* return the received packet to the queue */
+	ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, desc_mem_sz(1));
+
+	return err ? -EFAULT : len;
+}
+
+static ssize_t ilo_write(struct file *fp, const char __user *buf,
+			 size_t len, loff_t *off)
+{
+	int err, pkt_id, pkt_len;
+	struct ccb_data *data = fp->private_data;
+	struct ccb *driver_ccb = &data->driver_ccb;
+	struct ilo_hwinfo *hw = data->ilo_hw;
+	void *pkt;
+
+	if (is_channel_reset(driver_ccb))
+		return -ENODEV;
+
+	/* get a packet to send the user command */
+	if (!ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, &pkt_len, &pkt))
+		return -EBUSY;
+
+	/* limit the length to the length of the packet */
+	if (pkt_len < len)
+		len = pkt_len;
+
+	/* on failure, set the len to 0 to return empty packet to the device */
+	err = copy_from_user(pkt, buf, len);
+	if (err)
+		len = 0;
+
+	/* send the packet */
+	ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, len);
+	doorbell_set(driver_ccb);
+
+	return err ? -EFAULT : len;
+}
+
+static unsigned int ilo_poll(struct file *fp, poll_table *wait)
+{
+	struct ccb_data *data = fp->private_data;
+	struct ccb *driver_ccb = &data->driver_ccb;
+
+	poll_wait(fp, &data->ccb_waitq, wait);
+
+	if (is_channel_reset(driver_ccb))
+		return POLLERR;
+	else if (ilo_pkt_recv(data->ilo_hw, driver_ccb))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+static int ilo_close(struct inode *ip, struct file *fp)
+{
+	int slot;
+	struct ccb_data *data;
+	struct ilo_hwinfo *hw;
+	unsigned long flags;
+
+	slot = iminor(ip) % MAX_CCB;
+	hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev);
+
+	spin_lock(&hw->open_lock);
+
+	if (hw->ccb_alloc[slot]->ccb_cnt == 1) {
+
+		data = fp->private_data;
+
+		spin_lock_irqsave(&hw->alloc_lock, flags);
+		hw->ccb_alloc[slot] = NULL;
+		spin_unlock_irqrestore(&hw->alloc_lock, flags);
+
+		ilo_ccb_close(hw->ilo_dev, data);
+
+		kfree(data);
+	} else
+		hw->ccb_alloc[slot]->ccb_cnt--;
+
+	spin_unlock(&hw->open_lock);
+
+	return 0;
+}
+
+static int ilo_open(struct inode *ip, struct file *fp)
+{
+	int slot, error;
+	struct ccb_data *data;
+	struct ilo_hwinfo *hw;
+	unsigned long flags;
+
+	slot = iminor(ip) % MAX_CCB;
+	hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev);
+
+	/* new ccb allocation */
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	spin_lock(&hw->open_lock);
+
+	/* each fd private_data holds sw/hw view of ccb */
+	if (hw->ccb_alloc[slot] == NULL) {
+		/* create a channel control block for this minor */
+		error = ilo_ccb_setup(hw, data, slot);
+		if (error) {
+			kfree(data);
+			goto out;
+		}
+
+		data->ccb_cnt = 1;
+		data->ccb_excl = fp->f_flags & O_EXCL;
+		data->ilo_hw = hw;
+		init_waitqueue_head(&data->ccb_waitq);
+
+		/* write the ccb to hw */
+		spin_lock_irqsave(&hw->alloc_lock, flags);
+		ilo_ccb_open(hw, data, slot);
+		hw->ccb_alloc[slot] = data;
+		spin_unlock_irqrestore(&hw->alloc_lock, flags);
+
+		/* make sure the channel is functional */
+		error = ilo_ccb_verify(hw, data);
+		if (error) {
+
+			spin_lock_irqsave(&hw->alloc_lock, flags);
+			hw->ccb_alloc[slot] = NULL;
+			spin_unlock_irqrestore(&hw->alloc_lock, flags);
+
+			ilo_ccb_close(hw->ilo_dev, data);
+
+			kfree(data);
+			goto out;
+		}
+
+	} else {
+		kfree(data);
+		if (fp->f_flags & O_EXCL || hw->ccb_alloc[slot]->ccb_excl) {
+			/*
+			 * The channel exists, and either this open
+			 * or a previous open of this channel wants
+			 * exclusive access.
+			 */
+			error = -EBUSY;
+		} else {
+			hw->ccb_alloc[slot]->ccb_cnt++;
+			error = 0;
+		}
+	}
+out:
+	spin_unlock(&hw->open_lock);
+
+	if (!error)
+		fp->private_data = hw->ccb_alloc[slot];
+
+	return error;
+}
+
+static const struct file_operations ilo_fops = {
+	.owner		= THIS_MODULE,
+	.read		= ilo_read,
+	.write		= ilo_write,
+	.poll		= ilo_poll,
+	.open 		= ilo_open,
+	.release 	= ilo_close,
+};
+
+static irqreturn_t ilo_isr(int irq, void *data)
+{
+	struct ilo_hwinfo *hw = data;
+	int pending, i;
+
+	spin_lock(&hw->alloc_lock);
+
+	/* check for ccbs which have data */
+	pending = get_device_outbound(hw);
+	if (!pending) {
+		spin_unlock(&hw->alloc_lock);
+		return IRQ_NONE;
+	}
+
+	if (is_db_reset(pending)) {
+		/* wake up all ccbs if the device was reset */
+		pending = -1;
+		ilo_set_reset(hw);
+	}
+
+	for (i = 0; i < MAX_CCB; i++) {
+		if (!hw->ccb_alloc[i])
+			continue;
+		if (pending & (1 << i))
+			wake_up_interruptible(&hw->ccb_alloc[i]->ccb_waitq);
+	}
+
+	/* clear the device of the channels that have been handled */
+	clear_pending_db(hw, pending);
+
+	spin_unlock(&hw->alloc_lock);
+
+	return IRQ_HANDLED;
+}
+
+static void ilo_unmap_device(struct pci_dev *pdev, struct ilo_hwinfo *hw)
+{
+	pci_iounmap(pdev, hw->db_vaddr);
+	pci_iounmap(pdev, hw->ram_vaddr);
+	pci_iounmap(pdev, hw->mmio_vaddr);
+}
+
+static int __devinit ilo_map_device(struct pci_dev *pdev, struct ilo_hwinfo *hw)
+{
+	int error = -ENOMEM;
+
+	/* map the memory mapped i/o registers */
+	hw->mmio_vaddr = pci_iomap(pdev, 1, 0);
+	if (hw->mmio_vaddr == NULL) {
+		dev_err(&pdev->dev, "Error mapping mmio\n");
+		goto out;
+	}
+
+	/* map the adapter shared memory region */
+	hw->ram_vaddr = pci_iomap(pdev, 2, MAX_CCB * ILOHW_CCB_SZ);
+	if (hw->ram_vaddr == NULL) {
+		dev_err(&pdev->dev, "Error mapping shared mem\n");
+		goto mmio_free;
+	}
+
+	/* map the doorbell aperture */
+	hw->db_vaddr = pci_iomap(pdev, 3, MAX_CCB * ONE_DB_SIZE);
+	if (hw->db_vaddr == NULL) {
+		dev_err(&pdev->dev, "Error mapping doorbell\n");
+		goto ram_free;
+	}
+
+	return 0;
+ram_free:
+	pci_iounmap(pdev, hw->ram_vaddr);
+mmio_free:
+	pci_iounmap(pdev, hw->mmio_vaddr);
+out:
+	return error;
+}
+
+static void ilo_remove(struct pci_dev *pdev)
+{
+	int i, minor;
+	struct ilo_hwinfo *ilo_hw = pci_get_drvdata(pdev);
+
+	clear_device(ilo_hw);
+
+	minor = MINOR(ilo_hw->cdev.dev);
+	for (i = minor; i < minor + MAX_CCB; i++)
+		device_destroy(ilo_class, MKDEV(ilo_major, i));
+
+	cdev_del(&ilo_hw->cdev);
+	ilo_disable_interrupts(ilo_hw);
+	free_irq(pdev->irq, ilo_hw);
+	ilo_unmap_device(pdev, ilo_hw);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	kfree(ilo_hw);
+	ilo_hwdev[(minor / MAX_CCB)] = 0;
+}
+
+static int __devinit ilo_probe(struct pci_dev *pdev,
+			       const struct pci_device_id *ent)
+{
+	int devnum, minor, start, error;
+	struct ilo_hwinfo *ilo_hw;
+
+	/* find a free range for device files */
+	for (devnum = 0; devnum < MAX_ILO_DEV; devnum++) {
+		if (ilo_hwdev[devnum] == 0) {
+			ilo_hwdev[devnum] = 1;
+			break;
+		}
+	}
+
+	if (devnum == MAX_ILO_DEV) {
+		dev_err(&pdev->dev, "Error finding free device\n");
+		return -ENODEV;
+	}
+
+	/* track global allocations for this device */
+	error = -ENOMEM;
+	ilo_hw = kzalloc(sizeof(*ilo_hw), GFP_KERNEL);
+	if (!ilo_hw)
+		goto out;
+
+	ilo_hw->ilo_dev = pdev;
+	spin_lock_init(&ilo_hw->alloc_lock);
+	spin_lock_init(&ilo_hw->fifo_lock);
+	spin_lock_init(&ilo_hw->open_lock);
+
+	error = pci_enable_device(pdev);
+	if (error)
+		goto free;
+
+	pci_set_master(pdev);
+
+	error = pci_request_regions(pdev, ILO_NAME);
+	if (error)
+		goto disable;
+
+	error = ilo_map_device(pdev, ilo_hw);
+	if (error)
+		goto free_regions;
+
+	pci_set_drvdata(pdev, ilo_hw);
+	clear_device(ilo_hw);
+
+	error = request_irq(pdev->irq, ilo_isr, IRQF_SHARED, "hpilo", ilo_hw);
+	if (error)
+		goto unmap;
+
+	ilo_enable_interrupts(ilo_hw);
+
+	cdev_init(&ilo_hw->cdev, &ilo_fops);
+	ilo_hw->cdev.owner = THIS_MODULE;
+	start = devnum * MAX_CCB;
+	error = cdev_add(&ilo_hw->cdev, MKDEV(ilo_major, start), MAX_CCB);
+	if (error) {
+		dev_err(&pdev->dev, "Could not add cdev\n");
+		goto remove_isr;
+	}
+
+	for (minor = 0 ; minor < MAX_CCB; minor++) {
+		struct device *dev;
+		dev = device_create(ilo_class, &pdev->dev,
+				    MKDEV(ilo_major, minor), NULL,
+				    "hpilo!d%dccb%d", devnum, minor);
+		if (IS_ERR(dev))
+			dev_err(&pdev->dev, "Could not create files\n");
+	}
+
+	return 0;
+remove_isr:
+	ilo_disable_interrupts(ilo_hw);
+	free_irq(pdev->irq, ilo_hw);
+unmap:
+	ilo_unmap_device(pdev, ilo_hw);
+free_regions:
+	pci_release_regions(pdev);
+disable:
+	pci_disable_device(pdev);
+free:
+	kfree(ilo_hw);
+out:
+	ilo_hwdev[devnum] = 0;
+	return error;
+}
+
+static struct pci_device_id ilo_devices[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_COMPAQ, 0xB204) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HP, 0x3307) },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, ilo_devices);
+
+static struct pci_driver ilo_driver = {
+	.name 	  = ILO_NAME,
+	.id_table = ilo_devices,
+	.probe 	  = ilo_probe,
+	.remove   = __devexit_p(ilo_remove),
+};
+
+static int __init ilo_init(void)
+{
+	int error;
+	dev_t dev;
+
+	ilo_class = class_create(THIS_MODULE, "iLO");
+	if (IS_ERR(ilo_class)) {
+		error = PTR_ERR(ilo_class);
+		goto out;
+	}
+
+	error = alloc_chrdev_region(&dev, 0, MAX_OPEN, ILO_NAME);
+	if (error)
+		goto class_destroy;
+
+	ilo_major = MAJOR(dev);
+
+	error =	pci_register_driver(&ilo_driver);
+	if (error)
+		goto chr_remove;
+
+	return 0;
+chr_remove:
+	unregister_chrdev_region(dev, MAX_OPEN);
+class_destroy:
+	class_destroy(ilo_class);
+out:
+	return error;
+}
+
+static void __exit ilo_exit(void)
+{
+	pci_unregister_driver(&ilo_driver);
+	unregister_chrdev_region(MKDEV(ilo_major, 0), MAX_OPEN);
+	class_destroy(ilo_class);
+}
+
+MODULE_VERSION("1.2");
+MODULE_ALIAS(ILO_NAME);
+MODULE_DESCRIPTION(ILO_NAME);
+MODULE_AUTHOR("David Altobelli <david.altobelli@hp.com>");
+MODULE_LICENSE("GPL v2");
+
+module_init(ilo_init);
+module_exit(ilo_exit);
diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h
new file mode 100644
index 00000000000..247eb386a97
--- /dev/null
+++ b/drivers/misc/hpilo.h
@@ -0,0 +1,212 @@
+/*
+ * linux/drivers/char/hpilo.h
+ *
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *	David Altobelli <david.altobelli@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __HPILO_H
+#define __HPILO_H
+
+#define ILO_NAME "hpilo"
+
+/* max number of open channel control blocks per device, hw limited to 32 */
+#define MAX_CCB		8
+/* max number of supported devices */
+#define MAX_ILO_DEV	1
+/* max number of files */
+#define MAX_OPEN	(MAX_CCB * MAX_ILO_DEV)
+/* total wait time in usec */
+#define MAX_WAIT_TIME	10000
+/* per spin wait time in usec */
+#define WAIT_TIME	10
+/* spin counter for open/close delay */
+#define MAX_WAIT	(MAX_WAIT_TIME / WAIT_TIME)
+
+/*
+ * Per device, used to track global memory allocations.
+ */
+struct ilo_hwinfo {
+	/* mmio registers on device */
+	char __iomem *mmio_vaddr;
+
+	/* doorbell registers on device */
+	char __iomem *db_vaddr;
+
+	/* shared memory on device used for channel control blocks */
+	char __iomem *ram_vaddr;
+
+	/* files corresponding to this device */
+	struct ccb_data *ccb_alloc[MAX_CCB];
+
+	struct pci_dev *ilo_dev;
+
+	/*
+	 * open_lock      serializes ccb_cnt during open and close
+	 * [ irq disabled ]
+	 * -> alloc_lock  used when adding/removing/searching ccb_alloc,
+	 *                which represents all ccbs open on the device
+	 * --> fifo_lock  controls access to fifo queues shared with hw
+	 *
+	 * Locks must be taken in this order, but open_lock and alloc_lock
+	 * are optional, they do not need to be held in order to take a
+	 * lower level lock.
+	 */
+	spinlock_t open_lock;
+	spinlock_t alloc_lock;
+	spinlock_t fifo_lock;
+
+	struct cdev cdev;
+};
+
+/* offset from mmio_vaddr for enabling doorbell interrupts */
+#define DB_IRQ		0xB2
+/* offset from mmio_vaddr for outbound communications */
+#define DB_OUT		0xD4
+/* DB_OUT reset bit */
+#define DB_RESET	26
+
+/*
+ * Channel control block. Used to manage hardware queues.
+ * The format must match hw's version.  The hw ccb is 128 bytes,
+ * but the context area shouldn't be touched by the driver.
+ */
+#define ILOSW_CCB_SZ	64
+#define ILOHW_CCB_SZ 	128
+struct ccb {
+	union {
+		char *send_fifobar;
+		u64 padding1;
+	} ccb_u1;
+	union {
+		char *send_desc;
+		u64 padding2;
+	} ccb_u2;
+	u64 send_ctrl;
+
+	union {
+		char *recv_fifobar;
+		u64 padding3;
+	} ccb_u3;
+	union {
+		char *recv_desc;
+		u64 padding4;
+	} ccb_u4;
+	u64 recv_ctrl;
+
+	union {
+		char __iomem *db_base;
+		u64 padding5;
+	} ccb_u5;
+
+	u64 channel;
+
+	/* unused context area (64 bytes) */
+};
+
+/* ccb queue parameters */
+#define SENDQ		1
+#define RECVQ 		2
+#define NR_QENTRY    	4
+#define L2_QENTRY_SZ 	12
+
+/* ccb ctrl bitfields */
+#define CTRL_BITPOS_L2SZ             0
+#define CTRL_BITPOS_FIFOINDEXMASK    4
+#define CTRL_BITPOS_DESCLIMIT        18
+#define CTRL_BITPOS_A                30
+#define CTRL_BITPOS_G                31
+
+/* ccb doorbell macros */
+#define L2_DB_SIZE		14
+#define ONE_DB_SIZE		(1 << L2_DB_SIZE)
+
+/*
+ * Per fd structure used to track the ccb allocated to that dev file.
+ */
+struct ccb_data {
+	/* software version of ccb, using virtual addrs */
+	struct ccb  driver_ccb;
+
+	/* hardware version of ccb, using physical addrs */
+	struct ccb  ilo_ccb;
+
+	/* hardware ccb is written to this shared mapped device memory */
+	struct ccb __iomem *mapped_ccb;
+
+	/* dma'able memory used for send/recv queues */
+	void       *dma_va;
+	dma_addr_t  dma_pa;
+	size_t      dma_size;
+
+	/* pointer to hardware device info */
+	struct ilo_hwinfo *ilo_hw;
+
+	/* queue for this ccb to wait for recv data */
+	wait_queue_head_t ccb_waitq;
+
+	/* usage count, to allow for shared ccb's */
+	int	    ccb_cnt;
+
+	/* open wanted exclusive access to this ccb */
+	int	    ccb_excl;
+};
+
+/*
+ * FIFO queue structure, shared with hw.
+ */
+#define ILO_START_ALIGN	4096
+#define ILO_CACHE_SZ 	 128
+struct fifo {
+    u64 nrents;	/* user requested number of fifo entries */
+    u64 imask;  /* mask to extract valid fifo index */
+    u64 merge;	/*  O/C bits to merge in during enqueue operation */
+    u64 reset;	/* set to non-zero when the target device resets */
+    u8  pad_0[ILO_CACHE_SZ - (sizeof(u64) * 4)];
+
+    u64 head;
+    u8  pad_1[ILO_CACHE_SZ - (sizeof(u64))];
+
+    u64 tail;
+    u8  pad_2[ILO_CACHE_SZ - (sizeof(u64))];
+
+    u64 fifobar[1];
+};
+
+/* convert between struct fifo, and the fifobar, which is saved in the ccb */
+#define FIFOHANDLESIZE (sizeof(struct fifo) - sizeof(u64))
+#define FIFOBARTOHANDLE(_fifo) \
+	((struct fifo *)(((char *)(_fifo)) - FIFOHANDLESIZE))
+
+/* the number of qwords to consume from the entry descriptor */
+#define ENTRY_BITPOS_QWORDS      0
+/* descriptor index number (within a specified queue) */
+#define ENTRY_BITPOS_DESCRIPTOR  10
+/* state bit, fifo entry consumed by consumer */
+#define ENTRY_BITPOS_C           22
+/* state bit, fifo entry is occupied */
+#define ENTRY_BITPOS_O           23
+
+#define ENTRY_BITS_QWORDS        10
+#define ENTRY_BITS_DESCRIPTOR    12
+#define ENTRY_BITS_C             1
+#define ENTRY_BITS_O             1
+#define ENTRY_BITS_TOTAL	\
+	(ENTRY_BITS_C + ENTRY_BITS_O + \
+	 ENTRY_BITS_QWORDS + ENTRY_BITS_DESCRIPTOR)
+
+/* extract various entry fields */
+#define ENTRY_MASK ((1 << ENTRY_BITS_TOTAL) - 1)
+#define ENTRY_MASK_C (((1 << ENTRY_BITS_C) - 1) << ENTRY_BITPOS_C)
+#define ENTRY_MASK_O (((1 << ENTRY_BITS_O) - 1) << ENTRY_BITPOS_O)
+#define ENTRY_MASK_QWORDS \
+	(((1 << ENTRY_BITS_QWORDS) - 1) << ENTRY_BITPOS_QWORDS)
+#define ENTRY_MASK_DESCRIPTOR \
+	(((1 << ENTRY_BITS_DESCRIPTOR) - 1) << ENTRY_BITPOS_DESCRIPTOR)
+
+#define ENTRY_MASK_NOSTATE (ENTRY_MASK >> (ENTRY_BITS_C + ENTRY_BITS_O))
+
+#endif /* __HPILO_H */
diff --git a/drivers/misc/ibmasm/Makefile b/drivers/misc/ibmasm/Makefile
new file mode 100644
index 00000000000..9e63ade5ffd
--- /dev/null
+++ b/drivers/misc/ibmasm/Makefile
@@ -0,0 +1,15 @@
+
+obj-$(CONFIG_IBM_ASM) := ibmasm.o
+
+ibmasm-y :=	module.o      \
+		ibmasmfs.o    \
+		event.o       \
+		command.o     \
+		remote.o      \
+		heartbeat.o   \
+		r_heartbeat.o \
+		dot_command.o \
+		lowlevel.o
+
+ibmasm-$(CONFIG_SERIAL_8250) += uart.o
+
diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c
new file mode 100644
index 00000000000..e2031739aa2
--- /dev/null
+++ b/drivers/misc/ibmasm/command.c
@@ -0,0 +1,186 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/sched.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+
+static void exec_next_command(struct service_processor *sp);
+
+static atomic_t command_count = ATOMIC_INIT(0);
+
+struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size)
+{
+	struct command *cmd;
+
+	if (buffer_size > IBMASM_CMD_MAX_BUFFER_SIZE)
+		return NULL;
+
+	cmd = kzalloc(sizeof(struct command), GFP_KERNEL);
+	if (cmd == NULL)
+		return NULL;
+
+
+	cmd->buffer = kzalloc(buffer_size, GFP_KERNEL);
+	if (cmd->buffer == NULL) {
+		kfree(cmd);
+		return NULL;
+	}
+	cmd->buffer_size = buffer_size;
+
+	kref_init(&cmd->kref);
+	cmd->lock = &sp->lock;
+
+	cmd->status = IBMASM_CMD_PENDING;
+	init_waitqueue_head(&cmd->wait);
+	INIT_LIST_HEAD(&cmd->queue_node);
+
+	atomic_inc(&command_count);
+	dbg("command count: %d\n", atomic_read(&command_count));
+
+	return cmd;
+}
+
+void ibmasm_free_command(struct kref *kref)
+{
+	struct command *cmd = to_command(kref);
+
+	list_del(&cmd->queue_node);
+	atomic_dec(&command_count);
+	dbg("command count: %d\n", atomic_read(&command_count));
+	kfree(cmd->buffer);
+	kfree(cmd);
+}
+
+static void enqueue_command(struct service_processor *sp, struct command *cmd)
+{
+	list_add_tail(&cmd->queue_node, &sp->command_queue);
+}
+
+static struct command *dequeue_command(struct service_processor *sp)
+{
+	struct command *cmd;
+	struct list_head *next;
+
+	if (list_empty(&sp->command_queue))
+		return NULL;
+
+	next = sp->command_queue.next;
+	list_del_init(next);
+	cmd = list_entry(next, struct command, queue_node);
+
+	return cmd;
+}
+
+static inline void do_exec_command(struct service_processor *sp)
+{
+	char tsbuf[32];
+
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+
+	if (ibmasm_send_i2o_message(sp)) {
+		sp->current_command->status = IBMASM_CMD_FAILED;
+		wake_up(&sp->current_command->wait);
+		command_put(sp->current_command);
+		exec_next_command(sp);
+	}
+}
+
+/**
+ * exec_command
+ * send a command to a service processor
+ * Commands are executed sequentially. One command (sp->current_command)
+ * is sent to the service processor. Once the interrupt handler gets a
+ * message of type command_response, the message is copied into
+ * the current commands buffer,
+ */
+void ibmasm_exec_command(struct service_processor *sp, struct command *cmd)
+{
+	unsigned long flags;
+	char tsbuf[32];
+
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+
+	spin_lock_irqsave(&sp->lock, flags);
+
+	if (!sp->current_command) {
+		sp->current_command = cmd;
+		command_get(sp->current_command);
+		spin_unlock_irqrestore(&sp->lock, flags);
+		do_exec_command(sp);
+	} else {
+		enqueue_command(sp, cmd);
+		spin_unlock_irqrestore(&sp->lock, flags);
+	}
+}
+
+static void exec_next_command(struct service_processor *sp)
+{
+	unsigned long flags;
+	char tsbuf[32];
+
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+
+	spin_lock_irqsave(&sp->lock, flags);
+	sp->current_command = dequeue_command(sp);
+	if (sp->current_command) {
+		command_get(sp->current_command);
+		spin_unlock_irqrestore(&sp->lock, flags);
+		do_exec_command(sp);
+	} else {
+		spin_unlock_irqrestore(&sp->lock, flags);
+	}
+}
+
+/**
+ * Sleep until a command has failed or a response has been received
+ * and the command status been updated by the interrupt handler.
+ * (see receive_response).
+ */
+void ibmasm_wait_for_response(struct command *cmd, int timeout)
+{
+	wait_event_interruptible_timeout(cmd->wait,
+				cmd->status == IBMASM_CMD_COMPLETE ||
+				cmd->status == IBMASM_CMD_FAILED,
+				timeout * HZ);
+}
+
+/**
+ * receive_command_response
+ * called by the interrupt handler when a dot command of type command_response
+ * was received.
+ */
+void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size)
+{
+	struct command *cmd = sp->current_command;
+
+	if (!sp->current_command)
+		return;
+
+	memcpy_fromio(cmd->buffer, response, min(size, cmd->buffer_size));
+	cmd->status = IBMASM_CMD_COMPLETE;
+	wake_up(&sp->current_command->wait);
+	command_put(sp->current_command);
+	exec_next_command(sp);
+}
diff --git a/drivers/misc/ibmasm/dot_command.c b/drivers/misc/ibmasm/dot_command.c
new file mode 100644
index 00000000000..3dd2dfb8da1
--- /dev/null
+++ b/drivers/misc/ibmasm/dot_command.c
@@ -0,0 +1,152 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+#include "ibmasm.h"
+#include "dot_command.h"
+
+/**
+ * Dispatch an incoming message to the specific handler for the message.
+ * Called from interrupt context.
+ */
+void ibmasm_receive_message(struct service_processor *sp, void *message, int message_size)
+{
+	u32 size;
+	struct dot_command_header *header = (struct dot_command_header *)message;
+
+	if (message_size == 0)
+		return;
+
+	size = get_dot_command_size(message);
+	if (size == 0)
+		return;
+
+	if (size > message_size)
+		size = message_size;
+
+	switch (header->type) {
+	case sp_event:
+		ibmasm_receive_event(sp, message, size);
+		break;
+	case sp_command_response:
+		ibmasm_receive_command_response(sp, message, size);
+		break;
+	case sp_heartbeat:
+		ibmasm_receive_heartbeat(sp, message, size);
+		break;
+	default:
+		dev_err(sp->dev, "Received unknown message from service processor\n");
+	}
+}
+
+
+#define INIT_BUFFER_SIZE 32
+
+
+/**
+ * send the 4.3.5.10 dot command (driver VPD) to the service processor
+ */
+int ibmasm_send_driver_vpd(struct service_processor *sp)
+{
+	struct command *command;
+	struct dot_command_header *header;
+	u8 *vpd_command;
+	u8 *vpd_data;
+	int result = 0;
+
+	command = ibmasm_new_command(sp, INIT_BUFFER_SIZE);
+	if (command == NULL)
+		return -ENOMEM;
+
+	header = (struct dot_command_header *)command->buffer;
+	header->type                = sp_write;
+	header->command_size        = 4;
+	header->data_size           = 16;
+	header->status              = 0;
+	header->reserved            = 0;
+
+	vpd_command = command->buffer + sizeof(struct dot_command_header);
+	vpd_command[0] = 0x4;
+	vpd_command[1] = 0x3;
+	vpd_command[2] = 0x5;
+	vpd_command[3] = 0xa;
+
+	vpd_data = vpd_command + header->command_size;
+	vpd_data[0] = 0;
+	strcat(vpd_data, IBMASM_DRIVER_VPD);
+	vpd_data[10] = 0;
+	vpd_data[15] = 0;
+
+	ibmasm_exec_command(sp, command);
+	ibmasm_wait_for_response(command, IBMASM_CMD_TIMEOUT_NORMAL);
+
+	if (command->status != IBMASM_CMD_COMPLETE)
+		result = -ENODEV;
+
+	command_put(command);
+
+	return result;
+}
+
+struct os_state_command {
+	struct dot_command_header	header;
+	unsigned char			command[3];
+	unsigned char			data;
+};
+
+/**
+ * send the 4.3.6 dot command (os state) to the service processor
+ * During driver init this function is called with os state "up".
+ * This causes the service processor to start sending heartbeats the
+ * driver.
+ * During driver exit the function is called with os state "down",
+ * causing the service processor to stop the heartbeats.
+ */
+int ibmasm_send_os_state(struct service_processor *sp, int os_state)
+{
+	struct command *cmd;
+	struct os_state_command *os_state_cmd;
+	int result = 0;
+
+	cmd = ibmasm_new_command(sp, sizeof(struct os_state_command));
+	if (cmd == NULL)
+		return -ENOMEM;
+
+	os_state_cmd = (struct os_state_command *)cmd->buffer;
+	os_state_cmd->header.type		= sp_write;
+	os_state_cmd->header.command_size	= 3;
+	os_state_cmd->header.data_size		= 1;
+	os_state_cmd->header.status		= 0;
+	os_state_cmd->command[0]		= 4;
+	os_state_cmd->command[1]		= 3;
+	os_state_cmd->command[2]		= 6;
+	os_state_cmd->data			= os_state;
+
+	ibmasm_exec_command(sp, cmd);
+	ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL);
+
+	if (cmd->status != IBMASM_CMD_COMPLETE)
+		result = -ENODEV;
+
+	command_put(cmd);
+	return result;
+}
diff --git a/drivers/misc/ibmasm/dot_command.h b/drivers/misc/ibmasm/dot_command.h
new file mode 100644
index 00000000000..6cbba1afef3
--- /dev/null
+++ b/drivers/misc/ibmasm/dot_command.h
@@ -0,0 +1,78 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+#ifndef __DOT_COMMAND_H__
+#define __DOT_COMMAND_H__
+
+/*
+ * dot commands are the protocol used to communicate with the service
+ * processor.
+ * They consist of header, a command of variable length and data of
+ * variable length.
+ */
+
+/* dot command types */
+#define sp_write		0
+#define sp_write_next		1
+#define sp_read			2
+#define sp_read_next		3
+#define sp_command_response	4
+#define sp_event		5
+#define sp_heartbeat		6
+
+#pragma pack(1)
+struct dot_command_header {
+	u8	type;
+	u8	command_size;
+	u16	data_size;
+	u8	status;
+	u8	reserved;
+};
+#pragma pack()
+
+static inline size_t get_dot_command_size(void *buffer)
+{
+	struct dot_command_header *cmd = (struct dot_command_header *)buffer;
+	return sizeof(struct dot_command_header) + cmd->command_size + cmd->data_size;
+}
+
+static inline unsigned int get_dot_command_timeout(void *buffer)
+{
+	struct dot_command_header *header = (struct dot_command_header *)buffer;
+	unsigned char *cmd = buffer + sizeof(struct dot_command_header);
+
+	/* dot commands 6.3.1, 7.1 and 8.x need a longer timeout */
+
+	if (header->command_size == 3) {
+		if ((cmd[0] == 6) && (cmd[1] == 3) && (cmd[2] == 1))
+			return IBMASM_CMD_TIMEOUT_EXTRA;
+	} else if (header->command_size == 2) {
+		if ((cmd[0] == 7) && (cmd[1] == 1))
+			return IBMASM_CMD_TIMEOUT_EXTRA;
+		if (cmd[0] == 8)
+			return IBMASM_CMD_TIMEOUT_EXTRA;
+	}
+	return IBMASM_CMD_TIMEOUT_NORMAL;
+}
+
+#endif /* __DOT_COMMAND_H__ */
diff --git a/drivers/misc/ibmasm/event.c b/drivers/misc/ibmasm/event.c
new file mode 100644
index 00000000000..572d41ffc18
--- /dev/null
+++ b/drivers/misc/ibmasm/event.c
@@ -0,0 +1,176 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/sched.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+
+/*
+ * ASM service processor event handling routines.
+ *
+ * Events are signalled to the device drivers through interrupts.
+ * They have the format of dot commands, with the type field set to
+ * sp_event.
+ * The driver does not interpret the events, it simply stores them in a
+ * circular buffer.
+ */
+
+static void wake_up_event_readers(struct service_processor *sp)
+{
+	struct event_reader *reader;
+
+	list_for_each_entry(reader, &sp->event_buffer->readers, node)
+                wake_up_interruptible(&reader->wait);
+}
+
+/**
+ * receive_event
+ * Called by the interrupt handler when a dot command of type sp_event is
+ * received.
+ * Store the event in the circular event buffer, wake up any sleeping
+ * event readers.
+ * There is no reader marker in the buffer, therefore readers are
+ * responsible for keeping up with the writer, or they will lose events.
+ */
+void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size)
+{
+	struct event_buffer *buffer = sp->event_buffer;
+	struct ibmasm_event *event;
+	unsigned long flags;
+
+	data_size = min(data_size, IBMASM_EVENT_MAX_SIZE);
+
+	spin_lock_irqsave(&sp->lock, flags);
+	/* copy the event into the next slot in the circular buffer */
+	event = &buffer->events[buffer->next_index];
+	memcpy_fromio(event->data, data, data_size);
+	event->data_size = data_size;
+	event->serial_number = buffer->next_serial_number;
+
+	/* advance indices in the buffer */
+	buffer->next_index = (buffer->next_index + 1) % IBMASM_NUM_EVENTS;
+	buffer->next_serial_number++;
+	spin_unlock_irqrestore(&sp->lock, flags);
+
+	wake_up_event_readers(sp);
+}
+
+static inline int event_available(struct event_buffer *b, struct event_reader *r)
+{
+	return (r->next_serial_number < b->next_serial_number);
+}
+
+/**
+ * get_next_event
+ * Called by event readers (initiated from user space through the file
+ * system).
+ * Sleeps until a new event is available.
+ */
+int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader)
+{
+	struct event_buffer *buffer = sp->event_buffer;
+	struct ibmasm_event *event;
+	unsigned int index;
+	unsigned long flags;
+
+	reader->cancelled = 0;
+
+	if (wait_event_interruptible(reader->wait,
+			event_available(buffer, reader) || reader->cancelled))
+		return -ERESTARTSYS;
+
+	if (!event_available(buffer, reader))
+		return 0;
+
+	spin_lock_irqsave(&sp->lock, flags);
+
+	index = buffer->next_index;
+	event = &buffer->events[index];
+	while (event->serial_number < reader->next_serial_number) {
+		index = (index + 1) % IBMASM_NUM_EVENTS;
+		event = &buffer->events[index];
+	}
+	memcpy(reader->data, event->data, event->data_size);
+	reader->data_size = event->data_size;
+	reader->next_serial_number = event->serial_number + 1;
+
+	spin_unlock_irqrestore(&sp->lock, flags);
+
+	return event->data_size;
+}
+
+void ibmasm_cancel_next_event(struct event_reader *reader)
+{
+        reader->cancelled = 1;
+        wake_up_interruptible(&reader->wait);
+}
+
+void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader)
+{
+	unsigned long flags;
+
+	reader->next_serial_number = sp->event_buffer->next_serial_number;
+	init_waitqueue_head(&reader->wait);
+	spin_lock_irqsave(&sp->lock, flags);
+	list_add(&reader->node, &sp->event_buffer->readers);
+	spin_unlock_irqrestore(&sp->lock, flags);
+}
+
+void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sp->lock, flags);
+	list_del(&reader->node);
+	spin_unlock_irqrestore(&sp->lock, flags);
+}
+
+int ibmasm_event_buffer_init(struct service_processor *sp)
+{
+	struct event_buffer *buffer;
+	struct ibmasm_event *event;
+	int i;
+
+	buffer = kmalloc(sizeof(struct event_buffer), GFP_KERNEL);
+	if (!buffer)
+		return 1;
+
+	buffer->next_index = 0;
+	buffer->next_serial_number = 1;
+
+	event = buffer->events;
+	for (i=0; i<IBMASM_NUM_EVENTS; i++, event++)
+		event->serial_number = 0;
+
+	INIT_LIST_HEAD(&buffer->readers);
+
+	sp->event_buffer = buffer;
+
+	return 0;
+}
+
+void ibmasm_event_buffer_exit(struct service_processor *sp)
+{
+	kfree(sp->event_buffer);
+}
diff --git a/drivers/misc/ibmasm/heartbeat.c b/drivers/misc/ibmasm/heartbeat.c
new file mode 100644
index 00000000000..1bc4306572a
--- /dev/null
+++ b/drivers/misc/ibmasm/heartbeat.c
@@ -0,0 +1,101 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/notifier.h>
+#include "ibmasm.h"
+#include "dot_command.h"
+#include "lowlevel.h"
+
+static int suspend_heartbeats = 0;
+
+/*
+ * Once the driver indicates to the service processor that it is running
+ * - see send_os_state() - the service processor sends periodic heartbeats
+ * to the driver. The driver must respond to the heartbeats or else the OS
+ * will be rebooted.
+ * In the case of a panic the interrupt handler continues to work and thus
+ * continues to respond to heartbeats, making the service processor believe
+ * the OS is still running and thus preventing a reboot.
+ * To prevent this from happening a callback is added the panic_notifier_list.
+ * Before responding to a heartbeat the driver checks if a panic has happened,
+ * if yes it suspends heartbeat, causing the service processor to reboot as
+ * expected.
+ */
+static int panic_happened(struct notifier_block *n, unsigned long val, void *v)
+{
+	suspend_heartbeats = 1;
+	return 0;
+}
+
+static struct notifier_block panic_notifier = { panic_happened, NULL, 1 };
+
+void ibmasm_register_panic_notifier(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_notifier);
+}
+
+void ibmasm_unregister_panic_notifier(void)
+{
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+			&panic_notifier);
+}
+
+
+int ibmasm_heartbeat_init(struct service_processor *sp)
+{
+	sp->heartbeat = ibmasm_new_command(sp, HEARTBEAT_BUFFER_SIZE);
+	if (sp->heartbeat == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ibmasm_heartbeat_exit(struct service_processor *sp)
+{
+	char tsbuf[32];
+
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+	ibmasm_wait_for_response(sp->heartbeat, IBMASM_CMD_TIMEOUT_NORMAL);
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+	suspend_heartbeats = 1;
+	command_put(sp->heartbeat);
+}
+
+void ibmasm_receive_heartbeat(struct service_processor *sp,  void *message, size_t size)
+{
+	struct command *cmd = sp->heartbeat;
+	struct dot_command_header *header = (struct dot_command_header *)cmd->buffer;
+	char tsbuf[32];
+
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+	if (suspend_heartbeats)
+		return;
+
+	/* return the received dot command to sender */
+	cmd->status = IBMASM_CMD_PENDING;
+	size = min(size, cmd->buffer_size);
+	memcpy_fromio(cmd->buffer, message, size);
+	header->type = sp_write;
+	ibmasm_exec_command(sp, cmd);
+}
diff --git a/drivers/misc/ibmasm/i2o.h b/drivers/misc/ibmasm/i2o.h
new file mode 100644
index 00000000000..bf2c738d2b7
--- /dev/null
+++ b/drivers/misc/ibmasm/i2o.h
@@ -0,0 +1,77 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+#pragma pack(1)
+struct i2o_header {
+	u8	version;
+	u8	message_flags;
+	u16	message_size;
+	u8	target;
+	u8	initiator_and_target;
+	u8	initiator;
+	u8	function;
+	u32	initiator_context;
+};
+#pragma pack()
+
+#define I2O_HEADER_TEMPLATE \
+      { .version              = 0x01, \
+	.message_flags        = 0x00, \
+	.function             = 0xFF, \
+	.initiator            = 0x00, \
+	.initiator_and_target = 0x40, \
+	.target               = 0x00, \
+	.initiator_context    = 0x0 }
+
+#define I2O_MESSAGE_SIZE	0x1000
+#define I2O_COMMAND_SIZE	(I2O_MESSAGE_SIZE - sizeof(struct i2o_header))
+
+#pragma pack(1)
+struct i2o_message {
+	struct i2o_header	header;
+	void			*data;
+};
+#pragma pack()
+
+static inline unsigned short outgoing_message_size(unsigned int data_size)
+{
+	unsigned int size;
+	unsigned short i2o_size;
+
+	if (data_size > I2O_COMMAND_SIZE)
+		data_size = I2O_COMMAND_SIZE;
+
+	size = sizeof(struct i2o_header) + data_size;
+
+	i2o_size = size / sizeof(u32);
+
+	if (size % sizeof(u32))
+	       i2o_size++;
+
+	return i2o_size;
+}
+
+static inline u32 incoming_data_size(struct i2o_message *i2o_message)
+{
+	return (sizeof(u32) * i2o_message->header.message_size);
+}
diff --git a/drivers/misc/ibmasm/ibmasm.h b/drivers/misc/ibmasm/ibmasm.h
new file mode 100644
index 00000000000..4d8a4e248b3
--- /dev/null
+++ b/drivers/misc/ibmasm/ibmasm.h
@@ -0,0 +1,220 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/device.h>
+#include <linux/input.h>
+
+/* Driver identification */
+#define DRIVER_NAME	"ibmasm"
+#define DRIVER_VERSION  "1.0"
+#define DRIVER_AUTHOR   "Max Asbock <masbock@us.ibm.com>, Vernon Mauery <vernux@us.ibm.com>"
+#define DRIVER_DESC     "IBM ASM Service Processor Driver"
+
+#define err(msg) printk(KERN_ERR "%s: " msg "\n", DRIVER_NAME)
+#define info(msg) printk(KERN_INFO "%s: " msg "\n", DRIVER_NAME)
+
+extern int ibmasm_debug;
+#define dbg(STR, ARGS...)					\
+	do {							\
+		if (ibmasm_debug)				\
+			printk(KERN_DEBUG STR , ##ARGS);	\
+	} while (0)
+
+static inline char *get_timestamp(char *buf)
+{
+	struct timeval now;
+	do_gettimeofday(&now);
+	sprintf(buf, "%lu.%lu", now.tv_sec, now.tv_usec);
+	return buf;
+}
+
+#define IBMASM_CMD_PENDING	0
+#define IBMASM_CMD_COMPLETE	1
+#define IBMASM_CMD_FAILED	2
+
+#define IBMASM_CMD_TIMEOUT_NORMAL	45
+#define IBMASM_CMD_TIMEOUT_EXTRA	240
+
+#define IBMASM_CMD_MAX_BUFFER_SIZE	0x8000
+
+#define REVERSE_HEARTBEAT_TIMEOUT	120
+
+#define HEARTBEAT_BUFFER_SIZE		0x400
+
+#ifdef IA64
+#define IBMASM_DRIVER_VPD "Lin64 6.08      "
+#else
+#define IBMASM_DRIVER_VPD "Lin32 6.08      "
+#endif
+
+#define SYSTEM_STATE_OS_UP      5
+#define SYSTEM_STATE_OS_DOWN    4
+
+#define IBMASM_NAME_SIZE	16
+
+#define IBMASM_NUM_EVENTS	10
+#define IBMASM_EVENT_MAX_SIZE	2048u
+
+
+struct command {
+	struct list_head	queue_node;
+	wait_queue_head_t	wait;
+	unsigned char		*buffer;
+	size_t			buffer_size;
+	int			status;
+	struct kref		kref;
+	spinlock_t		*lock;
+};
+#define to_command(c) container_of(c, struct command, kref)
+
+void ibmasm_free_command(struct kref *kref);
+static inline void command_put(struct command *cmd)
+{
+	unsigned long flags;
+	spinlock_t *lock = cmd->lock;
+
+	spin_lock_irqsave(lock, flags);
+	kref_put(&cmd->kref, ibmasm_free_command);
+	spin_unlock_irqrestore(lock, flags);
+}
+
+static inline void command_get(struct command *cmd)
+{
+	kref_get(&cmd->kref);
+}
+
+
+struct ibmasm_event {
+	unsigned int	serial_number;
+	unsigned int	data_size;
+	unsigned char	data[IBMASM_EVENT_MAX_SIZE];
+};
+
+struct event_buffer {
+	struct ibmasm_event	events[IBMASM_NUM_EVENTS];
+	unsigned int		next_serial_number;
+	unsigned int		next_index;
+	struct list_head	readers;
+};
+
+struct event_reader {
+	int			cancelled;
+	unsigned int		next_serial_number;
+	wait_queue_head_t	wait;
+	struct list_head	node;
+	unsigned int		data_size;
+	unsigned char		data[IBMASM_EVENT_MAX_SIZE];
+};
+
+struct reverse_heartbeat {
+	wait_queue_head_t	wait;
+	unsigned int		stopped;
+};
+
+struct ibmasm_remote {
+	struct input_dev *keybd_dev;
+	struct input_dev *mouse_dev;
+};
+
+struct service_processor {
+	struct list_head	node;
+	spinlock_t		lock;
+	void __iomem		*base_address;
+	unsigned int		irq;
+	struct command		*current_command;
+	struct command		*heartbeat;
+	struct list_head	command_queue;
+	struct event_buffer	*event_buffer;
+	char			dirname[IBMASM_NAME_SIZE];
+	char			devname[IBMASM_NAME_SIZE];
+	unsigned int		number;
+	struct ibmasm_remote	remote;
+	int			serial_line;
+	struct device		*dev;
+};
+
+/* command processing */
+struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size);
+void ibmasm_exec_command(struct service_processor *sp, struct command *cmd);
+void ibmasm_wait_for_response(struct command *cmd, int timeout);
+void ibmasm_receive_command_response(struct service_processor *sp, void *response,  size_t size);
+
+/* event processing */
+int ibmasm_event_buffer_init(struct service_processor *sp);
+void ibmasm_event_buffer_exit(struct service_processor *sp);
+void ibmasm_receive_event(struct service_processor *sp, void *data,  unsigned int data_size);
+void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader);
+void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader);
+int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader);
+void ibmasm_cancel_next_event(struct event_reader *reader);
+
+/* heartbeat - from SP to OS */
+void ibmasm_register_panic_notifier(void);
+void ibmasm_unregister_panic_notifier(void);
+int ibmasm_heartbeat_init(struct service_processor *sp);
+void ibmasm_heartbeat_exit(struct service_processor *sp);
+void ibmasm_receive_heartbeat(struct service_processor *sp,  void *message, size_t size);
+
+/* reverse heartbeat - from OS to SP */
+void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb);
+int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb);
+void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb);
+
+/* dot commands */
+void ibmasm_receive_message(struct service_processor *sp, void *data, int data_size);
+int ibmasm_send_driver_vpd(struct service_processor *sp);
+int ibmasm_send_os_state(struct service_processor *sp, int os_state);
+
+/* low level message processing */
+int ibmasm_send_i2o_message(struct service_processor *sp);
+irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id);
+
+/* remote console */
+void ibmasm_handle_mouse_interrupt(struct service_processor *sp);
+int ibmasm_init_remote_input_dev(struct service_processor *sp);
+void ibmasm_free_remote_input_dev(struct service_processor *sp);
+
+/* file system */
+int ibmasmfs_register(void);
+void ibmasmfs_unregister(void);
+void ibmasmfs_add_sp(struct service_processor *sp);
+
+/* uart */
+#ifdef CONFIG_SERIAL_8250
+void ibmasm_register_uart(struct service_processor *sp);
+void ibmasm_unregister_uart(struct service_processor *sp);
+#else
+#define ibmasm_register_uart(sp)	do { } while(0)
+#define ibmasm_unregister_uart(sp)	do { } while(0)
+#endif
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
new file mode 100644
index 00000000000..aecf40ecb3a
--- /dev/null
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -0,0 +1,635 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+/*
+ * Parts of this code are based on an article by Jonathan Corbet
+ * that appeared in Linux Weekly News.
+ */
+
+
+/*
+ * The IBMASM file virtual filesystem. It creates the following hierarchy
+ * dymamically when mounted from user space:
+ *
+ *    /ibmasm
+ *    |-- 0
+ *    |   |-- command
+ *    |   |-- event
+ *    |   |-- reverse_heartbeat
+ *    |   `-- remote_video
+ *    |       |-- depth
+ *    |       |-- height
+ *    |       `-- width
+ *    .
+ *    .
+ *    .
+ *    `-- n
+ *        |-- command
+ *        |-- event
+ *        |-- reverse_heartbeat
+ *        `-- remote_video
+ *            |-- depth
+ *            |-- height
+ *            `-- width
+ *
+ * For each service processor the following files are created:
+ *
+ * command: execute dot commands
+ *	write: execute a dot command on the service processor
+ *	read: return the result of a previously executed dot command
+ *
+ * events: listen for service processor events
+ *	read: sleep (interruptible) until an event occurs
+ *      write: wakeup sleeping event listener
+ *
+ * reverse_heartbeat: send a heartbeat to the service processor
+ *	read: sleep (interruptible) until the reverse heartbeat fails
+ *      write: wakeup sleeping heartbeat listener
+ *
+ * remote_video/width
+ * remote_video/height
+ * remote_video/width: control remote display settings
+ *	write: set value
+ *	read: read value
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include "ibmasm.h"
+#include "remote.h"
+#include "dot_command.h"
+
+#define IBMASMFS_MAGIC 0x66726f67
+
+static LIST_HEAD(service_processors);
+
+static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode);
+static void ibmasmfs_create_files (struct super_block *sb, struct dentry *root);
+static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent);
+
+
+static int ibmasmfs_get_super(struct file_system_type *fst,
+			int flags, const char *name, void *data,
+			struct vfsmount *mnt)
+{
+	return get_sb_single(fst, flags, data, ibmasmfs_fill_super, mnt);
+}
+
+static const struct super_operations ibmasmfs_s_ops = {
+	.statfs		= simple_statfs,
+	.drop_inode	= generic_delete_inode,
+};
+
+static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations;
+
+static struct file_system_type ibmasmfs_type = {
+	.owner          = THIS_MODULE,
+	.name           = "ibmasmfs",
+	.get_sb         = ibmasmfs_get_super,
+	.kill_sb        = kill_litter_super,
+};
+
+static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent)
+{
+	struct inode *root;
+	struct dentry *root_dentry;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = IBMASMFS_MAGIC;
+	sb->s_op = &ibmasmfs_s_ops;
+	sb->s_time_gran = 1;
+
+	root = ibmasmfs_make_inode (sb, S_IFDIR | 0500);
+	if (!root)
+		return -ENOMEM;
+
+	root->i_op = &simple_dir_inode_operations;
+	root->i_fop = ibmasmfs_dir_ops;
+
+	root_dentry = d_alloc_root(root);
+	if (!root_dentry) {
+		iput(root);
+		return -ENOMEM;
+	}
+	sb->s_root = root_dentry;
+
+	ibmasmfs_create_files(sb, root_dentry);
+	return 0;
+}
+
+static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
+{
+	struct inode *ret = new_inode(sb);
+
+	if (ret) {
+		ret->i_mode = mode;
+		ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
+	}
+	return ret;
+}
+
+static struct dentry *ibmasmfs_create_file (struct super_block *sb,
+			struct dentry *parent,
+			const char *name,
+			const struct file_operations *fops,
+			void *data,
+			int mode)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	dentry = d_alloc_name(parent, name);
+	if (!dentry)
+		return NULL;
+
+	inode = ibmasmfs_make_inode(sb, S_IFREG | mode);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
+	}
+
+	inode->i_fop = fops;
+	inode->i_private = data;
+
+	d_add(dentry, inode);
+	return dentry;
+}
+
+static struct dentry *ibmasmfs_create_dir (struct super_block *sb,
+				struct dentry *parent,
+				const char *name)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	dentry = d_alloc_name(parent, name);
+	if (!dentry)
+		return NULL;
+
+	inode = ibmasmfs_make_inode(sb, S_IFDIR | 0500);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
+	}
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = ibmasmfs_dir_ops;
+
+	d_add(dentry, inode);
+	return dentry;
+}
+
+int ibmasmfs_register(void)
+{
+	return register_filesystem(&ibmasmfs_type);
+}
+
+void ibmasmfs_unregister(void)
+{
+	unregister_filesystem(&ibmasmfs_type);
+}
+
+void ibmasmfs_add_sp(struct service_processor *sp)
+{
+	list_add(&sp->node, &service_processors);
+}
+
+/* struct to save state between command file operations */
+struct ibmasmfs_command_data {
+	struct service_processor	*sp;
+	struct command			*command;
+};
+
+/* struct to save state between event file operations */
+struct ibmasmfs_event_data {
+	struct service_processor	*sp;
+	struct event_reader		reader;
+	int				active;
+};
+
+/* struct to save state between reverse heartbeat file operations */
+struct ibmasmfs_heartbeat_data {
+	struct service_processor	*sp;
+	struct reverse_heartbeat	heartbeat;
+	int				active;
+};
+
+static int command_file_open(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_command_data *command_data;
+
+	if (!inode->i_private)
+		return -ENODEV;
+
+	command_data = kmalloc(sizeof(struct ibmasmfs_command_data), GFP_KERNEL);
+	if (!command_data)
+		return -ENOMEM;
+
+	command_data->command = NULL;
+	command_data->sp = inode->i_private;
+	file->private_data = command_data;
+	return 0;
+}
+
+static int command_file_close(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_command_data *command_data = file->private_data;
+
+	if (command_data->command)
+		command_put(command_data->command);
+
+	kfree(command_data);
+	return 0;
+}
+
+static ssize_t command_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_command_data *command_data = file->private_data;
+	struct command *cmd;
+	int len;
+	unsigned long flags;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	spin_lock_irqsave(&command_data->sp->lock, flags);
+	cmd = command_data->command;
+	if (cmd == NULL) {
+		spin_unlock_irqrestore(&command_data->sp->lock, flags);
+		return 0;
+	}
+	command_data->command = NULL;
+	spin_unlock_irqrestore(&command_data->sp->lock, flags);
+
+	if (cmd->status != IBMASM_CMD_COMPLETE) {
+		command_put(cmd);
+		return -EIO;
+	}
+	len = min(count, cmd->buffer_size);
+	if (copy_to_user(buf, cmd->buffer, len)) {
+		command_put(cmd);
+		return -EFAULT;
+	}
+	command_put(cmd);
+
+	return len;
+}
+
+static ssize_t command_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_command_data *command_data = file->private_data;
+	struct command *cmd;
+	unsigned long flags;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	/* commands are executed sequentially, only one command at a time */
+	if (command_data->command)
+		return -EAGAIN;
+
+	cmd = ibmasm_new_command(command_data->sp, count);
+	if (!cmd)
+		return -ENOMEM;
+
+	if (copy_from_user(cmd->buffer, ubuff, count)) {
+		command_put(cmd);
+		return -EFAULT;
+	}
+
+	spin_lock_irqsave(&command_data->sp->lock, flags);
+	if (command_data->command) {
+		spin_unlock_irqrestore(&command_data->sp->lock, flags);
+		command_put(cmd);
+		return -EAGAIN;
+	}
+	command_data->command = cmd;
+	spin_unlock_irqrestore(&command_data->sp->lock, flags);
+
+	ibmasm_exec_command(command_data->sp, cmd);
+	ibmasm_wait_for_response(cmd, get_dot_command_timeout(cmd->buffer));
+
+	return count;
+}
+
+static int event_file_open(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_event_data *event_data;
+	struct service_processor *sp;
+
+	if (!inode->i_private)
+		return -ENODEV;
+
+	sp = inode->i_private;
+
+	event_data = kmalloc(sizeof(struct ibmasmfs_event_data), GFP_KERNEL);
+	if (!event_data)
+		return -ENOMEM;
+
+	ibmasm_event_reader_register(sp, &event_data->reader);
+
+	event_data->sp = sp;
+	event_data->active = 0;
+	file->private_data = event_data;
+	return 0;
+}
+
+static int event_file_close(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_event_data *event_data = file->private_data;
+
+	ibmasm_event_reader_unregister(event_data->sp, &event_data->reader);
+	kfree(event_data);
+	return 0;
+}
+
+static ssize_t event_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_event_data *event_data = file->private_data;
+	struct event_reader *reader = &event_data->reader;
+	struct service_processor *sp = event_data->sp;
+	int ret;
+	unsigned long flags;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > IBMASM_EVENT_MAX_SIZE)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	spin_lock_irqsave(&sp->lock, flags);
+	if (event_data->active) {
+		spin_unlock_irqrestore(&sp->lock, flags);
+		return -EBUSY;
+	}
+	event_data->active = 1;
+	spin_unlock_irqrestore(&sp->lock, flags);
+
+	ret = ibmasm_get_next_event(sp, reader);
+	if (ret <= 0)
+		goto out;
+
+	if (count < reader->data_size) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+        if (copy_to_user(buf, reader->data, reader->data_size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+	ret = reader->data_size;
+
+out:
+	event_data->active = 0;
+	return ret;
+}
+
+static ssize_t event_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_event_data *event_data = file->private_data;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count != 1)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	ibmasm_cancel_next_event(&event_data->reader);
+	return 0;
+}
+
+static int r_heartbeat_file_open(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_heartbeat_data *rhbeat;
+
+	if (!inode->i_private)
+		return -ENODEV;
+
+	rhbeat = kmalloc(sizeof(struct ibmasmfs_heartbeat_data), GFP_KERNEL);
+	if (!rhbeat)
+		return -ENOMEM;
+
+	rhbeat->sp = inode->i_private;
+	rhbeat->active = 0;
+	ibmasm_init_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat);
+	file->private_data = rhbeat;
+	return 0;
+}
+
+static int r_heartbeat_file_close(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_heartbeat_data *rhbeat = file->private_data;
+
+	kfree(rhbeat);
+	return 0;
+}
+
+static ssize_t r_heartbeat_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_heartbeat_data *rhbeat = file->private_data;
+	unsigned long flags;
+	int result;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > 1024)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	/* allow only one reverse heartbeat per process */
+	spin_lock_irqsave(&rhbeat->sp->lock, flags);
+	if (rhbeat->active) {
+		spin_unlock_irqrestore(&rhbeat->sp->lock, flags);
+		return -EBUSY;
+	}
+	rhbeat->active = 1;
+	spin_unlock_irqrestore(&rhbeat->sp->lock, flags);
+
+	result = ibmasm_start_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat);
+	rhbeat->active = 0;
+
+	return result;
+}
+
+static ssize_t r_heartbeat_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_heartbeat_data *rhbeat = file->private_data;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count != 1)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	if (rhbeat->active)
+		ibmasm_stop_reverse_heartbeat(&rhbeat->heartbeat);
+
+	return 1;
+}
+
+static int remote_settings_file_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static int remote_settings_file_close(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+static ssize_t remote_settings_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	void __iomem *address = (void __iomem *)file->private_data;
+	unsigned char *page;
+	int retval;
+	int len = 0;
+	unsigned int value;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > 1024)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	page = (unsigned char *)__get_free_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	value = readl(address);
+	len = sprintf(page, "%d\n", value);
+
+	if (copy_to_user(buf, page, len)) {
+		retval = -EFAULT;
+		goto exit;
+	}
+	*offset += len;
+	retval = len;
+
+exit:
+	free_page((unsigned long)page);
+	return retval;
+}
+
+static ssize_t remote_settings_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset)
+{
+	void __iomem *address = (void __iomem *)file->private_data;
+	char *buff;
+	unsigned int value;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > 1024)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	buff = kzalloc (count + 1, GFP_KERNEL);
+	if (!buff)
+		return -ENOMEM;
+
+
+	if (copy_from_user(buff, ubuff, count)) {
+		kfree(buff);
+		return -EFAULT;
+	}
+
+	value = simple_strtoul(buff, NULL, 10);
+	writel(value, address);
+	kfree(buff);
+
+	return count;
+}
+
+static const struct file_operations command_fops = {
+	.open =		command_file_open,
+	.release =	command_file_close,
+	.read =		command_file_read,
+	.write =	command_file_write,
+};
+
+static const struct file_operations event_fops = {
+	.open =		event_file_open,
+	.release =	event_file_close,
+	.read =		event_file_read,
+	.write =	event_file_write,
+};
+
+static const struct file_operations r_heartbeat_fops = {
+	.open =		r_heartbeat_file_open,
+	.release =	r_heartbeat_file_close,
+	.read =		r_heartbeat_file_read,
+	.write =	r_heartbeat_file_write,
+};
+
+static const struct file_operations remote_settings_fops = {
+	.open =		remote_settings_file_open,
+	.release =	remote_settings_file_close,
+	.read =		remote_settings_file_read,
+	.write =	remote_settings_file_write,
+};
+
+
+static void ibmasmfs_create_files (struct super_block *sb, struct dentry *root)
+{
+	struct list_head *entry;
+	struct service_processor *sp;
+
+	list_for_each(entry, &service_processors) {
+		struct dentry *dir;
+		struct dentry *remote_dir;
+		sp = list_entry(entry, struct service_processor, node);
+		dir = ibmasmfs_create_dir(sb, root, sp->dirname);
+		if (!dir)
+			continue;
+
+		ibmasmfs_create_file(sb, dir, "command", &command_fops, sp, S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(sb, dir, "event", &event_fops, sp, S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(sb, dir, "reverse_heartbeat", &r_heartbeat_fops, sp, S_IRUSR|S_IWUSR);
+
+		remote_dir = ibmasmfs_create_dir(sb, dir, "remote_video");
+		if (!remote_dir)
+			continue;
+
+		ibmasmfs_create_file(sb, remote_dir, "width", &remote_settings_fops, (void *)display_width(sp), S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(sb, remote_dir, "height", &remote_settings_fops, (void *)display_height(sp), S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(sb, remote_dir, "depth", &remote_settings_fops, (void *)display_depth(sp), S_IRUSR|S_IWUSR);
+	}
+}
diff --git a/drivers/misc/ibmasm/lowlevel.c b/drivers/misc/ibmasm/lowlevel.c
new file mode 100644
index 00000000000..4b2398e27fd
--- /dev/null
+++ b/drivers/misc/ibmasm/lowlevel.c
@@ -0,0 +1,85 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+#include "ibmasm.h"
+#include "lowlevel.h"
+#include "i2o.h"
+#include "dot_command.h"
+#include "remote.h"
+
+static struct i2o_header header = I2O_HEADER_TEMPLATE;
+
+
+int ibmasm_send_i2o_message(struct service_processor *sp)
+{
+	u32 mfa;
+	unsigned int command_size;
+	struct i2o_message *message;
+	struct command *command = sp->current_command;
+
+	mfa = get_mfa_inbound(sp->base_address);
+	if (!mfa)
+		return 1;
+
+	command_size = get_dot_command_size(command->buffer);
+	header.message_size = outgoing_message_size(command_size);
+
+	message = get_i2o_message(sp->base_address, mfa);
+
+	memcpy_toio(&message->header, &header, sizeof(struct i2o_header));
+	memcpy_toio(&message->data, command->buffer, command_size);
+
+	set_mfa_inbound(sp->base_address, mfa);
+
+	return 0;
+}
+
+irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id)
+{
+	u32	mfa;
+	struct service_processor *sp = (struct service_processor *)dev_id;
+	void __iomem *base_address = sp->base_address;
+	char tsbuf[32];
+
+	if (!sp_interrupt_pending(base_address))
+		return IRQ_NONE;
+
+	dbg("respond to interrupt at %s\n", get_timestamp(tsbuf));
+
+	if (mouse_interrupt_pending(sp)) {
+		ibmasm_handle_mouse_interrupt(sp);
+		clear_mouse_interrupt(sp);
+	}
+
+	mfa = get_mfa_outbound(base_address);
+	if (valid_mfa(mfa)) {
+		struct i2o_message *msg = get_i2o_message(base_address, mfa);
+		ibmasm_receive_message(sp, &msg->data, incoming_data_size(msg));
+	} else
+		dbg("didn't get a valid MFA\n");
+
+	set_mfa_outbound(base_address, mfa);
+	dbg("finished interrupt at   %s\n", get_timestamp(tsbuf));
+
+	return IRQ_HANDLED;
+}
diff --git a/drivers/misc/ibmasm/lowlevel.h b/drivers/misc/ibmasm/lowlevel.h
new file mode 100644
index 00000000000..766766523a6
--- /dev/null
+++ b/drivers/misc/ibmasm/lowlevel.h
@@ -0,0 +1,137 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+/* Condor service processor specific hardware definitions */
+
+#ifndef __IBMASM_CONDOR_H__
+#define __IBMASM_CONDOR_H__
+
+#include <asm/io.h>
+
+#define VENDORID_IBM	0x1014
+#define DEVICEID_RSA	0x010F
+
+#define GET_MFA_ADDR(x)  (x & 0xFFFFFF00)
+
+#define MAILBOX_FULL(x)  (x & 0x00000001)
+
+#define NO_MFAS_AVAILABLE     0xFFFFFFFF
+
+
+#define INBOUND_QUEUE_PORT   0x40  /* contains address of next free MFA */
+#define OUTBOUND_QUEUE_PORT  0x44  /* contains address of posted MFA    */
+
+#define SP_INTR_MASK	0x00000008
+#define UART_INTR_MASK	0x00000010
+
+#define INTR_STATUS_REGISTER   0x13A0
+#define INTR_CONTROL_REGISTER  0x13A4
+
+#define SCOUT_COM_A_BASE         0x0000
+#define SCOUT_COM_B_BASE         0x0100
+#define SCOUT_COM_C_BASE         0x0200
+#define SCOUT_COM_D_BASE         0x0300
+
+static inline int sp_interrupt_pending(void __iomem *base_address)
+{
+	return SP_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER);
+}
+
+static inline int uart_interrupt_pending(void __iomem *base_address)
+{
+	return UART_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER);
+}
+
+static inline void ibmasm_enable_interrupts(void __iomem *base_address, int mask)
+{
+	void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER;
+	writel( readl(ctrl_reg) & ~mask, ctrl_reg);
+}
+
+static inline void ibmasm_disable_interrupts(void __iomem *base_address, int mask)
+{
+	void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER;
+	writel( readl(ctrl_reg) | mask, ctrl_reg);
+}
+
+static inline void enable_sp_interrupts(void __iomem *base_address)
+{
+	ibmasm_enable_interrupts(base_address, SP_INTR_MASK);
+}
+
+static inline void disable_sp_interrupts(void __iomem *base_address)
+{
+	ibmasm_disable_interrupts(base_address, SP_INTR_MASK);
+}
+
+static inline void enable_uart_interrupts(void __iomem *base_address)
+{
+	ibmasm_enable_interrupts(base_address, UART_INTR_MASK);
+}
+
+static inline void disable_uart_interrupts(void __iomem *base_address)
+{
+	ibmasm_disable_interrupts(base_address, UART_INTR_MASK);
+}
+
+#define valid_mfa(mfa)	( (mfa) != NO_MFAS_AVAILABLE )
+
+static inline u32 get_mfa_outbound(void __iomem *base_address)
+{
+	int retry;
+	u32 mfa;
+
+	for (retry=0; retry<=10; retry++) {
+		mfa = readl(base_address + OUTBOUND_QUEUE_PORT);
+		if (valid_mfa(mfa))
+			break;
+	}
+	return mfa;
+}
+
+static inline void set_mfa_outbound(void __iomem *base_address, u32 mfa)
+{
+	writel(mfa, base_address + OUTBOUND_QUEUE_PORT);
+}
+
+static inline u32 get_mfa_inbound(void __iomem *base_address)
+{
+	u32 mfa = readl(base_address + INBOUND_QUEUE_PORT);
+
+	if (MAILBOX_FULL(mfa))
+		return 0;
+
+	return mfa;
+}
+
+static inline void set_mfa_inbound(void __iomem *base_address, u32 mfa)
+{
+	writel(mfa, base_address + INBOUND_QUEUE_PORT);
+}
+
+static inline struct i2o_message *get_i2o_message(void __iomem *base_address, u32 mfa)
+{
+	return (struct i2o_message *)(GET_MFA_ADDR(mfa) + base_address);
+}
+
+#endif /* __IBMASM_CONDOR_H__ */
diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c
new file mode 100644
index 00000000000..dc14b0b9cbf
--- /dev/null
+++ b/drivers/misc/ibmasm/module.c
@@ -0,0 +1,237 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ * This driver is based on code originally written by Pete Reynolds
+ * and others.
+ *
+ */
+
+/*
+ * The ASM device driver does the following things:
+ *
+ * 1) When loaded it sends a message to the service processor,
+ * indicating that an OS is * running. This causes the service processor
+ * to send periodic heartbeats to the OS.
+ *
+ * 2) Answers the periodic heartbeats sent by the service processor.
+ * Failure to do so would result in system reboot.
+ *
+ * 3) Acts as a pass through for dot commands sent from user applications.
+ * The interface for this is the ibmasmfs file system.
+ *
+ * 4) Allows user applications to register for event notification. Events
+ * are sent to the driver through interrupts. They can be read from user
+ * space through the ibmasmfs file system.
+ *
+ * 5) Allows user space applications to send heartbeats to the service
+ * processor (aka reverse heartbeats). Again this happens through ibmasmfs.
+ *
+ * 6) Handles remote mouse and keyboard event interrupts and makes them
+ * available to user applications through ibmasmfs.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+#include "remote.h"
+
+int ibmasm_debug = 0;
+module_param(ibmasm_debug, int , S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(ibmasm_debug, " Set debug mode on or off");
+
+
+static int __devinit ibmasm_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	int result;
+	struct service_processor *sp;
+
+	if ((result = pci_enable_device(pdev))) {
+		dev_err(&pdev->dev, "Failed to enable PCI device\n");
+		return result;
+	}
+	if ((result = pci_request_regions(pdev, DRIVER_NAME))) {
+		dev_err(&pdev->dev, "Failed to allocate PCI resources\n");
+		goto error_resources;
+	}
+	/* vnc client won't work without bus-mastering */
+	pci_set_master(pdev);
+
+	sp = kzalloc(sizeof(struct service_processor), GFP_KERNEL);
+	if (sp == NULL) {
+		dev_err(&pdev->dev, "Failed to allocate memory\n");
+		result = -ENOMEM;
+		goto error_kmalloc;
+	}
+
+	spin_lock_init(&sp->lock);
+	INIT_LIST_HEAD(&sp->command_queue);
+
+	pci_set_drvdata(pdev, (void *)sp);
+	sp->dev = &pdev->dev;
+	sp->number = pdev->bus->number;
+	snprintf(sp->dirname, IBMASM_NAME_SIZE, "%d", sp->number);
+	snprintf(sp->devname, IBMASM_NAME_SIZE, "%s%d", DRIVER_NAME, sp->number);
+
+	if (ibmasm_event_buffer_init(sp)) {
+		dev_err(sp->dev, "Failed to allocate event buffer\n");
+		goto error_eventbuffer;
+	}
+
+	if (ibmasm_heartbeat_init(sp)) {
+		dev_err(sp->dev, "Failed to allocate heartbeat command\n");
+		goto error_heartbeat;
+	}
+
+	sp->irq = pdev->irq;
+	sp->base_address = pci_ioremap_bar(pdev, 0);
+	if (!sp->base_address) {
+		dev_err(sp->dev, "Failed to ioremap pci memory\n");
+		result =  -ENODEV;
+		goto error_ioremap;
+	}
+
+	result = request_irq(sp->irq, ibmasm_interrupt_handler, IRQF_SHARED, sp->devname, (void*)sp);
+	if (result) {
+		dev_err(sp->dev, "Failed to register interrupt handler\n");
+		goto error_request_irq;
+	}
+
+	enable_sp_interrupts(sp->base_address);
+
+	result = ibmasm_init_remote_input_dev(sp);
+	if (result) {
+		dev_err(sp->dev, "Failed to initialize remote queue\n");
+		goto error_send_message;
+	}
+
+	result = ibmasm_send_driver_vpd(sp);
+	if (result) {
+		dev_err(sp->dev, "Failed to send driver VPD to service processor\n");
+		goto error_send_message;
+	}
+	result = ibmasm_send_os_state(sp, SYSTEM_STATE_OS_UP);
+	if (result) {
+		dev_err(sp->dev, "Failed to send OS state to service processor\n");
+		goto error_send_message;
+	}
+	ibmasmfs_add_sp(sp);
+
+	ibmasm_register_uart(sp);
+
+	return 0;
+
+error_send_message:
+	disable_sp_interrupts(sp->base_address);
+	ibmasm_free_remote_input_dev(sp);
+	free_irq(sp->irq, (void *)sp);
+error_request_irq:
+	iounmap(sp->base_address);
+error_ioremap:
+	ibmasm_heartbeat_exit(sp);
+error_heartbeat:
+	ibmasm_event_buffer_exit(sp);
+error_eventbuffer:
+	pci_set_drvdata(pdev, NULL);
+	kfree(sp);
+error_kmalloc:
+        pci_release_regions(pdev);
+error_resources:
+        pci_disable_device(pdev);
+
+	return result;
+}
+
+static void __devexit ibmasm_remove_one(struct pci_dev *pdev)
+{
+	struct service_processor *sp = (struct service_processor *)pci_get_drvdata(pdev);
+
+	dbg("Unregistering UART\n");
+	ibmasm_unregister_uart(sp);
+	dbg("Sending OS down message\n");
+	if (ibmasm_send_os_state(sp, SYSTEM_STATE_OS_DOWN))
+		err("failed to get repsonse to 'Send OS State' command\n");
+	dbg("Disabling heartbeats\n");
+	ibmasm_heartbeat_exit(sp);
+	dbg("Disabling interrupts\n");
+	disable_sp_interrupts(sp->base_address);
+	dbg("Freeing SP irq\n");
+	free_irq(sp->irq, (void *)sp);
+	dbg("Cleaning up\n");
+	ibmasm_free_remote_input_dev(sp);
+	iounmap(sp->base_address);
+	ibmasm_event_buffer_exit(sp);
+	pci_set_drvdata(pdev, NULL);
+	kfree(sp);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static struct pci_device_id ibmasm_pci_table[] =
+{
+	{ PCI_DEVICE(VENDORID_IBM, DEVICEID_RSA) },
+	{},
+};
+
+static struct pci_driver ibmasm_driver = {
+	.name		= DRIVER_NAME,
+	.id_table	= ibmasm_pci_table,
+	.probe		= ibmasm_init_one,
+	.remove		= __devexit_p(ibmasm_remove_one),
+};
+
+static void __exit ibmasm_exit (void)
+{
+	ibmasm_unregister_panic_notifier();
+	ibmasmfs_unregister();
+	pci_unregister_driver(&ibmasm_driver);
+	info(DRIVER_DESC " version " DRIVER_VERSION " unloaded");
+}
+
+static int __init ibmasm_init(void)
+{
+	int result;
+
+	result = ibmasmfs_register();
+	if (result) {
+		err("Failed to register ibmasmfs file system");
+		return result;
+	}
+	result = pci_register_driver(&ibmasm_driver);
+	if (result) {
+		ibmasmfs_unregister();
+		return result;
+	}
+	ibmasm_register_panic_notifier();
+	info(DRIVER_DESC " version " DRIVER_VERSION " loaded");
+	return 0;
+}
+
+module_init(ibmasm_init);
+module_exit(ibmasm_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, ibmasm_pci_table);
+
diff --git a/drivers/misc/ibmasm/r_heartbeat.c b/drivers/misc/ibmasm/r_heartbeat.c
new file mode 100644
index 00000000000..2de487ac788
--- /dev/null
+++ b/drivers/misc/ibmasm/r_heartbeat.c
@@ -0,0 +1,99 @@
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/sched.h>
+#include "ibmasm.h"
+#include "dot_command.h"
+
+/*
+ * Reverse Heartbeat, i.e. heartbeats sent from the driver to the
+ * service processor.
+ * These heartbeats are initiated by user level programs.
+ */
+
+/* the reverse heartbeat dot command */
+#pragma pack(1)
+static struct {
+	struct dot_command_header	header;
+	unsigned char			command[3];
+} rhb_dot_cmd = {
+	.header = {
+		.type =		sp_read,
+		.command_size = 3,
+		.data_size =	0,
+		.status =	0
+	},
+	.command = { 4, 3, 6 }
+};
+#pragma pack()
+
+void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb)
+{
+	init_waitqueue_head(&rhb->wait);
+	rhb->stopped = 0;
+}
+
+/**
+ * start_reverse_heartbeat
+ * Loop forever, sending a reverse heartbeat dot command to the service
+ * processor, then sleeping. The loop comes to an end if the service
+ * processor fails to respond 3 times or we were interrupted.
+ */
+int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb)
+{
+	struct command *cmd;
+	int times_failed = 0;
+	int result = 1;
+
+	cmd = ibmasm_new_command(sp, sizeof rhb_dot_cmd);
+	if (!cmd)
+		return -ENOMEM;
+
+	while (times_failed < 3) {
+		memcpy(cmd->buffer, (void *)&rhb_dot_cmd, sizeof rhb_dot_cmd);
+		cmd->status = IBMASM_CMD_PENDING;
+		ibmasm_exec_command(sp, cmd);
+		ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL);
+
+		if (cmd->status != IBMASM_CMD_COMPLETE)
+			times_failed++;
+
+		wait_event_interruptible_timeout(rhb->wait,
+			rhb->stopped,
+			REVERSE_HEARTBEAT_TIMEOUT * HZ);
+
+		if (signal_pending(current) || rhb->stopped) {
+			result = -EINTR;
+			break;
+		}
+	}
+	command_put(cmd);
+	rhb->stopped = 0;
+
+	return result;
+}
+
+void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb)
+{
+	rhb->stopped = 1;
+	wake_up_interruptible(&rhb->wait);
+}
diff --git a/drivers/misc/ibmasm/remote.c b/drivers/misc/ibmasm/remote.c
new file mode 100644
index 00000000000..477bb43c899
--- /dev/null
+++ b/drivers/misc/ibmasm/remote.c
@@ -0,0 +1,282 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Authors: Max Asböck <amax@us.ibm.com>
+ *          Vernon Mauery <vernux@us.ibm.com>
+ *
+ */
+
+/* Remote mouse and keyboard event handling functions */
+
+#include <linux/pci.h>
+#include "ibmasm.h"
+#include "remote.h"
+
+#define MOUSE_X_MAX	1600
+#define MOUSE_Y_MAX	1200
+
+static const unsigned short xlate_high[XLATE_SIZE] = {
+	[KEY_SYM_ENTER & 0xff] = KEY_ENTER,
+	[KEY_SYM_KPSLASH & 0xff] = KEY_KPSLASH,
+	[KEY_SYM_KPSTAR & 0xff] = KEY_KPASTERISK,
+	[KEY_SYM_KPMINUS & 0xff] = KEY_KPMINUS,
+	[KEY_SYM_KPDOT & 0xff] = KEY_KPDOT,
+	[KEY_SYM_KPPLUS & 0xff] = KEY_KPPLUS,
+	[KEY_SYM_KP0 & 0xff] = KEY_KP0,
+	[KEY_SYM_KP1 & 0xff] = KEY_KP1,
+	[KEY_SYM_KP2 & 0xff] = KEY_KP2, [KEY_SYM_KPDOWN & 0xff] = KEY_KP2,
+	[KEY_SYM_KP3 & 0xff] = KEY_KP3,
+	[KEY_SYM_KP4 & 0xff] = KEY_KP4, [KEY_SYM_KPLEFT & 0xff] = KEY_KP4,
+	[KEY_SYM_KP5 & 0xff] = KEY_KP5,
+	[KEY_SYM_KP6 & 0xff] = KEY_KP6, [KEY_SYM_KPRIGHT & 0xff] = KEY_KP6,
+	[KEY_SYM_KP7 & 0xff] = KEY_KP7,
+	[KEY_SYM_KP8 & 0xff] = KEY_KP8, [KEY_SYM_KPUP & 0xff] = KEY_KP8,
+	[KEY_SYM_KP9 & 0xff] = KEY_KP9,
+	[KEY_SYM_BK_SPC & 0xff] = KEY_BACKSPACE,
+	[KEY_SYM_TAB & 0xff] = KEY_TAB,
+	[KEY_SYM_CTRL & 0xff] = KEY_LEFTCTRL,
+	[KEY_SYM_ALT & 0xff] = KEY_LEFTALT,
+	[KEY_SYM_INSERT & 0xff] = KEY_INSERT,
+	[KEY_SYM_DELETE & 0xff] = KEY_DELETE,
+	[KEY_SYM_SHIFT & 0xff] = KEY_LEFTSHIFT,
+	[KEY_SYM_UARROW & 0xff] = KEY_UP,
+	[KEY_SYM_DARROW & 0xff] = KEY_DOWN,
+	[KEY_SYM_LARROW & 0xff] = KEY_LEFT,
+	[KEY_SYM_RARROW & 0xff] = KEY_RIGHT,
+	[KEY_SYM_ESCAPE & 0xff] = KEY_ESC,
+        [KEY_SYM_PAGEUP & 0xff] = KEY_PAGEUP,
+        [KEY_SYM_PAGEDOWN & 0xff] = KEY_PAGEDOWN,
+        [KEY_SYM_HOME & 0xff] = KEY_HOME,
+        [KEY_SYM_END & 0xff] = KEY_END,
+	[KEY_SYM_F1 & 0xff] = KEY_F1,
+	[KEY_SYM_F2 & 0xff] = KEY_F2,
+	[KEY_SYM_F3 & 0xff] = KEY_F3,
+	[KEY_SYM_F4 & 0xff] = KEY_F4,
+	[KEY_SYM_F5 & 0xff] = KEY_F5,
+	[KEY_SYM_F6 & 0xff] = KEY_F6,
+	[KEY_SYM_F7 & 0xff] = KEY_F7,
+	[KEY_SYM_F8 & 0xff] = KEY_F8,
+	[KEY_SYM_F9 & 0xff] = KEY_F9,
+	[KEY_SYM_F10 & 0xff] = KEY_F10,
+	[KEY_SYM_F11 & 0xff] = KEY_F11,
+	[KEY_SYM_F12 & 0xff] = KEY_F12,
+	[KEY_SYM_CAP_LOCK & 0xff] = KEY_CAPSLOCK,
+	[KEY_SYM_NUM_LOCK & 0xff] = KEY_NUMLOCK,
+	[KEY_SYM_SCR_LOCK & 0xff] = KEY_SCROLLLOCK,
+};
+
+static const unsigned short xlate[XLATE_SIZE] = {
+	[NO_KEYCODE] = KEY_RESERVED,
+	[KEY_SYM_SPACE] = KEY_SPACE,
+	[KEY_SYM_TILDE] = KEY_GRAVE,        [KEY_SYM_BKTIC] = KEY_GRAVE,
+	[KEY_SYM_ONE] = KEY_1,              [KEY_SYM_BANG] = KEY_1,
+	[KEY_SYM_TWO] = KEY_2,              [KEY_SYM_AT] = KEY_2,
+	[KEY_SYM_THREE] = KEY_3,            [KEY_SYM_POUND] = KEY_3,
+	[KEY_SYM_FOUR] = KEY_4,             [KEY_SYM_DOLLAR] = KEY_4,
+	[KEY_SYM_FIVE] = KEY_5,             [KEY_SYM_PERCENT] = KEY_5,
+	[KEY_SYM_SIX] = KEY_6,              [KEY_SYM_CARAT] = KEY_6,
+	[KEY_SYM_SEVEN] = KEY_7,            [KEY_SYM_AMPER] = KEY_7,
+	[KEY_SYM_EIGHT] = KEY_8,            [KEY_SYM_STAR] = KEY_8,
+	[KEY_SYM_NINE] = KEY_9,             [KEY_SYM_LPAREN] = KEY_9,
+	[KEY_SYM_ZERO] = KEY_0,             [KEY_SYM_RPAREN] = KEY_0,
+	[KEY_SYM_MINUS] = KEY_MINUS,        [KEY_SYM_USCORE] = KEY_MINUS,
+	[KEY_SYM_EQUAL] = KEY_EQUAL,        [KEY_SYM_PLUS] = KEY_EQUAL,
+	[KEY_SYM_LBRKT] = KEY_LEFTBRACE,    [KEY_SYM_LCURLY] = KEY_LEFTBRACE,
+	[KEY_SYM_RBRKT] = KEY_RIGHTBRACE,   [KEY_SYM_RCURLY] = KEY_RIGHTBRACE,
+	[KEY_SYM_SLASH] = KEY_BACKSLASH,    [KEY_SYM_PIPE] = KEY_BACKSLASH,
+	[KEY_SYM_TIC] = KEY_APOSTROPHE,     [KEY_SYM_QUOTE] = KEY_APOSTROPHE,
+	[KEY_SYM_SEMIC] = KEY_SEMICOLON,    [KEY_SYM_COLON] = KEY_SEMICOLON,
+	[KEY_SYM_COMMA] = KEY_COMMA,        [KEY_SYM_LT] = KEY_COMMA,
+	[KEY_SYM_PERIOD] = KEY_DOT,         [KEY_SYM_GT] = KEY_DOT,
+	[KEY_SYM_BSLASH] = KEY_SLASH,       [KEY_SYM_QMARK] = KEY_SLASH,
+	[KEY_SYM_A] = KEY_A,                [KEY_SYM_a] = KEY_A,
+	[KEY_SYM_B] = KEY_B,                [KEY_SYM_b] = KEY_B,
+	[KEY_SYM_C] = KEY_C,                [KEY_SYM_c] = KEY_C,
+	[KEY_SYM_D] = KEY_D,                [KEY_SYM_d] = KEY_D,
+	[KEY_SYM_E] = KEY_E,                [KEY_SYM_e] = KEY_E,
+	[KEY_SYM_F] = KEY_F,                [KEY_SYM_f] = KEY_F,
+	[KEY_SYM_G] = KEY_G,                [KEY_SYM_g] = KEY_G,
+	[KEY_SYM_H] = KEY_H,                [KEY_SYM_h] = KEY_H,
+	[KEY_SYM_I] = KEY_I,                [KEY_SYM_i] = KEY_I,
+	[KEY_SYM_J] = KEY_J,                [KEY_SYM_j] = KEY_J,
+	[KEY_SYM_K] = KEY_K,                [KEY_SYM_k] = KEY_K,
+	[KEY_SYM_L] = KEY_L,                [KEY_SYM_l] = KEY_L,
+	[KEY_SYM_M] = KEY_M,                [KEY_SYM_m] = KEY_M,
+	[KEY_SYM_N] = KEY_N,                [KEY_SYM_n] = KEY_N,
+	[KEY_SYM_O] = KEY_O,                [KEY_SYM_o] = KEY_O,
+	[KEY_SYM_P] = KEY_P,                [KEY_SYM_p] = KEY_P,
+	[KEY_SYM_Q] = KEY_Q,                [KEY_SYM_q] = KEY_Q,
+	[KEY_SYM_R] = KEY_R,                [KEY_SYM_r] = KEY_R,
+	[KEY_SYM_S] = KEY_S,                [KEY_SYM_s] = KEY_S,
+	[KEY_SYM_T] = KEY_T,                [KEY_SYM_t] = KEY_T,
+	[KEY_SYM_U] = KEY_U,                [KEY_SYM_u] = KEY_U,
+	[KEY_SYM_V] = KEY_V,                [KEY_SYM_v] = KEY_V,
+	[KEY_SYM_W] = KEY_W,                [KEY_SYM_w] = KEY_W,
+	[KEY_SYM_X] = KEY_X,                [KEY_SYM_x] = KEY_X,
+	[KEY_SYM_Y] = KEY_Y,                [KEY_SYM_y] = KEY_Y,
+	[KEY_SYM_Z] = KEY_Z,                [KEY_SYM_z] = KEY_Z,
+};
+
+static void print_input(struct remote_input *input)
+{
+	if (input->type == INPUT_TYPE_MOUSE) {
+		unsigned char buttons = input->mouse_buttons;
+		dbg("remote mouse movement: (x,y)=(%d,%d)%s%s%s%s\n",
+			input->data.mouse.x, input->data.mouse.y,
+			(buttons) ? " -- buttons:" : "",
+			(buttons & REMOTE_BUTTON_LEFT) ? "left " : "",
+			(buttons & REMOTE_BUTTON_MIDDLE) ? "middle " : "",
+			(buttons & REMOTE_BUTTON_RIGHT) ? "right" : ""
+		      );
+	} else {
+		dbg("remote keypress (code, flag, down):"
+			   "%d (0x%x) [0x%x] [0x%x]\n",
+				input->data.keyboard.key_code,
+				input->data.keyboard.key_code,
+				input->data.keyboard.key_flag,
+				input->data.keyboard.key_down
+		      );
+	}
+}
+
+static void send_mouse_event(struct input_dev *dev, struct remote_input *input)
+{
+	unsigned char buttons = input->mouse_buttons;
+
+	input_report_abs(dev, ABS_X, input->data.mouse.x);
+	input_report_abs(dev, ABS_Y, input->data.mouse.y);
+	input_report_key(dev, BTN_LEFT, buttons & REMOTE_BUTTON_LEFT);
+	input_report_key(dev, BTN_MIDDLE, buttons & REMOTE_BUTTON_MIDDLE);
+	input_report_key(dev, BTN_RIGHT, buttons & REMOTE_BUTTON_RIGHT);
+	input_sync(dev);
+}
+
+static void send_keyboard_event(struct input_dev *dev,
+		struct remote_input *input)
+{
+	unsigned int key;
+	unsigned short code = input->data.keyboard.key_code;
+
+	if (code & 0xff00)
+		key = xlate_high[code & 0xff];
+	else
+		key = xlate[code];
+	input_report_key(dev, key, input->data.keyboard.key_down);
+	input_sync(dev);
+}
+
+void ibmasm_handle_mouse_interrupt(struct service_processor *sp)
+{
+	unsigned long reader;
+	unsigned long writer;
+	struct remote_input input;
+
+	reader = get_queue_reader(sp);
+	writer = get_queue_writer(sp);
+
+	while (reader != writer) {
+		memcpy_fromio(&input, get_queue_entry(sp, reader),
+				sizeof(struct remote_input));
+
+		print_input(&input);
+		if (input.type == INPUT_TYPE_MOUSE) {
+			send_mouse_event(sp->remote.mouse_dev, &input);
+		} else if (input.type == INPUT_TYPE_KEYBOARD) {
+			send_keyboard_event(sp->remote.keybd_dev, &input);
+		} else
+			break;
+
+		reader = advance_queue_reader(sp, reader);
+		writer = get_queue_writer(sp);
+	}
+}
+
+int ibmasm_init_remote_input_dev(struct service_processor *sp)
+{
+	/* set up the mouse input device */
+	struct input_dev *mouse_dev, *keybd_dev;
+	struct pci_dev *pdev = to_pci_dev(sp->dev);
+	int error = -ENOMEM;
+	int i;
+
+	sp->remote.mouse_dev = mouse_dev = input_allocate_device();
+	sp->remote.keybd_dev = keybd_dev = input_allocate_device();
+
+	if (!mouse_dev || !keybd_dev)
+		goto err_free_devices;
+
+	mouse_dev->id.bustype = BUS_PCI;
+	mouse_dev->id.vendor = pdev->vendor;
+	mouse_dev->id.product = pdev->device;
+	mouse_dev->id.version = 1;
+	mouse_dev->dev.parent = sp->dev;
+	mouse_dev->evbit[0]  = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	mouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+	set_bit(BTN_TOUCH, mouse_dev->keybit);
+	mouse_dev->name = "ibmasm RSA I remote mouse";
+	input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0);
+	input_set_abs_params(mouse_dev, ABS_Y, 0, MOUSE_Y_MAX, 0, 0);
+
+	keybd_dev->id.bustype = BUS_PCI;
+	keybd_dev->id.vendor = pdev->vendor;
+	keybd_dev->id.product = pdev->device;
+	keybd_dev->id.version = 2;
+	keybd_dev->dev.parent = sp->dev;
+	keybd_dev->evbit[0]  = BIT_MASK(EV_KEY);
+	keybd_dev->name = "ibmasm RSA I remote keyboard";
+
+	for (i = 0; i < XLATE_SIZE; i++) {
+		if (xlate_high[i])
+			set_bit(xlate_high[i], keybd_dev->keybit);
+		if (xlate[i])
+			set_bit(xlate[i], keybd_dev->keybit);
+	}
+
+	error = input_register_device(mouse_dev);
+	if (error)
+		goto err_free_devices;
+
+	error = input_register_device(keybd_dev);
+	if (error)
+		goto err_unregister_mouse_dev;
+
+	enable_mouse_interrupts(sp);
+
+	printk(KERN_INFO "ibmasm remote responding to events on RSA card %d\n", sp->number);
+
+	return 0;
+
+ err_unregister_mouse_dev:
+	input_unregister_device(mouse_dev);
+	mouse_dev = NULL; /* so we don't try to free it again below */
+ err_free_devices:
+	input_free_device(mouse_dev);
+	input_free_device(keybd_dev);
+
+	return error;
+}
+
+void ibmasm_free_remote_input_dev(struct service_processor *sp)
+{
+	disable_mouse_interrupts(sp);
+	input_unregister_device(sp->remote.mouse_dev);
+	input_unregister_device(sp->remote.keybd_dev);
+}
+
diff --git a/drivers/misc/ibmasm/remote.h b/drivers/misc/ibmasm/remote.h
new file mode 100644
index 00000000000..72acf5af7a2
--- /dev/null
+++ b/drivers/misc/ibmasm/remote.h
@@ -0,0 +1,270 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ * Orignally written by Pete Reynolds
+ */
+
+#ifndef _IBMASM_REMOTE_H_
+#define _IBMASM_REMOTE_H_
+
+#include <asm/io.h>
+
+/* pci offsets */
+#define CONDOR_MOUSE_DATA		0x000AC000
+#define CONDOR_MOUSE_ISR_CONTROL	0x00
+#define CONDOR_MOUSE_ISR_STATUS		0x04
+#define CONDOR_MOUSE_Q_READER		0x08
+#define CONDOR_MOUSE_Q_WRITER		0x0C
+#define CONDOR_MOUSE_Q_BEGIN		0x10
+#define CONDOR_MOUSE_MAX_X		0x14
+#define CONDOR_MOUSE_MAX_Y		0x18
+
+#define CONDOR_INPUT_DESKTOP_INFO	0x1F0
+#define CONDOR_INPUT_DISPLAY_RESX	0x1F4
+#define CONDOR_INPUT_DISPLAY_RESY	0x1F8
+#define CONDOR_INPUT_DISPLAY_BITS	0x1FC
+#define CONDOR_OUTPUT_VNC_STATUS	0x200
+
+#define CONDOR_MOUSE_INTR_STATUS_MASK	0x00000001
+
+#define INPUT_TYPE_MOUSE	0x1
+#define INPUT_TYPE_KEYBOARD	0x2
+
+
+/* mouse button states received from SP */
+#define REMOTE_DOUBLE_CLICK	0xF0
+#define REMOTE_BUTTON_LEFT	0x01
+#define REMOTE_BUTTON_MIDDLE	0x02
+#define REMOTE_BUTTON_RIGHT	0x04
+
+/* size of keysym/keycode translation matricies */
+#define XLATE_SIZE 256
+
+struct mouse_input {
+	unsigned short	y;
+	unsigned short	x;
+};
+
+
+struct keyboard_input {
+	unsigned short	key_code;
+	unsigned char	key_flag;
+	unsigned char	key_down;
+};
+
+
+
+struct remote_input {
+	union {
+		struct mouse_input	mouse;
+		struct keyboard_input	keyboard;
+	} data;
+
+	unsigned char	type;
+	unsigned char	pad1;
+	unsigned char	mouse_buttons;
+	unsigned char	pad3;
+};
+
+#define mouse_addr(sp)		(sp->base_address + CONDOR_MOUSE_DATA)
+#define display_width(sp)	(mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESX)
+#define display_height(sp)	(mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESY)
+#define display_depth(sp)	(mouse_addr(sp) + CONDOR_INPUT_DISPLAY_BITS)
+#define desktop_info(sp)	(mouse_addr(sp) + CONDOR_INPUT_DESKTOP_INFO)
+#define vnc_status(sp)		(mouse_addr(sp) + CONDOR_OUTPUT_VNC_STATUS)
+#define isr_control(sp)		(mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL)
+
+#define mouse_interrupt_pending(sp)	readl(mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS)
+#define clear_mouse_interrupt(sp)	writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS)
+#define enable_mouse_interrupts(sp)	writel(1, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL)
+#define disable_mouse_interrupts(sp)	writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL)
+
+/* remote input queue operations */
+#define REMOTE_QUEUE_SIZE	60
+
+#define get_queue_writer(sp)	readl(mouse_addr(sp) + CONDOR_MOUSE_Q_WRITER)
+#define get_queue_reader(sp)	readl(mouse_addr(sp) + CONDOR_MOUSE_Q_READER)
+#define set_queue_reader(sp, reader)	writel(reader, mouse_addr(sp) + CONDOR_MOUSE_Q_READER)
+
+#define queue_begin	(mouse_addr(sp) + CONDOR_MOUSE_Q_BEGIN)
+
+#define get_queue_entry(sp, read_index) \
+	((void*)(queue_begin + read_index * sizeof(struct remote_input)))
+
+static inline int advance_queue_reader(struct service_processor *sp, unsigned long reader)
+{
+	reader++;
+	if (reader == REMOTE_QUEUE_SIZE)
+		reader = 0;
+
+	set_queue_reader(sp, reader);
+	return reader;
+}
+
+#define NO_KEYCODE 0
+#define KEY_SYM_BK_SPC   0xFF08
+#define KEY_SYM_TAB      0xFF09
+#define KEY_SYM_ENTER    0xFF0D
+#define KEY_SYM_SCR_LOCK 0xFF14
+#define KEY_SYM_ESCAPE   0xFF1B
+#define KEY_SYM_HOME     0xFF50
+#define KEY_SYM_LARROW   0xFF51
+#define KEY_SYM_UARROW   0xFF52
+#define KEY_SYM_RARROW   0xFF53
+#define KEY_SYM_DARROW   0xFF54
+#define KEY_SYM_PAGEUP   0xFF55
+#define KEY_SYM_PAGEDOWN 0xFF56
+#define KEY_SYM_END      0xFF57
+#define KEY_SYM_INSERT   0xFF63
+#define KEY_SYM_NUM_LOCK 0xFF7F
+#define KEY_SYM_KPSTAR   0xFFAA
+#define KEY_SYM_KPPLUS   0xFFAB
+#define KEY_SYM_KPMINUS  0xFFAD
+#define KEY_SYM_KPDOT    0xFFAE
+#define KEY_SYM_KPSLASH  0xFFAF
+#define KEY_SYM_KPRIGHT  0xFF96
+#define KEY_SYM_KPUP     0xFF97
+#define KEY_SYM_KPLEFT   0xFF98
+#define KEY_SYM_KPDOWN   0xFF99
+#define KEY_SYM_KP0      0xFFB0
+#define KEY_SYM_KP1      0xFFB1
+#define KEY_SYM_KP2      0xFFB2
+#define KEY_SYM_KP3      0xFFB3
+#define KEY_SYM_KP4      0xFFB4
+#define KEY_SYM_KP5      0xFFB5
+#define KEY_SYM_KP6      0xFFB6
+#define KEY_SYM_KP7      0xFFB7
+#define KEY_SYM_KP8      0xFFB8
+#define KEY_SYM_KP9      0xFFB9
+#define KEY_SYM_F1       0xFFBE      // 1B 5B 5B 41
+#define KEY_SYM_F2       0xFFBF      // 1B 5B 5B 42
+#define KEY_SYM_F3       0xFFC0      // 1B 5B 5B 43
+#define KEY_SYM_F4       0xFFC1      // 1B 5B 5B 44
+#define KEY_SYM_F5       0xFFC2      // 1B 5B 5B 45
+#define KEY_SYM_F6       0xFFC3      // 1B 5B 31 37 7E
+#define KEY_SYM_F7       0xFFC4      // 1B 5B 31 38 7E
+#define KEY_SYM_F8       0xFFC5      // 1B 5B 31 39 7E
+#define KEY_SYM_F9       0xFFC6      // 1B 5B 32 30 7E
+#define KEY_SYM_F10      0xFFC7      // 1B 5B 32 31 7E
+#define KEY_SYM_F11      0xFFC8      // 1B 5B 32 33 7E
+#define KEY_SYM_F12      0xFFC9      // 1B 5B 32 34 7E
+#define KEY_SYM_SHIFT    0xFFE1
+#define KEY_SYM_CTRL     0xFFE3
+#define KEY_SYM_ALT      0xFFE9
+#define KEY_SYM_CAP_LOCK 0xFFE5
+#define KEY_SYM_DELETE   0xFFFF
+#define KEY_SYM_TILDE    0x60
+#define KEY_SYM_BKTIC    0x7E
+#define KEY_SYM_ONE      0x31
+#define KEY_SYM_BANG     0x21
+#define KEY_SYM_TWO      0x32
+#define KEY_SYM_AT       0x40
+#define KEY_SYM_THREE    0x33
+#define KEY_SYM_POUND    0x23
+#define KEY_SYM_FOUR     0x34
+#define KEY_SYM_DOLLAR   0x24
+#define KEY_SYM_FIVE     0x35
+#define KEY_SYM_PERCENT  0x25
+#define KEY_SYM_SIX      0x36
+#define KEY_SYM_CARAT    0x5E
+#define KEY_SYM_SEVEN    0x37
+#define KEY_SYM_AMPER    0x26
+#define KEY_SYM_EIGHT    0x38
+#define KEY_SYM_STAR     0x2A
+#define KEY_SYM_NINE     0x39
+#define KEY_SYM_LPAREN   0x28
+#define KEY_SYM_ZERO     0x30
+#define KEY_SYM_RPAREN   0x29
+#define KEY_SYM_MINUS    0x2D
+#define KEY_SYM_USCORE   0x5F
+#define KEY_SYM_EQUAL    0x2B
+#define KEY_SYM_PLUS     0x3D
+#define KEY_SYM_LBRKT    0x5B
+#define KEY_SYM_LCURLY   0x7B
+#define KEY_SYM_RBRKT    0x5D
+#define KEY_SYM_RCURLY   0x7D
+#define KEY_SYM_SLASH    0x5C
+#define KEY_SYM_PIPE     0x7C
+#define KEY_SYM_TIC      0x27
+#define KEY_SYM_QUOTE    0x22
+#define KEY_SYM_SEMIC    0x3B
+#define KEY_SYM_COLON    0x3A
+#define KEY_SYM_COMMA    0x2C
+#define KEY_SYM_LT       0x3C
+#define KEY_SYM_PERIOD   0x2E
+#define KEY_SYM_GT       0x3E
+#define KEY_SYM_BSLASH   0x2F
+#define KEY_SYM_QMARK    0x3F
+#define KEY_SYM_A        0x41
+#define KEY_SYM_B        0x42
+#define KEY_SYM_C        0x43
+#define KEY_SYM_D        0x44
+#define KEY_SYM_E        0x45
+#define KEY_SYM_F        0x46
+#define KEY_SYM_G        0x47
+#define KEY_SYM_H        0x48
+#define KEY_SYM_I        0x49
+#define KEY_SYM_J        0x4A
+#define KEY_SYM_K        0x4B
+#define KEY_SYM_L        0x4C
+#define KEY_SYM_M        0x4D
+#define KEY_SYM_N        0x4E
+#define KEY_SYM_O        0x4F
+#define KEY_SYM_P        0x50
+#define KEY_SYM_Q        0x51
+#define KEY_SYM_R        0x52
+#define KEY_SYM_S        0x53
+#define KEY_SYM_T        0x54
+#define KEY_SYM_U        0x55
+#define KEY_SYM_V        0x56
+#define KEY_SYM_W        0x57
+#define KEY_SYM_X        0x58
+#define KEY_SYM_Y        0x59
+#define KEY_SYM_Z        0x5A
+#define KEY_SYM_a        0x61
+#define KEY_SYM_b        0x62
+#define KEY_SYM_c        0x63
+#define KEY_SYM_d        0x64
+#define KEY_SYM_e        0x65
+#define KEY_SYM_f        0x66
+#define KEY_SYM_g        0x67
+#define KEY_SYM_h        0x68
+#define KEY_SYM_i        0x69
+#define KEY_SYM_j        0x6A
+#define KEY_SYM_k        0x6B
+#define KEY_SYM_l        0x6C
+#define KEY_SYM_m        0x6D
+#define KEY_SYM_n        0x6E
+#define KEY_SYM_o        0x6F
+#define KEY_SYM_p        0x70
+#define KEY_SYM_q        0x71
+#define KEY_SYM_r        0x72
+#define KEY_SYM_s        0x73
+#define KEY_SYM_t        0x74
+#define KEY_SYM_u        0x75
+#define KEY_SYM_v        0x76
+#define KEY_SYM_w        0x77
+#define KEY_SYM_x        0x78
+#define KEY_SYM_y        0x79
+#define KEY_SYM_z        0x7A
+#define KEY_SYM_SPACE    0x20
+#endif /* _IBMASM_REMOTE_H_ */
diff --git a/drivers/misc/ibmasm/uart.c b/drivers/misc/ibmasm/uart.c
new file mode 100644
index 00000000000..93baa350d69
--- /dev/null
+++ b/drivers/misc/ibmasm/uart.c
@@ -0,0 +1,72 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asb�ck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/termios.h>
+#include <linux/tty.h>
+#include <linux/serial_core.h>
+#include <linux/serial_reg.h>
+#include <linux/serial_8250.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+
+
+void ibmasm_register_uart(struct service_processor *sp)
+{
+	struct uart_port uport;
+	void __iomem *iomem_base;
+
+	iomem_base = sp->base_address + SCOUT_COM_B_BASE;
+
+	/* read the uart scratch register to determine if the UART
+	 * is dedicated to the service processor or if the OS can use it
+	 */
+	if (0 == readl(iomem_base + UART_SCR)) {
+		dev_info(sp->dev, "IBM SP UART not registered, owned by service processor\n");
+		sp->serial_line = -1;
+		return;
+	}
+
+	memset(&uport, 0, sizeof(struct uart_port));
+	uport.irq	= sp->irq;
+	uport.uartclk	= 3686400;
+	uport.flags	= UPF_SHARE_IRQ;
+	uport.iotype	= UPIO_MEM;
+	uport.membase	= iomem_base;
+
+	sp->serial_line = serial8250_register_port(&uport);
+	if (sp->serial_line < 0) {
+		dev_err(sp->dev, "Failed to register serial port\n");
+		return;
+	}
+	enable_uart_interrupts(sp->base_address);
+}
+
+void ibmasm_unregister_uart(struct service_processor *sp)
+{
+	if (sp->serial_line < 0)
+		return;
+
+	disable_uart_interrupts(sp->base_address);
+	serial8250_unregister_port(sp->serial_line);
+}
diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c
new file mode 100644
index 00000000000..395a4ea64e9
--- /dev/null
+++ b/drivers/misc/ics932s401.c
@@ -0,0 +1,505 @@
+/*
+ * A driver for the Integrated Circuits ICS932S401
+ * Copyright (C) 2008 IBM
+ *
+ * Author: Darrick J. Wong <djwong@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+
+/* Addresses to scan */
+static const unsigned short normal_i2c[] = { 0x69, I2C_CLIENT_END };
+
+/* ICS932S401 registers */
+#define ICS932S401_REG_CFG2			0x01
+#define 	ICS932S401_CFG1_SPREAD		0x01
+#define ICS932S401_REG_CFG7			0x06
+#define		ICS932S401_FS_MASK		0x07
+#define	ICS932S401_REG_VENDOR_REV		0x07
+#define		ICS932S401_VENDOR		1
+#define		ICS932S401_VENDOR_MASK		0x0F
+#define		ICS932S401_REV			4
+#define		ICS932S401_REV_SHIFT		4
+#define ICS932S401_REG_DEVICE			0x09
+#define		ICS932S401_DEVICE		11
+#define	ICS932S401_REG_CTRL			0x0A
+#define		ICS932S401_MN_ENABLED		0x80
+#define		ICS932S401_CPU_ALT		0x04
+#define		ICS932S401_SRC_ALT		0x08
+#define ICS932S401_REG_CPU_M_CTRL		0x0B
+#define		ICS932S401_M_MASK		0x3F
+#define	ICS932S401_REG_CPU_N_CTRL		0x0C
+#define	ICS932S401_REG_CPU_SPREAD1		0x0D
+#define ICS932S401_REG_CPU_SPREAD2		0x0E
+#define		ICS932S401_SPREAD_MASK		0x7FFF
+#define ICS932S401_REG_SRC_M_CTRL		0x0F
+#define ICS932S401_REG_SRC_N_CTRL		0x10
+#define	ICS932S401_REG_SRC_SPREAD1		0x11
+#define ICS932S401_REG_SRC_SPREAD2		0x12
+#define ICS932S401_REG_CPU_DIVISOR		0x13
+#define 	ICS932S401_CPU_DIVISOR_SHIFT	4
+#define ICS932S401_REG_PCISRC_DIVISOR		0x14
+#define		ICS932S401_SRC_DIVISOR_MASK	0x0F
+#define		ICS932S401_PCI_DIVISOR_SHIFT	4
+
+/* Base clock is 14.318MHz */
+#define BASE_CLOCK				14318
+
+#define NUM_REGS				21
+#define NUM_MIRRORED_REGS			15
+
+static int regs_to_copy[NUM_MIRRORED_REGS] = {
+	ICS932S401_REG_CFG2,
+	ICS932S401_REG_CFG7,
+	ICS932S401_REG_VENDOR_REV,
+	ICS932S401_REG_DEVICE,
+	ICS932S401_REG_CTRL,
+	ICS932S401_REG_CPU_M_CTRL,
+	ICS932S401_REG_CPU_N_CTRL,
+	ICS932S401_REG_CPU_SPREAD1,
+	ICS932S401_REG_CPU_SPREAD2,
+	ICS932S401_REG_SRC_M_CTRL,
+	ICS932S401_REG_SRC_N_CTRL,
+	ICS932S401_REG_SRC_SPREAD1,
+	ICS932S401_REG_SRC_SPREAD2,
+	ICS932S401_REG_CPU_DIVISOR,
+	ICS932S401_REG_PCISRC_DIVISOR,
+};
+
+/* How often do we reread sensors values? (In jiffies) */
+#define SENSOR_REFRESH_INTERVAL	(2 * HZ)
+
+/* How often do we reread sensor limit values? (In jiffies) */
+#define LIMIT_REFRESH_INTERVAL	(60 * HZ)
+
+struct ics932s401_data {
+	struct attribute_group	attrs;
+	struct mutex		lock;
+	char			sensors_valid;
+	unsigned long		sensors_last_updated;	/* In jiffies */
+
+	u8			regs[NUM_REGS];
+};
+
+static int ics932s401_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id);
+static int ics932s401_detect(struct i2c_client *client,
+			  struct i2c_board_info *info);
+static int ics932s401_remove(struct i2c_client *client);
+
+static const struct i2c_device_id ics932s401_id[] = {
+	{ "ics932s401", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, ics932s401_id);
+
+static struct i2c_driver ics932s401_driver = {
+	.class		= I2C_CLASS_HWMON,
+	.driver = {
+		.name	= "ics932s401",
+	},
+	.probe		= ics932s401_probe,
+	.remove		= ics932s401_remove,
+	.id_table	= ics932s401_id,
+	.detect		= ics932s401_detect,
+	.address_list	= normal_i2c,
+};
+
+static struct ics932s401_data *ics932s401_update_device(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ics932s401_data *data = i2c_get_clientdata(client);
+	unsigned long local_jiffies = jiffies;
+	int i, temp;
+
+	mutex_lock(&data->lock);
+	if (time_before(local_jiffies, data->sensors_last_updated +
+		SENSOR_REFRESH_INTERVAL)
+		&& data->sensors_valid)
+		goto out;
+
+	/*
+	 * Each register must be read as a word and then right shifted 8 bits.
+	 * Not really sure why this is; setting the "byte count programming"
+	 * register to 1 does not fix this problem.
+	 */
+	for (i = 0; i < NUM_MIRRORED_REGS; i++) {
+		temp = i2c_smbus_read_word_data(client, regs_to_copy[i]);
+		data->regs[regs_to_copy[i]] = temp >> 8;
+	}
+
+	data->sensors_last_updated = local_jiffies;
+	data->sensors_valid = 1;
+
+out:
+	mutex_unlock(&data->lock);
+	return data;
+}
+
+static ssize_t show_spread_enabled(struct device *dev,
+				   struct device_attribute *devattr,
+				   char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+
+	if (data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD)
+		return sprintf(buf, "1\n");
+
+	return sprintf(buf, "0\n");
+}
+
+/* bit to cpu khz map */
+static const int fs_speeds[] = {
+	266666,
+	133333,
+	200000,
+	166666,
+	333333,
+	100000,
+	400000,
+	0,
+};
+
+/* clock divisor map */
+static const int divisors[] = {2, 3, 5, 15, 4, 6, 10, 30, 8, 12, 20, 60, 16,
+			       24, 40, 120};
+
+/* Calculate CPU frequency from the M/N registers. */
+static int calculate_cpu_freq(struct ics932s401_data *data)
+{
+	int m, n, freq;
+
+	m = data->regs[ICS932S401_REG_CPU_M_CTRL] & ICS932S401_M_MASK;
+	n = data->regs[ICS932S401_REG_CPU_N_CTRL];
+
+	/* Pull in bits 8 & 9 from the M register */
+	n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x80) << 1;
+	n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x40) << 3;
+
+	freq = BASE_CLOCK * (n + 8) / (m + 2);
+	freq /= divisors[data->regs[ICS932S401_REG_CPU_DIVISOR] >>
+			 ICS932S401_CPU_DIVISOR_SHIFT];
+
+	return freq;
+}
+
+static ssize_t show_cpu_clock(struct device *dev,
+			      struct device_attribute *devattr,
+			      char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+
+	return sprintf(buf, "%d\n", calculate_cpu_freq(data));
+}
+
+static ssize_t show_cpu_clock_sel(struct device *dev,
+				  struct device_attribute *devattr,
+				  char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+	int freq;
+
+	if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED)
+		freq = calculate_cpu_freq(data);
+	else {
+		/* Freq is neatly wrapped up for us */
+		int fid = data->regs[ICS932S401_REG_CFG7] & ICS932S401_FS_MASK;
+		freq = fs_speeds[fid];
+		if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT) {
+			switch (freq) {
+			case 166666:
+				freq = 160000;
+				break;
+			case 333333:
+				freq = 320000;
+				break;
+			}
+		}
+	}
+
+	return sprintf(buf, "%d\n", freq);
+}
+
+/* Calculate SRC frequency from the M/N registers. */
+static int calculate_src_freq(struct ics932s401_data *data)
+{
+	int m, n, freq;
+
+	m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK;
+	n = data->regs[ICS932S401_REG_SRC_N_CTRL];
+
+	/* Pull in bits 8 & 9 from the M register */
+	n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1;
+	n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3;
+
+	freq = BASE_CLOCK * (n + 8) / (m + 2);
+	freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] &
+			 ICS932S401_SRC_DIVISOR_MASK];
+
+	return freq;
+}
+
+static ssize_t show_src_clock(struct device *dev,
+			      struct device_attribute *devattr,
+			      char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+
+	return sprintf(buf, "%d\n", calculate_src_freq(data));
+}
+
+static ssize_t show_src_clock_sel(struct device *dev,
+				  struct device_attribute *devattr,
+				  char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+	int freq;
+
+	if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED)
+		freq = calculate_src_freq(data);
+	else
+		/* Freq is neatly wrapped up for us */
+		if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT &&
+		    data->regs[ICS932S401_REG_CTRL] & ICS932S401_SRC_ALT)
+			freq = 96000;
+		else
+			freq = 100000;
+
+	return sprintf(buf, "%d\n", freq);
+}
+
+/* Calculate PCI frequency from the SRC M/N registers. */
+static int calculate_pci_freq(struct ics932s401_data *data)
+{
+	int m, n, freq;
+
+	m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK;
+	n = data->regs[ICS932S401_REG_SRC_N_CTRL];
+
+	/* Pull in bits 8 & 9 from the M register */
+	n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1;
+	n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3;
+
+	freq = BASE_CLOCK * (n + 8) / (m + 2);
+	freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] >>
+			 ICS932S401_PCI_DIVISOR_SHIFT];
+
+	return freq;
+}
+
+static ssize_t show_pci_clock(struct device *dev,
+			      struct device_attribute *devattr,
+			      char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+
+	return sprintf(buf, "%d\n", calculate_pci_freq(data));
+}
+
+static ssize_t show_pci_clock_sel(struct device *dev,
+				  struct device_attribute *devattr,
+				  char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+	int freq;
+
+	if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED)
+		freq = calculate_pci_freq(data);
+	else
+		freq = 33333;
+
+	return sprintf(buf, "%d\n", freq);
+}
+
+static ssize_t show_value(struct device *dev,
+			  struct device_attribute *devattr,
+			  char *buf);
+
+static ssize_t show_spread(struct device *dev,
+			   struct device_attribute *devattr,
+			   char *buf);
+
+static DEVICE_ATTR(spread_enabled, S_IRUGO, show_spread_enabled, NULL);
+static DEVICE_ATTR(cpu_clock_selection, S_IRUGO, show_cpu_clock_sel, NULL);
+static DEVICE_ATTR(cpu_clock, S_IRUGO, show_cpu_clock, NULL);
+static DEVICE_ATTR(src_clock_selection, S_IRUGO, show_src_clock_sel, NULL);
+static DEVICE_ATTR(src_clock, S_IRUGO, show_src_clock, NULL);
+static DEVICE_ATTR(pci_clock_selection, S_IRUGO, show_pci_clock_sel, NULL);
+static DEVICE_ATTR(pci_clock, S_IRUGO, show_pci_clock, NULL);
+static DEVICE_ATTR(usb_clock, S_IRUGO, show_value, NULL);
+static DEVICE_ATTR(ref_clock, S_IRUGO, show_value, NULL);
+static DEVICE_ATTR(cpu_spread, S_IRUGO, show_spread, NULL);
+static DEVICE_ATTR(src_spread, S_IRUGO, show_spread, NULL);
+
+static struct attribute *ics932s401_attr[] =
+{
+	&dev_attr_spread_enabled.attr,
+	&dev_attr_cpu_clock_selection.attr,
+	&dev_attr_cpu_clock.attr,
+	&dev_attr_src_clock_selection.attr,
+	&dev_attr_src_clock.attr,
+	&dev_attr_pci_clock_selection.attr,
+	&dev_attr_pci_clock.attr,
+	&dev_attr_usb_clock.attr,
+	&dev_attr_ref_clock.attr,
+	&dev_attr_cpu_spread.attr,
+	&dev_attr_src_spread.attr,
+	NULL
+};
+
+static ssize_t show_value(struct device *dev,
+			  struct device_attribute *devattr,
+			  char *buf)
+{
+	int x;
+
+	if (devattr == &dev_attr_usb_clock)
+		x = 48000;
+	else if (devattr == &dev_attr_ref_clock)
+		x = BASE_CLOCK;
+	else
+		BUG();
+
+	return sprintf(buf, "%d\n", x);
+}
+
+static ssize_t show_spread(struct device *dev,
+			   struct device_attribute *devattr,
+			   char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+	int reg;
+	unsigned long val;
+
+	if (!(data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD))
+		return sprintf(buf, "0%%\n");
+
+	if (devattr == &dev_attr_src_spread)
+		reg = ICS932S401_REG_SRC_SPREAD1;
+	else if (devattr == &dev_attr_cpu_spread)
+		reg = ICS932S401_REG_CPU_SPREAD1;
+	else
+		BUG();
+
+	val = data->regs[reg] | (data->regs[reg + 1] << 8);
+	val &= ICS932S401_SPREAD_MASK;
+
+	/* Scale 0..2^14 to -0.5. */
+	val = 500000 * val / 16384;
+	return sprintf(buf, "-0.%lu%%\n", val);
+}
+
+/* Return 0 if detection is successful, -ENODEV otherwise */
+static int ics932s401_detect(struct i2c_client *client,
+			  struct i2c_board_info *info)
+{
+	struct i2c_adapter *adapter = client->adapter;
+	int vendor, device, revision;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+		return -ENODEV;
+
+	vendor = i2c_smbus_read_word_data(client, ICS932S401_REG_VENDOR_REV);
+	vendor >>= 8;
+	revision = vendor >> ICS932S401_REV_SHIFT;
+	vendor &= ICS932S401_VENDOR_MASK;
+	if (vendor != ICS932S401_VENDOR)
+		return -ENODEV;
+
+	device = i2c_smbus_read_word_data(client, ICS932S401_REG_DEVICE);
+	device >>= 8;
+	if (device != ICS932S401_DEVICE)
+		return -ENODEV;
+
+	if (revision != ICS932S401_REV)
+		dev_info(&adapter->dev, "Unknown revision %d\n", revision);
+
+	strlcpy(info->type, "ics932s401", I2C_NAME_SIZE);
+
+	return 0;
+}
+
+static int ics932s401_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct ics932s401_data *data;
+	int err;
+
+	data = kzalloc(sizeof(struct ics932s401_data), GFP_KERNEL);
+	if (!data) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->lock);
+
+	dev_info(&client->dev, "%s chip found\n", client->name);
+
+	/* Register sysfs hooks */
+	data->attrs.attrs = ics932s401_attr;
+	err = sysfs_create_group(&client->dev.kobj, &data->attrs);
+	if (err)
+		goto exit_free;
+
+	return 0;
+
+exit_free:
+	kfree(data);
+exit:
+	return err;
+}
+
+static int ics932s401_remove(struct i2c_client *client)
+{
+	struct ics932s401_data *data = i2c_get_clientdata(client);
+
+	sysfs_remove_group(&client->dev.kobj, &data->attrs);
+	kfree(data);
+	return 0;
+}
+
+static int __init ics932s401_init(void)
+{
+	return i2c_add_driver(&ics932s401_driver);
+}
+
+static void __exit ics932s401_exit(void)
+{
+	i2c_del_driver(&ics932s401_driver);
+}
+
+MODULE_AUTHOR("Darrick J. Wong <djwong@us.ibm.com>");
+MODULE_DESCRIPTION("ICS932S401 driver");
+MODULE_LICENSE("GPL");
+
+module_init(ics932s401_init);
+module_exit(ics932s401_exit);
+
+/* IBM IntelliStation Z30 */
+MODULE_ALIAS("dmi:bvnIBM:*:rn9228:*");
+MODULE_ALIAS("dmi:bvnIBM:*:rn9232:*");
+
+/* IBM x3650/x3550 */
+MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3650*");
+MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3550*");
diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c
new file mode 100644
index 00000000000..09dcb699e66
--- /dev/null
+++ b/drivers/misc/ioc4.c
@@ -0,0 +1,512 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2005-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/* This file contains the master driver module for use by SGI IOC4 subdrivers.
+ *
+ * It allocates any resources shared between multiple subdevices, and
+ * provides accessor functions (where needed) and the like for those
+ * resources.  It also provides a mechanism for the subdevice modules
+ * to support loading and unloading.
+ *
+ * Non-shared resources (e.g. external interrupt A_INT_OUT register page
+ * alias, serial port and UART registers) are handled by the subdevice
+ * modules themselves.
+ *
+ * This is all necessary because IOC4 is not implemented as a multi-function
+ * PCI device, but an amalgamation of disparate registers for several
+ * types of device (ATA, serial, external interrupts).  The normal
+ * resource management in the kernel doesn't have quite the right interfaces
+ * to handle this situation (e.g. multiple modules can't claim the same
+ * PCI ID), thus this IOC4 master module.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/ioc4.h>
+#include <linux/ktime.h>
+#include <linux/mutex.h>
+#include <linux/time.h>
+#include <asm/io.h>
+
+/***************
+ * Definitions *
+ ***************/
+
+/* Tweakable values */
+
+/* PCI bus speed detection/calibration */
+#define IOC4_CALIBRATE_COUNT 63		/* Calibration cycle period */
+#define IOC4_CALIBRATE_CYCLES 256	/* Average over this many cycles */
+#define IOC4_CALIBRATE_DISCARD 2	/* Discard first few cycles */
+#define IOC4_CALIBRATE_LOW_MHZ 25	/* Lower bound on bus speed sanity */
+#define IOC4_CALIBRATE_HIGH_MHZ 75	/* Upper bound on bus speed sanity */
+#define IOC4_CALIBRATE_DEFAULT_MHZ 66	/* Assumed if sanity check fails */
+
+/************************
+ * Submodule management *
+ ************************/
+
+static DEFINE_MUTEX(ioc4_mutex);
+
+static LIST_HEAD(ioc4_devices);
+static LIST_HEAD(ioc4_submodules);
+
+/* Register an IOC4 submodule */
+int
+ioc4_register_submodule(struct ioc4_submodule *is)
+{
+	struct ioc4_driver_data *idd;
+
+	mutex_lock(&ioc4_mutex);
+	list_add(&is->is_list, &ioc4_submodules);
+
+	/* Initialize submodule for each IOC4 */
+	if (!is->is_probe)
+		goto out;
+
+	list_for_each_entry(idd, &ioc4_devices, idd_list) {
+		if (is->is_probe(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule %s probe failed "
+			       "for pci_dev %s",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+ out:
+	mutex_unlock(&ioc4_mutex);
+	return 0;
+}
+
+/* Unregister an IOC4 submodule */
+void
+ioc4_unregister_submodule(struct ioc4_submodule *is)
+{
+	struct ioc4_driver_data *idd;
+
+	mutex_lock(&ioc4_mutex);
+	list_del(&is->is_list);
+
+	/* Remove submodule for each IOC4 */
+	if (!is->is_remove)
+		goto out;
+
+	list_for_each_entry(idd, &ioc4_devices, idd_list) {
+		if (is->is_remove(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule %s remove failed "
+			       "for pci_dev %s.\n",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+ out:
+	mutex_unlock(&ioc4_mutex);
+}
+
+/*********************
+ * Device management *
+ *********************/
+
+#define IOC4_CALIBRATE_LOW_LIMIT \
+	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_LOW_MHZ)
+#define IOC4_CALIBRATE_HIGH_LIMIT \
+	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_HIGH_MHZ)
+#define IOC4_CALIBRATE_DEFAULT \
+	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_DEFAULT_MHZ)
+
+#define IOC4_CALIBRATE_END \
+	(IOC4_CALIBRATE_CYCLES + IOC4_CALIBRATE_DISCARD)
+
+#define IOC4_INT_OUT_MODE_TOGGLE 0x7	/* Toggle INT_OUT every COUNT+1 ticks */
+
+/* Determines external interrupt output clock period of the PCI bus an
+ * IOC4 is attached to.  This value can be used to determine the PCI
+ * bus speed.
+ *
+ * IOC4 has a design feature that various internal timers are derived from
+ * the PCI bus clock.  This causes IOC4 device drivers to need to take the
+ * bus speed into account when setting various register values (e.g. INT_OUT
+ * register COUNT field, UART divisors, etc).  Since this information is
+ * needed by several subdrivers, it is determined by the main IOC4 driver,
+ * even though the following code utilizes external interrupt registers
+ * to perform the speed calculation.
+ */
+static void __devinit
+ioc4_clock_calibrate(struct ioc4_driver_data *idd)
+{
+	union ioc4_int_out int_out;
+	union ioc4_gpcr gpcr;
+	unsigned int state, last_state = 1;
+	struct timespec start_ts, end_ts;
+	uint64_t start, end, period;
+	unsigned int count = 0;
+
+	/* Enable output */
+	gpcr.raw = 0;
+	gpcr.fields.dir = IOC4_GPCR_DIR_0;
+	gpcr.fields.int_out_en = 1;
+	writel(gpcr.raw, &idd->idd_misc_regs->gpcr_s.raw);
+
+	/* Reset to power-on state */
+	writel(0, &idd->idd_misc_regs->int_out.raw);
+	mmiowb();
+
+	/* Set up square wave */
+	int_out.raw = 0;
+	int_out.fields.count = IOC4_CALIBRATE_COUNT;
+	int_out.fields.mode = IOC4_INT_OUT_MODE_TOGGLE;
+	int_out.fields.diag = 0;
+	writel(int_out.raw, &idd->idd_misc_regs->int_out.raw);
+	mmiowb();
+
+	/* Check square wave period averaged over some number of cycles */
+	do {
+		int_out.raw = readl(&idd->idd_misc_regs->int_out.raw);
+		state = int_out.fields.int_out;
+		if (!last_state && state) {
+			count++;
+			if (count == IOC4_CALIBRATE_END) {
+				ktime_get_ts(&end_ts);
+				break;
+			} else if (count == IOC4_CALIBRATE_DISCARD)
+				ktime_get_ts(&start_ts);
+		}
+		last_state = state;
+	} while (1);
+
+	/* Calculation rearranged to preserve intermediate precision.
+	 * Logically:
+	 * 1. "end - start" gives us the measurement period over all
+	 *    the square wave cycles.
+	 * 2. Divide by number of square wave cycles to get the period
+	 *    of a square wave cycle.
+	 * 3. Divide by 2*(int_out.fields.count+1), which is the formula
+	 *    by which the IOC4 generates the square wave, to get the
+	 *    period of an IOC4 INT_OUT count.
+	 */
+	end = end_ts.tv_sec * NSEC_PER_SEC + end_ts.tv_nsec;
+	start = start_ts.tv_sec * NSEC_PER_SEC + start_ts.tv_nsec;
+	period = (end - start) /
+		(IOC4_CALIBRATE_CYCLES * 2 * (IOC4_CALIBRATE_COUNT + 1));
+
+	/* Bounds check the result. */
+	if (period > IOC4_CALIBRATE_LOW_LIMIT ||
+	    period < IOC4_CALIBRATE_HIGH_LIMIT) {
+		printk(KERN_INFO
+		       "IOC4 %s: Clock calibration failed.  Assuming"
+		       "PCI clock is %d ns.\n",
+		       pci_name(idd->idd_pdev),
+		       IOC4_CALIBRATE_DEFAULT / IOC4_EXTINT_COUNT_DIVISOR);
+		period = IOC4_CALIBRATE_DEFAULT;
+	} else {
+		u64 ns = period;
+
+		do_div(ns, IOC4_EXTINT_COUNT_DIVISOR);
+		printk(KERN_DEBUG
+		       "IOC4 %s: PCI clock is %llu ns.\n",
+		       pci_name(idd->idd_pdev), (unsigned long long)ns);
+	}
+
+	/* Remember results.  We store the extint clock period rather
+	 * than the PCI clock period so that greater precision is
+	 * retained.  Divide by IOC4_EXTINT_COUNT_DIVISOR to get
+	 * PCI clock period.
+	 */
+	idd->count_period = period;
+}
+
+/* There are three variants of IOC4 cards: IO9, IO10, and PCI-RT.
+ * Each brings out different combinations of IOC4 signals, thus.
+ * the IOC4 subdrivers need to know to which we're attached.
+ *
+ * We look for the presence of a SCSI (IO9) or SATA (IO10) controller
+ * on the same PCI bus at slot number 3 to differentiate IO9 from IO10.
+ * If neither is present, it's a PCI-RT.
+ */
+static unsigned int __devinit
+ioc4_variant(struct ioc4_driver_data *idd)
+{
+	struct pci_dev *pdev = NULL;
+	int found = 0;
+
+	/* IO9: Look for a QLogic ISP 12160 at the same bus and slot 3. */
+	do {
+		pdev = pci_get_device(PCI_VENDOR_ID_QLOGIC,
+				      PCI_DEVICE_ID_QLOGIC_ISP12160, pdev);
+		if (pdev &&
+		    idd->idd_pdev->bus->number == pdev->bus->number &&
+		    3 == PCI_SLOT(pdev->devfn))
+			found = 1;
+	} while (pdev && !found);
+	if (NULL != pdev) {
+		pci_dev_put(pdev);
+		return IOC4_VARIANT_IO9;
+	}
+
+	/* IO10: Look for a Vitesse VSC 7174 at the same bus and slot 3. */
+	pdev = NULL;
+	do {
+		pdev = pci_get_device(PCI_VENDOR_ID_VITESSE,
+				      PCI_DEVICE_ID_VITESSE_VSC7174, pdev);
+		if (pdev &&
+		    idd->idd_pdev->bus->number == pdev->bus->number &&
+		    3 == PCI_SLOT(pdev->devfn))
+			found = 1;
+	} while (pdev && !found);
+	if (NULL != pdev) {
+		pci_dev_put(pdev);
+		return IOC4_VARIANT_IO10;
+	}
+
+	/* PCI-RT: No SCSI/SATA controller will be present */
+	return IOC4_VARIANT_PCI_RT;
+}
+
+static void __devinit
+ioc4_load_modules(struct work_struct *work)
+{
+	/* arg just has to be freed */
+
+	request_module("sgiioc4");
+
+	kfree(work);
+}
+
+/* Adds a new instance of an IOC4 card */
+static int __devinit
+ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
+{
+	struct ioc4_driver_data *idd;
+	struct ioc4_submodule *is;
+	uint32_t pcmd;
+	int ret;
+
+	/* Enable IOC4 and take ownership of it */
+	if ((ret = pci_enable_device(pdev))) {
+		printk(KERN_WARNING
+		       "%s: Failed to enable IOC4 device for pci_dev %s.\n",
+		       __func__, pci_name(pdev));
+		goto out;
+	}
+	pci_set_master(pdev);
+
+	/* Set up per-IOC4 data */
+	idd = kmalloc(sizeof(struct ioc4_driver_data), GFP_KERNEL);
+	if (!idd) {
+		printk(KERN_WARNING
+		       "%s: Failed to allocate IOC4 data for pci_dev %s.\n",
+		       __func__, pci_name(pdev));
+		ret = -ENODEV;
+		goto out_idd;
+	}
+	idd->idd_pdev = pdev;
+	idd->idd_pci_id = pci_id;
+
+	/* Map IOC4 misc registers.  These are shared between subdevices
+	 * so the main IOC4 module manages them.
+	 */
+	idd->idd_bar0 = pci_resource_start(idd->idd_pdev, 0);
+	if (!idd->idd_bar0) {
+		printk(KERN_WARNING
+		       "%s: Unable to find IOC4 misc resource "
+		       "for pci_dev %s.\n",
+		       __func__, pci_name(idd->idd_pdev));
+		ret = -ENODEV;
+		goto out_pci;
+	}
+	if (!request_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs),
+			    "ioc4_misc")) {
+		printk(KERN_WARNING
+		       "%s: Unable to request IOC4 misc region "
+		       "for pci_dev %s.\n",
+		       __func__, pci_name(idd->idd_pdev));
+		ret = -ENODEV;
+		goto out_pci;
+	}
+	idd->idd_misc_regs = ioremap(idd->idd_bar0,
+				     sizeof(struct ioc4_misc_regs));
+	if (!idd->idd_misc_regs) {
+		printk(KERN_WARNING
+		       "%s: Unable to remap IOC4 misc region "
+		       "for pci_dev %s.\n",
+		       __func__, pci_name(idd->idd_pdev));
+		ret = -ENODEV;
+		goto out_misc_region;
+	}
+
+	/* Failsafe portion of per-IOC4 initialization */
+
+	/* Detect card variant */
+	idd->idd_variant = ioc4_variant(idd);
+	printk(KERN_INFO "IOC4 %s: %s card detected.\n", pci_name(pdev),
+	       idd->idd_variant == IOC4_VARIANT_IO9 ? "IO9" :
+	       idd->idd_variant == IOC4_VARIANT_PCI_RT ? "PCI-RT" :
+	       idd->idd_variant == IOC4_VARIANT_IO10 ? "IO10" : "unknown");
+
+	/* Initialize IOC4 */
+	pci_read_config_dword(idd->idd_pdev, PCI_COMMAND, &pcmd);
+	pci_write_config_dword(idd->idd_pdev, PCI_COMMAND,
+			       pcmd | PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
+
+	/* Determine PCI clock */
+	ioc4_clock_calibrate(idd);
+
+	/* Disable/clear all interrupts.  Need to do this here lest
+	 * one submodule request the shared IOC4 IRQ, but interrupt
+	 * is generated by a different subdevice.
+	 */
+	/* Disable */
+	writel(~0, &idd->idd_misc_regs->other_iec.raw);
+	writel(~0, &idd->idd_misc_regs->sio_iec);
+	/* Clear (i.e. acknowledge) */
+	writel(~0, &idd->idd_misc_regs->other_ir.raw);
+	writel(~0, &idd->idd_misc_regs->sio_ir);
+
+	/* Track PCI-device specific data */
+	idd->idd_serial_data = NULL;
+	pci_set_drvdata(idd->idd_pdev, idd);
+
+	mutex_lock(&ioc4_mutex);
+	list_add_tail(&idd->idd_list, &ioc4_devices);
+
+	/* Add this IOC4 to all submodules */
+	list_for_each_entry(is, &ioc4_submodules, is_list) {
+		if (is->is_probe && is->is_probe(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule 0x%s probe failed "
+			       "for pci_dev %s.\n",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+	mutex_unlock(&ioc4_mutex);
+
+	/* Request sgiioc4 IDE driver on boards that bring that functionality
+	 * off of IOC4.  The root filesystem may be hosted on a drive connected
+	 * to IOC4, so we need to make sure the sgiioc4 driver is loaded as it
+	 * won't be picked up by modprobes due to the ioc4 module owning the
+	 * PCI device.
+	 */
+	if (idd->idd_variant != IOC4_VARIANT_PCI_RT) {
+		struct work_struct *work;
+		work = kzalloc(sizeof(struct work_struct), GFP_KERNEL);
+		if (!work) {
+			printk(KERN_WARNING
+			       "%s: IOC4 unable to allocate memory for "
+			       "load of sub-modules.\n", __func__);
+		} else {
+			/* Request the module from a work procedure as the
+			 * modprobe goes out to a userland helper and that
+			 * will hang if done directly from ioc4_probe().
+			 */
+			printk(KERN_INFO "IOC4 loading sgiioc4 submodule\n");
+			INIT_WORK(work, ioc4_load_modules);
+			schedule_work(work);
+		}
+	}
+
+	return 0;
+
+out_misc_region:
+	release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs));
+out_pci:
+	kfree(idd);
+out_idd:
+	pci_disable_device(pdev);
+out:
+	return ret;
+}
+
+/* Removes a particular instance of an IOC4 card. */
+static void __devexit
+ioc4_remove(struct pci_dev *pdev)
+{
+	struct ioc4_submodule *is;
+	struct ioc4_driver_data *idd;
+
+	idd = pci_get_drvdata(pdev);
+
+	/* Remove this IOC4 from all submodules */
+	mutex_lock(&ioc4_mutex);
+	list_for_each_entry(is, &ioc4_submodules, is_list) {
+		if (is->is_remove && is->is_remove(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule 0x%s remove failed "
+			       "for pci_dev %s.\n",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+	mutex_unlock(&ioc4_mutex);
+
+	/* Release resources */
+	iounmap(idd->idd_misc_regs);
+	if (!idd->idd_bar0) {
+		printk(KERN_WARNING
+		       "%s: Unable to get IOC4 misc mapping for pci_dev %s. "
+		       "Device removal may be incomplete.\n",
+		       __func__, pci_name(idd->idd_pdev));
+	}
+	release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs));
+
+	/* Disable IOC4 and relinquish */
+	pci_disable_device(pdev);
+
+	/* Remove and free driver data */
+	mutex_lock(&ioc4_mutex);
+	list_del(&idd->idd_list);
+	mutex_unlock(&ioc4_mutex);
+	kfree(idd);
+}
+
+static struct pci_device_id ioc4_id_table[] = {
+	{PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC4, PCI_ANY_ID,
+	 PCI_ANY_ID, 0x0b4000, 0xFFFFFF},
+	{0}
+};
+
+static struct pci_driver ioc4_driver = {
+	.name = "IOC4",
+	.id_table = ioc4_id_table,
+	.probe = ioc4_probe,
+	.remove = __devexit_p(ioc4_remove),
+};
+
+MODULE_DEVICE_TABLE(pci, ioc4_id_table);
+
+/*********************
+ * Module management *
+ *********************/
+
+/* Module load */
+static int __init
+ioc4_init(void)
+{
+	return pci_register_driver(&ioc4_driver);
+}
+
+/* Module unload */
+static void __exit
+ioc4_exit(void)
+{
+	/* Ensure ioc4_load_modules() has completed before exiting */
+	flush_scheduled_work();
+	pci_unregister_driver(&ioc4_driver);
+}
+
+module_init(ioc4_init);
+module_exit(ioc4_exit);
+
+MODULE_AUTHOR("Brent Casavant - Silicon Graphics, Inc. <bcasavan@sgi.com>");
+MODULE_DESCRIPTION("PCI driver master module for SGI IOC4 Base-IO Card");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(ioc4_register_submodule);
+EXPORT_SYMBOL(ioc4_unregister_submodule);
diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c
new file mode 100644
index 00000000000..a71e245801e
--- /dev/null
+++ b/drivers/misc/isl29003.c
@@ -0,0 +1,475 @@
+/*
+ *  isl29003.c - Linux kernel module for
+ * 	Intersil ISL29003 ambient light sensor
+ *
+ *  See file:Documentation/misc-devices/isl29003
+ *
+ *  Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ *  Based on code written by
+ *  	Rodolfo Giometti <giometti@linux.it>
+ *  	Eurotech S.p.A. <info@eurotech.it>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+
+#define ISL29003_DRV_NAME	"isl29003"
+#define DRIVER_VERSION		"1.0"
+
+#define ISL29003_REG_COMMAND		0x00
+#define ISL29003_ADC_ENABLED		(1 << 7)
+#define ISL29003_ADC_PD			(1 << 6)
+#define ISL29003_TIMING_INT		(1 << 5)
+#define ISL29003_MODE_SHIFT		(2)
+#define ISL29003_MODE_MASK		(0x3 << ISL29003_MODE_SHIFT)
+#define ISL29003_RES_SHIFT		(0)
+#define ISL29003_RES_MASK		(0x3 << ISL29003_RES_SHIFT)
+
+#define ISL29003_REG_CONTROL		0x01
+#define ISL29003_INT_FLG		(1 << 5)
+#define ISL29003_RANGE_SHIFT		(2)
+#define ISL29003_RANGE_MASK		(0x3 << ISL29003_RANGE_SHIFT)
+#define ISL29003_INT_PERSISTS_SHIFT	(0)
+#define ISL29003_INT_PERSISTS_MASK	(0xf << ISL29003_INT_PERSISTS_SHIFT)
+
+#define ISL29003_REG_IRQ_THRESH_HI	0x02
+#define ISL29003_REG_IRQ_THRESH_LO	0x03
+#define ISL29003_REG_LSB_SENSOR		0x04
+#define ISL29003_REG_MSB_SENSOR		0x05
+#define ISL29003_REG_LSB_TIMER		0x06
+#define ISL29003_REG_MSB_TIMER		0x07
+
+#define ISL29003_NUM_CACHABLE_REGS	4
+
+struct isl29003_data {
+	struct i2c_client *client;
+	struct mutex lock;
+	u8 reg_cache[ISL29003_NUM_CACHABLE_REGS];
+	u8 power_state_before_suspend;
+};
+
+static int gain_range[] = {
+	1000, 4000, 16000, 64000
+};
+
+/*
+ * register access helpers
+ */
+
+static int __isl29003_read_reg(struct i2c_client *client,
+			       u32 reg, u8 mask, u8 shift)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	return (data->reg_cache[reg] & mask) >> shift;
+}
+
+static int __isl29003_write_reg(struct i2c_client *client,
+				u32 reg, u8 mask, u8 shift, u8 val)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	int ret = 0;
+	u8 tmp;
+
+	if (reg >= ISL29003_NUM_CACHABLE_REGS)
+		return -EINVAL;
+
+	mutex_lock(&data->lock);
+
+	tmp = data->reg_cache[reg];
+	tmp &= ~mask;
+	tmp |= val << shift;
+
+	ret = i2c_smbus_write_byte_data(client, reg, tmp);
+	if (!ret)
+		data->reg_cache[reg] = tmp;
+
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+/*
+ * internally used functions
+ */
+
+/* range */
+static int isl29003_get_range(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_CONTROL,
+		ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT);
+}
+
+static int isl29003_set_range(struct i2c_client *client, int range)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_CONTROL,
+		ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT, range);
+}
+
+/* resolution */
+static int isl29003_get_resolution(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT);
+}
+
+static int isl29003_set_resolution(struct i2c_client *client, int res)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT, res);
+}
+
+/* mode */
+static int isl29003_get_mode(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT);
+}
+
+static int isl29003_set_mode(struct i2c_client *client, int mode)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT, mode);
+}
+
+/* power_state */
+static int isl29003_set_power_state(struct i2c_client *client, int state)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+				ISL29003_ADC_ENABLED | ISL29003_ADC_PD, 0,
+				state ? ISL29003_ADC_ENABLED : ISL29003_ADC_PD);
+}
+
+static int isl29003_get_power_state(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	u8 cmdreg = data->reg_cache[ISL29003_REG_COMMAND];
+	return ~cmdreg & ISL29003_ADC_PD;
+}
+
+static int isl29003_get_adc_value(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	int lsb, msb, range, bitdepth;
+
+	mutex_lock(&data->lock);
+	lsb = i2c_smbus_read_byte_data(client, ISL29003_REG_LSB_SENSOR);
+
+	if (lsb < 0) {
+		mutex_unlock(&data->lock);
+		return lsb;
+	}
+
+	msb = i2c_smbus_read_byte_data(client, ISL29003_REG_MSB_SENSOR);
+	mutex_unlock(&data->lock);
+
+	if (msb < 0)
+		return msb;
+
+	range = isl29003_get_range(client);
+	bitdepth = (4 - isl29003_get_resolution(client)) * 4;
+	return (((msb << 8) | lsb) * gain_range[range]) >> bitdepth;
+}
+
+/*
+ * sysfs layer
+ */
+
+/* range */
+static ssize_t isl29003_show_range(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	return sprintf(buf, "%i\n", isl29003_get_range(client));
+}
+
+static ssize_t isl29003_store_range(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned long val;
+	int ret;
+
+	if ((strict_strtoul(buf, 10, &val) < 0) || (val > 3))
+		return -EINVAL;
+
+	ret = isl29003_set_range(client, val);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(range, S_IWUSR | S_IRUGO,
+		   isl29003_show_range, isl29003_store_range);
+
+
+/* resolution */
+static ssize_t isl29003_show_resolution(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	return sprintf(buf, "%d\n", isl29003_get_resolution(client));
+}
+
+static ssize_t isl29003_store_resolution(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned long val;
+	int ret;
+
+	if ((strict_strtoul(buf, 10, &val) < 0) || (val > 3))
+		return -EINVAL;
+
+	ret = isl29003_set_resolution(client, val);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(resolution, S_IWUSR | S_IRUGO,
+		   isl29003_show_resolution, isl29003_store_resolution);
+
+/* mode */
+static ssize_t isl29003_show_mode(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	return sprintf(buf, "%d\n", isl29003_get_mode(client));
+}
+
+static ssize_t isl29003_store_mode(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned long val;
+	int ret;
+
+	if ((strict_strtoul(buf, 10, &val) < 0) || (val > 2))
+		return -EINVAL;
+
+	ret = isl29003_set_mode(client, val);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO,
+		   isl29003_show_mode, isl29003_store_mode);
+
+
+/* power state */
+static ssize_t isl29003_show_power_state(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	return sprintf(buf, "%d\n", isl29003_get_power_state(client));
+}
+
+static ssize_t isl29003_store_power_state(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned long val;
+	int ret;
+
+	if ((strict_strtoul(buf, 10, &val) < 0) || (val > 1))
+		return -EINVAL;
+
+	ret = isl29003_set_power_state(client, val);
+	return ret ? ret : count;
+}
+
+static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO,
+		   isl29003_show_power_state, isl29003_store_power_state);
+
+
+/* lux */
+static ssize_t isl29003_show_lux(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	/* No LUX data if not operational */
+	if (!isl29003_get_power_state(client))
+		return -EBUSY;
+
+	return sprintf(buf, "%d\n", isl29003_get_adc_value(client));
+}
+
+static DEVICE_ATTR(lux, S_IRUGO, isl29003_show_lux, NULL);
+
+static struct attribute *isl29003_attributes[] = {
+	&dev_attr_range.attr,
+	&dev_attr_resolution.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_power_state.attr,
+	&dev_attr_lux.attr,
+	NULL
+};
+
+static const struct attribute_group isl29003_attr_group = {
+	.attrs = isl29003_attributes,
+};
+
+static int isl29003_init_client(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	int i;
+
+	/* read all the registers once to fill the cache.
+	 * if one of the reads fails, we consider the init failed */
+	for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) {
+		int v = i2c_smbus_read_byte_data(client, i);
+		if (v < 0)
+			return -ENODEV;
+
+		data->reg_cache[i] = v;
+	}
+
+	/* set defaults */
+	isl29003_set_range(client, 0);
+	isl29003_set_resolution(client, 0);
+	isl29003_set_mode(client, 0);
+	isl29003_set_power_state(client, 0);
+
+	return 0;
+}
+
+/*
+ * I2C layer
+ */
+
+static int __devinit isl29003_probe(struct i2c_client *client,
+				    const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct isl29003_data *data;
+	int err = 0;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
+		return -EIO;
+
+	data = kzalloc(sizeof(struct isl29003_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->client = client;
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->lock);
+
+	/* initialize the ISL29003 chip */
+	err = isl29003_init_client(client);
+	if (err)
+		goto exit_kfree;
+
+	/* register sysfs hooks */
+	err = sysfs_create_group(&client->dev.kobj, &isl29003_attr_group);
+	if (err)
+		goto exit_kfree;
+
+	dev_info(&client->dev, "driver version %s enabled\n", DRIVER_VERSION);
+	return 0;
+
+exit_kfree:
+	kfree(data);
+	return err;
+}
+
+static int __devexit isl29003_remove(struct i2c_client *client)
+{
+	sysfs_remove_group(&client->dev.kobj, &isl29003_attr_group);
+	isl29003_set_power_state(client, 0);
+	kfree(i2c_get_clientdata(client));
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int isl29003_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+
+	data->power_state_before_suspend = isl29003_get_power_state(client);
+	return isl29003_set_power_state(client, 0);
+}
+
+static int isl29003_resume(struct i2c_client *client)
+{
+	int i;
+	struct isl29003_data *data = i2c_get_clientdata(client);
+
+	/* restore registers from cache */
+	for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++)
+		if (i2c_smbus_write_byte_data(client, i, data->reg_cache[i]))
+			return -EIO;
+
+	return isl29003_set_power_state(client,
+		data->power_state_before_suspend);
+}
+
+#else
+#define isl29003_suspend	NULL
+#define isl29003_resume		NULL
+#endif /* CONFIG_PM */
+
+static const struct i2c_device_id isl29003_id[] = {
+	{ "isl29003", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, isl29003_id);
+
+static struct i2c_driver isl29003_driver = {
+	.driver = {
+		.name	= ISL29003_DRV_NAME,
+		.owner	= THIS_MODULE,
+	},
+	.suspend = isl29003_suspend,
+	.resume	= isl29003_resume,
+	.probe	= isl29003_probe,
+	.remove	= __devexit_p(isl29003_remove),
+	.id_table = isl29003_id,
+};
+
+static int __init isl29003_init(void)
+{
+	return i2c_add_driver(&isl29003_driver);
+}
+
+static void __exit isl29003_exit(void)
+{
+	i2c_del_driver(&isl29003_driver);
+}
+
+MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
+MODULE_DESCRIPTION("ISL29003 ambient light sensor driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRIVER_VERSION);
+
+module_init(isl29003_init);
+module_exit(isl29003_exit);
+
diff --git a/drivers/misc/iwmc3200top/Kconfig b/drivers/misc/iwmc3200top/Kconfig
new file mode 100644
index 00000000000..9e4b88fb57f
--- /dev/null
+++ b/drivers/misc/iwmc3200top/Kconfig
@@ -0,0 +1,20 @@
+config IWMC3200TOP
+        tristate "Intel Wireless MultiCom Top Driver"
+        depends on MMC && EXPERIMENTAL
+        select FW_LOADER
+	---help---
+	  Intel Wireless MultiCom 3200 Top driver is responsible for
+	  for firmware load and enabled coms enumeration
+
+config IWMC3200TOP_DEBUG
+	bool "Enable full debug output of iwmc3200top Driver"
+	depends on IWMC3200TOP
+	---help---
+	  Enable full debug output of iwmc3200top Driver
+
+config IWMC3200TOP_DEBUGFS
+	bool "Enable Debugfs debugging interface for iwmc3200top"
+	depends on IWMC3200TOP
+	---help---
+	  Enable creation of debugfs files for iwmc3200top
+
diff --git a/drivers/misc/iwmc3200top/Makefile b/drivers/misc/iwmc3200top/Makefile
new file mode 100644
index 00000000000..fbf53fb4634
--- /dev/null
+++ b/drivers/misc/iwmc3200top/Makefile
@@ -0,0 +1,29 @@
+# iwmc3200top - Intel Wireless MultiCom 3200 Top Driver
+# drivers/misc/iwmc3200top/Makefile
+#
+# Copyright (C) 2009 Intel Corporation. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License version
+# 2 as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301, USA.
+#
+#
+# Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com>
+#  -
+#
+#
+
+obj-$(CONFIG_IWMC3200TOP)	+= iwmc3200top.o
+iwmc3200top-objs	:= main.o fw-download.o
+iwmc3200top-$(CONFIG_IWMC3200TOP_DEBUG) += log.o
+iwmc3200top-$(CONFIG_IWMC3200TOP_DEBUGFS) += debugfs.o
diff --git a/drivers/misc/iwmc3200top/debugfs.c b/drivers/misc/iwmc3200top/debugfs.c
new file mode 100644
index 00000000000..0c8ea0a1c8a
--- /dev/null
+++ b/drivers/misc/iwmc3200top/debugfs.c
@@ -0,0 +1,133 @@
+/*
+ * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver
+ * drivers/misc/iwmc3200top/debufs.c
+ *
+ * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com>
+ *  -
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/mmc/sdio_func.h>
+#include <linux/mmc/sdio.h>
+#include <linux/debugfs.h>
+
+#include "iwmc3200top.h"
+#include "fw-msg.h"
+#include "log.h"
+#include "debugfs.h"
+
+
+
+/*      Constants definition        */
+#define HEXADECIMAL_RADIX	16
+
+/*      Functions definition        */
+
+
+#define DEBUGFS_ADD(name, parent) do {					\
+	dbgfs->dbgfs_##parent##_files.file_##name =			\
+	debugfs_create_file(#name, 0644, dbgfs->dir_##parent, priv,	\
+				&iwmct_dbgfs_##name##_ops);		\
+} while (0)
+
+#define DEBUGFS_RM(name)  do {		\
+	debugfs_remove(name);		\
+	name = NULL;			\
+} while (0)
+
+#define DEBUGFS_READ_FUNC(name)						\
+ssize_t iwmct_dbgfs_##name##_read(struct file *file,			\
+				  char __user *user_buf,		\
+				  size_t count, loff_t *ppos);
+
+#define DEBUGFS_WRITE_FUNC(name)					\
+ssize_t iwmct_dbgfs_##name##_write(struct file *file,			\
+				   const char __user *user_buf,		\
+				   size_t count, loff_t *ppos);
+
+#define DEBUGFS_READ_FILE_OPS(name)					\
+	DEBUGFS_READ_FUNC(name)						\
+	static const struct file_operations iwmct_dbgfs_##name##_ops = {  \
+		.read = iwmct_dbgfs_##name##_read,			\
+		.open = iwmct_dbgfs_open_file_generic,			\
+	};
+
+#define DEBUGFS_WRITE_FILE_OPS(name)					\
+	DEBUGFS_WRITE_FUNC(name)					\
+	static const struct file_operations iwmct_dbgfs_##name##_ops = {  \
+		.write = iwmct_dbgfs_##name##_write,			\
+		.open = iwmct_dbgfs_open_file_generic,			\
+	};
+
+#define DEBUGFS_READ_WRITE_FILE_OPS(name)				\
+	DEBUGFS_READ_FUNC(name)						\
+	DEBUGFS_WRITE_FUNC(name)					\
+	static const struct file_operations iwmct_dbgfs_##name##_ops = {\
+		.write = iwmct_dbgfs_##name##_write,			\
+		.read = iwmct_dbgfs_##name##_read,			\
+		.open = iwmct_dbgfs_open_file_generic,			\
+	};
+
+
+/*      Debugfs file ops definitions        */
+
+/*
+ * Create the debugfs files and directories
+ *
+ */
+void iwmct_dbgfs_register(struct iwmct_priv *priv, const char *name)
+{
+	struct iwmct_debugfs *dbgfs;
+
+	dbgfs = kzalloc(sizeof(struct iwmct_debugfs), GFP_KERNEL);
+	if (!dbgfs) {
+		LOG_ERROR(priv, DEBUGFS, "failed to allocate %zd bytes\n",
+					sizeof(struct iwmct_debugfs));
+		return;
+	}
+
+	priv->dbgfs = dbgfs;
+	dbgfs->name = name;
+	dbgfs->dir_drv = debugfs_create_dir(name, NULL);
+	if (!dbgfs->dir_drv) {
+		LOG_ERROR(priv, DEBUGFS, "failed to create debugfs dir\n");
+		return;
+	}
+
+	return;
+}
+
+/**
+ * Remove the debugfs files and directories
+ *
+ */
+void iwmct_dbgfs_unregister(struct iwmct_debugfs *dbgfs)
+{
+	if (!dbgfs)
+		return;
+
+	DEBUGFS_RM(dbgfs->dir_drv);
+	kfree(dbgfs);
+	dbgfs = NULL;
+}
+
diff --git a/drivers/misc/iwmc3200top/debugfs.h b/drivers/misc/iwmc3200top/debugfs.h
new file mode 100644
index 00000000000..71d45759b40
--- /dev/null
+++ b/drivers/misc/iwmc3200top/debugfs.h
@@ -0,0 +1,58 @@
+/*
+ * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver
+ * drivers/misc/iwmc3200top/debufs.h
+ *
+ * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com>
+ *  -
+ *
+ */
+
+#ifndef __DEBUGFS_H__
+#define __DEBUGFS_H__
+
+
+#ifdef CONFIG_IWMC3200TOP_DEBUGFS
+
+struct iwmct_debugfs {
+	const char *name;
+	struct dentry *dir_drv;
+	struct dir_drv_files {
+	} dbgfs_drv_files;
+};
+
+void iwmct_dbgfs_register(struct iwmct_priv *priv, const char *name);
+void iwmct_dbgfs_unregister(struct iwmct_debugfs *dbgfs);
+
+#else /* CONFIG_IWMC3200TOP_DEBUGFS */
+
+struct iwmct_debugfs;
+
+static inline void
+iwmct_dbgfs_register(struct iwmct_priv *priv, const char *name)
+{}
+
+static inline void
+iwmct_dbgfs_unregister(struct iwmct_debugfs *dbgfs)
+{}
+
+#endif /* CONFIG_IWMC3200TOP_DEBUGFS */
+
+#endif /* __DEBUGFS_H__ */
+
diff --git a/drivers/misc/iwmc3200top/fw-download.c b/drivers/misc/iwmc3200top/fw-download.c
new file mode 100644
index 00000000000..50d431e469f
--- /dev/null
+++ b/drivers/misc/iwmc3200top/fw-download.c
@@ -0,0 +1,355 @@
+/*
+ * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver
+ * drivers/misc/iwmc3200top/fw-download.c
+ *
+ * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com>
+ *  -
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/mmc/sdio_func.h>
+#include <asm/unaligned.h>
+
+#include "iwmc3200top.h"
+#include "log.h"
+#include "fw-msg.h"
+
+#define CHECKSUM_BYTES_NUM sizeof(u32)
+
+/**
+  init parser struct with file
+ */
+static int iwmct_fw_parser_init(struct iwmct_priv *priv, const u8 *file,
+			      size_t file_size, size_t block_size)
+{
+	struct iwmct_parser *parser = &priv->parser;
+	struct iwmct_fw_hdr *fw_hdr = &parser->versions;
+
+	LOG_INFOEX(priv, INIT, "-->\n");
+
+	LOG_INFO(priv, FW_DOWNLOAD, "file_size=%zd\n", file_size);
+
+	parser->file = file;
+	parser->file_size = file_size;
+	parser->cur_pos = 0;
+	parser->buf = NULL;
+
+	parser->buf = kzalloc(block_size, GFP_KERNEL);
+	if (!parser->buf) {
+		LOG_ERROR(priv, FW_DOWNLOAD, "kzalloc error\n");
+		return -ENOMEM;
+	}
+	parser->buf_size = block_size;
+
+	/* extract fw versions */
+	memcpy(fw_hdr, parser->file, sizeof(struct iwmct_fw_hdr));
+	LOG_INFO(priv, FW_DOWNLOAD, "fw versions are:\n"
+		"top %u.%u.%u gps %u.%u.%u bt %u.%u.%u tic %s\n",
+		fw_hdr->top_major, fw_hdr->top_minor, fw_hdr->top_revision,
+		fw_hdr->gps_major, fw_hdr->gps_minor, fw_hdr->gps_revision,
+		fw_hdr->bt_major, fw_hdr->bt_minor, fw_hdr->bt_revision,
+		fw_hdr->tic_name);
+
+	parser->cur_pos += sizeof(struct iwmct_fw_hdr);
+
+	LOG_INFOEX(priv, INIT, "<--\n");
+	return 0;
+}
+
+static bool iwmct_checksum(struct iwmct_priv *priv)
+{
+	struct iwmct_parser *parser = &priv->parser;
+	__le32 *file = (__le32 *)parser->file;
+	int i, pad, steps;
+	u32 accum = 0;
+	u32 checksum;
+	u32 mask = 0xffffffff;
+
+	pad = (parser->file_size - CHECKSUM_BYTES_NUM) % 4;
+	steps =  (parser->file_size - CHECKSUM_BYTES_NUM) / 4;
+
+	LOG_INFO(priv, FW_DOWNLOAD, "pad=%d steps=%d\n", pad, steps);
+
+	for (i = 0; i < steps; i++)
+		accum += le32_to_cpu(file[i]);
+
+	if (pad) {
+		mask <<= 8 * (4 - pad);
+		accum += le32_to_cpu(file[steps]) & mask;
+	}
+
+	checksum = get_unaligned_le32((__le32 *)(parser->file +
+			parser->file_size - CHECKSUM_BYTES_NUM));
+
+	LOG_INFO(priv, FW_DOWNLOAD,
+		"compare checksum accum=0x%x to checksum=0x%x\n",
+		accum, checksum);
+
+	return checksum == accum;
+}
+
+static int iwmct_parse_next_section(struct iwmct_priv *priv, const u8 **p_sec,
+				  size_t *sec_size, __le32 *sec_addr)
+{
+	struct iwmct_parser *parser = &priv->parser;
+	struct iwmct_dbg *dbg = &priv->dbg;
+	struct iwmct_fw_sec_hdr *sec_hdr;
+
+	LOG_INFOEX(priv, INIT, "-->\n");
+
+	while (parser->cur_pos + sizeof(struct iwmct_fw_sec_hdr)
+		<= parser->file_size) {
+
+		sec_hdr = (struct iwmct_fw_sec_hdr *)
+				(parser->file + parser->cur_pos);
+		parser->cur_pos += sizeof(struct iwmct_fw_sec_hdr);
+
+		LOG_INFO(priv, FW_DOWNLOAD,
+			"sec hdr: type=%s addr=0x%x size=%d\n",
+			sec_hdr->type, sec_hdr->target_addr,
+			sec_hdr->data_size);
+
+		if (strcmp(sec_hdr->type, "ENT") == 0)
+			parser->entry_point = le32_to_cpu(sec_hdr->target_addr);
+		else if (strcmp(sec_hdr->type, "LBL") == 0)
+			strcpy(dbg->label_fw, parser->file + parser->cur_pos);
+		else if (((strcmp(sec_hdr->type, "TOP") == 0) &&
+			  (priv->barker & BARKER_DNLOAD_TOP_MSK)) ||
+			 ((strcmp(sec_hdr->type, "GPS") == 0) &&
+			  (priv->barker & BARKER_DNLOAD_GPS_MSK)) ||
+			 ((strcmp(sec_hdr->type, "BTH") == 0) &&
+			  (priv->barker & BARKER_DNLOAD_BT_MSK))) {
+			*sec_addr = sec_hdr->target_addr;
+			*sec_size = le32_to_cpu(sec_hdr->data_size);
+			*p_sec = parser->file + parser->cur_pos;
+			parser->cur_pos += le32_to_cpu(sec_hdr->data_size);
+			return 1;
+		} else if (strcmp(sec_hdr->type, "LOG") != 0)
+			LOG_WARNING(priv, FW_DOWNLOAD,
+				    "skipping section type %s\n",
+				    sec_hdr->type);
+
+		parser->cur_pos += le32_to_cpu(sec_hdr->data_size);
+		LOG_INFO(priv, FW_DOWNLOAD,
+			"finished with section cur_pos=%zd\n", parser->cur_pos);
+	}
+
+	LOG_INFOEX(priv, INIT, "<--\n");
+	return 0;
+}
+
+static int iwmct_download_section(struct iwmct_priv *priv, const u8 *p_sec,
+				size_t sec_size, __le32 addr)
+{
+	struct iwmct_parser *parser = &priv->parser;
+	struct iwmct_fw_load_hdr *hdr = (struct iwmct_fw_load_hdr *)parser->buf;
+	const u8 *cur_block = p_sec;
+	size_t sent = 0;
+	int cnt = 0;
+	int ret = 0;
+	u32 cmd = 0;
+
+	LOG_INFOEX(priv, INIT, "-->\n");
+	LOG_INFO(priv, FW_DOWNLOAD, "Download address 0x%x size 0x%zx\n",
+				addr, sec_size);
+
+	while (sent < sec_size) {
+		int i;
+		u32 chksm = 0;
+		u32 reset = atomic_read(&priv->reset);
+		/* actual FW data */
+		u32 data_size = min(parser->buf_size - sizeof(*hdr),
+				    sec_size - sent);
+		/* Pad to block size */
+		u32 trans_size = (data_size + sizeof(*hdr) +
+				  IWMC_SDIO_BLK_SIZE - 1) &
+				  ~(IWMC_SDIO_BLK_SIZE - 1);
+		++cnt;
+
+		/* in case of reset, interrupt FW DOWNLAOD */
+		if (reset) {
+			LOG_INFO(priv, FW_DOWNLOAD,
+				 "Reset detected. Abort FW download!!!");
+			ret = -ECANCELED;
+			goto exit;
+		}
+
+		memset(parser->buf, 0, parser->buf_size);
+		cmd |= IWMC_OPCODE_WRITE << CMD_HDR_OPCODE_POS;
+		cmd |= IWMC_CMD_SIGNATURE << CMD_HDR_SIGNATURE_POS;
+		cmd |= (priv->dbg.direct ? 1 : 0) << CMD_HDR_DIRECT_ACCESS_POS;
+		cmd |= (priv->dbg.checksum ? 1 : 0) << CMD_HDR_USE_CHECKSUM_POS;
+		hdr->data_size = cpu_to_le32(data_size);
+		hdr->target_addr = addr;
+
+		/* checksum is allowed for sizes divisible by 4 */
+		if (data_size & 0x3)
+			cmd &= ~CMD_HDR_USE_CHECKSUM_MSK;
+
+		memcpy(hdr->data, cur_block, data_size);
+
+
+		if (cmd & CMD_HDR_USE_CHECKSUM_MSK) {
+
+			chksm = data_size + le32_to_cpu(addr) + cmd;
+			for (i = 0; i < data_size >> 2; i++)
+				chksm += ((u32 *)cur_block)[i];
+
+			hdr->block_chksm = cpu_to_le32(chksm);
+			LOG_INFO(priv, FW_DOWNLOAD, "Checksum = 0x%X\n",
+				 hdr->block_chksm);
+		}
+
+		LOG_INFO(priv, FW_DOWNLOAD, "trans#%d, len=%d, sent=%zd, "
+				"sec_size=%zd, startAddress 0x%X\n",
+				cnt, trans_size, sent, sec_size, addr);
+
+		if (priv->dbg.dump)
+			LOG_HEXDUMP(FW_DOWNLOAD, parser->buf, trans_size);
+
+
+		hdr->cmd = cpu_to_le32(cmd);
+		/* send it down */
+		/* TODO: add more proper sending and error checking */
+		ret = iwmct_tx(priv, 0, parser->buf, trans_size);
+		if (ret != 0) {
+			LOG_INFO(priv, FW_DOWNLOAD,
+				"iwmct_tx returned %d\n", ret);
+			goto exit;
+		}
+
+		addr = cpu_to_le32(le32_to_cpu(addr) + data_size);
+		sent += data_size;
+		cur_block = p_sec + sent;
+
+		if (priv->dbg.blocks && (cnt + 1) >= priv->dbg.blocks) {
+			LOG_INFO(priv, FW_DOWNLOAD,
+				"Block number limit is reached [%d]\n",
+				priv->dbg.blocks);
+			break;
+		}
+	}
+
+	if (sent < sec_size)
+		ret = -EINVAL;
+exit:
+	LOG_INFOEX(priv, INIT, "<--\n");
+	return ret;
+}
+
+static int iwmct_kick_fw(struct iwmct_priv *priv, bool jump)
+{
+	struct iwmct_parser *parser = &priv->parser;
+	struct iwmct_fw_load_hdr *hdr = (struct iwmct_fw_load_hdr *)parser->buf;
+	int ret;
+	u32 cmd;
+
+	LOG_INFOEX(priv, INIT, "-->\n");
+
+	memset(parser->buf, 0, parser->buf_size);
+	cmd = IWMC_CMD_SIGNATURE << CMD_HDR_SIGNATURE_POS;
+	if (jump) {
+		cmd |= IWMC_OPCODE_JUMP << CMD_HDR_OPCODE_POS;
+		hdr->target_addr = cpu_to_le32(parser->entry_point);
+		LOG_INFO(priv, FW_DOWNLOAD, "jump address 0x%x\n",
+				parser->entry_point);
+	} else {
+		cmd |= IWMC_OPCODE_LAST_COMMAND << CMD_HDR_OPCODE_POS;
+		LOG_INFO(priv, FW_DOWNLOAD, "last command\n");
+	}
+
+	hdr->cmd = cpu_to_le32(cmd);
+
+	LOG_HEXDUMP(FW_DOWNLOAD, parser->buf, sizeof(*hdr));
+	/* send it down */
+	/* TODO: add more proper sending and error checking */
+	ret = iwmct_tx(priv, 0, parser->buf, IWMC_SDIO_BLK_SIZE);
+	if (ret)
+		LOG_INFO(priv, FW_DOWNLOAD, "iwmct_tx returned %d", ret);
+
+	LOG_INFOEX(priv, INIT, "<--\n");
+	return 0;
+}
+
+int iwmct_fw_load(struct iwmct_priv *priv)
+{
+	const u8 *fw_name = FW_NAME(FW_API_VER);
+	const struct firmware *raw;
+	const u8 *pdata;
+	size_t len;
+	__le32 addr;
+	int ret;
+
+	/* clear parser struct */
+	memset(&priv->parser, 0, sizeof(struct iwmct_parser));
+
+	/* get the firmware */
+	ret = request_firmware(&raw, fw_name, &priv->func->dev);
+	if (ret < 0) {
+		LOG_ERROR(priv, FW_DOWNLOAD, "%s request_firmware failed %d\n",
+			  fw_name, ret);
+		goto exit;
+	}
+
+	if (raw->size < sizeof(struct iwmct_fw_sec_hdr)) {
+		LOG_ERROR(priv, FW_DOWNLOAD, "%s smaller then (%zd) (%zd)\n",
+			  fw_name, sizeof(struct iwmct_fw_sec_hdr), raw->size);
+		goto exit;
+	}
+
+	LOG_INFO(priv, FW_DOWNLOAD, "Read firmware '%s'\n", fw_name);
+
+	ret = iwmct_fw_parser_init(priv, raw->data, raw->size, priv->trans_len);
+	if (ret < 0) {
+		LOG_ERROR(priv, FW_DOWNLOAD,
+			  "iwmct_parser_init failed: Reason %d\n", ret);
+		goto exit;
+	}
+
+	/* checksum  */
+	if (!iwmct_checksum(priv)) {
+		LOG_ERROR(priv, FW_DOWNLOAD, "checksum error\n");
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	/* download firmware to device */
+	while (iwmct_parse_next_section(priv, &pdata, &len, &addr)) {
+		if (iwmct_download_section(priv, pdata, len, addr)) {
+			LOG_ERROR(priv, FW_DOWNLOAD,
+				  "%s download section failed\n", fw_name);
+			ret = -EIO;
+			goto exit;
+		}
+	}
+
+	iwmct_kick_fw(priv, !!(priv->barker & BARKER_DNLOAD_JUMP_MSK));
+
+exit:
+	kfree(priv->parser.buf);
+
+	if (raw)
+		release_firmware(raw);
+
+	raw = NULL;
+
+	return ret;
+}
diff --git a/drivers/misc/iwmc3200top/fw-msg.h b/drivers/misc/iwmc3200top/fw-msg.h
new file mode 100644
index 00000000000..9e26b75bd48
--- /dev/null
+++ b/drivers/misc/iwmc3200top/fw-msg.h
@@ -0,0 +1,113 @@
+/*
+ * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver
+ * drivers/misc/iwmc3200top/fw-msg.h
+ *
+ * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com>
+ *  -
+ *
+ */
+
+#ifndef __FWMSG_H__
+#define __FWMSG_H__
+
+#define COMM_TYPE_D2H           	0xFF
+#define COMM_TYPE_H2D           	0xEE
+
+#define COMM_CATEGORY_OPERATIONAL      	0x00
+#define COMM_CATEGORY_DEBUG            	0x01
+#define COMM_CATEGORY_TESTABILITY      	0x02
+#define COMM_CATEGORY_DIAGNOSTICS      	0x03
+
+#define OP_DBG_ZSTR_MSG			cpu_to_le16(0x1A)
+
+#define FW_LOG_SRC_MAX			32
+#define FW_LOG_SRC_ALL			255
+
+#define FW_STRING_TABLE_ADDR		cpu_to_le32(0x0C000000)
+
+#define CMD_DBG_LOG_LEVEL		cpu_to_le16(0x0001)
+#define CMD_TST_DEV_RESET		cpu_to_le16(0x0060)
+#define CMD_TST_FUNC_RESET		cpu_to_le16(0x0062)
+#define CMD_TST_IFACE_RESET		cpu_to_le16(0x0064)
+#define CMD_TST_CPU_UTILIZATION		cpu_to_le16(0x0065)
+#define CMD_TST_TOP_DEEP_SLEEP		cpu_to_le16(0x0080)
+#define CMD_TST_WAKEUP			cpu_to_le16(0x0081)
+#define CMD_TST_FUNC_WAKEUP		cpu_to_le16(0x0082)
+#define CMD_TST_FUNC_DEEP_SLEEP_REQUEST	cpu_to_le16(0x0083)
+#define CMD_TST_GET_MEM_DUMP		cpu_to_le16(0x0096)
+
+#define OP_OPR_ALIVE			cpu_to_le16(0x0010)
+#define OP_OPR_CMD_ACK			cpu_to_le16(0x001F)
+#define OP_OPR_CMD_NACK			cpu_to_le16(0x0020)
+#define OP_TST_MEM_DUMP			cpu_to_le16(0x0043)
+
+#define CMD_FLAG_PADDING_256		0x80
+
+#define FW_HCMD_BLOCK_SIZE      	256
+
+struct msg_hdr {
+	u8 type;
+	u8 category;
+	__le16 opcode;
+	u8 seqnum;
+	u8 flags;
+	__le16 length;
+} __attribute__((__packed__));
+
+struct log_hdr {
+	__le32 timestamp;
+	u8 severity;
+	u8 logsource;
+	__le16 reserved;
+} __attribute__((__packed__));
+
+struct mdump_hdr {
+	u8 dmpid;
+	u8 frag;
+	__le16 size;
+	__le32 addr;
+} __attribute__((__packed__));
+
+struct top_msg {
+	struct msg_hdr hdr;
+	union {
+		/* D2H messages */
+		struct {
+			struct log_hdr log_hdr;
+			u8 data[1];
+		} __attribute__((__packed__)) log;
+
+		struct {
+			struct log_hdr log_hdr;
+			struct mdump_hdr md_hdr;
+			u8 data[1];
+		} __attribute__((__packed__)) mdump;
+
+		/* H2D messages */
+		struct {
+			u8 logsource;
+			u8 sevmask;
+		} __attribute__((__packed__)) logdefs[FW_LOG_SRC_MAX];
+		struct mdump_hdr mdump_req;
+	} u;
+} __attribute__((__packed__));
+
+
+#endif /* __FWMSG_H__ */
diff --git a/drivers/misc/iwmc3200top/iwmc3200top.h b/drivers/misc/iwmc3200top/iwmc3200top.h
new file mode 100644
index 00000000000..43bd510e187
--- /dev/null
+++ b/drivers/misc/iwmc3200top/iwmc3200top.h
@@ -0,0 +1,209 @@
+/*
+ * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver
+ * drivers/misc/iwmc3200top/iwmc3200top.h
+ *
+ * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com>
+ *  -
+ *
+ */
+
+#ifndef __IWMC3200TOP_H__
+#define __IWMC3200TOP_H__
+
+#include <linux/workqueue.h>
+
+#define DRV_NAME "iwmc3200top"
+#define FW_API_VER 1
+#define _FW_NAME(api) DRV_NAME "." #api ".fw"
+#define FW_NAME(api) _FW_NAME(api)
+
+#define IWMC_SDIO_BLK_SIZE			256
+#define IWMC_DEFAULT_TR_BLK			64
+#define IWMC_SDIO_DATA_ADDR			0x0
+#define IWMC_SDIO_INTR_ENABLE_ADDR		0x14
+#define IWMC_SDIO_INTR_STATUS_ADDR		0x13
+#define IWMC_SDIO_INTR_CLEAR_ADDR		0x13
+#define IWMC_SDIO_INTR_GET_SIZE_ADDR		0x2C
+
+#define COMM_HUB_HEADER_LENGTH 16
+#define LOGGER_HEADER_LENGTH   10
+
+
+#define BARKER_DNLOAD_BT_POS		0
+#define BARKER_DNLOAD_BT_MSK		BIT(BARKER_DNLOAD_BT_POS)
+#define BARKER_DNLOAD_GPS_POS		1
+#define BARKER_DNLOAD_GPS_MSK		BIT(BARKER_DNLOAD_GPS_POS)
+#define BARKER_DNLOAD_TOP_POS		2
+#define BARKER_DNLOAD_TOP_MSK		BIT(BARKER_DNLOAD_TOP_POS)
+#define BARKER_DNLOAD_RESERVED1_POS	3
+#define BARKER_DNLOAD_RESERVED1_MSK	BIT(BARKER_DNLOAD_RESERVED1_POS)
+#define BARKER_DNLOAD_JUMP_POS		4
+#define BARKER_DNLOAD_JUMP_MSK		BIT(BARKER_DNLOAD_JUMP_POS)
+#define BARKER_DNLOAD_SYNC_POS		5
+#define BARKER_DNLOAD_SYNC_MSK		BIT(BARKER_DNLOAD_SYNC_POS)
+#define BARKER_DNLOAD_RESERVED2_POS	6
+#define BARKER_DNLOAD_RESERVED2_MSK	(0x3 << BARKER_DNLOAD_RESERVED2_POS)
+#define BARKER_DNLOAD_BARKER_POS	8
+#define BARKER_DNLOAD_BARKER_MSK	(0xffffff << BARKER_DNLOAD_BARKER_POS)
+
+#define IWMC_BARKER_REBOOT 	(0xdeadbe << BARKER_DNLOAD_BARKER_POS)
+/* whole field barker */
+#define IWMC_BARKER_ACK 	0xfeedbabe
+
+#define IWMC_CMD_SIGNATURE 	0xcbbc
+
+#define CMD_HDR_OPCODE_POS		0
+#define CMD_HDR_OPCODE_MSK_MSK		(0xf << CMD_HDR_OPCODE_MSK_POS)
+#define CMD_HDR_RESPONSE_CODE_POS	4
+#define CMD_HDR_RESPONSE_CODE_MSK	(0xf << CMD_HDR_RESPONSE_CODE_POS)
+#define CMD_HDR_USE_CHECKSUM_POS	8
+#define CMD_HDR_USE_CHECKSUM_MSK	BIT(CMD_HDR_USE_CHECKSUM_POS)
+#define CMD_HDR_RESPONSE_REQUIRED_POS	9
+#define CMD_HDR_RESPONSE_REQUIRED_MSK	BIT(CMD_HDR_RESPONSE_REQUIRED_POS)
+#define CMD_HDR_DIRECT_ACCESS_POS	10
+#define CMD_HDR_DIRECT_ACCESS_MSK	BIT(CMD_HDR_DIRECT_ACCESS_POS)
+#define CMD_HDR_RESERVED_POS		11
+#define CMD_HDR_RESERVED_MSK		BIT(0x1f << CMD_HDR_RESERVED_POS)
+#define CMD_HDR_SIGNATURE_POS		16
+#define CMD_HDR_SIGNATURE_MSK		BIT(0xffff << CMD_HDR_SIGNATURE_POS)
+
+enum {
+	IWMC_OPCODE_PING = 0,
+	IWMC_OPCODE_READ = 1,
+	IWMC_OPCODE_WRITE = 2,
+	IWMC_OPCODE_JUMP = 3,
+	IWMC_OPCODE_REBOOT = 4,
+	IWMC_OPCODE_PERSISTENT_WRITE = 5,
+	IWMC_OPCODE_PERSISTENT_READ = 6,
+	IWMC_OPCODE_READ_MODIFY_WRITE = 7,
+	IWMC_OPCODE_LAST_COMMAND = 15
+};
+
+struct iwmct_fw_load_hdr {
+	__le32 cmd;
+	__le32 target_addr;
+	__le32 data_size;
+	__le32 block_chksm;
+	u8 data[0];
+};
+
+/**
+ * struct iwmct_fw_hdr
+ * holds all sw components versions
+ */
+struct iwmct_fw_hdr {
+	u8 top_major;
+	u8 top_minor;
+	u8 top_revision;
+	u8 gps_major;
+	u8 gps_minor;
+	u8 gps_revision;
+	u8 bt_major;
+	u8 bt_minor;
+	u8 bt_revision;
+	u8 tic_name[31];
+};
+
+/**
+ * struct iwmct_fw_sec_hdr
+ * @type: function type
+ * @data_size: section's data size
+ * @target_addr: download address
+ */
+struct iwmct_fw_sec_hdr {
+	u8 type[4];
+	__le32 data_size;
+	__le32 target_addr;
+};
+
+/**
+ * struct iwmct_parser
+ * @file: fw image
+ * @file_size: fw size
+ * @cur_pos: position in file
+ * @buf: temp buf for download
+ * @buf_size: size of buf
+ * @entry_point: address to jump in fw kick-off
+ */
+struct iwmct_parser {
+	const u8 *file;
+	size_t file_size;
+	size_t cur_pos;
+	u8 *buf;
+	size_t buf_size;
+	u32 entry_point;
+	struct iwmct_fw_hdr versions;
+};
+
+
+struct iwmct_work_struct {
+	struct list_head list;
+	ssize_t iosize;
+};
+
+struct iwmct_dbg {
+	int blocks;
+	bool dump;
+	bool jump;
+	bool direct;
+	bool checksum;
+	bool fw_download;
+	int block_size;
+	int download_trans_blks;
+
+	char label_fw[256];
+};
+
+struct iwmct_debugfs;
+
+struct iwmct_priv {
+	struct sdio_func *func;
+	struct iwmct_debugfs *dbgfs;
+	struct iwmct_parser parser;
+	atomic_t reset;
+	atomic_t dev_sync;
+	u32 trans_len;
+	u32 barker;
+	struct iwmct_dbg dbg;
+
+	/* drivers work queue */
+	struct workqueue_struct *wq;
+	struct workqueue_struct *bus_rescan_wq;
+	struct work_struct bus_rescan_worker;
+	struct work_struct isr_worker;
+
+	/* drivers wait queue */
+	wait_queue_head_t wait_q;
+
+	/* rx request list */
+	struct list_head read_req_list;
+};
+
+extern int iwmct_tx(struct iwmct_priv *priv, unsigned int addr,
+		void *src, int count);
+
+extern int iwmct_fw_load(struct iwmct_priv *priv);
+
+extern void iwmct_dbg_init_params(struct iwmct_priv *drv);
+extern void iwmct_dbg_init_drv_attrs(struct device_driver *drv);
+extern void iwmct_dbg_remove_drv_attrs(struct device_driver *drv);
+extern int iwmct_send_hcmd(struct iwmct_priv *priv, u8 *cmd, u16 len);
+
+#endif  /*  __IWMC3200TOP_H__  */
diff --git a/drivers/misc/iwmc3200top/log.c b/drivers/misc/iwmc3200top/log.c
new file mode 100644
index 00000000000..d569279698f
--- /dev/null
+++ b/drivers/misc/iwmc3200top/log.c
@@ -0,0 +1,347 @@
+/*
+ * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver
+ * drivers/misc/iwmc3200top/log.c
+ *
+ * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com>
+ *  -
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/mmc/sdio_func.h>
+#include <linux/ctype.h>
+#include "fw-msg.h"
+#include "iwmc3200top.h"
+#include "log.h"
+
+/* Maximal hexadecimal string size of the FW memdump message */
+#define LOG_MSG_SIZE_MAX		12400
+
+/* iwmct_logdefs is a global used by log macros */
+u8 iwmct_logdefs[LOG_SRC_MAX];
+static u8 iwmct_fw_logdefs[FW_LOG_SRC_MAX];
+
+
+static int _log_set_log_filter(u8 *logdefs, int size, u8 src, u8 logmask)
+{
+	int i;
+
+	if (src < size)
+		logdefs[src] = logmask;
+	else if (src == LOG_SRC_ALL)
+		for (i = 0; i < size; i++)
+			logdefs[i] = logmask;
+	else
+		return -1;
+
+	return 0;
+}
+
+
+int iwmct_log_set_filter(u8 src, u8 logmask)
+{
+	return _log_set_log_filter(iwmct_logdefs, LOG_SRC_MAX, src, logmask);
+}
+
+
+int iwmct_log_set_fw_filter(u8 src, u8 logmask)
+{
+	return _log_set_log_filter(iwmct_fw_logdefs,
+				   FW_LOG_SRC_MAX, src, logmask);
+}
+
+
+static int log_msg_format_hex(char *str, int slen, u8 *ibuf,
+			      int ilen, char *pref)
+{
+	int pos = 0;
+	int i;
+	int len;
+
+	for (pos = 0, i = 0; pos < slen - 2 && pref[i] != '\0'; i++, pos++)
+		str[pos] = pref[i];
+
+	for (i = 0; pos < slen - 2 && i < ilen; pos += len, i++)
+		len = snprintf(&str[pos], slen - pos - 1, " %2.2X", ibuf[i]);
+
+	if (i < ilen)
+		return -1;
+
+	return 0;
+}
+
+/*	NOTE: This function is not thread safe.
+	Currently it's called only from sdio rx worker - no race there
+*/
+void iwmct_log_top_message(struct iwmct_priv *priv, u8 *buf, int len)
+{
+	struct top_msg *msg;
+	static char logbuf[LOG_MSG_SIZE_MAX];
+
+	msg = (struct top_msg *)buf;
+
+	if (len < sizeof(msg->hdr) + sizeof(msg->u.log.log_hdr)) {
+		LOG_ERROR(priv, FW_MSG, "Log message from TOP "
+			  "is too short %d (expected %zd)\n",
+			  len, sizeof(msg->hdr) + sizeof(msg->u.log.log_hdr));
+		return;
+	}
+
+	if (!(iwmct_fw_logdefs[msg->u.log.log_hdr.logsource] &
+		BIT(msg->u.log.log_hdr.severity)) ||
+	    !(iwmct_logdefs[LOG_SRC_FW_MSG] & BIT(msg->u.log.log_hdr.severity)))
+		return;
+
+	switch (msg->hdr.category) {
+	case COMM_CATEGORY_TESTABILITY:
+		if (!(iwmct_logdefs[LOG_SRC_TST] &
+		      BIT(msg->u.log.log_hdr.severity)))
+			return;
+		if (log_msg_format_hex(logbuf, LOG_MSG_SIZE_MAX, buf,
+				       le16_to_cpu(msg->hdr.length) +
+				       sizeof(msg->hdr), "<TST>"))
+			LOG_WARNING(priv, TST,
+				  "TOP TST message is too long, truncating...");
+		LOG_WARNING(priv, TST, "%s\n", logbuf);
+		break;
+	case COMM_CATEGORY_DEBUG:
+		if (msg->hdr.opcode == OP_DBG_ZSTR_MSG)
+			LOG_INFO(priv, FW_MSG, "%s %s", "<DBG>",
+				       ((u8 *)msg) + sizeof(msg->hdr)
+					+ sizeof(msg->u.log.log_hdr));
+		else {
+			if (log_msg_format_hex(logbuf, LOG_MSG_SIZE_MAX, buf,
+					le16_to_cpu(msg->hdr.length)
+						+ sizeof(msg->hdr),
+					"<DBG>"))
+				LOG_WARNING(priv, FW_MSG,
+					"TOP DBG message is too long,"
+					"truncating...");
+			LOG_WARNING(priv, FW_MSG, "%s\n", logbuf);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static int _log_get_filter_str(u8 *logdefs, int logdefsz, char *buf, int size)
+{
+	int i, pos, len;
+	for (i = 0, pos = 0; (pos < size-1) && (i < logdefsz); i++) {
+		len = snprintf(&buf[pos], size - pos - 1, "0x%02X%02X,",
+				i, logdefs[i]);
+		pos += len;
+	}
+	buf[pos-1] = '\n';
+	buf[pos] = '\0';
+
+	if (i < logdefsz)
+		return -1;
+	return 0;
+}
+
+int log_get_filter_str(char *buf, int size)
+{
+	return _log_get_filter_str(iwmct_logdefs, LOG_SRC_MAX, buf, size);
+}
+
+int log_get_fw_filter_str(char *buf, int size)
+{
+	return _log_get_filter_str(iwmct_fw_logdefs, FW_LOG_SRC_MAX, buf, size);
+}
+
+#define HEXADECIMAL_RADIX	16
+#define LOG_SRC_FORMAT		7 /* log level is in format of "0xXXXX," */
+
+ssize_t show_iwmct_log_level(struct device *d,
+				struct device_attribute *attr, char *buf)
+{
+	struct iwmct_priv *priv = dev_get_drvdata(d);
+	char *str_buf;
+	int buf_size;
+	ssize_t ret;
+
+	buf_size = (LOG_SRC_FORMAT * LOG_SRC_MAX) + 1;
+	str_buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!str_buf) {
+		LOG_ERROR(priv, DEBUGFS,
+			"failed to allocate %d bytes\n", buf_size);
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	if (log_get_filter_str(str_buf, buf_size) < 0) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	ret = sprintf(buf, "%s", str_buf);
+
+exit:
+	kfree(str_buf);
+	return ret;
+}
+
+ssize_t store_iwmct_log_level(struct device *d,
+			struct device_attribute *attr,
+			const char *buf, size_t count)
+{
+	struct iwmct_priv *priv = dev_get_drvdata(d);
+	char *token, *str_buf = NULL;
+	long val;
+	ssize_t ret = count;
+	u8 src, mask;
+
+	if (!count)
+		goto exit;
+
+	str_buf = kzalloc(count, GFP_KERNEL);
+	if (!str_buf) {
+		LOG_ERROR(priv, DEBUGFS,
+			"failed to allocate %zd bytes\n", count);
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	memcpy(str_buf, buf, count);
+
+	while ((token = strsep(&str_buf, ",")) != NULL) {
+		while (isspace(*token))
+			++token;
+		if (strict_strtol(token, HEXADECIMAL_RADIX, &val)) {
+			LOG_ERROR(priv, DEBUGFS,
+				  "failed to convert string to long %s\n",
+				  token);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		mask  = val & 0xFF;
+		src = (val & 0XFF00) >> 8;
+		iwmct_log_set_filter(src, mask);
+	}
+
+exit:
+	kfree(str_buf);
+	return ret;
+}
+
+ssize_t show_iwmct_log_level_fw(struct device *d,
+			struct device_attribute *attr, char *buf)
+{
+	struct iwmct_priv *priv = dev_get_drvdata(d);
+	char *str_buf;
+	int buf_size;
+	ssize_t ret;
+
+	buf_size = (LOG_SRC_FORMAT * FW_LOG_SRC_MAX) + 2;
+
+	str_buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!str_buf) {
+		LOG_ERROR(priv, DEBUGFS,
+			"failed to allocate %d bytes\n", buf_size);
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	if (log_get_fw_filter_str(str_buf, buf_size) < 0) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	ret = sprintf(buf, "%s", str_buf);
+
+exit:
+	kfree(str_buf);
+	return ret;
+}
+
+ssize_t store_iwmct_log_level_fw(struct device *d,
+			struct device_attribute *attr,
+			const char *buf, size_t count)
+{
+	struct iwmct_priv *priv = dev_get_drvdata(d);
+	struct top_msg cmd;
+	char *token, *str_buf = NULL;
+	ssize_t ret = count;
+	u16 cmdlen = 0;
+	int i;
+	long val;
+	u8 src, mask;
+
+	if (!count)
+		goto exit;
+
+	str_buf = kzalloc(count, GFP_KERNEL);
+	if (!str_buf) {
+		LOG_ERROR(priv, DEBUGFS,
+			"failed to allocate %zd bytes\n", count);
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	memcpy(str_buf, buf, count);
+
+	cmd.hdr.type = COMM_TYPE_H2D;
+	cmd.hdr.category = COMM_CATEGORY_DEBUG;
+	cmd.hdr.opcode = CMD_DBG_LOG_LEVEL;
+
+	for (i = 0; ((token = strsep(&str_buf, ",")) != NULL) &&
+		     (i < FW_LOG_SRC_MAX); i++) {
+
+		while (isspace(*token))
+			++token;
+
+		if (strict_strtol(token, HEXADECIMAL_RADIX, &val)) {
+			LOG_ERROR(priv, DEBUGFS,
+				  "failed to convert string to long %s\n",
+				  token);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		mask  = val & 0xFF; /* LSB */
+		src = (val & 0XFF00) >> 8; /* 2nd least significant byte. */
+		iwmct_log_set_fw_filter(src, mask);
+
+		cmd.u.logdefs[i].logsource = src;
+		cmd.u.logdefs[i].sevmask = mask;
+	}
+
+	cmd.hdr.length = cpu_to_le16(i * sizeof(cmd.u.logdefs[0]));
+	cmdlen = (i * sizeof(cmd.u.logdefs[0]) + sizeof(cmd.hdr));
+
+	ret = iwmct_send_hcmd(priv, (u8 *)&cmd, cmdlen);
+	if (ret) {
+		LOG_ERROR(priv, DEBUGFS,
+			  "Failed to send %d bytes of fwcmd, ret=%zd\n",
+			  cmdlen, ret);
+		goto exit;
+	} else
+		LOG_INFO(priv, DEBUGFS, "fwcmd sent (%d bytes)\n", cmdlen);
+
+	ret = count;
+
+exit:
+	kfree(str_buf);
+	return ret;
+}
+
diff --git a/drivers/misc/iwmc3200top/log.h b/drivers/misc/iwmc3200top/log.h
new file mode 100644
index 00000000000..aba8121f978
--- /dev/null
+++ b/drivers/misc/iwmc3200top/log.h
@@ -0,0 +1,158 @@
+/*
+ * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver
+ * drivers/misc/iwmc3200top/log.h
+ *
+ * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com>
+ *  -
+ *
+ */
+
+#ifndef __LOG_H__
+#define __LOG_H__
+
+
+/* log severity:
+ * The log levels here match FW log levels
+ * so values need to stay as is */
+#define LOG_SEV_CRITICAL		0
+#define LOG_SEV_ERROR			1
+#define LOG_SEV_WARNING			2
+#define LOG_SEV_INFO			3
+#define LOG_SEV_INFOEX			4
+
+#define LOG_SEV_FILTER_ALL		\
+	(BIT(LOG_SEV_CRITICAL) |	\
+	 BIT(LOG_SEV_ERROR)    |	\
+	 BIT(LOG_SEV_WARNING)  | 	\
+	 BIT(LOG_SEV_INFO)     |	\
+	 BIT(LOG_SEV_INFOEX))
+
+/* log source */
+#define LOG_SRC_INIT			0
+#define LOG_SRC_DEBUGFS			1
+#define LOG_SRC_FW_DOWNLOAD		2
+#define LOG_SRC_FW_MSG			3
+#define LOG_SRC_TST			4
+#define LOG_SRC_IRQ			5
+
+#define	LOG_SRC_MAX			6
+#define	LOG_SRC_ALL			0xFF
+
+/**
+ * Default intitialization runtime log level
+ */
+#ifndef LOG_SEV_FILTER_RUNTIME
+#define LOG_SEV_FILTER_RUNTIME			\
+	(BIT(LOG_SEV_CRITICAL)	|		\
+	 BIT(LOG_SEV_ERROR)	|		\
+	 BIT(LOG_SEV_WARNING))
+#endif
+
+#ifndef FW_LOG_SEV_FILTER_RUNTIME
+#define FW_LOG_SEV_FILTER_RUNTIME	LOG_SEV_FILTER_ALL
+#endif
+
+#ifdef CONFIG_IWMC3200TOP_DEBUG
+/**
+ * Log macros
+ */
+
+#define priv2dev(priv) (&(priv->func)->dev)
+
+#define LOG_CRITICAL(priv, src, fmt, args...)				\
+do {									\
+	if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_CRITICAL))	\
+		dev_crit(priv2dev(priv), "%s %d: " fmt,			\
+			__func__, __LINE__, ##args);			\
+} while (0)
+
+#define LOG_ERROR(priv, src, fmt, args...)				\
+do {									\
+	if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_ERROR))	\
+		dev_err(priv2dev(priv), "%s %d: " fmt,			\
+			__func__, __LINE__, ##args);			\
+} while (0)
+
+#define LOG_WARNING(priv, src, fmt, args...)				\
+do {									\
+	if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_WARNING))	\
+		dev_warn(priv2dev(priv), "%s %d: " fmt,			\
+			 __func__, __LINE__, ##args);			\
+} while (0)
+
+#define LOG_INFO(priv, src, fmt, args...)				\
+do {									\
+	if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_INFO))		\
+		dev_info(priv2dev(priv), "%s %d: " fmt,			\
+			 __func__, __LINE__, ##args);			\
+} while (0)
+
+#define LOG_INFOEX(priv, src, fmt, args...)				\
+do {									\
+	if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_INFOEX))	\
+		dev_dbg(priv2dev(priv), "%s %d: " fmt,			\
+			 __func__, __LINE__, ##args);			\
+} while (0)
+
+#define LOG_HEXDUMP(src, ptr, len)					\
+do {									\
+	if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_INFOEX))	\
+		print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE,	\
+				16, 1, ptr, len, false);		\
+} while (0)
+
+void iwmct_log_top_message(struct iwmct_priv *priv, u8 *buf, int len);
+
+extern u8 iwmct_logdefs[];
+
+int iwmct_log_set_filter(u8 src, u8 logmask);
+int iwmct_log_set_fw_filter(u8 src, u8 logmask);
+
+ssize_t show_iwmct_log_level(struct device *d,
+			struct device_attribute *attr, char *buf);
+ssize_t store_iwmct_log_level(struct device *d,
+			struct device_attribute *attr,
+			const char *buf, size_t count);
+ssize_t show_iwmct_log_level_fw(struct device *d,
+			struct device_attribute *attr, char *buf);
+ssize_t store_iwmct_log_level_fw(struct device *d,
+			struct device_attribute *attr,
+			const char *buf, size_t count);
+
+#else
+
+#define LOG_CRITICAL(priv, src, fmt, args...)
+#define LOG_ERROR(priv, src, fmt, args...)
+#define LOG_WARNING(priv, src, fmt, args...)
+#define LOG_INFO(priv, src, fmt, args...)
+#define LOG_INFOEX(priv, src, fmt, args...)
+#define LOG_HEXDUMP(src, ptr, len)
+
+static inline void iwmct_log_top_message(struct iwmct_priv *priv,
+					 u8 *buf, int len) {}
+static inline int iwmct_log_set_filter(u8 src, u8 logmask) { return 0; }
+static inline int iwmct_log_set_fw_filter(u8 src, u8 logmask) { return 0; }
+
+#endif /* CONFIG_IWMC3200TOP_DEBUG */
+
+int log_get_filter_str(char *buf, int size);
+int log_get_fw_filter_str(char *buf, int size);
+
+#endif /* __LOG_H__ */
diff --git a/drivers/misc/iwmc3200top/main.c b/drivers/misc/iwmc3200top/main.c
new file mode 100644
index 00000000000..fafcaa481d7
--- /dev/null
+++ b/drivers/misc/iwmc3200top/main.c
@@ -0,0 +1,678 @@
+/*
+ * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver
+ * drivers/misc/iwmc3200top/main.c
+ *
+ * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com>
+ *  -
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/mmc/sdio_ids.h>
+#include <linux/mmc/sdio_func.h>
+#include <linux/mmc/sdio.h>
+
+#include "iwmc3200top.h"
+#include "log.h"
+#include "fw-msg.h"
+#include "debugfs.h"
+
+
+#define DRIVER_DESCRIPTION "Intel(R) IWMC 3200 Top Driver"
+#define DRIVER_COPYRIGHT "Copyright (c) 2008 Intel Corporation."
+
+#define DRIVER_VERSION  "0.1.62"
+
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_COPYRIGHT);
+MODULE_FIRMWARE(FW_NAME(FW_API_VER));
+
+/*
+ * This workers main task is to wait for OP_OPR_ALIVE
+ * from TOP FW until ALIVE_MSG_TIMOUT timeout is elapsed.
+ * When OP_OPR_ALIVE received it will issue
+ * a call to "bus_rescan_devices".
+ */
+static void iwmct_rescan_worker(struct work_struct *ws)
+{
+	struct iwmct_priv *priv;
+	int ret;
+
+	priv = container_of(ws, struct iwmct_priv, bus_rescan_worker);
+
+	LOG_INFO(priv, FW_MSG, "Calling bus_rescan\n");
+
+	ret = bus_rescan_devices(priv->func->dev.bus);
+	if (ret < 0)
+		LOG_INFO(priv, FW_DOWNLOAD, "bus_rescan_devices FAILED!!!\n");
+}
+
+static void op_top_message(struct iwmct_priv *priv, struct top_msg *msg)
+{
+	switch (msg->hdr.opcode) {
+	case OP_OPR_ALIVE:
+		LOG_INFO(priv, FW_MSG, "Got ALIVE from device, wake rescan\n");
+		queue_work(priv->bus_rescan_wq, &priv->bus_rescan_worker);
+		break;
+	default:
+		LOG_INFO(priv, FW_MSG, "Received msg opcode 0x%X\n",
+			msg->hdr.opcode);
+		break;
+	}
+}
+
+
+static void handle_top_message(struct iwmct_priv *priv, u8 *buf, int len)
+{
+	struct top_msg *msg;
+
+	msg = (struct top_msg *)buf;
+
+	if (msg->hdr.type != COMM_TYPE_D2H) {
+		LOG_ERROR(priv, FW_MSG,
+			"Message from TOP with invalid message type 0x%X\n",
+			msg->hdr.type);
+		return;
+	}
+
+	if (len < sizeof(msg->hdr)) {
+		LOG_ERROR(priv, FW_MSG,
+			"Message from TOP is too short for message header "
+			"received %d bytes, expected at least %zd bytes\n",
+			len, sizeof(msg->hdr));
+		return;
+	}
+
+	if (len < le16_to_cpu(msg->hdr.length) + sizeof(msg->hdr)) {
+		LOG_ERROR(priv, FW_MSG,
+			"Message length (%d bytes) is shorter than "
+			"in header (%d bytes)\n",
+			len, le16_to_cpu(msg->hdr.length));
+		return;
+	}
+
+	switch (msg->hdr.category) {
+	case COMM_CATEGORY_OPERATIONAL:
+		op_top_message(priv, (struct top_msg *)buf);
+		break;
+
+	case COMM_CATEGORY_DEBUG:
+	case COMM_CATEGORY_TESTABILITY:
+	case COMM_CATEGORY_DIAGNOSTICS:
+		iwmct_log_top_message(priv, buf, len);
+		break;
+
+	default:
+		LOG_ERROR(priv, FW_MSG,
+			"Message from TOP with unknown category 0x%X\n",
+			msg->hdr.category);
+		break;
+	}
+}
+
+int iwmct_send_hcmd(struct iwmct_priv *priv, u8 *cmd, u16 len)
+{
+	int ret;
+	u8 *buf;
+
+	LOG_INFOEX(priv, FW_MSG, "Sending hcmd:\n");
+
+	/* add padding to 256 for IWMC */
+	((struct top_msg *)cmd)->hdr.flags |= CMD_FLAG_PADDING_256;
+
+	LOG_HEXDUMP(FW_MSG, cmd, len);
+
+	if (len > FW_HCMD_BLOCK_SIZE) {
+		LOG_ERROR(priv, FW_MSG, "size %d exceeded hcmd max size %d\n",
+			  len, FW_HCMD_BLOCK_SIZE);
+		return -1;
+	}
+
+	buf = kzalloc(FW_HCMD_BLOCK_SIZE, GFP_KERNEL);
+	if (!buf) {
+		LOG_ERROR(priv, FW_MSG, "kzalloc error, buf size %d\n",
+			  FW_HCMD_BLOCK_SIZE);
+		return -1;
+	}
+
+	memcpy(buf, cmd, len);
+
+	sdio_claim_host(priv->func);
+	ret = sdio_memcpy_toio(priv->func, IWMC_SDIO_DATA_ADDR, buf,
+			       FW_HCMD_BLOCK_SIZE);
+	sdio_release_host(priv->func);
+
+	kfree(buf);
+	return ret;
+}
+
+int iwmct_tx(struct iwmct_priv *priv, unsigned int addr,
+	void *src, int count)
+{
+	int ret;
+
+	sdio_claim_host(priv->func);
+	ret = sdio_memcpy_toio(priv->func, addr, src, count);
+	sdio_release_host(priv->func);
+
+	return ret;
+}
+
+static void iwmct_irq_read_worker(struct work_struct *ws)
+{
+	struct iwmct_priv *priv;
+	struct iwmct_work_struct *read_req;
+	__le32 *buf = NULL;
+	int ret;
+	int iosize;
+	u32 barker;
+	bool is_barker;
+
+	priv = container_of(ws, struct iwmct_priv, isr_worker);
+
+	LOG_INFO(priv, IRQ, "enter iwmct_irq_read_worker %p\n", ws);
+
+	/* --------------------- Handshake with device -------------------- */
+	sdio_claim_host(priv->func);
+
+	/* all list manipulations have to be protected by
+	 * sdio_claim_host/sdio_release_host */
+	if (list_empty(&priv->read_req_list)) {
+		LOG_ERROR(priv, IRQ, "read_req_list empty in read worker\n");
+		goto exit_release;
+	}
+
+	read_req = list_entry(priv->read_req_list.next,
+			      struct iwmct_work_struct, list);
+
+	list_del(&read_req->list);
+	iosize = read_req->iosize;
+	kfree(read_req);
+
+	buf = kzalloc(iosize, GFP_KERNEL);
+	if (!buf) {
+		LOG_ERROR(priv, IRQ, "kzalloc error, buf size %d\n", iosize);
+		goto exit_release;
+	}
+
+	LOG_INFO(priv, IRQ, "iosize=%d, buf=%p, func=%d\n",
+				iosize, buf, priv->func->num);
+
+	/* read from device */
+	ret = sdio_memcpy_fromio(priv->func, buf, IWMC_SDIO_DATA_ADDR, iosize);
+	if (ret) {
+		LOG_ERROR(priv, IRQ, "error %d reading buffer\n", ret);
+		goto exit_release;
+	}
+
+	LOG_HEXDUMP(IRQ, (u8 *)buf, iosize);
+
+	barker = le32_to_cpu(buf[0]);
+
+	/* Verify whether it's a barker and if not - treat as regular Rx */
+	if (barker == IWMC_BARKER_ACK ||
+	    (barker & BARKER_DNLOAD_BARKER_MSK) == IWMC_BARKER_REBOOT) {
+
+		/* Valid Barker is equal on first 4 dwords */
+		is_barker = (buf[1] == buf[0]) &&
+			    (buf[2] == buf[0]) &&
+			    (buf[3] == buf[0]);
+
+		if (!is_barker) {
+			LOG_WARNING(priv, IRQ,
+				"Potentially inconsistent barker "
+				"%08X_%08X_%08X_%08X\n",
+				le32_to_cpu(buf[0]), le32_to_cpu(buf[1]),
+				le32_to_cpu(buf[2]), le32_to_cpu(buf[3]));
+		}
+	} else {
+		is_barker = false;
+	}
+
+	/* Handle Top CommHub message */
+	if (!is_barker) {
+		sdio_release_host(priv->func);
+		handle_top_message(priv, (u8 *)buf, iosize);
+		goto exit;
+	} else if (barker == IWMC_BARKER_ACK) { /* Handle barkers */
+		if (atomic_read(&priv->dev_sync) == 0) {
+			LOG_ERROR(priv, IRQ,
+				  "ACK barker arrived out-of-sync\n");
+			goto exit_release;
+		}
+
+		/* Continuing to FW download (after Sync is completed)*/
+		atomic_set(&priv->dev_sync, 0);
+		LOG_INFO(priv, IRQ, "ACK barker arrived "
+				"- starting FW download\n");
+	} else { /* REBOOT barker */
+		LOG_INFO(priv, IRQ, "Recieved reboot barker: %x\n", barker);
+		priv->barker = barker;
+
+		if (barker & BARKER_DNLOAD_SYNC_MSK) {
+			/* Send the same barker back */
+			ret = sdio_memcpy_toio(priv->func, IWMC_SDIO_DATA_ADDR,
+					       buf, iosize);
+			if (ret) {
+				LOG_ERROR(priv, IRQ,
+					 "error %d echoing barker\n", ret);
+				goto exit_release;
+			}
+			LOG_INFO(priv, IRQ, "Echoing barker to device\n");
+			atomic_set(&priv->dev_sync, 1);
+			goto exit_release;
+		}
+
+		/* Continuing to FW download (without Sync) */
+		LOG_INFO(priv, IRQ, "No sync requested "
+				    "- starting FW download\n");
+	}
+
+	sdio_release_host(priv->func);
+
+
+	LOG_INFO(priv, IRQ, "barker download request 0x%x is:\n", priv->barker);
+	LOG_INFO(priv, IRQ, "*******  Top FW %s requested ********\n",
+			(priv->barker & BARKER_DNLOAD_TOP_MSK) ? "was" : "not");
+	LOG_INFO(priv, IRQ, "*******  GPS FW %s requested ********\n",
+			(priv->barker & BARKER_DNLOAD_GPS_MSK) ? "was" : "not");
+	LOG_INFO(priv, IRQ, "*******  BT FW %s requested ********\n",
+			(priv->barker & BARKER_DNLOAD_BT_MSK) ? "was" : "not");
+
+	if (priv->dbg.fw_download)
+		iwmct_fw_load(priv);
+	else
+		LOG_ERROR(priv, IRQ, "FW download not allowed\n");
+
+	goto exit;
+
+exit_release:
+	sdio_release_host(priv->func);
+exit:
+	kfree(buf);
+	LOG_INFO(priv, IRQ, "exit iwmct_irq_read_worker\n");
+}
+
+static void iwmct_irq(struct sdio_func *func)
+{
+	struct iwmct_priv *priv;
+	int val, ret;
+	int iosize;
+	int addr = IWMC_SDIO_INTR_GET_SIZE_ADDR;
+	struct iwmct_work_struct *read_req;
+
+	priv = sdio_get_drvdata(func);
+
+	LOG_INFO(priv, IRQ, "enter iwmct_irq\n");
+
+	/* read the function's status register */
+	val = sdio_readb(func, IWMC_SDIO_INTR_STATUS_ADDR, &ret);
+
+	LOG_INFO(priv, IRQ, "iir value = %d, ret=%d\n", val, ret);
+
+	if (!val) {
+		LOG_ERROR(priv, IRQ, "iir = 0, exiting ISR\n");
+		goto exit_clear_intr;
+	}
+
+
+	/*
+	 * read 2 bytes of the transaction size
+	 * IMPORTANT: sdio transaction size has to be read before clearing
+	 * sdio interrupt!!!
+	 */
+	val = sdio_readb(priv->func, addr++, &ret);
+	iosize = val;
+	val = sdio_readb(priv->func, addr++, &ret);
+	iosize += val << 8;
+
+	LOG_INFO(priv, IRQ, "READ size %d\n", iosize);
+
+	if (iosize == 0) {
+		LOG_ERROR(priv, IRQ, "READ size %d, exiting ISR\n", iosize);
+		goto exit_clear_intr;
+	}
+
+	/* allocate a work structure to pass iosize to the worker */
+	read_req = kzalloc(sizeof(struct iwmct_work_struct), GFP_KERNEL);
+	if (!read_req) {
+		LOG_ERROR(priv, IRQ, "failed to allocate read_req, exit ISR\n");
+		goto exit_clear_intr;
+	}
+
+	INIT_LIST_HEAD(&read_req->list);
+	read_req->iosize = iosize;
+
+	list_add_tail(&priv->read_req_list, &read_req->list);
+
+	/* clear the function's interrupt request bit (write 1 to clear) */
+	sdio_writeb(func, 1, IWMC_SDIO_INTR_CLEAR_ADDR, &ret);
+
+	queue_work(priv->wq, &priv->isr_worker);
+
+	LOG_INFO(priv, IRQ, "exit iwmct_irq\n");
+
+	return;
+
+exit_clear_intr:
+	/* clear the function's interrupt request bit (write 1 to clear) */
+	sdio_writeb(func, 1, IWMC_SDIO_INTR_CLEAR_ADDR, &ret);
+}
+
+
+static int blocks;
+module_param(blocks, int, 0604);
+MODULE_PARM_DESC(blocks, "max_blocks_to_send");
+
+static int dump;
+module_param(dump, bool, 0604);
+MODULE_PARM_DESC(dump, "dump_hex_content");
+
+static int jump = 1;
+module_param(jump, bool, 0604);
+
+static int direct = 1;
+module_param(direct, bool, 0604);
+
+static int checksum = 1;
+module_param(checksum, bool, 0604);
+
+static int fw_download = 1;
+module_param(fw_download, bool, 0604);
+
+static int block_size = IWMC_SDIO_BLK_SIZE;
+module_param(block_size, int, 0404);
+
+static int download_trans_blks = IWMC_DEFAULT_TR_BLK;
+module_param(download_trans_blks, int, 0604);
+
+static int rubbish_barker;
+module_param(rubbish_barker, bool, 0604);
+
+#ifdef CONFIG_IWMC3200TOP_DEBUG
+static int log_level[LOG_SRC_MAX];
+static unsigned int log_level_argc;
+module_param_array(log_level, int, &log_level_argc, 0604);
+MODULE_PARM_DESC(log_level, "log_level");
+
+static int log_level_fw[FW_LOG_SRC_MAX];
+static unsigned int log_level_fw_argc;
+module_param_array(log_level_fw, int, &log_level_fw_argc, 0604);
+MODULE_PARM_DESC(log_level_fw, "log_level_fw");
+#endif
+
+void iwmct_dbg_init_params(struct iwmct_priv *priv)
+{
+#ifdef CONFIG_IWMC3200TOP_DEBUG
+	int i;
+
+	for (i = 0; i < log_level_argc; i++) {
+		dev_notice(&priv->func->dev, "log_level[%d]=0x%X\n",
+						i, log_level[i]);
+		iwmct_log_set_filter((log_level[i] >> 8) & 0xFF,
+			       log_level[i] & 0xFF);
+	}
+	for (i = 0; i < log_level_fw_argc; i++) {
+		dev_notice(&priv->func->dev, "log_level_fw[%d]=0x%X\n",
+						i, log_level_fw[i]);
+		iwmct_log_set_fw_filter((log_level_fw[i] >> 8) & 0xFF,
+				  log_level_fw[i] & 0xFF);
+	}
+#endif
+
+	priv->dbg.blocks = blocks;
+	LOG_INFO(priv, INIT, "blocks=%d\n", blocks);
+	priv->dbg.dump = (bool)dump;
+	LOG_INFO(priv, INIT, "dump=%d\n", dump);
+	priv->dbg.jump = (bool)jump;
+	LOG_INFO(priv, INIT, "jump=%d\n", jump);
+	priv->dbg.direct = (bool)direct;
+	LOG_INFO(priv, INIT, "direct=%d\n", direct);
+	priv->dbg.checksum = (bool)checksum;
+	LOG_INFO(priv, INIT, "checksum=%d\n", checksum);
+	priv->dbg.fw_download = (bool)fw_download;
+	LOG_INFO(priv, INIT, "fw_download=%d\n", fw_download);
+	priv->dbg.block_size = block_size;
+	LOG_INFO(priv, INIT, "block_size=%d\n", block_size);
+	priv->dbg.download_trans_blks = download_trans_blks;
+	LOG_INFO(priv, INIT, "download_trans_blks=%d\n", download_trans_blks);
+}
+
+/*****************************************************************************
+ *
+ * sysfs attributes
+ *
+ *****************************************************************************/
+static ssize_t show_iwmct_fw_version(struct device *d,
+				  struct device_attribute *attr, char *buf)
+{
+	struct iwmct_priv *priv = dev_get_drvdata(d);
+	return sprintf(buf, "%s\n", priv->dbg.label_fw);
+}
+static DEVICE_ATTR(cc_label_fw, S_IRUGO, show_iwmct_fw_version, NULL);
+
+#ifdef CONFIG_IWMC3200TOP_DEBUG
+static DEVICE_ATTR(log_level, S_IWUSR | S_IRUGO,
+		   show_iwmct_log_level, store_iwmct_log_level);
+static DEVICE_ATTR(log_level_fw, S_IWUSR | S_IRUGO,
+		   show_iwmct_log_level_fw, store_iwmct_log_level_fw);
+#endif
+
+static struct attribute *iwmct_sysfs_entries[] = {
+	&dev_attr_cc_label_fw.attr,
+#ifdef CONFIG_IWMC3200TOP_DEBUG
+	&dev_attr_log_level.attr,
+	&dev_attr_log_level_fw.attr,
+#endif
+	NULL
+};
+
+static struct attribute_group iwmct_attribute_group = {
+	.name = NULL,		/* put in device directory */
+	.attrs = iwmct_sysfs_entries,
+};
+
+
+static int iwmct_probe(struct sdio_func *func,
+			   const struct sdio_device_id *id)
+{
+	struct iwmct_priv *priv;
+	int ret;
+	int val = 1;
+	int addr = IWMC_SDIO_INTR_ENABLE_ADDR;
+
+	dev_dbg(&func->dev, "enter iwmct_probe\n");
+
+	dev_dbg(&func->dev, "IRQ polling period id %u msecs, HZ is %d\n",
+		jiffies_to_msecs(2147483647), HZ);
+
+	priv = kzalloc(sizeof(struct iwmct_priv), GFP_KERNEL);
+	if (!priv) {
+		dev_err(&func->dev, "kzalloc error\n");
+		return -ENOMEM;
+	}
+	priv->func = func;
+	sdio_set_drvdata(func, priv);
+
+
+	/* create drivers work queue */
+	priv->wq = create_workqueue(DRV_NAME "_wq");
+	priv->bus_rescan_wq = create_workqueue(DRV_NAME "_rescan_wq");
+	INIT_WORK(&priv->bus_rescan_worker, iwmct_rescan_worker);
+	INIT_WORK(&priv->isr_worker, iwmct_irq_read_worker);
+
+	init_waitqueue_head(&priv->wait_q);
+
+	sdio_claim_host(func);
+	/* FIXME: Remove after it is fixed in the Boot ROM upgrade */
+	func->enable_timeout = 10;
+
+	/* In our HW, setting the block size also wakes up the boot rom. */
+	ret = sdio_set_block_size(func, priv->dbg.block_size);
+	if (ret) {
+		LOG_ERROR(priv, INIT,
+			"sdio_set_block_size() failure: %d\n", ret);
+		goto error_sdio_enable;
+	}
+
+	ret = sdio_enable_func(func);
+	if (ret) {
+		LOG_ERROR(priv, INIT, "sdio_enable_func() failure: %d\n", ret);
+		goto error_sdio_enable;
+	}
+
+	/* init reset and dev_sync states */
+	atomic_set(&priv->reset, 0);
+	atomic_set(&priv->dev_sync, 0);
+
+	/* init read req queue */
+	INIT_LIST_HEAD(&priv->read_req_list);
+
+	/* process configurable parameters */
+	iwmct_dbg_init_params(priv);
+	ret = sysfs_create_group(&func->dev.kobj, &iwmct_attribute_group);
+	if (ret) {
+		LOG_ERROR(priv, INIT, "Failed to register attributes and "
+			 "initialize module_params\n");
+		goto error_dev_attrs;
+	}
+
+	iwmct_dbgfs_register(priv, DRV_NAME);
+
+	if (!priv->dbg.direct && priv->dbg.download_trans_blks > 8) {
+		LOG_INFO(priv, INIT,
+			 "Reducing transaction to 8 blocks = 2K (from %d)\n",
+			 priv->dbg.download_trans_blks);
+		priv->dbg.download_trans_blks = 8;
+	}
+	priv->trans_len = priv->dbg.download_trans_blks * priv->dbg.block_size;
+	LOG_INFO(priv, INIT, "Transaction length = %d\n", priv->trans_len);
+
+	ret = sdio_claim_irq(func, iwmct_irq);
+	if (ret) {
+		LOG_ERROR(priv, INIT, "sdio_claim_irq() failure: %d\n", ret);
+		goto error_claim_irq;
+	}
+
+
+	/* Enable function's interrupt */
+	sdio_writeb(priv->func, val, addr, &ret);
+	if (ret) {
+		LOG_ERROR(priv, INIT, "Failure writing to "
+			  "Interrupt Enable Register (%d): %d\n", addr, ret);
+		goto error_enable_int;
+	}
+
+	sdio_release_host(func);
+
+	LOG_INFO(priv, INIT, "exit iwmct_probe\n");
+
+	return ret;
+
+error_enable_int:
+	sdio_release_irq(func);
+error_claim_irq:
+	sdio_disable_func(func);
+error_dev_attrs:
+	iwmct_dbgfs_unregister(priv->dbgfs);
+	sysfs_remove_group(&func->dev.kobj, &iwmct_attribute_group);
+error_sdio_enable:
+	sdio_release_host(func);
+	return ret;
+}
+
+static void iwmct_remove(struct sdio_func *func)
+{
+	struct iwmct_work_struct *read_req;
+	struct iwmct_priv *priv = sdio_get_drvdata(func);
+
+	priv = sdio_get_drvdata(func);
+
+	LOG_INFO(priv, INIT, "enter\n");
+
+	sdio_claim_host(func);
+	sdio_release_irq(func);
+	sdio_release_host(func);
+
+	/* Safely destroy osc workqueue */
+	destroy_workqueue(priv->bus_rescan_wq);
+	destroy_workqueue(priv->wq);
+
+	sdio_claim_host(func);
+	sdio_disable_func(func);
+	sysfs_remove_group(&func->dev.kobj, &iwmct_attribute_group);
+	iwmct_dbgfs_unregister(priv->dbgfs);
+	sdio_release_host(func);
+
+	/* free read requests */
+	while (!list_empty(&priv->read_req_list)) {
+		read_req = list_entry(priv->read_req_list.next,
+			struct iwmct_work_struct, list);
+
+		list_del(&read_req->list);
+		kfree(read_req);
+	}
+
+	kfree(priv);
+}
+
+
+static const struct sdio_device_id iwmct_ids[] = {
+	/* Intel Wireless MultiCom 3200 Top Driver */
+	{ SDIO_DEVICE(SDIO_VENDOR_ID_INTEL, 0x1404)},
+	{ },	/* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(sdio, iwmct_ids);
+
+static struct sdio_driver iwmct_driver = {
+	.probe		= iwmct_probe,
+	.remove		= iwmct_remove,
+	.name		= DRV_NAME,
+	.id_table	= iwmct_ids,
+};
+
+static int __init iwmct_init(void)
+{
+	int rc;
+
+	/* Default log filter settings */
+	iwmct_log_set_filter(LOG_SRC_ALL, LOG_SEV_FILTER_RUNTIME);
+	iwmct_log_set_filter(LOG_SRC_FW_MSG, LOG_SEV_FILTER_ALL);
+	iwmct_log_set_fw_filter(LOG_SRC_ALL, FW_LOG_SEV_FILTER_RUNTIME);
+
+	rc = sdio_register_driver(&iwmct_driver);
+
+	return rc;
+}
+
+static void __exit iwmct_exit(void)
+{
+	sdio_unregister_driver(&iwmct_driver);
+}
+
+module_init(iwmct_init);
+module_exit(iwmct_exit);
+
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
new file mode 100644
index 00000000000..fcb6ec1af17
--- /dev/null
+++ b/drivers/misc/kgdbts.c
@@ -0,0 +1,1126 @@
+/*
+ * kgdbts is a test suite for kgdb for the sole purpose of validating
+ * that key pieces of the kgdb internals are working properly such as
+ * HW/SW breakpoints, single stepping, and NMI.
+ *
+ * Created by: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (c) 2008 Wind River Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/* Information about the kgdb test suite.
+ * -------------------------------------
+ *
+ * The kgdb test suite is designed as a KGDB I/O module which
+ * simulates the communications that a debugger would have with kgdb.
+ * The tests are broken up in to a line by line and referenced here as
+ * a "get" which is kgdb requesting input and "put" which is kgdb
+ * sending a response.
+ *
+ * The kgdb suite can be invoked from the kernel command line
+ * arguments system or executed dynamically at run time.  The test
+ * suite uses the variable "kgdbts" to obtain the information about
+ * which tests to run and to configure the verbosity level.  The
+ * following are the various characters you can use with the kgdbts=
+ * line:
+ *
+ * When using the "kgdbts=" you only choose one of the following core
+ * test types:
+ * A = Run all the core tests silently
+ * V1 = Run all the core tests with minimal output
+ * V2 = Run all the core tests in debug mode
+ *
+ * You can also specify optional tests:
+ * N## = Go to sleep with interrupts of for ## seconds
+ *       to test the HW NMI watchdog
+ * F## = Break at do_fork for ## iterations
+ * S## = Break at sys_open for ## iterations
+ * I## = Run the single step test ## iterations
+ *
+ * NOTE: that the do_fork and sys_open tests are mutually exclusive.
+ *
+ * To invoke the kgdb test suite from boot you use a kernel start
+ * argument as follows:
+ * 	kgdbts=V1 kgdbwait
+ * Or if you wanted to perform the NMI test for 6 seconds and do_fork
+ * test for 100 forks, you could use:
+ * 	kgdbts=V1N6F100 kgdbwait
+ *
+ * The test suite can also be invoked at run time with:
+ *	echo kgdbts=V1N6F100 > /sys/module/kgdbts/parameters/kgdbts
+ * Or as another example:
+ *	echo kgdbts=V2 > /sys/module/kgdbts/parameters/kgdbts
+ *
+ * When developing a new kgdb arch specific implementation or
+ * using these tests for the purpose of regression testing,
+ * several invocations are required.
+ *
+ * 1) Boot with the test suite enabled by using the kernel arguments
+ *       "kgdbts=V1F100 kgdbwait"
+ *    ## If kgdb arch specific implementation has NMI use
+ *       "kgdbts=V1N6F100
+ *
+ * 2) After the system boot run the basic test.
+ * echo kgdbts=V1 > /sys/module/kgdbts/parameters/kgdbts
+ *
+ * 3) Run the concurrency tests.  It is best to use n+1
+ *    while loops where n is the number of cpus you have
+ *    in your system.  The example below uses only two
+ *    loops.
+ *
+ * ## This tests break points on sys_open
+ * while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
+ * while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
+ * echo kgdbts=V1S10000 > /sys/module/kgdbts/parameters/kgdbts
+ * fg # and hit control-c
+ * fg # and hit control-c
+ * ## This tests break points on do_fork
+ * while [ 1 ] ; do date > /dev/null ; done &
+ * while [ 1 ] ; do date > /dev/null ; done &
+ * echo kgdbts=V1F1000 > /sys/module/kgdbts/parameters/kgdbts
+ * fg # and hit control-c
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/kgdb.h>
+#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+
+#define v1printk(a...) do { \
+	if (verbose) \
+		printk(KERN_INFO a); \
+	} while (0)
+#define v2printk(a...) do { \
+	if (verbose > 1) \
+		printk(KERN_INFO a); \
+		touch_nmi_watchdog();	\
+	} while (0)
+#define eprintk(a...) do { \
+		printk(KERN_ERR a); \
+		WARN_ON(1); \
+	} while (0)
+#define MAX_CONFIG_LEN		40
+
+static struct kgdb_io kgdbts_io_ops;
+static char get_buf[BUFMAX];
+static int get_buf_cnt;
+static char put_buf[BUFMAX];
+static int put_buf_cnt;
+static char scratch_buf[BUFMAX];
+static int verbose;
+static int repeat_test;
+static int test_complete;
+static int send_ack;
+static int final_ack;
+static int force_hwbrks;
+static int hwbreaks_ok;
+static int hw_break_val;
+static int hw_break_val2;
+#if defined(CONFIG_ARM) || defined(CONFIG_MIPS) || defined(CONFIG_SPARC)
+static int arch_needs_sstep_emulation = 1;
+#else
+static int arch_needs_sstep_emulation;
+#endif
+static unsigned long sstep_addr;
+static int sstep_state;
+
+/* Storage for the registers, in GDB format. */
+static unsigned long kgdbts_gdb_regs[(NUMREGBYTES +
+					sizeof(unsigned long) - 1) /
+					sizeof(unsigned long)];
+static struct pt_regs kgdbts_regs;
+
+/* -1 = init not run yet, 0 = unconfigured, 1 = configured. */
+static int configured		= -1;
+
+#ifdef CONFIG_KGDB_TESTS_BOOT_STRING
+static char config[MAX_CONFIG_LEN] = CONFIG_KGDB_TESTS_BOOT_STRING;
+#else
+static char config[MAX_CONFIG_LEN];
+#endif
+static struct kparam_string kps = {
+	.string			= config,
+	.maxlen			= MAX_CONFIG_LEN,
+};
+
+static void fill_get_buf(char *buf);
+
+struct test_struct {
+	char *get;
+	char *put;
+	void (*get_handler)(char *);
+	int (*put_handler)(char *, char *);
+};
+
+struct test_state {
+	char *name;
+	struct test_struct *tst;
+	int idx;
+	int (*run_test) (int, int);
+	int (*validate_put) (char *);
+};
+
+static struct test_state ts;
+
+static int kgdbts_unreg_thread(void *ptr)
+{
+	/* Wait until the tests are complete and then ungresiter the I/O
+	 * driver.
+	 */
+	while (!final_ack)
+		msleep_interruptible(1500);
+
+	if (configured)
+		kgdb_unregister_io_module(&kgdbts_io_ops);
+	configured = 0;
+
+	return 0;
+}
+
+/* This is noinline such that it can be used for a single location to
+ * place a breakpoint
+ */
+static noinline void kgdbts_break_test(void)
+{
+	v2printk("kgdbts: breakpoint complete\n");
+}
+
+/* Lookup symbol info in the kernel */
+static unsigned long lookup_addr(char *arg)
+{
+	unsigned long addr = 0;
+
+	if (!strcmp(arg, "kgdbts_break_test"))
+		addr = (unsigned long)kgdbts_break_test;
+	else if (!strcmp(arg, "sys_open"))
+		addr = (unsigned long)sys_open;
+	else if (!strcmp(arg, "do_fork"))
+		addr = (unsigned long)do_fork;
+	else if (!strcmp(arg, "hw_break_val"))
+		addr = (unsigned long)&hw_break_val;
+	return addr;
+}
+
+static void break_helper(char *bp_type, char *arg, unsigned long vaddr)
+{
+	unsigned long addr;
+
+	if (arg)
+		addr = lookup_addr(arg);
+	else
+		addr = vaddr;
+
+	sprintf(scratch_buf, "%s,%lx,%i", bp_type, addr,
+		BREAK_INSTR_SIZE);
+	fill_get_buf(scratch_buf);
+}
+
+static void sw_break(char *arg)
+{
+	break_helper(force_hwbrks ? "Z1" : "Z0", arg, 0);
+}
+
+static void sw_rem_break(char *arg)
+{
+	break_helper(force_hwbrks ? "z1" : "z0", arg, 0);
+}
+
+static void hw_break(char *arg)
+{
+	break_helper("Z1", arg, 0);
+}
+
+static void hw_rem_break(char *arg)
+{
+	break_helper("z1", arg, 0);
+}
+
+static void hw_write_break(char *arg)
+{
+	break_helper("Z2", arg, 0);
+}
+
+static void hw_rem_write_break(char *arg)
+{
+	break_helper("z2", arg, 0);
+}
+
+static void hw_access_break(char *arg)
+{
+	break_helper("Z4", arg, 0);
+}
+
+static void hw_rem_access_break(char *arg)
+{
+	break_helper("z4", arg, 0);
+}
+
+static void hw_break_val_access(void)
+{
+	hw_break_val2 = hw_break_val;
+}
+
+static void hw_break_val_write(void)
+{
+	hw_break_val++;
+}
+
+static int check_and_rewind_pc(char *put_str, char *arg)
+{
+	unsigned long addr = lookup_addr(arg);
+	int offset = 0;
+
+	kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+		 NUMREGBYTES);
+	gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+	v2printk("Stopped at IP: %lx\n", instruction_pointer(&kgdbts_regs));
+#ifdef CONFIG_X86
+	/* On x86 a breakpoint stop requires it to be decremented */
+	if (addr + 1 == kgdbts_regs.ip)
+		offset = -1;
+#endif
+	if (strcmp(arg, "silent") &&
+		instruction_pointer(&kgdbts_regs) + offset != addr) {
+		eprintk("kgdbts: BP mismatch %lx expected %lx\n",
+			   instruction_pointer(&kgdbts_regs) + offset, addr);
+		return 1;
+	}
+#ifdef CONFIG_X86
+	/* On x86 adjust the instruction pointer if needed */
+	kgdbts_regs.ip += offset;
+#endif
+	return 0;
+}
+
+static int check_single_step(char *put_str, char *arg)
+{
+	unsigned long addr = lookup_addr(arg);
+	/*
+	 * From an arch indepent point of view the instruction pointer
+	 * should be on a different instruction
+	 */
+	kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+		 NUMREGBYTES);
+	gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+	v2printk("Singlestep stopped at IP: %lx\n",
+		   instruction_pointer(&kgdbts_regs));
+	if (instruction_pointer(&kgdbts_regs) == addr) {
+		eprintk("kgdbts: SingleStep failed at %lx\n",
+			   instruction_pointer(&kgdbts_regs));
+		return 1;
+	}
+
+	return 0;
+}
+
+static void write_regs(char *arg)
+{
+	memset(scratch_buf, 0, sizeof(scratch_buf));
+	scratch_buf[0] = 'G';
+	pt_regs_to_gdb_regs(kgdbts_gdb_regs, &kgdbts_regs);
+	kgdb_mem2hex((char *)kgdbts_gdb_regs, &scratch_buf[1], NUMREGBYTES);
+	fill_get_buf(scratch_buf);
+}
+
+static void skip_back_repeat_test(char *arg)
+{
+	int go_back = simple_strtol(arg, NULL, 10);
+
+	repeat_test--;
+	if (repeat_test <= 0)
+		ts.idx++;
+	else
+		ts.idx -= go_back;
+	fill_get_buf(ts.tst[ts.idx].get);
+}
+
+static int got_break(char *put_str, char *arg)
+{
+	test_complete = 1;
+	if (!strncmp(put_str+1, arg, 2)) {
+		if (!strncmp(arg, "T0", 2))
+			test_complete = 2;
+		return 0;
+	}
+	return 1;
+}
+
+static void emul_sstep_get(char *arg)
+{
+	if (!arch_needs_sstep_emulation) {
+		fill_get_buf(arg);
+		return;
+	}
+	switch (sstep_state) {
+	case 0:
+		v2printk("Emulate single step\n");
+		/* Start by looking at the current PC */
+		fill_get_buf("g");
+		break;
+	case 1:
+		/* set breakpoint */
+		break_helper("Z0", NULL, sstep_addr);
+		break;
+	case 2:
+		/* Continue */
+		fill_get_buf("c");
+		break;
+	case 3:
+		/* Clear breakpoint */
+		break_helper("z0", NULL, sstep_addr);
+		break;
+	default:
+		eprintk("kgdbts: ERROR failed sstep get emulation\n");
+	}
+	sstep_state++;
+}
+
+static int emul_sstep_put(char *put_str, char *arg)
+{
+	if (!arch_needs_sstep_emulation) {
+		if (!strncmp(put_str+1, arg, 2))
+			return 0;
+		return 1;
+	}
+	switch (sstep_state) {
+	case 1:
+		/* validate the "g" packet to get the IP */
+		kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+			 NUMREGBYTES);
+		gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+		v2printk("Stopped at IP: %lx\n",
+			 instruction_pointer(&kgdbts_regs));
+		/* Want to stop at IP + break instruction size by default */
+		sstep_addr = instruction_pointer(&kgdbts_regs) +
+			BREAK_INSTR_SIZE;
+		break;
+	case 2:
+		if (strncmp(put_str, "$OK", 3)) {
+			eprintk("kgdbts: failed sstep break set\n");
+			return 1;
+		}
+		break;
+	case 3:
+		if (strncmp(put_str, "$T0", 3)) {
+			eprintk("kgdbts: failed continue sstep\n");
+			return 1;
+		}
+		break;
+	case 4:
+		if (strncmp(put_str, "$OK", 3)) {
+			eprintk("kgdbts: failed sstep break unset\n");
+			return 1;
+		}
+		/* Single step is complete so continue on! */
+		sstep_state = 0;
+		return 0;
+	default:
+		eprintk("kgdbts: ERROR failed sstep put emulation\n");
+	}
+
+	/* Continue on the same test line until emulation is complete */
+	ts.idx--;
+	return 0;
+}
+
+static int final_ack_set(char *put_str, char *arg)
+{
+	if (strncmp(put_str+1, arg, 2))
+		return 1;
+	final_ack = 1;
+	return 0;
+}
+/*
+ * Test to plant a breakpoint and detach, which should clear out the
+ * breakpoint and restore the original instruction.
+ */
+static struct test_struct plant_and_detach_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+	{ "D", "OK" }, /* Detach */
+	{ "", "" },
+};
+
+/*
+ * Simple test to write in a software breakpoint, check for the
+ * correct stop location and detach.
+ */
+static struct test_struct sw_breakpoint_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+	{ "c", "T0*", }, /* Continue */
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs },
+	{ "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
+	{ "D", "OK" }, /* Detach */
+	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+	{ "", "" },
+};
+
+/*
+ * Test a known bad memory read location to test the fault handler and
+ * read bytes 1-8 at the bad address
+ */
+static struct test_struct bad_read_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "m0,1", "E*" }, /* read 1 byte at address 1 */
+	{ "m0,2", "E*" }, /* read 1 byte at address 2 */
+	{ "m0,3", "E*" }, /* read 1 byte at address 3 */
+	{ "m0,4", "E*" }, /* read 1 byte at address 4 */
+	{ "m0,5", "E*" }, /* read 1 byte at address 5 */
+	{ "m0,6", "E*" }, /* read 1 byte at address 6 */
+	{ "m0,7", "E*" }, /* read 1 byte at address 7 */
+	{ "m0,8", "E*" }, /* read 1 byte at address 8 */
+	{ "D", "OK" }, /* Detach which removes all breakpoints and continues */
+	{ "", "" },
+};
+
+/*
+ * Test for hitting a breakpoint, remove it, single step, plant it
+ * again and detach.
+ */
+static struct test_struct singlestep_break_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+	{ "c", "T0*", }, /* Continue */
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs }, /* Write registers */
+	{ "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
+	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+	{ "g", "kgdbts_break_test", NULL, check_single_step },
+	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+	{ "c", "T0*", }, /* Continue */
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs }, /* Write registers */
+	{ "D", "OK" }, /* Remove all breakpoints and continues */
+	{ "", "" },
+};
+
+/*
+ * Test for hitting a breakpoint at do_fork for what ever the number
+ * of iterations required by the variable repeat_test.
+ */
+static struct test_struct do_fork_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "do_fork", "OK", sw_break, }, /* set sw breakpoint */
+	{ "c", "T0*", }, /* Continue */
+	{ "g", "do_fork", NULL, check_and_rewind_pc }, /* check location */
+	{ "write", "OK", write_regs }, /* Write registers */
+	{ "do_fork", "OK", sw_rem_break }, /*remove breakpoint */
+	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+	{ "g", "do_fork", NULL, check_single_step },
+	{ "do_fork", "OK", sw_break, }, /* set sw breakpoint */
+	{ "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
+	{ "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */
+	{ "", "" },
+};
+
+/* Test for hitting a breakpoint at sys_open for what ever the number
+ * of iterations required by the variable repeat_test.
+ */
+static struct test_struct sys_open_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "sys_open", "OK", sw_break, }, /* set sw breakpoint */
+	{ "c", "T0*", }, /* Continue */
+	{ "g", "sys_open", NULL, check_and_rewind_pc }, /* check location */
+	{ "write", "OK", write_regs }, /* Write registers */
+	{ "sys_open", "OK", sw_rem_break }, /*remove breakpoint */
+	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+	{ "g", "sys_open", NULL, check_single_step },
+	{ "sys_open", "OK", sw_break, }, /* set sw breakpoint */
+	{ "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
+	{ "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */
+	{ "", "" },
+};
+
+/*
+ * Test for hitting a simple hw breakpoint
+ */
+static struct test_struct hw_breakpoint_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "kgdbts_break_test", "OK", hw_break, }, /* set hw breakpoint */
+	{ "c", "T0*", }, /* Continue */
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs },
+	{ "kgdbts_break_test", "OK", hw_rem_break }, /*remove breakpoint */
+	{ "D", "OK" }, /* Detach */
+	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+	{ "", "" },
+};
+
+/*
+ * Test for hitting a hw write breakpoint
+ */
+static struct test_struct hw_write_break_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "hw_break_val", "OK", hw_write_break, }, /* set hw breakpoint */
+	{ "c", "T0*", NULL, got_break }, /* Continue */
+	{ "g", "silent", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs },
+	{ "hw_break_val", "OK", hw_rem_write_break }, /*remove breakpoint */
+	{ "D", "OK" }, /* Detach */
+	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+	{ "", "" },
+};
+
+/*
+ * Test for hitting a hw access breakpoint
+ */
+static struct test_struct hw_access_break_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "hw_break_val", "OK", hw_access_break, }, /* set hw breakpoint */
+	{ "c", "T0*", NULL, got_break }, /* Continue */
+	{ "g", "silent", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs },
+	{ "hw_break_val", "OK", hw_rem_access_break }, /*remove breakpoint */
+	{ "D", "OK" }, /* Detach */
+	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+	{ "", "" },
+};
+
+/*
+ * Test for hitting a hw access breakpoint
+ */
+static struct test_struct nmi_sleep_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "c", "T0*", NULL, got_break }, /* Continue */
+	{ "D", "OK" }, /* Detach */
+	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+	{ "", "" },
+};
+
+static void fill_get_buf(char *buf)
+{
+	unsigned char checksum = 0;
+	int count = 0;
+	char ch;
+
+	strcpy(get_buf, "$");
+	strcat(get_buf, buf);
+	while ((ch = buf[count])) {
+		checksum += ch;
+		count++;
+	}
+	strcat(get_buf, "#");
+	get_buf[count + 2] = hex_asc_hi(checksum);
+	get_buf[count + 3] = hex_asc_lo(checksum);
+	get_buf[count + 4] = '\0';
+	v2printk("get%i: %s\n", ts.idx, get_buf);
+}
+
+static int validate_simple_test(char *put_str)
+{
+	char *chk_str;
+
+	if (ts.tst[ts.idx].put_handler)
+		return ts.tst[ts.idx].put_handler(put_str,
+			ts.tst[ts.idx].put);
+
+	chk_str = ts.tst[ts.idx].put;
+	if (*put_str == '$')
+		put_str++;
+
+	while (*chk_str != '\0' && *put_str != '\0') {
+		/* If someone does a * to match the rest of the string, allow
+		 * it, or stop if the recieved string is complete.
+		 */
+		if (*put_str == '#' || *chk_str == '*')
+			return 0;
+		if (*put_str != *chk_str)
+			return 1;
+
+		chk_str++;
+		put_str++;
+	}
+	if (*chk_str == '\0' && (*put_str == '\0' || *put_str == '#'))
+		return 0;
+
+	return 1;
+}
+
+static int run_simple_test(int is_get_char, int chr)
+{
+	int ret = 0;
+	if (is_get_char) {
+		/* Send an ACK on the get if a prior put completed and set the
+		 * send ack variable
+		 */
+		if (send_ack) {
+			send_ack = 0;
+			return '+';
+		}
+		/* On the first get char, fill the transmit buffer and then
+		 * take from the get_string.
+		 */
+		if (get_buf_cnt == 0) {
+			if (ts.tst[ts.idx].get_handler)
+				ts.tst[ts.idx].get_handler(ts.tst[ts.idx].get);
+			else
+				fill_get_buf(ts.tst[ts.idx].get);
+		}
+
+		if (get_buf[get_buf_cnt] == '\0') {
+			eprintk("kgdbts: ERROR GET: EOB on '%s' at %i\n",
+			   ts.name, ts.idx);
+			get_buf_cnt = 0;
+			fill_get_buf("D");
+		}
+		ret = get_buf[get_buf_cnt];
+		get_buf_cnt++;
+		return ret;
+	}
+
+	/* This callback is a put char which is when kgdb sends data to
+	 * this I/O module.
+	 */
+	if (ts.tst[ts.idx].get[0] == '\0' &&
+		ts.tst[ts.idx].put[0] == '\0') {
+		eprintk("kgdbts: ERROR: beyond end of test on"
+			   " '%s' line %i\n", ts.name, ts.idx);
+		return 0;
+	}
+
+	if (put_buf_cnt >= BUFMAX) {
+		eprintk("kgdbts: ERROR: put buffer overflow on"
+			   " '%s' line %i\n", ts.name, ts.idx);
+		put_buf_cnt = 0;
+		return 0;
+	}
+	/* Ignore everything until the first valid packet start '$' */
+	if (put_buf_cnt == 0 && chr != '$')
+		return 0;
+
+	put_buf[put_buf_cnt] = chr;
+	put_buf_cnt++;
+
+	/* End of packet == #XX so look for the '#' */
+	if (put_buf_cnt > 3 && put_buf[put_buf_cnt - 3] == '#') {
+		if (put_buf_cnt >= BUFMAX) {
+			eprintk("kgdbts: ERROR: put buffer overflow on"
+				" '%s' line %i\n", ts.name, ts.idx);
+			put_buf_cnt = 0;
+			return 0;
+		}
+		put_buf[put_buf_cnt] = '\0';
+		v2printk("put%i: %s\n", ts.idx, put_buf);
+		/* Trigger check here */
+		if (ts.validate_put && ts.validate_put(put_buf)) {
+			eprintk("kgdbts: ERROR PUT: end of test "
+			   "buffer on '%s' line %i expected %s got %s\n",
+			   ts.name, ts.idx, ts.tst[ts.idx].put, put_buf);
+		}
+		ts.idx++;
+		put_buf_cnt = 0;
+		get_buf_cnt = 0;
+		send_ack = 1;
+	}
+	return 0;
+}
+
+static void init_simple_test(void)
+{
+	memset(&ts, 0, sizeof(ts));
+	ts.run_test = run_simple_test;
+	ts.validate_put = validate_simple_test;
+}
+
+static void run_plant_and_detach_test(int is_early)
+{
+	char before[BREAK_INSTR_SIZE];
+	char after[BREAK_INSTR_SIZE];
+
+	probe_kernel_read(before, (char *)kgdbts_break_test,
+	  BREAK_INSTR_SIZE);
+	init_simple_test();
+	ts.tst = plant_and_detach_test;
+	ts.name = "plant_and_detach_test";
+	/* Activate test with initial breakpoint */
+	if (!is_early)
+		kgdb_breakpoint();
+	probe_kernel_read(after, (char *)kgdbts_break_test,
+	  BREAK_INSTR_SIZE);
+	if (memcmp(before, after, BREAK_INSTR_SIZE)) {
+		printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n");
+		panic("kgdb memory corruption");
+	}
+
+	/* complete the detach test */
+	if (!is_early)
+		kgdbts_break_test();
+}
+
+static void run_breakpoint_test(int is_hw_breakpoint)
+{
+	test_complete = 0;
+	init_simple_test();
+	if (is_hw_breakpoint) {
+		ts.tst = hw_breakpoint_test;
+		ts.name = "hw_breakpoint_test";
+	} else {
+		ts.tst = sw_breakpoint_test;
+		ts.name = "sw_breakpoint_test";
+	}
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+	/* run code with the break point in it */
+	kgdbts_break_test();
+	kgdb_breakpoint();
+
+	if (test_complete)
+		return;
+
+	eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+	if (is_hw_breakpoint)
+		hwbreaks_ok = 0;
+}
+
+static void run_hw_break_test(int is_write_test)
+{
+	test_complete = 0;
+	init_simple_test();
+	if (is_write_test) {
+		ts.tst = hw_write_break_test;
+		ts.name = "hw_write_break_test";
+	} else {
+		ts.tst = hw_access_break_test;
+		ts.name = "hw_access_break_test";
+	}
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+	hw_break_val_access();
+	if (is_write_test) {
+		if (test_complete == 2) {
+			eprintk("kgdbts: ERROR %s broke on access\n",
+				ts.name);
+			hwbreaks_ok = 0;
+		}
+		hw_break_val_write();
+	}
+	kgdb_breakpoint();
+
+	if (test_complete == 1)
+		return;
+
+	eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+	hwbreaks_ok = 0;
+}
+
+static void run_nmi_sleep_test(int nmi_sleep)
+{
+	unsigned long flags;
+
+	init_simple_test();
+	ts.tst = nmi_sleep_test;
+	ts.name = "nmi_sleep_test";
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+	local_irq_save(flags);
+	mdelay(nmi_sleep*1000);
+	touch_nmi_watchdog();
+	local_irq_restore(flags);
+	if (test_complete != 2)
+		eprintk("kgdbts: ERROR nmi_test did not hit nmi\n");
+	kgdb_breakpoint();
+	if (test_complete == 1)
+		return;
+
+	eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+}
+
+static void run_bad_read_test(void)
+{
+	init_simple_test();
+	ts.tst = bad_read_test;
+	ts.name = "bad_read_test";
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+}
+
+static void run_do_fork_test(void)
+{
+	init_simple_test();
+	ts.tst = do_fork_test;
+	ts.name = "do_fork_test";
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+}
+
+static void run_sys_open_test(void)
+{
+	init_simple_test();
+	ts.tst = sys_open_test;
+	ts.name = "sys_open_test";
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+}
+
+static void run_singlestep_break_test(void)
+{
+	init_simple_test();
+	ts.tst = singlestep_break_test;
+	ts.name = "singlestep_breakpoint_test";
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+	kgdbts_break_test();
+	kgdbts_break_test();
+}
+
+static void kgdbts_run_tests(void)
+{
+	char *ptr;
+	int fork_test = 0;
+	int do_sys_open_test = 0;
+	int sstep_test = 1000;
+	int nmi_sleep = 0;
+	int i;
+
+	ptr = strchr(config, 'F');
+	if (ptr)
+		fork_test = simple_strtol(ptr + 1, NULL, 10);
+	ptr = strchr(config, 'S');
+	if (ptr)
+		do_sys_open_test = simple_strtol(ptr + 1, NULL, 10);
+	ptr = strchr(config, 'N');
+	if (ptr)
+		nmi_sleep = simple_strtol(ptr+1, NULL, 10);
+	ptr = strchr(config, 'I');
+	if (ptr)
+		sstep_test = simple_strtol(ptr+1, NULL, 10);
+
+	/* required internal KGDB tests */
+	v1printk("kgdbts:RUN plant and detach test\n");
+	run_plant_and_detach_test(0);
+	v1printk("kgdbts:RUN sw breakpoint test\n");
+	run_breakpoint_test(0);
+	v1printk("kgdbts:RUN bad memory access test\n");
+	run_bad_read_test();
+	v1printk("kgdbts:RUN singlestep test %i iterations\n", sstep_test);
+	for (i = 0; i < sstep_test; i++) {
+		run_singlestep_break_test();
+		if (i % 100 == 0)
+			v1printk("kgdbts:RUN singlestep [%i/%i]\n",
+				 i, sstep_test);
+	}
+
+	/* ===Optional tests=== */
+
+	/* All HW break point tests */
+	if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) {
+		hwbreaks_ok = 1;
+		v1printk("kgdbts:RUN hw breakpoint test\n");
+		run_breakpoint_test(1);
+		v1printk("kgdbts:RUN hw write breakpoint test\n");
+		run_hw_break_test(1);
+		v1printk("kgdbts:RUN access write breakpoint test\n");
+		run_hw_break_test(0);
+	}
+
+	if (nmi_sleep) {
+		v1printk("kgdbts:RUN NMI sleep %i seconds test\n", nmi_sleep);
+		run_nmi_sleep_test(nmi_sleep);
+	}
+
+#ifdef CONFIG_DEBUG_RODATA
+	/* Until there is an api to write to read-only text segments, use
+	 * HW breakpoints for the remainder of any tests, else print a
+	 * failure message if hw breakpoints do not work.
+	 */
+	if (!(arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT && hwbreaks_ok)) {
+		eprintk("kgdbts: HW breakpoints do not work,"
+			"skipping remaining tests\n");
+		return;
+	}
+	force_hwbrks = 1;
+#endif /* CONFIG_DEBUG_RODATA */
+
+	/* If the do_fork test is run it will be the last test that is
+	 * executed because a kernel thread will be spawned at the very
+	 * end to unregister the debug hooks.
+	 */
+	if (fork_test) {
+		repeat_test = fork_test;
+		printk(KERN_INFO "kgdbts:RUN do_fork for %i breakpoints\n",
+			repeat_test);
+		kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg");
+		run_do_fork_test();
+		return;
+	}
+
+	/* If the sys_open test is run it will be the last test that is
+	 * executed because a kernel thread will be spawned at the very
+	 * end to unregister the debug hooks.
+	 */
+	if (do_sys_open_test) {
+		repeat_test = do_sys_open_test;
+		printk(KERN_INFO "kgdbts:RUN sys_open for %i breakpoints\n",
+			repeat_test);
+		kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg");
+		run_sys_open_test();
+		return;
+	}
+	/* Shutdown and unregister */
+	kgdb_unregister_io_module(&kgdbts_io_ops);
+	configured = 0;
+}
+
+static int kgdbts_option_setup(char *opt)
+{
+	if (strlen(opt) > MAX_CONFIG_LEN) {
+		printk(KERN_ERR "kgdbts: config string too long\n");
+		return -ENOSPC;
+	}
+	strcpy(config, opt);
+
+	verbose = 0;
+	if (strstr(config, "V1"))
+		verbose = 1;
+	if (strstr(config, "V2"))
+		verbose = 2;
+
+	return 0;
+}
+
+__setup("kgdbts=", kgdbts_option_setup);
+
+static int configure_kgdbts(void)
+{
+	int err = 0;
+
+	if (!strlen(config) || isspace(config[0]))
+		goto noconfig;
+	err = kgdbts_option_setup(config);
+	if (err)
+		goto noconfig;
+
+	final_ack = 0;
+	run_plant_and_detach_test(1);
+
+	err = kgdb_register_io_module(&kgdbts_io_ops);
+	if (err) {
+		configured = 0;
+		return err;
+	}
+	configured = 1;
+	kgdbts_run_tests();
+
+	return err;
+
+noconfig:
+	config[0] = 0;
+	configured = 0;
+
+	return err;
+}
+
+static int __init init_kgdbts(void)
+{
+	/* Already configured? */
+	if (configured == 1)
+		return 0;
+
+	return configure_kgdbts();
+}
+
+static void cleanup_kgdbts(void)
+{
+	if (configured == 1)
+		kgdb_unregister_io_module(&kgdbts_io_ops);
+}
+
+static int kgdbts_get_char(void)
+{
+	int val = 0;
+
+	if (ts.run_test)
+		val = ts.run_test(1, 0);
+
+	return val;
+}
+
+static void kgdbts_put_char(u8 chr)
+{
+	if (ts.run_test)
+		ts.run_test(0, chr);
+}
+
+static int param_set_kgdbts_var(const char *kmessage, struct kernel_param *kp)
+{
+	int len = strlen(kmessage);
+
+	if (len >= MAX_CONFIG_LEN) {
+		printk(KERN_ERR "kgdbts: config string too long\n");
+		return -ENOSPC;
+	}
+
+	/* Only copy in the string if the init function has not run yet */
+	if (configured < 0) {
+		strcpy(config, kmessage);
+		return 0;
+	}
+
+	if (kgdb_connected) {
+		printk(KERN_ERR
+	       "kgdbts: Cannot reconfigure while KGDB is connected.\n");
+
+		return -EBUSY;
+	}
+
+	strcpy(config, kmessage);
+	/* Chop out \n char as a result of echo */
+	if (config[len - 1] == '\n')
+		config[len - 1] = '\0';
+
+	if (configured == 1)
+		cleanup_kgdbts();
+
+	/* Go and configure with the new params. */
+	return configure_kgdbts();
+}
+
+static void kgdbts_pre_exp_handler(void)
+{
+	/* Increment the module count when the debugger is active */
+	if (!kgdb_connected)
+		try_module_get(THIS_MODULE);
+}
+
+static void kgdbts_post_exp_handler(void)
+{
+	/* decrement the module count when the debugger detaches */
+	if (!kgdb_connected)
+		module_put(THIS_MODULE);
+}
+
+static struct kgdb_io kgdbts_io_ops = {
+	.name			= "kgdbts",
+	.read_char		= kgdbts_get_char,
+	.write_char		= kgdbts_put_char,
+	.pre_exception		= kgdbts_pre_exp_handler,
+	.post_exception		= kgdbts_post_exp_handler,
+};
+
+module_init(init_kgdbts);
+module_exit(cleanup_kgdbts);
+module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644);
+MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]");
+MODULE_DESCRIPTION("KGDB Test Suite");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Wind River Systems, Inc.");
+
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
new file mode 100644
index 00000000000..3648b23d5c9
--- /dev/null
+++ b/drivers/misc/lkdtm.c
@@ -0,0 +1,345 @@
+/*
+ * Kprobe module for testing crash dumps
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author: Ankita Garg <ankita@in.ibm.com>
+ *
+ * This module induces system failures at predefined crashpoints to
+ * evaluate the reliability of crash dumps obtained using different dumping
+ * solutions.
+ *
+ * It is adapted from the Linux Kernel Dump Test Tool by
+ * Fernando Luis Vazquez Cao <http://lkdtt.sourceforge.net>
+ *
+ * Usage :  insmod lkdtm.ko [recur_count={>0}] cpoint_name=<> cpoint_type=<>
+ *							[cpoint_count={>0}]
+ *
+ * recur_count : Recursion level for the stack overflow test. Default is 10.
+ *
+ * cpoint_name : Crash point where the kernel is to be crashed. It can be
+ *		 one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY,
+ *		 FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD,
+ *		 IDE_CORE_CP
+ *
+ * cpoint_type : Indicates the action to be taken on hitting the crash point.
+ *		 It can be one of PANIC, BUG, EXCEPTION, LOOP, OVERFLOW
+ *
+ * cpoint_count : Indicates the number of times the crash point is to be hit
+ *		  to trigger an action. The default is 10.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/buffer_head.h>
+#include <linux/kprobes.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/hrtimer.h>
+#include <scsi/scsi_cmnd.h>
+
+#ifdef CONFIG_IDE
+#include <linux/ide.h>
+#endif
+
+#define NUM_CPOINTS 8
+#define NUM_CPOINT_TYPES 5
+#define DEFAULT_COUNT 10
+#define REC_NUM_DEFAULT 10
+
+enum cname {
+	INVALID,
+	INT_HARDWARE_ENTRY,
+	INT_HW_IRQ_EN,
+	INT_TASKLET_ENTRY,
+	FS_DEVRW,
+	MEM_SWAPOUT,
+	TIMERADD,
+	SCSI_DISPATCH_CMD,
+	IDE_CORE_CP
+};
+
+enum ctype {
+	NONE,
+	PANIC,
+	BUG,
+	EXCEPTION,
+	LOOP,
+	OVERFLOW
+};
+
+static char* cp_name[] = {
+	"INT_HARDWARE_ENTRY",
+	"INT_HW_IRQ_EN",
+	"INT_TASKLET_ENTRY",
+	"FS_DEVRW",
+	"MEM_SWAPOUT",
+	"TIMERADD",
+	"SCSI_DISPATCH_CMD",
+	"IDE_CORE_CP"
+};
+
+static char* cp_type[] = {
+	"PANIC",
+	"BUG",
+	"EXCEPTION",
+	"LOOP",
+	"OVERFLOW"
+};
+
+static struct jprobe lkdtm;
+
+static int lkdtm_parse_commandline(void);
+static void lkdtm_handler(void);
+
+static char* cpoint_name;
+static char* cpoint_type;
+static int cpoint_count = DEFAULT_COUNT;
+static int recur_count = REC_NUM_DEFAULT;
+
+static enum cname cpoint = INVALID;
+static enum ctype cptype = NONE;
+static int count = DEFAULT_COUNT;
+
+module_param(recur_count, int, 0644);
+MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test, "\
+				 "default is 10");
+module_param(cpoint_name, charp, 0644);
+MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed");
+module_param(cpoint_type, charp, 0644);
+MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\
+				"hitting the crash point");
+module_param(cpoint_count, int, 0644);
+MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\
+				"crash point is to be hit to trigger action");
+
+static unsigned int jp_do_irq(unsigned int irq)
+{
+	lkdtm_handler();
+	jprobe_return();
+	return 0;
+}
+
+static irqreturn_t jp_handle_irq_event(unsigned int irq,
+				       struct irqaction *action)
+{
+	lkdtm_handler();
+	jprobe_return();
+	return 0;
+}
+
+static void jp_tasklet_action(struct softirq_action *a)
+{
+	lkdtm_handler();
+	jprobe_return();
+}
+
+static void jp_ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
+{
+	lkdtm_handler();
+	jprobe_return();
+}
+
+struct scan_control;
+
+static unsigned long jp_shrink_inactive_list(unsigned long max_scan,
+					     struct zone *zone,
+					     struct scan_control *sc)
+{
+	lkdtm_handler();
+	jprobe_return();
+	return 0;
+}
+
+static int jp_hrtimer_start(struct hrtimer *timer, ktime_t tim,
+			    const enum hrtimer_mode mode)
+{
+	lkdtm_handler();
+	jprobe_return();
+	return 0;
+}
+
+static int jp_scsi_dispatch_cmd(struct scsi_cmnd *cmd)
+{
+	lkdtm_handler();
+	jprobe_return();
+	return 0;
+}
+
+#ifdef CONFIG_IDE
+int jp_generic_ide_ioctl(ide_drive_t *drive, struct file *file,
+			struct block_device *bdev, unsigned int cmd,
+			unsigned long arg)
+{
+	lkdtm_handler();
+	jprobe_return();
+	return 0;
+}
+#endif
+
+static int lkdtm_parse_commandline(void)
+{
+	int i;
+
+	if (cpoint_name == NULL || cpoint_type == NULL ||
+					cpoint_count < 1 || recur_count < 1)
+		return -EINVAL;
+
+	for (i = 0; i < NUM_CPOINTS; ++i) {
+		if (!strcmp(cpoint_name, cp_name[i])) {
+			cpoint = i + 1;
+			break;
+		}
+	}
+
+	for (i = 0; i < NUM_CPOINT_TYPES; ++i) {
+		if (!strcmp(cpoint_type, cp_type[i])) {
+			cptype = i + 1;
+			break;
+		}
+	}
+
+	if (cpoint == INVALID || cptype == NONE)
+                return -EINVAL;
+
+	count = cpoint_count;
+
+	return 0;
+}
+
+static int recursive_loop(int a)
+{
+	char buf[1024];
+
+	memset(buf,0xFF,1024);
+	recur_count--;
+	if (!recur_count)
+		return 0;
+	else
+        	return recursive_loop(a);
+}
+
+void lkdtm_handler(void)
+{
+	printk(KERN_INFO "lkdtm : Crash point %s of type %s hit\n",
+					 cpoint_name, cpoint_type);
+	--count;
+
+	if (count == 0) {
+		switch (cptype) {
+		case NONE:
+			break;
+		case PANIC:
+			printk(KERN_INFO "lkdtm : PANIC\n");
+			panic("dumptest");
+			break;
+		case BUG:
+			printk(KERN_INFO "lkdtm : BUG\n");
+			BUG();
+			break;
+		case EXCEPTION:
+			printk(KERN_INFO "lkdtm : EXCEPTION\n");
+			*((int *) 0) = 0;
+			break;
+		case LOOP:
+			printk(KERN_INFO "lkdtm : LOOP\n");
+			for (;;);
+			break;
+		case OVERFLOW:
+			printk(KERN_INFO "lkdtm : OVERFLOW\n");
+			(void) recursive_loop(0);
+			break;
+		default:
+			break;
+		}
+		count = cpoint_count;
+	}
+}
+
+static int __init lkdtm_module_init(void)
+{
+	int ret;
+
+	if (lkdtm_parse_commandline() == -EINVAL) {
+		printk(KERN_INFO "lkdtm : Invalid command\n");
+		return -EINVAL;
+	}
+
+	switch (cpoint) {
+	case INT_HARDWARE_ENTRY:
+		lkdtm.kp.symbol_name = "do_IRQ";
+		lkdtm.entry = (kprobe_opcode_t*) jp_do_irq;
+		break;
+	case INT_HW_IRQ_EN:
+		lkdtm.kp.symbol_name = "handle_IRQ_event";
+		lkdtm.entry = (kprobe_opcode_t*) jp_handle_irq_event;
+		break;
+	case INT_TASKLET_ENTRY:
+		lkdtm.kp.symbol_name = "tasklet_action";
+		lkdtm.entry = (kprobe_opcode_t*) jp_tasklet_action;
+		break;
+	case FS_DEVRW:
+		lkdtm.kp.symbol_name = "ll_rw_block";
+		lkdtm.entry = (kprobe_opcode_t*) jp_ll_rw_block;
+		break;
+	case MEM_SWAPOUT:
+		lkdtm.kp.symbol_name = "shrink_inactive_list";
+		lkdtm.entry = (kprobe_opcode_t*) jp_shrink_inactive_list;
+		break;
+	case TIMERADD:
+		lkdtm.kp.symbol_name = "hrtimer_start";
+		lkdtm.entry = (kprobe_opcode_t*) jp_hrtimer_start;
+		break;
+	case SCSI_DISPATCH_CMD:
+		lkdtm.kp.symbol_name = "scsi_dispatch_cmd";
+		lkdtm.entry = (kprobe_opcode_t*) jp_scsi_dispatch_cmd;
+		break;
+	case IDE_CORE_CP:
+#ifdef CONFIG_IDE
+		lkdtm.kp.symbol_name = "generic_ide_ioctl";
+		lkdtm.entry = (kprobe_opcode_t*) jp_generic_ide_ioctl;
+#else
+		printk(KERN_INFO "lkdtm : Crash point not available\n");
+#endif
+		break;
+	default:
+		printk(KERN_INFO "lkdtm : Invalid Crash Point\n");
+		break;
+	}
+
+	if ((ret = register_jprobe(&lkdtm)) < 0) {
+                printk(KERN_INFO "lkdtm : Couldn't register jprobe\n");
+                return ret;
+	}
+
+	printk(KERN_INFO "lkdtm : Crash point %s of type %s registered\n",
+						cpoint_name, cpoint_type);
+	return 0;
+}
+
+static void __exit lkdtm_module_exit(void)
+{
+        unregister_jprobe(&lkdtm);
+        printk(KERN_INFO "lkdtm : Crash point unregistered\n");
+}
+
+module_init(lkdtm_module_init);
+module_exit(lkdtm_module_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
new file mode 100644
index 00000000000..04c27266f56
--- /dev/null
+++ b/drivers/misc/phantom.c
@@ -0,0 +1,569 @@
+/*
+ *  Copyright (C) 2005-2007 Jiri Slaby <jirislaby@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  You need a userspace library to cooperate with this driver. It (and other
+ *  info) may be obtained here:
+ *  http://www.fi.muni.cz/~xslaby/phantom.html
+ *  or alternatively, you might use OpenHaptics provided by Sensable.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/interrupt.h>
+#include <linux/cdev.h>
+#include <linux/phantom.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+
+#include <asm/atomic.h>
+#include <asm/io.h>
+
+#define PHANTOM_VERSION		"n0.9.8"
+
+#define PHANTOM_MAX_MINORS	8
+
+#define PHN_IRQCTL		0x4c    /* irq control in caddr space */
+
+#define PHB_RUNNING		1
+#define PHB_NOT_OH		2
+
+static struct class *phantom_class;
+static int phantom_major;
+
+struct phantom_device {
+	unsigned int opened;
+	void __iomem *caddr;
+	u32 __iomem *iaddr;
+	u32 __iomem *oaddr;
+	unsigned long status;
+	atomic_t counter;
+
+	wait_queue_head_t wait;
+	struct cdev cdev;
+
+	struct mutex open_lock;
+	spinlock_t regs_lock;
+
+	/* used in NOT_OH mode */
+	struct phm_regs oregs;
+	u32 ctl_reg;
+};
+
+static unsigned char phantom_devices[PHANTOM_MAX_MINORS];
+
+static int phantom_status(struct phantom_device *dev, unsigned long newstat)
+{
+	pr_debug("phantom_status %lx %lx\n", dev->status, newstat);
+
+	if (!(dev->status & PHB_RUNNING) && (newstat & PHB_RUNNING)) {
+		atomic_set(&dev->counter, 0);
+		iowrite32(PHN_CTL_IRQ, dev->iaddr + PHN_CONTROL);
+		iowrite32(0x43, dev->caddr + PHN_IRQCTL);
+		ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
+	} else if ((dev->status & PHB_RUNNING) && !(newstat & PHB_RUNNING)) {
+		iowrite32(0, dev->caddr + PHN_IRQCTL);
+		ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
+	}
+
+	dev->status = newstat;
+
+	return 0;
+}
+
+/*
+ * File ops
+ */
+
+static long phantom_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	struct phantom_device *dev = file->private_data;
+	struct phm_regs rs;
+	struct phm_reg r;
+	void __user *argp = (void __user *)arg;
+	unsigned long flags;
+	unsigned int i;
+
+	switch (cmd) {
+	case PHN_SETREG:
+	case PHN_SET_REG:
+		if (copy_from_user(&r, argp, sizeof(r)))
+			return -EFAULT;
+
+		if (r.reg > 7)
+			return -EINVAL;
+
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) &&
+				phantom_status(dev, dev->status | PHB_RUNNING)){
+			spin_unlock_irqrestore(&dev->regs_lock, flags);
+			return -ENODEV;
+		}
+
+		pr_debug("phantom: writing %x to %u\n", r.value, r.reg);
+
+		/* preserve amp bit (don't allow to change it when in NOT_OH) */
+		if (r.reg == PHN_CONTROL && (dev->status & PHB_NOT_OH)) {
+			r.value &= ~PHN_CTL_AMP;
+			r.value |= dev->ctl_reg & PHN_CTL_AMP;
+			dev->ctl_reg = r.value;
+		}
+
+		iowrite32(r.value, dev->iaddr + r.reg);
+		ioread32(dev->iaddr); /* PCI posting */
+
+		if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ))
+			phantom_status(dev, dev->status & ~PHB_RUNNING);
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
+		break;
+	case PHN_SETREGS:
+	case PHN_SET_REGS:
+		if (copy_from_user(&rs, argp, sizeof(rs)))
+			return -EFAULT;
+
+		pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask);
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		if (dev->status & PHB_NOT_OH)
+			memcpy(&dev->oregs, &rs, sizeof(rs));
+		else {
+			u32 m = min(rs.count, 8U);
+			for (i = 0; i < m; i++)
+				if (rs.mask & BIT(i))
+					iowrite32(rs.values[i], dev->oaddr + i);
+			ioread32(dev->iaddr); /* PCI posting */
+		}
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
+		break;
+	case PHN_GETREG:
+	case PHN_GET_REG:
+		if (copy_from_user(&r, argp, sizeof(r)))
+			return -EFAULT;
+
+		if (r.reg > 7)
+			return -EINVAL;
+
+		r.value = ioread32(dev->iaddr + r.reg);
+
+		if (copy_to_user(argp, &r, sizeof(r)))
+			return -EFAULT;
+		break;
+	case PHN_GETREGS:
+	case PHN_GET_REGS: {
+		u32 m;
+
+		if (copy_from_user(&rs, argp, sizeof(rs)))
+			return -EFAULT;
+
+		m = min(rs.count, 8U);
+
+		pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask);
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		for (i = 0; i < m; i++)
+			if (rs.mask & BIT(i))
+				rs.values[i] = ioread32(dev->iaddr + i);
+		atomic_set(&dev->counter, 0);
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
+
+		if (copy_to_user(argp, &rs, sizeof(rs)))
+			return -EFAULT;
+		break;
+	} case PHN_NOT_OH:
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		if (dev->status & PHB_RUNNING) {
+			printk(KERN_ERR "phantom: you need to set NOT_OH "
+					"before you start the device!\n");
+			spin_unlock_irqrestore(&dev->regs_lock, flags);
+			return -EINVAL;
+		}
+		dev->status |= PHB_NOT_OH;
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
+		break;
+	default:
+		return -ENOTTY;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_COMPAT
+static long phantom_compat_ioctl(struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	if (_IOC_NR(cmd) <= 3 && _IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
+		cmd &= ~(_IOC_SIZEMASK << _IOC_SIZESHIFT);
+		cmd |= sizeof(void *) << _IOC_SIZESHIFT;
+	}
+	return phantom_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
+}
+#else
+#define phantom_compat_ioctl NULL
+#endif
+
+static int phantom_open(struct inode *inode, struct file *file)
+{
+	struct phantom_device *dev = container_of(inode->i_cdev,
+			struct phantom_device, cdev);
+
+	lock_kernel();
+	nonseekable_open(inode, file);
+
+	if (mutex_lock_interruptible(&dev->open_lock)) {
+		unlock_kernel();
+		return -ERESTARTSYS;
+	}
+
+	if (dev->opened) {
+		mutex_unlock(&dev->open_lock);
+		unlock_kernel();
+		return -EINVAL;
+	}
+
+	WARN_ON(dev->status & PHB_NOT_OH);
+
+	file->private_data = dev;
+
+	atomic_set(&dev->counter, 0);
+	dev->opened++;
+	mutex_unlock(&dev->open_lock);
+	unlock_kernel();
+	return 0;
+}
+
+static int phantom_release(struct inode *inode, struct file *file)
+{
+	struct phantom_device *dev = file->private_data;
+
+	mutex_lock(&dev->open_lock);
+
+	dev->opened = 0;
+	phantom_status(dev, dev->status & ~PHB_RUNNING);
+	dev->status &= ~PHB_NOT_OH;
+
+	mutex_unlock(&dev->open_lock);
+
+	return 0;
+}
+
+static unsigned int phantom_poll(struct file *file, poll_table *wait)
+{
+	struct phantom_device *dev = file->private_data;
+	unsigned int mask = 0;
+
+	pr_debug("phantom_poll: %d\n", atomic_read(&dev->counter));
+	poll_wait(file, &dev->wait, wait);
+
+	if (!(dev->status & PHB_RUNNING))
+		mask = POLLERR;
+	else if (atomic_read(&dev->counter))
+		mask = POLLIN | POLLRDNORM;
+
+	pr_debug("phantom_poll end: %x/%d\n", mask, atomic_read(&dev->counter));
+
+	return mask;
+}
+
+static const struct file_operations phantom_file_ops = {
+	.open = phantom_open,
+	.release = phantom_release,
+	.unlocked_ioctl = phantom_ioctl,
+	.compat_ioctl = phantom_compat_ioctl,
+	.poll = phantom_poll,
+};
+
+static irqreturn_t phantom_isr(int irq, void *data)
+{
+	struct phantom_device *dev = data;
+	unsigned int i;
+	u32 ctl;
+
+	spin_lock(&dev->regs_lock);
+	ctl = ioread32(dev->iaddr + PHN_CONTROL);
+	if (!(ctl & PHN_CTL_IRQ)) {
+		spin_unlock(&dev->regs_lock);
+		return IRQ_NONE;
+	}
+
+	iowrite32(0, dev->iaddr);
+	iowrite32(0xc0, dev->iaddr);
+
+	if (dev->status & PHB_NOT_OH) {
+		struct phm_regs *r = &dev->oregs;
+		u32 m = min(r->count, 8U);
+
+		for (i = 0; i < m; i++)
+			if (r->mask & BIT(i))
+				iowrite32(r->values[i], dev->oaddr + i);
+
+		dev->ctl_reg ^= PHN_CTL_AMP;
+		iowrite32(dev->ctl_reg, dev->iaddr + PHN_CONTROL);
+	}
+	spin_unlock(&dev->regs_lock);
+
+	ioread32(dev->iaddr); /* PCI posting */
+
+	atomic_inc(&dev->counter);
+	wake_up_interruptible(&dev->wait);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Init and deinit driver
+ */
+
+static unsigned int __devinit phantom_get_free(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < PHANTOM_MAX_MINORS; i++)
+		if (phantom_devices[i] == 0)
+			break;
+
+	return i;
+}
+
+static int __devinit phantom_probe(struct pci_dev *pdev,
+	const struct pci_device_id *pci_id)
+{
+	struct phantom_device *pht;
+	unsigned int minor;
+	int retval;
+
+	retval = pci_enable_device(pdev);
+	if (retval)
+		goto err;
+
+	minor = phantom_get_free();
+	if (minor == PHANTOM_MAX_MINORS) {
+		dev_err(&pdev->dev, "too many devices found!\n");
+		retval = -EIO;
+		goto err_dis;
+	}
+
+	phantom_devices[minor] = 1;
+
+	retval = pci_request_regions(pdev, "phantom");
+	if (retval)
+		goto err_null;
+
+	retval = -ENOMEM;
+	pht = kzalloc(sizeof(*pht), GFP_KERNEL);
+	if (pht == NULL) {
+		dev_err(&pdev->dev, "unable to allocate device\n");
+		goto err_reg;
+	}
+
+	pht->caddr = pci_iomap(pdev, 0, 0);
+	if (pht->caddr == NULL) {
+		dev_err(&pdev->dev, "can't remap conf space\n");
+		goto err_fr;
+	}
+	pht->iaddr = pci_iomap(pdev, 2, 0);
+	if (pht->iaddr == NULL) {
+		dev_err(&pdev->dev, "can't remap input space\n");
+		goto err_unmc;
+	}
+	pht->oaddr = pci_iomap(pdev, 3, 0);
+	if (pht->oaddr == NULL) {
+		dev_err(&pdev->dev, "can't remap output space\n");
+		goto err_unmi;
+	}
+
+	mutex_init(&pht->open_lock);
+	spin_lock_init(&pht->regs_lock);
+	init_waitqueue_head(&pht->wait);
+	cdev_init(&pht->cdev, &phantom_file_ops);
+	pht->cdev.owner = THIS_MODULE;
+
+	iowrite32(0, pht->caddr + PHN_IRQCTL);
+	ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */
+	retval = request_irq(pdev->irq, phantom_isr,
+			IRQF_SHARED | IRQF_DISABLED, "phantom", pht);
+	if (retval) {
+		dev_err(&pdev->dev, "can't establish ISR\n");
+		goto err_unmo;
+	}
+
+	retval = cdev_add(&pht->cdev, MKDEV(phantom_major, minor), 1);
+	if (retval) {
+		dev_err(&pdev->dev, "chardev registration failed\n");
+		goto err_irq;
+	}
+
+	if (IS_ERR(device_create(phantom_class, &pdev->dev,
+				 MKDEV(phantom_major, minor), NULL,
+				 "phantom%u", minor)))
+		dev_err(&pdev->dev, "can't create device\n");
+
+	pci_set_drvdata(pdev, pht);
+
+	return 0;
+err_irq:
+	free_irq(pdev->irq, pht);
+err_unmo:
+	pci_iounmap(pdev, pht->oaddr);
+err_unmi:
+	pci_iounmap(pdev, pht->iaddr);
+err_unmc:
+	pci_iounmap(pdev, pht->caddr);
+err_fr:
+	kfree(pht);
+err_reg:
+	pci_release_regions(pdev);
+err_null:
+	phantom_devices[minor] = 0;
+err_dis:
+	pci_disable_device(pdev);
+err:
+	return retval;
+}
+
+static void __devexit phantom_remove(struct pci_dev *pdev)
+{
+	struct phantom_device *pht = pci_get_drvdata(pdev);
+	unsigned int minor = MINOR(pht->cdev.dev);
+
+	device_destroy(phantom_class, MKDEV(phantom_major, minor));
+
+	cdev_del(&pht->cdev);
+
+	iowrite32(0, pht->caddr + PHN_IRQCTL);
+	ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */
+	free_irq(pdev->irq, pht);
+
+	pci_iounmap(pdev, pht->oaddr);
+	pci_iounmap(pdev, pht->iaddr);
+	pci_iounmap(pdev, pht->caddr);
+
+	kfree(pht);
+
+	pci_release_regions(pdev);
+
+	phantom_devices[minor] = 0;
+
+	pci_disable_device(pdev);
+}
+
+#ifdef CONFIG_PM
+static int phantom_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct phantom_device *dev = pci_get_drvdata(pdev);
+
+	iowrite32(0, dev->caddr + PHN_IRQCTL);
+	ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
+
+	synchronize_irq(pdev->irq);
+
+	return 0;
+}
+
+static int phantom_resume(struct pci_dev *pdev)
+{
+	struct phantom_device *dev = pci_get_drvdata(pdev);
+
+	iowrite32(0, dev->caddr + PHN_IRQCTL);
+
+	return 0;
+}
+#else
+#define phantom_suspend	NULL
+#define phantom_resume	NULL
+#endif
+
+static struct pci_device_id phantom_pci_tbl[] __devinitdata = {
+	{ .vendor = PCI_VENDOR_ID_PLX, .device = PCI_DEVICE_ID_PLX_9050,
+	  .subvendor = PCI_VENDOR_ID_PLX, .subdevice = PCI_DEVICE_ID_PLX_9050,
+	  .class = PCI_CLASS_BRIDGE_OTHER << 8, .class_mask = 0xffff00 },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, phantom_pci_tbl);
+
+static struct pci_driver phantom_pci_driver = {
+	.name = "phantom",
+	.id_table = phantom_pci_tbl,
+	.probe = phantom_probe,
+	.remove = __devexit_p(phantom_remove),
+	.suspend = phantom_suspend,
+	.resume = phantom_resume
+};
+
+static ssize_t phantom_show_version(struct class *cls, char *buf)
+{
+	return sprintf(buf, PHANTOM_VERSION "\n");
+}
+
+static CLASS_ATTR(version, 0444, phantom_show_version, NULL);
+
+static int __init phantom_init(void)
+{
+	int retval;
+	dev_t dev;
+
+	phantom_class = class_create(THIS_MODULE, "phantom");
+	if (IS_ERR(phantom_class)) {
+		retval = PTR_ERR(phantom_class);
+		printk(KERN_ERR "phantom: can't register phantom class\n");
+		goto err;
+	}
+	retval = class_create_file(phantom_class, &class_attr_version);
+	if (retval) {
+		printk(KERN_ERR "phantom: can't create sysfs version file\n");
+		goto err_class;
+	}
+
+	retval = alloc_chrdev_region(&dev, 0, PHANTOM_MAX_MINORS, "phantom");
+	if (retval) {
+		printk(KERN_ERR "phantom: can't register character device\n");
+		goto err_attr;
+	}
+	phantom_major = MAJOR(dev);
+
+	retval = pci_register_driver(&phantom_pci_driver);
+	if (retval) {
+		printk(KERN_ERR "phantom: can't register pci driver\n");
+		goto err_unchr;
+	}
+
+	printk(KERN_INFO "Phantom Linux Driver, version " PHANTOM_VERSION ", "
+			"init OK\n");
+
+	return 0;
+err_unchr:
+	unregister_chrdev_region(dev, PHANTOM_MAX_MINORS);
+err_attr:
+	class_remove_file(phantom_class, &class_attr_version);
+err_class:
+	class_destroy(phantom_class);
+err:
+	return retval;
+}
+
+static void __exit phantom_exit(void)
+{
+	pci_unregister_driver(&phantom_pci_driver);
+
+	unregister_chrdev_region(MKDEV(phantom_major, 0), PHANTOM_MAX_MINORS);
+
+	class_remove_file(phantom_class, &class_attr_version);
+	class_destroy(phantom_class);
+
+	pr_debug("phantom: module successfully removed\n");
+}
+
+module_init(phantom_init);
+module_exit(phantom_exit);
+
+MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>");
+MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(PHANTOM_VERSION);
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
new file mode 100644
index 00000000000..7c4c306dfa8
--- /dev/null
+++ b/drivers/misc/sgi-gru/Makefile
@@ -0,0 +1,7 @@
+ifdef CONFIG_SGI_GRU_DEBUG
+  EXTRA_CFLAGS	+= -DDEBUG
+endif
+
+obj-$(CONFIG_SGI_GRU) := gru.o
+gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o
+
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
new file mode 100644
index 00000000000..3ad76cd18b4
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru.h
@@ -0,0 +1,78 @@
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation; either version 2.1 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRU_H__
+#define __GRU_H__
+
+/*
+ * GRU architectural definitions
+ */
+#define GRU_CACHE_LINE_BYTES		64
+#define GRU_HANDLE_STRIDE		256
+#define GRU_CB_BASE			0
+#define GRU_DS_BASE			0x20000
+
+/*
+ * Size used to map GRU GSeg
+ */
+#if defined(CONFIG_IA64)
+#define GRU_GSEG_PAGESIZE	(256 * 1024UL)
+#elif defined(CONFIG_X86_64)
+#define GRU_GSEG_PAGESIZE	(256 * 1024UL)		/* ZZZ 2MB ??? */
+#else
+#error "Unsupported architecture"
+#endif
+
+/*
+ * Structure for obtaining GRU resource information
+ */
+struct gru_chiplet_info {
+	int	node;
+	int	chiplet;
+	int	blade;
+	int	total_dsr_bytes;
+	int	total_cbr;
+	int	total_user_dsr_bytes;
+	int	total_user_cbr;
+	int	free_user_dsr_bytes;
+	int	free_user_cbr;
+};
+
+/*
+ * Statictics kept for each context.
+ */
+struct gru_gseg_statistics {
+	unsigned long	fmm_tlbmiss;
+	unsigned long	upm_tlbmiss;
+	unsigned long	tlbdropin;
+	unsigned long	context_stolen;
+	unsigned long	reserved[10];
+};
+
+/* Flags for GRU options on the gru_create_context() call */
+/* Select one of the follow 4 options to specify how TLB misses are handled */
+#define GRU_OPT_MISS_DEFAULT	0x0000	/* Use default mode */
+#define GRU_OPT_MISS_USER_POLL	0x0001	/* User will poll CB for faults */
+#define GRU_OPT_MISS_FMM_INTR	0x0002	/* Send interrupt to cpu to
+					   handle fault */
+#define GRU_OPT_MISS_FMM_POLL	0x0003	/* Use system polling thread */
+#define GRU_OPT_MISS_MASK	0x0003	/* Mask for TLB MISS option */
+
+
+
+#endif		/* __GRU_H__ */
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
new file mode 100644
index 00000000000..d95587cc794
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -0,0 +1,735 @@
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation; either version 2.1 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRU_INSTRUCTIONS_H__
+#define __GRU_INSTRUCTIONS_H__
+
+extern int gru_check_status_proc(void *cb);
+extern int gru_wait_proc(void *cb);
+extern void gru_wait_abort_proc(void *cb);
+
+
+
+/*
+ * Architecture dependent functions
+ */
+
+#if defined(CONFIG_IA64)
+#include <linux/compiler.h>
+#include <asm/intrinsics.h>
+#define __flush_cache(p)		ia64_fc((unsigned long)p)
+/* Use volatile on IA64 to ensure ordering via st4.rel */
+#define gru_ordered_store_ulong(p, v)					\
+		do {							\
+			barrier();					\
+			*((volatile unsigned long *)(p)) = v; /* force st.rel */	\
+		} while (0)
+#elif defined(CONFIG_X86_64)
+#define __flush_cache(p)		clflush(p)
+#define gru_ordered_store_ulong(p, v)					\
+		do {							\
+			barrier();					\
+			*(unsigned long *)p = v;			\
+		} while (0)
+#else
+#error "Unsupported architecture"
+#endif
+
+/*
+ * Control block status and exception codes
+ */
+#define CBS_IDLE			0
+#define CBS_EXCEPTION			1
+#define CBS_ACTIVE			2
+#define CBS_CALL_OS			3
+
+/* CB substatus bitmasks */
+#define CBSS_MSG_QUEUE_MASK		7
+#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK	8
+
+/* CB substatus message queue values (low 3 bits of substatus) */
+#define CBSS_NO_ERROR			0
+#define CBSS_LB_OVERFLOWED		1
+#define CBSS_QLIMIT_REACHED		2
+#define CBSS_PAGE_OVERFLOW		3
+#define CBSS_AMO_NACKED			4
+#define CBSS_PUT_NACKED			5
+
+/*
+ * Structure used to fetch exception detail for CBs that terminate with
+ * CBS_EXCEPTION
+ */
+struct control_block_extended_exc_detail {
+	unsigned long	cb;
+	int		opc;
+	int		ecause;
+	int		exopc;
+	long		exceptdet0;
+	int		exceptdet1;
+	int		cbrstate;
+	int		cbrexecstatus;
+};
+
+/*
+ * Instruction formats
+ */
+
+/*
+ * Generic instruction format.
+ * This definition has precise bit field definitions.
+ */
+struct gru_instruction_bits {
+    /* DW 0  - low */
+    unsigned int		icmd:      1;
+    unsigned char		ima:	   3;	/* CB_DelRep, unmapped mode */
+    unsigned char		reserved0: 4;
+    unsigned int		xtype:     3;
+    unsigned int		iaa0:      2;
+    unsigned int		iaa1:      2;
+    unsigned char		reserved1: 1;
+    unsigned char		opc:       8;	/* opcode */
+    unsigned char		exopc:     8;	/* extended opcode */
+    /* DW 0  - high */
+    unsigned int		idef2:    22;	/* TRi0 */
+    unsigned char		reserved2: 2;
+    unsigned char		istatus:   2;
+    unsigned char		isubstatus:4;
+    unsigned char		reserved3: 1;
+    unsigned char		tlb_fault_color: 1;
+    /* DW 1 */
+    unsigned long		idef4;		/* 42 bits: TRi1, BufSize */
+    /* DW 2-6 */
+    unsigned long		idef1;		/* BAddr0 */
+    unsigned long		idef5;		/* Nelem */
+    unsigned long		idef6;		/* Stride, Operand1 */
+    unsigned long		idef3;		/* BAddr1, Value, Operand2 */
+    unsigned long		reserved4;
+    /* DW 7 */
+    unsigned long		avalue;		 /* AValue */
+};
+
+/*
+ * Generic instruction with friendlier names. This format is used
+ * for inline instructions.
+ */
+struct gru_instruction {
+    /* DW 0 */
+    union {
+    	unsigned long		op64;    /* icmd,xtype,iaa0,ima,opc,tri0 */
+	struct {
+		unsigned int	op32;
+		unsigned int	tri0;
+	};
+    };
+    unsigned long		tri1_bufsize;		/* DW 1 */
+    unsigned long		baddr0;			/* DW 2 */
+    unsigned long		nelem;			/* DW 3 */
+    unsigned long		op1_stride;		/* DW 4 */
+    unsigned long		op2_value_baddr1;	/* DW 5 */
+    unsigned long		reserved0;		/* DW 6 */
+    unsigned long		avalue;			/* DW 7 */
+};
+
+/* Some shifts and masks for the low 64 bits of a GRU command */
+#define GRU_CB_ICMD_SHFT	0
+#define GRU_CB_ICMD_MASK	0x1
+#define GRU_CB_XTYPE_SHFT	8
+#define GRU_CB_XTYPE_MASK	0x7
+#define GRU_CB_IAA0_SHFT	11
+#define GRU_CB_IAA0_MASK	0x3
+#define GRU_CB_IAA1_SHFT	13
+#define GRU_CB_IAA1_MASK	0x3
+#define GRU_CB_IMA_SHFT		1
+#define GRU_CB_IMA_MASK		0x3
+#define GRU_CB_OPC_SHFT		16
+#define GRU_CB_OPC_MASK		0xff
+#define GRU_CB_EXOPC_SHFT	24
+#define GRU_CB_EXOPC_MASK	0xff
+#define GRU_IDEF2_SHFT		32
+#define GRU_IDEF2_MASK		0x3ffff
+#define GRU_ISTATUS_SHFT	56
+#define GRU_ISTATUS_MASK	0x3
+
+/* GRU instruction opcodes (opc field) */
+#define OP_NOP		0x00
+#define OP_BCOPY	0x01
+#define OP_VLOAD	0x02
+#define OP_IVLOAD	0x03
+#define OP_VSTORE	0x04
+#define OP_IVSTORE	0x05
+#define OP_VSET		0x06
+#define OP_IVSET	0x07
+#define OP_MESQ		0x08
+#define OP_GAMXR	0x09
+#define OP_GAMIR	0x0a
+#define OP_GAMIRR	0x0b
+#define OP_GAMER	0x0c
+#define OP_GAMERR	0x0d
+#define OP_BSTORE	0x0e
+#define OP_VFLUSH	0x0f
+
+
+/* Extended opcodes values (exopc field) */
+
+/* GAMIR - AMOs with implicit operands */
+#define EOP_IR_FETCH	0x01 /* Plain fetch of memory */
+#define EOP_IR_CLR	0x02 /* Fetch and clear */
+#define EOP_IR_INC	0x05 /* Fetch and increment */
+#define EOP_IR_DEC	0x07 /* Fetch and decrement */
+#define EOP_IR_QCHK1	0x0d /* Queue check, 64 byte msg */
+#define EOP_IR_QCHK2	0x0e /* Queue check, 128 byte msg */
+
+/* GAMIRR - Registered AMOs with implicit operands */
+#define EOP_IRR_FETCH	0x01 /* Registered fetch of memory */
+#define EOP_IRR_CLR	0x02 /* Registered fetch and clear */
+#define EOP_IRR_INC	0x05 /* Registered fetch and increment */
+#define EOP_IRR_DEC	0x07 /* Registered fetch and decrement */
+#define EOP_IRR_DECZ	0x0f /* Registered fetch and decrement, update on zero*/
+
+/* GAMER - AMOs with explicit operands */
+#define EOP_ER_SWAP	0x00 /* Exchange argument and memory */
+#define EOP_ER_OR	0x01 /* Logical OR with memory */
+#define EOP_ER_AND	0x02 /* Logical AND with memory */
+#define EOP_ER_XOR	0x03 /* Logical XOR with memory */
+#define EOP_ER_ADD	0x04 /* Add value to memory */
+#define EOP_ER_CSWAP	0x08 /* Compare with operand2, write operand1 if match*/
+#define EOP_ER_CADD	0x0c /* Queue check, operand1*64 byte msg */
+
+/* GAMERR - Registered AMOs with explicit operands */
+#define EOP_ERR_SWAP	0x00 /* Exchange argument and memory */
+#define EOP_ERR_OR	0x01 /* Logical OR with memory */
+#define EOP_ERR_AND	0x02 /* Logical AND with memory */
+#define EOP_ERR_XOR	0x03 /* Logical XOR with memory */
+#define EOP_ERR_ADD	0x04 /* Add value to memory */
+#define EOP_ERR_CSWAP	0x08 /* Compare with operand2, write operand1 if match*/
+#define EOP_ERR_EPOLL	0x09 /* Poll for equality */
+#define EOP_ERR_NPOLL	0x0a /* Poll for inequality */
+
+/* GAMXR - SGI Arithmetic unit */
+#define EOP_XR_CSWAP	0x0b /* Masked compare exchange */
+
+
+/* Transfer types (xtype field) */
+#define XTYPE_B		0x0	/* byte */
+#define XTYPE_S		0x1	/* short (2-byte) */
+#define XTYPE_W		0x2	/* word (4-byte) */
+#define XTYPE_DW	0x3	/* doubleword (8-byte) */
+#define XTYPE_CL	0x6	/* cacheline (64-byte) */
+
+
+/* Instruction access attributes (iaa0, iaa1 fields) */
+#define IAA_RAM		0x0	/* normal cached RAM access */
+#define IAA_NCRAM	0x2	/* noncoherent RAM access */
+#define IAA_MMIO	0x1	/* noncoherent memory-mapped I/O space */
+#define IAA_REGISTER	0x3	/* memory-mapped registers, etc. */
+
+
+/* Instruction mode attributes (ima field) */
+#define IMA_MAPPED	0x0	/* Virtual mode  */
+#define IMA_CB_DELAY	0x1	/* hold read responses until status changes */
+#define IMA_UNMAPPED	0x2	/* bypass the TLBs (OS only) */
+#define IMA_INTERRUPT	0x4	/* Interrupt when instruction completes */
+
+/* CBE ecause bits */
+#define CBE_CAUSE_RI				(1 << 0)
+#define CBE_CAUSE_INVALID_INSTRUCTION		(1 << 1)
+#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN	(1 << 2)
+#define CBE_CAUSE_PE_CHECK_DATA_ERROR		(1 << 3)
+#define CBE_CAUSE_IAA_GAA_MISMATCH		(1 << 4)
+#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION	(1 << 5)
+#define CBE_CAUSE_OS_FATAL_TLB_FAULT		(1 << 6)
+#define CBE_CAUSE_EXECUTION_HW_ERROR		(1 << 7)
+#define CBE_CAUSE_TLBHW_ERROR			(1 << 8)
+#define CBE_CAUSE_RA_REQUEST_TIMEOUT		(1 << 9)
+#define CBE_CAUSE_HA_REQUEST_TIMEOUT		(1 << 10)
+#define CBE_CAUSE_RA_RESPONSE_FATAL		(1 << 11)
+#define CBE_CAUSE_RA_RESPONSE_NON_FATAL		(1 << 12)
+#define CBE_CAUSE_HA_RESPONSE_FATAL		(1 << 13)
+#define CBE_CAUSE_HA_RESPONSE_NON_FATAL		(1 << 14)
+#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR	(1 << 15)
+#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR	(1 << 16)
+#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR	(1 << 17)
+#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR	(1 << 18)
+#define CBE_CAUSE_FORCED_ERROR			(1 << 19)
+
+/* CBE cbrexecstatus bits */
+#define CBR_EXS_ABORT_OCC_BIT			0
+#define CBR_EXS_INT_OCC_BIT			1
+#define CBR_EXS_PENDING_BIT			2
+#define CBR_EXS_QUEUED_BIT			3
+#define CBR_EXS_TLB_INVAL_BIT			4
+#define CBR_EXS_EXCEPTION_BIT			5
+#define CBR_EXS_CB_INT_PENDING_BIT		6
+
+#define CBR_EXS_ABORT_OCC			(1 << CBR_EXS_ABORT_OCC_BIT)
+#define CBR_EXS_INT_OCC				(1 << CBR_EXS_INT_OCC_BIT)
+#define CBR_EXS_PENDING				(1 << CBR_EXS_PENDING_BIT)
+#define CBR_EXS_QUEUED				(1 << CBR_EXS_QUEUED_BIT)
+#define CBR_EXS_TLB_INVAL			(1 << CBR_EXS_TLB_INVAL_BIT)
+#define CBR_EXS_EXCEPTION			(1 << CBR_EXS_EXCEPTION_BIT)
+#define CBR_EXS_CB_INT_PENDING			(1 << CBR_EXS_CB_INT_PENDING_BIT)
+
+/*
+ * Exceptions are retried for the following cases. If any OTHER bits are set
+ * in ecause, the exception is not retryable.
+ */
+#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR |		\
+			      CBE_CAUSE_TLBHW_ERROR |			\
+			      CBE_CAUSE_RA_REQUEST_TIMEOUT |		\
+			      CBE_CAUSE_RA_RESPONSE_NON_FATAL |		\
+			      CBE_CAUSE_HA_RESPONSE_NON_FATAL |		\
+			      CBE_CAUSE_RA_RESPONSE_DATA_ERROR |	\
+			      CBE_CAUSE_HA_RESPONSE_DATA_ERROR		\
+			      )
+
+/* Message queue head structure */
+union gru_mesqhead {
+	unsigned long	val;
+	struct {
+		unsigned int	head;
+		unsigned int	limit;
+	};
+};
+
+
+/* Generate the low word of a GRU instruction */
+static inline unsigned long
+__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
+       unsigned char iaa0, unsigned char iaa1,
+       unsigned long idef2, unsigned char ima)
+{
+    return (1 << GRU_CB_ICMD_SHFT) |
+	   ((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) |
+	   (idef2<< GRU_IDEF2_SHFT) |
+	   (iaa0 << GRU_CB_IAA0_SHFT) |
+	   (iaa1 << GRU_CB_IAA1_SHFT) |
+	   (ima << GRU_CB_IMA_SHFT) |
+	   (xtype << GRU_CB_XTYPE_SHFT) |
+	   (opcode << GRU_CB_OPC_SHFT) |
+	   (exopc << GRU_CB_EXOPC_SHFT);
+}
+
+/*
+ * Architecture specific intrinsics
+ */
+static inline void gru_flush_cache(void *p)
+{
+	__flush_cache(p);
+}
+
+/*
+ * Store the lower 64 bits of the command including the "start" bit. Then
+ * start the instruction executing.
+ */
+static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64)
+{
+	gru_ordered_store_ulong(ins, op64);
+	mb();
+	gru_flush_cache(ins);
+}
+
+
+/* Convert "hints" to IMA */
+#define CB_IMA(h)		((h) | IMA_UNMAPPED)
+
+/* Convert data segment cache line index into TRI0 / TRI1 value */
+#define GRU_DINDEX(i)		((i) * GRU_CACHE_LINE_BYTES)
+
+/* Inline functions for GRU instructions.
+ *     Note:
+ *     	- nelem and stride are in elements
+ *     	- tri0/tri1 is in bytes for the beginning of the data segment.
+ */
+static inline void gru_vload_phys(void *cb, unsigned long gpa,
+		unsigned int tri0, int iaa, unsigned long hints)
+{
+	struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+	ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
+	ins->nelem = 1;
+	ins->op1_stride = 1;
+	gru_start_instruction(ins, __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0,
+					(unsigned long)tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vstore_phys(void *cb, unsigned long gpa,
+		unsigned int tri0, int iaa, unsigned long hints)
+{
+	struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+	ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
+	ins->nelem = 1;
+	ins->op1_stride = 1;
+	gru_start_instruction(ins, __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0,
+					(unsigned long)tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vload(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned char xtype, unsigned long nelem,
+		unsigned long stride, unsigned long hints)
+{
+	struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;
+	ins->op1_stride = stride;
+	gru_start_instruction(ins, __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
+					(unsigned long)tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vstore(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned char xtype, unsigned long nelem,
+		unsigned long stride, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;
+	ins->op1_stride = stride;
+	gru_start_instruction(ins, __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
+}
+
+static inline void gru_ivload(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned int tri1, unsigned char xtype,
+		unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;
+	ins->tri1_bufsize = tri1;
+	gru_start_instruction(ins, __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
+}
+
+static inline void gru_ivstore(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned int tri1,
+		unsigned char xtype, unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;
+	ins->tri1_bufsize = tri1;
+	gru_start_instruction(ins, __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vset(void *cb, unsigned long mem_addr,
+		unsigned long value, unsigned char xtype, unsigned long nelem,
+		unsigned long stride, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->op2_value_baddr1 = value;
+	ins->nelem = nelem;
+	ins->op1_stride = stride;
+	gru_start_instruction(ins, __opdword(OP_VSET, 0, xtype, IAA_RAM, 0,
+					 0, CB_IMA(hints)));
+}
+
+static inline void gru_ivset(void *cb, unsigned long mem_addr,
+		unsigned int tri1, unsigned long value, unsigned char xtype,
+		unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->op2_value_baddr1 = value;
+	ins->nelem = nelem;
+	ins->tri1_bufsize = tri1;
+	gru_start_instruction(ins, __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_vflush(void *cb, unsigned long mem_addr,
+		unsigned long nelem, unsigned char xtype, unsigned long stride,
+		unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->op1_stride = stride;
+	ins->nelem = nelem;
+	gru_start_instruction(ins, __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_nop(void *cb, int hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	gru_start_instruction(ins, __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints)));
+}
+
+
+static inline void gru_bcopy(void *cb, const unsigned long src,
+		unsigned long dest,
+		unsigned int tri0, unsigned int xtype, unsigned long nelem,
+		unsigned int bufsize, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op2_value_baddr1 = (long)dest;
+	ins->nelem = nelem;
+	ins->tri1_bufsize = bufsize;
+	gru_start_instruction(ins, __opdword(OP_BCOPY, 0, xtype, IAA_RAM,
+					IAA_RAM, tri0, CB_IMA(hints)));
+}
+
+static inline void gru_bstore(void *cb, const unsigned long src,
+		unsigned long dest, unsigned int tri0, unsigned int xtype,
+		unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op2_value_baddr1 = (long)dest;
+	ins->nelem = nelem;
+	gru_start_instruction(ins, __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
+					tri0, CB_IMA(hints)));
+}
+
+static inline void gru_gamir(void *cb, int exopc, unsigned long src,
+		unsigned int xtype, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	gru_start_instruction(ins, __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
+		unsigned int xtype, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	gru_start_instruction(ins, __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_gamer(void *cb, int exopc, unsigned long src,
+		unsigned int xtype,
+		unsigned long operand1, unsigned long operand2,
+		unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op1_stride = operand1;
+	ins->op2_value_baddr1 = operand2;
+	gru_start_instruction(ins, __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
+		unsigned int xtype, unsigned long operand1,
+		unsigned long operand2, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op1_stride = operand1;
+	ins->op2_value_baddr1 = operand2;
+	gru_start_instruction(ins, __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_gamxr(void *cb, unsigned long src,
+		unsigned int tri0, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->nelem = 4;
+	gru_start_instruction(ins, __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
+				 IAA_RAM, 0, 0, CB_IMA(hints)));
+}
+
+static inline void gru_mesq(void *cb, unsigned long queue,
+		unsigned long tri0, unsigned long nelem,
+		unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)queue;
+	ins->nelem = nelem;
+	gru_start_instruction(ins, __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
+}
+
+static inline unsigned long gru_get_amo_value(void *cb)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	return ins->avalue;
+}
+
+static inline int gru_get_amo_value_head(void *cb)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	return ins->avalue & 0xffffffff;
+}
+
+static inline int gru_get_amo_value_limit(void *cb)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	return ins->avalue >> 32;
+}
+
+static inline union gru_mesqhead  gru_mesq_head(int head, int limit)
+{
+	union gru_mesqhead mqh;
+
+	mqh.head = head;
+	mqh.limit = limit;
+	return mqh;
+}
+
+/*
+ * Get struct control_block_extended_exc_detail for CB.
+ */
+extern int gru_get_cb_exception_detail(void *cb,
+		       struct control_block_extended_exc_detail *excdet);
+
+#define GRU_EXC_STR_SIZE		256
+
+
+/*
+ * Control block definition for checking status
+ */
+struct gru_control_block_status {
+	unsigned int	icmd		:1;
+	unsigned int	ima		:3;
+	unsigned int	reserved0	:4;
+	unsigned int	unused1		:24;
+	unsigned int	unused2		:24;
+	unsigned int	istatus		:2;
+	unsigned int	isubstatus	:4;
+	unsigned int	unused3		:2;
+};
+
+/* Get CB status */
+static inline int gru_get_cb_status(void *cb)
+{
+	struct gru_control_block_status *cbs = (void *)cb;
+
+	return cbs->istatus;
+}
+
+/* Get CB message queue substatus */
+static inline int gru_get_cb_message_queue_substatus(void *cb)
+{
+	struct gru_control_block_status *cbs = (void *)cb;
+
+	return cbs->isubstatus & CBSS_MSG_QUEUE_MASK;
+}
+
+/* Get CB substatus */
+static inline int gru_get_cb_substatus(void *cb)
+{
+	struct gru_control_block_status *cbs = (void *)cb;
+
+	return cbs->isubstatus;
+}
+
+/*
+ * User interface to check an instruction status. UPM and exceptions
+ * are handled automatically. However, this function does NOT wait
+ * for an active instruction to complete.
+ *
+ */
+static inline int gru_check_status(void *cb)
+{
+	struct gru_control_block_status *cbs = (void *)cb;
+	int ret;
+
+	ret = cbs->istatus;
+	if (ret != CBS_ACTIVE)
+		ret = gru_check_status_proc(cb);
+	return ret;
+}
+
+/*
+ * User interface (via inline function) to wait for an instruction
+ * to complete. Completion status (IDLE or EXCEPTION is returned
+ * to the user. Exception due to hardware errors are automatically
+ * retried before returning an exception.
+ *
+ */
+static inline int gru_wait(void *cb)
+{
+	return gru_wait_proc(cb);
+}
+
+/*
+ * Wait for CB to complete. Aborts program if error. (Note: error does NOT
+ * mean TLB mis - only fatal errors such as memory parity error or user
+ * bugs will cause termination.
+ */
+static inline void gru_wait_abort(void *cb)
+{
+	gru_wait_abort_proc(cb);
+}
+
+/*
+ * Get a pointer to the start of a gseg
+ * 	p	- Any valid pointer within the gseg
+ */
+static inline void *gru_get_gseg_pointer (void *p)
+{
+	return (void *)((unsigned long)p & ~(GRU_GSEG_PAGESIZE - 1));
+}
+
+/*
+ * Get a pointer to a control block
+ * 	gseg	- GSeg address returned from gru_get_thread_gru_segment()
+ * 	index	- index of desired CB
+ */
+static inline void *gru_get_cb_pointer(void *gseg,
+						      int index)
+{
+	return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE;
+}
+
+/*
+ * Get a pointer to a cacheline in the data segment portion of a GSeg
+ * 	gseg	- GSeg address returned from gru_get_thread_gru_segment()
+ * 	index	- index of desired cache line
+ */
+static inline void *gru_get_data_pointer(void *gseg, int index)
+{
+	return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES;
+}
+
+/*
+ * Convert a vaddr into the tri index within the GSEG
+ * 	vaddr		- virtual address of within gseg
+ */
+static inline int gru_get_tri(void *vaddr)
+{
+	return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE;
+}
+#endif		/* __GRU_INSTRUCTIONS_H__ */
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
new file mode 100644
index 00000000000..38657cdaf54
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -0,0 +1,901 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              FAULT HANDLER FOR GRU DETECTED TLB MISSES
+ *
+ * This file contains code that handles TLB misses within the GRU.
+ * These misses are reported either via interrupts or user polling of
+ * the user CB.
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/security.h>
+#include <asm/pgtable.h>
+#include "gru.h"
+#include "grutables.h"
+#include "grulib.h"
+#include "gru_instructions.h"
+#include <asm/uv/uv_hub.h>
+
+/* Return codes for vtop functions */
+#define VTOP_SUCCESS               0
+#define VTOP_INVALID               -1
+#define VTOP_RETRY                 -2
+
+
+/*
+ * Test if a physical address is a valid GRU GSEG address
+ */
+static inline int is_gru_paddr(unsigned long paddr)
+{
+	return paddr >= gru_start_paddr && paddr < gru_end_paddr;
+}
+
+/*
+ * Find the vma of a GRU segment. Caller must hold mmap_sem.
+ */
+struct vm_area_struct *gru_find_vma(unsigned long vaddr)
+{
+	struct vm_area_struct *vma;
+
+	vma = find_vma(current->mm, vaddr);
+	if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops)
+		return vma;
+	return NULL;
+}
+
+/*
+ * Find and lock the gts that contains the specified user vaddr.
+ *
+ * Returns:
+ * 	- *gts with the mmap_sem locked for read and the GTS locked.
+ *	- NULL if vaddr invalid OR is not a valid GSEG vaddr.
+ */
+
+static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct gru_thread_state *gts = NULL;
+
+	down_read(&mm->mmap_sem);
+	vma = gru_find_vma(vaddr);
+	if (vma)
+		gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+	if (gts)
+		mutex_lock(&gts->ts_ctxlock);
+	else
+		up_read(&mm->mmap_sem);
+	return gts;
+}
+
+static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct gru_thread_state *gts = ERR_PTR(-EINVAL);
+
+	down_write(&mm->mmap_sem);
+	vma = gru_find_vma(vaddr);
+	if (!vma)
+		goto err;
+
+	gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
+	if (IS_ERR(gts))
+		goto err;
+	mutex_lock(&gts->ts_ctxlock);
+	downgrade_write(&mm->mmap_sem);
+	return gts;
+
+err:
+	up_write(&mm->mmap_sem);
+	return gts;
+}
+
+/*
+ * Unlock a GTS that was previously locked with gru_find_lock_gts().
+ */
+static void gru_unlock_gts(struct gru_thread_state *gts)
+{
+	mutex_unlock(&gts->ts_ctxlock);
+	up_read(&current->mm->mmap_sem);
+}
+
+/*
+ * Set a CB.istatus to active using a user virtual address. This must be done
+ * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY.
+ * If the line is evicted, the status may be lost. The in-cache update
+ * is necessary to prevent the user from seeing a stale cb.istatus that will
+ * change as soon as the TFH restart is complete. Races may cause an
+ * occasional failure to clear the cb.istatus, but that is ok.
+ */
+static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk)
+{
+	if (cbk) {
+		cbk->istatus = CBS_ACTIVE;
+	}
+}
+
+/*
+ * Read & clear a TFM
+ *
+ * The GRU has an array of fault maps. A map is private to a cpu
+ * Only one cpu will be accessing a cpu's fault map.
+ *
+ * This function scans the cpu-private fault map & clears all bits that
+ * are set. The function returns a bitmap that indicates the bits that
+ * were cleared. Note that sense the maps may be updated asynchronously by
+ * the GRU, atomic operations must be used to clear bits.
+ */
+static void get_clear_fault_map(struct gru_state *gru,
+				struct gru_tlb_fault_map *imap,
+				struct gru_tlb_fault_map *dmap)
+{
+	unsigned long i, k;
+	struct gru_tlb_fault_map *tfm;
+
+	tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
+	prefetchw(tfm);		/* Helps on hardware, required for emulator */
+	for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
+		k = tfm->fault_bits[i];
+		if (k)
+			k = xchg(&tfm->fault_bits[i], 0UL);
+		imap->fault_bits[i] = k;
+		k = tfm->done_bits[i];
+		if (k)
+			k = xchg(&tfm->done_bits[i], 0UL);
+		dmap->fault_bits[i] = k;
+	}
+
+	/*
+	 * Not functionally required but helps performance. (Required
+	 * on emulator)
+	 */
+	gru_flush_cache(tfm);
+}
+
+/*
+ * Atomic (interrupt context) & non-atomic (user context) functions to
+ * convert a vaddr into a physical address. The size of the page
+ * is returned in pageshift.
+ * 	returns:
+ * 		  0 - successful
+ * 		< 0 - error code
+ * 		  1 - (atomic only) try again in non-atomic context
+ */
+static int non_atomic_pte_lookup(struct vm_area_struct *vma,
+				 unsigned long vaddr, int write,
+				 unsigned long *paddr, int *pageshift)
+{
+	struct page *page;
+
+#ifdef CONFIG_HUGETLB_PAGE
+	*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+#else
+	*pageshift = PAGE_SHIFT;
+#endif
+	if (get_user_pages
+	    (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0)
+		return -EFAULT;
+	*paddr = page_to_phys(page);
+	put_page(page);
+	return 0;
+}
+
+/*
+ * atomic_pte_lookup
+ *
+ * Convert a user virtual address to a physical address
+ * Only supports Intel large pages (2MB only) on x86_64.
+ *	ZZZ - hugepage support is incomplete
+ *
+ * NOTE: mmap_sem is already held on entry to this function. This
+ * guarantees existence of the page tables.
+ */
+static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
+	int write, unsigned long *paddr, int *pageshift)
+{
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pud_t *pudp;
+	pte_t pte;
+
+	pgdp = pgd_offset(vma->vm_mm, vaddr);
+	if (unlikely(pgd_none(*pgdp)))
+		goto err;
+
+	pudp = pud_offset(pgdp, vaddr);
+	if (unlikely(pud_none(*pudp)))
+		goto err;
+
+	pmdp = pmd_offset(pudp, vaddr);
+	if (unlikely(pmd_none(*pmdp)))
+		goto err;
+#ifdef CONFIG_X86_64
+	if (unlikely(pmd_large(*pmdp)))
+		pte = *(pte_t *) pmdp;
+	else
+#endif
+		pte = *pte_offset_kernel(pmdp, vaddr);
+
+	if (unlikely(!pte_present(pte) ||
+		     (write && (!pte_write(pte) || !pte_dirty(pte)))))
+		return 1;
+
+	*paddr = pte_pfn(pte) << PAGE_SHIFT;
+#ifdef CONFIG_HUGETLB_PAGE
+	*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+#else
+	*pageshift = PAGE_SHIFT;
+#endif
+	return 0;
+
+err:
+	return 1;
+}
+
+static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
+		    int write, int atomic, unsigned long *gpa, int *pageshift)
+{
+	struct mm_struct *mm = gts->ts_mm;
+	struct vm_area_struct *vma;
+	unsigned long paddr;
+	int ret, ps;
+
+	vma = find_vma(mm, vaddr);
+	if (!vma)
+		goto inval;
+
+	/*
+	 * Atomic lookup is faster & usually works even if called in non-atomic
+	 * context.
+	 */
+	rmb();	/* Must/check ms_range_active before loading PTEs */
+	ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps);
+	if (ret) {
+		if (atomic)
+			goto upm;
+		if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &ps))
+			goto inval;
+	}
+	if (is_gru_paddr(paddr))
+		goto inval;
+	paddr = paddr & ~((1UL << ps) - 1);
+	*gpa = uv_soc_phys_ram_to_gpa(paddr);
+	*pageshift = ps;
+	return VTOP_SUCCESS;
+
+inval:
+	return VTOP_INVALID;
+upm:
+	return VTOP_RETRY;
+}
+
+
+/*
+ * Flush a CBE from cache. The CBE is clean in the cache. Dirty the
+ * CBE cacheline so that the line will be written back to home agent.
+ * Otherwise the line may be silently dropped. This has no impact
+ * except on performance.
+ */
+static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe)
+{
+	if (unlikely(cbe)) {
+		cbe->cbrexecstatus = 0;         /* make CL dirty */
+		gru_flush_cache(cbe);
+	}
+}
+
+/*
+ * Preload the TLB with entries that may be required. Currently, preloading
+ * is implemented only for BCOPY. Preload  <tlb_preload_count> pages OR to
+ * the end of the bcopy tranfer, whichever is smaller.
+ */
+static void gru_preload_tlb(struct gru_state *gru,
+			struct gru_thread_state *gts, int atomic,
+			unsigned long fault_vaddr, int asid, int write,
+			unsigned char tlb_preload_count,
+			struct gru_tlb_fault_handle *tfh,
+			struct gru_control_block_extended *cbe)
+{
+	unsigned long vaddr = 0, gpa;
+	int ret, pageshift;
+
+	if (cbe->opccpy != OP_BCOPY)
+		return;
+
+	if (fault_vaddr == cbe->cbe_baddr0)
+		vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1;
+	else if (fault_vaddr == cbe->cbe_baddr1)
+		vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1;
+
+	fault_vaddr &= PAGE_MASK;
+	vaddr &= PAGE_MASK;
+	vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE);
+
+	while (vaddr > fault_vaddr) {
+		ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
+		if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write,
+					  GRU_PAGESIZE(pageshift)))
+			return;
+		gru_dbg(grudev,
+			"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n",
+			atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh,
+			vaddr, asid, write, pageshift, gpa);
+		vaddr -= PAGE_SIZE;
+		STAT(tlb_preload_page);
+	}
+}
+
+/*
+ * Drop a TLB entry into the GRU. The fault is described by info in an TFH.
+ *	Input:
+ *		cb    Address of user CBR. Null if not running in user context
+ * 	Return:
+ * 		  0 = dropin, exception, or switch to UPM successful
+ * 		  1 = range invalidate active
+ * 		< 0 = error code
+ *
+ */
+static int gru_try_dropin(struct gru_state *gru,
+			  struct gru_thread_state *gts,
+			  struct gru_tlb_fault_handle *tfh,
+			  struct gru_instruction_bits *cbk)
+{
+	struct gru_control_block_extended *cbe = NULL;
+	unsigned char tlb_preload_count = gts->ts_tlb_preload_count;
+	int pageshift = 0, asid, write, ret, atomic = !cbk, indexway;
+	unsigned long gpa = 0, vaddr = 0;
+
+	/*
+	 * NOTE: The GRU contains magic hardware that eliminates races between
+	 * TLB invalidates and TLB dropins. If an invalidate occurs
+	 * in the window between reading the TFH and the subsequent TLB dropin,
+	 * the dropin is ignored. This eliminates the need for additional locks.
+	 */
+
+	/*
+	 * Prefetch the CBE if doing TLB preloading
+	 */
+	if (unlikely(tlb_preload_count)) {
+		cbe = gru_tfh_to_cbe(tfh);
+		prefetchw(cbe);
+	}
+
+	/*
+	 * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call.
+	 * Might be a hardware race OR a stupid user. Ignore FMM because FMM
+	 * is a transient state.
+	 */
+	if (tfh->status != TFHSTATUS_EXCEPTION) {
+		gru_flush_cache(tfh);
+		sync_core();
+		if (tfh->status != TFHSTATUS_EXCEPTION)
+			goto failnoexception;
+		STAT(tfh_stale_on_fault);
+	}
+	if (tfh->state == TFHSTATE_IDLE)
+		goto failidle;
+	if (tfh->state == TFHSTATE_MISS_FMM && cbk)
+		goto failfmm;
+
+	write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
+	vaddr = tfh->missvaddr;
+	asid = tfh->missasid;
+	indexway = tfh->indexway;
+	if (asid == 0)
+		goto failnoasid;
+
+	rmb();	/* TFH must be cache resident before reading ms_range_active */
+
+	/*
+	 * TFH is cache resident - at least briefly. Fail the dropin
+	 * if a range invalidate is active.
+	 */
+	if (atomic_read(&gts->ts_gms->ms_range_active))
+		goto failactive;
+
+	ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
+	if (ret == VTOP_INVALID)
+		goto failinval;
+	if (ret == VTOP_RETRY)
+		goto failupm;
+
+	if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) {
+		gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift);
+		if (atomic || !gru_update_cch(gts)) {
+			gts->ts_force_cch_reload = 1;
+			goto failupm;
+		}
+	}
+
+	if (unlikely(cbe) && pageshift == PAGE_SHIFT) {
+		gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe);
+		gru_flush_cache_cbe(cbe);
+	}
+
+	gru_cb_set_istatus_active(cbk);
+	gts->ustats.tlbdropin++;
+	tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
+			  GRU_PAGESIZE(pageshift));
+	gru_dbg(grudev,
+		"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x,"
+		" rw %d, ps %d, gpa 0x%lx\n",
+		atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid,
+		indexway, write, pageshift, gpa);
+	STAT(tlb_dropin);
+	return 0;
+
+failnoasid:
+	/* No asid (delayed unload). */
+	STAT(tlb_dropin_fail_no_asid);
+	gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	if (!cbk)
+		tfh_user_polling_mode(tfh);
+	else
+		gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
+	return -EAGAIN;
+
+failupm:
+	/* Atomic failure switch CBR to UPM */
+	tfh_user_polling_mode(tfh);
+	gru_flush_cache_cbe(cbe);
+	STAT(tlb_dropin_fail_upm);
+	gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	return 1;
+
+failfmm:
+	/* FMM state on UPM call */
+	gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
+	STAT(tlb_dropin_fail_fmm);
+	gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
+	return 0;
+
+failnoexception:
+	/* TFH status did not show exception pending */
+	gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
+	if (cbk)
+		gru_flush_cache(cbk);
+	STAT(tlb_dropin_fail_no_exception);
+	gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n",
+		tfh, tfh->status, tfh->state);
+	return 0;
+
+failidle:
+	/* TFH state was idle  - no miss pending */
+	gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
+	if (cbk)
+		gru_flush_cache(cbk);
+	STAT(tlb_dropin_fail_idle);
+	gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
+	return 0;
+
+failinval:
+	/* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
+	tfh_exception(tfh);
+	gru_flush_cache_cbe(cbe);
+	STAT(tlb_dropin_fail_invalid);
+	gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	return -EFAULT;
+
+failactive:
+	/* Range invalidate active. Switch to UPM iff atomic */
+	if (!cbk)
+		tfh_user_polling_mode(tfh);
+	else
+		gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
+	STAT(tlb_dropin_fail_range_active);
+	gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
+		tfh, vaddr);
+	return 1;
+}
+
+/*
+ * Process an external interrupt from the GRU. This interrupt is
+ * caused by a TLB miss.
+ * Note that this is the interrupt handler that is registered with linux
+ * interrupt handlers.
+ */
+static irqreturn_t gru_intr(int chiplet, int blade)
+{
+	struct gru_state *gru;
+	struct gru_tlb_fault_map imap, dmap;
+	struct gru_thread_state *gts;
+	struct gru_tlb_fault_handle *tfh = NULL;
+	struct completion *cmp;
+	int cbrnum, ctxnum;
+
+	STAT(intr);
+
+	gru = &gru_base[blade]->bs_grus[chiplet];
+	if (!gru) {
+		dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n",
+			raw_smp_processor_id(), chiplet);
+		return IRQ_NONE;
+	}
+	get_clear_fault_map(gru, &imap, &dmap);
+	gru_dbg(grudev,
+		"cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n",
+		smp_processor_id(), chiplet, gru->gs_gid,
+		imap.fault_bits[0], imap.fault_bits[1],
+		dmap.fault_bits[0], dmap.fault_bits[1]);
+
+	for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) {
+		STAT(intr_cbr);
+		cmp = gru->gs_blade->bs_async_wq;
+		if (cmp)
+			complete(cmp);
+		gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n",
+			gru->gs_gid, cbrnum, cmp ? cmp->done : -1);
+	}
+
+	for_each_cbr_in_tfm(cbrnum, imap.fault_bits) {
+		STAT(intr_tfh);
+		tfh = get_tfh_by_index(gru, cbrnum);
+		prefetchw(tfh);	/* Helps on hdw, required for emulator */
+
+		/*
+		 * When hardware sets a bit in the faultmap, it implicitly
+		 * locks the GRU context so that it cannot be unloaded.
+		 * The gts cannot change until a TFH start/writestart command
+		 * is issued.
+		 */
+		ctxnum = tfh->ctxnum;
+		gts = gru->gs_gts[ctxnum];
+
+		/* Spurious interrupts can cause this. Ignore. */
+		if (!gts) {
+			STAT(intr_spurious);
+			continue;
+		}
+
+		/*
+		 * This is running in interrupt context. Trylock the mmap_sem.
+		 * If it fails, retry the fault in user context.
+		 */
+		gts->ustats.fmm_tlbmiss++;
+		if (!gts->ts_force_cch_reload &&
+					down_read_trylock(&gts->ts_mm->mmap_sem)) {
+			gru_try_dropin(gru, gts, tfh, NULL);
+			up_read(&gts->ts_mm->mmap_sem);
+		} else {
+			tfh_user_polling_mode(tfh);
+			STAT(intr_mm_lock_failed);
+		}
+	}
+	return IRQ_HANDLED;
+}
+
+irqreturn_t gru0_intr(int irq, void *dev_id)
+{
+	return gru_intr(0, uv_numa_blade_id());
+}
+
+irqreturn_t gru1_intr(int irq, void *dev_id)
+{
+	return gru_intr(1, uv_numa_blade_id());
+}
+
+irqreturn_t gru_intr_mblade(int irq, void *dev_id)
+{
+	int blade;
+
+	for_each_possible_blade(blade) {
+		if (uv_blade_nr_possible_cpus(blade))
+			continue;
+		 gru_intr(0, blade);
+		 gru_intr(1, blade);
+	}
+	return IRQ_HANDLED;
+}
+
+
+static int gru_user_dropin(struct gru_thread_state *gts,
+			   struct gru_tlb_fault_handle *tfh,
+			   void *cb)
+{
+	struct gru_mm_struct *gms = gts->ts_gms;
+	int ret;
+
+	gts->ustats.upm_tlbmiss++;
+	while (1) {
+		wait_event(gms->ms_wait_queue,
+			   atomic_read(&gms->ms_range_active) == 0);
+		prefetchw(tfh);	/* Helps on hdw, required for emulator */
+		ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb);
+		if (ret <= 0)
+			return ret;
+		STAT(call_os_wait_queue);
+	}
+}
+
+/*
+ * This interface is called as a result of a user detecting a "call OS" bit
+ * in a user CB. Normally means that a TLB fault has occurred.
+ * 	cb - user virtual address of the CB
+ */
+int gru_handle_user_call_os(unsigned long cb)
+{
+	struct gru_tlb_fault_handle *tfh;
+	struct gru_thread_state *gts;
+	void *cbk;
+	int ucbnum, cbrnum, ret = -EINVAL;
+
+	STAT(call_os);
+
+	/* sanity check the cb pointer */
+	ucbnum = get_cb_number((void *)cb);
+	if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
+		return -EINVAL;
+
+	gts = gru_find_lock_gts(cb);
+	if (!gts)
+		return -EINVAL;
+	gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
+
+	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
+		goto exit;
+
+	gru_check_context_placement(gts);
+
+	/*
+	 * CCH may contain stale data if ts_force_cch_reload is set.
+	 */
+	if (gts->ts_gru && gts->ts_force_cch_reload) {
+		gts->ts_force_cch_reload = 0;
+		gru_update_cch(gts);
+	}
+
+	ret = -EAGAIN;
+	cbrnum = thread_cbr_number(gts, ucbnum);
+	if (gts->ts_gru) {
+		tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
+		cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr,
+				gts->ts_ctxnum, ucbnum);
+		ret = gru_user_dropin(gts, tfh, cbk);
+	}
+exit:
+	gru_unlock_gts(gts);
+	return ret;
+}
+
+/*
+ * Fetch the exception detail information for a CB that terminated with
+ * an exception.
+ */
+int gru_get_exception_detail(unsigned long arg)
+{
+	struct control_block_extended_exc_detail excdet;
+	struct gru_control_block_extended *cbe;
+	struct gru_thread_state *gts;
+	int ucbnum, cbrnum, ret;
+
+	STAT(user_exception);
+	if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
+		return -EFAULT;
+
+	gts = gru_find_lock_gts(excdet.cb);
+	if (!gts)
+		return -EINVAL;
+
+	gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
+	ucbnum = get_cb_number((void *)excdet.cb);
+	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
+		ret = -EINVAL;
+	} else if (gts->ts_gru) {
+		cbrnum = thread_cbr_number(gts, ucbnum);
+		cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
+		gru_flush_cache(cbe);	/* CBE not coherent */
+		sync_core();		/* make sure we are have current data */
+		excdet.opc = cbe->opccpy;
+		excdet.exopc = cbe->exopccpy;
+		excdet.ecause = cbe->ecause;
+		excdet.exceptdet0 = cbe->idef1upd;
+		excdet.exceptdet1 = cbe->idef3upd;
+		excdet.cbrstate = cbe->cbrstate;
+		excdet.cbrexecstatus = cbe->cbrexecstatus;
+		gru_flush_cache_cbe(cbe);
+		ret = 0;
+	} else {
+		ret = -EAGAIN;
+	}
+	gru_unlock_gts(gts);
+
+	gru_dbg(grudev,
+		"cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, "
+		"exdet0 0x%lx, exdet1 0x%x\n",
+		excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus,
+		excdet.ecause, excdet.exceptdet0, excdet.exceptdet1);
+	if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
+		ret = -EFAULT;
+	return ret;
+}
+
+/*
+ * User request to unload a context. Content is saved for possible reload.
+ */
+static int gru_unload_all_contexts(void)
+{
+	struct gru_thread_state *gts;
+	struct gru_state *gru;
+	int gid, ctxnum;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	foreach_gid(gid) {
+		gru = GID_TO_GRU(gid);
+		spin_lock(&gru->gs_lock);
+		for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
+			gts = gru->gs_gts[ctxnum];
+			if (gts && mutex_trylock(&gts->ts_ctxlock)) {
+				spin_unlock(&gru->gs_lock);
+				gru_unload_context(gts, 1);
+				mutex_unlock(&gts->ts_ctxlock);
+				spin_lock(&gru->gs_lock);
+			}
+		}
+		spin_unlock(&gru->gs_lock);
+	}
+	return 0;
+}
+
+int gru_user_unload_context(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_unload_context_req req;
+
+	STAT(user_unload_context);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
+
+	if (!req.gseg)
+		return gru_unload_all_contexts();
+
+	gts = gru_find_lock_gts(req.gseg);
+	if (!gts)
+		return -EINVAL;
+
+	if (gts->ts_gru)
+		gru_unload_context(gts, 1);
+	gru_unlock_gts(gts);
+
+	return 0;
+}
+
+/*
+ * User request to flush a range of virtual addresses from the GRU TLB
+ * (Mainly for testing).
+ */
+int gru_user_flush_tlb(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_flush_tlb_req req;
+	struct gru_mm_struct *gms;
+
+	STAT(user_flush_tlb);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
+		req.vaddr, req.len);
+
+	gts = gru_find_lock_gts(req.gseg);
+	if (!gts)
+		return -EINVAL;
+
+	gms = gts->ts_gms;
+	gru_unlock_gts(gts);
+	gru_flush_tlb_range(gms, req.vaddr, req.len);
+
+	return 0;
+}
+
+/*
+ * Fetch GSEG statisticss
+ */
+long gru_get_gseg_statistics(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_get_gseg_statistics_req req;
+
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	/*
+	 * The library creates arrays of contexts for threaded programs.
+	 * If no gts exists in the array, the context has never been used & all
+	 * statistics are implicitly 0.
+	 */
+	gts = gru_find_lock_gts(req.gseg);
+	if (gts) {
+		memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats));
+		gru_unlock_gts(gts);
+	} else {
+		memset(&req.stats, 0, sizeof(gts->ustats));
+	}
+
+	if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Register the current task as the user of the GSEG slice.
+ * Needed for TLB fault interrupt targeting.
+ */
+int gru_set_context_option(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_set_context_option_req req;
+	int ret = 0;
+
+	STAT(set_context_option);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+	gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);
+
+	gts = gru_find_lock_gts(req.gseg);
+	if (!gts) {
+		gts = gru_alloc_locked_gts(req.gseg);
+		if (IS_ERR(gts))
+			return PTR_ERR(gts);
+	}
+
+	switch (req.op) {
+	case sco_blade_chiplet:
+		/* Select blade/chiplet for GRU context */
+		if (req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || !gru_base[req.val1] ||
+		    req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB) {
+			ret = -EINVAL;
+		} else {
+			gts->ts_user_blade_id = req.val1;
+			gts->ts_user_chiplet_id = req.val0;
+			gru_check_context_placement(gts);
+		}
+		break;
+	case sco_gseg_owner:
+ 		/* Register the current task as the GSEG owner */
+		gts->ts_tgid_owner = current->tgid;
+		break;
+	case sco_cch_req_slice:
+ 		/* Set the CCH slice option */
+		gts->ts_cch_req_slice = req.val1 & 3;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	gru_unlock_gts(gts);
+
+	return ret;
+}
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
new file mode 100644
index 00000000000..cb3b4d22847
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -0,0 +1,617 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              FILE OPERATIONS & DRIVER INITIALIZATION
+ *
+ * This file supports the user system call for file open, close, mmap, etc.
+ * This also incudes the driver initialization code.
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#ifdef CONFIG_X86_64
+#include <asm/uv/uv_irq.h>
+#endif
+#include <asm/uv/uv.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_mmrs.h>
+
+struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
+unsigned long gru_start_paddr __read_mostly;
+void *gru_start_vaddr __read_mostly;
+unsigned long gru_end_paddr __read_mostly;
+unsigned int gru_max_gids __read_mostly;
+struct gru_stats_s gru_stats;
+
+/* Guaranteed user available resources on each node */
+static int max_user_cbrs, max_user_dsr_bytes;
+
+static struct miscdevice gru_miscdev;
+
+
+/*
+ * gru_vma_close
+ *
+ * Called when unmapping a device mapping. Frees all gru resources
+ * and tables belonging to the vma.
+ */
+static void gru_vma_close(struct vm_area_struct *vma)
+{
+	struct gru_vma_data *vdata;
+	struct gru_thread_state *gts;
+	struct list_head *entry, *next;
+
+	if (!vma->vm_private_data)
+		return;
+
+	vdata = vma->vm_private_data;
+	vma->vm_private_data = NULL;
+	gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file,
+				vdata);
+	list_for_each_safe(entry, next, &vdata->vd_head) {
+		gts =
+		    list_entry(entry, struct gru_thread_state, ts_next);
+		list_del(&gts->ts_next);
+		mutex_lock(&gts->ts_ctxlock);
+		if (gts->ts_gru)
+			gru_unload_context(gts, 0);
+		mutex_unlock(&gts->ts_ctxlock);
+		gts_drop(gts);
+	}
+	kfree(vdata);
+	STAT(vdata_free);
+}
+
+/*
+ * gru_file_mmap
+ *
+ * Called when mmapping the device.  Initializes the vma with a fault handler
+ * and private data structure necessary to allocate, track, and free the
+ * underlying pages.
+ */
+static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
+		return -EPERM;
+
+	if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
+				vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
+		return -EINVAL;
+
+	vma->vm_flags |=
+	    (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
+			VM_RESERVED);
+	vma->vm_page_prot = PAGE_SHARED;
+	vma->vm_ops = &gru_vm_ops;
+
+	vma->vm_private_data = gru_alloc_vma_data(vma, 0);
+	if (!vma->vm_private_data)
+		return -ENOMEM;
+
+	gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n",
+		file, vma->vm_start, vma, vma->vm_private_data);
+	return 0;
+}
+
+/*
+ * Create a new GRU context
+ */
+static int gru_create_new_context(unsigned long arg)
+{
+	struct gru_create_context_req req;
+	struct vm_area_struct *vma;
+	struct gru_vma_data *vdata;
+	int ret = -EINVAL;
+
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	if (req.data_segment_bytes > max_user_dsr_bytes)
+		return -EINVAL;
+	if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count)
+		return -EINVAL;
+
+	if (!(req.options & GRU_OPT_MISS_MASK))
+		req.options |= GRU_OPT_MISS_FMM_INTR;
+
+	down_write(&current->mm->mmap_sem);
+	vma = gru_find_vma(req.gseg);
+	if (vma) {
+		vdata = vma->vm_private_data;
+		vdata->vd_user_options = req.options;
+		vdata->vd_dsr_au_count =
+		    GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
+		vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
+		vdata->vd_tlb_preload_count = req.tlb_preload_count;
+		ret = 0;
+	}
+	up_write(&current->mm->mmap_sem);
+
+	return ret;
+}
+
+/*
+ * Get GRU configuration info (temp - for emulator testing)
+ */
+static long gru_get_config_info(unsigned long arg)
+{
+	struct gru_config_info info;
+	int nodesperblade;
+
+	if (num_online_nodes() > 1 &&
+			(uv_node_to_blade_id(1) == uv_node_to_blade_id(0)))
+		nodesperblade = 2;
+	else
+		nodesperblade = 1;
+	info.cpus = num_online_cpus();
+	info.nodes = num_online_nodes();
+	info.blades = info.nodes / nodesperblade;
+	info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades;
+
+	if (copy_to_user((void __user *)arg, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * gru_file_unlocked_ioctl
+ *
+ * Called to update file attributes via IOCTL calls.
+ */
+static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
+				    unsigned long arg)
+{
+	int err = -EBADRQC;
+
+	gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg);
+
+	switch (req) {
+	case GRU_CREATE_CONTEXT:
+		err = gru_create_new_context(arg);
+		break;
+	case GRU_SET_CONTEXT_OPTION:
+		err = gru_set_context_option(arg);
+		break;
+	case GRU_USER_GET_EXCEPTION_DETAIL:
+		err = gru_get_exception_detail(arg);
+		break;
+	case GRU_USER_UNLOAD_CONTEXT:
+		err = gru_user_unload_context(arg);
+		break;
+	case GRU_USER_FLUSH_TLB:
+		err = gru_user_flush_tlb(arg);
+		break;
+	case GRU_USER_CALL_OS:
+		err = gru_handle_user_call_os(arg);
+		break;
+	case GRU_GET_GSEG_STATISTICS:
+		err = gru_get_gseg_statistics(arg);
+		break;
+	case GRU_KTEST:
+		err = gru_ktest(arg);
+		break;
+	case GRU_GET_CONFIG_INFO:
+		err = gru_get_config_info(arg);
+		break;
+	case GRU_DUMP_CHIPLET_STATE:
+		err = gru_dump_chiplet_request(arg);
+		break;
+	}
+	return err;
+}
+
+/*
+ * Called at init time to build tables for all GRUs that are present in the
+ * system.
+ */
+static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
+			     void *vaddr, int blade_id, int chiplet_id)
+{
+	spin_lock_init(&gru->gs_lock);
+	spin_lock_init(&gru->gs_asid_lock);
+	gru->gs_gru_base_paddr = paddr;
+	gru->gs_gru_base_vaddr = vaddr;
+	gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id;
+	gru->gs_blade = gru_base[blade_id];
+	gru->gs_blade_id = blade_id;
+	gru->gs_chiplet_id = chiplet_id;
+	gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
+	gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
+	gru->gs_asid_limit = MAX_ASID;
+	gru_tgh_flush_init(gru);
+	if (gru->gs_gid >= gru_max_gids)
+		gru_max_gids = gru->gs_gid + 1;
+	gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n",
+		blade_id, gru->gs_gid, gru->gs_gru_base_vaddr,
+		gru->gs_gru_base_paddr);
+}
+
+static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
+{
+	int pnode, nid, bid, chip;
+	int cbrs, dsrbytes, n;
+	int order = get_order(sizeof(struct gru_blade_state));
+	struct page *page;
+	struct gru_state *gru;
+	unsigned long paddr;
+	void *vaddr;
+
+	max_user_cbrs = GRU_NUM_CB;
+	max_user_dsr_bytes = GRU_NUM_DSR_BYTES;
+	for_each_possible_blade(bid) {
+		pnode = uv_blade_to_pnode(bid);
+		nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */
+		page = alloc_pages_node(nid, GFP_KERNEL, order);
+		if (!page)
+			goto fail;
+		gru_base[bid] = page_address(page);
+		memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
+		gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
+		spin_lock_init(&gru_base[bid]->bs_lock);
+		init_rwsem(&gru_base[bid]->bs_kgts_sema);
+
+		dsrbytes = 0;
+		cbrs = 0;
+		for (gru = gru_base[bid]->bs_grus, chip = 0;
+				chip < GRU_CHIPLETS_PER_BLADE;
+				chip++, gru++) {
+			paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
+			vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
+			gru_init_chiplet(gru, paddr, vaddr, bid, chip);
+			n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+			cbrs = max(cbrs, n);
+			n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+			dsrbytes = max(dsrbytes, n);
+		}
+		max_user_cbrs = min(max_user_cbrs, cbrs);
+		max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes);
+	}
+
+	return 0;
+
+fail:
+	for (bid--; bid >= 0; bid--)
+		free_pages((unsigned long)gru_base[bid], order);
+	return -ENOMEM;
+}
+
+static void gru_free_tables(void)
+{
+	int bid;
+	int order = get_order(sizeof(struct gru_state) *
+			      GRU_CHIPLETS_PER_BLADE);
+
+	for (bid = 0; bid < GRU_MAX_BLADES; bid++)
+		free_pages((unsigned long)gru_base[bid], order);
+}
+
+static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep)
+{
+	unsigned long mmr = 0;
+	int core;
+
+	/*
+	 * We target the cores of a blade and not the hyperthreads themselves.
+	 * There is a max of 8 cores per socket and 2 sockets per blade,
+	 * making for a max total of 16 cores (i.e., 16 CPUs without
+	 * hyperthreading and 32 CPUs with hyperthreading).
+	 */
+	core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
+	if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu))
+		return 0;
+
+	if (chiplet == 0) {
+		mmr = UVH_GR0_TLB_INT0_CONFIG +
+		    core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG);
+	} else if (chiplet == 1) {
+		mmr = UVH_GR1_TLB_INT0_CONFIG +
+		    core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG);
+	} else {
+		BUG();
+	}
+
+	*corep = core;
+	return mmr;
+}
+
+#ifdef CONFIG_IA64
+
+static int gru_irq_count[GRU_CHIPLETS_PER_BLADE];
+
+static void gru_noop(unsigned int irq)
+{
+}
+
+static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = {
+	[0 ... GRU_CHIPLETS_PER_BLADE - 1] {
+		.mask		= gru_noop,
+		.unmask		= gru_noop,
+		.ack		= gru_noop
+	}
+};
+
+static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name,
+			irq_handler_t irq_handler, int cpu, int blade)
+{
+	unsigned long mmr;
+	int irq = IRQ_GRU + chiplet;
+	int ret, core;
+
+	mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+	if (mmr == 0)
+		return 0;
+
+	if (gru_irq_count[chiplet] == 0) {
+		gru_chip[chiplet].name = irq_name;
+		ret = set_irq_chip(irq, &gru_chip[chiplet]);
+		if (ret) {
+			printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n",
+			       GRU_DRIVER_ID_STR, -ret);
+			return ret;
+		}
+
+		ret = request_irq(irq, irq_handler, 0, irq_name, NULL);
+		if (ret) {
+			printk(KERN_ERR "%s: request_irq failed, errno=%d\n",
+			       GRU_DRIVER_ID_STR, -ret);
+			return ret;
+		}
+	}
+	gru_irq_count[chiplet]++;
+
+	return 0;
+}
+
+static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade)
+{
+	unsigned long mmr;
+	int core, irq = IRQ_GRU + chiplet;
+
+	if (gru_irq_count[chiplet] == 0)
+		return;
+
+	mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+	if (mmr == 0)
+		return;
+
+	if (--gru_irq_count[chiplet] == 0)
+		free_irq(irq, NULL);
+}
+
+#elif defined CONFIG_X86_64
+
+static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name,
+			irq_handler_t irq_handler, int cpu, int blade)
+{
+	unsigned long mmr;
+	int irq, core;
+	int ret;
+
+	mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+	if (mmr == 0)
+		return 0;
+
+	irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU);
+	if (irq < 0) {
+		printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n",
+		       GRU_DRIVER_ID_STR, -irq);
+		return irq;
+	}
+
+	ret = request_irq(irq, irq_handler, 0, irq_name, NULL);
+	if (ret) {
+		uv_teardown_irq(irq);
+		printk(KERN_ERR "%s: request_irq failed, errno=%d\n",
+		       GRU_DRIVER_ID_STR, -ret);
+		return ret;
+	}
+	gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq;
+	return 0;
+}
+
+static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade)
+{
+	int irq, core;
+	unsigned long mmr;
+
+	mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+	if (mmr) {
+		irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core];
+		if (irq) {
+			free_irq(irq, NULL);
+			uv_teardown_irq(irq);
+		}
+	}
+}
+
+#endif
+
+static void gru_teardown_tlb_irqs(void)
+{
+	int blade;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		blade = uv_cpu_to_blade_id(cpu);
+		gru_chiplet_teardown_tlb_irq(0, cpu, blade);
+		gru_chiplet_teardown_tlb_irq(1, cpu, blade);
+	}
+	for_each_possible_blade(blade) {
+		if (uv_blade_nr_possible_cpus(blade))
+			continue;
+		gru_chiplet_teardown_tlb_irq(0, 0, blade);
+		gru_chiplet_teardown_tlb_irq(1, 0, blade);
+	}
+}
+
+static int gru_setup_tlb_irqs(void)
+{
+	int blade;
+	int cpu;
+	int ret;
+
+	for_each_online_cpu(cpu) {
+		blade = uv_cpu_to_blade_id(cpu);
+		ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade);
+		if (ret != 0)
+			goto exit1;
+
+		ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade);
+		if (ret != 0)
+			goto exit1;
+	}
+	for_each_possible_blade(blade) {
+		if (uv_blade_nr_possible_cpus(blade))
+			continue;
+		ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade);
+		if (ret != 0)
+			goto exit1;
+
+		ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade);
+		if (ret != 0)
+			goto exit1;
+	}
+
+	return 0;
+
+exit1:
+	gru_teardown_tlb_irqs();
+	return ret;
+}
+
+/*
+ * gru_init
+ *
+ * Called at boot or module load time to initialize the GRUs.
+ */
+static int __init gru_init(void)
+{
+	int ret;
+
+	if (!is_uv_system())
+		return 0;
+
+#if defined CONFIG_IA64
+	gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
+#else
+	gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
+				0x7fffffffffffUL;
+#endif
+	gru_start_vaddr = __va(gru_start_paddr);
+	gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE;
+	printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
+	       gru_start_paddr, gru_end_paddr);
+	ret = misc_register(&gru_miscdev);
+	if (ret) {
+		printk(KERN_ERR "%s: misc_register failed\n",
+		       GRU_DRIVER_ID_STR);
+		goto exit0;
+	}
+
+	ret = gru_proc_init();
+	if (ret) {
+		printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
+		goto exit1;
+	}
+
+	ret = gru_init_tables(gru_start_paddr, gru_start_vaddr);
+	if (ret) {
+		printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
+		goto exit2;
+	}
+
+	ret = gru_setup_tlb_irqs();
+	if (ret != 0)
+		goto exit3;
+
+	gru_kservices_init();
+
+	printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
+	       GRU_DRIVER_VERSION_STR);
+	return 0;
+
+exit3:
+	gru_free_tables();
+exit2:
+	gru_proc_exit();
+exit1:
+	misc_deregister(&gru_miscdev);
+exit0:
+	return ret;
+
+}
+
+static void __exit gru_exit(void)
+{
+	if (!is_uv_system())
+		return;
+
+	gru_teardown_tlb_irqs();
+	gru_kservices_exit();
+	gru_free_tables();
+	misc_deregister(&gru_miscdev);
+	gru_proc_exit();
+}
+
+static const struct file_operations gru_fops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= gru_file_unlocked_ioctl,
+	.mmap		= gru_file_mmap,
+};
+
+static struct miscdevice gru_miscdev = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "gru",
+	.fops		= &gru_fops,
+};
+
+const struct vm_operations_struct gru_vm_ops = {
+	.close		= gru_vma_close,
+	.fault		= gru_fault,
+};
+
+#ifndef MODULE
+fs_initcall(gru_init);
+#else
+module_init(gru_init);
+#endif
+module_exit(gru_exit);
+
+module_param(gru_options, ulong, 0644);
+MODULE_PARM_DESC(gru_options, "Various debug options");
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR);
+MODULE_VERSION(GRU_DRIVER_VERSION_STR);
+
diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c
new file mode 100644
index 00000000000..2f30badc6ff
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -0,0 +1,216 @@
+/*
+ *              GRU KERNEL MCS INSTRUCTIONS
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+/* 10 sec */
+#ifdef CONFIG_IA64
+#include <asm/processor.h>
+#define GRU_OPERATION_TIMEOUT	(((cycles_t) local_cpu_data->itc_freq)*10)
+#define CLKS2NSEC(c)		((c) *1000000000 / local_cpu_data->itc_freq)
+#else
+#include <asm/tsc.h>
+#define GRU_OPERATION_TIMEOUT	((cycles_t) tsc_khz*10*1000)
+#define CLKS2NSEC(c)		((c) * 1000000 / tsc_khz)
+#endif
+
+/* Extract the status field from a kernel handle */
+#define GET_MSEG_HANDLE_STATUS(h)	(((*(unsigned long *)(h)) >> 16) & 3)
+
+struct mcs_op_statistic mcs_op_statistics[mcsop_last];
+
+static void update_mcs_stats(enum mcs_op op, unsigned long clks)
+{
+	unsigned long nsec;
+
+	nsec = CLKS2NSEC(clks);
+	atomic_long_inc(&mcs_op_statistics[op].count);
+	atomic_long_add(nsec, &mcs_op_statistics[op].total);
+	if (mcs_op_statistics[op].max < nsec)
+		mcs_op_statistics[op].max = nsec;
+}
+
+static void start_instruction(void *h)
+{
+	unsigned long *w0 = h;
+
+	wmb();		/* setting CMD/STATUS bits must be last */
+	*w0 = *w0 | 0x20001;
+	gru_flush_cache(h);
+}
+
+static void report_instruction_timeout(void *h)
+{
+	unsigned long goff = GSEGPOFF((unsigned long)h);
+	char *id = "???";
+
+	if (TYPE_IS(CCH, goff))
+		id = "CCH";
+	else if (TYPE_IS(TGH, goff))
+		id = "TGH";
+	else if (TYPE_IS(TFH, goff))
+		id = "TFH";
+
+	panic(KERN_ALERT "GRU %p (%s) is malfunctioning\n", h, id);
+}
+
+static int wait_instruction_complete(void *h, enum mcs_op opc)
+{
+	int status;
+	unsigned long start_time = get_cycles();
+
+	while (1) {
+		cpu_relax();
+		status = GET_MSEG_HANDLE_STATUS(h);
+		if (status != CCHSTATUS_ACTIVE)
+			break;
+		if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) {
+			report_instruction_timeout(h);
+			start_time = get_cycles();
+		}
+	}
+	if (gru_options & OPT_STATS)
+		update_mcs_stats(opc, get_cycles() - start_time);
+	return status;
+}
+
+int cch_allocate(struct gru_context_configuration_handle *cch)
+{
+	int ret;
+
+	cch->opc = CCHOP_ALLOCATE;
+	start_instruction(cch);
+	ret = wait_instruction_complete(cch, cchop_allocate);
+
+	/*
+	 * Stop speculation into the GSEG being mapped by the previous ALLOCATE.
+	 * The GSEG memory does not exist until the ALLOCATE completes.
+	 */
+	sync_core();
+	return ret;
+}
+
+int cch_start(struct gru_context_configuration_handle *cch)
+{
+	cch->opc = CCHOP_START;
+	start_instruction(cch);
+	return wait_instruction_complete(cch, cchop_start);
+}
+
+int cch_interrupt(struct gru_context_configuration_handle *cch)
+{
+	cch->opc = CCHOP_INTERRUPT;
+	start_instruction(cch);
+	return wait_instruction_complete(cch, cchop_interrupt);
+}
+
+int cch_deallocate(struct gru_context_configuration_handle *cch)
+{
+	int ret;
+
+	cch->opc = CCHOP_DEALLOCATE;
+	start_instruction(cch);
+	ret = wait_instruction_complete(cch, cchop_deallocate);
+
+	/*
+	 * Stop speculation into the GSEG being unmapped by the previous
+	 * DEALLOCATE.
+	 */
+	sync_core();
+	return ret;
+}
+
+int cch_interrupt_sync(struct gru_context_configuration_handle
+				     *cch)
+{
+	cch->opc = CCHOP_INTERRUPT_SYNC;
+	start_instruction(cch);
+	return wait_instruction_complete(cch, cchop_interrupt_sync);
+}
+
+int tgh_invalidate(struct gru_tlb_global_handle *tgh,
+				 unsigned long vaddr, unsigned long vaddrmask,
+				 int asid, int pagesize, int global, int n,
+				 unsigned short ctxbitmap)
+{
+	tgh->vaddr = vaddr;
+	tgh->asid = asid;
+	tgh->pagesize = pagesize;
+	tgh->n = n;
+	tgh->global = global;
+	tgh->vaddrmask = vaddrmask;
+	tgh->ctxbitmap = ctxbitmap;
+	tgh->opc = TGHOP_TLBINV;
+	start_instruction(tgh);
+	return wait_instruction_complete(tgh, tghop_invalidate);
+}
+
+int tfh_write_only(struct gru_tlb_fault_handle *tfh,
+				  unsigned long paddr, int gaa,
+				  unsigned long vaddr, int asid, int dirty,
+				  int pagesize)
+{
+	tfh->fillasid = asid;
+	tfh->fillvaddr = vaddr;
+	tfh->pfn = paddr >> GRU_PADDR_SHIFT;
+	tfh->gaa = gaa;
+	tfh->dirty = dirty;
+	tfh->pagesize = pagesize;
+	tfh->opc = TFHOP_WRITE_ONLY;
+	start_instruction(tfh);
+	return wait_instruction_complete(tfh, tfhop_write_only);
+}
+
+void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
+				     unsigned long paddr, int gaa,
+				     unsigned long vaddr, int asid, int dirty,
+				     int pagesize)
+{
+	tfh->fillasid = asid;
+	tfh->fillvaddr = vaddr;
+	tfh->pfn = paddr >> GRU_PADDR_SHIFT;
+	tfh->gaa = gaa;
+	tfh->dirty = dirty;
+	tfh->pagesize = pagesize;
+	tfh->opc = TFHOP_WRITE_RESTART;
+	start_instruction(tfh);
+}
+
+void tfh_restart(struct gru_tlb_fault_handle *tfh)
+{
+	tfh->opc = TFHOP_RESTART;
+	start_instruction(tfh);
+}
+
+void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
+{
+	tfh->opc = TFHOP_USER_POLLING_MODE;
+	start_instruction(tfh);
+}
+
+void tfh_exception(struct gru_tlb_fault_handle *tfh)
+{
+	tfh->opc = TFHOP_EXCEPTION;
+	start_instruction(tfh);
+}
+
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
new file mode 100644
index 00000000000..3f998b924d8
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -0,0 +1,531 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              GRU HANDLE DEFINITION
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRUHANDLES_H__
+#define __GRUHANDLES_H__
+#include "gru_instructions.h"
+
+/*
+ * Manifest constants for GRU Memory Map
+ */
+#define GRU_GSEG0_BASE		0
+#define GRU_MCS_BASE		(64 * 1024 * 1024)
+#define GRU_SIZE		(128UL * 1024 * 1024)
+
+/* Handle & resource counts */
+#define GRU_NUM_CB		128
+#define GRU_NUM_DSR_BYTES	(32 * 1024)
+#define GRU_NUM_TFM		16
+#define GRU_NUM_TGH		24
+#define GRU_NUM_CBE		128
+#define GRU_NUM_TFH		128
+#define GRU_NUM_CCH		16
+
+/* Maximum resource counts that can be reserved by user programs */
+#define GRU_NUM_USER_CBR	GRU_NUM_CBE
+#define GRU_NUM_USER_DSR_BYTES	GRU_NUM_DSR_BYTES
+
+/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles
+ * are the same */
+#define GRU_HANDLE_BYTES	64
+#define GRU_HANDLE_STRIDE	256
+
+/* Base addresses of handles */
+#define GRU_TFM_BASE		(GRU_MCS_BASE + 0x00000)
+#define GRU_TGH_BASE		(GRU_MCS_BASE + 0x08000)
+#define GRU_CBE_BASE		(GRU_MCS_BASE + 0x10000)
+#define GRU_TFH_BASE		(GRU_MCS_BASE + 0x18000)
+#define GRU_CCH_BASE		(GRU_MCS_BASE + 0x20000)
+
+/* User gseg constants */
+#define GRU_GSEG_STRIDE		(4 * 1024 * 1024)
+#define GSEG_BASE(a)		((a) & ~(GRU_GSEG_PAGESIZE - 1))
+
+/* Data segment constants */
+#define GRU_DSR_AU_BYTES	1024
+#define GRU_DSR_CL		(GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES)
+#define GRU_DSR_AU_CL		(GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES)
+#define GRU_DSR_AU		(GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES)
+
+/* Control block constants */
+#define GRU_CBR_AU_SIZE		2
+#define GRU_CBR_AU		(GRU_NUM_CBE / GRU_CBR_AU_SIZE)
+
+/* Convert resource counts to the number of AU */
+#define GRU_DS_BYTES_TO_AU(n)	DIV_ROUND_UP(n, GRU_DSR_AU_BYTES)
+#define GRU_CB_COUNT_TO_AU(n)	DIV_ROUND_UP(n, GRU_CBR_AU_SIZE)
+
+/* UV limits */
+#define GRU_CHIPLETS_PER_HUB	2
+#define GRU_HUBS_PER_BLADE	1
+#define GRU_CHIPLETS_PER_BLADE	(GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB)
+
+/* User GRU Gseg offsets */
+#define GRU_CB_BASE		0
+#define GRU_CB_LIMIT		(GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE)
+#define GRU_DS_BASE		0x20000
+#define GRU_DS_LIMIT		(GRU_DS_BASE + GRU_NUM_DSR_BYTES)
+
+/* Convert a GRU physical address to the chiplet offset */
+#define GSEGPOFF(h) 		((h) & (GRU_SIZE - 1))
+
+/* Convert an arbitrary handle address to the beginning of the GRU segment */
+#define GRUBASE(h)		((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
+
+/* Test a valid handle address to determine the type */
+#define TYPE_IS(hn, h)		((h) >= GRU_##hn##_BASE && (h) <	\
+		GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE &&   \
+		(((h) & (GRU_HANDLE_STRIDE - 1)) == 0))
+
+
+/* General addressing macros. */
+static inline void *get_gseg_base_address(void *base, int ctxnum)
+{
+	return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum);
+}
+
+static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line)
+{
+	return (void *)(get_gseg_base_address(base, ctxnum) +
+			GRU_CB_BASE + GRU_HANDLE_STRIDE * line);
+}
+
+static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line)
+{
+	return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE +
+			GRU_CACHE_LINE_BYTES * line);
+}
+
+static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum)
+{
+	return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum)
+{
+	return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum)
+{
+	return (struct gru_control_block_extended *)(base + GRU_CBE_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum)
+{
+	return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_context_configuration_handle *get_cch(void *base,
+					int ctxnum)
+{
+	return (struct gru_context_configuration_handle *)(base +
+				GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline unsigned long get_cb_number(void *cb)
+{
+	return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) /
+					GRU_HANDLE_STRIDE;
+}
+
+/* byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)*/
+static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode,
+							int chiplet)
+{
+	return paddr + GRU_SIZE * (2 * pnode  + chiplet);
+}
+
+static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet)
+{
+	return vaddr + GRU_SIZE * (2 * pnode  + chiplet);
+}
+
+static inline struct gru_control_block_extended *gru_tfh_to_cbe(
+					struct gru_tlb_fault_handle *tfh)
+{
+	unsigned long cbe;
+
+	cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE;
+	return (struct gru_control_block_extended*)cbe;
+}
+
+
+
+
+/*
+ * Global TLB Fault Map
+ * 	Bitmap of outstanding TLB misses needing interrupt/polling service.
+ *
+ */
+struct gru_tlb_fault_map {
+	unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
+	unsigned long fill0[2];
+	unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
+	unsigned long fill1[2];
+};
+
+/*
+ * TGH - TLB Global Handle
+ * 	Used for TLB flushing.
+ *
+ */
+struct gru_tlb_global_handle {
+	unsigned int cmd:1;		/* DW 0 */
+	unsigned int delresp:1;
+	unsigned int opc:1;
+	unsigned int fill1:5;
+
+	unsigned int fill2:8;
+
+	unsigned int status:2;
+	unsigned long fill3:2;
+	unsigned int state:3;
+	unsigned long fill4:1;
+
+	unsigned int cause:3;
+	unsigned long fill5:37;
+
+	unsigned long vaddr:64;		/* DW 1 */
+
+	unsigned int asid:24;		/* DW 2 */
+	unsigned int fill6:8;
+
+	unsigned int pagesize:5;
+	unsigned int fill7:11;
+
+	unsigned int global:1;
+	unsigned int fill8:15;
+
+	unsigned long vaddrmask:39;	/* DW 3 */
+	unsigned int fill9:9;
+	unsigned int n:10;
+	unsigned int fill10:6;
+
+	unsigned int ctxbitmap:16;	/* DW4 */
+	unsigned long fill11[3];
+};
+
+enum gru_tgh_cmd {
+	TGHCMD_START
+};
+
+enum gru_tgh_opc {
+	TGHOP_TLBNOP,
+	TGHOP_TLBINV
+};
+
+enum gru_tgh_status {
+	TGHSTATUS_IDLE,
+	TGHSTATUS_EXCEPTION,
+	TGHSTATUS_ACTIVE
+};
+
+enum gru_tgh_state {
+	TGHSTATE_IDLE,
+	TGHSTATE_PE_INVAL,
+	TGHSTATE_INTERRUPT_INVAL,
+	TGHSTATE_WAITDONE,
+	TGHSTATE_RESTART_CTX,
+};
+
+enum gru_tgh_cause {
+	TGHCAUSE_RR_ECC,
+	TGHCAUSE_TLB_ECC,
+	TGHCAUSE_LRU_ECC,
+	TGHCAUSE_PS_ECC,
+	TGHCAUSE_MUL_ERR,
+	TGHCAUSE_DATA_ERR,
+	TGHCAUSE_SW_FORCE
+};
+
+
+/*
+ * TFH - TLB Global Handle
+ * 	Used for TLB dropins into the GRU TLB.
+ *
+ */
+struct gru_tlb_fault_handle {
+	unsigned int cmd:1;		/* DW 0 - low 32*/
+	unsigned int delresp:1;
+	unsigned int fill0:2;
+	unsigned int opc:3;
+	unsigned int fill1:9;
+
+	unsigned int status:2;
+	unsigned int fill2:2;
+	unsigned int state:3;
+	unsigned int fill3:1;
+
+	unsigned int cause:6;
+	unsigned int cb_int:1;
+	unsigned int fill4:1;
+
+	unsigned int indexway:12;	/* DW 0 - high 32 */
+	unsigned int fill5:4;
+
+	unsigned int ctxnum:4;
+	unsigned int fill6:12;
+
+	unsigned long missvaddr:64;	/* DW 1 */
+
+	unsigned int missasid:24;	/* DW 2 */
+	unsigned int fill7:8;
+	unsigned int fillasid:24;
+	unsigned int dirty:1;
+	unsigned int gaa:2;
+	unsigned long fill8:5;
+
+	unsigned long pfn:41;		/* DW 3 */
+	unsigned int fill9:7;
+	unsigned int pagesize:5;
+	unsigned int fill10:11;
+
+	unsigned long fillvaddr:64;	/* DW 4 */
+
+	unsigned long fill11[3];
+};
+
+enum gru_tfh_opc {
+	TFHOP_NOOP,
+	TFHOP_RESTART,
+	TFHOP_WRITE_ONLY,
+	TFHOP_WRITE_RESTART,
+	TFHOP_EXCEPTION,
+	TFHOP_USER_POLLING_MODE = 7,
+};
+
+enum tfh_status {
+	TFHSTATUS_IDLE,
+	TFHSTATUS_EXCEPTION,
+	TFHSTATUS_ACTIVE,
+};
+
+enum tfh_state {
+	TFHSTATE_INACTIVE,
+	TFHSTATE_IDLE,
+	TFHSTATE_MISS_UPM,
+	TFHSTATE_MISS_FMM,
+	TFHSTATE_HW_ERR,
+	TFHSTATE_WRITE_TLB,
+	TFHSTATE_RESTART_CBR,
+};
+
+/* TFH cause bits */
+enum tfh_cause {
+	TFHCAUSE_NONE,
+	TFHCAUSE_TLB_MISS,
+	TFHCAUSE_TLB_MOD,
+	TFHCAUSE_HW_ERROR_RR,
+	TFHCAUSE_HW_ERROR_MAIN_ARRAY,
+	TFHCAUSE_HW_ERROR_VALID,
+	TFHCAUSE_HW_ERROR_PAGESIZE,
+	TFHCAUSE_INSTRUCTION_EXCEPTION,
+	TFHCAUSE_UNCORRECTIBLE_ERROR,
+};
+
+/* GAA values */
+#define GAA_RAM				0x0
+#define GAA_NCRAM			0x2
+#define GAA_MMIO			0x1
+#define GAA_REGISTER			0x3
+
+/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */
+#define GRU_PADDR_SHIFT			12
+
+/*
+ * Context Configuration handle
+ * 	Used to allocate resources to a GSEG context.
+ *
+ */
+struct gru_context_configuration_handle {
+	unsigned int cmd:1;			/* DW0 */
+	unsigned int delresp:1;
+	unsigned int opc:3;
+	unsigned int unmap_enable:1;
+	unsigned int req_slice_set_enable:1;
+	unsigned int req_slice:2;
+	unsigned int cb_int_enable:1;
+	unsigned int tlb_int_enable:1;
+	unsigned int tfm_fault_bit_enable:1;
+	unsigned int tlb_int_select:4;
+
+	unsigned int status:2;
+	unsigned int state:2;
+	unsigned int reserved2:4;
+
+	unsigned int cause:4;
+	unsigned int tfm_done_bit_enable:1;
+	unsigned int unused:3;
+
+	unsigned int dsr_allocation_map;
+
+	unsigned long cbr_allocation_map;	/* DW1 */
+
+	unsigned int asid[8];			/* DW 2 - 5 */
+	unsigned short sizeavail[8];		/* DW 6 - 7 */
+} __attribute__ ((packed));
+
+enum gru_cch_opc {
+	CCHOP_START = 1,
+	CCHOP_ALLOCATE,
+	CCHOP_INTERRUPT,
+	CCHOP_DEALLOCATE,
+	CCHOP_INTERRUPT_SYNC,
+};
+
+enum gru_cch_status {
+	CCHSTATUS_IDLE,
+	CCHSTATUS_EXCEPTION,
+	CCHSTATUS_ACTIVE,
+};
+
+enum gru_cch_state {
+	CCHSTATE_INACTIVE,
+	CCHSTATE_MAPPED,
+	CCHSTATE_ACTIVE,
+	CCHSTATE_INTERRUPTED,
+};
+
+/* CCH Exception cause */
+enum gru_cch_cause {
+	CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1,
+	CCHCAUSE_ILLEGAL_OPCODE = 2,
+	CCHCAUSE_INVALID_START_REQUEST = 3,
+	CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4,
+	CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5,
+	CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6,
+	CCHCAUSE_CCH_BUSY = 7,
+	CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8,
+	CCHCAUSE_BAD_TFM_CONFIG = 9,
+	CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10,
+	CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11,
+	CCHCAUSE_CBR_DEALLOCATION_ERROR = 12,
+};
+/*
+ * CBE - Control Block Extended
+ * 	Maintains internal GRU state for active CBs.
+ *
+ */
+struct gru_control_block_extended {
+	unsigned int reserved0:1;	/* DW 0  - low */
+	unsigned int imacpy:3;
+	unsigned int reserved1:4;
+	unsigned int xtypecpy:3;
+	unsigned int iaa0cpy:2;
+	unsigned int iaa1cpy:2;
+	unsigned int reserved2:1;
+	unsigned int opccpy:8;
+	unsigned int exopccpy:8;
+
+	unsigned int idef2cpy:22;	/* DW 0  - high */
+	unsigned int reserved3:10;
+
+	unsigned int idef4cpy:22;	/* DW 1 */
+	unsigned int reserved4:10;
+	unsigned int idef4upd:22;
+	unsigned int reserved5:10;
+
+	unsigned long idef1upd:64;	/* DW 2 */
+
+	unsigned long idef5cpy:64;	/* DW 3 */
+
+	unsigned long idef6cpy:64;	/* DW 4 */
+
+	unsigned long idef3upd:64;	/* DW 5 */
+
+	unsigned long idef5upd:64;	/* DW 6 */
+
+	unsigned int idef2upd:22;	/* DW 7 */
+	unsigned int reserved6:10;
+
+	unsigned int ecause:20;
+	unsigned int cbrstate:4;
+	unsigned int cbrexecstatus:8;
+};
+
+/* CBE fields for active BCOPY instructions */
+#define cbe_baddr0	idef1upd
+#define cbe_baddr1	idef3upd
+#define cbe_src_cl	idef6cpy
+#define cbe_nelemcur	idef5upd
+
+enum gru_cbr_state {
+	CBRSTATE_INACTIVE,
+	CBRSTATE_IDLE,
+	CBRSTATE_PE_CHECK,
+	CBRSTATE_QUEUED,
+	CBRSTATE_WAIT_RESPONSE,
+	CBRSTATE_INTERRUPTED,
+	CBRSTATE_INTERRUPTED_MISS_FMM,
+	CBRSTATE_BUSY_INTERRUPT_MISS_FMM,
+	CBRSTATE_INTERRUPTED_MISS_UPM,
+	CBRSTATE_BUSY_INTERRUPTED_MISS_UPM,
+	CBRSTATE_REQUEST_ISSUE,
+	CBRSTATE_BUSY_INTERRUPT,
+};
+
+/* CBE cbrexecstatus bits  - defined in gru_instructions.h*/
+/* CBE ecause bits  - defined in gru_instructions.h */
+
+/*
+ * Convert a processor pagesize into the strange encoded pagesize used by the
+ * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT)
+ * 	pagesize	log pagesize	grupagesize
+ * 	  4k			12	0
+ * 	 16k 			14	1
+ * 	 64k			16	2
+ * 	256k			18	3
+ * 	  1m			20	4
+ * 	  2m			21	5
+ * 	  4m			22	6
+ * 	 16m			24	7
+ * 	 64m			26	8
+ * 	...
+ */
+#define GRU_PAGESIZE(sh)	((((sh) > 20 ? (sh) + 2 : (sh)) >> 1) - 6)
+#define GRU_SIZEAVAIL(sh)	(1UL << GRU_PAGESIZE(sh))
+
+/* minimum TLB purge count to ensure a full purge */
+#define GRUMAXINVAL		1024UL
+
+int cch_allocate(struct gru_context_configuration_handle *cch);
+int cch_start(struct gru_context_configuration_handle *cch);
+int cch_interrupt(struct gru_context_configuration_handle *cch);
+int cch_deallocate(struct gru_context_configuration_handle *cch);
+int cch_interrupt_sync(struct gru_context_configuration_handle *cch);
+int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr,
+	unsigned long vaddrmask, int asid, int pagesize, int global, int n,
+	unsigned short ctxbitmap);
+int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
+	int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
+void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
+	int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
+void tfh_restart(struct gru_tlb_fault_handle *tfh);
+void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh);
+void tfh_exception(struct gru_tlb_fault_handle *tfh);
+
+#endif /* __GRUHANDLES_H__ */
diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c
new file mode 100644
index 00000000000..9b2062d1732
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukdump.c
@@ -0,0 +1,235 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *            Dump GRU State
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+#include <asm/uv/uv_hub.h>
+#include "gru.h"
+#include "grutables.h"
+#include "gruhandles.h"
+#include "grulib.h"
+
+#define CCH_LOCK_ATTEMPTS	10
+
+static int gru_user_copy_handle(void __user **dp, void *s)
+{
+	if (copy_to_user(*dp, s, GRU_HANDLE_BYTES))
+		return -1;
+	*dp += GRU_HANDLE_BYTES;
+	return 0;
+}
+
+static int gru_dump_context_data(void *grubase,
+			struct gru_context_configuration_handle *cch,
+			void __user *ubuf, int ctxnum, int dsrcnt,
+			int flush_cbrs)
+{
+	void *cb, *cbe, *tfh, *gseg;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	tfh = grubase + GRU_TFH_BASE;
+
+	for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) {
+		if (flush_cbrs)
+			gru_flush_cache(cb);
+		if (gru_user_copy_handle(&ubuf, cb))
+			goto fail;
+		if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE))
+			goto fail;
+		if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE))
+			goto fail;
+		cb += GRU_HANDLE_STRIDE;
+	}
+	if (dsrcnt)
+		memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE);
+	return 0;
+
+fail:
+	return -EFAULT;
+}
+
+static int gru_dump_tfm(struct gru_state *gru,
+		void __user *ubuf, void __user *ubufend)
+{
+	struct gru_tlb_fault_map *tfm;
+	int i, ret, bytes;
+
+	bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
+	if (bytes > ubufend - ubuf)
+		ret = -EFBIG;
+
+	for (i = 0; i < GRU_NUM_TFM; i++) {
+		tfm = get_tfm(gru->gs_gru_base_vaddr, i);
+		if (gru_user_copy_handle(&ubuf, tfm))
+			goto fail;
+	}
+	return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
+
+fail:
+	return -EFAULT;
+}
+
+static int gru_dump_tgh(struct gru_state *gru,
+		void __user *ubuf, void __user *ubufend)
+{
+	struct gru_tlb_global_handle *tgh;
+	int i, ret, bytes;
+
+	bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
+	if (bytes > ubufend - ubuf)
+		ret = -EFBIG;
+
+	for (i = 0; i < GRU_NUM_TGH; i++) {
+		tgh = get_tgh(gru->gs_gru_base_vaddr, i);
+		if (gru_user_copy_handle(&ubuf, tgh))
+			goto fail;
+	}
+	return GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
+
+fail:
+	return -EFAULT;
+}
+
+static int gru_dump_context(struct gru_state *gru, int ctxnum,
+		void __user *ubuf, void __user *ubufend, char data_opt,
+		char lock_cch, char flush_cbrs)
+{
+	struct gru_dump_context_header hdr;
+	struct gru_dump_context_header __user *uhdr = ubuf;
+	struct gru_context_configuration_handle *cch, *ubufcch;
+	struct gru_thread_state *gts;
+	int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0;
+	void *grubase;
+
+	memset(&hdr, 0, sizeof(hdr));
+	grubase = gru->gs_gru_base_vaddr;
+	cch = get_cch(grubase, ctxnum);
+	for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) {
+		cch_locked =  trylock_cch_handle(cch);
+		if (cch_locked)
+			break;
+		msleep(1);
+	}
+
+	ubuf += sizeof(hdr);
+	ubufcch = ubuf;
+	if (gru_user_copy_handle(&ubuf, cch))
+		goto fail;
+	if (cch_locked)
+		ubufcch->delresp = 0;
+	bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES;
+
+	if (cch_locked || !lock_cch) {
+		gts = gru->gs_gts[ctxnum];
+		if (gts && gts->ts_vma) {
+			hdr.pid = gts->ts_tgid_owner;
+			hdr.vaddr = gts->ts_vma->vm_start;
+		}
+		if (cch->state != CCHSTATE_INACTIVE) {
+			cbrcnt = hweight64(cch->cbr_allocation_map) *
+						GRU_CBR_AU_SIZE;
+			dsrcnt = data_opt ? hweight32(cch->dsr_allocation_map) *
+						GRU_DSR_AU_CL : 0;
+		}
+		bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES;
+		if (bytes > ubufend - ubuf)
+			ret = -EFBIG;
+		else
+			ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum,
+							dsrcnt, flush_cbrs);
+	}
+	if (cch_locked)
+		unlock_cch_handle(cch);
+	if (ret)
+		return ret;
+
+	hdr.magic = GRU_DUMP_MAGIC;
+	hdr.gid = gru->gs_gid;
+	hdr.ctxnum = ctxnum;
+	hdr.cbrcnt = cbrcnt;
+	hdr.dsrcnt = dsrcnt;
+	hdr.cch_locked = cch_locked;
+	if (!ret && copy_to_user((void __user *)uhdr, &hdr, sizeof(hdr)))
+		ret = -EFAULT;
+
+	return ret ? ret : bytes;
+
+fail:
+	unlock_cch_handle(cch);
+	return -EFAULT;
+}
+
+int gru_dump_chiplet_request(unsigned long arg)
+{
+	struct gru_state *gru;
+	struct gru_dump_chiplet_state_req req;
+	void __user *ubuf;
+	void __user *ubufend;
+	int ctxnum, ret, cnt = 0;
+
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	/* Currently, only dump by gid is implemented */
+	if (req.gid >= gru_max_gids || req.gid < 0)
+		return -EINVAL;
+
+	gru = GID_TO_GRU(req.gid);
+	ubuf = req.buf;
+	ubufend = req.buf + req.buflen;
+
+	ret = gru_dump_tfm(gru, ubuf, ubufend);
+	if (ret < 0)
+		goto fail;
+	ubuf += ret;
+
+	ret = gru_dump_tgh(gru, ubuf, ubufend);
+	if (ret < 0)
+		goto fail;
+	ubuf += ret;
+
+	for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
+		if (req.ctxnum == ctxnum || req.ctxnum < 0) {
+			ret = gru_dump_context(gru, ctxnum, ubuf, ubufend,
+						req.data_opt, req.lock_cch,
+						req.flush_cbrs);
+			if (ret < 0)
+				goto fail;
+			ubuf += ret;
+			cnt++;
+		}
+	}
+
+	if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+		return -EFAULT;
+	return cnt;
+
+fail:
+	return ret;
+}
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
new file mode 100644
index 00000000000..34749ee88df
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -0,0 +1,1161 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              KERNEL SERVICES THAT USE THE GRU
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <asm/io_apic.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+#include "grukservices.h"
+#include "gru_instructions.h"
+#include <asm/uv/uv_hub.h>
+
+/*
+ * Kernel GRU Usage
+ *
+ * The following is an interim algorithm for management of kernel GRU
+ * resources. This will likely be replaced when we better understand the
+ * kernel/user requirements.
+ *
+ * Blade percpu resources reserved for kernel use. These resources are
+ * reserved whenever the the kernel context for the blade is loaded. Note
+ * that the kernel context is not guaranteed to be always available. It is
+ * loaded on demand & can be stolen by a user if the user demand exceeds the
+ * kernel demand. The kernel can always reload the kernel context but
+ * a SLEEP may be required!!!.
+ *
+ * Async Overview:
+ *
+ * 	Each blade has one "kernel context" that owns GRU kernel resources
+ * 	located on the blade. Kernel drivers use GRU resources in this context
+ * 	for sending messages, zeroing memory, etc.
+ *
+ * 	The kernel context is dynamically loaded on demand. If it is not in
+ * 	use by the kernel, the kernel context can be unloaded & given to a user.
+ * 	The kernel context will be reloaded when needed. This may require that
+ * 	a context be stolen from a user.
+ * 		NOTE: frequent unloading/reloading of the kernel context is
+ * 		expensive. We are depending on batch schedulers, cpusets, sane
+ * 		drivers or some other mechanism to prevent the need for frequent
+ *	 	stealing/reloading.
+ *
+ * 	The kernel context consists of two parts:
+ * 		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
+ * 		  Each cpu has it's own private resources & does not share them
+ * 		  with other cpus. These resources are used serially, ie,
+ * 		  locked, used & unlocked  on each call to a function in
+ * 		  grukservices.
+ * 		  	(Now that we have dynamic loading of kernel contexts, I
+ * 		  	 may rethink this & allow sharing between cpus....)
+ *
+ *		- Additional resources can be reserved long term & used directly
+ *		  by UV drivers located in the kernel. Drivers using these GRU
+ *		  resources can use asynchronous GRU instructions that send
+ *		  interrupts on completion.
+ *		  	- these resources must be explicitly locked/unlocked
+ *		  	- locked resources prevent (obviously) the kernel
+ *		  	  context from being unloaded.
+ *			- drivers using these resource directly issue their own
+ *			  GRU instruction and must wait/check completion.
+ *
+ * 		  When these resources are reserved, the caller can optionally
+ * 		  associate a wait_queue with the resources and use asynchronous
+ * 		  GRU instructions. When an async GRU instruction completes, the
+ * 		  driver will do a wakeup on the event.
+ *
+ */
+
+
+#define ASYNC_HAN_TO_BID(h)	((h) - 1)
+#define ASYNC_BID_TO_HAN(b)	((b) + 1)
+#define ASYNC_HAN_TO_BS(h)	gru_base[ASYNC_HAN_TO_BID(h)]
+
+#define GRU_NUM_KERNEL_CBR	1
+#define GRU_NUM_KERNEL_DSR_BYTES 256
+#define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /		\
+					GRU_CACHE_LINE_BYTES)
+
+/* GRU instruction attributes for all instructions */
+#define IMA			IMA_CB_DELAY
+
+/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
+#define __gru_cacheline_aligned__                               \
+	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
+
+#define MAGIC	0x1234567887654321UL
+
+/* Default retry count for GRU errors on kernel instructions */
+#define EXCEPTION_RETRY_LIMIT	3
+
+/* Status of message queue sections */
+#define MQS_EMPTY		0
+#define MQS_FULL		1
+#define MQS_NOOP		2
+
+/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
+/* optimized for x86_64 */
+struct message_queue {
+	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
+	int			qlines;				/* DW 1 */
+	long 			hstatus[2];
+	void 			*next __gru_cacheline_aligned__;/* CL 1 */
+	void 			*limit;
+	void 			*start;
+	void 			*start2;
+	char			data ____cacheline_aligned;	/* CL 2 */
+};
+
+/* First word in every message - used by mesq interface */
+struct message_header {
+	char	present;
+	char	present2;
+	char 	lines;
+	char	fill;
+};
+
+#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))
+
+/*
+ * Reload the blade's kernel context into a GRU chiplet. Called holding
+ * the bs_kgts_sema for READ. Will steal user contexts if necessary.
+ */
+static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
+{
+	struct gru_state *gru;
+	struct gru_thread_state *kgts;
+	void *vaddr;
+	int ctxnum, ncpus;
+
+	up_read(&bs->bs_kgts_sema);
+	down_write(&bs->bs_kgts_sema);
+
+	if (!bs->bs_kgts) {
+		bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
+		bs->bs_kgts->ts_user_blade_id = blade_id;
+	}
+	kgts = bs->bs_kgts;
+
+	if (!kgts->ts_gru) {
+		STAT(load_kernel_context);
+		ncpus = uv_blade_nr_possible_cpus(blade_id);
+		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
+			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
+		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
+			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
+				bs->bs_async_dsr_bytes);
+		while (!gru_assign_gru_context(kgts)) {
+			msleep(1);
+			gru_steal_context(kgts);
+		}
+		gru_load_context(kgts);
+		gru = bs->bs_kgts->ts_gru;
+		vaddr = gru->gs_gru_base_vaddr;
+		ctxnum = kgts->ts_ctxnum;
+		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
+		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
+	}
+	downgrade_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Free all kernel contexts that are not currently in use.
+ *   Returns 0 if all freed, else number of inuse context.
+ */
+static int gru_free_kernel_contexts(void)
+{
+	struct gru_blade_state *bs;
+	struct gru_thread_state *kgts;
+	int bid, ret = 0;
+
+	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
+		bs = gru_base[bid];
+		if (!bs)
+			continue;
+
+		/* Ignore busy contexts. Don't want to block here.  */
+		if (down_write_trylock(&bs->bs_kgts_sema)) {
+			kgts = bs->bs_kgts;
+			if (kgts && kgts->ts_gru)
+				gru_unload_context(kgts, 0);
+			bs->bs_kgts = NULL;
+			up_write(&bs->bs_kgts_sema);
+			kfree(kgts);
+		} else {
+			ret++;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Lock & load the kernel context for the specified blade.
+ */
+static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
+{
+	struct gru_blade_state *bs;
+	int bid;
+
+	STAT(lock_kernel_context);
+again:
+	bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
+	bs = gru_base[bid];
+
+	/* Handle the case where migration occured while waiting for the sema */
+	down_read(&bs->bs_kgts_sema);
+	if (blade_id < 0 && bid != uv_numa_blade_id()) {
+		up_read(&bs->bs_kgts_sema);
+		goto again;
+	}
+	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
+		gru_load_kernel_context(bs, bid);
+	return bs;
+
+}
+
+/*
+ * Unlock the kernel context for the specified blade. Context is not
+ * unloaded but may be stolen before next use.
+ */
+static void gru_unlock_kernel_context(int blade_id)
+{
+	struct gru_blade_state *bs;
+
+	bs = gru_base[blade_id];
+	up_read(&bs->bs_kgts_sema);
+	STAT(unlock_kernel_context);
+}
+
+/*
+ * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
+ * 	- returns with preemption disabled
+ */
+static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
+{
+	struct gru_blade_state *bs;
+	int lcpu;
+
+	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
+	preempt_disable();
+	bs = gru_lock_kernel_context(-1);
+	lcpu = uv_blade_processor_id();
+	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
+	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
+	return 0;
+}
+
+/*
+ * Free the current cpus reserved DSR/CBR resources.
+ */
+static void gru_free_cpu_resources(void *cb, void *dsr)
+{
+	gru_unlock_kernel_context(uv_numa_blade_id());
+	preempt_enable();
+}
+
+/*
+ * Reserve GRU resources to be used asynchronously.
+ *   Note: currently supports only 1 reservation per blade.
+ *
+ * 	input:
+ * 		blade_id  - blade on which resources should be reserved
+ * 		cbrs	  - number of CBRs
+ * 		dsr_bytes - number of DSR bytes needed
+ *	output:
+ *		handle to identify resource
+ *		(0 = async resources already reserved)
+ */
+unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+			struct completion *cmp)
+{
+	struct gru_blade_state *bs;
+	struct gru_thread_state *kgts;
+	int ret = 0;
+
+	bs = gru_base[blade_id];
+
+	down_write(&bs->bs_kgts_sema);
+
+	/* Verify no resources already reserved */
+	if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
+		goto done;
+	bs->bs_async_dsr_bytes = dsr_bytes;
+	bs->bs_async_cbrs = cbrs;
+	bs->bs_async_wq = cmp;
+	kgts = bs->bs_kgts;
+
+	/* Resources changed. Unload context if already loaded */
+	if (kgts && kgts->ts_gru)
+		gru_unload_context(kgts, 0);
+	ret = ASYNC_BID_TO_HAN(blade_id);
+
+done:
+	up_write(&bs->bs_kgts_sema);
+	return ret;
+}
+
+/*
+ * Release async resources previously reserved.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+void gru_release_async_resources(unsigned long han)
+{
+	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+	down_write(&bs->bs_kgts_sema);
+	bs->bs_async_dsr_bytes = 0;
+	bs->bs_async_cbrs = 0;
+	bs->bs_async_wq = NULL;
+	up_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+void gru_wait_async_cbr(unsigned long han)
+{
+	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+	wait_for_completion(bs->bs_async_wq);
+	mb();
+}
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ *	output:
+ *		cb  - pointer to first CBR
+ *		dsr - pointer to first DSR
+ */
+void gru_lock_async_resource(unsigned long han,  void **cb, void **dsr)
+{
+	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+	int blade_id = ASYNC_HAN_TO_BID(han);
+	int ncpus;
+
+	gru_lock_kernel_context(blade_id);
+	ncpus = uv_blade_nr_possible_cpus(blade_id);
+	if (cb)
+		*cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
+	if (dsr)
+		*dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
+}
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+void gru_unlock_async_resource(unsigned long han)
+{
+	int blade_id = ASYNC_HAN_TO_BID(han);
+
+	gru_unlock_kernel_context(blade_id);
+}
+
+/*----------------------------------------------------------------------*/
+int gru_get_cb_exception_detail(void *cb,
+		struct control_block_extended_exc_detail *excdet)
+{
+	struct gru_control_block_extended *cbe;
+	struct gru_thread_state *kgts = NULL;
+	unsigned long off;
+	int cbrnum, bid;
+
+	/*
+	 * Locate kgts for cb. This algorithm is SLOW but
+	 * this function is rarely called (ie., almost never).
+	 * Performance does not matter.
+	 */
+	for_each_possible_blade(bid) {
+		if (!gru_base[bid])
+			break;
+		kgts = gru_base[bid]->bs_kgts;
+		if (!kgts || !kgts->ts_gru)
+			continue;
+		off = cb - kgts->ts_gru->gs_gru_base_vaddr;
+		if (off < GRU_SIZE)
+			break;
+		kgts = NULL;
+	}
+	BUG_ON(!kgts);
+	cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
+	cbe = get_cbe(GRUBASE(cb), cbrnum);
+	gru_flush_cache(cbe);	/* CBE not coherent */
+	sync_core();
+	excdet->opc = cbe->opccpy;
+	excdet->exopc = cbe->exopccpy;
+	excdet->ecause = cbe->ecause;
+	excdet->exceptdet0 = cbe->idef1upd;
+	excdet->exceptdet1 = cbe->idef3upd;
+	gru_flush_cache(cbe);
+	return 0;
+}
+
+char *gru_get_cb_exception_detail_str(int ret, void *cb,
+				      char *buf, int size)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	struct control_block_extended_exc_detail excdet;
+
+	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
+		gru_get_cb_exception_detail(cb, &excdet);
+		snprintf(buf, size,
+			"GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
+			"excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
+			gen, excdet.opc, excdet.exopc, excdet.ecause,
+			excdet.exceptdet0, excdet.exceptdet1);
+	} else {
+		snprintf(buf, size, "No exception");
+	}
+	return buf;
+}
+
+static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
+{
+	while (gen->istatus >= CBS_ACTIVE) {
+		cpu_relax();
+		barrier();
+	}
+	return gen->istatus;
+}
+
+static int gru_retry_exception(void *cb)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	struct control_block_extended_exc_detail excdet;
+	int retry = EXCEPTION_RETRY_LIMIT;
+
+	while (1)  {
+		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
+			return CBS_IDLE;
+		if (gru_get_cb_message_queue_substatus(cb))
+			return CBS_EXCEPTION;
+		gru_get_cb_exception_detail(cb, &excdet);
+		if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
+				(excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
+			break;
+		if (retry-- == 0)
+			break;
+		gen->icmd = 1;
+		gru_flush_cache(gen);
+	}
+	return CBS_EXCEPTION;
+}
+
+int gru_check_status_proc(void *cb)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	int ret;
+
+	ret = gen->istatus;
+	if (ret == CBS_EXCEPTION)
+		ret = gru_retry_exception(cb);
+	rmb();
+	return ret;
+
+}
+
+int gru_wait_proc(void *cb)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	int ret;
+
+	ret = gru_wait_idle_or_exception(gen);
+	if (ret == CBS_EXCEPTION)
+		ret = gru_retry_exception(cb);
+	rmb();
+	return ret;
+}
+
+void gru_abort(int ret, void *cb, char *str)
+{
+	char buf[GRU_EXC_STR_SIZE];
+
+	panic("GRU FATAL ERROR: %s - %s\n", str,
+	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
+}
+
+void gru_wait_abort_proc(void *cb)
+{
+	int ret;
+
+	ret = gru_wait_proc(cb);
+	if (ret)
+		gru_abort(ret, cb, "gru_wait_abort");
+}
+
+
+/*------------------------------ MESSAGE QUEUES -----------------------------*/
+
+/* Internal status . These are NOT returned to the user. */
+#define MQIE_AGAIN		-1	/* try again */
+
+
+/*
+ * Save/restore the "present" flag that is in the second line of 2-line
+ * messages
+ */
+static inline int get_present2(void *p)
+{
+	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
+	return mhdr->present;
+}
+
+static inline void restore_present2(void *p, int val)
+{
+	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
+	mhdr->present = val;
+}
+
+/*
+ * Create a message queue.
+ * 	qlines - message queue size in cache lines. Includes 2-line header.
+ */
+int gru_create_message_queue(struct gru_message_queue_desc *mqd,
+		void *p, unsigned int bytes, int nasid, int vector, int apicid)
+{
+	struct message_queue *mq = p;
+	unsigned int qlines;
+
+	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
+	memset(mq, 0, bytes);
+	mq->start = &mq->data;
+	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
+	mq->next = &mq->data;
+	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
+	mq->qlines = qlines;
+	mq->hstatus[0] = 0;
+	mq->hstatus[1] = 1;
+	mq->head = gru_mesq_head(2, qlines / 2 + 1);
+	mqd->mq = mq;
+	mqd->mq_gpa = uv_gpa(mq);
+	mqd->qlines = qlines;
+	mqd->interrupt_pnode = nasid >> 1;
+	mqd->interrupt_vector = vector;
+	mqd->interrupt_apicid = apicid;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gru_create_message_queue);
+
+/*
+ * Send a NOOP message to a message queue
+ * 	Returns:
+ * 		 0 - if queue is full after the send. This is the normal case
+ * 		     but various races can change this.
+ *		-1 - if mesq sent successfully but queue not full
+ *		>0 - unexpected error. MQE_xxx returned
+ */
+static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg)
+{
+	const struct message_header noop_header = {
+					.present = MQS_NOOP, .lines = 1};
+	unsigned long m;
+	int substatus, ret;
+	struct message_header save_mhdr, *mhdr = mesg;
+
+	STAT(mesq_noop);
+	save_mhdr = *mhdr;
+	*mhdr = noop_header;
+	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
+	ret = gru_wait(cb);
+
+	if (ret) {
+		substatus = gru_get_cb_message_queue_substatus(cb);
+		switch (substatus) {
+		case CBSS_NO_ERROR:
+			STAT(mesq_noop_unexpected_error);
+			ret = MQE_UNEXPECTED_CB_ERR;
+			break;
+		case CBSS_LB_OVERFLOWED:
+			STAT(mesq_noop_lb_overflow);
+			ret = MQE_CONGESTION;
+			break;
+		case CBSS_QLIMIT_REACHED:
+			STAT(mesq_noop_qlimit_reached);
+			ret = 0;
+			break;
+		case CBSS_AMO_NACKED:
+			STAT(mesq_noop_amo_nacked);
+			ret = MQE_CONGESTION;
+			break;
+		case CBSS_PUT_NACKED:
+			STAT(mesq_noop_put_nacked);
+			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
+						IMA);
+			if (gru_wait(cb) == CBS_IDLE)
+				ret = MQIE_AGAIN;
+			else
+				ret = MQE_UNEXPECTED_CB_ERR;
+			break;
+		case CBSS_PAGE_OVERFLOW:
+			STAT(mesq_noop_page_overflow);
+			/* fallthru */
+		default:
+			BUG();
+		}
+	}
+	*mhdr = save_mhdr;
+	return ret;
+}
+
+/*
+ * Handle a gru_mesq full.
+ */
+static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg, int lines)
+{
+	union gru_mesqhead mqh;
+	unsigned int limit, head;
+	unsigned long avalue;
+	int half, qlines;
+
+	/* Determine if switching to first/second half of q */
+	avalue = gru_get_amo_value(cb);
+	head = gru_get_amo_value_head(cb);
+	limit = gru_get_amo_value_limit(cb);
+
+	qlines = mqd->qlines;
+	half = (limit != qlines);
+
+	if (half)
+		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
+	else
+		mqh = gru_mesq_head(2, qlines / 2 + 1);
+
+	/* Try to get lock for switching head pointer */
+	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		goto cberr;
+	if (!gru_get_amo_value(cb)) {
+		STAT(mesq_qf_locked);
+		return MQE_QUEUE_FULL;
+	}
+
+	/* Got the lock. Send optional NOP if queue not full, */
+	if (head != limit) {
+		if (send_noop_message(cb, mqd, mesg)) {
+			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
+					XTYPE_DW, IMA);
+			if (gru_wait(cb) != CBS_IDLE)
+				goto cberr;
+			STAT(mesq_qf_noop_not_full);
+			return MQIE_AGAIN;
+		}
+		avalue++;
+	}
+
+	/* Then flip queuehead to other half of queue. */
+	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
+							IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		goto cberr;
+
+	/* If not successfully in swapping queue head, clear the hstatus lock */
+	if (gru_get_amo_value(cb) != avalue) {
+		STAT(mesq_qf_switch_head_failed);
+		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
+							IMA);
+		if (gru_wait(cb) != CBS_IDLE)
+			goto cberr;
+	}
+	return MQIE_AGAIN;
+cberr:
+	STAT(mesq_qf_unexpected_error);
+	return MQE_UNEXPECTED_CB_ERR;
+}
+
+/*
+ * Handle a PUT failure. Note: if message was a 2-line message, one of the
+ * lines might have successfully have been written. Before sending the
+ * message, "present" must be cleared in BOTH lines to prevent the receiver
+ * from prematurely seeing the full message.
+ */
+static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
+			void *mesg, int lines)
+{
+	unsigned long m, *val = mesg, gpa, save;
+	int ret;
+
+	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+	if (lines == 2) {
+		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
+		if (gru_wait(cb) != CBS_IDLE)
+			return MQE_UNEXPECTED_CB_ERR;
+	}
+	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		return MQE_UNEXPECTED_CB_ERR;
+
+	if (!mqd->interrupt_vector)
+		return MQE_OK;
+
+	/*
+	 * Send a cross-partition interrupt to the SSI that contains the target
+	 * message queue. Normally, the interrupt is automatically delivered by
+	 * hardware but some error conditions require explicit delivery.
+	 * Use the GRU to deliver the interrupt. Otherwise partition failures
+	 * could cause unrecovered errors.
+	 */
+	gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT);
+	save = *val;
+	*val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector,
+				dest_Fixed);
+	gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA);
+	ret = gru_wait(cb);
+	*val = save;
+	if (ret != CBS_IDLE)
+		return MQE_UNEXPECTED_CB_ERR;
+	return MQE_OK;
+}
+
+/*
+ * Handle a gru_mesq failure. Some of these failures are software recoverable
+ * or retryable.
+ */
+static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg, int lines)
+{
+	int substatus, ret = 0;
+
+	substatus = gru_get_cb_message_queue_substatus(cb);
+	switch (substatus) {
+	case CBSS_NO_ERROR:
+		STAT(mesq_send_unexpected_error);
+		ret = MQE_UNEXPECTED_CB_ERR;
+		break;
+	case CBSS_LB_OVERFLOWED:
+		STAT(mesq_send_lb_overflow);
+		ret = MQE_CONGESTION;
+		break;
+	case CBSS_QLIMIT_REACHED:
+		STAT(mesq_send_qlimit_reached);
+		ret = send_message_queue_full(cb, mqd, mesg, lines);
+		break;
+	case CBSS_AMO_NACKED:
+		STAT(mesq_send_amo_nacked);
+		ret = MQE_CONGESTION;
+		break;
+	case CBSS_PUT_NACKED:
+		STAT(mesq_send_put_nacked);
+		ret = send_message_put_nacked(cb, mqd, mesg, lines);
+		break;
+	case CBSS_PAGE_OVERFLOW:
+		STAT(mesq_page_overflow);
+		/* fallthru */
+	default:
+		BUG();
+	}
+	return ret;
+}
+
+/*
+ * Send a message to a message queue
+ * 	mqd	message queue descriptor
+ * 	mesg	message. ust be vaddr within a GSEG
+ * 	bytes	message size (<= 2 CL)
+ */
+int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
+				unsigned int bytes)
+{
+	struct message_header *mhdr;
+	void *cb;
+	void *dsr;
+	int istatus, clines, ret;
+
+	STAT(mesq_send);
+	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
+
+	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
+	if (gru_get_cpu_resources(bytes, &cb, &dsr))
+		return MQE_BUG_NO_RESOURCES;
+	memcpy(dsr, mesg, bytes);
+	mhdr = dsr;
+	mhdr->present = MQS_FULL;
+	mhdr->lines = clines;
+	if (clines == 2) {
+		mhdr->present2 = get_present2(mhdr);
+		restore_present2(mhdr, MQS_FULL);
+	}
+
+	do {
+		ret = MQE_OK;
+		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
+		istatus = gru_wait(cb);
+		if (istatus != CBS_IDLE)
+			ret = send_message_failure(cb, mqd, dsr, clines);
+	} while (ret == MQIE_AGAIN);
+	gru_free_cpu_resources(cb, dsr);
+
+	if (ret)
+		STAT(mesq_send_failed);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gru_send_message_gpa);
+
+/*
+ * Advance the receive pointer for the queue to the next message.
+ */
+void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
+{
+	struct message_queue *mq = mqd->mq;
+	struct message_header *mhdr = mq->next;
+	void *next, *pnext;
+	int half = -1;
+	int lines = mhdr->lines;
+
+	if (lines == 2)
+		restore_present2(mhdr, MQS_EMPTY);
+	mhdr->present = MQS_EMPTY;
+
+	pnext = mq->next;
+	next = pnext + GRU_CACHE_LINE_BYTES * lines;
+	if (next == mq->limit) {
+		next = mq->start;
+		half = 1;
+	} else if (pnext < mq->start2 && next >= mq->start2) {
+		half = 0;
+	}
+
+	if (half >= 0)
+		mq->hstatus[half] = 1;
+	mq->next = next;
+}
+EXPORT_SYMBOL_GPL(gru_free_message);
+
+/*
+ * Get next message from message queue. Return NULL if no message
+ * present. User must call next_message() to move to next message.
+ * 	rmq	message queue
+ */
+void *gru_get_next_message(struct gru_message_queue_desc *mqd)
+{
+	struct message_queue *mq = mqd->mq;
+	struct message_header *mhdr = mq->next;
+	int present = mhdr->present;
+
+	/* skip NOOP messages */
+	while (present == MQS_NOOP) {
+		gru_free_message(mqd, mhdr);
+		mhdr = mq->next;
+		present = mhdr->present;
+	}
+
+	/* Wait for both halves of 2 line messages */
+	if (present == MQS_FULL && mhdr->lines == 2 &&
+				get_present2(mhdr) == MQS_EMPTY)
+		present = MQS_EMPTY;
+
+	if (!present) {
+		STAT(mesq_receive_none);
+		return NULL;
+	}
+
+	if (mhdr->lines == 2)
+		restore_present2(mhdr, mhdr->present2);
+
+	STAT(mesq_receive);
+	return mhdr;
+}
+EXPORT_SYMBOL_GPL(gru_get_next_message);
+
+/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
+
+/*
+ * Load a DW from a global GPA. The GPA can be a memory or MMR address.
+ */
+int gru_read_gpa(unsigned long *value, unsigned long gpa)
+{
+	void *cb;
+	void *dsr;
+	int ret, iaa;
+
+	STAT(read_gpa);
+	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
+		return MQE_BUG_NO_RESOURCES;
+	iaa = gpa >> 62;
+	gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
+	ret = gru_wait(cb);
+	if (ret == CBS_IDLE)
+		*value = *(unsigned long *)dsr;
+	gru_free_cpu_resources(cb, dsr);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gru_read_gpa);
+
+
+/*
+ * Copy a block of data using the GRU resources
+ */
+int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
+				unsigned int bytes)
+{
+	void *cb;
+	void *dsr;
+	int ret;
+
+	STAT(copy_gpa);
+	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
+		return MQE_BUG_NO_RESOURCES;
+	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
+		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
+	ret = gru_wait(cb);
+	gru_free_cpu_resources(cb, dsr);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gru_copy_gpa);
+
+/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
+/* 	Temp - will delete after we gain confidence in the GRU		*/
+
+static int quicktest0(unsigned long arg)
+{
+	unsigned long word0;
+	unsigned long word1;
+	void *cb;
+	void *dsr;
+	unsigned long *p;
+	int ret = -EIO;
+
+	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
+		return MQE_BUG_NO_RESOURCES;
+	p = dsr;
+	word0 = MAGIC;
+	word1 = 0;
+
+	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE) {
+		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
+		goto done;
+	}
+
+	if (*p != MAGIC) {
+		printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
+		goto done;
+	}
+	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE) {
+		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
+		goto done;
+	}
+
+	if (word0 != word1 || word1 != MAGIC) {
+		printk(KERN_DEBUG
+		       "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
+		     smp_processor_id(), word1, MAGIC);
+		goto done;
+	}
+	ret = 0;
+
+done:
+	gru_free_cpu_resources(cb, dsr);
+	return ret;
+}
+
+#define ALIGNUP(p, q)	((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))
+
+static int quicktest1(unsigned long arg)
+{
+	struct gru_message_queue_desc mqd;
+	void *p, *mq;
+	unsigned long *dw;
+	int i, ret = -EIO;
+	char mes[GRU_CACHE_LINE_BYTES], *m;
+
+	/* Need  1K cacheline aligned that does not cross page boundary */
+	p = kmalloc(4096, 0);
+	if (p == NULL)
+		return -ENOMEM;
+	mq = ALIGNUP(p, 1024);
+	memset(mes, 0xee, sizeof(mes));
+	dw = mq;
+
+	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
+	for (i = 0; i < 6; i++) {
+		mes[8] = i;
+		do {
+			ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
+		} while (ret == MQE_CONGESTION);
+		if (ret)
+			break;
+	}
+	if (ret != MQE_QUEUE_FULL || i != 4) {
+		printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n",
+		       smp_processor_id(), ret, i);
+		goto done;
+	}
+
+	for (i = 0; i < 6; i++) {
+		m = gru_get_next_message(&mqd);
+		if (!m || m[8] != i)
+			break;
+		gru_free_message(&mqd, m);
+	}
+	if (i != 4) {
+		printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n",
+			smp_processor_id(), i, m, m ? m[8] : -1);
+		goto done;
+	}
+	ret = 0;
+
+done:
+	kfree(p);
+	return ret;
+}
+
+static int quicktest2(unsigned long arg)
+{
+	static DECLARE_COMPLETION(cmp);
+	unsigned long han;
+	int blade_id = 0;
+	int numcb = 4;
+	int ret = 0;
+	unsigned long *buf;
+	void *cb0, *cb;
+	struct gru_control_block_status *gen;
+	int i, k, istatus, bytes;
+
+	bytes = numcb * 4 * 8;
+	buf = kmalloc(bytes, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = -EBUSY;
+	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
+	if (!han)
+		goto done;
+
+	gru_lock_async_resource(han, &cb0, NULL);
+	memset(buf, 0xee, bytes);
+	for (i = 0; i < numcb; i++)
+		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
+				XTYPE_DW, 4, 1, IMA_INTERRUPT);
+
+	ret = 0;
+	k = numcb;
+	do {
+		gru_wait_async_cbr(han);
+		for (i = 0; i < numcb; i++) {
+			cb = cb0 + i * GRU_HANDLE_STRIDE;
+			istatus = gru_check_status(cb);
+			if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
+				break;
+		}
+		if (i == numcb)
+			continue;
+		if (istatus != CBS_IDLE) {
+			printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
+			ret = -EFAULT;
+		} else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
+				buf[4 * i + 3]) {
+			printk(KERN_DEBUG "GRU:%d quicktest2:cb %d,  buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
+			       smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
+			ret = -EIO;
+		}
+		k--;
+		gen = cb;
+		gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
+	} while (k);
+	BUG_ON(cmp.done);
+
+	gru_unlock_async_resource(han);
+	gru_release_async_resources(han);
+done:
+	kfree(buf);
+	return ret;
+}
+
+#define BUFSIZE 200
+static int quicktest3(unsigned long arg)
+{
+	char buf1[BUFSIZE], buf2[BUFSIZE];
+	int ret = 0;
+
+	memset(buf2, 0, sizeof(buf2));
+	memset(buf1, get_cycles() & 255, sizeof(buf1));
+	gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
+	if (memcmp(buf1, buf2, BUFSIZE)) {
+		printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
+		ret = -EIO;
+	}
+	return ret;
+}
+
+/*
+ * Debugging only. User hook for various kernel tests
+ * of driver & gru.
+ */
+int gru_ktest(unsigned long arg)
+{
+	int ret = -EINVAL;
+
+	switch (arg & 0xff) {
+	case 0:
+		ret = quicktest0(arg);
+		break;
+	case 1:
+		ret = quicktest1(arg);
+		break;
+	case 2:
+		ret = quicktest2(arg);
+		break;
+	case 3:
+		ret = quicktest3(arg);
+		break;
+	case 99:
+		ret = gru_free_kernel_contexts();
+		break;
+	}
+	return ret;
+
+}
+
+int gru_kservices_init(void)
+{
+	return 0;
+}
+
+void gru_kservices_exit(void)
+{
+	if (gru_free_kernel_contexts())
+		BUG();
+}
+
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
new file mode 100644
index 00000000000..02aa94d8484
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -0,0 +1,214 @@
+
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#ifndef __GRU_KSERVICES_H_
+#define __GRU_KSERVICES_H_
+
+
+/*
+ * Message queues using the GRU to send/receive messages.
+ *
+ * These function allow the user to create a message queue for
+ * sending/receiving 1 or 2 cacheline messages using the GRU.
+ *
+ * Processes SENDING messages will use a kernel CBR/DSR to send
+ * the message. This is transparent to the caller.
+ *
+ * The receiver does not use any GRU resources.
+ *
+ * The functions support:
+ * 	- single receiver
+ * 	- multiple senders
+ *	- cross partition message
+ *
+ * Missing features ZZZ:
+ * 	- user options for dealing with timeouts, queue full, etc.
+ * 	- gru_create_message_queue() needs interrupt vector info
+ */
+
+struct gru_message_queue_desc {
+	void		*mq;			/* message queue vaddress */
+	unsigned long	mq_gpa;			/* global address of mq */
+	int		qlines;			/* queue size in CL */
+	int		interrupt_vector;	/* interrupt vector */
+	int		interrupt_pnode;	/* pnode for interrupt */
+	int		interrupt_apicid;	/* lapicid for interrupt */
+};
+
+/*
+ * Initialize a user allocated chunk of memory to be used as
+ * a message queue. The caller must ensure that the queue is
+ * in contiguous physical memory and is cacheline aligned.
+ *
+ * Message queue size is the total number of bytes allocated
+ * to the queue including a 2 cacheline header that is used
+ * to manage the queue.
+ *
+ *  Input:
+ * 	mqd	pointer to message queue descriptor
+ * 	p	pointer to user allocated mesq memory.
+ * 	bytes	size of message queue in bytes
+ *      vector	interrupt vector (zero if no interrupts)
+ *      nasid	nasid of blade where interrupt is delivered
+ *      apicid	apicid of cpu for interrupt
+ *
+ *  Errors:
+ *  	0	OK
+ *  	>0	error
+ */
+extern int gru_create_message_queue(struct gru_message_queue_desc *mqd,
+		void *p, unsigned int bytes, int nasid, int vector, int apicid);
+
+/*
+ * Send a message to a message queue.
+ *
+ * Note: The message queue transport mechanism uses the first 32
+ * bits of the message. Users should avoid using these bits.
+ *
+ *
+ *   Input:
+ * 	mqd	pointer to message queue descriptor
+ * 	mesg	pointer to message. Must be 64-bit aligned
+ * 	bytes	size of message in bytes
+ *
+ *   Output:
+ *      0	message sent
+ *     >0	Send failure - see error codes below
+ *
+ */
+extern int gru_send_message_gpa(struct gru_message_queue_desc *mqd,
+			void *mesg, unsigned int bytes);
+
+/* Status values for gru_send_message() */
+#define MQE_OK			0	/* message sent successfully */
+#define MQE_CONGESTION		1	/* temporary congestion, try again */
+#define MQE_QUEUE_FULL		2	/* queue is full */
+#define MQE_UNEXPECTED_CB_ERR	3	/* unexpected CB error */
+#define MQE_PAGE_OVERFLOW	10	/* BUG - queue overflowed a page */
+#define MQE_BUG_NO_RESOURCES	11	/* BUG - could not alloc GRU cb/dsr */
+
+/*
+ * Advance the receive pointer for the message queue to the next message.
+ * Note: current API requires messages to be gotten & freed in order. Future
+ * API extensions may allow for out-of-order freeing.
+ *
+ *   Input
+ * 	mqd	pointer to message queue descriptor
+ * 	mesq	message being freed
+ */
+extern void gru_free_message(struct gru_message_queue_desc *mqd,
+			     void *mesq);
+
+/*
+ * Get next message from message queue. Returns pointer to
+ * message OR NULL if no message present.
+ * User must call gru_free_message() after message is processed
+ * in order to move the queue pointers to next message.
+ *
+ *   Input
+ * 	mqd	pointer to message queue descriptor
+ *
+ *   Output:
+ *	p	pointer to message
+ *	NULL	no message available
+ */
+extern void *gru_get_next_message(struct gru_message_queue_desc *mqd);
+
+
+/*
+ * Read a GRU global GPA. Source can be located in a remote partition.
+ *
+ *    Input:
+ *    	value		memory address where MMR value is returned
+ *    	gpa		source numalink physical address of GPA
+ *
+ *    Output:
+ *	0		OK
+ *	>0		error
+ */
+int gru_read_gpa(unsigned long *value, unsigned long gpa);
+
+
+/*
+ * Copy data using the GRU. Source or destination can be located in a remote
+ * partition.
+ *
+ *    Input:
+ *    	dest_gpa	destination global physical address
+ *    	src_gpa		source global physical address
+ *    	bytes		number of bytes to copy
+ *
+ *    Output:
+ *	0		OK
+ *	>0		error
+ */
+extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
+							unsigned int bytes);
+
+/*
+ * Reserve GRU resources to be used asynchronously.
+ *
+ * 	input:
+ * 		blade_id  - blade on which resources should be reserved
+ * 		cbrs	  - number of CBRs
+ * 		dsr_bytes - number of DSR bytes needed
+ * 		cmp	  - completion structure for waiting for
+ * 			    async completions
+ *	output:
+ *		handle to identify resource
+ *		(0 = no resources)
+ */
+extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+				struct completion *cmp);
+
+/*
+ * Release async resources previously reserved.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+extern void gru_release_async_resources(unsigned long han);
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+extern void gru_wait_async_cbr(unsigned long han);
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ *	output:
+ *		cb  - pointer to first CBR
+ *		dsr - pointer to first DSR
+ */
+extern void gru_lock_async_resource(unsigned long han,  void **cb, void **dsr);
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+extern void gru_unlock_async_resource(unsigned long han);
+
+#endif 		/* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
new file mode 100644
index 00000000000..e77d1b1f9d0
--- /dev/null
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -0,0 +1,153 @@
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation; either version 2.1 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRULIB_H__
+#define __GRULIB_H__
+
+#define GRU_BASENAME		"gru"
+#define GRU_FULLNAME		"/dev/gru"
+#define GRU_IOCTL_NUM 		 'G'
+
+/*
+ * Maximum number of GRU segments that a user can have open
+ * ZZZ temp - set high for testing. Revisit.
+ */
+#define GRU_MAX_OPEN_CONTEXTS		32
+
+/* Set Number of Request Blocks */
+#define GRU_CREATE_CONTEXT		_IOWR(GRU_IOCTL_NUM, 1, void *)
+
+/*  Set Context Options */
+#define GRU_SET_CONTEXT_OPTION		_IOWR(GRU_IOCTL_NUM, 4, void *)
+
+/* Fetch exception detail */
+#define GRU_USER_GET_EXCEPTION_DETAIL	_IOWR(GRU_IOCTL_NUM, 6, void *)
+
+/* For user call_os handling - normally a TLB fault */
+#define GRU_USER_CALL_OS		_IOWR(GRU_IOCTL_NUM, 8, void *)
+
+/* For user unload context */
+#define GRU_USER_UNLOAD_CONTEXT		_IOWR(GRU_IOCTL_NUM, 9, void *)
+
+/* For dumpping GRU chiplet state */
+#define GRU_DUMP_CHIPLET_STATE		_IOWR(GRU_IOCTL_NUM, 11, void *)
+
+/* For getting gseg statistics */
+#define GRU_GET_GSEG_STATISTICS		_IOWR(GRU_IOCTL_NUM, 12, void *)
+
+/* For user TLB flushing (primarily for tests) */
+#define GRU_USER_FLUSH_TLB		_IOWR(GRU_IOCTL_NUM, 50, void *)
+
+/* Get some config options (primarily for tests & emulator) */
+#define GRU_GET_CONFIG_INFO		_IOWR(GRU_IOCTL_NUM, 51, void *)
+
+/* Various kernel self-tests */
+#define GRU_KTEST			_IOWR(GRU_IOCTL_NUM, 52, void *)
+
+#define CONTEXT_WINDOW_BYTES(th)        (GRU_GSEG_PAGESIZE * (th))
+#define THREAD_POINTER(p, th)		(p + GRU_GSEG_PAGESIZE * (th))
+#define GSEG_START(cb)			((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1)))
+
+struct gru_get_gseg_statistics_req {
+	unsigned long			gseg;
+	struct gru_gseg_statistics	stats;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+struct gru_create_context_req {
+	unsigned long		gseg;
+	unsigned int		data_segment_bytes;
+	unsigned int		control_blocks;
+	unsigned int		maximum_thread_count;
+	unsigned int		options;
+	unsigned char		tlb_preload_count;
+};
+
+/*
+ * Structure used to pass unload context parameters to the driver
+ */
+struct gru_unload_context_req {
+	unsigned long	gseg;
+};
+
+/*
+ * Structure used to set context options
+ */
+enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet};
+struct gru_set_context_option_req {
+	unsigned long	gseg;
+	int		op;
+	int		val0;
+	long		val1;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+struct gru_flush_tlb_req {
+	unsigned long	gseg;
+	unsigned long	vaddr;
+	size_t		len;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+enum {dcs_pid, dcs_gid};
+struct gru_dump_chiplet_state_req {
+	unsigned int	op;
+	unsigned int	gid;
+	int		ctxnum;
+	char		data_opt;
+	char		lock_cch;
+	char		flush_cbrs;
+	char		fill[10];
+	pid_t		pid;
+	void		*buf;
+	size_t		buflen;
+	/* ---- output --- */
+	unsigned int	num_contexts;
+};
+
+#define GRU_DUMP_MAGIC	0x3474ab6c
+struct gru_dump_context_header {
+	unsigned int	magic;
+	unsigned int	gid;
+	unsigned char	ctxnum;
+	unsigned char	cbrcnt;
+	unsigned char	dsrcnt;
+	pid_t		pid;
+	unsigned long	vaddr;
+	int		cch_locked;
+	unsigned long	data[0];
+};
+
+/*
+ * GRU configuration info (temp - for testing)
+ */
+struct gru_config_info {
+	int		cpus;
+	int		blades;
+	int		nodes;
+	int		chiplets;
+	int		fill[16];
+};
+
+#endif /* __GRULIB_H__ */
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
new file mode 100644
index 00000000000..f8538bbd0bf
--- /dev/null
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -0,0 +1,972 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *            DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/err.h>
+#include <asm/uv/uv_hub.h>
+#include "gru.h"
+#include "grutables.h"
+#include "gruhandles.h"
+
+unsigned long gru_options __read_mostly;
+
+static struct device_driver gru_driver = {
+	.name = "gru"
+};
+
+static struct device gru_device = {
+	.init_name = "",
+	.driver = &gru_driver,
+};
+
+struct device *grudev = &gru_device;
+
+/*
+ * Select a gru fault map to be used by the current cpu. Note that
+ * multiple cpus may be using the same map.
+ *	ZZZ should be inline but did not work on emulator
+ */
+int gru_cpu_fault_map_id(void)
+{
+#ifdef CONFIG_IA64
+	return uv_blade_processor_id() % GRU_NUM_TFM;
+#else
+	int cpu = smp_processor_id();
+	int id, core;
+
+	core = uv_cpu_core_number(cpu);
+	id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
+	return id;
+#endif
+}
+
+/*--------- ASID Management -------------------------------------------
+ *
+ *  Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
+ *  Once MAX is reached, flush the TLB & start over. However,
+ *  some asids may still be in use. There won't be many (percentage wise) still
+ *  in use. Search active contexts & determine the value of the first
+ *  asid in use ("x"s below). Set "limit" to this value.
+ *  This defines a block of assignable asids.
+ *
+ *  When "limit" is reached, search forward from limit+1 and determine the
+ *  next block of assignable asids.
+ *
+ *  Repeat until MAX_ASID is reached, then start over again.
+ *
+ *  Each time MAX_ASID is reached, increment the asid generation. Since
+ *  the search for in-use asids only checks contexts with GRUs currently
+ *  assigned, asids in some contexts will be missed. Prior to loading
+ *  a context, the asid generation of the GTS asid is rechecked. If it
+ *  doesn't match the current generation, a new asid will be assigned.
+ *
+ *   	0---------------x------------x---------------------x----|
+ *	  ^-next	^-limit	   				^-MAX_ASID
+ *
+ * All asid manipulation & context loading/unloading is protected by the
+ * gs_lock.
+ */
+
+/* Hit the asid limit. Start over */
+static int gru_wrap_asid(struct gru_state *gru)
+{
+	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
+	STAT(asid_wrap);
+	gru->gs_asid_gen++;
+	return MIN_ASID;
+}
+
+/* Find the next chunk of unused asids */
+static int gru_reset_asid_limit(struct gru_state *gru, int asid)
+{
+	int i, gid, inuse_asid, limit;
+
+	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
+	STAT(asid_next);
+	limit = MAX_ASID;
+	if (asid >= limit)
+		asid = gru_wrap_asid(gru);
+	gru_flush_all_tlb(gru);
+	gid = gru->gs_gid;
+again:
+	for (i = 0; i < GRU_NUM_CCH; i++) {
+		if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
+			continue;
+		inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
+		gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
+			gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms,
+			inuse_asid, i);
+		if (inuse_asid == asid) {
+			asid += ASID_INC;
+			if (asid >= limit) {
+				/*
+				 * empty range: reset the range limit and
+				 * start over
+				 */
+				limit = MAX_ASID;
+				if (asid >= MAX_ASID)
+					asid = gru_wrap_asid(gru);
+				goto again;
+			}
+		}
+
+		if ((inuse_asid > asid) && (inuse_asid < limit))
+			limit = inuse_asid;
+	}
+	gru->gs_asid_limit = limit;
+	gru->gs_asid = asid;
+	gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid,
+					asid, limit);
+	return asid;
+}
+
+/* Assign a new ASID to a thread context.  */
+static int gru_assign_asid(struct gru_state *gru)
+{
+	int asid;
+
+	gru->gs_asid += ASID_INC;
+	asid = gru->gs_asid;
+	if (asid >= gru->gs_asid_limit)
+		asid = gru_reset_asid_limit(gru, asid);
+
+	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
+	return asid;
+}
+
+/*
+ * Clear n bits in a word. Return a word indicating the bits that were cleared.
+ * Optionally, build an array of chars that contain the bit numbers allocated.
+ */
+static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
+				       char *idx)
+{
+	unsigned long bits = 0;
+	int i;
+
+	while (n--) {
+		i = find_first_bit(p, mmax);
+		if (i == mmax)
+			BUG();
+		__clear_bit(i, p);
+		__set_bit(i, &bits);
+		if (idx)
+			*idx++ = i;
+	}
+	return bits;
+}
+
+unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
+				       char *cbmap)
+{
+	return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
+				 cbmap);
+}
+
+unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
+				       char *dsmap)
+{
+	return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
+				 dsmap);
+}
+
+static void reserve_gru_resources(struct gru_state *gru,
+				  struct gru_thread_state *gts)
+{
+	gru->gs_active_contexts++;
+	gts->ts_cbr_map =
+	    gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
+				     gts->ts_cbr_idx);
+	gts->ts_dsr_map =
+	    gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
+}
+
+static void free_gru_resources(struct gru_state *gru,
+			       struct gru_thread_state *gts)
+{
+	gru->gs_active_contexts--;
+	gru->gs_cbr_map |= gts->ts_cbr_map;
+	gru->gs_dsr_map |= gts->ts_dsr_map;
+}
+
+/*
+ * Check if a GRU has sufficient free resources to satisfy an allocation
+ * request. Note: GRU locks may or may not be held when this is called. If
+ * not held, recheck after acquiring the appropriate locks.
+ *
+ * Returns 1 if sufficient resources, 0 if not
+ */
+static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
+			       int dsr_au_count, int max_active_contexts)
+{
+	return hweight64(gru->gs_cbr_map) >= cbr_au_count
+		&& hweight64(gru->gs_dsr_map) >= dsr_au_count
+		&& gru->gs_active_contexts < max_active_contexts;
+}
+
+/*
+ * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG
+ * context.
+ */
+static int gru_load_mm_tracker(struct gru_state *gru,
+					struct gru_thread_state *gts)
+{
+	struct gru_mm_struct *gms = gts->ts_gms;
+	struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
+	unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
+	int asid;
+
+	spin_lock(&gms->ms_asid_lock);
+	asid = asids->mt_asid;
+
+	spin_lock(&gru->gs_asid_lock);
+	if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
+			  gru->gs_asid_gen)) {
+		asid = gru_assign_asid(gru);
+		asids->mt_asid = asid;
+		asids->mt_asid_gen = gru->gs_asid_gen;
+		STAT(asid_new);
+	} else {
+		STAT(asid_reuse);
+	}
+	spin_unlock(&gru->gs_asid_lock);
+
+	BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
+	asids->mt_ctxbitmap |= ctxbitmap;
+	if (!test_bit(gru->gs_gid, gms->ms_asidmap))
+		__set_bit(gru->gs_gid, gms->ms_asidmap);
+	spin_unlock(&gms->ms_asid_lock);
+
+	gru_dbg(grudev,
+		"gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
+		gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
+		gms->ms_asidmap[0]);
+	return asid;
+}
+
+static void gru_unload_mm_tracker(struct gru_state *gru,
+					struct gru_thread_state *gts)
+{
+	struct gru_mm_struct *gms = gts->ts_gms;
+	struct gru_mm_tracker *asids;
+	unsigned short ctxbitmap;
+
+	asids = &gms->ms_asids[gru->gs_gid];
+	ctxbitmap = (1 << gts->ts_ctxnum);
+	spin_lock(&gms->ms_asid_lock);
+	spin_lock(&gru->gs_asid_lock);
+	BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
+	asids->mt_ctxbitmap ^= ctxbitmap;
+	gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum 0x%d, asidmap 0x%lx\n",
+		gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
+	spin_unlock(&gru->gs_asid_lock);
+	spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Decrement the reference count on a GTS structure. Free the structure
+ * if the reference count goes to zero.
+ */
+void gts_drop(struct gru_thread_state *gts)
+{
+	if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
+		if (gts->ts_gms)
+			gru_drop_mmu_notifier(gts->ts_gms);
+		kfree(gts);
+		STAT(gts_free);
+	}
+}
+
+/*
+ * Locate the GTS structure for the current thread.
+ */
+static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
+			    *vdata, int tsid)
+{
+	struct gru_thread_state *gts;
+
+	list_for_each_entry(gts, &vdata->vd_head, ts_next)
+	    if (gts->ts_tsid == tsid)
+		return gts;
+	return NULL;
+}
+
+/*
+ * Allocate a thread state structure.
+ */
+struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+		int cbr_au_count, int dsr_au_count,
+		unsigned char tlb_preload_count, int options, int tsid)
+{
+	struct gru_thread_state *gts;
+	struct gru_mm_struct *gms;
+	int bytes;
+
+	bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
+	bytes += sizeof(struct gru_thread_state);
+	gts = kmalloc(bytes, GFP_KERNEL);
+	if (!gts)
+		return ERR_PTR(-ENOMEM);
+
+	STAT(gts_alloc);
+	memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
+	atomic_set(&gts->ts_refcnt, 1);
+	mutex_init(&gts->ts_ctxlock);
+	gts->ts_cbr_au_count = cbr_au_count;
+	gts->ts_dsr_au_count = dsr_au_count;
+	gts->ts_tlb_preload_count = tlb_preload_count;
+	gts->ts_user_options = options;
+	gts->ts_user_blade_id = -1;
+	gts->ts_user_chiplet_id = -1;
+	gts->ts_tsid = tsid;
+	gts->ts_ctxnum = NULLCTX;
+	gts->ts_tlb_int_select = -1;
+	gts->ts_cch_req_slice = -1;
+	gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
+	if (vma) {
+		gts->ts_mm = current->mm;
+		gts->ts_vma = vma;
+		gms = gru_register_mmu_notifier();
+		if (IS_ERR(gms))
+			goto err;
+		gts->ts_gms = gms;
+	}
+
+	gru_dbg(grudev, "alloc gts %p\n", gts);
+	return gts;
+
+err:
+	gts_drop(gts);
+	return ERR_CAST(gms);
+}
+
+/*
+ * Allocate a vma private data structure.
+ */
+struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
+{
+	struct gru_vma_data *vdata = NULL;
+
+	vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
+	if (!vdata)
+		return NULL;
+
+	STAT(vdata_alloc);
+	INIT_LIST_HEAD(&vdata->vd_head);
+	spin_lock_init(&vdata->vd_lock);
+	gru_dbg(grudev, "alloc vdata %p\n", vdata);
+	return vdata;
+}
+
+/*
+ * Find the thread state structure for the current thread.
+ */
+struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
+					int tsid)
+{
+	struct gru_vma_data *vdata = vma->vm_private_data;
+	struct gru_thread_state *gts;
+
+	spin_lock(&vdata->vd_lock);
+	gts = gru_find_current_gts_nolock(vdata, tsid);
+	spin_unlock(&vdata->vd_lock);
+	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+	return gts;
+}
+
+/*
+ * Allocate a new thread state for a GSEG. Note that races may allow
+ * another thread to race to create a gts.
+ */
+struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
+					int tsid)
+{
+	struct gru_vma_data *vdata = vma->vm_private_data;
+	struct gru_thread_state *gts, *ngts;
+
+	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
+			    vdata->vd_dsr_au_count,
+			    vdata->vd_tlb_preload_count,
+			    vdata->vd_user_options, tsid);
+	if (IS_ERR(gts))
+		return gts;
+
+	spin_lock(&vdata->vd_lock);
+	ngts = gru_find_current_gts_nolock(vdata, tsid);
+	if (ngts) {
+		gts_drop(gts);
+		gts = ngts;
+		STAT(gts_double_allocate);
+	} else {
+		list_add(&gts->ts_next, &vdata->vd_head);
+	}
+	spin_unlock(&vdata->vd_lock);
+	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+	return gts;
+}
+
+/*
+ * Free the GRU context assigned to the thread state.
+ */
+static void gru_free_gru_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru;
+
+	gru = gts->ts_gru;
+	gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);
+
+	spin_lock(&gru->gs_lock);
+	gru->gs_gts[gts->ts_ctxnum] = NULL;
+	free_gru_resources(gru, gts);
+	BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
+	__clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
+	gts->ts_ctxnum = NULLCTX;
+	gts->ts_gru = NULL;
+	gts->ts_blade = -1;
+	spin_unlock(&gru->gs_lock);
+
+	gts_drop(gts);
+	STAT(free_context);
+}
+
+/*
+ * Prefetching cachelines help hardware performance.
+ * (Strictly a performance enhancement. Not functionally required).
+ */
+static void prefetch_data(void *p, int num, int stride)
+{
+	while (num-- > 0) {
+		prefetchw(p);
+		p += stride;
+	}
+}
+
+static inline long gru_copy_handle(void *d, void *s)
+{
+	memcpy(d, s, GRU_HANDLE_BYTES);
+	return GRU_HANDLE_BYTES;
+}
+
+static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
+				unsigned long cbrmap, unsigned long length)
+{
+	int i, scr;
+
+	prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
+		      GRU_CACHE_LINE_BYTES);
+
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
+		prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
+			      GRU_CACHE_LINE_BYTES);
+		cb += GRU_HANDLE_STRIDE;
+	}
+}
+
+static void gru_load_context_data(void *save, void *grubase, int ctxnum,
+				  unsigned long cbrmap, unsigned long dsrmap,
+				  int data_valid)
+{
+	void *gseg, *cb, *cbe;
+	unsigned long length;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
+
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		if (data_valid) {
+			save += gru_copy_handle(cb, save);
+			save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
+						save);
+		} else {
+			memset(cb, 0, GRU_CACHE_LINE_BYTES);
+			memset(cbe + i * GRU_HANDLE_STRIDE, 0,
+						GRU_CACHE_LINE_BYTES);
+		}
+		/* Flush CBE to hide race in context restart */
+		mb();
+		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
+		cb += GRU_HANDLE_STRIDE;
+	}
+
+	if (data_valid)
+		memcpy(gseg + GRU_DS_BASE, save, length);
+	else
+		memset(gseg + GRU_DS_BASE, 0, length);
+}
+
+static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
+				    unsigned long cbrmap, unsigned long dsrmap)
+{
+	void *gseg, *cb, *cbe;
+	unsigned long length;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+
+	/* CBEs may not be coherent. Flush them from cache */
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr)
+		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
+	mb();		/* Let the CL flush complete */
+
+	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
+
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		save += gru_copy_handle(save, cb);
+		save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
+		cb += GRU_HANDLE_STRIDE;
+	}
+	memcpy(save, gseg + GRU_DS_BASE, length);
+}
+
+void gru_unload_context(struct gru_thread_state *gts, int savestate)
+{
+	struct gru_state *gru = gts->ts_gru;
+	struct gru_context_configuration_handle *cch;
+	int ctxnum = gts->ts_ctxnum;
+
+	if (!is_kernel_context(gts))
+		zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
+	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+	gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n",
+		gts, gts->ts_cbr_map, gts->ts_dsr_map);
+	lock_cch_handle(cch);
+	if (cch_interrupt_sync(cch))
+		BUG();
+
+	if (!is_kernel_context(gts))
+		gru_unload_mm_tracker(gru, gts);
+	if (savestate) {
+		gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
+					ctxnum, gts->ts_cbr_map,
+					gts->ts_dsr_map);
+		gts->ts_data_valid = 1;
+	}
+
+	if (cch_deallocate(cch))
+		BUG();
+	unlock_cch_handle(cch);
+
+	gru_free_gru_context(gts);
+}
+
+/*
+ * Load a GRU context by copying it from the thread data structure in memory
+ * to the GRU.
+ */
+void gru_load_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru = gts->ts_gru;
+	struct gru_context_configuration_handle *cch;
+	int i, err, asid, ctxnum = gts->ts_ctxnum;
+
+	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+	lock_cch_handle(cch);
+	cch->tfm_fault_bit_enable =
+	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+	cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+	if (cch->tlb_int_enable) {
+		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+		cch->tlb_int_select = gts->ts_tlb_int_select;
+	}
+	if (gts->ts_cch_req_slice >= 0) {
+		cch->req_slice_set_enable = 1;
+		cch->req_slice = gts->ts_cch_req_slice;
+	} else {
+		cch->req_slice_set_enable =0;
+	}
+	cch->tfm_done_bit_enable = 0;
+	cch->dsr_allocation_map = gts->ts_dsr_map;
+	cch->cbr_allocation_map = gts->ts_cbr_map;
+
+	if (is_kernel_context(gts)) {
+		cch->unmap_enable = 1;
+		cch->tfm_done_bit_enable = 1;
+		cch->cb_int_enable = 1;
+		cch->tlb_int_select = 0;	/* For now, ints go to cpu 0 */
+	} else {
+		cch->unmap_enable = 0;
+		cch->tfm_done_bit_enable = 0;
+		cch->cb_int_enable = 0;
+		asid = gru_load_mm_tracker(gru, gts);
+		for (i = 0; i < 8; i++) {
+			cch->asid[i] = asid + i;
+			cch->sizeavail[i] = gts->ts_sizeavail;
+		}
+	}
+
+	err = cch_allocate(cch);
+	if (err) {
+		gru_dbg(grudev,
+			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
+			err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
+		BUG();
+	}
+
+	gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
+			gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);
+
+	if (cch_start(cch))
+		BUG();
+	unlock_cch_handle(cch);
+
+	gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n",
+		gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map,
+		(gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select);
+}
+
+/*
+ * Update fields in an active CCH:
+ * 	- retarget interrupts on local blade
+ * 	- update sizeavail mask
+ */
+int gru_update_cch(struct gru_thread_state *gts)
+{
+	struct gru_context_configuration_handle *cch;
+	struct gru_state *gru = gts->ts_gru;
+	int i, ctxnum = gts->ts_ctxnum, ret = 0;
+
+	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+	lock_cch_handle(cch);
+	if (cch->state == CCHSTATE_ACTIVE) {
+		if (gru->gs_gts[gts->ts_ctxnum] != gts)
+			goto exit;
+		if (cch_interrupt(cch))
+			BUG();
+		for (i = 0; i < 8; i++)
+			cch->sizeavail[i] = gts->ts_sizeavail;
+		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+		cch->tlb_int_select = gru_cpu_fault_map_id();
+		cch->tfm_fault_bit_enable =
+		  (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+		    || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+		if (cch_start(cch))
+			BUG();
+		ret = 1;
+	}
+exit:
+	unlock_cch_handle(cch);
+	return ret;
+}
+
+/*
+ * Update CCH tlb interrupt select. Required when all the following is true:
+ * 	- task's GRU context is loaded into a GRU
+ * 	- task is using interrupt notification for TLB faults
+ * 	- task has migrated to a different cpu on the same blade where
+ * 	  it was previously running.
+ */
+static int gru_retarget_intr(struct gru_thread_state *gts)
+{
+	if (gts->ts_tlb_int_select < 0
+	    || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
+		return 0;
+
+	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
+		gru_cpu_fault_map_id());
+	return gru_update_cch(gts);
+}
+
+/*
+ * Check if a GRU context is allowed to use a specific chiplet. By default
+ * a context is assigned to any blade-local chiplet. However, users can
+ * override this.
+ * 	Returns 1 if assignment allowed, 0 otherwise
+ */
+static int gru_check_chiplet_assignment(struct gru_state *gru,
+					struct gru_thread_state *gts)
+{
+	int blade_id;
+	int chiplet_id;
+
+	blade_id = gts->ts_user_blade_id;
+	if (blade_id < 0)
+		blade_id = uv_numa_blade_id();
+
+	chiplet_id = gts->ts_user_chiplet_id;
+	return gru->gs_blade_id == blade_id &&
+		(chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id);
+}
+
+/*
+ * Unload the gru context if it is not assigned to the correct blade or
+ * chiplet. Misassignment can occur if the process migrates to a different
+ * blade or if the user changes the selected blade/chiplet.
+ */
+void gru_check_context_placement(struct gru_thread_state *gts)
+{
+	struct gru_state *gru;
+
+	/*
+	 * If the current task is the context owner, verify that the
+	 * context is correctly placed. This test is skipped for non-owner
+	 * references. Pthread apps use non-owner references to the CBRs.
+	 */
+	gru = gts->ts_gru;
+	if (!gru || gts->ts_tgid_owner != current->tgid)
+		return;
+
+	if (!gru_check_chiplet_assignment(gru, gts)) {
+		STAT(check_context_unload);
+		gru_unload_context(gts, 1);
+	} else if (gru_retarget_intr(gts)) {
+		STAT(check_context_retarget_intr);
+	}
+}
+
+
+/*
+ * Insufficient GRU resources available on the local blade. Steal a context from
+ * a process. This is a hack until a _real_ resource scheduler is written....
+ */
+#define next_ctxnum(n)	((n) <  GRU_NUM_CCH - 2 ? (n) + 1 : 0)
+#define next_gru(b, g)	(((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ?  \
+				 ((g)+1) : &(b)->bs_grus[0])
+
+static int is_gts_stealable(struct gru_thread_state *gts,
+		struct gru_blade_state *bs)
+{
+	if (is_kernel_context(gts))
+		return down_write_trylock(&bs->bs_kgts_sema);
+	else
+		return mutex_trylock(&gts->ts_ctxlock);
+}
+
+static void gts_stolen(struct gru_thread_state *gts,
+		struct gru_blade_state *bs)
+{
+	if (is_kernel_context(gts)) {
+		up_write(&bs->bs_kgts_sema);
+		STAT(steal_kernel_context);
+	} else {
+		mutex_unlock(&gts->ts_ctxlock);
+		STAT(steal_user_context);
+	}
+}
+
+void gru_steal_context(struct gru_thread_state *gts)
+{
+	struct gru_blade_state *blade;
+	struct gru_state *gru, *gru0;
+	struct gru_thread_state *ngts = NULL;
+	int ctxnum, ctxnum0, flag = 0, cbr, dsr;
+	int blade_id;
+
+	blade_id = gts->ts_user_blade_id;
+	if (blade_id < 0)
+		blade_id = uv_numa_blade_id();
+	cbr = gts->ts_cbr_au_count;
+	dsr = gts->ts_dsr_au_count;
+
+	blade = gru_base[blade_id];
+	spin_lock(&blade->bs_lock);
+
+	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
+	gru = blade->bs_lru_gru;
+	if (ctxnum == 0)
+		gru = next_gru(blade, gru);
+	blade->bs_lru_gru = gru;
+	blade->bs_lru_ctxnum = ctxnum;
+	ctxnum0 = ctxnum;
+	gru0 = gru;
+	while (1) {
+		if (gru_check_chiplet_assignment(gru, gts)) {
+			if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
+				break;
+			spin_lock(&gru->gs_lock);
+			for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
+				if (flag && gru == gru0 && ctxnum == ctxnum0)
+					break;
+				ngts = gru->gs_gts[ctxnum];
+				/*
+			 	* We are grabbing locks out of order, so trylock is
+			 	* needed. GTSs are usually not locked, so the odds of
+			 	* success are high. If trylock fails, try to steal a
+			 	* different GSEG.
+			 	*/
+				if (ngts && is_gts_stealable(ngts, blade))
+					break;
+				ngts = NULL;
+			}
+			spin_unlock(&gru->gs_lock);
+			if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
+				break;
+		}
+		if (flag && gru == gru0)
+			break;
+		flag = 1;
+		ctxnum = 0;
+		gru = next_gru(blade, gru);
+	}
+	spin_unlock(&blade->bs_lock);
+
+	if (ngts) {
+		gts->ustats.context_stolen++;
+		ngts->ts_steal_jiffies = jiffies;
+		gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
+		gts_stolen(ngts, blade);
+	} else {
+		STAT(steal_context_failed);
+	}
+	gru_dbg(grudev,
+		"stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
+		" avail cb %ld, ds %ld\n",
+		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
+		hweight64(gru->gs_dsr_map));
+}
+
+/*
+ * Assign a gru context.
+ */
+static int gru_assign_context_number(struct gru_state *gru)
+{
+	int ctxnum;
+
+	ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
+	__set_bit(ctxnum, &gru->gs_context_map);
+	return ctxnum;
+}
+
+/*
+ * Scan the GRUs on the local blade & assign a GRU context.
+ */
+struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru, *grux;
+	int i, max_active_contexts;
+	int blade_id = gts->ts_user_blade_id;
+
+	if (blade_id < 0)
+		blade_id = uv_numa_blade_id();
+again:
+	gru = NULL;
+	max_active_contexts = GRU_NUM_CCH;
+	for_each_gru_on_blade(grux, blade_id, i) {
+		if (!gru_check_chiplet_assignment(grux, gts))
+			continue;
+		if (check_gru_resources(grux, gts->ts_cbr_au_count,
+					gts->ts_dsr_au_count,
+					max_active_contexts)) {
+			gru = grux;
+			max_active_contexts = grux->gs_active_contexts;
+			if (max_active_contexts == 0)
+				break;
+		}
+	}
+
+	if (gru) {
+		spin_lock(&gru->gs_lock);
+		if (!check_gru_resources(gru, gts->ts_cbr_au_count,
+					 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
+			spin_unlock(&gru->gs_lock);
+			goto again;
+		}
+		reserve_gru_resources(gru, gts);
+		gts->ts_gru = gru;
+		gts->ts_blade = gru->gs_blade_id;
+		gts->ts_ctxnum = gru_assign_context_number(gru);
+		atomic_inc(&gts->ts_refcnt);
+		gru->gs_gts[gts->ts_ctxnum] = gts;
+		spin_unlock(&gru->gs_lock);
+
+		STAT(assign_context);
+		gru_dbg(grudev,
+			"gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
+			gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
+			gts->ts_gru->gs_gid, gts->ts_ctxnum,
+			gts->ts_cbr_au_count, gts->ts_dsr_au_count);
+	} else {
+		gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
+		STAT(assign_context_failed);
+	}
+
+	return gru;
+}
+
+/*
+ * gru_nopage
+ *
+ * Map the user's GRU segment
+ *
+ * 	Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries.
+ */
+int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct gru_thread_state *gts;
+	unsigned long paddr, vaddr;
+
+	vaddr = (unsigned long)vmf->virtual_address;
+	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
+		vma, vaddr, GSEG_BASE(vaddr));
+	STAT(nopfn);
+
+	/* The following check ensures vaddr is a valid address in the VMA */
+	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+	if (!gts)
+		return VM_FAULT_SIGBUS;
+
+again:
+	mutex_lock(&gts->ts_ctxlock);
+	preempt_disable();
+
+	gru_check_context_placement(gts);
+
+	if (!gts->ts_gru) {
+		STAT(load_user_context);
+		if (!gru_assign_gru_context(gts)) {
+			preempt_enable();
+			mutex_unlock(&gts->ts_ctxlock);
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
+			if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
+				gru_steal_context(gts);
+			goto again;
+		}
+		gru_load_context(gts);
+		paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
+		remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
+				paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
+				vma->vm_page_prot);
+	}
+
+	preempt_enable();
+	mutex_unlock(&gts->ts_ctxlock);
+
+	return VM_FAULT_NOPAGE;
+}
+
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
new file mode 100644
index 00000000000..7768b87d995
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -0,0 +1,381 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              PROC INTERFACES
+ *
+ * This file supports the /proc interfaces for the GRU driver
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+#define printstat(s, f)		printstat_val(s, &gru_stats.f, #f)
+
+static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id)
+{
+	unsigned long val = atomic_long_read(v);
+
+	seq_printf(s, "%16lu %s\n", val, id);
+}
+
+static int statistics_show(struct seq_file *s, void *p)
+{
+	printstat(s, vdata_alloc);
+	printstat(s, vdata_free);
+	printstat(s, gts_alloc);
+	printstat(s, gts_free);
+	printstat(s, gms_alloc);
+	printstat(s, gms_free);
+	printstat(s, gts_double_allocate);
+	printstat(s, assign_context);
+	printstat(s, assign_context_failed);
+	printstat(s, free_context);
+	printstat(s, load_user_context);
+	printstat(s, load_kernel_context);
+	printstat(s, lock_kernel_context);
+	printstat(s, unlock_kernel_context);
+	printstat(s, steal_user_context);
+	printstat(s, steal_kernel_context);
+	printstat(s, steal_context_failed);
+	printstat(s, nopfn);
+	printstat(s, asid_new);
+	printstat(s, asid_next);
+	printstat(s, asid_wrap);
+	printstat(s, asid_reuse);
+	printstat(s, intr);
+	printstat(s, intr_cbr);
+	printstat(s, intr_tfh);
+	printstat(s, intr_spurious);
+	printstat(s, intr_mm_lock_failed);
+	printstat(s, call_os);
+	printstat(s, call_os_wait_queue);
+	printstat(s, user_flush_tlb);
+	printstat(s, user_unload_context);
+	printstat(s, user_exception);
+	printstat(s, set_context_option);
+	printstat(s, check_context_retarget_intr);
+	printstat(s, check_context_unload);
+	printstat(s, tlb_dropin);
+	printstat(s, tlb_preload_page);
+	printstat(s, tlb_dropin_fail_no_asid);
+	printstat(s, tlb_dropin_fail_upm);
+	printstat(s, tlb_dropin_fail_invalid);
+	printstat(s, tlb_dropin_fail_range_active);
+	printstat(s, tlb_dropin_fail_idle);
+	printstat(s, tlb_dropin_fail_fmm);
+	printstat(s, tlb_dropin_fail_no_exception);
+	printstat(s, tfh_stale_on_fault);
+	printstat(s, mmu_invalidate_range);
+	printstat(s, mmu_invalidate_page);
+	printstat(s, flush_tlb);
+	printstat(s, flush_tlb_gru);
+	printstat(s, flush_tlb_gru_tgh);
+	printstat(s, flush_tlb_gru_zero_asid);
+	printstat(s, copy_gpa);
+	printstat(s, read_gpa);
+	printstat(s, mesq_receive);
+	printstat(s, mesq_receive_none);
+	printstat(s, mesq_send);
+	printstat(s, mesq_send_failed);
+	printstat(s, mesq_noop);
+	printstat(s, mesq_send_unexpected_error);
+	printstat(s, mesq_send_lb_overflow);
+	printstat(s, mesq_send_qlimit_reached);
+	printstat(s, mesq_send_amo_nacked);
+	printstat(s, mesq_send_put_nacked);
+	printstat(s, mesq_qf_locked);
+	printstat(s, mesq_qf_noop_not_full);
+	printstat(s, mesq_qf_switch_head_failed);
+	printstat(s, mesq_qf_unexpected_error);
+	printstat(s, mesq_noop_unexpected_error);
+	printstat(s, mesq_noop_lb_overflow);
+	printstat(s, mesq_noop_qlimit_reached);
+	printstat(s, mesq_noop_amo_nacked);
+	printstat(s, mesq_noop_put_nacked);
+	printstat(s, mesq_noop_page_overflow);
+	return 0;
+}
+
+static ssize_t statistics_write(struct file *file, const char __user *userbuf,
+				size_t count, loff_t *data)
+{
+	memset(&gru_stats, 0, sizeof(gru_stats));
+	return count;
+}
+
+static int mcs_statistics_show(struct seq_file *s, void *p)
+{
+	int op;
+	unsigned long total, count, max;
+	static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt",
+		"cch_interrupt_sync", "cch_deallocate", "tfh_write_only",
+		"tfh_write_restart", "tgh_invalidate"};
+
+	seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks");
+	for (op = 0; op < mcsop_last; op++) {
+		count = atomic_long_read(&mcs_op_statistics[op].count);
+		total = atomic_long_read(&mcs_op_statistics[op].total);
+		max = mcs_op_statistics[op].max;
+		seq_printf(s, "%-20s%12ld%12ld%12ld\n", id[op], count,
+			   count ? total / count : 0, max);
+	}
+	return 0;
+}
+
+static ssize_t mcs_statistics_write(struct file *file,
+			const char __user *userbuf, size_t count, loff_t *data)
+{
+	memset(mcs_op_statistics, 0, sizeof(mcs_op_statistics));
+	return count;
+}
+
+static int options_show(struct seq_file *s, void *p)
+{
+	seq_printf(s, "#bitmask: 1=trace, 2=statistics\n");
+	seq_printf(s, "0x%lx\n", gru_options);
+	return 0;
+}
+
+static ssize_t options_write(struct file *file, const char __user *userbuf,
+			     size_t count, loff_t *data)
+{
+	char buf[20];
+
+	if (count >= sizeof(buf))
+		return -EINVAL;
+	if (copy_from_user(buf, userbuf, count))
+		return -EFAULT;
+	buf[count] = '\0';
+	if (strict_strtoul(buf, 0, &gru_options))
+		return -EINVAL;
+
+	return count;
+}
+
+static int cch_seq_show(struct seq_file *file, void *data)
+{
+	long gid = *(long *)data;
+	int i;
+	struct gru_state *gru = GID_TO_GRU(gid);
+	struct gru_thread_state *ts;
+	const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };
+
+	if (gid == 0)
+		seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid",
+			   "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode");
+	if (gru)
+		for (i = 0; i < GRU_NUM_CCH; i++) {
+			ts = gru->gs_gts[i];
+			if (!ts)
+				continue;
+			seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n",
+				   gru->gs_gid, gru->gs_blade_id, i,
+				   is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid,
+				   is_kernel_context(ts) ? 0 : ts->ts_tgid_owner,
+				   ts->ts_cbr_au_count * GRU_CBR_AU_SIZE,
+				   ts->ts_cbr_au_count * GRU_DSR_AU_BYTES,
+				   mode[ts->ts_user_options &
+					GRU_OPT_MISS_MASK]);
+		}
+
+	return 0;
+}
+
+static int gru_seq_show(struct seq_file *file, void *data)
+{
+	long gid = *(long *)data, ctxfree, cbrfree, dsrfree;
+	struct gru_state *gru = GID_TO_GRU(gid);
+
+	if (gid == 0) {
+		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
+			   "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
+		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
+			   "busy", "busy", "free", "free", "free");
+	}
+	if (gru) {
+		ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
+		cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+		dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+		seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n",
+			   gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree,
+			   GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree,
+			   ctxfree, cbrfree, dsrfree);
+	}
+
+	return 0;
+}
+
+static void seq_stop(struct seq_file *file, void *data)
+{
+}
+
+static void *seq_start(struct seq_file *file, loff_t *gid)
+{
+	if (*gid < gru_max_gids)
+		return gid;
+	return NULL;
+}
+
+static void *seq_next(struct seq_file *file, void *data, loff_t *gid)
+{
+	(*gid)++;
+	if (*gid < gru_max_gids)
+		return gid;
+	return NULL;
+}
+
+static const struct seq_operations cch_seq_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= cch_seq_show
+};
+
+static const struct seq_operations gru_seq_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= gru_seq_show
+};
+
+static int statistics_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, statistics_show, NULL);
+}
+
+static int mcs_statistics_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mcs_statistics_show, NULL);
+}
+
+static int options_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, options_show, NULL);
+}
+
+static int cch_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &cch_seq_ops);
+}
+
+static int gru_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &gru_seq_ops);
+}
+
+/* *INDENT-OFF* */
+static const struct file_operations statistics_fops = {
+	.open 		= statistics_open,
+	.read 		= seq_read,
+	.write 		= statistics_write,
+	.llseek 	= seq_lseek,
+	.release 	= single_release,
+};
+
+static const struct file_operations mcs_statistics_fops = {
+	.open 		= mcs_statistics_open,
+	.read 		= seq_read,
+	.write 		= mcs_statistics_write,
+	.llseek 	= seq_lseek,
+	.release 	= single_release,
+};
+
+static const struct file_operations options_fops = {
+	.open 		= options_open,
+	.read 		= seq_read,
+	.write 		= options_write,
+	.llseek 	= seq_lseek,
+	.release 	= single_release,
+};
+
+static const struct file_operations cch_fops = {
+	.open 		= cch_open,
+	.read 		= seq_read,
+	.llseek 	= seq_lseek,
+	.release 	= seq_release,
+};
+static const struct file_operations gru_fops = {
+	.open 		= gru_open,
+	.read 		= seq_read,
+	.llseek 	= seq_lseek,
+	.release 	= seq_release,
+};
+
+static struct proc_entry {
+	char *name;
+	int mode;
+	const struct file_operations *fops;
+	struct proc_dir_entry *entry;
+} proc_files[] = {
+	{"statistics", 0644, &statistics_fops},
+	{"mcs_statistics", 0644, &mcs_statistics_fops},
+	{"debug_options", 0644, &options_fops},
+	{"cch_status", 0444, &cch_fops},
+	{"gru_status", 0444, &gru_fops},
+	{NULL}
+};
+/* *INDENT-ON* */
+
+static struct proc_dir_entry *proc_gru __read_mostly;
+
+static int create_proc_file(struct proc_entry *p)
+{
+	p->entry = proc_create(p->name, p->mode, proc_gru, p->fops);
+	if (!p->entry)
+		return -1;
+	return 0;
+}
+
+static void delete_proc_files(void)
+{
+	struct proc_entry *p;
+
+	if (proc_gru) {
+		for (p = proc_files; p->name; p++)
+			if (p->entry)
+				remove_proc_entry(p->name, proc_gru);
+		remove_proc_entry("gru", proc_gru->parent);
+	}
+}
+
+int gru_proc_init(void)
+{
+	struct proc_entry *p;
+
+	proc_gru = proc_mkdir("sgi_uv/gru", NULL);
+
+	for (p = proc_files; p->name; p++)
+		if (create_proc_file(p))
+			goto err;
+	return 0;
+
+err:
+	delete_proc_files();
+	return -1;
+}
+
+void gru_proc_exit(void)
+{
+	delete_proc_files();
+}
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
new file mode 100644
index 00000000000..02a77b8b8ee
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -0,0 +1,685 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *            GRU DRIVER TABLES, MACROS, externs, etc
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRUTABLES_H__
+#define __GRUTABLES_H__
+
+/*
+ * GRU Chiplet:
+ *   The GRU is a user addressible memory accelerator. It provides
+ *   several forms of load, store, memset, bcopy instructions. In addition, it
+ *   contains special instructions for AMOs, sending messages to message
+ *   queues, etc.
+ *
+ *   The GRU is an integral part of the node controller. It connects
+ *   directly to the cpu socket. In its current implementation, there are 2
+ *   GRU chiplets in the node controller on each blade (~node).
+ *
+ *   The entire GRU memory space is fully coherent and cacheable by the cpus.
+ *
+ *   Each GRU chiplet has a physical memory map that looks like the following:
+ *
+ *   	+-----------------+
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	+-----------------+
+ *   	|  system control |
+ *   	+-----------------+        _______ +-------------+
+ *   	|/////////////////|       /        |             |
+ *   	|/////////////////|      /         |             |
+ *   	|/////////////////|     /          | instructions|
+ *   	|/////////////////|    /           |             |
+ *   	|/////////////////|   /            |             |
+ *   	|/////////////////|  /             |-------------|
+ *   	|/////////////////| /              |             |
+ *   	+-----------------+                |             |
+ *   	|   context 15    |                |  data       |
+ *   	+-----------------+                |             |
+ *   	|    ......       | \              |             |
+ *   	+-----------------+  \____________ +-------------+
+ *   	|   context 1     |
+ *   	+-----------------+
+ *   	|   context 0     |
+ *   	+-----------------+
+ *
+ *   Each of the "contexts" is a chunk of memory that can be mmaped into user
+ *   space. The context consists of 2 parts:
+ *
+ *  	- an instruction space that can be directly accessed by the user
+ *  	  to issue GRU instructions and to check instruction status.
+ *
+ *  	- a data area that acts as normal RAM.
+ *
+ *   User instructions contain virtual addresses of data to be accessed by the
+ *   GRU. The GRU contains a TLB that is used to convert these user virtual
+ *   addresses to physical addresses.
+ *
+ *   The "system control" area of the GRU chiplet is used by the kernel driver
+ *   to manage user contexts and to perform functions such as TLB dropin and
+ *   purging.
+ *
+ *   One context may be reserved for the kernel and used for cross-partition
+ *   communication. The GRU will also be used to asynchronously zero out
+ *   large blocks of memory (not currently implemented).
+ *
+ *
+ * Tables:
+ *
+ * 	VDATA-VMA Data		- Holds a few parameters. Head of linked list of
+ * 				  GTS tables for threads using the GSEG
+ * 	GTS - Gru Thread State  - contains info for managing a GSEG context. A
+ * 				  GTS is allocated for each thread accessing a
+ * 				  GSEG.
+ *     	GTD - GRU Thread Data   - contains shadow copy of GRU data when GSEG is
+ *     				  not loaded into a GRU
+ *	GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs
+ *				  where a GSEG has been loaded. Similar to
+ *				  an mm_struct but for GRU.
+ *
+ *	GS  - GRU State 	- Used to manage the state of a GRU chiplet
+ *	BS  - Blade State	- Used to manage state of all GRU chiplets
+ *				  on a blade
+ *
+ *
+ *  Normal task tables for task using GRU.
+ *  		- 2 threads in process
+ *  		- 2 GSEGs open in process
+ *  		- GSEG1 is being used by both threads
+ *  		- GSEG2 is used only by thread 2
+ *
+ *       task -->|
+ *       task ---+---> mm ->------ (notifier) -------+-> gms
+ *                     |                             |
+ *                     |--> vma -> vdata ---> gts--->|		GSEG1 (thread1)
+ *                     |                  |          |
+ *                     |                  +-> gts--->|		GSEG1 (thread2)
+ *                     |                             |
+ *                     |--> vma -> vdata ---> gts--->|		GSEG2 (thread2)
+ *                     .
+ *                     .
+ *
+ *  GSEGs are marked DONTCOPY on fork
+ *
+ * At open
+ * 	file.private_data -> NULL
+ *
+ * At mmap,
+ * 	vma -> vdata
+ *
+ * After gseg reference
+ * 	vma -> vdata ->gts
+ *
+ * After fork
+ *   parent
+ * 	vma -> vdata -> gts
+ *   child
+ * 	(vma is not copied)
+ *
+ */
+
+#include <linux/rmap.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/mmu_notifier.h>
+#include "gru.h"
+#include "grulib.h"
+#include "gruhandles.h"
+
+extern struct gru_stats_s gru_stats;
+extern struct gru_blade_state *gru_base[];
+extern unsigned long gru_start_paddr, gru_end_paddr;
+extern void *gru_start_vaddr;
+extern unsigned int gru_max_gids;
+
+#define GRU_MAX_BLADES		MAX_NUMNODES
+#define GRU_MAX_GRUS		(GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE)
+
+#define GRU_DRIVER_ID_STR	"SGI GRU Device Driver"
+#define GRU_DRIVER_VERSION_STR	"0.85"
+
+/*
+ * GRU statistics.
+ */
+struct gru_stats_s {
+	atomic_long_t vdata_alloc;
+	atomic_long_t vdata_free;
+	atomic_long_t gts_alloc;
+	atomic_long_t gts_free;
+	atomic_long_t gms_alloc;
+	atomic_long_t gms_free;
+	atomic_long_t gts_double_allocate;
+	atomic_long_t assign_context;
+	atomic_long_t assign_context_failed;
+	atomic_long_t free_context;
+	atomic_long_t load_user_context;
+	atomic_long_t load_kernel_context;
+	atomic_long_t lock_kernel_context;
+	atomic_long_t unlock_kernel_context;
+	atomic_long_t steal_user_context;
+	atomic_long_t steal_kernel_context;
+	atomic_long_t steal_context_failed;
+	atomic_long_t nopfn;
+	atomic_long_t asid_new;
+	atomic_long_t asid_next;
+	atomic_long_t asid_wrap;
+	atomic_long_t asid_reuse;
+	atomic_long_t intr;
+	atomic_long_t intr_cbr;
+	atomic_long_t intr_tfh;
+	atomic_long_t intr_spurious;
+	atomic_long_t intr_mm_lock_failed;
+	atomic_long_t call_os;
+	atomic_long_t call_os_wait_queue;
+	atomic_long_t user_flush_tlb;
+	atomic_long_t user_unload_context;
+	atomic_long_t user_exception;
+	atomic_long_t set_context_option;
+	atomic_long_t check_context_retarget_intr;
+	atomic_long_t check_context_unload;
+	atomic_long_t tlb_dropin;
+	atomic_long_t tlb_preload_page;
+	atomic_long_t tlb_dropin_fail_no_asid;
+	atomic_long_t tlb_dropin_fail_upm;
+	atomic_long_t tlb_dropin_fail_invalid;
+	atomic_long_t tlb_dropin_fail_range_active;
+	atomic_long_t tlb_dropin_fail_idle;
+	atomic_long_t tlb_dropin_fail_fmm;
+	atomic_long_t tlb_dropin_fail_no_exception;
+	atomic_long_t tfh_stale_on_fault;
+	atomic_long_t mmu_invalidate_range;
+	atomic_long_t mmu_invalidate_page;
+	atomic_long_t flush_tlb;
+	atomic_long_t flush_tlb_gru;
+	atomic_long_t flush_tlb_gru_tgh;
+	atomic_long_t flush_tlb_gru_zero_asid;
+
+	atomic_long_t copy_gpa;
+	atomic_long_t read_gpa;
+
+	atomic_long_t mesq_receive;
+	atomic_long_t mesq_receive_none;
+	atomic_long_t mesq_send;
+	atomic_long_t mesq_send_failed;
+	atomic_long_t mesq_noop;
+	atomic_long_t mesq_send_unexpected_error;
+	atomic_long_t mesq_send_lb_overflow;
+	atomic_long_t mesq_send_qlimit_reached;
+	atomic_long_t mesq_send_amo_nacked;
+	atomic_long_t mesq_send_put_nacked;
+	atomic_long_t mesq_page_overflow;
+	atomic_long_t mesq_qf_locked;
+	atomic_long_t mesq_qf_noop_not_full;
+	atomic_long_t mesq_qf_switch_head_failed;
+	atomic_long_t mesq_qf_unexpected_error;
+	atomic_long_t mesq_noop_unexpected_error;
+	atomic_long_t mesq_noop_lb_overflow;
+	atomic_long_t mesq_noop_qlimit_reached;
+	atomic_long_t mesq_noop_amo_nacked;
+	atomic_long_t mesq_noop_put_nacked;
+	atomic_long_t mesq_noop_page_overflow;
+
+};
+
+enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync,
+	cchop_deallocate, tfhop_write_only, tfhop_write_restart,
+	tghop_invalidate, mcsop_last};
+
+struct mcs_op_statistic {
+	atomic_long_t	count;
+	atomic_long_t	total;
+	unsigned long	max;
+};
+
+extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];
+
+#define OPT_DPRINT		1
+#define OPT_STATS		2
+
+
+#define IRQ_GRU			110	/* Starting IRQ number for interrupts */
+
+/* Delay in jiffies between attempts to assign a GRU context */
+#define GRU_ASSIGN_DELAY	((HZ * 20) / 1000)
+
+/*
+ * If a process has it's context stolen, min delay in jiffies before trying to
+ * steal a context from another process.
+ */
+#define GRU_STEAL_DELAY		((HZ * 200) / 1000)
+
+#define STAT(id)	do {						\
+				if (gru_options & OPT_STATS)		\
+					atomic_long_inc(&gru_stats.id);	\
+			} while (0)
+
+#ifdef CONFIG_SGI_GRU_DEBUG
+#define gru_dbg(dev, fmt, x...)						\
+	do {								\
+		if (gru_options & OPT_DPRINT)				\
+			printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\
+	} while (0)
+#else
+#define gru_dbg(x...)
+#endif
+
+/*-----------------------------------------------------------------------------
+ * ASID management
+ */
+#define MAX_ASID	0xfffff0
+#define MIN_ASID	8
+#define ASID_INC	8	/* number of regions */
+
+/* Generate a GRU asid value from a GRU base asid & a virtual address. */
+#define VADDR_HI_BIT		64
+#define GRUREGION(addr)		((addr) >> (VADDR_HI_BIT - 3) & 3)
+#define GRUASID(asid, addr)	((asid) + GRUREGION(addr))
+
+/*------------------------------------------------------------------------------
+ *  File & VMS Tables
+ */
+
+struct gru_state;
+
+/*
+ * This structure is pointed to from the mmstruct via the notifier pointer.
+ * There is one of these per address space.
+ */
+struct gru_mm_tracker {				/* pack to reduce size */
+	unsigned int		mt_asid_gen:24;	/* ASID wrap count */
+	unsigned int		mt_asid:24;	/* current base ASID for gru */
+	unsigned short		mt_ctxbitmap:16;/* bitmap of contexts using
+						   asid */
+} __attribute__ ((packed));
+
+struct gru_mm_struct {
+	struct mmu_notifier	ms_notifier;
+	atomic_t		ms_refcnt;
+	spinlock_t		ms_asid_lock;	/* protects ASID assignment */
+	atomic_t		ms_range_active;/* num range_invals active */
+	char			ms_released;
+	wait_queue_head_t	ms_wait_queue;
+	DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS);
+	struct gru_mm_tracker	ms_asids[GRU_MAX_GRUS];
+};
+
+/*
+ * One of these structures is allocated when a GSEG is mmaped. The
+ * structure is pointed to by the vma->vm_private_data field in the vma struct.
+ */
+struct gru_vma_data {
+	spinlock_t		vd_lock;	/* Serialize access to vma */
+	struct list_head	vd_head;	/* head of linked list of gts */
+	long			vd_user_options;/* misc user option flags */
+	int			vd_cbr_au_count;
+	int			vd_dsr_au_count;
+	unsigned char		vd_tlb_preload_count;
+};
+
+/*
+ * One of these is allocated for each thread accessing a mmaped GRU. A linked
+ * list of these structure is hung off the struct gru_vma_data in the mm_struct.
+ */
+struct gru_thread_state {
+	struct list_head	ts_next;	/* list - head at vma-private */
+	struct mutex		ts_ctxlock;	/* load/unload CTX lock */
+	struct mm_struct	*ts_mm;		/* mm currently mapped to
+						   context */
+	struct vm_area_struct	*ts_vma;	/* vma of GRU context */
+	struct gru_state	*ts_gru;	/* GRU where the context is
+						   loaded */
+	struct gru_mm_struct	*ts_gms;	/* asid & ioproc struct */
+	unsigned char		ts_tlb_preload_count; /* TLB preload pages */
+	unsigned long		ts_cbr_map;	/* map of allocated CBRs */
+	unsigned long		ts_dsr_map;	/* map of allocated DATA
+						   resources */
+	unsigned long		ts_steal_jiffies;/* jiffies when context last
+						    stolen */
+	long			ts_user_options;/* misc user option flags */
+	pid_t			ts_tgid_owner;	/* task that is using the
+						   context - for migration */
+	short			ts_user_blade_id;/* user selected blade */
+	char			ts_user_chiplet_id;/* user selected chiplet */
+	unsigned short		ts_sizeavail;	/* Pagesizes in use */
+	int			ts_tsid;	/* thread that owns the
+						   structure */
+	int			ts_tlb_int_select;/* target cpu if interrupts
+						     enabled */
+	int			ts_ctxnum;	/* context number where the
+						   context is loaded */
+	atomic_t		ts_refcnt;	/* reference count GTS */
+	unsigned char		ts_dsr_au_count;/* Number of DSR resources
+						   required for contest */
+	unsigned char		ts_cbr_au_count;/* Number of CBR resources
+						   required for contest */
+	char			ts_cch_req_slice;/* CCH packet slice */
+	char			ts_blade;	/* If >= 0, migrate context if
+						   ref from diferent blade */
+	char			ts_force_cch_reload;
+	char			ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
+							  allocated CB */
+	int			ts_data_valid;	/* Indicates if ts_gdata has
+						   valid data */
+	struct gru_gseg_statistics ustats;	/* User statistics */
+	unsigned long		ts_gdata[0];	/* save area for GRU data (CB,
+						   DS, CBE) */
+};
+
+/*
+ * Threaded programs actually allocate an array of GSEGs when a context is
+ * created. Each thread uses a separate GSEG. TSID is the index into the GSEG
+ * array.
+ */
+#define TSID(a, v)		(((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE)
+#define UGRUADDR(gts)		((gts)->ts_vma->vm_start +		\
+					(gts)->ts_tsid * GRU_GSEG_PAGESIZE)
+
+#define NULLCTX			(-1)	/* if context not loaded into GRU */
+
+/*-----------------------------------------------------------------------------
+ *  GRU State Tables
+ */
+
+/*
+ * One of these exists for each GRU chiplet.
+ */
+struct gru_state {
+	struct gru_blade_state	*gs_blade;		/* GRU state for entire
+							   blade */
+	unsigned long		gs_gru_base_paddr;	/* Physical address of
+							   gru segments (64) */
+	void			*gs_gru_base_vaddr;	/* Virtual address of
+							   gru segments (64) */
+	unsigned short		gs_gid;			/* unique GRU number */
+	unsigned short		gs_blade_id;		/* blade of GRU */
+	unsigned char		gs_chiplet_id;		/* blade chiplet of GRU */
+	unsigned char		gs_tgh_local_shift;	/* used to pick TGH for
+							   local flush */
+	unsigned char		gs_tgh_first_remote;	/* starting TGH# for
+							   remote flush */
+	spinlock_t		gs_asid_lock;		/* lock used for
+							   assigning asids */
+	spinlock_t		gs_lock;		/* lock used for
+							   assigning contexts */
+
+	/* -- the following are protected by the gs_asid_lock spinlock ---- */
+	unsigned int		gs_asid;		/* Next availe ASID */
+	unsigned int		gs_asid_limit;		/* Limit of available
+							   ASIDs */
+	unsigned int		gs_asid_gen;		/* asid generation.
+							   Inc on wrap */
+
+	/* --- the following fields are protected by the gs_lock spinlock --- */
+	unsigned long		gs_context_map;		/* bitmap to manage
+							   contexts in use */
+	unsigned long		gs_cbr_map;		/* bitmap to manage CB
+							   resources */
+	unsigned long		gs_dsr_map;		/* bitmap used to manage
+							   DATA resources */
+	unsigned int		gs_reserved_cbrs;	/* Number of kernel-
+							   reserved cbrs */
+	unsigned int		gs_reserved_dsr_bytes;	/* Bytes of kernel-
+							   reserved dsrs */
+	unsigned short		gs_active_contexts;	/* number of contexts
+							   in use */
+	struct gru_thread_state	*gs_gts[GRU_NUM_CCH];	/* GTS currently using
+							   the context */
+	int			gs_irq[GRU_NUM_TFM];	/* Interrupt irqs */
+};
+
+/*
+ * This structure contains the GRU state for all the GRUs on a blade.
+ */
+struct gru_blade_state {
+	void			*kernel_cb;		/* First kernel
+							   reserved cb */
+	void			*kernel_dsr;		/* First kernel
+							   reserved DSR */
+	struct rw_semaphore	bs_kgts_sema;		/* lock for kgts */
+	struct gru_thread_state *bs_kgts;		/* GTS for kernel use */
+
+	/* ---- the following are used for managing kernel async GRU CBRs --- */
+	int			bs_async_dsr_bytes;	/* DSRs for async */
+	int			bs_async_cbrs;		/* CBRs AU for async */
+	struct completion	*bs_async_wq;
+
+	/* ---- the following are protected by the bs_lock spinlock ---- */
+	spinlock_t		bs_lock;		/* lock used for
+							   stealing contexts */
+	int			bs_lru_ctxnum;		/* STEAL - last context
+							   stolen */
+	struct gru_state	*bs_lru_gru;		/* STEAL - last gru
+							   stolen */
+
+	struct gru_state	bs_grus[GRU_CHIPLETS_PER_BLADE];
+};
+
+/*-----------------------------------------------------------------------------
+ * Address Primitives
+ */
+#define get_tfm_for_cpu(g, c)						\
+	((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c)))
+#define get_tfh_by_index(g, i)						\
+	((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i)))
+#define get_tgh_by_index(g, i)						\
+	((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i)))
+#define get_cbe_by_index(g, i)						\
+	((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\
+			(i)))
+
+/*-----------------------------------------------------------------------------
+ * Useful Macros
+ */
+
+/* Given a blade# & chiplet#, get a pointer to the GRU */
+#define get_gru(b, c)		(&gru_base[b]->bs_grus[c])
+
+/* Number of bytes to save/restore when unloading/loading GRU contexts */
+#define DSR_BYTES(dsr)		((dsr) * GRU_DSR_AU_BYTES)
+#define CBR_BYTES(cbr)		((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2)
+
+/* Convert a user CB number to the actual CBRNUM */
+#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \
+				  * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE)
+
+/* Convert a gid to a pointer to the GRU */
+#define GID_TO_GRU(gid)							\
+	(gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ?			\
+		(&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]->		\
+			bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) :	\
+	 NULL)
+
+/* Scan all active GRUs in a GRU bitmap */
+#define for_each_gru_in_bitmap(gid, map)				\
+	for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\
+		(gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid)))
+
+/* Scan all active GRUs on a specific blade */
+#define for_each_gru_on_blade(gru, nid, i)				\
+	for ((gru) = gru_base[nid]->bs_grus, (i) = 0;			\
+			(i) < GRU_CHIPLETS_PER_BLADE;			\
+			(i)++, (gru)++)
+
+/* Scan all GRUs */
+#define foreach_gid(gid)						\
+	for ((gid) = 0; (gid) < gru_max_gids; (gid)++)
+
+/* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. */
+#define for_each_gts_on_gru(gts, gru, ctxnum)				\
+	for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++)		\
+		if (((gts) = (gru)->gs_gts[ctxnum]))
+
+/* Scan each CBR whose bit is set in a TFM (or copy of) */
+#define for_each_cbr_in_tfm(i, map)					\
+	for ((i) = find_first_bit(map, GRU_NUM_CBE);			\
+			(i) < GRU_NUM_CBE;				\
+			(i)++, (i) = find_next_bit(map, GRU_NUM_CBE, i))
+
+/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */
+#define for_each_cbr_in_allocation_map(i, map, k)			\
+	for ((k) = find_first_bit(map, GRU_CBR_AU); (k) < GRU_CBR_AU;	\
+			(k) = find_next_bit(map, GRU_CBR_AU, (k) + 1)) 	\
+		for ((i) = (k)*GRU_CBR_AU_SIZE;				\
+				(i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)
+
+/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
+#define for_each_dsr_in_allocation_map(i, map, k)			\
+	for ((k) = find_first_bit((const unsigned long *)map, GRU_DSR_AU);\
+			(k) < GRU_DSR_AU;				\
+			(k) = find_next_bit((const unsigned long *)map,	\
+					  GRU_DSR_AU, (k) + 1))		\
+		for ((i) = (k) * GRU_DSR_AU_CL;				\
+				(i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)
+
+#define gseg_physical_address(gru, ctxnum)				\
+		((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE)
+#define gseg_virtual_address(gru, ctxnum)				\
+		((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE)
+
+/*-----------------------------------------------------------------------------
+ * Lock / Unlock GRU handles
+ * 	Use the "delresp" bit in the handle as a "lock" bit.
+ */
+
+/* Lock hierarchy checking enabled only in emulator */
+
+/* 0 = lock failed, 1 = locked */
+static inline int __trylock_handle(void *h)
+{
+	return !test_and_set_bit(1, h);
+}
+
+static inline void __lock_handle(void *h)
+{
+	while (test_and_set_bit(1, h))
+		cpu_relax();
+}
+
+static inline void __unlock_handle(void *h)
+{
+	clear_bit(1, h);
+}
+
+static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch)
+{
+	return __trylock_handle(cch);
+}
+
+static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
+{
+	__lock_handle(cch);
+}
+
+static inline void unlock_cch_handle(struct gru_context_configuration_handle
+				     *cch)
+{
+	__unlock_handle(cch);
+}
+
+static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+	__lock_handle(tgh);
+}
+
+static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+	__unlock_handle(tgh);
+}
+
+static inline int is_kernel_context(struct gru_thread_state *gts)
+{
+	return !gts->ts_mm;
+}
+
+/*
+ * The following are for Nehelem-EX. A more general scheme is needed for
+ * future processors.
+ */
+#define UV_MAX_INT_CORES		8
+#define uv_cpu_socket_number(p)		((cpu_physical_id(p) >> 5) & 1)
+#define uv_cpu_ht_number(p)		(cpu_physical_id(p) & 1)
+#define uv_cpu_core_number(p)		(((cpu_physical_id(p) >> 2) & 4) |	\
+					((cpu_physical_id(p) >> 1) & 3))
+/*-----------------------------------------------------------------------------
+ * Function prototypes & externs
+ */
+struct gru_unload_context_req;
+
+extern const struct vm_operations_struct gru_vm_ops;
+extern struct device *grudev;
+
+extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
+				int tsid);
+extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
+				*vma, int tsid);
+extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
+				*vma, int tsid);
+extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts);
+extern void gru_load_context(struct gru_thread_state *gts);
+extern void gru_steal_context(struct gru_thread_state *gts);
+extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
+extern int gru_update_cch(struct gru_thread_state *gts);
+extern void gts_drop(struct gru_thread_state *gts);
+extern void gru_tgh_flush_init(struct gru_state *gru);
+extern int gru_kservices_init(void);
+extern void gru_kservices_exit(void);
+extern irqreturn_t gru0_intr(int irq, void *dev_id);
+extern irqreturn_t gru1_intr(int irq, void *dev_id);
+extern irqreturn_t gru_intr_mblade(int irq, void *dev_id);
+extern int gru_dump_chiplet_request(unsigned long arg);
+extern long gru_get_gseg_statistics(unsigned long arg);
+extern int gru_handle_user_call_os(unsigned long address);
+extern int gru_user_flush_tlb(unsigned long arg);
+extern int gru_user_unload_context(unsigned long arg);
+extern int gru_get_exception_detail(unsigned long arg);
+extern int gru_set_context_option(unsigned long address);
+extern void gru_check_context_placement(struct gru_thread_state *gts);
+extern int gru_cpu_fault_map_id(void);
+extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
+extern void gru_flush_all_tlb(struct gru_state *gru);
+extern int gru_proc_init(void);
+extern void gru_proc_exit(void);
+
+extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+		int cbr_au_count, int dsr_au_count,
+		unsigned char tlb_preload_count, int options, int tsid);
+extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
+		int cbr_au_count, char *cbmap);
+extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
+		int dsr_au_count, char *dsmap);
+extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
+extern struct gru_mm_struct *gru_register_mmu_notifier(void);
+extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
+
+extern int gru_ktest(unsigned long arg);
+extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+					unsigned long len);
+
+extern unsigned long gru_options;
+
+#endif /* __GRUTABLES_H__ */
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
new file mode 100644
index 00000000000..240a6d36166
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -0,0 +1,378 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * 		MMUOPS callbacks  + TLB flushing
+ *
+ * This file handles emu notifier callbacks from the core kernel. The callbacks
+ * are used to update the TLB in the GRU as a result of changes in the
+ * state of a process address space. This file also handles TLB invalidates
+ * from the GRU driver.
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/hugetlb.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/srcu.h>
+#include <asm/processor.h>
+#include "gru.h"
+#include "grutables.h"
+#include <asm/uv/uv_hub.h>
+
+#define gru_random()	get_cycles()
+
+/* ---------------------------------- TLB Invalidation functions --------
+ * get_tgh_handle
+ *
+ * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
+ * local blade, use a fixed TGH that is a function of the blade-local cpu
+ * number. Normally, this TGH is private to the cpu & no contention occurs for
+ * the TGH. For offblade GRUs, select a random TGH in the range above the
+ * private TGHs. A spinlock is required to access this TGH & the lock must be
+ * released when the invalidate is completes. This sucks, but it is the best we
+ * can do.
+ *
+ * Note that the spinlock is IN the TGH handle so locking does not involve
+ * additional cache lines.
+ *
+ */
+static inline int get_off_blade_tgh(struct gru_state *gru)
+{
+	int n;
+
+	n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
+	n = gru_random() % n;
+	n += gru->gs_tgh_first_remote;
+	return n;
+}
+
+static inline int get_on_blade_tgh(struct gru_state *gru)
+{
+	return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
+}
+
+static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
+							 *gru)
+{
+	struct gru_tlb_global_handle *tgh;
+	int n;
+
+	preempt_disable();
+	if (uv_numa_blade_id() == gru->gs_blade_id)
+		n = get_on_blade_tgh(gru);
+	else
+		n = get_off_blade_tgh(gru);
+	tgh = get_tgh_by_index(gru, n);
+	lock_tgh_handle(tgh);
+
+	return tgh;
+}
+
+static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+	unlock_tgh_handle(tgh);
+	preempt_enable();
+}
+
+/*
+ * gru_flush_tlb_range
+ *
+ * General purpose TLB invalidation function. This function scans every GRU in
+ * the ENTIRE system (partition) looking for GRUs where the specified MM has
+ * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
+ * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
+ * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
+ * cost of (possibly) a large number of future TLBmisses.
+ *
+ * The current algorithm is optimized based on the following (somewhat true)
+ * assumptions:
+ * 	- GRU contexts are not loaded into a GRU unless a reference is made to
+ * 	  the data segment or control block (this is true, not an assumption).
+ * 	  If a DS/CB is referenced, the user will also issue instructions that
+ * 	  cause TLBmisses. It is not necessary to optimize for the case where
+ * 	  contexts are loaded but no instructions cause TLB misses. (I know
+ * 	  this will happen but I'm not optimizing for it).
+ * 	- GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
+ * 	  a few usec but in unusual cases, it could be longer. Avoid if
+ * 	  possible.
+ * 	- intrablade process migration between cpus is not frequent but is
+ * 	  common.
+ * 	- a GRU context is not typically migrated to a different GRU on the
+ * 	  blade because of intrablade migration
+ *	- interblade migration is rare. Processes migrate their GRU context to
+ *	  the new blade.
+ *	- if interblade migration occurs, migration back to the original blade
+ *	  is very very rare (ie., no optimization for this case)
+ *	- most GRU instruction operate on a subset of the user REGIONS. Code
+ *	  & shared library regions are not likely targets of GRU instructions.
+ *
+ * To help improve the efficiency of TLB invalidation, the GMS data
+ * structure is maintained for EACH address space (MM struct). The GMS is
+ * also the structure that contains the pointer to the mmu callout
+ * functions. This structure is linked to the mm_struct for the address space
+ * using the mmu "register" function. The mmu interfaces are used to
+ * provide the callbacks for TLB invalidation. The GMS contains:
+ *
+ * 	- asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
+ * 	  loaded into the GRU.
+ * 	- asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
+ * 	  the above array
+ *	- ctxbitmap[maxgrus]. Indicates the contexts that are currently active
+ *	  in the GRU for the address space. This bitmap must be passed to the
+ *	  GRU to do an invalidate.
+ *
+ * The current algorithm for invalidating TLBs is:
+ * 	- scan the asidmap for GRUs where the context has been loaded, ie,
+ * 	  asid is non-zero.
+ * 	- for each gru found:
+ * 		- if the ctxtmap is non-zero, there are active contexts in the
+ * 		  GRU. TLB invalidate instructions must be issued to the GRU.
+ *		- if the ctxtmap is zero, no context is active. Set the ASID to
+ *		  zero to force a full TLB invalidation. This is fast but will
+ *		  cause a lot of TLB misses if the context is reloaded onto the
+ *		  GRU
+ *
+ */
+
+void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+			 unsigned long len)
+{
+	struct gru_state *gru;
+	struct gru_mm_tracker *asids;
+	struct gru_tlb_global_handle *tgh;
+	unsigned long num;
+	int grupagesize, pagesize, pageshift, gid, asid;
+
+	/* ZZZ TODO - handle huge pages */
+	pageshift = PAGE_SHIFT;
+	pagesize = (1UL << pageshift);
+	grupagesize = GRU_PAGESIZE(pageshift);
+	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
+
+	STAT(flush_tlb);
+	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
+		start, len, gms->ms_asidmap[0]);
+
+	spin_lock(&gms->ms_asid_lock);
+	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
+		STAT(flush_tlb_gru);
+		gru = GID_TO_GRU(gid);
+		asids = gms->ms_asids + gid;
+		asid = asids->mt_asid;
+		if (asids->mt_ctxbitmap && asid) {
+			STAT(flush_tlb_gru_tgh);
+			asid = GRUASID(asid, start);
+			gru_dbg(grudev,
+	"  FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n",
+			      gid, asid, start, grupagesize, num, asids->mt_ctxbitmap);
+			tgh = get_lock_tgh_handle(gru);
+			tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
+				       num - 1, asids->mt_ctxbitmap);
+			get_unlock_tgh_handle(tgh);
+		} else {
+			STAT(flush_tlb_gru_zero_asid);
+			asids->mt_asid = 0;
+			__clear_bit(gru->gs_gid, gms->ms_asidmap);
+			gru_dbg(grudev,
+	"  CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
+				gid, asid, asids->mt_ctxbitmap,
+				gms->ms_asidmap[0]);
+		}
+	}
+	spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Flush the entire TLB on a chiplet.
+ */
+void gru_flush_all_tlb(struct gru_state *gru)
+{
+	struct gru_tlb_global_handle *tgh;
+
+	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
+	tgh = get_lock_tgh_handle(gru);
+	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff);
+	get_unlock_tgh_handle(tgh);
+}
+
+/*
+ * MMUOPS notifier callout functions
+ */
+static void gru_invalidate_range_start(struct mmu_notifier *mn,
+				       struct mm_struct *mm,
+				       unsigned long start, unsigned long end)
+{
+	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+						 ms_notifier);
+
+	STAT(mmu_invalidate_range);
+	atomic_inc(&gms->ms_range_active);
+	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
+		start, end, atomic_read(&gms->ms_range_active));
+	gru_flush_tlb_range(gms, start, end - start);
+}
+
+static void gru_invalidate_range_end(struct mmu_notifier *mn,
+				     struct mm_struct *mm, unsigned long start,
+				     unsigned long end)
+{
+	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+						 ms_notifier);
+
+	/* ..._and_test() provides needed barrier */
+	(void)atomic_dec_and_test(&gms->ms_range_active);
+
+	wake_up_all(&gms->ms_wait_queue);
+	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
+}
+
+static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
+				unsigned long address)
+{
+	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+						 ms_notifier);
+
+	STAT(mmu_invalidate_page);
+	gru_flush_tlb_range(gms, address, PAGE_SIZE);
+	gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
+}
+
+static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+						 ms_notifier);
+
+	gms->ms_released = 1;
+	gru_dbg(grudev, "gms %p\n", gms);
+}
+
+
+static const struct mmu_notifier_ops gru_mmuops = {
+	.invalidate_page	= gru_invalidate_page,
+	.invalidate_range_start	= gru_invalidate_range_start,
+	.invalidate_range_end	= gru_invalidate_range_end,
+	.release		= gru_release,
+};
+
+/* Move this to the basic mmu_notifier file. But for now... */
+static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
+			const struct mmu_notifier_ops *ops)
+{
+	struct mmu_notifier *mn, *gru_mn = NULL;
+	struct hlist_node *n;
+
+	if (mm->mmu_notifier_mm) {
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
+					 hlist)
+		    if (mn->ops == ops) {
+			gru_mn = mn;
+			break;
+		}
+		rcu_read_unlock();
+	}
+	return gru_mn;
+}
+
+struct gru_mm_struct *gru_register_mmu_notifier(void)
+{
+	struct gru_mm_struct *gms;
+	struct mmu_notifier *mn;
+	int err;
+
+	mn = mmu_find_ops(current->mm, &gru_mmuops);
+	if (mn) {
+		gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+		atomic_inc(&gms->ms_refcnt);
+	} else {
+		gms = kzalloc(sizeof(*gms), GFP_KERNEL);
+		if (gms) {
+			STAT(gms_alloc);
+			spin_lock_init(&gms->ms_asid_lock);
+			gms->ms_notifier.ops = &gru_mmuops;
+			atomic_set(&gms->ms_refcnt, 1);
+			init_waitqueue_head(&gms->ms_wait_queue);
+			err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
+			if (err)
+				goto error;
+		}
+	}
+	gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
+		atomic_read(&gms->ms_refcnt));
+	return gms;
+error:
+	kfree(gms);
+	return ERR_PTR(err);
+}
+
+void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
+{
+	gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
+		atomic_read(&gms->ms_refcnt), gms->ms_released);
+	if (atomic_dec_return(&gms->ms_refcnt) == 0) {
+		if (!gms->ms_released)
+			mmu_notifier_unregister(&gms->ms_notifier, current->mm);
+		kfree(gms);
+		STAT(gms_free);
+	}
+}
+
+/*
+ * Setup TGH parameters. There are:
+ * 	- 24 TGH handles per GRU chiplet
+ * 	- a portion (MAX_LOCAL_TGH) of the handles are reserved for
+ * 	  use by blade-local cpus
+ * 	- the rest are used by off-blade cpus. This usage is
+ * 	  less frequent than blade-local usage.
+ *
+ * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
+ * has less tan or equal to 16 cpus, each cpu has a unique handle that it can
+ * use.
+ */
+#define MAX_LOCAL_TGH	16
+
+void gru_tgh_flush_init(struct gru_state *gru)
+{
+	int cpus, shift = 0, n;
+
+	cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
+
+	/* n = cpus rounded up to next power of 2 */
+	if (cpus) {
+		n = 1 << fls(cpus - 1);
+
+		/*
+		 * shift count for converting local cpu# to TGH index
+		 *      0 if cpus <= MAX_LOCAL_TGH,
+		 *      1 if cpus <= 2*MAX_LOCAL_TGH,
+		 *      etc
+		 */
+		shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
+	}
+	gru->gs_tgh_local_shift = shift;
+
+	/* first starting TGH index to use for remote purges */
+	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
+
+}
diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile
new file mode 100644
index 00000000000..4fc40d8e1bc
--- /dev/null
+++ b/drivers/misc/sgi-xp/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for SGI's XP devices.
+#
+
+obj-$(CONFIG_SGI_XP)		+= xp.o
+xp-y				:= xp_main.o
+xp-$(CONFIG_IA64_SGI_SN2)	+= xp_sn2.o xp_nofault.o
+xp-$(CONFIG_IA64_GENERIC)	+= xp_sn2.o xp_nofault.o
+xp-$(CONFIG_IA64_SGI_UV)	+= xp_uv.o
+xp-$(CONFIG_X86_64)		+= xp_uv.o
+
+obj-$(CONFIG_SGI_XP)		+= xpc.o
+xpc-y				:= xpc_main.o xpc_channel.o xpc_partition.o
+xpc-$(CONFIG_IA64_SGI_SN2)	+= xpc_sn2.o
+xpc-$(CONFIG_IA64_GENERIC)	+= xpc_sn2.o
+xpc-$(CONFIG_IA64_SGI_UV) 	+= xpc_uv.o
+xpc-$(CONFIG_X86_64)		+= xpc_uv.o
+
+obj-$(CONFIG_SGI_XP)		+= xpnet.o
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
new file mode 100644
index 00000000000..851b2f25ce0
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp.h
@@ -0,0 +1,359 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * External Cross Partition (XP) structures and defines.
+ */
+
+#ifndef _DRIVERS_MISC_SGIXP_XP_H
+#define _DRIVERS_MISC_SGIXP_XP_H
+
+#include <linux/mutex.h>
+
+#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV
+#include <asm/uv/uv.h>
+#define is_uv()		is_uv_system()
+#endif
+
+#ifndef is_uv
+#define is_uv()		0
+#endif
+
+#if defined CONFIG_IA64
+#include <asm/system.h>
+#include <asm/sn/arch.h>	/* defines is_shub1() and is_shub2() */
+#define is_shub()	ia64_platform_is("sn2")
+#endif
+
+#ifndef is_shub1
+#define is_shub1()	0
+#endif
+
+#ifndef is_shub2
+#define is_shub2()	0
+#endif
+
+#ifndef is_shub
+#define is_shub()	0
+#endif
+
+#ifdef USE_DBUG_ON
+#define DBUG_ON(condition)	BUG_ON(condition)
+#else
+#define DBUG_ON(condition)
+#endif
+
+/*
+ * Define the maximum number of partitions the system can possibly support.
+ * It is based on the maximum number of hardware partitionable regions. The
+ * term 'region' in this context refers to the minimum number of nodes that
+ * can comprise an access protection grouping. The access protection is in
+ * regards to memory, IPI and IOI.
+ *
+ * The maximum number of hardware partitionable regions is equal to the
+ * maximum number of nodes in the entire system divided by the minimum number
+ * of nodes that comprise an access protection grouping.
+ */
+#define XP_MAX_NPARTITIONS_SN2	64
+#define XP_MAX_NPARTITIONS_UV	256
+
+/*
+ * XPC establishes channel connections between the local partition and any
+ * other partition that is currently up. Over these channels, kernel-level
+ * `users' can communicate with their counterparts on the other partitions.
+ *
+ * If the need for additional channels arises, one can simply increase
+ * XPC_MAX_NCHANNELS accordingly. If the day should come where that number
+ * exceeds the absolute MAXIMUM number of channels possible (eight), then one
+ * will need to make changes to the XPC code to accommodate for this.
+ *
+ * The absolute maximum number of channels possible is limited to eight for
+ * performance reasons on sn2 hardware. The internal cross partition structures
+ * require sixteen bytes per channel, and eight allows all of this
+ * interface-shared info to fit in one 128-byte cacheline.
+ */
+#define XPC_MEM_CHANNEL		0	/* memory channel number */
+#define	XPC_NET_CHANNEL		1	/* network channel number */
+
+#define XPC_MAX_NCHANNELS	2	/* max #of channels allowed */
+
+#if XPC_MAX_NCHANNELS > 8
+#error	XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible.
+#endif
+
+/*
+ * Define macro, XPC_MSG_SIZE(), is provided for the user
+ * that wants to fit as many msg entries as possible in a given memory size
+ * (e.g. a memory page).
+ */
+#define XPC_MSG_MAX_SIZE	128
+#define XPC_MSG_HDR_MAX_SIZE	16
+#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE)
+
+#define XPC_MSG_SIZE(_payload_size) \
+				ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \
+				      is_uv() ? 64 : 128)
+
+
+/*
+ * Define the return values and values passed to user's callout functions.
+ * (It is important to add new value codes at the end just preceding
+ * xpUnknownReason, which must have the highest numerical value.)
+ */
+enum xp_retval {
+	xpSuccess = 0,
+
+	xpNotConnected,		/*  1: channel is not connected */
+	xpConnected,		/*  2: channel connected (opened) */
+	xpRETIRED1,		/*  3: (formerly xpDisconnected) */
+
+	xpMsgReceived,		/*  4: message received */
+	xpMsgDelivered,		/*  5: message delivered and acknowledged */
+
+	xpRETIRED2,		/*  6: (formerly xpTransferFailed) */
+
+	xpNoWait,		/*  7: operation would require wait */
+	xpRetry,		/*  8: retry operation */
+	xpTimeout,		/*  9: timeout in xpc_allocate_msg_wait() */
+	xpInterrupted,		/* 10: interrupted wait */
+
+	xpUnequalMsgSizes,	/* 11: message size disparity between sides */
+	xpInvalidAddress,	/* 12: invalid address */
+
+	xpNoMemory,		/* 13: no memory available for XPC structures */
+	xpLackOfResources,	/* 14: insufficient resources for operation */
+	xpUnregistered,		/* 15: channel is not registered */
+	xpAlreadyRegistered,	/* 16: channel is already registered */
+
+	xpPartitionDown,	/* 17: remote partition is down */
+	xpNotLoaded,		/* 18: XPC module is not loaded */
+	xpUnloading,		/* 19: this side is unloading XPC module */
+
+	xpBadMagic,		/* 20: XPC MAGIC string not found */
+
+	xpReactivating,		/* 21: remote partition was reactivated */
+
+	xpUnregistering,	/* 22: this side is unregistering channel */
+	xpOtherUnregistering,	/* 23: other side is unregistering channel */
+
+	xpCloneKThread,		/* 24: cloning kernel thread */
+	xpCloneKThreadFailed,	/* 25: cloning kernel thread failed */
+
+	xpNoHeartbeat,		/* 26: remote partition has no heartbeat */
+
+	xpPioReadError,		/* 27: PIO read error */
+	xpPhysAddrRegFailed,	/* 28: registration of phys addr range failed */
+
+	xpRETIRED3,		/* 29: (formerly xpBteDirectoryError) */
+	xpRETIRED4,		/* 30: (formerly xpBtePoisonError) */
+	xpRETIRED5,		/* 31: (formerly xpBteWriteError) */
+	xpRETIRED6,		/* 32: (formerly xpBteAccessError) */
+	xpRETIRED7,		/* 33: (formerly xpBtePWriteError) */
+	xpRETIRED8,		/* 34: (formerly xpBtePReadError) */
+	xpRETIRED9,		/* 35: (formerly xpBteTimeOutError) */
+	xpRETIRED10,		/* 36: (formerly xpBteXtalkError) */
+	xpRETIRED11,		/* 37: (formerly xpBteNotAvailable) */
+	xpRETIRED12,		/* 38: (formerly xpBteUnmappedError) */
+
+	xpBadVersion,		/* 39: bad version number */
+	xpVarsNotSet,		/* 40: the XPC variables are not set up */
+	xpNoRsvdPageAddr,	/* 41: unable to get rsvd page's phys addr */
+	xpInvalidPartid,	/* 42: invalid partition ID */
+	xpLocalPartid,		/* 43: local partition ID */
+
+	xpOtherGoingDown,	/* 44: other side going down, reason unknown */
+	xpSystemGoingDown,	/* 45: system is going down, reason unknown */
+	xpSystemHalt,		/* 46: system is being halted */
+	xpSystemReboot,		/* 47: system is being rebooted */
+	xpSystemPoweroff,	/* 48: system is being powered off */
+
+	xpDisconnecting,	/* 49: channel disconnecting (closing) */
+
+	xpOpenCloseError,	/* 50: channel open/close protocol error */
+
+	xpDisconnected,		/* 51: channel disconnected (closed) */
+
+	xpBteCopyError,		/* 52: bte_copy() returned error */
+	xpSalError,		/* 53: sn SAL error */
+	xpRsvdPageNotSet,	/* 54: the reserved page is not set up */
+	xpPayloadTooBig,	/* 55: payload too large for message slot */
+
+	xpUnsupported,		/* 56: unsupported functionality or resource */
+	xpNeedMoreInfo,		/* 57: more info is needed by SAL */
+
+	xpGruCopyError,		/* 58: gru_copy_gru() returned error */
+	xpGruSendMqError,	/* 59: gru send message queue related error */
+
+	xpBadChannelNumber,	/* 60: invalid channel number */
+	xpBadMsgType,		/* 61: invalid message type */
+	xpBiosError,		/* 62: BIOS error */
+
+	xpUnknownReason		/* 63: unknown reason - must be last in enum */
+};
+
+/*
+ * Define the callout function type used by XPC to update the user on
+ * connection activity and state changes via the user function registered
+ * by xpc_connect().
+ *
+ * Arguments:
+ *
+ *	reason - reason code.
+ *	partid - partition ID associated with condition.
+ *	ch_number - channel # associated with condition.
+ *	data - pointer to optional data.
+ *	key - pointer to optional user-defined value provided as the "key"
+ *	      argument to xpc_connect().
+ *
+ * A reason code of xpConnected indicates that a connection has been
+ * established to the specified partition on the specified channel. The data
+ * argument indicates the max number of entries allowed in the message queue.
+ *
+ * A reason code of xpMsgReceived indicates that a XPC message arrived from
+ * the specified partition on the specified channel. The data argument
+ * specifies the address of the message's payload. The user must call
+ * xpc_received() when finished with the payload.
+ *
+ * All other reason codes indicate failure. The data argmument is NULL.
+ * When a failure reason code is received, one can assume that the channel
+ * is not connected.
+ */
+typedef void (*xpc_channel_func) (enum xp_retval reason, short partid,
+				  int ch_number, void *data, void *key);
+
+/*
+ * Define the callout function type used by XPC to notify the user of
+ * messages received and delivered via the user function registered by
+ * xpc_send_notify().
+ *
+ * Arguments:
+ *
+ *	reason - reason code.
+ *	partid - partition ID associated with condition.
+ *	ch_number - channel # associated with condition.
+ *	key - pointer to optional user-defined value provided as the "key"
+ *	      argument to xpc_send_notify().
+ *
+ * A reason code of xpMsgDelivered indicates that the message was delivered
+ * to the intended recipient and that they have acknowledged its receipt by
+ * calling xpc_received().
+ *
+ * All other reason codes indicate failure.
+ *
+ * NOTE: The user defined function must be callable by an interrupt handler
+ *       and thus cannot block.
+ */
+typedef void (*xpc_notify_func) (enum xp_retval reason, short partid,
+				 int ch_number, void *key);
+
+/*
+ * The following is a registration entry. There is a global array of these,
+ * one per channel. It is used to record the connection registration made
+ * by the users of XPC. As long as a registration entry exists, for any
+ * partition that comes up, XPC will attempt to establish a connection on
+ * that channel. Notification that a connection has been made will occur via
+ * the xpc_channel_func function.
+ *
+ * The 'func' field points to the function to call when aynchronous
+ * notification is required for such events as: a connection established/lost,
+ * or an incoming message received, or an error condition encountered. A
+ * non-NULL 'func' field indicates that there is an active registration for
+ * the channel.
+ */
+struct xpc_registration {
+	struct mutex mutex;
+	xpc_channel_func func;	/* function to call */
+	void *key;		/* pointer to user's key */
+	u16 nentries;		/* #of msg entries in local msg queue */
+	u16 entry_size;		/* message queue's message entry size */
+	u32 assigned_limit;	/* limit on #of assigned kthreads */
+	u32 idle_limit;		/* limit on #of idle kthreads */
+} ____cacheline_aligned;
+
+#define XPC_CHANNEL_REGISTERED(_c)	(xpc_registrations[_c].func != NULL)
+
+/* the following are valid xpc_send() or xpc_send_notify() flags */
+#define XPC_WAIT	0	/* wait flag */
+#define XPC_NOWAIT	1	/* no wait flag */
+
+struct xpc_interface {
+	void (*connect) (int);
+	void (*disconnect) (int);
+	enum xp_retval (*send) (short, int, u32, void *, u16);
+	enum xp_retval (*send_notify) (short, int, u32, void *, u16,
+					xpc_notify_func, void *);
+	void (*received) (short, int, void *);
+	enum xp_retval (*partid_to_nasids) (short, void *);
+};
+
+extern struct xpc_interface xpc_interface;
+
+extern void xpc_set_interface(void (*)(int),
+			      void (*)(int),
+			      enum xp_retval (*)(short, int, u32, void *, u16),
+			      enum xp_retval (*)(short, int, u32, void *, u16,
+						 xpc_notify_func, void *),
+			      void (*)(short, int, void *),
+			      enum xp_retval (*)(short, void *));
+extern void xpc_clear_interface(void);
+
+extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16,
+				   u16, u32, u32);
+extern void xpc_disconnect(int);
+
+static inline enum xp_retval
+xpc_send(short partid, int ch_number, u32 flags, void *payload,
+	 u16 payload_size)
+{
+	return xpc_interface.send(partid, ch_number, flags, payload,
+				  payload_size);
+}
+
+static inline enum xp_retval
+xpc_send_notify(short partid, int ch_number, u32 flags, void *payload,
+		u16 payload_size, xpc_notify_func func, void *key)
+{
+	return xpc_interface.send_notify(partid, ch_number, flags, payload,
+					 payload_size, func, key);
+}
+
+static inline void
+xpc_received(short partid, int ch_number, void *payload)
+{
+	return xpc_interface.received(partid, ch_number, payload);
+}
+
+static inline enum xp_retval
+xpc_partid_to_nasids(short partid, void *nasids)
+{
+	return xpc_interface.partid_to_nasids(partid, nasids);
+}
+
+extern short xp_max_npartitions;
+extern short xp_partition_id;
+extern u8 xp_region_size;
+
+extern unsigned long (*xp_pa) (void *);
+extern unsigned long (*xp_socket_pa) (unsigned long);
+extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long,
+		       size_t);
+extern int (*xp_cpu_to_nasid) (int);
+extern enum xp_retval (*xp_expand_memprotect) (unsigned long, unsigned long);
+extern enum xp_retval (*xp_restrict_memprotect) (unsigned long, unsigned long);
+
+extern u64 xp_nofault_PIOR_target;
+extern int xp_nofault_PIOR(void *);
+extern int xp_error_PIOR(void);
+
+extern struct device *xp;
+extern enum xp_retval xp_init_sn2(void);
+extern enum xp_retval xp_init_uv(void);
+extern void xp_exit_sn2(void);
+extern void xp_exit_uv(void);
+
+#endif /* _DRIVERS_MISC_SGIXP_XP_H */
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
new file mode 100644
index 00000000000..01be66d02ca
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -0,0 +1,286 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) base.
+ *
+ *	XP provides a base from which its users can interact
+ *	with XPC, yet not be dependent on XPC.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include "xp.h"
+
+/* define the XP debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xp_dbg_name = {
+	.name = "xp"
+};
+
+struct device xp_dbg_subname = {
+	.init_name = "",		/* set to "" */
+	.driver = &xp_dbg_name
+};
+
+struct device *xp = &xp_dbg_subname;
+
+/* max #of partitions possible */
+short xp_max_npartitions;
+EXPORT_SYMBOL_GPL(xp_max_npartitions);
+
+short xp_partition_id;
+EXPORT_SYMBOL_GPL(xp_partition_id);
+
+u8 xp_region_size;
+EXPORT_SYMBOL_GPL(xp_region_size);
+
+unsigned long (*xp_pa) (void *addr);
+EXPORT_SYMBOL_GPL(xp_pa);
+
+unsigned long (*xp_socket_pa) (unsigned long gpa);
+EXPORT_SYMBOL_GPL(xp_socket_pa);
+
+enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa,
+				    const unsigned long src_gpa, size_t len);
+EXPORT_SYMBOL_GPL(xp_remote_memcpy);
+
+int (*xp_cpu_to_nasid) (int cpuid);
+EXPORT_SYMBOL_GPL(xp_cpu_to_nasid);
+
+enum xp_retval (*xp_expand_memprotect) (unsigned long phys_addr,
+					unsigned long size);
+EXPORT_SYMBOL_GPL(xp_expand_memprotect);
+enum xp_retval (*xp_restrict_memprotect) (unsigned long phys_addr,
+					  unsigned long size);
+EXPORT_SYMBOL_GPL(xp_restrict_memprotect);
+
+/*
+ * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
+ * users of XPC.
+ */
+struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS];
+EXPORT_SYMBOL_GPL(xpc_registrations);
+
+/*
+ * Initialize the XPC interface to indicate that XPC isn't loaded.
+ */
+static enum xp_retval
+xpc_notloaded(void)
+{
+	return xpNotLoaded;
+}
+
+struct xpc_interface xpc_interface = {
+	(void (*)(int))xpc_notloaded,
+	(void (*)(int))xpc_notloaded,
+	(enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded,
+	(enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func,
+			   void *))xpc_notloaded,
+	(void (*)(short, int, void *))xpc_notloaded,
+	(enum xp_retval(*)(short, void *))xpc_notloaded
+};
+EXPORT_SYMBOL_GPL(xpc_interface);
+
+/*
+ * XPC calls this when it (the XPC module) has been loaded.
+ */
+void
+xpc_set_interface(void (*connect) (int),
+		  void (*disconnect) (int),
+		  enum xp_retval (*send) (short, int, u32, void *, u16),
+		  enum xp_retval (*send_notify) (short, int, u32, void *, u16,
+						  xpc_notify_func, void *),
+		  void (*received) (short, int, void *),
+		  enum xp_retval (*partid_to_nasids) (short, void *))
+{
+	xpc_interface.connect = connect;
+	xpc_interface.disconnect = disconnect;
+	xpc_interface.send = send;
+	xpc_interface.send_notify = send_notify;
+	xpc_interface.received = received;
+	xpc_interface.partid_to_nasids = partid_to_nasids;
+}
+EXPORT_SYMBOL_GPL(xpc_set_interface);
+
+/*
+ * XPC calls this when it (the XPC module) is being unloaded.
+ */
+void
+xpc_clear_interface(void)
+{
+	xpc_interface.connect = (void (*)(int))xpc_notloaded;
+	xpc_interface.disconnect = (void (*)(int))xpc_notloaded;
+	xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16))
+	    xpc_notloaded;
+	xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *,
+						       u16, xpc_notify_func,
+						       void *))xpc_notloaded;
+	xpc_interface.received = (void (*)(short, int, void *))
+	    xpc_notloaded;
+	xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *))
+	    xpc_notloaded;
+}
+EXPORT_SYMBOL_GPL(xpc_clear_interface);
+
+/*
+ * Register for automatic establishment of a channel connection whenever
+ * a partition comes up.
+ *
+ * Arguments:
+ *
+ *	ch_number - channel # to register for connection.
+ *	func - function to call for asynchronous notification of channel
+ *	       state changes (i.e., connection, disconnection, error) and
+ *	       the arrival of incoming messages.
+ *      key - pointer to optional user-defined value that gets passed back
+ *	      to the user on any callouts made to func.
+ *	payload_size - size in bytes of the XPC message's payload area which
+ *		       contains a user-defined message. The user should make
+ *		       this large enough to hold their largest message.
+ *	nentries - max #of XPC message entries a message queue can contain.
+ *		   The actual number, which is determined when a connection
+ * 		   is established and may be less then requested, will be
+ *		   passed to the user via the xpConnected callout.
+ *	assigned_limit - max number of kthreads allowed to be processing
+ * 			 messages (per connection) at any given instant.
+ *	idle_limit - max number of kthreads allowed to be idle at any given
+ * 		     instant.
+ */
+enum xp_retval
+xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
+	    u16 nentries, u32 assigned_limit, u32 idle_limit)
+{
+	struct xpc_registration *registration;
+
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+	DBUG_ON(payload_size == 0 || nentries == 0);
+	DBUG_ON(func == NULL);
+	DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
+
+	if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE)
+		return xpPayloadTooBig;
+
+	registration = &xpc_registrations[ch_number];
+
+	if (mutex_lock_interruptible(&registration->mutex) != 0)
+		return xpInterrupted;
+
+	/* if XPC_CHANNEL_REGISTERED(ch_number) */
+	if (registration->func != NULL) {
+		mutex_unlock(&registration->mutex);
+		return xpAlreadyRegistered;
+	}
+
+	/* register the channel for connection */
+	registration->entry_size = XPC_MSG_SIZE(payload_size);
+	registration->nentries = nentries;
+	registration->assigned_limit = assigned_limit;
+	registration->idle_limit = idle_limit;
+	registration->key = key;
+	registration->func = func;
+
+	mutex_unlock(&registration->mutex);
+
+	xpc_interface.connect(ch_number);
+
+	return xpSuccess;
+}
+EXPORT_SYMBOL_GPL(xpc_connect);
+
+/*
+ * Remove the registration for automatic connection of the specified channel
+ * when a partition comes up.
+ *
+ * Before returning this xpc_disconnect() will wait for all connections on the
+ * specified channel have been closed/torndown. So the caller can be assured
+ * that they will not be receiving any more callouts from XPC to their
+ * function registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ *	ch_number - channel # to unregister.
+ */
+void
+xpc_disconnect(int ch_number)
+{
+	struct xpc_registration *registration;
+
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+
+	registration = &xpc_registrations[ch_number];
+
+	/*
+	 * We've decided not to make this a down_interruptible(), since we
+	 * figured XPC's users will just turn around and call xpc_disconnect()
+	 * again anyways, so we might as well wait, if need be.
+	 */
+	mutex_lock(&registration->mutex);
+
+	/* if !XPC_CHANNEL_REGISTERED(ch_number) */
+	if (registration->func == NULL) {
+		mutex_unlock(&registration->mutex);
+		return;
+	}
+
+	/* remove the connection registration for the specified channel */
+	registration->func = NULL;
+	registration->key = NULL;
+	registration->nentries = 0;
+	registration->entry_size = 0;
+	registration->assigned_limit = 0;
+	registration->idle_limit = 0;
+
+	xpc_interface.disconnect(ch_number);
+
+	mutex_unlock(&registration->mutex);
+
+	return;
+}
+EXPORT_SYMBOL_GPL(xpc_disconnect);
+
+int __init
+xp_init(void)
+{
+	enum xp_retval ret;
+	int ch_number;
+
+	/* initialize the connection registration mutex */
+	for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++)
+		mutex_init(&xpc_registrations[ch_number].mutex);
+
+	if (is_shub())
+		ret = xp_init_sn2();
+	else if (is_uv())
+		ret = xp_init_uv();
+	else
+		ret = 0;
+
+	if (ret != xpSuccess)
+		return ret;
+
+	return 0;
+}
+
+module_init(xp_init);
+
+void __exit
+xp_exit(void)
+{
+	if (is_shub())
+		xp_exit_sn2();
+	else if (is_uv())
+		xp_exit_uv();
+}
+
+module_exit(xp_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition (XP) base");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/sgi-xp/xp_nofault.S b/drivers/misc/sgi-xp/xp_nofault.S
new file mode 100644
index 00000000000..e38d4331942
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_nofault.S
@@ -0,0 +1,35 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * The xp_nofault_PIOR function takes a pointer to a remote PIO register
+ * and attempts to load and consume a value from it.  This function
+ * will be registered as a nofault code block.  In the event that the
+ * PIO read fails, the MCA handler will force the error to look
+ * corrected and vector to the xp_error_PIOR which will return an error.
+ *
+ * The definition of "consumption" and the time it takes for an MCA
+ * to surface is processor implementation specific.  This code
+ * is sufficient on Itanium through the Montvale processor family.
+ * It may need to be adjusted for future processor implementations.
+ *
+ *	extern int xp_nofault_PIOR(void *remote_register);
+ */
+
+	.global xp_nofault_PIOR
+xp_nofault_PIOR:
+	mov	r8=r0			// Stage a success return value
+	ld8.acq	r9=[r32];;		// PIO Read the specified register
+	adds	r9=1,r9;;		// Add to force consumption
+	srlz.i;;			// Allow time for MCA to surface
+	br.ret.sptk.many b0;;		// Return success
+
+	.global xp_error_PIOR
+xp_error_PIOR:
+	mov	r8=1			// Return value of 1
+	br.ret.sptk.many b0;;		// Return failure
diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
new file mode 100644
index 00000000000..d8e463f8724
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -0,0 +1,190 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) sn2-based functions.
+ *
+ *      Architecture specific implementation of common functions.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/sn_sal.h>
+#include "xp.h"
+
+/*
+ * The export of xp_nofault_PIOR needs to happen here since it is defined
+ * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is
+ * defined here.
+ */
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR);
+
+u64 xp_nofault_PIOR_target;
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
+
+/*
+ * Register a nofault code region which performs a cross-partition PIO read.
+ * If the PIO read times out, the MCA handler will consume the error and
+ * return to a kernel-provided instruction to indicate an error. This PIO read
+ * exists because it is guaranteed to timeout if the destination is down
+ * (amo operations do not timeout on at least some CPUs on Shubs <= v1.2,
+ * which unfortunately we have to work around).
+ */
+static enum xp_retval
+xp_register_nofault_code_sn2(void)
+{
+	int ret;
+	u64 func_addr;
+	u64 err_func_addr;
+
+	func_addr = *(u64 *)xp_nofault_PIOR;
+	err_func_addr = *(u64 *)xp_error_PIOR;
+	ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
+				       1, 1);
+	if (ret != 0) {
+		dev_err(xp, "can't register nofault code, error=%d\n", ret);
+		return xpSalError;
+	}
+	/*
+	 * Setup the nofault PIO read target. (There is no special reason why
+	 * SH_IPI_ACCESS was selected.)
+	 */
+	if (is_shub1())
+		xp_nofault_PIOR_target = SH1_IPI_ACCESS;
+	else if (is_shub2())
+		xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
+
+	return xpSuccess;
+}
+
+static void
+xp_unregister_nofault_code_sn2(void)
+{
+	u64 func_addr = *(u64 *)xp_nofault_PIOR;
+	u64 err_func_addr = *(u64 *)xp_error_PIOR;
+
+	/* unregister the PIO read nofault code region */
+	(void)sn_register_nofault_code(func_addr, err_func_addr,
+				       err_func_addr, 1, 0);
+}
+
+/*
+ * Convert a virtual memory address to a physical memory address.
+ */
+static unsigned long
+xp_pa_sn2(void *addr)
+{
+	return __pa(addr);
+}
+
+/*
+ * Convert a global physical to a socket physical address.
+ */
+static unsigned long
+xp_socket_pa_sn2(unsigned long gpa)
+{
+	return gpa;
+}
+
+/*
+ * Wrapper for bte_copy().
+ *
+ *	dst_pa - physical address of the destination of the transfer.
+ *	src_pa - physical address of the source of the transfer.
+ *	len - number of bytes to transfer from source to destination.
+ *
+ * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock.
+ */
+static enum xp_retval
+xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa,
+		     size_t len)
+{
+	bte_result_t ret;
+
+	ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+	if (ret == BTE_SUCCESS)
+		return xpSuccess;
+
+	if (is_shub2()) {
+		dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa="
+			"0x%016lx src_pa=0x%016lx len=%ld\\n", ret, dst_pa,
+			src_pa, len);
+	} else {
+		dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx "
+			"src_pa=0x%016lx len=%ld\\n", ret, dst_pa, src_pa, len);
+	}
+
+	return xpBteCopyError;
+}
+
+static int
+xp_cpu_to_nasid_sn2(int cpuid)
+{
+	return cpuid_to_nasid(cpuid);
+}
+
+static enum xp_retval
+xp_expand_memprotect_sn2(unsigned long phys_addr, unsigned long size)
+{
+	u64 nasid_array = 0;
+	int ret;
+
+	ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1,
+				   &nasid_array);
+	if (ret != 0) {
+		dev_err(xp, "sn_change_memprotect(,, "
+			"SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret);
+		return xpSalError;
+	}
+	return xpSuccess;
+}
+
+static enum xp_retval
+xp_restrict_memprotect_sn2(unsigned long phys_addr, unsigned long size)
+{
+	u64 nasid_array = 0;
+	int ret;
+
+	ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0,
+				   &nasid_array);
+	if (ret != 0) {
+		dev_err(xp, "sn_change_memprotect(,, "
+			"SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret);
+		return xpSalError;
+	}
+	return xpSuccess;
+}
+
+enum xp_retval
+xp_init_sn2(void)
+{
+	BUG_ON(!is_shub());
+
+	xp_max_npartitions = XP_MAX_NPARTITIONS_SN2;
+	xp_partition_id = sn_partition_id;
+	xp_region_size = sn_region_size;
+
+	xp_pa = xp_pa_sn2;
+	xp_socket_pa = xp_socket_pa_sn2;
+	xp_remote_memcpy = xp_remote_memcpy_sn2;
+	xp_cpu_to_nasid = xp_cpu_to_nasid_sn2;
+	xp_expand_memprotect = xp_expand_memprotect_sn2;
+	xp_restrict_memprotect = xp_restrict_memprotect_sn2;
+
+	return xp_register_nofault_code_sn2();
+}
+
+void
+xp_exit_sn2(void)
+{
+	BUG_ON(!is_shub());
+
+	xp_unregister_nofault_code_sn2();
+}
+
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
new file mode 100644
index 00000000000..a0d093274dc
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -0,0 +1,171 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) uv-based functions.
+ *
+ *      Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/device.h>
+#include <asm/uv/uv_hub.h>
+#if defined CONFIG_X86_64
+#include <asm/uv/bios.h>
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+#include <asm/sn/sn_sal.h>
+#endif
+#include "../sgi-gru/grukservices.h"
+#include "xp.h"
+
+/*
+ * Convert a virtual memory address to a physical memory address.
+ */
+static unsigned long
+xp_pa_uv(void *addr)
+{
+	return uv_gpa(addr);
+}
+
+/*
+ * Convert a global physical to socket physical address.
+ */
+static unsigned long
+xp_socket_pa_uv(unsigned long gpa)
+{
+	return uv_gpa_to_soc_phys_ram(gpa);
+}
+
+static enum xp_retval
+xp_remote_mmr_read(unsigned long dst_gpa, const unsigned long src_gpa,
+		   size_t len)
+{
+	int ret;
+	unsigned long *dst_va = __va(uv_gpa_to_soc_phys_ram(dst_gpa));
+
+	BUG_ON(!uv_gpa_in_mmr_space(src_gpa));
+	BUG_ON(len != 8);
+
+	ret = gru_read_gpa(dst_va, src_gpa);
+	if (ret == 0)
+		return xpSuccess;
+
+	dev_err(xp, "gru_read_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx "
+		"len=%ld\n", dst_gpa, src_gpa, len);
+	return xpGruCopyError;
+}
+
+
+static enum xp_retval
+xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa,
+		    size_t len)
+{
+	int ret;
+
+	if (uv_gpa_in_mmr_space(src_gpa))
+		return xp_remote_mmr_read(dst_gpa, src_gpa, len);
+
+	ret = gru_copy_gpa(dst_gpa, src_gpa, len);
+	if (ret == 0)
+		return xpSuccess;
+
+	dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx "
+		"len=%ld\n", dst_gpa, src_gpa, len);
+	return xpGruCopyError;
+}
+
+static int
+xp_cpu_to_nasid_uv(int cpuid)
+{
+	/* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? */
+	return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid));
+}
+
+static enum xp_retval
+xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size)
+{
+	int ret;
+
+#if defined CONFIG_X86_64
+	ret = uv_bios_change_memprotect(phys_addr, size, UV_MEMPROT_ALLOW_RW);
+	if (ret != BIOS_STATUS_SUCCESS) {
+		dev_err(xp, "uv_bios_change_memprotect(,, "
+			"UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret);
+		return xpBiosError;
+	}
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	u64 nasid_array;
+
+	ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1,
+				   &nasid_array);
+	if (ret != 0) {
+		dev_err(xp, "sn_change_memprotect(,, "
+			"SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret);
+		return xpSalError;
+	}
+#else
+	#error not a supported configuration
+#endif
+	return xpSuccess;
+}
+
+static enum xp_retval
+xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size)
+{
+	int ret;
+
+#if defined CONFIG_X86_64
+	ret = uv_bios_change_memprotect(phys_addr, size,
+					UV_MEMPROT_RESTRICT_ACCESS);
+	if (ret != BIOS_STATUS_SUCCESS) {
+		dev_err(xp, "uv_bios_change_memprotect(,, "
+			"UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret);
+		return xpBiosError;
+	}
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	u64 nasid_array;
+
+	ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0,
+				   &nasid_array);
+	if (ret != 0) {
+		dev_err(xp, "sn_change_memprotect(,, "
+			"SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret);
+		return xpSalError;
+	}
+#else
+	#error not a supported configuration
+#endif
+	return xpSuccess;
+}
+
+enum xp_retval
+xp_init_uv(void)
+{
+	BUG_ON(!is_uv());
+
+	xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
+	xp_partition_id = sn_partition_id;
+	xp_region_size = sn_region_size;
+
+	xp_pa = xp_pa_uv;
+	xp_socket_pa = xp_socket_pa_uv;
+	xp_remote_memcpy = xp_remote_memcpy_uv;
+	xp_cpu_to_nasid = xp_cpu_to_nasid_uv;
+	xp_expand_memprotect = xp_expand_memprotect_uv;
+	xp_restrict_memprotect = xp_restrict_memprotect_uv;
+
+	return xpSuccess;
+}
+
+void
+xp_exit_uv(void)
+{
+	BUG_ON(!is_uv());
+}
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
new file mode 100644
index 00000000000..b94d5f76770
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -0,0 +1,1004 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) structures and macros.
+ */
+
+#ifndef _DRIVERS_MISC_SGIXP_XPC_H
+#define _DRIVERS_MISC_SGIXP_XPC_H
+
+#include <linux/wait.h>
+#include <linux/completion.h>
+#include <linux/timer.h>
+#include <linux/sched.h>
+#include "xp.h"
+
+/*
+ * XPC Version numbers consist of a major and minor number. XPC can always
+ * talk to versions with same major #, and never talk to versions with a
+ * different major #.
+ */
+#define _XPC_VERSION(_maj, _min)	(((_maj) << 4) | ((_min) & 0xf))
+#define XPC_VERSION_MAJOR(_v)		((_v) >> 4)
+#define XPC_VERSION_MINOR(_v)		((_v) & 0xf)
+
+/* define frequency of the heartbeat and frequency how often it's checked */
+#define XPC_HB_DEFAULT_INTERVAL		5	/* incr HB every x secs */
+#define XPC_HB_CHECK_DEFAULT_INTERVAL	20	/* check HB every x secs */
+
+/* define the process name of HB checker and the CPU it is pinned to */
+#define XPC_HB_CHECK_THREAD_NAME	"xpc_hb"
+#define XPC_HB_CHECK_CPU		0
+
+/* define the process name of the discovery thread */
+#define XPC_DISCOVERY_THREAD_NAME	"xpc_discovery"
+
+/*
+ * the reserved page
+ *
+ *   SAL reserves one page of memory per partition for XPC. Though a full page
+ *   in length (16384 bytes), its starting address is not page aligned, but it
+ *   is cacheline aligned. The reserved page consists of the following:
+ *
+ *   reserved page header
+ *
+ *     The first two 64-byte cachelines of the reserved page contain the
+ *     header (struct xpc_rsvd_page). Before SAL initialization has completed,
+ *     SAL has set up the following fields of the reserved page header:
+ *     SAL_signature, SAL_version, SAL_partid, and SAL_nasids_size. The
+ *     other fields are set up by XPC. (xpc_rsvd_page points to the local
+ *     partition's reserved page.)
+ *
+ *   part_nasids mask
+ *   mach_nasids mask
+ *
+ *     SAL also sets up two bitmaps (or masks), one that reflects the actual
+ *     nasids in this partition (part_nasids), and the other that reflects
+ *     the actual nasids in the entire machine (mach_nasids). We're only
+ *     interested in the even numbered nasids (which contain the processors
+ *     and/or memory), so we only need half as many bits to represent the
+ *     nasids. When mapping nasid to bit in a mask (or bit to nasid) be sure
+ *     to either divide or multiply by 2. The part_nasids mask is located
+ *     starting at the first cacheline following the reserved page header. The
+ *     mach_nasids mask follows right after the part_nasids mask. The size in
+ *     bytes of each mask is reflected by the reserved page header field
+ *     'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids
+ *     and xpc_mach_nasids.)
+ *
+ *   vars	(ia64-sn2 only)
+ *   vars part	(ia64-sn2 only)
+ *
+ *     Immediately following the mach_nasids mask are the XPC variables
+ *     required by other partitions. First are those that are generic to all
+ *     partitions (vars), followed on the next available cacheline by those
+ *     which are partition specific (vars part). These are setup by XPC.
+ *     (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
+ *
+ * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been
+ *       initialized.
+ */
+struct xpc_rsvd_page {
+	u64 SAL_signature;	/* SAL: unique signature */
+	u64 SAL_version;	/* SAL: version */
+	short SAL_partid;	/* SAL: partition ID */
+	short max_npartitions;	/* value of XPC_MAX_PARTITIONS */
+	u8 version;
+	u8 pad1[3];		/* align to next u64 in 1st 64-byte cacheline */
+	unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */
+	union {
+		struct {
+			unsigned long vars_pa;	/* phys addr */
+		} sn2;
+		struct {
+			unsigned long heartbeat_gpa; /* phys addr */
+			unsigned long activate_gru_mq_desc_gpa; /* phys addr */
+		} uv;
+	} sn;
+	u64 pad2[9];		/* align to last u64 in 2nd 64-byte cacheline */
+	u64 SAL_nasids_size;	/* SAL: size of each nasid mask in bytes */
+};
+
+#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */
+
+/*
+ * Define the structures by which XPC variables can be exported to other
+ * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
+ */
+
+/*
+ * The following structure describes the partition generic variables
+ * needed by other partitions in order to properly initialize.
+ *
+ * struct xpc_vars version number also applies to struct xpc_vars_part.
+ * Changes to either structure and/or related functionality should be
+ * reflected by incrementing either the major or minor version numbers
+ * of struct xpc_vars.
+ */
+struct xpc_vars_sn2 {
+	u8 version;
+	u64 heartbeat;
+	DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
+	u64 heartbeat_offline;	/* if 0, heartbeat should be changing */
+	int activate_IRQ_nasid;
+	int activate_IRQ_phys_cpuid;
+	unsigned long vars_part_pa;
+	unsigned long amos_page_pa;/* paddr of page of amos from MSPEC driver */
+	struct amo *amos_page;	/* vaddr of page of amos from MSPEC driver */
+};
+
+#define XPC_V_VERSION _XPC_VERSION(3, 1)    /* version 3.1 of the cross vars */
+
+/*
+ * The following structure describes the per partition specific variables.
+ *
+ * An array of these structures, one per partition, will be defined. As a
+ * partition becomes active XPC will copy the array entry corresponding to
+ * itself from that partition. It is desirable that the size of this structure
+ * evenly divides into a 128-byte cacheline, such that none of the entries in
+ * this array crosses a 128-byte cacheline boundary. As it is now, each entry
+ * occupies 64-bytes.
+ */
+struct xpc_vars_part_sn2 {
+	u64 magic;
+
+	unsigned long openclose_args_pa; /* phys addr of open and close args */
+	unsigned long GPs_pa;	/* physical address of Get/Put values */
+
+	unsigned long chctl_amo_pa; /* physical address of chctl flags' amo */
+
+	int notify_IRQ_nasid;	/* nasid of where to send notify IRQs */
+	int notify_IRQ_phys_cpuid;	/* CPUID of where to send notify IRQs */
+
+	u8 nchannels;		/* #of defined channels supported */
+
+	u8 reserved[23];	/* pad to a full 64 bytes */
+};
+
+/*
+ * The vars_part MAGIC numbers play a part in the first contact protocol.
+ *
+ * MAGIC1 indicates that the per partition specific variables for a remote
+ * partition have been initialized by this partition.
+ *
+ * MAGIC2 indicates that this partition has pulled the remote partititions
+ * per partition variables that pertain to this partition.
+ */
+#define XPC_VP_MAGIC1_SN2 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
+#define XPC_VP_MAGIC2_SN2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
+
+/* the reserved page sizes and offsets */
+
+#define XPC_RP_HEADER_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
+#define XPC_RP_VARS_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2))
+
+#define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \
+				 XPC_RP_HEADER_SIZE))
+#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \
+				 xpc_nasid_mask_nlongs)
+#define XPC_RP_VARS(_rp)	((struct xpc_vars_sn2 *) \
+				 (XPC_RP_MACH_NASIDS(_rp) + \
+				  xpc_nasid_mask_nlongs))
+
+
+/*
+ * The following structure describes the partition's heartbeat info which
+ * will be periodically read by other partitions to determine whether this
+ * XPC is still 'alive'.
+ */
+struct xpc_heartbeat_uv {
+	unsigned long value;
+	unsigned long offline;	/* if 0, heartbeat should be changing */
+};
+
+/*
+ * Info pertinent to a GRU message queue using a watch list for irq generation.
+ */
+struct xpc_gru_mq_uv {
+	void *address;		/* address of GRU message queue */
+	unsigned int order;	/* size of GRU message queue as a power of 2 */
+	int irq;		/* irq raised when message is received in mq */
+	int mmr_blade;		/* blade where watchlist was allocated from */
+	unsigned long mmr_offset; /* offset of irq mmr located on mmr_blade */
+	unsigned long mmr_value; /* value of irq mmr located on mmr_blade */
+	int watchlist_num;	/* number of watchlist allocatd by BIOS */
+	void *gru_mq_desc;	/* opaque structure used by the GRU driver */
+};
+
+/*
+ * The activate_mq is used to send/receive GRU messages that affect XPC's
+ * partition active state and channel state. This is uv only.
+ */
+struct xpc_activate_mq_msghdr_uv {
+	unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */
+	short partid;		/* sender's partid */
+	u8 act_state;		/* sender's act_state at time msg sent */
+	u8 type;		/* message's type */
+	unsigned long rp_ts_jiffies; /* timestamp of sender's rp setup by XPC */
+};
+
+/* activate_mq defined message types */
+#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV		0
+
+#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV		1
+#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV		2
+
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV	3
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV		4
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV	5
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV		6
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV	7
+
+#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV		8
+#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV		9
+
+struct xpc_activate_mq_msg_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+};
+
+struct xpc_activate_mq_msg_activate_req_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	unsigned long rp_gpa;
+	unsigned long heartbeat_gpa;
+	unsigned long activate_gru_mq_desc_gpa;
+};
+
+struct xpc_activate_mq_msg_deactivate_req_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closerequest_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	short ch_number;
+	enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closereply_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	short ch_number;
+};
+
+struct xpc_activate_mq_msg_chctl_openrequest_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	short ch_number;
+	short entry_size;	/* size of notify_mq's GRU messages */
+	short local_nentries;	/* ??? Is this needed? What is? */
+};
+
+struct xpc_activate_mq_msg_chctl_openreply_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	short ch_number;
+	short remote_nentries;	/* ??? Is this needed? What is? */
+	short local_nentries;	/* ??? Is this needed? What is? */
+	unsigned long notify_gru_mq_desc_gpa;
+};
+
+struct xpc_activate_mq_msg_chctl_opencomplete_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	short ch_number;
+};
+
+/*
+ * Functions registered by add_timer() or called by kernel_thread() only
+ * allow for a single 64-bit argument. The following macros can be used to
+ * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
+ * the passed argument.
+ */
+#define XPC_PACK_ARGS(_arg1, _arg2) \
+			((((u64)_arg1) & 0xffffffff) | \
+			((((u64)_arg2) & 0xffffffff) << 32))
+
+#define XPC_UNPACK_ARG1(_args)	(((u64)_args) & 0xffffffff)
+#define XPC_UNPACK_ARG2(_args)	((((u64)_args) >> 32) & 0xffffffff)
+
+/*
+ * Define a Get/Put value pair (pointers) used with a message queue.
+ */
+struct xpc_gp_sn2 {
+	s64 get;		/* Get value */
+	s64 put;		/* Put value */
+};
+
+#define XPC_GP_SIZE \
+		L1_CACHE_ALIGN(sizeof(struct xpc_gp_sn2) * XPC_MAX_NCHANNELS)
+
+/*
+ * Define a structure that contains arguments associated with opening and
+ * closing a channel.
+ */
+struct xpc_openclose_args {
+	u16 reason;		/* reason why channel is closing */
+	u16 entry_size;		/* sizeof each message entry */
+	u16 remote_nentries;	/* #of message entries in remote msg queue */
+	u16 local_nentries;	/* #of message entries in local msg queue */
+	unsigned long local_msgqueue_pa; /* phys addr of local message queue */
+};
+
+#define XPC_OPENCLOSE_ARGS_SIZE \
+	      L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \
+	      XPC_MAX_NCHANNELS)
+
+
+/*
+ * Structures to define a fifo singly-linked list.
+ */
+
+struct xpc_fifo_entry_uv {
+	struct xpc_fifo_entry_uv *next;
+};
+
+struct xpc_fifo_head_uv {
+	struct xpc_fifo_entry_uv *first;
+	struct xpc_fifo_entry_uv *last;
+	spinlock_t lock;
+	int n_entries;
+};
+
+/*
+ * Define a sn2 styled message.
+ *
+ * A user-defined message resides in the payload area. The max size of the
+ * payload is defined by the user via xpc_connect().
+ *
+ * The size of a message entry (within a message queue) must be a 128-byte
+ * cacheline sized multiple in order to facilitate the BTE transfer of messages
+ * from one message queue to another.
+ */
+struct xpc_msg_sn2 {
+	u8 flags;		/* FOR XPC INTERNAL USE ONLY */
+	u8 reserved[7];		/* FOR XPC INTERNAL USE ONLY */
+	s64 number;		/* FOR XPC INTERNAL USE ONLY */
+
+	u64 payload;		/* user defined portion of message */
+};
+
+/* struct xpc_msg_sn2 flags */
+
+#define	XPC_M_SN2_DONE		0x01	/* msg has been received/consumed */
+#define	XPC_M_SN2_READY		0x02	/* msg is ready to be sent */
+#define	XPC_M_SN2_INTERRUPT	0x04	/* send interrupt when msg consumed */
+
+/*
+ * The format of a uv XPC notify_mq GRU message is as follows:
+ *
+ * A user-defined message resides in the payload area. The max size of the
+ * payload is defined by the user via xpc_connect().
+ *
+ * The size of a message (payload and header) sent via the GRU must be either 1
+ * or 2 GRU_CACHE_LINE_BYTES in length.
+ */
+
+struct xpc_notify_mq_msghdr_uv {
+	union {
+		unsigned int gru_msg_hdr;	/* FOR GRU INTERNAL USE ONLY */
+		struct xpc_fifo_entry_uv next;	/* FOR XPC INTERNAL USE ONLY */
+	} u;
+	short partid;		/* FOR XPC INTERNAL USE ONLY */
+	u8 ch_number;		/* FOR XPC INTERNAL USE ONLY */
+	u8 size;		/* FOR XPC INTERNAL USE ONLY */
+	unsigned int msg_slot_number;	/* FOR XPC INTERNAL USE ONLY */
+};
+
+struct xpc_notify_mq_msg_uv {
+	struct xpc_notify_mq_msghdr_uv hdr;
+	unsigned long payload;
+};
+
+/*
+ * Define sn2's notify entry.
+ *
+ * This is used to notify a message's sender that their message was received
+ * and consumed by the intended recipient.
+ */
+struct xpc_notify_sn2 {
+	u8 type;		/* type of notification */
+
+	/* the following two fields are only used if type == XPC_N_CALL */
+	xpc_notify_func func;	/* user's notify function */
+	void *key;		/* pointer to user's key */
+};
+
+/* struct xpc_notify_sn2 type of notification */
+
+#define	XPC_N_CALL	0x01	/* notify function provided by user */
+
+/*
+ * Define uv's version of the notify entry. It additionally is used to allocate
+ * a msg slot on the remote partition into which is copied a sent message.
+ */
+struct xpc_send_msg_slot_uv {
+	struct xpc_fifo_entry_uv next;
+	unsigned int msg_slot_number;
+	xpc_notify_func func;	/* user's notify function */
+	void *key;		/* pointer to user's key */
+};
+
+/*
+ * Define the structure that manages all the stuff required by a channel. In
+ * particular, they are used to manage the messages sent across the channel.
+ *
+ * This structure is private to a partition, and is NOT shared across the
+ * partition boundary.
+ *
+ * There is an array of these structures for each remote partition. It is
+ * allocated at the time a partition becomes active. The array contains one
+ * of these structures for each potential channel connection to that partition.
+ */
+
+/*
+ * The following is sn2 only.
+ *
+ * Each channel structure manages two message queues (circular buffers).
+ * They are allocated at the time a channel connection is made. One of
+ * these message queues (local_msgqueue) holds the locally created messages
+ * that are destined for the remote partition. The other of these message
+ * queues (remote_msgqueue) is a locally cached copy of the remote partition's
+ * own local_msgqueue.
+ *
+ * The following is a description of the Get/Put pointers used to manage these
+ * two message queues. Consider the local_msgqueue to be on one partition
+ * and the remote_msgqueue to be its cached copy on another partition. A
+ * description of what each of the lettered areas contains is included.
+ *
+ *
+ *                     local_msgqueue      remote_msgqueue
+ *
+ *                        |/////////|      |/////////|
+ *    w_remote_GP.get --> +---------+      |/////////|
+ *                        |    F    |      |/////////|
+ *     remote_GP.get  --> +---------+      +---------+ <-- local_GP->get
+ *                        |         |      |         |
+ *                        |         |      |    E    |
+ *                        |         |      |         |
+ *                        |         |      +---------+ <-- w_local_GP.get
+ *                        |    B    |      |/////////|
+ *                        |         |      |////D////|
+ *                        |         |      |/////////|
+ *                        |         |      +---------+ <-- w_remote_GP.put
+ *                        |         |      |////C////|
+ *      local_GP->put --> +---------+      +---------+ <-- remote_GP.put
+ *                        |         |      |/////////|
+ *                        |    A    |      |/////////|
+ *                        |         |      |/////////|
+ *     w_local_GP.put --> +---------+      |/////////|
+ *                        |/////////|      |/////////|
+ *
+ *
+ *	    ( remote_GP.[get|put] are cached copies of the remote
+ *	      partition's local_GP->[get|put], and thus their values can
+ *	      lag behind their counterparts on the remote partition. )
+ *
+ *
+ *  A - Messages that have been allocated, but have not yet been sent to the
+ *	remote partition.
+ *
+ *  B - Messages that have been sent, but have not yet been acknowledged by the
+ *      remote partition as having been received.
+ *
+ *  C - Area that needs to be prepared for the copying of sent messages, by
+ *	the clearing of the message flags of any previously received messages.
+ *
+ *  D - Area into which sent messages are to be copied from the remote
+ *	partition's local_msgqueue and then delivered to their intended
+ *	recipients. [ To allow for a multi-message copy, another pointer
+ *	(next_msg_to_pull) has been added to keep track of the next message
+ *	number needing to be copied (pulled). It chases after w_remote_GP.put.
+ *	Any messages lying between w_local_GP.get and next_msg_to_pull have
+ *	been copied and are ready to be delivered. ]
+ *
+ *  E - Messages that have been copied and delivered, but have not yet been
+ *	acknowledged by the recipient as having been received.
+ *
+ *  F - Messages that have been acknowledged, but XPC has not yet notified the
+ *	sender that the message was received by its intended recipient.
+ *	This is also an area that needs to be prepared for the allocating of
+ *	new messages, by the clearing of the message flags of the acknowledged
+ *	messages.
+ */
+
+struct xpc_channel_sn2 {
+	struct xpc_openclose_args *local_openclose_args; /* args passed on */
+					     /* opening or closing of channel */
+
+	void *local_msgqueue_base;	/* base address of kmalloc'd space */
+	struct xpc_msg_sn2 *local_msgqueue;	/* local message queue */
+	void *remote_msgqueue_base;	/* base address of kmalloc'd space */
+	struct xpc_msg_sn2 *remote_msgqueue; /* cached copy of remote */
+					   /* partition's local message queue */
+	unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */
+					  /* local message queue */
+
+	struct xpc_notify_sn2 *notify_queue;/* notify queue for messages sent */
+
+	/* various flavors of local and remote Get/Put values */
+
+	struct xpc_gp_sn2 *local_GP;	/* local Get/Put values */
+	struct xpc_gp_sn2 remote_GP;	/* remote Get/Put values */
+	struct xpc_gp_sn2 w_local_GP;	/* working local Get/Put values */
+	struct xpc_gp_sn2 w_remote_GP;	/* working remote Get/Put values */
+	s64 next_msg_to_pull;	/* Put value of next msg to pull */
+
+	struct mutex msg_to_pull_mutex;	/* next msg to pull serialization */
+};
+
+struct xpc_channel_uv {
+	void *cached_notify_gru_mq_desc; /* remote partition's notify mq's */
+					 /* gru mq descriptor */
+
+	struct xpc_send_msg_slot_uv *send_msg_slots;
+	void *recv_msg_slots;	/* each slot will hold a xpc_notify_mq_msg_uv */
+				/* structure plus the user's payload */
+
+	struct xpc_fifo_head_uv msg_slot_free_list;
+	struct xpc_fifo_head_uv recv_msg_list;	/* deliverable payloads */
+};
+
+struct xpc_channel {
+	short partid;		/* ID of remote partition connected */
+	spinlock_t lock;	/* lock for updating this structure */
+	unsigned int flags;	/* general flags */
+
+	enum xp_retval reason;	/* reason why channel is disconnect'g */
+	int reason_line;	/* line# disconnect initiated from */
+
+	u16 number;		/* channel # */
+
+	u16 entry_size;		/* sizeof each msg entry */
+	u16 local_nentries;	/* #of msg entries in local msg queue */
+	u16 remote_nentries;	/* #of msg entries in remote msg queue */
+
+	atomic_t references;	/* #of external references to queues */
+
+	atomic_t n_on_msg_allocate_wq;	/* #on msg allocation wait queue */
+	wait_queue_head_t msg_allocate_wq;	/* msg allocation wait queue */
+
+	u8 delayed_chctl_flags;	/* chctl flags received, but delayed */
+				/* action until channel disconnected */
+
+	atomic_t n_to_notify;	/* #of msg senders to notify */
+
+	xpc_channel_func func;	/* user's channel function */
+	void *key;		/* pointer to user's key */
+
+	struct completion wdisconnect_wait;    /* wait for channel disconnect */
+
+	/* kthread management related fields */
+
+	atomic_t kthreads_assigned;	/* #of kthreads assigned to channel */
+	u32 kthreads_assigned_limit;	/* limit on #of kthreads assigned */
+	atomic_t kthreads_idle;	/* #of kthreads idle waiting for work */
+	u32 kthreads_idle_limit;	/* limit on #of kthreads idle */
+	atomic_t kthreads_active;	/* #of kthreads actively working */
+
+	wait_queue_head_t idle_wq;	/* idle kthread wait queue */
+
+	union {
+		struct xpc_channel_sn2 sn2;
+		struct xpc_channel_uv uv;
+	} sn;
+
+} ____cacheline_aligned;
+
+/* struct xpc_channel flags */
+
+#define	XPC_C_WASCONNECTED	0x00000001	/* channel was connected */
+
+#define XPC_C_ROPENCOMPLETE	0x00000002    /* remote open channel complete */
+#define XPC_C_OPENCOMPLETE	0x00000004     /* local open channel complete */
+#define	XPC_C_ROPENREPLY	0x00000008	/* remote open channel reply */
+#define	XPC_C_OPENREPLY		0x00000010	/* local open channel reply */
+#define	XPC_C_ROPENREQUEST	0x00000020     /* remote open channel request */
+#define	XPC_C_OPENREQUEST	0x00000040	/* local open channel request */
+
+#define	XPC_C_SETUP		0x00000080 /* channel's msgqueues are alloc'd */
+#define	XPC_C_CONNECTEDCALLOUT	0x00000100     /* connected callout initiated */
+#define	XPC_C_CONNECTEDCALLOUT_MADE \
+				0x00000200     /* connected callout completed */
+#define	XPC_C_CONNECTED		0x00000400	/* local channel is connected */
+#define	XPC_C_CONNECTING	0x00000800	/* channel is being connected */
+
+#define	XPC_C_RCLOSEREPLY	0x00001000	/* remote close channel reply */
+#define	XPC_C_CLOSEREPLY	0x00002000	/* local close channel reply */
+#define	XPC_C_RCLOSEREQUEST	0x00004000    /* remote close channel request */
+#define	XPC_C_CLOSEREQUEST	0x00008000     /* local close channel request */
+
+#define	XPC_C_DISCONNECTED	0x00010000	/* channel is disconnected */
+#define	XPC_C_DISCONNECTING	0x00020000   /* channel is being disconnected */
+#define	XPC_C_DISCONNECTINGCALLOUT \
+				0x00040000 /* disconnecting callout initiated */
+#define	XPC_C_DISCONNECTINGCALLOUT_MADE \
+				0x00080000 /* disconnecting callout completed */
+#define	XPC_C_WDISCONNECT	0x00100000  /* waiting for channel disconnect */
+
+/*
+ * The channel control flags (chctl) union consists of a 64-bit variable which
+ * is divided up into eight bytes, ordered from right to left. Byte zero
+ * pertains to channel 0, byte one to channel 1, and so on. Each channel's byte
+ * can have one or more of the chctl flags set in it.
+ */
+
+union xpc_channel_ctl_flags {
+	u64 all_flags;
+	u8 flags[XPC_MAX_NCHANNELS];
+};
+
+/* chctl flags */
+#define	XPC_CHCTL_CLOSEREQUEST	0x01
+#define	XPC_CHCTL_CLOSEREPLY	0x02
+#define	XPC_CHCTL_OPENREQUEST	0x04
+#define	XPC_CHCTL_OPENREPLY	0x08
+#define XPC_CHCTL_OPENCOMPLETE	0x10
+#define	XPC_CHCTL_MSGREQUEST	0x20
+
+#define XPC_OPENCLOSE_CHCTL_FLAGS \
+			(XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \
+			 XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | \
+			 XPC_CHCTL_OPENCOMPLETE)
+#define XPC_MSG_CHCTL_FLAGS	XPC_CHCTL_MSGREQUEST
+
+static inline int
+xpc_any_openclose_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+	int ch_number;
+
+	for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+		if (chctl->flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS)
+			return 1;
+	}
+	return 0;
+}
+
+static inline int
+xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+	int ch_number;
+
+	for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+		if (chctl->flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Manage channels on a partition basis. There is one of these structures
+ * for each partition (a partition will never utilize the structure that
+ * represents itself).
+ */
+
+struct xpc_partition_sn2 {
+	unsigned long remote_amos_page_pa; /* paddr of partition's amos page */
+	int activate_IRQ_nasid;	/* active partition's act/deact nasid */
+	int activate_IRQ_phys_cpuid;	/* active part's act/deact phys cpuid */
+
+	unsigned long remote_vars_pa;	/* phys addr of partition's vars */
+	unsigned long remote_vars_part_pa; /* paddr of partition's vars part */
+	u8 remote_vars_version;	/* version# of partition's vars */
+
+	void *local_GPs_base;	/* base address of kmalloc'd space */
+	struct xpc_gp_sn2 *local_GPs;	/* local Get/Put values */
+	void *remote_GPs_base;	/* base address of kmalloc'd space */
+	struct xpc_gp_sn2 *remote_GPs;	/* copy of remote partition's local */
+					/* Get/Put values */
+	unsigned long remote_GPs_pa; /* phys addr of remote partition's local */
+				     /* Get/Put values */
+
+	void *local_openclose_args_base;   /* base address of kmalloc'd space */
+	struct xpc_openclose_args *local_openclose_args;      /* local's args */
+	unsigned long remote_openclose_args_pa;	/* phys addr of remote's args */
+
+	int notify_IRQ_nasid;	/* nasid of where to send notify IRQs */
+	int notify_IRQ_phys_cpuid;	/* CPUID of where to send notify IRQs */
+	char notify_IRQ_owner[8];	/* notify IRQ's owner's name */
+
+	struct amo *remote_chctl_amo_va; /* addr of remote chctl flags' amo */
+	struct amo *local_chctl_amo_va;	/* address of chctl flags' amo */
+
+	struct timer_list dropped_notify_IRQ_timer;	/* dropped IRQ timer */
+};
+
+struct xpc_partition_uv {
+	unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */
+	struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */
+						  /* partition's heartbeat */
+	unsigned long activate_gru_mq_desc_gpa;	/* phys addr of parititon's */
+						/* activate mq's gru mq */
+						/* descriptor */
+	void *cached_activate_gru_mq_desc; /* cached copy of partition's */
+					   /* activate mq's gru mq descriptor */
+	struct mutex cached_activate_gru_mq_desc_mutex;
+	spinlock_t flags_lock;	/* protect updating of flags */
+	unsigned int flags;	/* general flags */
+	u8 remote_act_state;	/* remote partition's act_state */
+	u8 act_state_req;	/* act_state request from remote partition */
+	enum xp_retval reason;	/* reason for deactivate act_state request */
+};
+
+/* struct xpc_partition_uv flags */
+
+#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV	0x00000001
+#define XPC_P_ENGAGED_UV			0x00000002
+
+/* struct xpc_partition_uv act_state change requests */
+
+#define XPC_P_ASR_ACTIVATE_UV		0x01
+#define XPC_P_ASR_REACTIVATE_UV		0x02
+#define XPC_P_ASR_DEACTIVATE_UV		0x03
+
+struct xpc_partition {
+
+	/* XPC HB infrastructure */
+
+	u8 remote_rp_version;	/* version# of partition's rsvd pg */
+	unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */
+	unsigned long remote_rp_pa;	/* phys addr of partition's rsvd pg */
+	u64 last_heartbeat;	/* HB at last read */
+	u32 activate_IRQ_rcvd;	/* IRQs since activation */
+	spinlock_t act_lock;	/* protect updating of act_state */
+	u8 act_state;		/* from XPC HB viewpoint */
+	enum xp_retval reason;	/* reason partition is deactivating */
+	int reason_line;	/* line# deactivation initiated from */
+
+	unsigned long disengage_timeout;	/* timeout in jiffies */
+	struct timer_list disengage_timer;
+
+	/* XPC infrastructure referencing and teardown control */
+
+	u8 setup_state;		/* infrastructure setup state */
+	wait_queue_head_t teardown_wq;	/* kthread waiting to teardown infra */
+	atomic_t references;	/* #of references to infrastructure */
+
+	u8 nchannels;		/* #of defined channels supported */
+	atomic_t nchannels_active;  /* #of channels that are not DISCONNECTED */
+	atomic_t nchannels_engaged;  /* #of channels engaged with remote part */
+	struct xpc_channel *channels;	/* array of channel structures */
+
+	/* fields used for managing channel avialability and activity */
+
+	union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */
+	spinlock_t chctl_lock;	/* chctl flags lock */
+
+	void *remote_openclose_args_base;  /* base address of kmalloc'd space */
+	struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
+							  /* args */
+
+	/* channel manager related fields */
+
+	atomic_t channel_mgr_requests;	/* #of requests to activate chan mgr */
+	wait_queue_head_t channel_mgr_wq;	/* channel mgr's wait queue */
+
+	union {
+		struct xpc_partition_sn2 sn2;
+		struct xpc_partition_uv uv;
+	} sn;
+
+} ____cacheline_aligned;
+
+struct xpc_arch_operations {
+	int (*setup_partitions) (void);
+	void (*teardown_partitions) (void);
+	void (*process_activate_IRQ_rcvd) (void);
+	enum xp_retval (*get_partition_rsvd_page_pa)
+		(void *, u64 *, unsigned long *, size_t *);
+	int (*setup_rsvd_page) (struct xpc_rsvd_page *);
+
+	void (*allow_hb) (short);
+	void (*disallow_hb) (short);
+	void (*disallow_all_hbs) (void);
+	void (*increment_heartbeat) (void);
+	void (*offline_heartbeat) (void);
+	void (*online_heartbeat) (void);
+	void (*heartbeat_init) (void);
+	void (*heartbeat_exit) (void);
+	enum xp_retval (*get_remote_heartbeat) (struct xpc_partition *);
+
+	void (*request_partition_activation) (struct xpc_rsvd_page *,
+						 unsigned long, int);
+	void (*request_partition_reactivation) (struct xpc_partition *);
+	void (*request_partition_deactivation) (struct xpc_partition *);
+	void (*cancel_partition_deactivation_request) (struct xpc_partition *);
+	enum xp_retval (*setup_ch_structures) (struct xpc_partition *);
+	void (*teardown_ch_structures) (struct xpc_partition *);
+
+	enum xp_retval (*make_first_contact) (struct xpc_partition *);
+
+	u64 (*get_chctl_all_flags) (struct xpc_partition *);
+	void (*send_chctl_closerequest) (struct xpc_channel *, unsigned long *);
+	void (*send_chctl_closereply) (struct xpc_channel *, unsigned long *);
+	void (*send_chctl_openrequest) (struct xpc_channel *, unsigned long *);
+	void (*send_chctl_openreply) (struct xpc_channel *, unsigned long *);
+	void (*send_chctl_opencomplete) (struct xpc_channel *, unsigned long *);
+	void (*process_msg_chctl_flags) (struct xpc_partition *, int);
+
+	enum xp_retval (*save_remote_msgqueue_pa) (struct xpc_channel *,
+						      unsigned long);
+
+	enum xp_retval (*setup_msg_structures) (struct xpc_channel *);
+	void (*teardown_msg_structures) (struct xpc_channel *);
+
+	void (*indicate_partition_engaged) (struct xpc_partition *);
+	void (*indicate_partition_disengaged) (struct xpc_partition *);
+	void (*assume_partition_disengaged) (short);
+	int (*partition_engaged) (short);
+	int (*any_partition_engaged) (void);
+
+	int (*n_of_deliverable_payloads) (struct xpc_channel *);
+	enum xp_retval (*send_payload) (struct xpc_channel *, u32, void *,
+					   u16, u8, xpc_notify_func, void *);
+	void *(*get_deliverable_payload) (struct xpc_channel *);
+	void (*received_payload) (struct xpc_channel *, void *);
+	void (*notify_senders_of_disconnect) (struct xpc_channel *);
+};
+
+/* struct xpc_partition act_state values (for XPC HB) */
+
+#define	XPC_P_AS_INACTIVE	0x00	/* partition is not active */
+#define XPC_P_AS_ACTIVATION_REQ	0x01	/* created thread to activate */
+#define XPC_P_AS_ACTIVATING	0x02	/* activation thread started */
+#define XPC_P_AS_ACTIVE		0x03	/* xpc_partition_up() was called */
+#define XPC_P_AS_DEACTIVATING	0x04	/* partition deactivation initiated */
+
+#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
+			xpc_deactivate_partition(__LINE__, (_p), (_reason))
+
+/* struct xpc_partition setup_state values */
+
+#define XPC_P_SS_UNSET		0x00	/* infrastructure was never setup */
+#define XPC_P_SS_SETUP		0x01	/* infrastructure is setup */
+#define XPC_P_SS_WTEARDOWN	0x02	/* waiting to teardown infrastructure */
+#define XPC_P_SS_TORNDOWN	0x03	/* infrastructure is torndown */
+
+/*
+ * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the
+ * following interval #of seconds before checking for dropped notify IRQs.
+ * These can occur whenever an IRQ's associated amo write doesn't complete
+ * until after the IRQ was received.
+ */
+#define XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL	(0.25 * HZ)
+
+/* number of seconds to wait for other partitions to disengage */
+#define XPC_DISENGAGE_DEFAULT_TIMELIMIT		90
+
+/* interval in seconds to print 'waiting deactivation' messages */
+#define XPC_DEACTIVATE_PRINTMSG_INTERVAL	10
+
+#define XPC_PARTID(_p)	((short)((_p) - &xpc_partitions[0]))
+
+/* found in xp_main.c */
+extern struct xpc_registration xpc_registrations[];
+
+/* found in xpc_main.c */
+extern struct device *xpc_part;
+extern struct device *xpc_chan;
+extern struct xpc_arch_operations xpc_arch_ops;
+extern int xpc_disengage_timelimit;
+extern int xpc_disengage_timedout;
+extern int xpc_activate_IRQ_rcvd;
+extern spinlock_t xpc_activate_IRQ_rcvd_lock;
+extern wait_queue_head_t xpc_activate_IRQ_wq;
+extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
+extern void xpc_activate_partition(struct xpc_partition *);
+extern void xpc_activate_kthreads(struct xpc_channel *, int);
+extern void xpc_create_kthreads(struct xpc_channel *, int, int);
+extern void xpc_disconnect_wait(int);
+
+/* found in xpc_sn2.c */
+extern int xpc_init_sn2(void);
+extern void xpc_exit_sn2(void);
+
+/* found in xpc_uv.c */
+extern int xpc_init_uv(void);
+extern void xpc_exit_uv(void);
+
+/* found in xpc_partition.c */
+extern int xpc_exiting;
+extern int xpc_nasid_mask_nlongs;
+extern struct xpc_rsvd_page *xpc_rsvd_page;
+extern unsigned long *xpc_mach_nasids;
+extern struct xpc_partition *xpc_partitions;
+extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
+extern int xpc_setup_rsvd_page(void);
+extern void xpc_teardown_rsvd_page(void);
+extern int xpc_identify_activate_IRQ_sender(void);
+extern int xpc_partition_disengaged(struct xpc_partition *);
+extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *);
+extern void xpc_mark_partition_inactive(struct xpc_partition *);
+extern void xpc_discovery(void);
+extern enum xp_retval xpc_get_remote_rp(int, unsigned long *,
+					struct xpc_rsvd_page *,
+					unsigned long *);
+extern void xpc_deactivate_partition(const int, struct xpc_partition *,
+				     enum xp_retval);
+extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
+
+/* found in xpc_channel.c */
+extern void xpc_initiate_connect(int);
+extern void xpc_initiate_disconnect(int);
+extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *);
+extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16);
+extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16,
+					       xpc_notify_func, void *);
+extern void xpc_initiate_received(short, int, void *);
+extern void xpc_process_sent_chctl_flags(struct xpc_partition *);
+extern void xpc_connected_callout(struct xpc_channel *);
+extern void xpc_deliver_payload(struct xpc_channel *);
+extern void xpc_disconnect_channel(const int, struct xpc_channel *,
+				   enum xp_retval, unsigned long *);
+extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval);
+extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval);
+
+static inline void
+xpc_wakeup_channel_mgr(struct xpc_partition *part)
+{
+	if (atomic_inc_return(&part->channel_mgr_requests) == 1)
+		wake_up(&part->channel_mgr_wq);
+}
+
+/*
+ * These next two inlines are used to keep us from tearing down a channel's
+ * msg queues while a thread may be referencing them.
+ */
+static inline void
+xpc_msgqueue_ref(struct xpc_channel *ch)
+{
+	atomic_inc(&ch->references);
+}
+
+static inline void
+xpc_msgqueue_deref(struct xpc_channel *ch)
+{
+	s32 refs = atomic_dec_return(&ch->references);
+
+	DBUG_ON(refs < 0);
+	if (refs == 0)
+		xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]);
+}
+
+#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \
+		xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs)
+
+/*
+ * These two inlines are used to keep us from tearing down a partition's
+ * setup infrastructure while a thread may be referencing it.
+ */
+static inline void
+xpc_part_deref(struct xpc_partition *part)
+{
+	s32 refs = atomic_dec_return(&part->references);
+
+	DBUG_ON(refs < 0);
+	if (refs == 0 && part->setup_state == XPC_P_SS_WTEARDOWN)
+		wake_up(&part->teardown_wq);
+}
+
+static inline int
+xpc_part_ref(struct xpc_partition *part)
+{
+	int setup;
+
+	atomic_inc(&part->references);
+	setup = (part->setup_state == XPC_P_SS_SETUP);
+	if (!setup)
+		xpc_part_deref(part);
+
+	return setup;
+}
+
+/*
+ * The following macro is to be used for the setting of the reason and
+ * reason_line fields in both the struct xpc_channel and struct xpc_partition
+ * structures.
+ */
+#define XPC_SET_REASON(_p, _reason, _line) \
+	{ \
+		(_p)->reason = _reason; \
+		(_p)->reason_line = _line; \
+	}
+
+#endif /* _DRIVERS_MISC_SGIXP_XPC_H */
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
new file mode 100644
index 00000000000..652593fc486
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -0,0 +1,1011 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) channel support.
+ *
+ *	This is the part of XPC that manages the channels and
+ *	sends/receives messages across them to/from other partitions.
+ *
+ */
+
+#include <linux/device.h>
+#include "xpc.h"
+
+/*
+ * Process a connect message from a remote partition.
+ *
+ * Note: xpc_process_connect() is expecting to be called with the
+ * spin_lock_irqsave held and will leave it locked upon return.
+ */
+static void
+xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	enum xp_retval ret;
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
+	if (!(ch->flags & XPC_C_OPENREQUEST) ||
+	    !(ch->flags & XPC_C_ROPENREQUEST)) {
+		/* nothing more to do for now */
+		return;
+	}
+	DBUG_ON(!(ch->flags & XPC_C_CONNECTING));
+
+	if (!(ch->flags & XPC_C_SETUP)) {
+		spin_unlock_irqrestore(&ch->lock, *irq_flags);
+		ret = xpc_arch_ops.setup_msg_structures(ch);
+		spin_lock_irqsave(&ch->lock, *irq_flags);
+
+		if (ret != xpSuccess)
+			XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
+		else
+			ch->flags |= XPC_C_SETUP;
+
+		if (ch->flags & XPC_C_DISCONNECTING)
+			return;
+	}
+
+	if (!(ch->flags & XPC_C_OPENREPLY)) {
+		ch->flags |= XPC_C_OPENREPLY;
+		xpc_arch_ops.send_chctl_openreply(ch, irq_flags);
+	}
+
+	if (!(ch->flags & XPC_C_ROPENREPLY))
+		return;
+
+	if (!(ch->flags & XPC_C_OPENCOMPLETE)) {
+		ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED);
+		xpc_arch_ops.send_chctl_opencomplete(ch, irq_flags);
+	}
+
+	if (!(ch->flags & XPC_C_ROPENCOMPLETE))
+		return;
+
+	dev_info(xpc_chan, "channel %d to partition %d connected\n",
+		 ch->number, ch->partid);
+
+	ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP);	/* clear all else */
+}
+
+/*
+ * spin_lock_irqsave() is expected to be held on entry.
+ */
+static void
+xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
+	if (!(ch->flags & XPC_C_DISCONNECTING))
+		return;
+
+	DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+
+	/* make sure all activity has settled down first */
+
+	if (atomic_read(&ch->kthreads_assigned) > 0 ||
+	    atomic_read(&ch->references) > 0) {
+		return;
+	}
+	DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+		!(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));
+
+	if (part->act_state == XPC_P_AS_DEACTIVATING) {
+		/* can't proceed until the other side disengages from us */
+		if (xpc_arch_ops.partition_engaged(ch->partid))
+			return;
+
+	} else {
+
+		/* as long as the other side is up do the full protocol */
+
+		if (!(ch->flags & XPC_C_RCLOSEREQUEST))
+			return;
+
+		if (!(ch->flags & XPC_C_CLOSEREPLY)) {
+			ch->flags |= XPC_C_CLOSEREPLY;
+			xpc_arch_ops.send_chctl_closereply(ch, irq_flags);
+		}
+
+		if (!(ch->flags & XPC_C_RCLOSEREPLY))
+			return;
+	}
+
+	/* wake those waiting for notify completion */
+	if (atomic_read(&ch->n_to_notify) > 0) {
+		/* we do callout while holding ch->lock, callout can't block */
+		xpc_arch_ops.notify_senders_of_disconnect(ch);
+	}
+
+	/* both sides are disconnected now */
+
+	if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) {
+		spin_unlock_irqrestore(&ch->lock, *irq_flags);
+		xpc_disconnect_callout(ch, xpDisconnected);
+		spin_lock_irqsave(&ch->lock, *irq_flags);
+	}
+
+	DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
+
+	/* it's now safe to free the channel's message queues */
+	xpc_arch_ops.teardown_msg_structures(ch);
+
+	ch->func = NULL;
+	ch->key = NULL;
+	ch->entry_size = 0;
+	ch->local_nentries = 0;
+	ch->remote_nentries = 0;
+	ch->kthreads_assigned_limit = 0;
+	ch->kthreads_idle_limit = 0;
+
+	/*
+	 * Mark the channel disconnected and clear all other flags, including
+	 * XPC_C_SETUP (because of call to
+	 * xpc_arch_ops.teardown_msg_structures()) but not including
+	 * XPC_C_WDISCONNECT (if it was set).
+	 */
+	ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
+
+	atomic_dec(&part->nchannels_active);
+
+	if (channel_was_connected) {
+		dev_info(xpc_chan, "channel %d to partition %d disconnected, "
+			 "reason=%d\n", ch->number, ch->partid, ch->reason);
+	}
+
+	if (ch->flags & XPC_C_WDISCONNECT) {
+		/* we won't lose the CPU since we're holding ch->lock */
+		complete(&ch->wdisconnect_wait);
+	} else if (ch->delayed_chctl_flags) {
+		if (part->act_state != XPC_P_AS_DEACTIVATING) {
+			/* time to take action on any delayed chctl flags */
+			spin_lock(&part->chctl_lock);
+			part->chctl.flags[ch->number] |=
+			    ch->delayed_chctl_flags;
+			spin_unlock(&part->chctl_lock);
+		}
+		ch->delayed_chctl_flags = 0;
+	}
+}
+
+/*
+ * Process a change in the channel's remote connection state.
+ */
+static void
+xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number,
+				  u8 chctl_flags)
+{
+	unsigned long irq_flags;
+	struct xpc_openclose_args *args =
+	    &part->remote_openclose_args[ch_number];
+	struct xpc_channel *ch = &part->channels[ch_number];
+	enum xp_retval reason;
+	enum xp_retval ret;
+	int create_kthread = 0;
+
+	spin_lock_irqsave(&ch->lock, irq_flags);
+
+again:
+
+	if ((ch->flags & XPC_C_DISCONNECTED) &&
+	    (ch->flags & XPC_C_WDISCONNECT)) {
+		/*
+		 * Delay processing chctl flags until thread waiting disconnect
+		 * has had a chance to see that the channel is disconnected.
+		 */
+		ch->delayed_chctl_flags |= chctl_flags;
+		goto out;
+	}
+
+	if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) {
+
+		dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREQUEST (reason=%d) received "
+			"from partid=%d, channel=%d\n", args->reason,
+			ch->partid, ch->number);
+
+		/*
+		 * If RCLOSEREQUEST is set, we're probably waiting for
+		 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
+		 * with this RCLOSEREQUEST in the chctl_flags.
+		 */
+
+		if (ch->flags & XPC_C_RCLOSEREQUEST) {
+			DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
+			DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+			DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY));
+			DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY);
+
+			DBUG_ON(!(chctl_flags & XPC_CHCTL_CLOSEREPLY));
+			chctl_flags &= ~XPC_CHCTL_CLOSEREPLY;
+			ch->flags |= XPC_C_RCLOSEREPLY;
+
+			/* both sides have finished disconnecting */
+			xpc_process_disconnect(ch, &irq_flags);
+			DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+			goto again;
+		}
+
+		if (ch->flags & XPC_C_DISCONNECTED) {
+			if (!(chctl_flags & XPC_CHCTL_OPENREQUEST)) {
+				if (part->chctl.flags[ch_number] &
+				    XPC_CHCTL_OPENREQUEST) {
+
+					DBUG_ON(ch->delayed_chctl_flags != 0);
+					spin_lock(&part->chctl_lock);
+					part->chctl.flags[ch_number] |=
+					    XPC_CHCTL_CLOSEREQUEST;
+					spin_unlock(&part->chctl_lock);
+				}
+				goto out;
+			}
+
+			XPC_SET_REASON(ch, 0, 0);
+			ch->flags &= ~XPC_C_DISCONNECTED;
+
+			atomic_inc(&part->nchannels_active);
+			ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
+		}
+
+		chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY |
+		    XPC_CHCTL_OPENCOMPLETE);
+
+		/*
+		 * The meaningful CLOSEREQUEST connection state fields are:
+		 *      reason = reason connection is to be closed
+		 */
+
+		ch->flags |= XPC_C_RCLOSEREQUEST;
+
+		if (!(ch->flags & XPC_C_DISCONNECTING)) {
+			reason = args->reason;
+			if (reason <= xpSuccess || reason > xpUnknownReason)
+				reason = xpUnknownReason;
+			else if (reason == xpUnregistering)
+				reason = xpOtherUnregistering;
+
+			XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
+
+			DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY);
+			goto out;
+		}
+
+		xpc_process_disconnect(ch, &irq_flags);
+	}
+
+	if (chctl_flags & XPC_CHCTL_CLOSEREPLY) {
+
+		dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREPLY received from partid="
+			"%d, channel=%d\n", ch->partid, ch->number);
+
+		if (ch->flags & XPC_C_DISCONNECTED) {
+			DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING);
+			goto out;
+		}
+
+		DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+
+		if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
+			if (part->chctl.flags[ch_number] &
+			    XPC_CHCTL_CLOSEREQUEST) {
+
+				DBUG_ON(ch->delayed_chctl_flags != 0);
+				spin_lock(&part->chctl_lock);
+				part->chctl.flags[ch_number] |=
+				    XPC_CHCTL_CLOSEREPLY;
+				spin_unlock(&part->chctl_lock);
+			}
+			goto out;
+		}
+
+		ch->flags |= XPC_C_RCLOSEREPLY;
+
+		if (ch->flags & XPC_C_CLOSEREPLY) {
+			/* both sides have finished disconnecting */
+			xpc_process_disconnect(ch, &irq_flags);
+		}
+	}
+
+	if (chctl_flags & XPC_CHCTL_OPENREQUEST) {
+
+		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (entry_size=%d, "
+			"local_nentries=%d) received from partid=%d, "
+			"channel=%d\n", args->entry_size, args->local_nentries,
+			ch->partid, ch->number);
+
+		if (part->act_state == XPC_P_AS_DEACTIVATING ||
+		    (ch->flags & XPC_C_ROPENREQUEST)) {
+			goto out;
+		}
+
+		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
+			ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST;
+			goto out;
+		}
+		DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED |
+				       XPC_C_OPENREQUEST)));
+		DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
+				     XPC_C_OPENREPLY | XPC_C_CONNECTED));
+
+		/*
+		 * The meaningful OPENREQUEST connection state fields are:
+		 *      entry_size = size of channel's messages in bytes
+		 *      local_nentries = remote partition's local_nentries
+		 */
+		if (args->entry_size == 0 || args->local_nentries == 0) {
+			/* assume OPENREQUEST was delayed by mistake */
+			goto out;
+		}
+
+		ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
+		ch->remote_nentries = args->local_nentries;
+
+		if (ch->flags & XPC_C_OPENREQUEST) {
+			if (args->entry_size != ch->entry_size) {
+				XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes,
+						       &irq_flags);
+				goto out;
+			}
+		} else {
+			ch->entry_size = args->entry_size;
+
+			XPC_SET_REASON(ch, 0, 0);
+			ch->flags &= ~XPC_C_DISCONNECTED;
+
+			atomic_inc(&part->nchannels_active);
+		}
+
+		xpc_process_connect(ch, &irq_flags);
+	}
+
+	if (chctl_flags & XPC_CHCTL_OPENREPLY) {
+
+		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa="
+			"0x%lx, local_nentries=%d, remote_nentries=%d) "
+			"received from partid=%d, channel=%d\n",
+			args->local_msgqueue_pa, args->local_nentries,
+			args->remote_nentries, ch->partid, ch->number);
+
+		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
+			goto out;
+
+		if (!(ch->flags & XPC_C_OPENREQUEST)) {
+			XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError,
+					       &irq_flags);
+			goto out;
+		}
+
+		DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
+		DBUG_ON(ch->flags & XPC_C_CONNECTED);
+
+		/*
+		 * The meaningful OPENREPLY connection state fields are:
+		 *      local_msgqueue_pa = physical address of remote
+		 *                          partition's local_msgqueue
+		 *      local_nentries = remote partition's local_nentries
+		 *      remote_nentries = remote partition's remote_nentries
+		 */
+		DBUG_ON(args->local_msgqueue_pa == 0);
+		DBUG_ON(args->local_nentries == 0);
+		DBUG_ON(args->remote_nentries == 0);
+
+		ret = xpc_arch_ops.save_remote_msgqueue_pa(ch,
+						      args->local_msgqueue_pa);
+		if (ret != xpSuccess) {
+			XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags);
+			goto out;
+		}
+		ch->flags |= XPC_C_ROPENREPLY;
+
+		if (args->local_nentries < ch->remote_nentries) {
+			dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
+				"remote_nentries=%d, old remote_nentries=%d, "
+				"partid=%d, channel=%d\n",
+				args->local_nentries, ch->remote_nentries,
+				ch->partid, ch->number);
+
+			ch->remote_nentries = args->local_nentries;
+		}
+		if (args->remote_nentries < ch->local_nentries) {
+			dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
+				"local_nentries=%d, old local_nentries=%d, "
+				"partid=%d, channel=%d\n",
+				args->remote_nentries, ch->local_nentries,
+				ch->partid, ch->number);
+
+			ch->local_nentries = args->remote_nentries;
+		}
+
+		xpc_process_connect(ch, &irq_flags);
+	}
+
+	if (chctl_flags & XPC_CHCTL_OPENCOMPLETE) {
+
+		dev_dbg(xpc_chan, "XPC_CHCTL_OPENCOMPLETE received from "
+			"partid=%d, channel=%d\n", ch->partid, ch->number);
+
+		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
+			goto out;
+
+		if (!(ch->flags & XPC_C_OPENREQUEST) ||
+		    !(ch->flags & XPC_C_OPENREPLY)) {
+			XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError,
+					       &irq_flags);
+			goto out;
+		}
+
+		DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
+		DBUG_ON(!(ch->flags & XPC_C_ROPENREPLY));
+		DBUG_ON(!(ch->flags & XPC_C_CONNECTED));
+
+		ch->flags |= XPC_C_ROPENCOMPLETE;
+
+		xpc_process_connect(ch, &irq_flags);
+		create_kthread = 1;
+	}
+
+out:
+	spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+	if (create_kthread)
+		xpc_create_kthreads(ch, 1, 0);
+}
+
+/*
+ * Attempt to establish a channel connection to a remote partition.
+ */
+static enum xp_retval
+xpc_connect_channel(struct xpc_channel *ch)
+{
+	unsigned long irq_flags;
+	struct xpc_registration *registration = &xpc_registrations[ch->number];
+
+	if (mutex_trylock(&registration->mutex) == 0)
+		return xpRetry;
+
+	if (!XPC_CHANNEL_REGISTERED(ch->number)) {
+		mutex_unlock(&registration->mutex);
+		return xpUnregistered;
+	}
+
+	spin_lock_irqsave(&ch->lock, irq_flags);
+
+	DBUG_ON(ch->flags & XPC_C_CONNECTED);
+	DBUG_ON(ch->flags & XPC_C_OPENREQUEST);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		mutex_unlock(&registration->mutex);
+		return ch->reason;
+	}
+
+	/* add info from the channel connect registration to the channel */
+
+	ch->kthreads_assigned_limit = registration->assigned_limit;
+	ch->kthreads_idle_limit = registration->idle_limit;
+	DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
+	DBUG_ON(atomic_read(&ch->kthreads_idle) != 0);
+	DBUG_ON(atomic_read(&ch->kthreads_active) != 0);
+
+	ch->func = registration->func;
+	DBUG_ON(registration->func == NULL);
+	ch->key = registration->key;
+
+	ch->local_nentries = registration->nentries;
+
+	if (ch->flags & XPC_C_ROPENREQUEST) {
+		if (registration->entry_size != ch->entry_size) {
+			/* the local and remote sides aren't the same */
+
+			/*
+			 * Because XPC_DISCONNECT_CHANNEL() can block we're
+			 * forced to up the registration sema before we unlock
+			 * the channel lock. But that's okay here because we're
+			 * done with the part that required the registration
+			 * sema. XPC_DISCONNECT_CHANNEL() requires that the
+			 * channel lock be locked and will unlock and relock
+			 * the channel lock as needed.
+			 */
+			mutex_unlock(&registration->mutex);
+			XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes,
+					       &irq_flags);
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+			return xpUnequalMsgSizes;
+		}
+	} else {
+		ch->entry_size = registration->entry_size;
+
+		XPC_SET_REASON(ch, 0, 0);
+		ch->flags &= ~XPC_C_DISCONNECTED;
+
+		atomic_inc(&xpc_partitions[ch->partid].nchannels_active);
+	}
+
+	mutex_unlock(&registration->mutex);
+
+	/* initiate the connection */
+
+	ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
+	xpc_arch_ops.send_chctl_openrequest(ch, &irq_flags);
+
+	xpc_process_connect(ch, &irq_flags);
+
+	spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+	return xpSuccess;
+}
+
+void
+xpc_process_sent_chctl_flags(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	union xpc_channel_ctl_flags chctl;
+	struct xpc_channel *ch;
+	int ch_number;
+	u32 ch_flags;
+
+	chctl.all_flags = xpc_arch_ops.get_chctl_all_flags(part);
+
+	/*
+	 * Initiate channel connections for registered channels.
+	 *
+	 * For each connected channel that has pending messages activate idle
+	 * kthreads and/or create new kthreads as needed.
+	 */
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch = &part->channels[ch_number];
+
+		/*
+		 * Process any open or close related chctl flags, and then deal
+		 * with connecting or disconnecting the channel as required.
+		 */
+
+		if (chctl.flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) {
+			xpc_process_openclose_chctl_flags(part, ch_number,
+							chctl.flags[ch_number]);
+		}
+
+		ch_flags = ch->flags;	/* need an atomic snapshot of flags */
+
+		if (ch_flags & XPC_C_DISCONNECTING) {
+			spin_lock_irqsave(&ch->lock, irq_flags);
+			xpc_process_disconnect(ch, &irq_flags);
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+			continue;
+		}
+
+		if (part->act_state == XPC_P_AS_DEACTIVATING)
+			continue;
+
+		if (!(ch_flags & XPC_C_CONNECTED)) {
+			if (!(ch_flags & XPC_C_OPENREQUEST)) {
+				DBUG_ON(ch_flags & XPC_C_SETUP);
+				(void)xpc_connect_channel(ch);
+			}
+			continue;
+		}
+
+		/*
+		 * Process any message related chctl flags, this may involve
+		 * the activation of kthreads to deliver any pending messages
+		 * sent from the other partition.
+		 */
+
+		if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
+			xpc_arch_ops.process_msg_chctl_flags(part, ch_number);
+	}
+}
+
+/*
+ * XPC's heartbeat code calls this function to inform XPC that a partition is
+ * going down.  XPC responds by tearing down the XPartition Communication
+ * infrastructure used for the just downed partition.
+ *
+ * XPC's heartbeat code will never call this function and xpc_partition_up()
+ * at the same time. Nor will it ever make multiple calls to either function
+ * at the same time.
+ */
+void
+xpc_partition_going_down(struct xpc_partition *part, enum xp_retval reason)
+{
+	unsigned long irq_flags;
+	int ch_number;
+	struct xpc_channel *ch;
+
+	dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n",
+		XPC_PARTID(part), reason);
+
+	if (!xpc_part_ref(part)) {
+		/* infrastructure for this partition isn't currently set up */
+		return;
+	}
+
+	/* disconnect channels associated with the partition going down */
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch = &part->channels[ch_number];
+
+		xpc_msgqueue_ref(ch);
+		spin_lock_irqsave(&ch->lock, irq_flags);
+
+		XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
+
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		xpc_msgqueue_deref(ch);
+	}
+
+	xpc_wakeup_channel_mgr(part);
+
+	xpc_part_deref(part);
+}
+
+/*
+ * Called by XP at the time of channel connection registration to cause
+ * XPC to establish connections to all currently active partitions.
+ */
+void
+xpc_initiate_connect(int ch_number)
+{
+	short partid;
+	struct xpc_partition *part;
+	struct xpc_channel *ch;
+
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+		part = &xpc_partitions[partid];
+
+		if (xpc_part_ref(part)) {
+			ch = &part->channels[ch_number];
+
+			/*
+			 * Initiate the establishment of a connection on the
+			 * newly registered channel to the remote partition.
+			 */
+			xpc_wakeup_channel_mgr(part);
+			xpc_part_deref(part);
+		}
+	}
+}
+
+void
+xpc_connected_callout(struct xpc_channel *ch)
+{
+	/* let the registerer know that a connection has been established */
+
+	if (ch->func != NULL) {
+		dev_dbg(xpc_chan, "ch->func() called, reason=xpConnected, "
+			"partid=%d, channel=%d\n", ch->partid, ch->number);
+
+		ch->func(xpConnected, ch->partid, ch->number,
+			 (void *)(u64)ch->local_nentries, ch->key);
+
+		dev_dbg(xpc_chan, "ch->func() returned, reason=xpConnected, "
+			"partid=%d, channel=%d\n", ch->partid, ch->number);
+	}
+}
+
+/*
+ * Called by XP at the time of channel connection unregistration to cause
+ * XPC to teardown all current connections for the specified channel.
+ *
+ * Before returning xpc_initiate_disconnect() will wait until all connections
+ * on the specified channel have been closed/torndown. So the caller can be
+ * assured that they will not be receiving any more callouts from XPC to the
+ * function they registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ *	ch_number - channel # to unregister.
+ */
+void
+xpc_initiate_disconnect(int ch_number)
+{
+	unsigned long irq_flags;
+	short partid;
+	struct xpc_partition *part;
+	struct xpc_channel *ch;
+
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+
+	/* initiate the channel disconnect for every active partition */
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+		part = &xpc_partitions[partid];
+
+		if (xpc_part_ref(part)) {
+			ch = &part->channels[ch_number];
+			xpc_msgqueue_ref(ch);
+
+			spin_lock_irqsave(&ch->lock, irq_flags);
+
+			if (!(ch->flags & XPC_C_DISCONNECTED)) {
+				ch->flags |= XPC_C_WDISCONNECT;
+
+				XPC_DISCONNECT_CHANNEL(ch, xpUnregistering,
+						       &irq_flags);
+			}
+
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+			xpc_msgqueue_deref(ch);
+			xpc_part_deref(part);
+		}
+	}
+
+	xpc_disconnect_wait(ch_number);
+}
+
+/*
+ * To disconnect a channel, and reflect it back to all who may be waiting.
+ *
+ * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
+ * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
+ * xpc_disconnect_wait().
+ *
+ * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
+ */
+void
+xpc_disconnect_channel(const int line, struct xpc_channel *ch,
+		       enum xp_retval reason, unsigned long *irq_flags)
+{
+	u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
+	if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
+		return;
+
+	DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED)));
+
+	dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n",
+		reason, line, ch->partid, ch->number);
+
+	XPC_SET_REASON(ch, reason, line);
+
+	ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
+	/* some of these may not have been set */
+	ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
+		       XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
+		       XPC_C_CONNECTING | XPC_C_CONNECTED);
+
+	xpc_arch_ops.send_chctl_closerequest(ch, irq_flags);
+
+	if (channel_was_connected)
+		ch->flags |= XPC_C_WASCONNECTED;
+
+	spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+	/* wake all idle kthreads so they can exit */
+	if (atomic_read(&ch->kthreads_idle) > 0) {
+		wake_up_all(&ch->idle_wq);
+
+	} else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+		   !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+		/* start a kthread that will do the xpDisconnecting callout */
+		xpc_create_kthreads(ch, 1, 1);
+	}
+
+	/* wake those waiting to allocate an entry from the local msg queue */
+	if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+		wake_up(&ch->msg_allocate_wq);
+
+	spin_lock_irqsave(&ch->lock, *irq_flags);
+}
+
+void
+xpc_disconnect_callout(struct xpc_channel *ch, enum xp_retval reason)
+{
+	/*
+	 * Let the channel's registerer know that the channel is being
+	 * disconnected. We don't want to do this if the registerer was never
+	 * informed of a connection being made.
+	 */
+
+	if (ch->func != NULL) {
+		dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
+			"channel=%d\n", reason, ch->partid, ch->number);
+
+		ch->func(reason, ch->partid, ch->number, NULL, ch->key);
+
+		dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
+			"channel=%d\n", reason, ch->partid, ch->number);
+	}
+}
+
+/*
+ * Wait for a message entry to become available for the specified channel,
+ * but don't wait any longer than 1 jiffy.
+ */
+enum xp_retval
+xpc_allocate_msg_wait(struct xpc_channel *ch)
+{
+	enum xp_retval ret;
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		DBUG_ON(ch->reason == xpInterrupted);
+		return ch->reason;
+	}
+
+	atomic_inc(&ch->n_on_msg_allocate_wq);
+	ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1);
+	atomic_dec(&ch->n_on_msg_allocate_wq);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		ret = ch->reason;
+		DBUG_ON(ch->reason == xpInterrupted);
+	} else if (ret == 0) {
+		ret = xpTimeout;
+	} else {
+		ret = xpInterrupted;
+	}
+
+	return ret;
+}
+
+/*
+ * Send a message that contains the user's payload on the specified channel
+ * connected to the specified partition.
+ *
+ * NOTE that this routine can sleep waiting for a message entry to become
+ * available. To not sleep, pass in the XPC_NOWAIT flag.
+ *
+ * Once sent, this routine will not wait for the message to be received, nor
+ * will notification be given when it does happen.
+ *
+ * Arguments:
+ *
+ *	partid - ID of partition to which the channel is connected.
+ *	ch_number - channel # to send message on.
+ *	flags - see xp.h for valid flags.
+ *	payload - pointer to the payload which is to be sent.
+ *	payload_size - size of the payload in bytes.
+ */
+enum xp_retval
+xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload,
+		  u16 payload_size)
+{
+	struct xpc_partition *part = &xpc_partitions[partid];
+	enum xp_retval ret = xpUnknownReason;
+
+	dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
+		partid, ch_number);
+
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+	DBUG_ON(payload == NULL);
+
+	if (xpc_part_ref(part)) {
+		ret = xpc_arch_ops.send_payload(&part->channels[ch_number],
+				  flags, payload, payload_size, 0, NULL, NULL);
+		xpc_part_deref(part);
+	}
+
+	return ret;
+}
+
+/*
+ * Send a message that contains the user's payload on the specified channel
+ * connected to the specified partition.
+ *
+ * NOTE that this routine can sleep waiting for a message entry to become
+ * available. To not sleep, pass in the XPC_NOWAIT flag.
+ *
+ * This routine will not wait for the message to be sent or received.
+ *
+ * Once the remote end of the channel has received the message, the function
+ * passed as an argument to xpc_initiate_send_notify() will be called. This
+ * allows the sender to free up or re-use any buffers referenced by the
+ * message, but does NOT mean the message has been processed at the remote
+ * end by a receiver.
+ *
+ * If this routine returns an error, the caller's function will NOT be called.
+ *
+ * Arguments:
+ *
+ *	partid - ID of partition to which the channel is connected.
+ *	ch_number - channel # to send message on.
+ *	flags - see xp.h for valid flags.
+ *	payload - pointer to the payload which is to be sent.
+ *	payload_size - size of the payload in bytes.
+ *	func - function to call with asynchronous notification of message
+ *		  receipt. THIS FUNCTION MUST BE NON-BLOCKING.
+ *	key - user-defined key to be passed to the function when it's called.
+ */
+enum xp_retval
+xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload,
+			 u16 payload_size, xpc_notify_func func, void *key)
+{
+	struct xpc_partition *part = &xpc_partitions[partid];
+	enum xp_retval ret = xpUnknownReason;
+
+	dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
+		partid, ch_number);
+
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+	DBUG_ON(payload == NULL);
+	DBUG_ON(func == NULL);
+
+	if (xpc_part_ref(part)) {
+		ret = xpc_arch_ops.send_payload(&part->channels[ch_number],
+			  flags, payload, payload_size, XPC_N_CALL, func, key);
+		xpc_part_deref(part);
+	}
+	return ret;
+}
+
+/*
+ * Deliver a message's payload to its intended recipient.
+ */
+void
+xpc_deliver_payload(struct xpc_channel *ch)
+{
+	void *payload;
+
+	payload = xpc_arch_ops.get_deliverable_payload(ch);
+	if (payload != NULL) {
+
+		/*
+		 * This ref is taken to protect the payload itself from being
+		 * freed before the user is finished with it, which the user
+		 * indicates by calling xpc_initiate_received().
+		 */
+		xpc_msgqueue_ref(ch);
+
+		atomic_inc(&ch->kthreads_active);
+
+		if (ch->func != NULL) {
+			dev_dbg(xpc_chan, "ch->func() called, payload=0x%p "
+				"partid=%d channel=%d\n", payload, ch->partid,
+				ch->number);
+
+			/* deliver the message to its intended recipient */
+			ch->func(xpMsgReceived, ch->partid, ch->number, payload,
+				 ch->key);
+
+			dev_dbg(xpc_chan, "ch->func() returned, payload=0x%p "
+				"partid=%d channel=%d\n", payload, ch->partid,
+				ch->number);
+		}
+
+		atomic_dec(&ch->kthreads_active);
+	}
+}
+
+/*
+ * Acknowledge receipt of a delivered message's payload.
+ *
+ * This function, although called by users, does not call xpc_part_ref() to
+ * ensure that the partition infrastructure is in place. It relies on the
+ * fact that we called xpc_msgqueue_ref() in xpc_deliver_payload().
+ *
+ * Arguments:
+ *
+ *	partid - ID of partition to which the channel is connected.
+ *	ch_number - channel # message received on.
+ *	payload - pointer to the payload area allocated via
+ *			xpc_initiate_send() or xpc_initiate_send_notify().
+ */
+void
+xpc_initiate_received(short partid, int ch_number, void *payload)
+{
+	struct xpc_partition *part = &xpc_partitions[partid];
+	struct xpc_channel *ch;
+
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+
+	ch = &part->channels[ch_number];
+	xpc_arch_ops.received_payload(ch, payload);
+
+	/* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload()  */
+	xpc_msgqueue_deref(ch);
+}
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
new file mode 100644
index 00000000000..832ed4c88cf
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -0,0 +1,1343 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) support - standard version.
+ *
+ *	XPC provides a message passing capability that crosses partition
+ *	boundaries. This module is made up of two parts:
+ *
+ *	    partition	This part detects the presence/absence of other
+ *			partitions. It provides a heartbeat and monitors
+ *			the heartbeats of other partitions.
+ *
+ *	    channel	This part manages the channels and sends/receives
+ *			messages across them to/from other partitions.
+ *
+ *	There are a couple of additional functions residing in XP, which
+ *	provide an interface to XPC for its users.
+ *
+ *
+ *	Caveats:
+ *
+ *	  . Currently on sn2, we have no way to determine which nasid an IRQ
+ *	    came from. Thus, xpc_send_IRQ_sn2() does a remote amo write
+ *	    followed by an IPI. The amo indicates where data is to be pulled
+ *	    from, so after the IPI arrives, the remote partition checks the amo
+ *	    word. The IPI can actually arrive before the amo however, so other
+ *	    code must periodically check for this case. Also, remote amo
+ *	    operations do not reliably time out. Thus we do a remote PIO read
+ *	    solely to know whether the remote partition is down and whether we
+ *	    should stop sending IPIs to it. This remote PIO read operation is
+ *	    set up in a special nofault region so SAL knows to ignore (and
+ *	    cleanup) any errors due to the remote amo write, PIO read, and/or
+ *	    PIO write operations.
+ *
+ *	    If/when new hardware solves this IPI problem, we should abandon
+ *	    the current approach.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/kdebug.h>
+#include <linux/kthread.h>
+#include "xpc.h"
+
+/* define two XPC debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xpc_dbg_name = {
+	.name = "xpc"
+};
+
+struct device xpc_part_dbg_subname = {
+	.init_name = "",	/* set to "part" at xpc_init() time */
+	.driver = &xpc_dbg_name
+};
+
+struct device xpc_chan_dbg_subname = {
+	.init_name = "",	/* set to "chan" at xpc_init() time */
+	.driver = &xpc_dbg_name
+};
+
+struct device *xpc_part = &xpc_part_dbg_subname;
+struct device *xpc_chan = &xpc_chan_dbg_subname;
+
+static int xpc_kdebug_ignore;
+
+/* systune related variables for /proc/sys directories */
+
+static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
+static int xpc_hb_min_interval = 1;
+static int xpc_hb_max_interval = 10;
+
+static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
+static int xpc_hb_check_min_interval = 10;
+static int xpc_hb_check_max_interval = 120;
+
+int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT;
+static int xpc_disengage_min_timelimit;	/* = 0 */
+static int xpc_disengage_max_timelimit = 120;
+
+static ctl_table xpc_sys_xpc_hb_dir[] = {
+	{
+	 .procname = "hb_interval",
+	 .data = &xpc_hb_interval,
+	 .maxlen = sizeof(int),
+	 .mode = 0644,
+	 .proc_handler = proc_dointvec_minmax,
+	 .extra1 = &xpc_hb_min_interval,
+	 .extra2 = &xpc_hb_max_interval},
+	{
+	 .procname = "hb_check_interval",
+	 .data = &xpc_hb_check_interval,
+	 .maxlen = sizeof(int),
+	 .mode = 0644,
+	 .proc_handler = proc_dointvec_minmax,
+	 .extra1 = &xpc_hb_check_min_interval,
+	 .extra2 = &xpc_hb_check_max_interval},
+	{}
+};
+static ctl_table xpc_sys_xpc_dir[] = {
+	{
+	 .procname = "hb",
+	 .mode = 0555,
+	 .child = xpc_sys_xpc_hb_dir},
+	{
+	 .procname = "disengage_timelimit",
+	 .data = &xpc_disengage_timelimit,
+	 .maxlen = sizeof(int),
+	 .mode = 0644,
+	 .proc_handler = proc_dointvec_minmax,
+	 .extra1 = &xpc_disengage_min_timelimit,
+	 .extra2 = &xpc_disengage_max_timelimit},
+	{}
+};
+static ctl_table xpc_sys_dir[] = {
+	{
+	 .procname = "xpc",
+	 .mode = 0555,
+	 .child = xpc_sys_xpc_dir},
+	{}
+};
+static struct ctl_table_header *xpc_sysctl;
+
+/* non-zero if any remote partition disengage was timed out */
+int xpc_disengage_timedout;
+
+/* #of activate IRQs received and not yet processed */
+int xpc_activate_IRQ_rcvd;
+DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock);
+
+/* IRQ handler notifies this wait queue on receipt of an IRQ */
+DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);
+
+static unsigned long xpc_hb_check_timeout;
+static struct timer_list xpc_hb_timer;
+
+/* notification that the xpc_hb_checker thread has exited */
+static DECLARE_COMPLETION(xpc_hb_checker_exited);
+
+/* notification that the xpc_discovery thread has exited */
+static DECLARE_COMPLETION(xpc_discovery_exited);
+
+static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
+
+static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xpc_reboot_notifier = {
+	.notifier_call = xpc_system_reboot,
+};
+
+static int xpc_system_die(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xpc_die_notifier = {
+	.notifier_call = xpc_system_die,
+};
+
+struct xpc_arch_operations xpc_arch_ops;
+
+/*
+ * Timer function to enforce the timelimit on the partition disengage.
+ */
+static void
+xpc_timeout_partition_disengage(unsigned long data)
+{
+	struct xpc_partition *part = (struct xpc_partition *)data;
+
+	DBUG_ON(time_is_after_jiffies(part->disengage_timeout));
+
+	(void)xpc_partition_disengaged(part);
+
+	DBUG_ON(part->disengage_timeout != 0);
+	DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part)));
+}
+
+/*
+ * Timer to produce the heartbeat.  The timer structures function is
+ * already set when this is initially called.  A tunable is used to
+ * specify when the next timeout should occur.
+ */
+static void
+xpc_hb_beater(unsigned long dummy)
+{
+	xpc_arch_ops.increment_heartbeat();
+
+	if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
+		wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+	xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
+	add_timer(&xpc_hb_timer);
+}
+
+static void
+xpc_start_hb_beater(void)
+{
+	xpc_arch_ops.heartbeat_init();
+	init_timer(&xpc_hb_timer);
+	xpc_hb_timer.function = xpc_hb_beater;
+	xpc_hb_beater(0);
+}
+
+static void
+xpc_stop_hb_beater(void)
+{
+	del_timer_sync(&xpc_hb_timer);
+	xpc_arch_ops.heartbeat_exit();
+}
+
+/*
+ * At periodic intervals, scan through all active partitions and ensure
+ * their heartbeat is still active.  If not, the partition is deactivated.
+ */
+static void
+xpc_check_remote_hb(void)
+{
+	struct xpc_partition *part;
+	short partid;
+	enum xp_retval ret;
+
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+
+		if (xpc_exiting)
+			break;
+
+		if (partid == xp_partition_id)
+			continue;
+
+		part = &xpc_partitions[partid];
+
+		if (part->act_state == XPC_P_AS_INACTIVE ||
+		    part->act_state == XPC_P_AS_DEACTIVATING) {
+			continue;
+		}
+
+		ret = xpc_arch_ops.get_remote_heartbeat(part);
+		if (ret != xpSuccess)
+			XPC_DEACTIVATE_PARTITION(part, ret);
+	}
+}
+
+/*
+ * This thread is responsible for nearly all of the partition
+ * activation/deactivation.
+ */
+static int
+xpc_hb_checker(void *ignore)
+{
+	int force_IRQ = 0;
+
+	/* this thread was marked active by xpc_hb_init() */
+
+	set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU));
+
+	/* set our heartbeating to other partitions into motion */
+	xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
+	xpc_start_hb_beater();
+
+	while (!xpc_exiting) {
+
+		dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
+			"been received\n",
+			(int)(xpc_hb_check_timeout - jiffies),
+			xpc_activate_IRQ_rcvd);
+
+		/* checking of remote heartbeats is skewed by IRQ handling */
+		if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
+			xpc_hb_check_timeout = jiffies +
+			    (xpc_hb_check_interval * HZ);
+
+			dev_dbg(xpc_part, "checking remote heartbeats\n");
+			xpc_check_remote_hb();
+
+			/*
+			 * On sn2 we need to periodically recheck to ensure no
+			 * IRQ/amo pairs have been missed.
+			 */
+			if (is_shub())
+				force_IRQ = 1;
+		}
+
+		/* check for outstanding IRQs */
+		if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) {
+			force_IRQ = 0;
+			dev_dbg(xpc_part, "processing activate IRQs "
+				"received\n");
+			xpc_arch_ops.process_activate_IRQ_rcvd();
+		}
+
+		/* wait for IRQ or timeout */
+		(void)wait_event_interruptible(xpc_activate_IRQ_wq,
+					       (time_is_before_eq_jiffies(
+						xpc_hb_check_timeout) ||
+						xpc_activate_IRQ_rcvd > 0 ||
+						xpc_exiting));
+	}
+
+	xpc_stop_hb_beater();
+
+	dev_dbg(xpc_part, "heartbeat checker is exiting\n");
+
+	/* mark this thread as having exited */
+	complete(&xpc_hb_checker_exited);
+	return 0;
+}
+
+/*
+ * This thread will attempt to discover other partitions to activate
+ * based on info provided by SAL. This new thread is short lived and
+ * will exit once discovery is complete.
+ */
+static int
+xpc_initiate_discovery(void *ignore)
+{
+	xpc_discovery();
+
+	dev_dbg(xpc_part, "discovery thread is exiting\n");
+
+	/* mark this thread as having exited */
+	complete(&xpc_discovery_exited);
+	return 0;
+}
+
+/*
+ * The first kthread assigned to a newly activated partition is the one
+ * created by XPC HB with which it calls xpc_activating(). XPC hangs on to
+ * that kthread until the partition is brought down, at which time that kthread
+ * returns back to XPC HB. (The return of that kthread will signify to XPC HB
+ * that XPC has dismantled all communication infrastructure for the associated
+ * partition.) This kthread becomes the channel manager for that partition.
+ *
+ * Each active partition has a channel manager, who, besides connecting and
+ * disconnecting channels, will ensure that each of the partition's connected
+ * channels has the required number of assigned kthreads to get the work done.
+ */
+static void
+xpc_channel_mgr(struct xpc_partition *part)
+{
+	while (part->act_state != XPC_P_AS_DEACTIVATING ||
+	       atomic_read(&part->nchannels_active) > 0 ||
+	       !xpc_partition_disengaged(part)) {
+
+		xpc_process_sent_chctl_flags(part);
+
+		/*
+		 * Wait until we've been requested to activate kthreads or
+		 * all of the channel's message queues have been torn down or
+		 * a signal is pending.
+		 *
+		 * The channel_mgr_requests is set to 1 after being awakened,
+		 * This is done to prevent the channel mgr from making one pass
+		 * through the loop for each request, since he will
+		 * be servicing all the requests in one pass. The reason it's
+		 * set to 1 instead of 0 is so that other kthreads will know
+		 * that the channel mgr is running and won't bother trying to
+		 * wake him up.
+		 */
+		atomic_dec(&part->channel_mgr_requests);
+		(void)wait_event_interruptible(part->channel_mgr_wq,
+				(atomic_read(&part->channel_mgr_requests) > 0 ||
+				 part->chctl.all_flags != 0 ||
+				 (part->act_state == XPC_P_AS_DEACTIVATING &&
+				 atomic_read(&part->nchannels_active) == 0 &&
+				 xpc_partition_disengaged(part))));
+		atomic_set(&part->channel_mgr_requests, 1);
+	}
+}
+
+/*
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
+ */
+void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+	/* see if kzalloc will give us cachline aligned memory by default */
+	*base = kzalloc(size, flags);
+	if (*base == NULL)
+		return NULL;
+
+	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+		return *base;
+
+	kfree(*base);
+
+	/* nope, we'll have to do it ourselves */
+	*base = kzalloc(size + L1_CACHE_BYTES, flags);
+	if (*base == NULL)
+		return NULL;
+
+	return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Setup the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static enum xp_retval
+xpc_setup_ch_structures(struct xpc_partition *part)
+{
+	enum xp_retval ret;
+	int ch_number;
+	struct xpc_channel *ch;
+	short partid = XPC_PARTID(part);
+
+	/*
+	 * Allocate all of the channel structures as a contiguous chunk of
+	 * memory.
+	 */
+	DBUG_ON(part->channels != NULL);
+	part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
+				 GFP_KERNEL);
+	if (part->channels == NULL) {
+		dev_err(xpc_chan, "can't get memory for channels\n");
+		return xpNoMemory;
+	}
+
+	/* allocate the remote open and close args */
+
+	part->remote_openclose_args =
+	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
+					  GFP_KERNEL, &part->
+					  remote_openclose_args_base);
+	if (part->remote_openclose_args == NULL) {
+		dev_err(xpc_chan, "can't get memory for remote connect args\n");
+		ret = xpNoMemory;
+		goto out_1;
+	}
+
+	part->chctl.all_flags = 0;
+	spin_lock_init(&part->chctl_lock);
+
+	atomic_set(&part->channel_mgr_requests, 1);
+	init_waitqueue_head(&part->channel_mgr_wq);
+
+	part->nchannels = XPC_MAX_NCHANNELS;
+
+	atomic_set(&part->nchannels_active, 0);
+	atomic_set(&part->nchannels_engaged, 0);
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch = &part->channels[ch_number];
+
+		ch->partid = partid;
+		ch->number = ch_number;
+		ch->flags = XPC_C_DISCONNECTED;
+
+		atomic_set(&ch->kthreads_assigned, 0);
+		atomic_set(&ch->kthreads_idle, 0);
+		atomic_set(&ch->kthreads_active, 0);
+
+		atomic_set(&ch->references, 0);
+		atomic_set(&ch->n_to_notify, 0);
+
+		spin_lock_init(&ch->lock);
+		init_completion(&ch->wdisconnect_wait);
+
+		atomic_set(&ch->n_on_msg_allocate_wq, 0);
+		init_waitqueue_head(&ch->msg_allocate_wq);
+		init_waitqueue_head(&ch->idle_wq);
+	}
+
+	ret = xpc_arch_ops.setup_ch_structures(part);
+	if (ret != xpSuccess)
+		goto out_2;
+
+	/*
+	 * With the setting of the partition setup_state to XPC_P_SS_SETUP,
+	 * we're declaring that this partition is ready to go.
+	 */
+	part->setup_state = XPC_P_SS_SETUP;
+
+	return xpSuccess;
+
+	/* setup of ch structures failed */
+out_2:
+	kfree(part->remote_openclose_args_base);
+	part->remote_openclose_args = NULL;
+out_1:
+	kfree(part->channels);
+	part->channels = NULL;
+	return ret;
+}
+
+/*
+ * Teardown the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static void
+xpc_teardown_ch_structures(struct xpc_partition *part)
+{
+	DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
+	DBUG_ON(atomic_read(&part->nchannels_active) != 0);
+
+	/*
+	 * Make this partition inaccessible to local processes by marking it
+	 * as no longer setup. Then wait before proceeding with the teardown
+	 * until all existing references cease.
+	 */
+	DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
+	part->setup_state = XPC_P_SS_WTEARDOWN;
+
+	wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
+
+	/* now we can begin tearing down the infrastructure */
+
+	xpc_arch_ops.teardown_ch_structures(part);
+
+	kfree(part->remote_openclose_args_base);
+	part->remote_openclose_args = NULL;
+	kfree(part->channels);
+	part->channels = NULL;
+
+	part->setup_state = XPC_P_SS_TORNDOWN;
+}
+
+/*
+ * When XPC HB determines that a partition has come up, it will create a new
+ * kthread and that kthread will call this function to attempt to set up the
+ * basic infrastructure used for Cross Partition Communication with the newly
+ * upped partition.
+ *
+ * The kthread that was created by XPC HB and which setup the XPC
+ * infrastructure will remain assigned to the partition becoming the channel
+ * manager for that partition until the partition is deactivating, at which
+ * time the kthread will teardown the XPC infrastructure and then exit.
+ */
+static int
+xpc_activating(void *__partid)
+{
+	short partid = (u64)__partid;
+	struct xpc_partition *part = &xpc_partitions[partid];
+	unsigned long irq_flags;
+
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+
+	if (part->act_state == XPC_P_AS_DEACTIVATING) {
+		part->act_state = XPC_P_AS_INACTIVE;
+		spin_unlock_irqrestore(&part->act_lock, irq_flags);
+		part->remote_rp_pa = 0;
+		return 0;
+	}
+
+	/* indicate the thread is activating */
+	DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ);
+	part->act_state = XPC_P_AS_ACTIVATING;
+
+	XPC_SET_REASON(part, 0, 0);
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+	dev_dbg(xpc_part, "activating partition %d\n", partid);
+
+	xpc_arch_ops.allow_hb(partid);
+
+	if (xpc_setup_ch_structures(part) == xpSuccess) {
+		(void)xpc_part_ref(part);	/* this will always succeed */
+
+		if (xpc_arch_ops.make_first_contact(part) == xpSuccess) {
+			xpc_mark_partition_active(part);
+			xpc_channel_mgr(part);
+			/* won't return until partition is deactivating */
+		}
+
+		xpc_part_deref(part);
+		xpc_teardown_ch_structures(part);
+	}
+
+	xpc_arch_ops.disallow_hb(partid);
+	xpc_mark_partition_inactive(part);
+
+	if (part->reason == xpReactivating) {
+		/* interrupting ourselves results in activating partition */
+		xpc_arch_ops.request_partition_reactivation(part);
+	}
+
+	return 0;
+}
+
+void
+xpc_activate_partition(struct xpc_partition *part)
+{
+	short partid = XPC_PARTID(part);
+	unsigned long irq_flags;
+	struct task_struct *kthread;
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+
+	DBUG_ON(part->act_state != XPC_P_AS_INACTIVE);
+
+	part->act_state = XPC_P_AS_ACTIVATION_REQ;
+	XPC_SET_REASON(part, xpCloneKThread, __LINE__);
+
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+	kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d",
+			      partid);
+	if (IS_ERR(kthread)) {
+		spin_lock_irqsave(&part->act_lock, irq_flags);
+		part->act_state = XPC_P_AS_INACTIVE;
+		XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__);
+		spin_unlock_irqrestore(&part->act_lock, irq_flags);
+	}
+}
+
+void
+xpc_activate_kthreads(struct xpc_channel *ch, int needed)
+{
+	int idle = atomic_read(&ch->kthreads_idle);
+	int assigned = atomic_read(&ch->kthreads_assigned);
+	int wakeup;
+
+	DBUG_ON(needed <= 0);
+
+	if (idle > 0) {
+		wakeup = (needed > idle) ? idle : needed;
+		needed -= wakeup;
+
+		dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
+			"channel=%d\n", wakeup, ch->partid, ch->number);
+
+		/* only wakeup the requested number of kthreads */
+		wake_up_nr(&ch->idle_wq, wakeup);
+	}
+
+	if (needed <= 0)
+		return;
+
+	if (needed + assigned > ch->kthreads_assigned_limit) {
+		needed = ch->kthreads_assigned_limit - assigned;
+		if (needed <= 0)
+			return;
+	}
+
+	dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
+		needed, ch->partid, ch->number);
+
+	xpc_create_kthreads(ch, needed, 0);
+}
+
+/*
+ * This function is where XPC's kthreads wait for messages to deliver.
+ */
+static void
+xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
+{
+	int (*n_of_deliverable_payloads) (struct xpc_channel *) =
+		xpc_arch_ops.n_of_deliverable_payloads;
+
+	do {
+		/* deliver messages to their intended recipients */
+
+		while (n_of_deliverable_payloads(ch) > 0 &&
+		       !(ch->flags & XPC_C_DISCONNECTING)) {
+			xpc_deliver_payload(ch);
+		}
+
+		if (atomic_inc_return(&ch->kthreads_idle) >
+		    ch->kthreads_idle_limit) {
+			/* too many idle kthreads on this channel */
+			atomic_dec(&ch->kthreads_idle);
+			break;
+		}
+
+		dev_dbg(xpc_chan, "idle kthread calling "
+			"wait_event_interruptible_exclusive()\n");
+
+		(void)wait_event_interruptible_exclusive(ch->idle_wq,
+				(n_of_deliverable_payloads(ch) > 0 ||
+				 (ch->flags & XPC_C_DISCONNECTING)));
+
+		atomic_dec(&ch->kthreads_idle);
+
+	} while (!(ch->flags & XPC_C_DISCONNECTING));
+}
+
+static int
+xpc_kthread_start(void *args)
+{
+	short partid = XPC_UNPACK_ARG1(args);
+	u16 ch_number = XPC_UNPACK_ARG2(args);
+	struct xpc_partition *part = &xpc_partitions[partid];
+	struct xpc_channel *ch;
+	int n_needed;
+	unsigned long irq_flags;
+	int (*n_of_deliverable_payloads) (struct xpc_channel *) =
+		xpc_arch_ops.n_of_deliverable_payloads;
+
+	dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
+		partid, ch_number);
+
+	ch = &part->channels[ch_number];
+
+	if (!(ch->flags & XPC_C_DISCONNECTING)) {
+
+		/* let registerer know that connection has been established */
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
+			ch->flags |= XPC_C_CONNECTEDCALLOUT;
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+			xpc_connected_callout(ch);
+
+			spin_lock_irqsave(&ch->lock, irq_flags);
+			ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+			/*
+			 * It is possible that while the callout was being
+			 * made that the remote partition sent some messages.
+			 * If that is the case, we may need to activate
+			 * additional kthreads to help deliver them. We only
+			 * need one less than total #of messages to deliver.
+			 */
+			n_needed = n_of_deliverable_payloads(ch) - 1;
+			if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
+				xpc_activate_kthreads(ch, n_needed);
+
+		} else {
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+		}
+
+		xpc_kthread_waitmsgs(part, ch);
+	}
+
+	/* let registerer know that connection is disconnecting */
+
+	spin_lock_irqsave(&ch->lock, irq_flags);
+	if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+	    !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+		ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+		xpc_disconnect_callout(ch, xpDisconnecting);
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
+	}
+	spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+	if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+	    atomic_dec_return(&part->nchannels_engaged) == 0) {
+		xpc_arch_ops.indicate_partition_disengaged(part);
+	}
+
+	xpc_msgqueue_deref(ch);
+
+	dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
+		partid, ch_number);
+
+	xpc_part_deref(part);
+	return 0;
+}
+
+/*
+ * For each partition that XPC has established communications with, there is
+ * a minimum of one kernel thread assigned to perform any operation that
+ * may potentially sleep or block (basically the callouts to the asynchronous
+ * functions registered via xpc_connect()).
+ *
+ * Additional kthreads are created and destroyed by XPC as the workload
+ * demands.
+ *
+ * A kthread is assigned to one of the active channels that exists for a given
+ * partition.
+ */
+void
+xpc_create_kthreads(struct xpc_channel *ch, int needed,
+		    int ignore_disconnecting)
+{
+	unsigned long irq_flags;
+	u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	struct task_struct *kthread;
+	void (*indicate_partition_disengaged) (struct xpc_partition *) =
+		xpc_arch_ops.indicate_partition_disengaged;
+
+	while (needed-- > 0) {
+
+		/*
+		 * The following is done on behalf of the newly created
+		 * kthread. That kthread is responsible for doing the
+		 * counterpart to the following before it exits.
+		 */
+		if (ignore_disconnecting) {
+			if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
+				/* kthreads assigned had gone to zero */
+				BUG_ON(!(ch->flags &
+					 XPC_C_DISCONNECTINGCALLOUT_MADE));
+				break;
+			}
+
+		} else if (ch->flags & XPC_C_DISCONNECTING) {
+			break;
+
+		} else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
+			   atomic_inc_return(&part->nchannels_engaged) == 1) {
+			xpc_arch_ops.indicate_partition_engaged(part);
+		}
+		(void)xpc_part_ref(part);
+		xpc_msgqueue_ref(ch);
+
+		kthread = kthread_run(xpc_kthread_start, (void *)args,
+				      "xpc%02dc%d", ch->partid, ch->number);
+		if (IS_ERR(kthread)) {
+			/* the fork failed */
+
+			/*
+			 * NOTE: if (ignore_disconnecting &&
+			 * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
+			 * then we'll deadlock if all other kthreads assigned
+			 * to this channel are blocked in the channel's
+			 * registerer, because the only thing that will unblock
+			 * them is the xpDisconnecting callout that this
+			 * failed kthread_run() would have made.
+			 */
+
+			if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+			    atomic_dec_return(&part->nchannels_engaged) == 0) {
+				indicate_partition_disengaged(part);
+			}
+			xpc_msgqueue_deref(ch);
+			xpc_part_deref(part);
+
+			if (atomic_read(&ch->kthreads_assigned) <
+			    ch->kthreads_idle_limit) {
+				/*
+				 * Flag this as an error only if we have an
+				 * insufficient #of kthreads for the channel
+				 * to function.
+				 */
+				spin_lock_irqsave(&ch->lock, irq_flags);
+				XPC_DISCONNECT_CHANNEL(ch, xpLackOfResources,
+						       &irq_flags);
+				spin_unlock_irqrestore(&ch->lock, irq_flags);
+			}
+			break;
+		}
+	}
+}
+
+void
+xpc_disconnect_wait(int ch_number)
+{
+	unsigned long irq_flags;
+	short partid;
+	struct xpc_partition *part;
+	struct xpc_channel *ch;
+	int wakeup_channel_mgr;
+
+	/* now wait for all callouts to the caller's function to cease */
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+		part = &xpc_partitions[partid];
+
+		if (!xpc_part_ref(part))
+			continue;
+
+		ch = &part->channels[ch_number];
+
+		if (!(ch->flags & XPC_C_WDISCONNECT)) {
+			xpc_part_deref(part);
+			continue;
+		}
+
+		wait_for_completion(&ch->wdisconnect_wait);
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+		wakeup_channel_mgr = 0;
+
+		if (ch->delayed_chctl_flags) {
+			if (part->act_state != XPC_P_AS_DEACTIVATING) {
+				spin_lock(&part->chctl_lock);
+				part->chctl.flags[ch->number] |=
+				    ch->delayed_chctl_flags;
+				spin_unlock(&part->chctl_lock);
+				wakeup_channel_mgr = 1;
+			}
+			ch->delayed_chctl_flags = 0;
+		}
+
+		ch->flags &= ~XPC_C_WDISCONNECT;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+		if (wakeup_channel_mgr)
+			xpc_wakeup_channel_mgr(part);
+
+		xpc_part_deref(part);
+	}
+}
+
+static int
+xpc_setup_partitions(void)
+{
+	short partid;
+	struct xpc_partition *part;
+
+	xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
+				 xp_max_npartitions, GFP_KERNEL);
+	if (xpc_partitions == NULL) {
+		dev_err(xpc_part, "can't get memory for partition structure\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * The first few fields of each entry of xpc_partitions[] need to
+	 * be initialized now so that calls to xpc_connect() and
+	 * xpc_disconnect() can be made prior to the activation of any remote
+	 * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
+	 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
+	 * PARTITION HAS BEEN ACTIVATED.
+	 */
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+		part = &xpc_partitions[partid];
+
+		DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
+
+		part->activate_IRQ_rcvd = 0;
+		spin_lock_init(&part->act_lock);
+		part->act_state = XPC_P_AS_INACTIVE;
+		XPC_SET_REASON(part, 0, 0);
+
+		init_timer(&part->disengage_timer);
+		part->disengage_timer.function =
+		    xpc_timeout_partition_disengage;
+		part->disengage_timer.data = (unsigned long)part;
+
+		part->setup_state = XPC_P_SS_UNSET;
+		init_waitqueue_head(&part->teardown_wq);
+		atomic_set(&part->references, 0);
+	}
+
+	return xpc_arch_ops.setup_partitions();
+}
+
+static void
+xpc_teardown_partitions(void)
+{
+	xpc_arch_ops.teardown_partitions();
+	kfree(xpc_partitions);
+}
+
+static void
+xpc_do_exit(enum xp_retval reason)
+{
+	short partid;
+	int active_part_count, printed_waiting_msg = 0;
+	struct xpc_partition *part;
+	unsigned long printmsg_time, disengage_timeout = 0;
+
+	/* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
+	DBUG_ON(xpc_exiting == 1);
+
+	/*
+	 * Let the heartbeat checker thread and the discovery thread
+	 * (if one is running) know that they should exit. Also wake up
+	 * the heartbeat checker thread in case it's sleeping.
+	 */
+	xpc_exiting = 1;
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+	/* wait for the discovery thread to exit */
+	wait_for_completion(&xpc_discovery_exited);
+
+	/* wait for the heartbeat checker thread to exit */
+	wait_for_completion(&xpc_hb_checker_exited);
+
+	/* sleep for a 1/3 of a second or so */
+	(void)msleep_interruptible(300);
+
+	/* wait for all partitions to become inactive */
+
+	printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
+	xpc_disengage_timedout = 0;
+
+	do {
+		active_part_count = 0;
+
+		for (partid = 0; partid < xp_max_npartitions; partid++) {
+			part = &xpc_partitions[partid];
+
+			if (xpc_partition_disengaged(part) &&
+			    part->act_state == XPC_P_AS_INACTIVE) {
+				continue;
+			}
+
+			active_part_count++;
+
+			XPC_DEACTIVATE_PARTITION(part, reason);
+
+			if (part->disengage_timeout > disengage_timeout)
+				disengage_timeout = part->disengage_timeout;
+		}
+
+		if (xpc_arch_ops.any_partition_engaged()) {
+			if (time_is_before_jiffies(printmsg_time)) {
+				dev_info(xpc_part, "waiting for remote "
+					 "partitions to deactivate, timeout in "
+					 "%ld seconds\n", (disengage_timeout -
+					 jiffies) / HZ);
+				printmsg_time = jiffies +
+				    (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
+				printed_waiting_msg = 1;
+			}
+
+		} else if (active_part_count > 0) {
+			if (printed_waiting_msg) {
+				dev_info(xpc_part, "waiting for local partition"
+					 " to deactivate\n");
+				printed_waiting_msg = 0;
+			}
+
+		} else {
+			if (!xpc_disengage_timedout) {
+				dev_info(xpc_part, "all partitions have "
+					 "deactivated\n");
+			}
+			break;
+		}
+
+		/* sleep for a 1/3 of a second or so */
+		(void)msleep_interruptible(300);
+
+	} while (1);
+
+	DBUG_ON(xpc_arch_ops.any_partition_engaged());
+
+	xpc_teardown_rsvd_page();
+
+	if (reason == xpUnloading) {
+		(void)unregister_die_notifier(&xpc_die_notifier);
+		(void)unregister_reboot_notifier(&xpc_reboot_notifier);
+	}
+
+	/* clear the interface to XPC's functions */
+	xpc_clear_interface();
+
+	if (xpc_sysctl)
+		unregister_sysctl_table(xpc_sysctl);
+
+	xpc_teardown_partitions();
+
+	if (is_shub())
+		xpc_exit_sn2();
+	else if (is_uv())
+		xpc_exit_uv();
+}
+
+/*
+ * This function is called when the system is being rebooted.
+ */
+static int
+xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
+{
+	enum xp_retval reason;
+
+	switch (event) {
+	case SYS_RESTART:
+		reason = xpSystemReboot;
+		break;
+	case SYS_HALT:
+		reason = xpSystemHalt;
+		break;
+	case SYS_POWER_OFF:
+		reason = xpSystemPoweroff;
+		break;
+	default:
+		reason = xpSystemGoingDown;
+	}
+
+	xpc_do_exit(reason);
+	return NOTIFY_DONE;
+}
+
+/*
+ * Notify other partitions to deactivate from us by first disengaging from all
+ * references to our memory.
+ */
+static void
+xpc_die_deactivate(void)
+{
+	struct xpc_partition *part;
+	short partid;
+	int any_engaged;
+	long keep_waiting;
+	long wait_to_print;
+
+	/* keep xpc_hb_checker thread from doing anything (just in case) */
+	xpc_exiting = 1;
+
+	xpc_arch_ops.disallow_all_hbs();   /*indicate we're deactivated */
+
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+		part = &xpc_partitions[partid];
+
+		if (xpc_arch_ops.partition_engaged(partid) ||
+		    part->act_state != XPC_P_AS_INACTIVE) {
+			xpc_arch_ops.request_partition_deactivation(part);
+			xpc_arch_ops.indicate_partition_disengaged(part);
+		}
+	}
+
+	/*
+	 * Though we requested that all other partitions deactivate from us,
+	 * we only wait until they've all disengaged or we've reached the
+	 * defined timelimit.
+	 *
+	 * Given that one iteration through the following while-loop takes
+	 * approximately 200 microseconds, calculate the #of loops to take
+	 * before bailing and the #of loops before printing a waiting message.
+	 */
+	keep_waiting = xpc_disengage_timelimit * 1000 * 5;
+	wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;
+
+	while (1) {
+		any_engaged = xpc_arch_ops.any_partition_engaged();
+		if (!any_engaged) {
+			dev_info(xpc_part, "all partitions have deactivated\n");
+			break;
+		}
+
+		if (!keep_waiting--) {
+			for (partid = 0; partid < xp_max_npartitions;
+			     partid++) {
+				if (xpc_arch_ops.partition_engaged(partid)) {
+					dev_info(xpc_part, "deactivate from "
+						 "remote partition %d timed "
+						 "out\n", partid);
+				}
+			}
+			break;
+		}
+
+		if (!wait_to_print--) {
+			dev_info(xpc_part, "waiting for remote partitions to "
+				 "deactivate, timeout in %ld seconds\n",
+				 keep_waiting / (1000 * 5));
+			wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL *
+			    1000 * 5;
+		}
+
+		udelay(200);
+	}
+}
+
+/*
+ * This function is called when the system is being restarted or halted due
+ * to some sort of system failure. If this is the case we need to notify the
+ * other partitions to disengage from all references to our memory.
+ * This function can also be called when our heartbeater could be offlined
+ * for a time. In this case we need to notify other partitions to not worry
+ * about the lack of a heartbeat.
+ */
+static int
+xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
+{
+#ifdef CONFIG_IA64		/* !!! temporary kludge */
+	switch (event) {
+	case DIE_MACHINE_RESTART:
+	case DIE_MACHINE_HALT:
+		xpc_die_deactivate();
+		break;
+
+	case DIE_KDEBUG_ENTER:
+		/* Should lack of heartbeat be ignored by other partitions? */
+		if (!xpc_kdebug_ignore)
+			break;
+
+		/* fall through */
+	case DIE_MCA_MONARCH_ENTER:
+	case DIE_INIT_MONARCH_ENTER:
+		xpc_arch_ops.offline_heartbeat();
+		break;
+
+	case DIE_KDEBUG_LEAVE:
+		/* Is lack of heartbeat being ignored by other partitions? */
+		if (!xpc_kdebug_ignore)
+			break;
+
+		/* fall through */
+	case DIE_MCA_MONARCH_LEAVE:
+	case DIE_INIT_MONARCH_LEAVE:
+		xpc_arch_ops.online_heartbeat();
+		break;
+	}
+#else
+	xpc_die_deactivate();
+#endif
+
+	return NOTIFY_DONE;
+}
+
+int __init
+xpc_init(void)
+{
+	int ret;
+	struct task_struct *kthread;
+
+	dev_set_name(xpc_part, "part");
+	dev_set_name(xpc_chan, "chan");
+
+	if (is_shub()) {
+		/*
+		 * The ia64-sn2 architecture supports at most 64 partitions.
+		 * And the inability to unregister remote amos restricts us
+		 * further to only support exactly 64 partitions on this
+		 * architecture, no less.
+		 */
+		if (xp_max_npartitions != 64) {
+			dev_err(xpc_part, "max #of partitions not set to 64\n");
+			ret = -EINVAL;
+		} else {
+			ret = xpc_init_sn2();
+		}
+
+	} else if (is_uv()) {
+		ret = xpc_init_uv();
+
+	} else {
+		ret = -ENODEV;
+	}
+
+	if (ret != 0)
+		return ret;
+
+	ret = xpc_setup_partitions();
+	if (ret != 0) {
+		dev_err(xpc_part, "can't get memory for partition structure\n");
+		goto out_1;
+	}
+
+	xpc_sysctl = register_sysctl_table(xpc_sys_dir);
+
+	/*
+	 * Fill the partition reserved page with the information needed by
+	 * other partitions to discover we are alive and establish initial
+	 * communications.
+	 */
+	ret = xpc_setup_rsvd_page();
+	if (ret != 0) {
+		dev_err(xpc_part, "can't setup our reserved page\n");
+		goto out_2;
+	}
+
+	/* add ourselves to the reboot_notifier_list */
+	ret = register_reboot_notifier(&xpc_reboot_notifier);
+	if (ret != 0)
+		dev_warn(xpc_part, "can't register reboot notifier\n");
+
+	/* add ourselves to the die_notifier list */
+	ret = register_die_notifier(&xpc_die_notifier);
+	if (ret != 0)
+		dev_warn(xpc_part, "can't register die notifier\n");
+
+	/*
+	 * The real work-horse behind xpc.  This processes incoming
+	 * interrupts and monitors remote heartbeats.
+	 */
+	kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
+	if (IS_ERR(kthread)) {
+		dev_err(xpc_part, "failed while forking hb check thread\n");
+		ret = -EBUSY;
+		goto out_3;
+	}
+
+	/*
+	 * Startup a thread that will attempt to discover other partitions to
+	 * activate based on info provided by SAL. This new thread is short
+	 * lived and will exit once discovery is complete.
+	 */
+	kthread = kthread_run(xpc_initiate_discovery, NULL,
+			      XPC_DISCOVERY_THREAD_NAME);
+	if (IS_ERR(kthread)) {
+		dev_err(xpc_part, "failed while forking discovery thread\n");
+
+		/* mark this new thread as a non-starter */
+		complete(&xpc_discovery_exited);
+
+		xpc_do_exit(xpUnloading);
+		return -EBUSY;
+	}
+
+	/* set the interface to point at XPC's functions */
+	xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
+			  xpc_initiate_send, xpc_initiate_send_notify,
+			  xpc_initiate_received, xpc_initiate_partid_to_nasids);
+
+	return 0;
+
+	/* initialization was not successful */
+out_3:
+	xpc_teardown_rsvd_page();
+
+	(void)unregister_die_notifier(&xpc_die_notifier);
+	(void)unregister_reboot_notifier(&xpc_reboot_notifier);
+out_2:
+	if (xpc_sysctl)
+		unregister_sysctl_table(xpc_sysctl);
+
+	xpc_teardown_partitions();
+out_1:
+	if (is_shub())
+		xpc_exit_sn2();
+	else if (is_uv())
+		xpc_exit_uv();
+	return ret;
+}
+
+module_init(xpc_init);
+
+void __exit
+xpc_exit(void)
+{
+	xpc_do_exit(xpUnloading);
+}
+
+module_exit(xpc_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
+MODULE_LICENSE("GPL");
+
+module_param(xpc_hb_interval, int, 0);
+MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
+		 "heartbeat increments.");
+
+module_param(xpc_hb_check_interval, int, 0);
+MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
+		 "heartbeat checks.");
+
+module_param(xpc_disengage_timelimit, int, 0);
+MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait "
+		 "for disengage to complete.");
+
+module_param(xpc_kdebug_ignore, int, 0);
+MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
+		 "other partitions when dropping into kdebug.");
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
new file mode 100644
index 00000000000..9a6268c89fd
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -0,0 +1,535 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) partition support.
+ *
+ *	This is the part of XPC that detects the presence/absence of
+ *	other partitions. It provides a heartbeat and monitors the
+ *	heartbeats of other partitions.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/hardirq.h>
+#include "xpc.h"
+#include <asm/uv/uv_hub.h>
+
+/* XPC is exiting flag */
+int xpc_exiting;
+
+/* this partition's reserved page pointers */
+struct xpc_rsvd_page *xpc_rsvd_page;
+static unsigned long *xpc_part_nasids;
+unsigned long *xpc_mach_nasids;
+
+static int xpc_nasid_mask_nbytes;	/* #of bytes in nasid mask */
+int xpc_nasid_mask_nlongs;	/* #of longs in nasid mask */
+
+struct xpc_partition *xpc_partitions;
+
+/*
+ * Guarantee that the kmalloc'd memory is cacheline aligned.
+ */
+void *
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+	/* see if kmalloc will give us cachline aligned memory by default */
+	*base = kmalloc(size, flags);
+	if (*base == NULL)
+		return NULL;
+
+	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+		return *base;
+
+	kfree(*base);
+
+	/* nope, we'll have to do it ourselves */
+	*base = kmalloc(size + L1_CACHE_BYTES, flags);
+	if (*base == NULL)
+		return NULL;
+
+	return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Given a nasid, get the physical address of the  partition's reserved page
+ * for that nasid. This function returns 0 on any error.
+ */
+static unsigned long
+xpc_get_rsvd_page_pa(int nasid)
+{
+	enum xp_retval ret;
+	u64 cookie = 0;
+	unsigned long rp_pa = nasid;	/* seed with nasid */
+	size_t len = 0;
+	size_t buf_len = 0;
+	void *buf = buf;
+	void *buf_base = NULL;
+	enum xp_retval (*get_partition_rsvd_page_pa)
+		(void *, u64 *, unsigned long *, size_t *) =
+		xpc_arch_ops.get_partition_rsvd_page_pa;
+
+	while (1) {
+
+		/* !!! rp_pa will need to be _gpa on UV.
+		 * ??? So do we save it into the architecture specific parts
+		 * ??? of the xpc_partition structure? Do we rename this
+		 * ??? function or have two versions? Rename rp_pa for UV to
+		 * ??? rp_gpa?
+		 */
+		ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len);
+
+		dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
+			"address=0x%016lx, len=0x%016lx\n", ret,
+			(unsigned long)cookie, rp_pa, len);
+
+		if (ret != xpNeedMoreInfo)
+			break;
+
+		/* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
+		if (is_shub())
+			len = L1_CACHE_ALIGN(len);
+
+		if (len > buf_len) {
+			if (buf_base != NULL)
+				kfree(buf_base);
+			buf_len = L1_CACHE_ALIGN(len);
+			buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
+							    &buf_base);
+			if (buf_base == NULL) {
+				dev_err(xpc_part, "unable to kmalloc "
+					"len=0x%016lx\n", buf_len);
+				ret = xpNoMemory;
+				break;
+			}
+		}
+
+		ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len);
+		if (ret != xpSuccess) {
+			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
+			break;
+		}
+	}
+
+	kfree(buf_base);
+
+	if (ret != xpSuccess)
+		rp_pa = 0;
+
+	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
+	return rp_pa;
+}
+
+/*
+ * Fill the partition reserved page with the information needed by
+ * other partitions to discover we are alive and establish initial
+ * communications.
+ */
+int
+xpc_setup_rsvd_page(void)
+{
+	int ret;
+	struct xpc_rsvd_page *rp;
+	unsigned long rp_pa;
+	unsigned long new_ts_jiffies;
+
+	/* get the local reserved page's address */
+
+	preempt_disable();
+	rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
+	preempt_enable();
+	if (rp_pa == 0) {
+		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
+		return -ESRCH;
+	}
+	rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa));
+
+	if (rp->SAL_version < 3) {
+		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
+		rp->SAL_partid &= 0xff;
+	}
+	BUG_ON(rp->SAL_partid != xp_partition_id);
+
+	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
+		dev_err(xpc_part, "the reserved page's partid of %d is outside "
+			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
+			xp_max_npartitions);
+		return -EINVAL;
+	}
+
+	rp->version = XPC_RP_VERSION;
+	rp->max_npartitions = xp_max_npartitions;
+
+	/* establish the actual sizes of the nasid masks */
+	if (rp->SAL_version == 1) {
+		/* SAL_version 1 didn't set the nasids_size field */
+		rp->SAL_nasids_size = 128;
+	}
+	xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
+	xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
+					      BITS_PER_BYTE);
+
+	/* setup the pointers to the various items in the reserved page */
+	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
+	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
+
+	ret = xpc_arch_ops.setup_rsvd_page(rp);
+	if (ret != 0)
+		return ret;
+
+	/*
+	 * Set timestamp of when reserved page was setup by XPC.
+	 * This signifies to the remote partition that our reserved
+	 * page is initialized.
+	 */
+	new_ts_jiffies = jiffies;
+	if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
+		new_ts_jiffies++;
+	rp->ts_jiffies = new_ts_jiffies;
+
+	xpc_rsvd_page = rp;
+	return 0;
+}
+
+void
+xpc_teardown_rsvd_page(void)
+{
+	/* a zero timestamp indicates our rsvd page is not initialized */
+	xpc_rsvd_page->ts_jiffies = 0;
+}
+
+/*
+ * Get a copy of a portion of the remote partition's rsvd page.
+ *
+ * remote_rp points to a buffer that is cacheline aligned for BTE copies and
+ * is large enough to contain a copy of their reserved page header and
+ * part_nasids mask.
+ */
+enum xp_retval
+xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
+		  struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
+{
+	int l;
+	enum xp_retval ret;
+
+	/* get the reserved page's physical address */
+
+	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
+	if (*remote_rp_pa == 0)
+		return xpNoRsvdPageAddr;
+
+	/* pull over the reserved page header and part_nasids mask */
+	ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
+			       XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
+	if (ret != xpSuccess)
+		return ret;
+
+	if (discovered_nasids != NULL) {
+		unsigned long *remote_part_nasids =
+		    XPC_RP_PART_NASIDS(remote_rp);
+
+		for (l = 0; l < xpc_nasid_mask_nlongs; l++)
+			discovered_nasids[l] |= remote_part_nasids[l];
+	}
+
+	/* zero timestamp indicates the reserved page has not been setup */
+	if (remote_rp->ts_jiffies == 0)
+		return xpRsvdPageNotSet;
+
+	if (XPC_VERSION_MAJOR(remote_rp->version) !=
+	    XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
+		return xpBadVersion;
+	}
+
+	/* check that both remote and local partids are valid for each side */
+	if (remote_rp->SAL_partid < 0 ||
+	    remote_rp->SAL_partid >= xp_max_npartitions ||
+	    remote_rp->max_npartitions <= xp_partition_id) {
+		return xpInvalidPartid;
+	}
+
+	if (remote_rp->SAL_partid == xp_partition_id)
+		return xpLocalPartid;
+
+	return xpSuccess;
+}
+
+/*
+ * See if the other side has responded to a partition deactivate request
+ * from us. Though we requested the remote partition to deactivate with regard
+ * to us, we really only need to wait for the other side to disengage from us.
+ */
+int
+xpc_partition_disengaged(struct xpc_partition *part)
+{
+	short partid = XPC_PARTID(part);
+	int disengaged;
+
+	disengaged = !xpc_arch_ops.partition_engaged(partid);
+	if (part->disengage_timeout) {
+		if (!disengaged) {
+			if (time_is_after_jiffies(part->disengage_timeout)) {
+				/* timelimit hasn't been reached yet */
+				return 0;
+			}
+
+			/*
+			 * Other side hasn't responded to our deactivate
+			 * request in a timely fashion, so assume it's dead.
+			 */
+
+			dev_info(xpc_part, "deactivate request to remote "
+				 "partition %d timed out\n", partid);
+			xpc_disengage_timedout = 1;
+			xpc_arch_ops.assume_partition_disengaged(partid);
+			disengaged = 1;
+		}
+		part->disengage_timeout = 0;
+
+		/* cancel the timer function, provided it's not us */
+		if (!in_interrupt())
+			del_singleshot_timer_sync(&part->disengage_timer);
+
+		DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
+			part->act_state != XPC_P_AS_INACTIVE);
+		if (part->act_state != XPC_P_AS_INACTIVE)
+			xpc_wakeup_channel_mgr(part);
+
+		xpc_arch_ops.cancel_partition_deactivation_request(part);
+	}
+	return disengaged;
+}
+
+/*
+ * Mark specified partition as active.
+ */
+enum xp_retval
+xpc_mark_partition_active(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	enum xp_retval ret;
+
+	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+	if (part->act_state == XPC_P_AS_ACTIVATING) {
+		part->act_state = XPC_P_AS_ACTIVE;
+		ret = xpSuccess;
+	} else {
+		DBUG_ON(part->reason == xpSuccess);
+		ret = part->reason;
+	}
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+	return ret;
+}
+
+/*
+ * Start the process of deactivating the specified partition.
+ */
+void
+xpc_deactivate_partition(const int line, struct xpc_partition *part,
+			 enum xp_retval reason)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+
+	if (part->act_state == XPC_P_AS_INACTIVE) {
+		XPC_SET_REASON(part, reason, line);
+		spin_unlock_irqrestore(&part->act_lock, irq_flags);
+		if (reason == xpReactivating) {
+			/* we interrupt ourselves to reactivate partition */
+			xpc_arch_ops.request_partition_reactivation(part);
+		}
+		return;
+	}
+	if (part->act_state == XPC_P_AS_DEACTIVATING) {
+		if ((part->reason == xpUnloading && reason != xpUnloading) ||
+		    reason == xpReactivating) {
+			XPC_SET_REASON(part, reason, line);
+		}
+		spin_unlock_irqrestore(&part->act_lock, irq_flags);
+		return;
+	}
+
+	part->act_state = XPC_P_AS_DEACTIVATING;
+	XPC_SET_REASON(part, reason, line);
+
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+	/* ask remote partition to deactivate with regard to us */
+	xpc_arch_ops.request_partition_deactivation(part);
+
+	/* set a timelimit on the disengage phase of the deactivation request */
+	part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
+	part->disengage_timer.expires = part->disengage_timeout;
+	add_timer(&part->disengage_timer);
+
+	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
+		XPC_PARTID(part), reason);
+
+	xpc_partition_going_down(part, reason);
+}
+
+/*
+ * Mark specified partition as inactive.
+ */
+void
+xpc_mark_partition_inactive(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+
+	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
+		XPC_PARTID(part));
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+	part->act_state = XPC_P_AS_INACTIVE;
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+	part->remote_rp_pa = 0;
+}
+
+/*
+ * SAL has provided a partition and machine mask.  The partition mask
+ * contains a bit for each even nasid in our partition.  The machine
+ * mask contains a bit for each even nasid in the entire machine.
+ *
+ * Using those two bit arrays, we can determine which nasids are
+ * known in the machine.  Each should also have a reserved page
+ * initialized if they are available for partitioning.
+ */
+void
+xpc_discovery(void)
+{
+	void *remote_rp_base;
+	struct xpc_rsvd_page *remote_rp;
+	unsigned long remote_rp_pa;
+	int region;
+	int region_size;
+	int max_regions;
+	int nasid;
+	struct xpc_rsvd_page *rp;
+	unsigned long *discovered_nasids;
+	enum xp_retval ret;
+
+	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
+						  xpc_nasid_mask_nbytes,
+						  GFP_KERNEL, &remote_rp_base);
+	if (remote_rp == NULL)
+		return;
+
+	discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
+				    GFP_KERNEL);
+	if (discovered_nasids == NULL) {
+		kfree(remote_rp_base);
+		return;
+	}
+
+	rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
+
+	/*
+	 * The term 'region' in this context refers to the minimum number of
+	 * nodes that can comprise an access protection grouping. The access
+	 * protection is in regards to memory, IOI and IPI.
+	 */
+	max_regions = 64;
+	region_size = xp_region_size;
+
+	switch (region_size) {
+	case 128:
+		max_regions *= 2;
+	case 64:
+		max_regions *= 2;
+	case 32:
+		max_regions *= 2;
+		region_size = 16;
+		DBUG_ON(!is_shub2());
+	}
+
+	for (region = 0; region < max_regions; region++) {
+
+		if (xpc_exiting)
+			break;
+
+		dev_dbg(xpc_part, "searching region %d\n", region);
+
+		for (nasid = (region * region_size * 2);
+		     nasid < ((region + 1) * region_size * 2); nasid += 2) {
+
+			if (xpc_exiting)
+				break;
+
+			dev_dbg(xpc_part, "checking nasid %d\n", nasid);
+
+			if (test_bit(nasid / 2, xpc_part_nasids)) {
+				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
+					"part of the local partition; skipping "
+					"region\n", nasid);
+				break;
+			}
+
+			if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
+				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
+					"not on Numa-Link network at reset\n",
+					nasid);
+				continue;
+			}
+
+			if (test_bit(nasid / 2, discovered_nasids)) {
+				dev_dbg(xpc_part, "Nasid %d is part of a "
+					"partition which was previously "
+					"discovered\n", nasid);
+				continue;
+			}
+
+			/* pull over the rsvd page header & part_nasids mask */
+
+			ret = xpc_get_remote_rp(nasid, discovered_nasids,
+						remote_rp, &remote_rp_pa);
+			if (ret != xpSuccess) {
+				dev_dbg(xpc_part, "unable to get reserved page "
+					"from nasid %d, reason=%d\n", nasid,
+					ret);
+
+				if (ret == xpLocalPartid)
+					break;
+
+				continue;
+			}
+
+			xpc_arch_ops.request_partition_activation(remote_rp,
+							 remote_rp_pa, nasid);
+		}
+	}
+
+	kfree(discovered_nasids);
+	kfree(remote_rp_base);
+}
+
+/*
+ * Given a partid, get the nasids owned by that partition from the
+ * remote partition's reserved page.
+ */
+enum xp_retval
+xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
+{
+	struct xpc_partition *part;
+	unsigned long part_nasid_pa;
+
+	part = &xpc_partitions[partid];
+	if (part->remote_rp_pa == 0)
+		return xpPartitionDown;
+
+	memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
+
+	part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
+
+	return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
+				xpc_nasid_mask_nbytes);
+}
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
new file mode 100644
index 00000000000..8b70e03f939
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -0,0 +1,2461 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) sn2-based functions.
+ *
+ *     Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/delay.h>
+#include <asm/uncached.h>
+#include <asm/sn/mspec.h>
+#include <asm/sn/sn_sal.h>
+#include "xpc.h"
+
+/*
+ * Define the number of u64s required to represent all the C-brick nasids
+ * as a bitmap.  The cross-partition kernel modules deal only with
+ * C-brick nasids, thus the need for bitmaps which don't account for
+ * odd-numbered (non C-brick) nasids.
+ */
+#define XPC_MAX_PHYSNODES_SN2	(MAX_NUMALINK_NODES / 2)
+#define XP_NASID_MASK_BYTES_SN2	((XPC_MAX_PHYSNODES_SN2 + 7) / 8)
+#define XP_NASID_MASK_WORDS_SN2	((XPC_MAX_PHYSNODES_SN2 + 63) / 64)
+
+/*
+ * Memory for XPC's amo variables is allocated by the MSPEC driver. These
+ * pages are located in the lowest granule. The lowest granule uses 4k pages
+ * for cached references and an alternate TLB handler to never provide a
+ * cacheable mapping for the entire region. This will prevent speculative
+ * reading of cached copies of our lines from being issued which will cause
+ * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
+ * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
+ * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS_SN2) to identify
+ * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote
+ * partitions (i.e., XPCs) consider themselves currently engaged with the
+ * local XPC and 1 amo variable to request partition deactivation.
+ */
+#define XPC_NOTIFY_IRQ_AMOS_SN2		0
+#define XPC_ACTIVATE_IRQ_AMOS_SN2	(XPC_NOTIFY_IRQ_AMOS_SN2 + \
+					 XP_MAX_NPARTITIONS_SN2)
+#define XPC_ENGAGED_PARTITIONS_AMO_SN2	(XPC_ACTIVATE_IRQ_AMOS_SN2 + \
+					 XP_NASID_MASK_WORDS_SN2)
+#define XPC_DEACTIVATE_REQUEST_AMO_SN2	(XPC_ENGAGED_PARTITIONS_AMO_SN2 + 1)
+
+/*
+ * Buffer used to store a local copy of portions of a remote partition's
+ * reserved page (either its header and part_nasids mask, or its vars).
+ */
+static void *xpc_remote_copy_buffer_base_sn2;
+static char *xpc_remote_copy_buffer_sn2;
+
+static struct xpc_vars_sn2 *xpc_vars_sn2;
+static struct xpc_vars_part_sn2 *xpc_vars_part_sn2;
+
+static int
+xpc_setup_partitions_sn2(void)
+{
+	/* nothing needs to be done */
+	return 0;
+}
+
+static void
+xpc_teardown_partitions_sn2(void)
+{
+	/* nothing needs to be done */
+}
+
+/* SH_IPI_ACCESS shub register value on startup */
+static u64 xpc_sh1_IPI_access_sn2;
+static u64 xpc_sh2_IPI_access0_sn2;
+static u64 xpc_sh2_IPI_access1_sn2;
+static u64 xpc_sh2_IPI_access2_sn2;
+static u64 xpc_sh2_IPI_access3_sn2;
+
+/*
+ * Change protections to allow IPI operations.
+ */
+static void
+xpc_allow_IPI_ops_sn2(void)
+{
+	int node;
+	int nasid;
+
+	/* !!! The following should get moved into SAL. */
+	if (is_shub2()) {
+		xpc_sh2_IPI_access0_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
+		xpc_sh2_IPI_access1_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
+		xpc_sh2_IPI_access2_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
+		xpc_sh2_IPI_access3_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
+
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+			      -1UL);
+		}
+	} else {
+		xpc_sh1_IPI_access_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
+
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+			      -1UL);
+		}
+	}
+}
+
+/*
+ * Restrict protections to disallow IPI operations.
+ */
+static void
+xpc_disallow_IPI_ops_sn2(void)
+{
+	int node;
+	int nasid;
+
+	/* !!! The following should get moved into SAL. */
+	if (is_shub2()) {
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+			      xpc_sh2_IPI_access0_sn2);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+			      xpc_sh2_IPI_access1_sn2);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+			      xpc_sh2_IPI_access2_sn2);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+			      xpc_sh2_IPI_access3_sn2);
+		}
+	} else {
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+			      xpc_sh1_IPI_access_sn2);
+		}
+	}
+}
+
+/*
+ * The following set of functions are used for the sending and receiving of
+ * IRQs (also known as IPIs). There are two flavors of IRQs, one that is
+ * associated with partition activity (SGI_XPC_ACTIVATE) and the other that
+ * is associated with channel activity (SGI_XPC_NOTIFY).
+ */
+
+static u64
+xpc_receive_IRQ_amo_sn2(struct amo *amo)
+{
+	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
+}
+
+static enum xp_retval
+xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid,
+		 int vector)
+{
+	int ret = 0;
+	unsigned long irq_flags;
+
+	local_irq_save(irq_flags);
+
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
+	sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IRQs and amos to it until the heartbeat times out.
+	 */
+	ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+
+	return (ret == 0) ? xpSuccess : xpPioReadError;
+}
+
+static struct amo *
+xpc_init_IRQ_amo_sn2(int index)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page + index;
+
+	(void)xpc_receive_IRQ_amo_sn2(amo);	/* clear amo variable */
+	return amo;
+}
+
+/*
+ * Functions associated with SGI_XPC_ACTIVATE IRQ.
+ */
+
+/*
+ * Notify the heartbeat check thread that an activate IRQ has been received.
+ */
+static irqreturn_t
+xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	xpc_activate_IRQ_rcvd++;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Flag the appropriate amo variable and send an IRQ to the specified node.
+ */
+static void
+xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid,
+			  int to_nasid, int to_phys_cpuid)
+{
+	struct amo *amos = (struct amo *)__va(amos_page_pa +
+					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
+					      sizeof(struct amo)));
+
+	(void)xpc_send_IRQ_sn2(&amos[BIT_WORD(from_nasid / 2)],
+			       BIT_MASK(from_nasid / 2), to_nasid,
+			       to_phys_cpuid, SGI_XPC_ACTIVATE);
+}
+
+static void
+xpc_send_local_activate_IRQ_sn2(int from_nasid)
+{
+	unsigned long irq_flags;
+	struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa +
+					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
+					      sizeof(struct amo)));
+
+	/* fake the sending and receipt of an activate IRQ from remote nasid */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable),
+			 FETCHOP_OR, BIT_MASK(from_nasid / 2));
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	xpc_activate_IRQ_rcvd++;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
+}
+
+/*
+ * Functions associated with SGI_XPC_NOTIFY IRQ.
+ */
+
+/*
+ * Check to see if any chctl flags were sent from the specified partition.
+ */
+static void
+xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part)
+{
+	union xpc_channel_ctl_flags chctl;
+	unsigned long irq_flags;
+
+	chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2.
+						  local_chctl_amo_va);
+	if (chctl.all_flags == 0)
+		return;
+
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	part->chctl.all_flags |= chctl.all_flags;
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+	dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags="
+		"0x%llx\n", XPC_PARTID(part), chctl.all_flags);
+
+	xpc_wakeup_channel_mgr(part);
+}
+
+/*
+ * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
+ * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
+ * than one partition, we use an amo structure per partition to indicate
+ * whether a partition has sent an IRQ or not.  If it has, then wake up the
+ * associated kthread to handle it.
+ *
+ * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC
+ * running on other partitions.
+ *
+ * Noteworthy Arguments:
+ *
+ *	irq - Interrupt ReQuest number. NOT USED.
+ *
+ *	dev_id - partid of IRQ's potential sender.
+ */
+static irqreturn_t
+xpc_handle_notify_IRQ_sn2(int irq, void *dev_id)
+{
+	short partid = (short)(u64)dev_id;
+	struct xpc_partition *part = &xpc_partitions[partid];
+
+	DBUG_ON(partid < 0 || partid >= XP_MAX_NPARTITIONS_SN2);
+
+	if (xpc_part_ref(part)) {
+		xpc_check_for_sent_chctl_flags_sn2(part);
+
+		xpc_part_deref(part);
+	}
+	return IRQ_HANDLED;
+}
+
+/*
+ * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor
+ * because the write to their associated amo variable completed after the IRQ
+ * was received.
+ */
+static void
+xpc_check_for_dropped_notify_IRQ_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+
+	if (xpc_part_ref(part)) {
+		xpc_check_for_sent_chctl_flags_sn2(part);
+
+		part_sn2->dropped_notify_IRQ_timer.expires = jiffies +
+		    XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
+		add_timer(&part_sn2->dropped_notify_IRQ_timer);
+		xpc_part_deref(part);
+	}
+}
+
+/*
+ * Send a notify IRQ to the remote partition that is associated with the
+ * specified channel.
+ */
+static void
+xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
+			char *chctl_flag_string, unsigned long *irq_flags)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	union xpc_channel_ctl_flags chctl = { 0 };
+	enum xp_retval ret;
+
+	if (likely(part->act_state != XPC_P_AS_DEACTIVATING)) {
+		chctl.flags[ch->number] = chctl_flag;
+		ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va,
+				       chctl.all_flags,
+				       part_sn2->notify_IRQ_nasid,
+				       part_sn2->notify_IRQ_phys_cpuid,
+				       SGI_XPC_NOTIFY);
+		dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
+			chctl_flag_string, ch->partid, ch->number, ret);
+		if (unlikely(ret != xpSuccess)) {
+			if (irq_flags != NULL)
+				spin_unlock_irqrestore(&ch->lock, *irq_flags);
+			XPC_DEACTIVATE_PARTITION(part, ret);
+			if (irq_flags != NULL)
+				spin_lock_irqsave(&ch->lock, *irq_flags);
+		}
+	}
+}
+
+#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \
+		xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f)
+
+/*
+ * Make it look like the remote partition, which is associated with the
+ * specified channel, sent us a notify IRQ. This faked IRQ will be handled
+ * by xpc_check_for_dropped_notify_IRQ_sn2().
+ */
+static void
+xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
+			      char *chctl_flag_string)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	union xpc_channel_ctl_flags chctl = { 0 };
+
+	chctl.flags[ch->number] = chctl_flag;
+	FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va->
+				variable), FETCHOP_OR, chctl.all_flags);
+	dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
+		chctl_flag_string, ch->partid, ch->number);
+}
+
+#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \
+		xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f)
+
+static void
+xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch,
+				unsigned long *irq_flags)
+{
+	struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+	args->reason = ch->reason;
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags);
+}
+
+static void
+xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags);
+}
+
+static void
+xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+	args->entry_size = ch->entry_size;
+	args->local_nentries = ch->local_nentries;
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags);
+}
+
+static void
+xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+	args->remote_nentries = ch->remote_nentries;
+	args->local_nentries = ch->local_nentries;
+	args->local_msgqueue_pa = xp_pa(ch->sn.sn2.local_msgqueue);
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags);
+}
+
+static void
+xpc_send_chctl_opencomplete_sn2(struct xpc_channel *ch,
+				unsigned long *irq_flags)
+{
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENCOMPLETE, irq_flags);
+}
+
+static void
+xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch)
+{
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL);
+}
+
+static void
+xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch)
+{
+	XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST);
+}
+
+static enum xp_retval
+xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch,
+				unsigned long msgqueue_pa)
+{
+	ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa;
+	return xpSuccess;
+}
+
+/*
+ * This next set of functions are used to keep track of when a partition is
+ * potentially engaged in accessing memory belonging to another partition.
+ */
+
+static void
+xpc_indicate_partition_engaged_sn2(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
+					     (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
+					     sizeof(struct amo)));
+
+	local_irq_save(irq_flags);
+
+	/* set bit corresponding to our partid in remote partition's amo */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+			 BIT(sn_partition_id));
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IRQs and amos to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static void
+xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	unsigned long irq_flags;
+	struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
+					     (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
+					     sizeof(struct amo)));
+
+	local_irq_save(irq_flags);
+
+	/* clear bit corresponding to our partid in remote partition's amo */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~BIT(sn_partition_id));
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IRQs and amos to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+
+	/*
+	 * Send activate IRQ to get other side to see that we've cleared our
+	 * bit in their engaged partitions amo.
+	 */
+	xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+				  cnodeid_to_nasid(0),
+				  part_sn2->activate_IRQ_nasid,
+				  part_sn2->activate_IRQ_phys_cpuid);
+}
+
+static void
+xpc_assume_partition_disengaged_sn2(short partid)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+	/* clear bit(s) based on partid mask in our partition's amo */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~BIT(partid));
+}
+
+static int
+xpc_partition_engaged_sn2(short partid)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+	/* our partition's amo variable ANDed with partid mask */
+	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+		BIT(partid)) != 0;
+}
+
+static int
+xpc_any_partition_engaged_sn2(void)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+	/* our partition's amo variable */
+	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0;
+}
+
+/* original protection values for each node */
+static u64 xpc_prot_vec_sn2[MAX_NUMNODES];
+
+/*
+ * Change protections to allow amo operations on non-Shub 1.1 systems.
+ */
+static enum xp_retval
+xpc_allow_amo_ops_sn2(struct amo *amos_page)
+{
+	enum xp_retval ret = xpSuccess;
+
+	/*
+	 * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST
+	 * collides with memory operations. On those systems we call
+	 * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead.
+	 */
+	if (!enable_shub_wars_1_1())
+		ret = xp_expand_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE);
+
+	return ret;
+}
+
+/*
+ * Change protections to allow amo operations on Shub 1.1 systems.
+ */
+static void
+xpc_allow_amo_ops_shub_wars_1_1_sn2(void)
+{
+	int node;
+	int nasid;
+
+	if (!enable_shub_wars_1_1())
+		return;
+
+	for_each_online_node(node) {
+		nasid = cnodeid_to_nasid(node);
+		/* save current protection values */
+		xpc_prot_vec_sn2[node] =
+		    (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid,
+						  SH1_MD_DQLP_MMR_DIR_PRIVEC0));
+		/* open up everything */
+		HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+					     SH1_MD_DQLP_MMR_DIR_PRIVEC0),
+		      -1UL);
+		HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+					     SH1_MD_DQRP_MMR_DIR_PRIVEC0),
+		      -1UL);
+	}
+}
+
+static enum xp_retval
+xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa,
+				   size_t *len)
+{
+	s64 status;
+	enum xp_retval ret;
+
+	status = sn_partition_reserved_page_pa((u64)buf, cookie,
+			(u64 *)rp_pa, (u64 *)len);
+	if (status == SALRET_OK)
+		ret = xpSuccess;
+	else if (status == SALRET_MORE_PASSES)
+		ret = xpNeedMoreInfo;
+	else
+		ret = xpSalError;
+
+	return ret;
+}
+
+
+static int
+xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp)
+{
+	struct amo *amos_page;
+	int i;
+	int ret;
+
+	xpc_vars_sn2 = XPC_RP_VARS(rp);
+
+	rp->sn.sn2.vars_pa = xp_pa(xpc_vars_sn2);
+
+	/* vars_part array follows immediately after vars */
+	xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
+							 XPC_RP_VARS_SIZE);
+
+	/*
+	 * Before clearing xpc_vars_sn2, see if a page of amos had been
+	 * previously allocated. If not we'll need to allocate one and set
+	 * permissions so that cross-partition amos are allowed.
+	 *
+	 * The allocated amo page needs MCA reporting to remain disabled after
+	 * XPC has unloaded.  To make this work, we keep a copy of the pointer
+	 * to this page (i.e., amos_page) in the struct xpc_vars_sn2 structure,
+	 * which is pointed to by the reserved page, and re-use that saved copy
+	 * on subsequent loads of XPC. This amo page is never freed, and its
+	 * memory protections are never restricted.
+	 */
+	amos_page = xpc_vars_sn2->amos_page;
+	if (amos_page == NULL) {
+		amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1));
+		if (amos_page == NULL) {
+			dev_err(xpc_part, "can't allocate page of amos\n");
+			return -ENOMEM;
+		}
+
+		/*
+		 * Open up amo-R/W to cpu.  This is done on Shub 1.1 systems
+		 * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called.
+		 */
+		ret = xpc_allow_amo_ops_sn2(amos_page);
+		if (ret != xpSuccess) {
+			dev_err(xpc_part, "can't allow amo operations\n");
+			uncached_free_page(__IA64_UNCACHED_OFFSET |
+					   TO_PHYS((u64)amos_page), 1);
+			return -EPERM;
+		}
+	}
+
+	/* clear xpc_vars_sn2 */
+	memset(xpc_vars_sn2, 0, sizeof(struct xpc_vars_sn2));
+
+	xpc_vars_sn2->version = XPC_V_VERSION;
+	xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0);
+	xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0);
+	xpc_vars_sn2->vars_part_pa = xp_pa(xpc_vars_part_sn2);
+	xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page);
+	xpc_vars_sn2->amos_page = amos_page;	/* save for next load of XPC */
+
+	/* clear xpc_vars_part_sn2 */
+	memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) *
+	       XP_MAX_NPARTITIONS_SN2);
+
+	/* initialize the activate IRQ related amo variables */
+	for (i = 0; i < xpc_nasid_mask_nlongs; i++)
+		(void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i);
+
+	/* initialize the engaged remote partitions related amo variables */
+	(void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2);
+	(void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2);
+
+	return 0;
+}
+
+static int
+xpc_hb_allowed_sn2(short partid, void *heartbeating_to_mask)
+{
+	return test_bit(partid, heartbeating_to_mask);
+}
+
+static void
+xpc_allow_hb_sn2(short partid)
+{
+	DBUG_ON(xpc_vars_sn2 == NULL);
+	set_bit(partid, xpc_vars_sn2->heartbeating_to_mask);
+}
+
+static void
+xpc_disallow_hb_sn2(short partid)
+{
+	DBUG_ON(xpc_vars_sn2 == NULL);
+	clear_bit(partid, xpc_vars_sn2->heartbeating_to_mask);
+}
+
+static void
+xpc_disallow_all_hbs_sn2(void)
+{
+	DBUG_ON(xpc_vars_sn2 == NULL);
+	bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, xp_max_npartitions);
+}
+
+static void
+xpc_increment_heartbeat_sn2(void)
+{
+	xpc_vars_sn2->heartbeat++;
+}
+
+static void
+xpc_offline_heartbeat_sn2(void)
+{
+	xpc_increment_heartbeat_sn2();
+	xpc_vars_sn2->heartbeat_offline = 1;
+}
+
+static void
+xpc_online_heartbeat_sn2(void)
+{
+	xpc_increment_heartbeat_sn2();
+	xpc_vars_sn2->heartbeat_offline = 0;
+}
+
+static void
+xpc_heartbeat_init_sn2(void)
+{
+	DBUG_ON(xpc_vars_sn2 == NULL);
+
+	bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
+	xpc_online_heartbeat_sn2();
+}
+
+static void
+xpc_heartbeat_exit_sn2(void)
+{
+	xpc_offline_heartbeat_sn2();
+}
+
+static enum xp_retval
+xpc_get_remote_heartbeat_sn2(struct xpc_partition *part)
+{
+	struct xpc_vars_sn2 *remote_vars;
+	enum xp_retval ret;
+
+	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
+
+	/* pull the remote vars structure that contains the heartbeat */
+	ret = xp_remote_memcpy(xp_pa(remote_vars),
+			       part->sn.sn2.remote_vars_pa,
+			       XPC_RP_VARS_SIZE);
+	if (ret != xpSuccess)
+		return ret;
+
+	dev_dbg(xpc_part, "partid=%d, heartbeat=%lld, last_heartbeat=%lld, "
+		"heartbeat_offline=%lld, HB_mask[0]=0x%lx\n", XPC_PARTID(part),
+		remote_vars->heartbeat, part->last_heartbeat,
+		remote_vars->heartbeat_offline,
+		remote_vars->heartbeating_to_mask[0]);
+
+	if ((remote_vars->heartbeat == part->last_heartbeat &&
+	    !remote_vars->heartbeat_offline) ||
+	    !xpc_hb_allowed_sn2(sn_partition_id,
+				remote_vars->heartbeating_to_mask)) {
+		ret = xpNoHeartbeat;
+	} else {
+		part->last_heartbeat = remote_vars->heartbeat;
+	}
+
+	return ret;
+}
+
+/*
+ * Get a copy of the remote partition's XPC variables from the reserved page.
+ *
+ * remote_vars points to a buffer that is cacheline aligned for BTE copies and
+ * assumed to be of size XPC_RP_VARS_SIZE.
+ */
+static enum xp_retval
+xpc_get_remote_vars_sn2(unsigned long remote_vars_pa,
+			struct xpc_vars_sn2 *remote_vars)
+{
+	enum xp_retval ret;
+
+	if (remote_vars_pa == 0)
+		return xpVarsNotSet;
+
+	/* pull over the cross partition variables */
+	ret = xp_remote_memcpy(xp_pa(remote_vars), remote_vars_pa,
+			       XPC_RP_VARS_SIZE);
+	if (ret != xpSuccess)
+		return ret;
+
+	if (XPC_VERSION_MAJOR(remote_vars->version) !=
+	    XPC_VERSION_MAJOR(XPC_V_VERSION)) {
+		return xpBadVersion;
+	}
+
+	return xpSuccess;
+}
+
+static void
+xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
+				     unsigned long remote_rp_pa, int nasid)
+{
+	xpc_send_local_activate_IRQ_sn2(nasid);
+}
+
+static void
+xpc_request_partition_reactivation_sn2(struct xpc_partition *part)
+{
+	xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid);
+}
+
+static void
+xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	unsigned long irq_flags;
+	struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
+					     (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
+					     sizeof(struct amo)));
+
+	local_irq_save(irq_flags);
+
+	/* set bit corresponding to our partid in remote partition's amo */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+			 BIT(sn_partition_id));
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IRQs and amos to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+
+	/*
+	 * Send activate IRQ to get other side to see that we've set our
+	 * bit in their deactivate request amo.
+	 */
+	xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+				  cnodeid_to_nasid(0),
+				  part_sn2->activate_IRQ_nasid,
+				  part_sn2->activate_IRQ_phys_cpuid);
+}
+
+static void
+xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
+					     (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
+					     sizeof(struct amo)));
+
+	local_irq_save(irq_flags);
+
+	/* clear bit corresponding to our partid in remote partition's amo */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~BIT(sn_partition_id));
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IRQs and amos to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static int
+xpc_partition_deactivation_requested_sn2(short partid)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_DEACTIVATE_REQUEST_AMO_SN2;
+
+	/* our partition's amo variable ANDed with partid mask */
+	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+		BIT(partid)) != 0;
+}
+
+/*
+ * Update the remote partition's info.
+ */
+static void
+xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
+			      unsigned long *remote_rp_ts_jiffies,
+			      unsigned long remote_rp_pa,
+			      unsigned long remote_vars_pa,
+			      struct xpc_vars_sn2 *remote_vars)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+
+	part->remote_rp_version = remote_rp_version;
+	dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
+		part->remote_rp_version);
+
+	part->remote_rp_ts_jiffies = *remote_rp_ts_jiffies;
+	dev_dbg(xpc_part, "  remote_rp_ts_jiffies = 0x%016lx\n",
+		part->remote_rp_ts_jiffies);
+
+	part->remote_rp_pa = remote_rp_pa;
+	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
+
+	part_sn2->remote_vars_pa = remote_vars_pa;
+	dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
+		part_sn2->remote_vars_pa);
+
+	part->last_heartbeat = remote_vars->heartbeat - 1;
+	dev_dbg(xpc_part, "  last_heartbeat = 0x%016llx\n",
+		part->last_heartbeat);
+
+	part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa;
+	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
+		part_sn2->remote_vars_part_pa);
+
+	part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid;
+	dev_dbg(xpc_part, "  activate_IRQ_nasid = 0x%x\n",
+		part_sn2->activate_IRQ_nasid);
+
+	part_sn2->activate_IRQ_phys_cpuid =
+	    remote_vars->activate_IRQ_phys_cpuid;
+	dev_dbg(xpc_part, "  activate_IRQ_phys_cpuid = 0x%x\n",
+		part_sn2->activate_IRQ_phys_cpuid);
+
+	part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa;
+	dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
+		part_sn2->remote_amos_page_pa);
+
+	part_sn2->remote_vars_version = remote_vars->version;
+	dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
+		part_sn2->remote_vars_version);
+}
+
+/*
+ * Prior code has determined the nasid which generated a activate IRQ.
+ * Inspect that nasid to determine if its partition needs to be activated
+ * or deactivated.
+ *
+ * A partition is considered "awaiting activation" if our partition
+ * flags indicate it is not active and it has a heartbeat.  A
+ * partition is considered "awaiting deactivation" if our partition
+ * flags indicate it is active but it has no heartbeat or it is not
+ * sending its heartbeat to us.
+ *
+ * To determine the heartbeat, the remote nasid must have a properly
+ * initialized reserved page.
+ */
+static void
+xpc_identify_activate_IRQ_req_sn2(int nasid)
+{
+	struct xpc_rsvd_page *remote_rp;
+	struct xpc_vars_sn2 *remote_vars;
+	unsigned long remote_rp_pa;
+	unsigned long remote_vars_pa;
+	int remote_rp_version;
+	int reactivate = 0;
+	unsigned long remote_rp_ts_jiffies = 0;
+	short partid;
+	struct xpc_partition *part;
+	struct xpc_partition_sn2 *part_sn2;
+	enum xp_retval ret;
+
+	/* pull over the reserved page structure */
+
+	remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer_sn2;
+
+	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
+	if (ret != xpSuccess) {
+		dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
+			 "which sent interrupt, reason=%d\n", nasid, ret);
+		return;
+	}
+
+	remote_vars_pa = remote_rp->sn.sn2.vars_pa;
+	remote_rp_version = remote_rp->version;
+	remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+
+	partid = remote_rp->SAL_partid;
+	part = &xpc_partitions[partid];
+	part_sn2 = &part->sn.sn2;
+
+	/* pull over the cross partition variables */
+
+	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
+
+	ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars);
+	if (ret != xpSuccess) {
+		dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
+			 "which sent interrupt, reason=%d\n", nasid, ret);
+
+		XPC_DEACTIVATE_PARTITION(part, ret);
+		return;
+	}
+
+	part->activate_IRQ_rcvd++;
+
+	dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
+		"%lld:0x%lx\n", (int)nasid, (int)partid,
+		part->activate_IRQ_rcvd,
+		remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]);
+
+	if (xpc_partition_disengaged(part) &&
+	    part->act_state == XPC_P_AS_INACTIVE) {
+
+		xpc_update_partition_info_sn2(part, remote_rp_version,
+					      &remote_rp_ts_jiffies,
+					      remote_rp_pa, remote_vars_pa,
+					      remote_vars);
+
+		if (xpc_partition_deactivation_requested_sn2(partid)) {
+			/*
+			 * Other side is waiting on us to deactivate even though
+			 * we already have.
+			 */
+			return;
+		}
+
+		xpc_activate_partition(part);
+		return;
+	}
+
+	DBUG_ON(part->remote_rp_version == 0);
+	DBUG_ON(part_sn2->remote_vars_version == 0);
+
+	if (remote_rp_ts_jiffies != part->remote_rp_ts_jiffies) {
+
+		/* the other side rebooted */
+
+		DBUG_ON(xpc_partition_engaged_sn2(partid));
+		DBUG_ON(xpc_partition_deactivation_requested_sn2(partid));
+
+		xpc_update_partition_info_sn2(part, remote_rp_version,
+					      &remote_rp_ts_jiffies,
+					      remote_rp_pa, remote_vars_pa,
+					      remote_vars);
+		reactivate = 1;
+	}
+
+	if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) {
+		/* still waiting on other side to disengage from us */
+		return;
+	}
+
+	if (reactivate)
+		XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+	else if (xpc_partition_deactivation_requested_sn2(partid))
+		XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
+}
+
+/*
+ * Loop through the activation amo variables and process any bits
+ * which are set.  Each bit indicates a nasid sending a partition
+ * activation or deactivation request.
+ *
+ * Return #of IRQs detected.
+ */
+int
+xpc_identify_activate_IRQ_sender_sn2(void)
+{
+	int l;
+	int b;
+	unsigned long nasid_mask_long;
+	u64 nasid;		/* remote nasid */
+	int n_IRQs_detected = 0;
+	struct amo *act_amos;
+
+	act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2;
+
+	/* scan through activate amo variables looking for non-zero entries */
+	for (l = 0; l < xpc_nasid_mask_nlongs; l++) {
+
+		if (xpc_exiting)
+			break;
+
+		nasid_mask_long = xpc_receive_IRQ_amo_sn2(&act_amos[l]);
+
+		b = find_first_bit(&nasid_mask_long, BITS_PER_LONG);
+		if (b >= BITS_PER_LONG) {
+			/* no IRQs from nasids in this amo variable */
+			continue;
+		}
+
+		dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", l,
+			nasid_mask_long);
+
+		/*
+		 * If this nasid has been added to the machine since
+		 * our partition was reset, this will retain the
+		 * remote nasid in our reserved pages machine mask.
+		 * This is used in the event of module reload.
+		 */
+		xpc_mach_nasids[l] |= nasid_mask_long;
+
+		/* locate the nasid(s) which sent interrupts */
+
+		do {
+			n_IRQs_detected++;
+			nasid = (l * BITS_PER_LONG + b) * 2;
+			dev_dbg(xpc_part, "interrupt from nasid %lld\n", nasid);
+			xpc_identify_activate_IRQ_req_sn2(nasid);
+
+			b = find_next_bit(&nasid_mask_long, BITS_PER_LONG,
+					  b + 1);
+		} while (b < BITS_PER_LONG);
+	}
+	return n_IRQs_detected;
+}
+
+static void
+xpc_process_activate_IRQ_rcvd_sn2(void)
+{
+	unsigned long irq_flags;
+	int n_IRQs_expected;
+	int n_IRQs_detected;
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	n_IRQs_expected = xpc_activate_IRQ_rcvd;
+	xpc_activate_IRQ_rcvd = 0;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+	n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2();
+	if (n_IRQs_detected < n_IRQs_expected) {
+		/* retry once to help avoid missing amo */
+		(void)xpc_identify_activate_IRQ_sender_sn2();
+	}
+}
+
+/*
+ * Setup the channel structures that are sn2 specific.
+ */
+static enum xp_retval
+xpc_setup_ch_structures_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	struct xpc_channel_sn2 *ch_sn2;
+	enum xp_retval retval;
+	int ret;
+	int cpuid;
+	int ch_number;
+	struct timer_list *timer;
+	short partid = XPC_PARTID(part);
+
+	/* allocate all the required GET/PUT values */
+
+	part_sn2->local_GPs =
+	    xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
+					  &part_sn2->local_GPs_base);
+	if (part_sn2->local_GPs == NULL) {
+		dev_err(xpc_chan, "can't get memory for local get/put "
+			"values\n");
+		return xpNoMemory;
+	}
+
+	part_sn2->remote_GPs =
+	    xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
+					  &part_sn2->remote_GPs_base);
+	if (part_sn2->remote_GPs == NULL) {
+		dev_err(xpc_chan, "can't get memory for remote get/put "
+			"values\n");
+		retval = xpNoMemory;
+		goto out_1;
+	}
+
+	part_sn2->remote_GPs_pa = 0;
+
+	/* allocate all the required open and close args */
+
+	part_sn2->local_openclose_args =
+	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
+					  GFP_KERNEL, &part_sn2->
+					  local_openclose_args_base);
+	if (part_sn2->local_openclose_args == NULL) {
+		dev_err(xpc_chan, "can't get memory for local connect args\n");
+		retval = xpNoMemory;
+		goto out_2;
+	}
+
+	part_sn2->remote_openclose_args_pa = 0;
+
+	part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid);
+
+	part_sn2->notify_IRQ_nasid = 0;
+	part_sn2->notify_IRQ_phys_cpuid = 0;
+	part_sn2->remote_chctl_amo_va = NULL;
+
+	sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid);
+	ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2,
+			  IRQF_SHARED, part_sn2->notify_IRQ_owner,
+			  (void *)(u64)partid);
+	if (ret != 0) {
+		dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
+			"errno=%d\n", -ret);
+		retval = xpLackOfResources;
+		goto out_3;
+	}
+
+	/* Setup a timer to check for dropped notify IRQs */
+	timer = &part_sn2->dropped_notify_IRQ_timer;
+	init_timer(timer);
+	timer->function =
+	    (void (*)(unsigned long))xpc_check_for_dropped_notify_IRQ_sn2;
+	timer->data = (unsigned long)part;
+	timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
+	add_timer(timer);
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch_sn2 = &part->channels[ch_number].sn.sn2;
+
+		ch_sn2->local_GP = &part_sn2->local_GPs[ch_number];
+		ch_sn2->local_openclose_args =
+		    &part_sn2->local_openclose_args[ch_number];
+
+		mutex_init(&ch_sn2->msg_to_pull_mutex);
+	}
+
+	/*
+	 * Setup the per partition specific variables required by the
+	 * remote partition to establish channel connections with us.
+	 *
+	 * The setting of the magic # indicates that these per partition
+	 * specific variables are ready to be used.
+	 */
+	xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs);
+	xpc_vars_part_sn2[partid].openclose_args_pa =
+	    xp_pa(part_sn2->local_openclose_args);
+	xpc_vars_part_sn2[partid].chctl_amo_pa =
+	    xp_pa(part_sn2->local_chctl_amo_va);
+	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
+	xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid);
+	xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid =
+	    cpu_physical_id(cpuid);
+	xpc_vars_part_sn2[partid].nchannels = part->nchannels;
+	xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1_SN2;
+
+	return xpSuccess;
+
+	/* setup of ch structures failed */
+out_3:
+	kfree(part_sn2->local_openclose_args_base);
+	part_sn2->local_openclose_args = NULL;
+out_2:
+	kfree(part_sn2->remote_GPs_base);
+	part_sn2->remote_GPs = NULL;
+out_1:
+	kfree(part_sn2->local_GPs_base);
+	part_sn2->local_GPs = NULL;
+	return retval;
+}
+
+/*
+ * Teardown the channel structures that are sn2 specific.
+ */
+static void
+xpc_teardown_ch_structures_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	short partid = XPC_PARTID(part);
+
+	/*
+	 * Indicate that the variables specific to the remote partition are no
+	 * longer available for its use.
+	 */
+	xpc_vars_part_sn2[partid].magic = 0;
+
+	/* in case we've still got outstanding timers registered... */
+	del_timer_sync(&part_sn2->dropped_notify_IRQ_timer);
+	free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
+
+	kfree(part_sn2->local_openclose_args_base);
+	part_sn2->local_openclose_args = NULL;
+	kfree(part_sn2->remote_GPs_base);
+	part_sn2->remote_GPs = NULL;
+	kfree(part_sn2->local_GPs_base);
+	part_sn2->local_GPs = NULL;
+	part_sn2->local_chctl_amo_va = NULL;
+}
+
+/*
+ * Create a wrapper that hides the underlying mechanism for pulling a cacheline
+ * (or multiple cachelines) from a remote partition.
+ *
+ * src_pa must be a cacheline aligned physical address on the remote partition.
+ * dst must be a cacheline aligned virtual address on this partition.
+ * cnt must be cacheline sized
+ */
+/* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */
+static enum xp_retval
+xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst,
+			       const unsigned long src_pa, size_t cnt)
+{
+	enum xp_retval ret;
+
+	DBUG_ON(src_pa != L1_CACHE_ALIGN(src_pa));
+	DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst));
+	DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
+
+	if (part->act_state == XPC_P_AS_DEACTIVATING)
+		return part->reason;
+
+	ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt);
+	if (ret != xpSuccess) {
+		dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed,"
+			" ret=%d\n", XPC_PARTID(part), ret);
+	}
+	return ret;
+}
+
+/*
+ * Pull the remote per partition specific variables from the specified
+ * partition.
+ */
+static enum xp_retval
+xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	u8 buffer[L1_CACHE_BYTES * 2];
+	struct xpc_vars_part_sn2 *pulled_entry_cacheline =
+	    (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer);
+	struct xpc_vars_part_sn2 *pulled_entry;
+	unsigned long remote_entry_cacheline_pa;
+	unsigned long remote_entry_pa;
+	short partid = XPC_PARTID(part);
+	enum xp_retval ret;
+
+	/* pull the cacheline that contains the variables we're interested in */
+
+	DBUG_ON(part_sn2->remote_vars_part_pa !=
+		L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa));
+	DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2);
+
+	remote_entry_pa = part_sn2->remote_vars_part_pa +
+	    sn_partition_id * sizeof(struct xpc_vars_part_sn2);
+
+	remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
+
+	pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline
+						    + (remote_entry_pa &
+						    (L1_CACHE_BYTES - 1)));
+
+	ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline,
+					     remote_entry_cacheline_pa,
+					     L1_CACHE_BYTES);
+	if (ret != xpSuccess) {
+		dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
+			"partition %d, ret=%d\n", partid, ret);
+		return ret;
+	}
+
+	/* see if they've been set up yet */
+
+	if (pulled_entry->magic != XPC_VP_MAGIC1_SN2 &&
+	    pulled_entry->magic != XPC_VP_MAGIC2_SN2) {
+
+		if (pulled_entry->magic != 0) {
+			dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
+				"partition %d has bad magic value (=0x%llx)\n",
+				partid, sn_partition_id, pulled_entry->magic);
+			return xpBadMagic;
+		}
+
+		/* they've not been initialized yet */
+		return xpRetry;
+	}
+
+	if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1_SN2) {
+
+		/* validate the variables */
+
+		if (pulled_entry->GPs_pa == 0 ||
+		    pulled_entry->openclose_args_pa == 0 ||
+		    pulled_entry->chctl_amo_pa == 0) {
+
+			dev_err(xpc_chan, "partition %d's XPC vars_part for "
+				"partition %d are not valid\n", partid,
+				sn_partition_id);
+			return xpInvalidAddress;
+		}
+
+		/* the variables we imported look to be valid */
+
+		part_sn2->remote_GPs_pa = pulled_entry->GPs_pa;
+		part_sn2->remote_openclose_args_pa =
+		    pulled_entry->openclose_args_pa;
+		part_sn2->remote_chctl_amo_va =
+		    (struct amo *)__va(pulled_entry->chctl_amo_pa);
+		part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid;
+		part_sn2->notify_IRQ_phys_cpuid =
+		    pulled_entry->notify_IRQ_phys_cpuid;
+
+		if (part->nchannels > pulled_entry->nchannels)
+			part->nchannels = pulled_entry->nchannels;
+
+		/* let the other side know that we've pulled their variables */
+
+		xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2_SN2;
+	}
+
+	if (pulled_entry->magic == XPC_VP_MAGIC1_SN2)
+		return xpRetry;
+
+	return xpSuccess;
+}
+
+/*
+ * Establish first contact with the remote partititon. This involves pulling
+ * the XPC per partition variables from the remote partition and waiting for
+ * the remote partition to pull ours.
+ */
+static enum xp_retval
+xpc_make_first_contact_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	enum xp_retval ret;
+
+	/*
+	 * Register the remote partition's amos with SAL so it can handle
+	 * and cleanup errors within that address range should the remote
+	 * partition go down. We don't unregister this range because it is
+	 * difficult to tell when outstanding writes to the remote partition
+	 * are finished and thus when it is safe to unregister. This should
+	 * not result in wasted space in the SAL xp_addr_region table because
+	 * we should get the same page for remote_amos_page_pa after module
+	 * reloads and system reboots.
+	 */
+	if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa,
+				       PAGE_SIZE, 1) < 0) {
+		dev_warn(xpc_part, "xpc_activating(%d) failed to register "
+			 "xp_addr region\n", XPC_PARTID(part));
+
+		ret = xpPhysAddrRegFailed;
+		XPC_DEACTIVATE_PARTITION(part, ret);
+		return ret;
+	}
+
+	/*
+	 * Send activate IRQ to get other side to activate if they've not
+	 * already begun to do so.
+	 */
+	xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+				  cnodeid_to_nasid(0),
+				  part_sn2->activate_IRQ_nasid,
+				  part_sn2->activate_IRQ_phys_cpuid);
+
+	while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) {
+		if (ret != xpRetry) {
+			XPC_DEACTIVATE_PARTITION(part, ret);
+			return ret;
+		}
+
+		dev_dbg(xpc_part, "waiting to make first contact with "
+			"partition %d\n", XPC_PARTID(part));
+
+		/* wait a 1/4 of a second or so */
+		(void)msleep_interruptible(250);
+
+		if (part->act_state == XPC_P_AS_DEACTIVATING)
+			return part->reason;
+	}
+
+	return xpSuccess;
+}
+
+/*
+ * Get the chctl flags and pull the openclose args and/or remote GPs as needed.
+ */
+static u64
+xpc_get_chctl_all_flags_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	unsigned long irq_flags;
+	union xpc_channel_ctl_flags chctl;
+	enum xp_retval ret;
+
+	/*
+	 * See if there are any chctl flags to be handled.
+	 */
+
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	chctl = part->chctl;
+	if (chctl.all_flags != 0)
+		part->chctl.all_flags = 0;
+
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+	if (xpc_any_openclose_chctl_flags_set(&chctl)) {
+		ret = xpc_pull_remote_cachelines_sn2(part, part->
+						     remote_openclose_args,
+						     part_sn2->
+						     remote_openclose_args_pa,
+						     XPC_OPENCLOSE_ARGS_SIZE);
+		if (ret != xpSuccess) {
+			XPC_DEACTIVATE_PARTITION(part, ret);
+
+			dev_dbg(xpc_chan, "failed to pull openclose args from "
+				"partition %d, ret=%d\n", XPC_PARTID(part),
+				ret);
+
+			/* don't bother processing chctl flags anymore */
+			chctl.all_flags = 0;
+		}
+	}
+
+	if (xpc_any_msg_chctl_flags_set(&chctl)) {
+		ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs,
+						     part_sn2->remote_GPs_pa,
+						     XPC_GP_SIZE);
+		if (ret != xpSuccess) {
+			XPC_DEACTIVATE_PARTITION(part, ret);
+
+			dev_dbg(xpc_chan, "failed to pull GPs from partition "
+				"%d, ret=%d\n", XPC_PARTID(part), ret);
+
+			/* don't bother processing chctl flags anymore */
+			chctl.all_flags = 0;
+		}
+	}
+
+	return chctl.all_flags;
+}
+
+/*
+ * Allocate the local message queue and the notify queue.
+ */
+static enum xp_retval
+xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	unsigned long irq_flags;
+	int nentries;
+	size_t nbytes;
+
+	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
+
+		nbytes = nentries * ch->entry_size;
+		ch_sn2->local_msgqueue =
+		    xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL,
+						  &ch_sn2->local_msgqueue_base);
+		if (ch_sn2->local_msgqueue == NULL)
+			continue;
+
+		nbytes = nentries * sizeof(struct xpc_notify_sn2);
+		ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL);
+		if (ch_sn2->notify_queue == NULL) {
+			kfree(ch_sn2->local_msgqueue_base);
+			ch_sn2->local_msgqueue = NULL;
+			continue;
+		}
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->local_nentries) {
+			dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
+				"partid=%d, channel=%d\n", nentries,
+				ch->local_nentries, ch->partid, ch->number);
+
+			ch->local_nentries = nentries;
+		}
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
+		"queue, partid=%d, channel=%d\n", ch->partid, ch->number);
+	return xpNoMemory;
+}
+
+/*
+ * Allocate the cached remote message queue.
+ */
+static enum xp_retval
+xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	unsigned long irq_flags;
+	int nentries;
+	size_t nbytes;
+
+	DBUG_ON(ch->remote_nentries <= 0);
+
+	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
+
+		nbytes = nentries * ch->entry_size;
+		ch_sn2->remote_msgqueue =
+		    xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2->
+						  remote_msgqueue_base);
+		if (ch_sn2->remote_msgqueue == NULL)
+			continue;
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->remote_nentries) {
+			dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
+				"partid=%d, channel=%d\n", nentries,
+				ch->remote_nentries, ch->partid, ch->number);
+
+			ch->remote_nentries = nentries;
+		}
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
+		"partid=%d, channel=%d\n", ch->partid, ch->number);
+	return xpNoMemory;
+}
+
+/*
+ * Allocate message queues and other stuff associated with a channel.
+ *
+ * Note: Assumes all of the channel sizes are filled in.
+ */
+static enum xp_retval
+xpc_setup_msg_structures_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	enum xp_retval ret;
+
+	DBUG_ON(ch->flags & XPC_C_SETUP);
+
+	ret = xpc_allocate_local_msgqueue_sn2(ch);
+	if (ret == xpSuccess) {
+
+		ret = xpc_allocate_remote_msgqueue_sn2(ch);
+		if (ret != xpSuccess) {
+			kfree(ch_sn2->local_msgqueue_base);
+			ch_sn2->local_msgqueue = NULL;
+			kfree(ch_sn2->notify_queue);
+			ch_sn2->notify_queue = NULL;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Free up message queues and other stuff that were allocated for the specified
+ * channel.
+ */
+static void
+xpc_teardown_msg_structures_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
+	ch_sn2->remote_msgqueue_pa = 0;
+
+	ch_sn2->local_GP->get = 0;
+	ch_sn2->local_GP->put = 0;
+	ch_sn2->remote_GP.get = 0;
+	ch_sn2->remote_GP.put = 0;
+	ch_sn2->w_local_GP.get = 0;
+	ch_sn2->w_local_GP.put = 0;
+	ch_sn2->w_remote_GP.get = 0;
+	ch_sn2->w_remote_GP.put = 0;
+	ch_sn2->next_msg_to_pull = 0;
+
+	if (ch->flags & XPC_C_SETUP) {
+		dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
+			ch->flags, ch->partid, ch->number);
+
+		kfree(ch_sn2->local_msgqueue_base);
+		ch_sn2->local_msgqueue = NULL;
+		kfree(ch_sn2->remote_msgqueue_base);
+		ch_sn2->remote_msgqueue = NULL;
+		kfree(ch_sn2->notify_queue);
+		ch_sn2->notify_queue = NULL;
+	}
+}
+
+/*
+ * Notify those who wanted to be notified upon delivery of their message.
+ */
+static void
+xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put)
+{
+	struct xpc_notify_sn2 *notify;
+	u8 notify_type;
+	s64 get = ch->sn.sn2.w_remote_GP.get - 1;
+
+	while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
+
+		notify = &ch->sn.sn2.notify_queue[get % ch->local_nentries];
+
+		/*
+		 * See if the notify entry indicates it was associated with
+		 * a message who's sender wants to be notified. It is possible
+		 * that it is, but someone else is doing or has done the
+		 * notification.
+		 */
+		notify_type = notify->type;
+		if (notify_type == 0 ||
+		    cmpxchg(&notify->type, notify_type, 0) != notify_type) {
+			continue;
+		}
+
+		DBUG_ON(notify_type != XPC_N_CALL);
+
+		atomic_dec(&ch->n_to_notify);
+
+		if (notify->func != NULL) {
+			dev_dbg(xpc_chan, "notify->func() called, notify=0x%p "
+				"msg_number=%lld partid=%d channel=%d\n",
+				(void *)notify, get, ch->partid, ch->number);
+
+			notify->func(reason, ch->partid, ch->number,
+				     notify->key);
+
+			dev_dbg(xpc_chan, "notify->func() returned, notify=0x%p"
+				" msg_number=%lld partid=%d channel=%d\n",
+				(void *)notify, get, ch->partid, ch->number);
+		}
+	}
+}
+
+static void
+xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch)
+{
+	xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put);
+}
+
+/*
+ * Clear some of the msg flags in the local message queue.
+ */
+static inline void
+xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	s64 get;
+
+	get = ch_sn2->w_remote_GP.get;
+	do {
+		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+					     (get % ch->local_nentries) *
+					     ch->entry_size);
+		DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+		msg->flags = 0;
+	} while (++get < ch_sn2->remote_GP.get);
+}
+
+/*
+ * Clear some of the msg flags in the remote message queue.
+ */
+static inline void
+xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	s64 put, remote_nentries = ch->remote_nentries;
+
+	/* flags are zeroed when the buffer is allocated */
+	if (ch_sn2->remote_GP.put < remote_nentries)
+		return;
+
+	put = max(ch_sn2->w_remote_GP.put, remote_nentries);
+	do {
+		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
+					     (put % remote_nentries) *
+					     ch->entry_size);
+		DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+		DBUG_ON(!(msg->flags & XPC_M_SN2_DONE));
+		DBUG_ON(msg->number != put - remote_nentries);
+		msg->flags = 0;
+	} while (++put < ch_sn2->remote_GP.put);
+}
+
+static int
+xpc_n_of_deliverable_payloads_sn2(struct xpc_channel *ch)
+{
+	return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get;
+}
+
+static void
+xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number)
+{
+	struct xpc_channel *ch = &part->channels[ch_number];
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	int npayloads_sent;
+
+	ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number];
+
+	/* See what, if anything, has changed for each connected channel */
+
+	xpc_msgqueue_ref(ch);
+
+	if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get &&
+	    ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) {
+		/* nothing changed since GPs were last pulled */
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	/*
+	 * First check to see if messages recently sent by us have been
+	 * received by the other side. (The remote GET value will have
+	 * changed since we last looked at it.)
+	 */
+
+	if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) {
+
+		/*
+		 * We need to notify any senders that want to be notified
+		 * that their sent messages have been received by their
+		 * intended recipients. We need to do this before updating
+		 * w_remote_GP.get so that we don't allocate the same message
+		 * queue entries prematurely (see xpc_allocate_msg()).
+		 */
+		if (atomic_read(&ch->n_to_notify) > 0) {
+			/*
+			 * Notify senders that messages sent have been
+			 * received and delivered by the other side.
+			 */
+			xpc_notify_senders_sn2(ch, xpMsgDelivered,
+					       ch_sn2->remote_GP.get);
+		}
+
+		/*
+		 * Clear msg->flags in previously sent messages, so that
+		 * they're ready for xpc_allocate_msg().
+		 */
+		xpc_clear_local_msgqueue_flags_sn2(ch);
+
+		ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get;
+
+		dev_dbg(xpc_chan, "w_remote_GP.get changed to %lld, partid=%d, "
+			"channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid,
+			ch->number);
+
+		/*
+		 * If anyone was waiting for message queue entries to become
+		 * available, wake them up.
+		 */
+		if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+			wake_up(&ch->msg_allocate_wq);
+	}
+
+	/*
+	 * Now check for newly sent messages by the other side. (The remote
+	 * PUT value will have changed since we last looked at it.)
+	 */
+
+	if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) {
+		/*
+		 * Clear msg->flags in previously received messages, so that
+		 * they're ready for xpc_get_deliverable_payload_sn2().
+		 */
+		xpc_clear_remote_msgqueue_flags_sn2(ch);
+
+		smp_wmb(); /* ensure flags have been cleared before bte_copy */
+		ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put;
+
+		dev_dbg(xpc_chan, "w_remote_GP.put changed to %lld, partid=%d, "
+			"channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid,
+			ch->number);
+
+		npayloads_sent = xpc_n_of_deliverable_payloads_sn2(ch);
+		if (npayloads_sent > 0) {
+			dev_dbg(xpc_chan, "msgs waiting to be copied and "
+				"delivered=%d, partid=%d, channel=%d\n",
+				npayloads_sent, ch->partid, ch->number);
+
+			if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
+				xpc_activate_kthreads(ch, npayloads_sent);
+		}
+	}
+
+	xpc_msgqueue_deref(ch);
+}
+
+static struct xpc_msg_sn2 *
+xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	unsigned long remote_msg_pa;
+	struct xpc_msg_sn2 *msg;
+	u32 msg_index;
+	u32 nmsgs;
+	u64 msg_offset;
+	enum xp_retval ret;
+
+	if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) {
+		/* we were interrupted by a signal */
+		return NULL;
+	}
+
+	while (get >= ch_sn2->next_msg_to_pull) {
+
+		/* pull as many messages as are ready and able to be pulled */
+
+		msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries;
+
+		DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put);
+		nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull;
+		if (msg_index + nmsgs > ch->remote_nentries) {
+			/* ignore the ones that wrap the msg queue for now */
+			nmsgs = ch->remote_nentries - msg_index;
+		}
+
+		msg_offset = msg_index * ch->entry_size;
+		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
+		    msg_offset);
+		remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset;
+
+		ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa,
+						     nmsgs * ch->entry_size);
+		if (ret != xpSuccess) {
+
+			dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
+				" msg %lld from partition %d, channel=%d, "
+				"ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull,
+				ch->partid, ch->number, ret);
+
+			XPC_DEACTIVATE_PARTITION(part, ret);
+
+			mutex_unlock(&ch_sn2->msg_to_pull_mutex);
+			return NULL;
+		}
+
+		ch_sn2->next_msg_to_pull += nmsgs;
+	}
+
+	mutex_unlock(&ch_sn2->msg_to_pull_mutex);
+
+	/* return the message we were looking for */
+	msg_offset = (get % ch->remote_nentries) * ch->entry_size;
+	msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + msg_offset);
+
+	return msg;
+}
+
+/*
+ * Get the next deliverable message's payload.
+ */
+static void *
+xpc_get_deliverable_payload_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	void *payload = NULL;
+	s64 get;
+
+	do {
+		if (ch->flags & XPC_C_DISCONNECTING)
+			break;
+
+		get = ch_sn2->w_local_GP.get;
+		smp_rmb();	/* guarantee that .get loads before .put */
+		if (get == ch_sn2->w_remote_GP.put)
+			break;
+
+		/* There are messages waiting to be pulled and delivered.
+		 * We need to try to secure one for ourselves. We'll do this
+		 * by trying to increment w_local_GP.get and hope that no one
+		 * else beats us to it. If they do, we'll we'll simply have
+		 * to try again for the next one.
+		 */
+
+		if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) {
+			/* we got the entry referenced by get */
+
+			dev_dbg(xpc_chan, "w_local_GP.get changed to %lld, "
+				"partid=%d, channel=%d\n", get + 1,
+				ch->partid, ch->number);
+
+			/* pull the message from the remote partition */
+
+			msg = xpc_pull_remote_msg_sn2(ch, get);
+
+			if (msg != NULL) {
+				DBUG_ON(msg->number != get);
+				DBUG_ON(msg->flags & XPC_M_SN2_DONE);
+				DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+
+				payload = &msg->payload;
+			}
+			break;
+		}
+
+	} while (1);
+
+	return payload;
+}
+
+/*
+ * Now we actually send the messages that are ready to be sent by advancing
+ * the local message queue's Put value and then send a chctl msgrequest to the
+ * recipient partition.
+ */
+static void
+xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	s64 put = initial_put + 1;
+	int send_msgrequest = 0;
+
+	while (1) {
+
+		while (1) {
+			if (put == ch_sn2->w_local_GP.put)
+				break;
+
+			msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
+						     local_msgqueue + (put %
+						     ch->local_nentries) *
+						     ch->entry_size);
+
+			if (!(msg->flags & XPC_M_SN2_READY))
+				break;
+
+			put++;
+		}
+
+		if (put == initial_put) {
+			/* nothing's changed */
+			break;
+		}
+
+		if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) !=
+		    initial_put) {
+			/* someone else beat us to it */
+			DBUG_ON(ch_sn2->local_GP->put < initial_put);
+			break;
+		}
+
+		/* we just set the new value of local_GP->put */
+
+		dev_dbg(xpc_chan, "local_GP->put changed to %lld, partid=%d, "
+			"channel=%d\n", put, ch->partid, ch->number);
+
+		send_msgrequest = 1;
+
+		/*
+		 * We need to ensure that the message referenced by
+		 * local_GP->put is not XPC_M_SN2_READY or that local_GP->put
+		 * equals w_local_GP.put, so we'll go have a look.
+		 */
+		initial_put = put;
+	}
+
+	if (send_msgrequest)
+		xpc_send_chctl_msgrequest_sn2(ch);
+}
+
+/*
+ * Allocate an entry for a message from the message queue associated with the
+ * specified channel.
+ */
+static enum xp_retval
+xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
+		     struct xpc_msg_sn2 **address_of_msg)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	enum xp_retval ret;
+	s64 put;
+
+	/*
+	 * Get the next available message entry from the local message queue.
+	 * If none are available, we'll make sure that we grab the latest
+	 * GP values.
+	 */
+	ret = xpTimeout;
+
+	while (1) {
+
+		put = ch_sn2->w_local_GP.put;
+		smp_rmb();	/* guarantee that .put loads before .get */
+		if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) {
+
+			/* There are available message entries. We need to try
+			 * to secure one for ourselves. We'll do this by trying
+			 * to increment w_local_GP.put as long as someone else
+			 * doesn't beat us to it. If they do, we'll have to
+			 * try again.
+			 */
+			if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) ==
+			    put) {
+				/* we got the entry referenced by put */
+				break;
+			}
+			continue;	/* try again */
+		}
+
+		/*
+		 * There aren't any available msg entries at this time.
+		 *
+		 * In waiting for a message entry to become available,
+		 * we set a timeout in case the other side is not sending
+		 * completion interrupts. This lets us fake a notify IRQ
+		 * that will cause the notify IRQ handler to fetch the latest
+		 * GP values as if an interrupt was sent by the other side.
+		 */
+		if (ret == xpTimeout)
+			xpc_send_chctl_local_msgrequest_sn2(ch);
+
+		if (flags & XPC_NOWAIT)
+			return xpNoWait;
+
+		ret = xpc_allocate_msg_wait(ch);
+		if (ret != xpInterrupted && ret != xpTimeout)
+			return ret;
+	}
+
+	/* get the message's address and initialize it */
+	msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+				     (put % ch->local_nentries) *
+				     ch->entry_size);
+
+	DBUG_ON(msg->flags != 0);
+	msg->number = put;
+
+	dev_dbg(xpc_chan, "w_local_GP.put changed to %lld; msg=0x%p, "
+		"msg_number=%lld, partid=%d, channel=%d\n", put + 1,
+		(void *)msg, msg->number, ch->partid, ch->number);
+
+	*address_of_msg = msg;
+	return xpSuccess;
+}
+
+/*
+ * Common code that does the actual sending of the message by advancing the
+ * local message queue's Put value and sends a chctl msgrequest to the
+ * partition the message is being sent to.
+ */
+static enum xp_retval
+xpc_send_payload_sn2(struct xpc_channel *ch, u32 flags, void *payload,
+		     u16 payload_size, u8 notify_type, xpc_notify_func func,
+		     void *key)
+{
+	enum xp_retval ret = xpSuccess;
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg = msg;
+	struct xpc_notify_sn2 *notify = notify;
+	s64 msg_number;
+	s64 put;
+
+	DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
+
+	if (XPC_MSG_SIZE(payload_size) > ch->entry_size)
+		return xpPayloadTooBig;
+
+	xpc_msgqueue_ref(ch);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		ret = ch->reason;
+		goto out_1;
+	}
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		ret = xpNotConnected;
+		goto out_1;
+	}
+
+	ret = xpc_allocate_msg_sn2(ch, flags, &msg);
+	if (ret != xpSuccess)
+		goto out_1;
+
+	msg_number = msg->number;
+
+	if (notify_type != 0) {
+		/*
+		 * Tell the remote side to send an ACK interrupt when the
+		 * message has been delivered.
+		 */
+		msg->flags |= XPC_M_SN2_INTERRUPT;
+
+		atomic_inc(&ch->n_to_notify);
+
+		notify = &ch_sn2->notify_queue[msg_number % ch->local_nentries];
+		notify->func = func;
+		notify->key = key;
+		notify->type = notify_type;
+
+		/* ??? Is a mb() needed here? */
+
+		if (ch->flags & XPC_C_DISCONNECTING) {
+			/*
+			 * An error occurred between our last error check and
+			 * this one. We will try to clear the type field from
+			 * the notify entry. If we succeed then
+			 * xpc_disconnect_channel() didn't already process
+			 * the notify entry.
+			 */
+			if (cmpxchg(&notify->type, notify_type, 0) ==
+			    notify_type) {
+				atomic_dec(&ch->n_to_notify);
+				ret = ch->reason;
+			}
+			goto out_1;
+		}
+	}
+
+	memcpy(&msg->payload, payload, payload_size);
+
+	msg->flags |= XPC_M_SN2_READY;
+
+	/*
+	 * The preceding store of msg->flags must occur before the following
+	 * load of local_GP->put.
+	 */
+	smp_mb();
+
+	/* see if the message is next in line to be sent, if so send it */
+
+	put = ch_sn2->local_GP->put;
+	if (put == msg_number)
+		xpc_send_msgs_sn2(ch, put);
+
+out_1:
+	xpc_msgqueue_deref(ch);
+	return ret;
+}
+
+/*
+ * Now we actually acknowledge the messages that have been delivered and ack'd
+ * by advancing the cached remote message queue's Get value and if requested
+ * send a chctl msgrequest to the message sender's partition.
+ *
+ * If a message has XPC_M_SN2_INTERRUPT set, send an interrupt to the partition
+ * that sent the message.
+ */
+static void
+xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	s64 get = initial_get + 1;
+	int send_msgrequest = 0;
+
+	while (1) {
+
+		while (1) {
+			if (get == ch_sn2->w_local_GP.get)
+				break;
+
+			msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
+						     remote_msgqueue + (get %
+						     ch->remote_nentries) *
+						     ch->entry_size);
+
+			if (!(msg->flags & XPC_M_SN2_DONE))
+				break;
+
+			msg_flags |= msg->flags;
+			get++;
+		}
+
+		if (get == initial_get) {
+			/* nothing's changed */
+			break;
+		}
+
+		if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) !=
+		    initial_get) {
+			/* someone else beat us to it */
+			DBUG_ON(ch_sn2->local_GP->get <= initial_get);
+			break;
+		}
+
+		/* we just set the new value of local_GP->get */
+
+		dev_dbg(xpc_chan, "local_GP->get changed to %lld, partid=%d, "
+			"channel=%d\n", get, ch->partid, ch->number);
+
+		send_msgrequest = (msg_flags & XPC_M_SN2_INTERRUPT);
+
+		/*
+		 * We need to ensure that the message referenced by
+		 * local_GP->get is not XPC_M_SN2_DONE or that local_GP->get
+		 * equals w_local_GP.get, so we'll go have a look.
+		 */
+		initial_get = get;
+	}
+
+	if (send_msgrequest)
+		xpc_send_chctl_msgrequest_sn2(ch);
+}
+
+static void
+xpc_received_payload_sn2(struct xpc_channel *ch, void *payload)
+{
+	struct xpc_msg_sn2 *msg;
+	s64 msg_number;
+	s64 get;
+
+	msg = container_of(payload, struct xpc_msg_sn2, payload);
+	msg_number = msg->number;
+
+	dev_dbg(xpc_chan, "msg=0x%p, msg_number=%lld, partid=%d, channel=%d\n",
+		(void *)msg, msg_number, ch->partid, ch->number);
+
+	DBUG_ON((((u64)msg - (u64)ch->sn.sn2.remote_msgqueue) / ch->entry_size) !=
+		msg_number % ch->remote_nentries);
+	DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+	DBUG_ON(msg->flags & XPC_M_SN2_DONE);
+
+	msg->flags |= XPC_M_SN2_DONE;
+
+	/*
+	 * The preceding store of msg->flags must occur before the following
+	 * load of local_GP->get.
+	 */
+	smp_mb();
+
+	/*
+	 * See if this message is next in line to be acknowledged as having
+	 * been delivered.
+	 */
+	get = ch->sn.sn2.local_GP->get;
+	if (get == msg_number)
+		xpc_acknowledge_msgs_sn2(ch, get, msg->flags);
+}
+
+static struct xpc_arch_operations xpc_arch_ops_sn2 = {
+	.setup_partitions = xpc_setup_partitions_sn2,
+	.teardown_partitions = xpc_teardown_partitions_sn2,
+	.process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2,
+	.get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2,
+	.setup_rsvd_page = xpc_setup_rsvd_page_sn2,
+
+	.allow_hb = xpc_allow_hb_sn2,
+	.disallow_hb = xpc_disallow_hb_sn2,
+	.disallow_all_hbs = xpc_disallow_all_hbs_sn2,
+	.increment_heartbeat = xpc_increment_heartbeat_sn2,
+	.offline_heartbeat = xpc_offline_heartbeat_sn2,
+	.online_heartbeat = xpc_online_heartbeat_sn2,
+	.heartbeat_init = xpc_heartbeat_init_sn2,
+	.heartbeat_exit = xpc_heartbeat_exit_sn2,
+	.get_remote_heartbeat = xpc_get_remote_heartbeat_sn2,
+
+	.request_partition_activation =
+		xpc_request_partition_activation_sn2,
+	.request_partition_reactivation =
+		xpc_request_partition_reactivation_sn2,
+	.request_partition_deactivation =
+		xpc_request_partition_deactivation_sn2,
+	.cancel_partition_deactivation_request =
+		xpc_cancel_partition_deactivation_request_sn2,
+
+	.setup_ch_structures = xpc_setup_ch_structures_sn2,
+	.teardown_ch_structures = xpc_teardown_ch_structures_sn2,
+
+	.make_first_contact = xpc_make_first_contact_sn2,
+
+	.get_chctl_all_flags = xpc_get_chctl_all_flags_sn2,
+	.send_chctl_closerequest = xpc_send_chctl_closerequest_sn2,
+	.send_chctl_closereply = xpc_send_chctl_closereply_sn2,
+	.send_chctl_openrequest = xpc_send_chctl_openrequest_sn2,
+	.send_chctl_openreply = xpc_send_chctl_openreply_sn2,
+	.send_chctl_opencomplete = xpc_send_chctl_opencomplete_sn2,
+	.process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2,
+
+	.save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2,
+
+	.setup_msg_structures = xpc_setup_msg_structures_sn2,
+	.teardown_msg_structures = xpc_teardown_msg_structures_sn2,
+
+	.indicate_partition_engaged = xpc_indicate_partition_engaged_sn2,
+	.indicate_partition_disengaged = xpc_indicate_partition_disengaged_sn2,
+	.partition_engaged = xpc_partition_engaged_sn2,
+	.any_partition_engaged = xpc_any_partition_engaged_sn2,
+	.assume_partition_disengaged = xpc_assume_partition_disengaged_sn2,
+
+	.n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2,
+	.send_payload = xpc_send_payload_sn2,
+	.get_deliverable_payload = xpc_get_deliverable_payload_sn2,
+	.received_payload = xpc_received_payload_sn2,
+	.notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2,
+};
+
+int
+xpc_init_sn2(void)
+{
+	int ret;
+	size_t buf_size;
+
+	xpc_arch_ops = xpc_arch_ops_sn2;
+
+	if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) {
+		dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is "
+			"larger than %d\n", XPC_MSG_HDR_MAX_SIZE);
+		return -E2BIG;
+	}
+
+	buf_size = max(XPC_RP_VARS_SIZE,
+		       XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2);
+	xpc_remote_copy_buffer_sn2 = xpc_kmalloc_cacheline_aligned(buf_size,
+								   GFP_KERNEL,
+					      &xpc_remote_copy_buffer_base_sn2);
+	if (xpc_remote_copy_buffer_sn2 == NULL) {
+		dev_err(xpc_part, "can't get memory for remote copy buffer\n");
+		return -ENOMEM;
+	}
+
+	/* open up protections for IPI and [potentially] amo operations */
+	xpc_allow_IPI_ops_sn2();
+	xpc_allow_amo_ops_shub_wars_1_1_sn2();
+
+	/*
+	 * This is safe to do before the xpc_hb_checker thread has started
+	 * because the handler releases a wait queue.  If an interrupt is
+	 * received before the thread is waiting, it will not go to sleep,
+	 * but rather immediately process the interrupt.
+	 */
+	ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0,
+			  "xpc hb", NULL);
+	if (ret != 0) {
+		dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
+			"errno=%d\n", -ret);
+		xpc_disallow_IPI_ops_sn2();
+		kfree(xpc_remote_copy_buffer_base_sn2);
+	}
+	return ret;
+}
+
+void
+xpc_exit_sn2(void)
+{
+	free_irq(SGI_XPC_ACTIVATE, NULL);
+	xpc_disallow_IPI_ops_sn2();
+	kfree(xpc_remote_copy_buffer_base_sn2);
+}
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
new file mode 100644
index 00000000000..8725d5e8ab0
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -0,0 +1,1749 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) uv-based functions.
+ *
+ *     Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <asm/uv/uv_hub.h>
+#if defined CONFIG_X86_64
+#include <asm/uv/bios.h>
+#include <asm/uv/uv_irq.h>
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+#include <asm/sn/intr.h>
+#include <asm/sn/sn_sal.h>
+#endif
+#include "../sgi-gru/gru.h"
+#include "../sgi-gru/grukservices.h"
+#include "xpc.h"
+
+#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+struct uv_IO_APIC_route_entry {
+	__u64	vector		:  8,
+		delivery_mode	:  3,
+		dest_mode	:  1,
+		delivery_status	:  1,
+		polarity	:  1,
+		__reserved_1	:  1,
+		trigger		:  1,
+		mask		:  1,
+		__reserved_2	: 15,
+		dest		: 32;
+};
+#endif
+
+static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
+
+#define XPC_ACTIVATE_MSG_SIZE_UV	(1 * GRU_CACHE_LINE_BYTES)
+#define XPC_ACTIVATE_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
+					 XPC_ACTIVATE_MSG_SIZE_UV)
+#define XPC_ACTIVATE_IRQ_NAME		"xpc_activate"
+
+#define XPC_NOTIFY_MSG_SIZE_UV		(2 * GRU_CACHE_LINE_BYTES)
+#define XPC_NOTIFY_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
+					 XPC_NOTIFY_MSG_SIZE_UV)
+#define XPC_NOTIFY_IRQ_NAME		"xpc_notify"
+
+static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
+static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
+
+static int
+xpc_setup_partitions_uv(void)
+{
+	short partid;
+	struct xpc_partition_uv *part_uv;
+
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part_uv = &xpc_partitions[partid].sn.uv;
+
+		mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex);
+		spin_lock_init(&part_uv->flags_lock);
+		part_uv->remote_act_state = XPC_P_AS_INACTIVE;
+	}
+	return 0;
+}
+
+static void
+xpc_teardown_partitions_uv(void)
+{
+	short partid;
+	struct xpc_partition_uv *part_uv;
+	unsigned long irq_flags;
+
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part_uv = &xpc_partitions[partid].sn.uv;
+
+		if (part_uv->cached_activate_gru_mq_desc != NULL) {
+			mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
+			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+			part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+			kfree(part_uv->cached_activate_gru_mq_desc);
+			part_uv->cached_activate_gru_mq_desc = NULL;
+			mutex_unlock(&part_uv->
+				     cached_activate_gru_mq_desc_mutex);
+		}
+	}
+}
+
+static int
+xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
+{
+	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+
+#if defined CONFIG_X86_64
+	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
+			UV_AFFINITY_CPU);
+	if (mq->irq < 0) {
+		dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
+			-mq->irq);
+		return mq->irq;
+	}
+
+	mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0)
+		mq->irq = SGI_XPC_ACTIVATE;
+	else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0)
+		mq->irq = SGI_XPC_NOTIFY;
+	else
+		return -EINVAL;
+
+	mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq;
+	uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mq->mmr_value);
+#else
+	#error not a supported configuration
+#endif
+
+	return 0;
+}
+
+static void
+xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
+{
+#if defined CONFIG_X86_64
+	uv_teardown_irq(mq->irq);
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	int mmr_pnode;
+	unsigned long mmr_value;
+
+	mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+	mmr_value = 1UL << 16;
+
+	uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value);
+#else
+	#error not a supported configuration
+#endif
+}
+
+static int
+xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
+{
+	int ret;
+
+#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+
+	ret = sn_mq_watchlist_alloc(mmr_pnode, (void *)uv_gpa(mq->address),
+				    mq->order, &mq->mmr_offset);
+	if (ret < 0) {
+		dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n",
+			ret);
+		return -EBUSY;
+	}
+#elif defined CONFIG_X86_64
+	ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address),
+					 mq->order, &mq->mmr_offset);
+	if (ret < 0) {
+		dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, "
+			"ret=%d\n", ret);
+		return ret;
+	}
+#else
+	#error not a supported configuration
+#endif
+
+	mq->watchlist_num = ret;
+	return 0;
+}
+
+static void
+xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq)
+{
+	int ret;
+	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+
+#if defined CONFIG_X86_64
+	ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num);
+	BUG_ON(ret != BIOS_STATUS_SUCCESS);
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num);
+	BUG_ON(ret != SALRET_OK);
+#else
+	#error not a supported configuration
+#endif
+}
+
+static struct xpc_gru_mq_uv *
+xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
+		     irq_handler_t irq_handler)
+{
+	enum xp_retval xp_ret;
+	int ret;
+	int nid;
+	int nasid;
+	int pg_order;
+	struct page *page;
+	struct xpc_gru_mq_uv *mq;
+	struct uv_IO_APIC_route_entry *mmr_value;
+
+	mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL);
+	if (mq == NULL) {
+		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
+			"a xpc_gru_mq_uv structure\n");
+		ret = -ENOMEM;
+		goto out_0;
+	}
+
+	mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc),
+				  GFP_KERNEL);
+	if (mq->gru_mq_desc == NULL) {
+		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
+			"a gru_message_queue_desc structure\n");
+		ret = -ENOMEM;
+		goto out_1;
+	}
+
+	pg_order = get_order(mq_size);
+	mq->order = pg_order + PAGE_SHIFT;
+	mq_size = 1UL << mq->order;
+
+	mq->mmr_blade = uv_cpu_to_blade_id(cpu);
+
+	nid = cpu_to_node(cpu);
+	page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+				pg_order);
+	if (page == NULL) {
+		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
+			"bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
+		ret = -ENOMEM;
+		goto out_2;
+	}
+	mq->address = page_address(page);
+
+	/* enable generation of irq when GRU mq operation occurs to this mq */
+	ret = xpc_gru_mq_watchlist_alloc_uv(mq);
+	if (ret != 0)
+		goto out_3;
+
+	ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name);
+	if (ret != 0)
+		goto out_4;
+
+	ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL);
+	if (ret != 0) {
+		dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
+			mq->irq, -ret);
+		goto out_5;
+	}
+
+	nasid = UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpu));
+
+	mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value;
+	ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size,
+				     nasid, mmr_value->vector, mmr_value->dest);
+	if (ret != 0) {
+		dev_err(xpc_part, "gru_create_message_queue() returned "
+			"error=%d\n", ret);
+		ret = -EINVAL;
+		goto out_6;
+	}
+
+	/* allow other partitions to access this GRU mq */
+	xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size);
+	if (xp_ret != xpSuccess) {
+		ret = -EACCES;
+		goto out_6;
+	}
+
+	return mq;
+
+	/* something went wrong */
+out_6:
+	free_irq(mq->irq, NULL);
+out_5:
+	xpc_release_gru_mq_irq_uv(mq);
+out_4:
+	xpc_gru_mq_watchlist_free_uv(mq);
+out_3:
+	free_pages((unsigned long)mq->address, pg_order);
+out_2:
+	kfree(mq->gru_mq_desc);
+out_1:
+	kfree(mq);
+out_0:
+	return ERR_PTR(ret);
+}
+
+static void
+xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq)
+{
+	unsigned int mq_size;
+	int pg_order;
+	int ret;
+
+	/* disallow other partitions to access GRU mq */
+	mq_size = 1UL << mq->order;
+	ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size);
+	BUG_ON(ret != xpSuccess);
+
+	/* unregister irq handler and release mq irq/vector mapping */
+	free_irq(mq->irq, NULL);
+	xpc_release_gru_mq_irq_uv(mq);
+
+	/* disable generation of irq when GRU mq op occurs to this mq */
+	xpc_gru_mq_watchlist_free_uv(mq);
+
+	pg_order = mq->order - PAGE_SHIFT;
+	free_pages((unsigned long)mq->address, pg_order);
+
+	kfree(mq);
+}
+
+static enum xp_retval
+xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg,
+		 size_t msg_size)
+{
+	enum xp_retval xp_ret;
+	int ret;
+
+	while (1) {
+		ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size);
+		if (ret == MQE_OK) {
+			xp_ret = xpSuccess;
+			break;
+		}
+
+		if (ret == MQE_QUEUE_FULL) {
+			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
+				"error=MQE_QUEUE_FULL\n");
+			/* !!! handle QLimit reached; delay & try again */
+			/* ??? Do we add a limit to the number of retries? */
+			(void)msleep_interruptible(10);
+		} else if (ret == MQE_CONGESTION) {
+			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
+				"error=MQE_CONGESTION\n");
+			/* !!! handle LB Overflow; simply try again */
+			/* ??? Do we add a limit to the number of retries? */
+		} else {
+			/* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
+			dev_err(xpc_chan, "gru_send_message_gpa() returned "
+				"error=%d\n", ret);
+			xp_ret = xpGruSendMqError;
+			break;
+		}
+	}
+	return xp_ret;
+}
+
+static void
+xpc_process_activate_IRQ_rcvd_uv(void)
+{
+	unsigned long irq_flags;
+	short partid;
+	struct xpc_partition *part;
+	u8 act_state_req;
+
+	DBUG_ON(xpc_activate_IRQ_rcvd == 0);
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part = &xpc_partitions[partid];
+
+		if (part->sn.uv.act_state_req == 0)
+			continue;
+
+		xpc_activate_IRQ_rcvd--;
+		BUG_ON(xpc_activate_IRQ_rcvd < 0);
+
+		act_state_req = part->sn.uv.act_state_req;
+		part->sn.uv.act_state_req = 0;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
+			if (part->act_state == XPC_P_AS_INACTIVE)
+				xpc_activate_partition(part);
+			else if (part->act_state == XPC_P_AS_DEACTIVATING)
+				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+
+		} else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
+			if (part->act_state == XPC_P_AS_INACTIVE)
+				xpc_activate_partition(part);
+			else
+				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+
+		} else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
+			XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
+
+		} else {
+			BUG();
+		}
+
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (xpc_activate_IRQ_rcvd == 0)
+			break;
+	}
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+}
+
+static void
+xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
+			      struct xpc_activate_mq_msghdr_uv *msg_hdr,
+			      int *wakeup_hb_checker)
+{
+	unsigned long irq_flags;
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	struct xpc_openclose_args *args;
+
+	part_uv->remote_act_state = msg_hdr->act_state;
+
+	switch (msg_hdr->type) {
+	case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
+		/* syncing of remote_act_state was just done above */
+		break;
+
+	case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
+		struct xpc_activate_mq_msg_activate_req_uv *msg;
+
+		/*
+		 * ??? Do we deal here with ts_jiffies being different
+		 * ??? if act_state != XPC_P_AS_INACTIVE instead of
+		 * ??? below?
+		 */
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_activate_req_uv, hdr);
+
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
+		part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
+		part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
+		part_uv->heartbeat_gpa = msg->heartbeat_gpa;
+
+		if (msg->activate_gru_mq_desc_gpa !=
+		    part_uv->activate_gru_mq_desc_gpa) {
+			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+			part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+			part_uv->activate_gru_mq_desc_gpa =
+			    msg->activate_gru_mq_desc_gpa;
+		}
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		(*wakeup_hb_checker)++;
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
+		struct xpc_activate_mq_msg_deactivate_req_uv *msg;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_deactivate_req_uv, hdr);
+
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+		part_uv->reason = msg->reason;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		(*wakeup_hb_checker)++;
+		return;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
+		struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_closerequest_uv,
+				   hdr);
+		args = &part->remote_openclose_args[msg->ch_number];
+		args->reason = msg->reason;
+
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
+		struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_closereply_uv,
+				   hdr);
+
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
+		struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_openrequest_uv,
+				   hdr);
+		args = &part->remote_openclose_args[msg->ch_number];
+		args->entry_size = msg->entry_size;
+		args->local_nentries = msg->local_nentries;
+
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
+		struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_openreply_uv, hdr);
+		args = &part->remote_openclose_args[msg->ch_number];
+		args->remote_nentries = msg->remote_nentries;
+		args->local_nentries = msg->local_nentries;
+		args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa;
+
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: {
+		struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg;
+
+		msg = container_of(msg_hdr, struct
+				xpc_activate_mq_msg_chctl_opencomplete_uv, hdr);
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+	}
+	case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags |= XPC_P_ENGAGED_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+		break;
+
+	case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags &= ~XPC_P_ENGAGED_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+		break;
+
+	default:
+		dev_err(xpc_part, "received unknown activate_mq msg type=%d "
+			"from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
+
+		/* get hb checker to deactivate from the remote partition */
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+		part_uv->reason = xpBadMsgType;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		(*wakeup_hb_checker)++;
+		return;
+	}
+
+	if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
+	    part->remote_rp_ts_jiffies != 0) {
+		/*
+		 * ??? Does what we do here need to be sensitive to
+		 * ??? act_state or remote_act_state?
+		 */
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		(*wakeup_hb_checker)++;
+	}
+}
+
+static irqreturn_t
+xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
+{
+	struct xpc_activate_mq_msghdr_uv *msg_hdr;
+	short partid;
+	struct xpc_partition *part;
+	int wakeup_hb_checker = 0;
+	int part_referenced;
+
+	while (1) {
+		msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc);
+		if (msg_hdr == NULL)
+			break;
+
+		partid = msg_hdr->partid;
+		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+			dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
+				"received invalid partid=0x%x in message\n",
+				partid);
+		} else {
+			part = &xpc_partitions[partid];
+
+			part_referenced = xpc_part_ref(part);
+			xpc_handle_activate_mq_msg_uv(part, msg_hdr,
+						      &wakeup_hb_checker);
+			if (part_referenced)
+				xpc_part_deref(part);
+		}
+
+		gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr);
+	}
+
+	if (wakeup_hb_checker)
+		wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+	return IRQ_HANDLED;
+}
+
+static enum xp_retval
+xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc,
+				unsigned long gru_mq_desc_gpa)
+{
+	enum xp_retval ret;
+
+	ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa,
+			       sizeof(struct gru_message_queue_desc));
+	if (ret == xpSuccess)
+		gru_mq_desc->mq = NULL;
+
+	return ret;
+}
+
+static enum xp_retval
+xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
+			 int msg_type)
+{
+	struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	struct gru_message_queue_desc *gru_mq_desc;
+	unsigned long irq_flags;
+	enum xp_retval ret;
+
+	DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
+
+	msg_hdr->type = msg_type;
+	msg_hdr->partid = xp_partition_id;
+	msg_hdr->act_state = part->act_state;
+	msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
+
+	mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
+again:
+	if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) {
+		gru_mq_desc = part_uv->cached_activate_gru_mq_desc;
+		if (gru_mq_desc == NULL) {
+			gru_mq_desc = kmalloc(sizeof(struct
+					      gru_message_queue_desc),
+					      GFP_KERNEL);
+			if (gru_mq_desc == NULL) {
+				ret = xpNoMemory;
+				goto done;
+			}
+			part_uv->cached_activate_gru_mq_desc = gru_mq_desc;
+		}
+
+		ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc,
+						      part_uv->
+						      activate_gru_mq_desc_gpa);
+		if (ret != xpSuccess)
+			goto done;
+
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+	}
+
+	/* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
+	ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg,
+			       msg_size);
+	if (ret != xpSuccess) {
+		smp_rmb();	/* ensure a fresh copy of part_uv->flags */
+		if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV))
+			goto again;
+	}
+done:
+	mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex);
+	return ret;
+}
+
+static void
+xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
+			      size_t msg_size, int msg_type)
+{
+	enum xp_retval ret;
+
+	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
+	if (unlikely(ret != xpSuccess))
+		XPC_DEACTIVATE_PARTITION(part, ret);
+}
+
+static void
+xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
+			 void *msg, size_t msg_size, int msg_type)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	enum xp_retval ret;
+
+	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
+	if (unlikely(ret != xpSuccess)) {
+		if (irq_flags != NULL)
+			spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+		XPC_DEACTIVATE_PARTITION(part, ret);
+
+		if (irq_flags != NULL)
+			spin_lock_irqsave(&ch->lock, *irq_flags);
+	}
+}
+
+static void
+xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
+{
+	unsigned long irq_flags;
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+
+	/*
+	 * !!! Make our side think that the remote partition sent an activate
+	 * !!! mq message our way by doing what the activate IRQ handler would
+	 * !!! do had one really been sent.
+	 */
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	if (part_uv->act_state_req == 0)
+		xpc_activate_IRQ_rcvd++;
+	part_uv->act_state_req = act_state_req;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
+}
+
+static enum xp_retval
+xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
+				  size_t *len)
+{
+	s64 status;
+	enum xp_retval ret;
+
+#if defined CONFIG_X86_64
+	status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa,
+					  (u64 *)len);
+	if (status == BIOS_STATUS_SUCCESS)
+		ret = xpSuccess;
+	else if (status == BIOS_STATUS_MORE_PASSES)
+		ret = xpNeedMoreInfo;
+	else
+		ret = xpBiosError;
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len);
+	if (status == SALRET_OK)
+		ret = xpSuccess;
+	else if (status == SALRET_MORE_PASSES)
+		ret = xpNeedMoreInfo;
+	else
+		ret = xpSalError;
+
+#else
+	#error not a supported configuration
+#endif
+
+	return ret;
+}
+
+static int
+xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp)
+{
+	xpc_heartbeat_uv =
+	    &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
+	rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
+	rp->sn.uv.activate_gru_mq_desc_gpa =
+	    uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
+	return 0;
+}
+
+static void
+xpc_allow_hb_uv(short partid)
+{
+}
+
+static void
+xpc_disallow_hb_uv(short partid)
+{
+}
+
+static void
+xpc_disallow_all_hbs_uv(void)
+{
+}
+
+static void
+xpc_increment_heartbeat_uv(void)
+{
+	xpc_heartbeat_uv->value++;
+}
+
+static void
+xpc_offline_heartbeat_uv(void)
+{
+	xpc_increment_heartbeat_uv();
+	xpc_heartbeat_uv->offline = 1;
+}
+
+static void
+xpc_online_heartbeat_uv(void)
+{
+	xpc_increment_heartbeat_uv();
+	xpc_heartbeat_uv->offline = 0;
+}
+
+static void
+xpc_heartbeat_init_uv(void)
+{
+	xpc_heartbeat_uv->value = 1;
+	xpc_heartbeat_uv->offline = 0;
+}
+
+static void
+xpc_heartbeat_exit_uv(void)
+{
+	xpc_offline_heartbeat_uv();
+}
+
+static enum xp_retval
+xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
+{
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	enum xp_retval ret;
+
+	ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
+			       part_uv->heartbeat_gpa,
+			       sizeof(struct xpc_heartbeat_uv));
+	if (ret != xpSuccess)
+		return ret;
+
+	if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
+	    !part_uv->cached_heartbeat.offline) {
+
+		ret = xpNoHeartbeat;
+	} else {
+		part->last_heartbeat = part_uv->cached_heartbeat.value;
+	}
+	return ret;
+}
+
+static void
+xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
+				    unsigned long remote_rp_gpa, int nasid)
+{
+	short partid = remote_rp->SAL_partid;
+	struct xpc_partition *part = &xpc_partitions[partid];
+	struct xpc_activate_mq_msg_activate_req_uv msg;
+
+	part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
+	part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+	part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
+	part->sn.uv.activate_gru_mq_desc_gpa =
+	    remote_rp->sn.uv.activate_gru_mq_desc_gpa;
+
+	/*
+	 * ??? Is it a good idea to make this conditional on what is
+	 * ??? potentially stale state information?
+	 */
+	if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
+		msg.rp_gpa = uv_gpa(xpc_rsvd_page);
+		msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
+		msg.activate_gru_mq_desc_gpa =
+		    xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
+		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+					   XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
+	}
+
+	if (part->act_state == XPC_P_AS_INACTIVE)
+		xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
+}
+
+static void
+xpc_request_partition_reactivation_uv(struct xpc_partition *part)
+{
+	xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
+}
+
+static void
+xpc_request_partition_deactivation_uv(struct xpc_partition *part)
+{
+	struct xpc_activate_mq_msg_deactivate_req_uv msg;
+
+	/*
+	 * ??? Is it a good idea to make this conditional on what is
+	 * ??? potentially stale state information?
+	 */
+	if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
+	    part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
+
+		msg.reason = part->reason;
+		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+					 XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
+	}
+}
+
+static void
+xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
+{
+	/* nothing needs to be done */
+	return;
+}
+
+static void
+xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
+{
+	head->first = NULL;
+	head->last = NULL;
+	spin_lock_init(&head->lock);
+	head->n_entries = 0;
+}
+
+static void *
+xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
+{
+	unsigned long irq_flags;
+	struct xpc_fifo_entry_uv *first;
+
+	spin_lock_irqsave(&head->lock, irq_flags);
+	first = head->first;
+	if (head->first != NULL) {
+		head->first = first->next;
+		if (head->first == NULL)
+			head->last = NULL;
+
+		head->n_entries--;
+		BUG_ON(head->n_entries < 0);
+
+		first->next = NULL;
+	}
+	spin_unlock_irqrestore(&head->lock, irq_flags);
+	return first;
+}
+
+static void
+xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
+		      struct xpc_fifo_entry_uv *last)
+{
+	unsigned long irq_flags;
+
+	last->next = NULL;
+	spin_lock_irqsave(&head->lock, irq_flags);
+	if (head->last != NULL)
+		head->last->next = last;
+	else
+		head->first = last;
+	head->last = last;
+	head->n_entries++;
+	spin_unlock_irqrestore(&head->lock, irq_flags);
+}
+
+static int
+xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
+{
+	return head->n_entries;
+}
+
+/*
+ * Setup the channel structures that are uv specific.
+ */
+static enum xp_retval
+xpc_setup_ch_structures_uv(struct xpc_partition *part)
+{
+	struct xpc_channel_uv *ch_uv;
+	int ch_number;
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch_uv = &part->channels[ch_number].sn.uv;
+
+		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
+	}
+
+	return xpSuccess;
+}
+
+/*
+ * Teardown the channel structures that are uv specific.
+ */
+static void
+xpc_teardown_ch_structures_uv(struct xpc_partition *part)
+{
+	/* nothing needs to be done */
+	return;
+}
+
+static enum xp_retval
+xpc_make_first_contact_uv(struct xpc_partition *part)
+{
+	struct xpc_activate_mq_msg_uv msg;
+
+	/*
+	 * We send a sync msg to get the remote partition's remote_act_state
+	 * updated to our current act_state which at this point should
+	 * be XPC_P_AS_ACTIVATING.
+	 */
+	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+				      XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
+
+	while (!((part->sn.uv.remote_act_state == XPC_P_AS_ACTIVATING) ||
+		 (part->sn.uv.remote_act_state == XPC_P_AS_ACTIVE))) {
+
+		dev_dbg(xpc_part, "waiting to make first contact with "
+			"partition %d\n", XPC_PARTID(part));
+
+		/* wait a 1/4 of a second or so */
+		(void)msleep_interruptible(250);
+
+		if (part->act_state == XPC_P_AS_DEACTIVATING)
+			return part->reason;
+	}
+
+	return xpSuccess;
+}
+
+static u64
+xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	union xpc_channel_ctl_flags chctl;
+
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	chctl = part->chctl;
+	if (chctl.all_flags != 0)
+		part->chctl.all_flags = 0;
+
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+	return chctl.all_flags;
+}
+
+static enum xp_retval
+xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
+{
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+	struct xpc_send_msg_slot_uv *msg_slot;
+	unsigned long irq_flags;
+	int nentries;
+	int entry;
+	size_t nbytes;
+
+	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
+		nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
+		ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
+		if (ch_uv->send_msg_slots == NULL)
+			continue;
+
+		for (entry = 0; entry < nentries; entry++) {
+			msg_slot = &ch_uv->send_msg_slots[entry];
+
+			msg_slot->msg_slot_number = entry;
+			xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
+					      &msg_slot->next);
+		}
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->local_nentries)
+			ch->local_nentries = nentries;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	return xpNoMemory;
+}
+
+static enum xp_retval
+xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
+{
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+	struct xpc_notify_mq_msg_uv *msg_slot;
+	unsigned long irq_flags;
+	int nentries;
+	int entry;
+	size_t nbytes;
+
+	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
+		nbytes = nentries * ch->entry_size;
+		ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
+		if (ch_uv->recv_msg_slots == NULL)
+			continue;
+
+		for (entry = 0; entry < nentries; entry++) {
+			msg_slot = ch_uv->recv_msg_slots +
+			    entry * ch->entry_size;
+
+			msg_slot->hdr.msg_slot_number = entry;
+		}
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->remote_nentries)
+			ch->remote_nentries = nentries;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	return xpNoMemory;
+}
+
+/*
+ * Allocate msg_slots associated with the channel.
+ */
+static enum xp_retval
+xpc_setup_msg_structures_uv(struct xpc_channel *ch)
+{
+	static enum xp_retval ret;
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+	DBUG_ON(ch->flags & XPC_C_SETUP);
+
+	ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct
+						   gru_message_queue_desc),
+						   GFP_KERNEL);
+	if (ch_uv->cached_notify_gru_mq_desc == NULL)
+		return xpNoMemory;
+
+	ret = xpc_allocate_send_msg_slot_uv(ch);
+	if (ret == xpSuccess) {
+
+		ret = xpc_allocate_recv_msg_slot_uv(ch);
+		if (ret != xpSuccess) {
+			kfree(ch_uv->send_msg_slots);
+			xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+		}
+	}
+	return ret;
+}
+
+/*
+ * Free up msg_slots and clear other stuff that were setup for the specified
+ * channel.
+ */
+static void
+xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
+{
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
+	kfree(ch_uv->cached_notify_gru_mq_desc);
+	ch_uv->cached_notify_gru_mq_desc = NULL;
+
+	if (ch->flags & XPC_C_SETUP) {
+		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+		kfree(ch_uv->send_msg_slots);
+		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
+		kfree(ch_uv->recv_msg_slots);
+	}
+}
+
+static void
+xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
+
+	msg.ch_number = ch->number;
+	msg.reason = ch->reason;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
+}
+
+static void
+xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_closereply_uv msg;
+
+	msg.ch_number = ch->number;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
+}
+
+static void
+xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
+
+	msg.ch_number = ch->number;
+	msg.entry_size = ch->entry_size;
+	msg.local_nentries = ch->local_nentries;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
+}
+
+static void
+xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_openreply_uv msg;
+
+	msg.ch_number = ch->number;
+	msg.local_nentries = ch->local_nentries;
+	msg.remote_nentries = ch->remote_nentries;
+	msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc);
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
+}
+
+static void
+xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_opencomplete_uv msg;
+
+	msg.ch_number = ch->number;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV);
+}
+
+static void
+xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+	xpc_wakeup_channel_mgr(part);
+}
+
+static enum xp_retval
+xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
+			       unsigned long gru_mq_desc_gpa)
+{
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+	DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL);
+	return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc,
+					       gru_mq_desc_gpa);
+}
+
+static void
+xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
+{
+	struct xpc_activate_mq_msg_uv msg;
+
+	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+				      XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
+}
+
+static void
+xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
+{
+	struct xpc_activate_mq_msg_uv msg;
+
+	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+				      XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
+}
+
+static void
+xpc_assume_partition_disengaged_uv(short partid)
+{
+	struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+	part_uv->flags &= ~XPC_P_ENGAGED_UV;
+	spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+}
+
+static int
+xpc_partition_engaged_uv(short partid)
+{
+	return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
+}
+
+static int
+xpc_any_partition_engaged_uv(void)
+{
+	struct xpc_partition_uv *part_uv;
+	short partid;
+
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part_uv = &xpc_partitions[partid].sn.uv;
+		if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
+			return 1;
+	}
+	return 0;
+}
+
+static enum xp_retval
+xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
+			 struct xpc_send_msg_slot_uv **address_of_msg_slot)
+{
+	enum xp_retval ret;
+	struct xpc_send_msg_slot_uv *msg_slot;
+	struct xpc_fifo_entry_uv *entry;
+
+	while (1) {
+		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
+		if (entry != NULL)
+			break;
+
+		if (flags & XPC_NOWAIT)
+			return xpNoWait;
+
+		ret = xpc_allocate_msg_wait(ch);
+		if (ret != xpInterrupted && ret != xpTimeout)
+			return ret;
+	}
+
+	msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
+	*address_of_msg_slot = msg_slot;
+	return xpSuccess;
+}
+
+static void
+xpc_free_msg_slot_uv(struct xpc_channel *ch,
+		     struct xpc_send_msg_slot_uv *msg_slot)
+{
+	xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
+
+	/* wakeup anyone waiting for a free msg slot */
+	if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+		wake_up(&ch->msg_allocate_wq);
+}
+
+static void
+xpc_notify_sender_uv(struct xpc_channel *ch,
+		     struct xpc_send_msg_slot_uv *msg_slot,
+		     enum xp_retval reason)
+{
+	xpc_notify_func func = msg_slot->func;
+
+	if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
+
+		atomic_dec(&ch->n_to_notify);
+
+		dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
+			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
+			msg_slot->msg_slot_number, ch->partid, ch->number);
+
+		func(reason, ch->partid, ch->number, msg_slot->key);
+
+		dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
+			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
+			msg_slot->msg_slot_number, ch->partid, ch->number);
+	}
+}
+
+static void
+xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
+			    struct xpc_notify_mq_msg_uv *msg)
+{
+	struct xpc_send_msg_slot_uv *msg_slot;
+	int entry = msg->hdr.msg_slot_number % ch->local_nentries;
+
+	msg_slot = &ch->sn.uv.send_msg_slots[entry];
+
+	BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
+	msg_slot->msg_slot_number += ch->local_nentries;
+
+	if (msg_slot->func != NULL)
+		xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
+
+	xpc_free_msg_slot_uv(ch, msg_slot);
+}
+
+static void
+xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
+			    struct xpc_notify_mq_msg_uv *msg)
+{
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	struct xpc_channel *ch;
+	struct xpc_channel_uv *ch_uv;
+	struct xpc_notify_mq_msg_uv *msg_slot;
+	unsigned long irq_flags;
+	int ch_number = msg->hdr.ch_number;
+
+	if (unlikely(ch_number >= part->nchannels)) {
+		dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
+			"channel number=0x%x in message from partid=%d\n",
+			ch_number, XPC_PARTID(part));
+
+		/* get hb checker to deactivate from the remote partition */
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+		part_uv->reason = xpBadChannelNumber;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		wake_up_interruptible(&xpc_activate_IRQ_wq);
+		return;
+	}
+
+	ch = &part->channels[ch_number];
+	xpc_msgqueue_ref(ch);
+
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	/* see if we're really dealing with an ACK for a previously sent msg */
+	if (msg->hdr.size == 0) {
+		xpc_handle_notify_mq_ack_uv(ch, msg);
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	/* we're dealing with a normal message sent via the notify_mq */
+	ch_uv = &ch->sn.uv;
+
+	msg_slot = ch_uv->recv_msg_slots +
+	    (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size;
+
+	BUG_ON(msg_slot->hdr.size != 0);
+
+	memcpy(msg_slot, msg, msg->hdr.size);
+
+	xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
+
+	if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
+		/*
+		 * If there is an existing idle kthread get it to deliver
+		 * the payload, otherwise we'll have to get the channel mgr
+		 * for this partition to create a kthread to do the delivery.
+		 */
+		if (atomic_read(&ch->kthreads_idle) > 0)
+			wake_up_nr(&ch->idle_wq, 1);
+		else
+			xpc_send_chctl_local_msgrequest_uv(part, ch->number);
+	}
+	xpc_msgqueue_deref(ch);
+}
+
+static irqreturn_t
+xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
+{
+	struct xpc_notify_mq_msg_uv *msg;
+	short partid;
+	struct xpc_partition *part;
+
+	while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) !=
+	       NULL) {
+
+		partid = msg->hdr.partid;
+		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+			dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
+				"invalid partid=0x%x in message\n", partid);
+		} else {
+			part = &xpc_partitions[partid];
+
+			if (xpc_part_ref(part)) {
+				xpc_handle_notify_mq_msg_uv(part, msg);
+				xpc_part_deref(part);
+			}
+		}
+
+		gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int
+xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
+{
+	return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
+}
+
+static void
+xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
+{
+	struct xpc_channel *ch = &part->channels[ch_number];
+	int ndeliverable_payloads;
+
+	xpc_msgqueue_ref(ch);
+
+	ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
+
+	if (ndeliverable_payloads > 0 &&
+	    (ch->flags & XPC_C_CONNECTED) &&
+	    (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
+
+		xpc_activate_kthreads(ch, ndeliverable_payloads);
+	}
+
+	xpc_msgqueue_deref(ch);
+}
+
+static enum xp_retval
+xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
+		    u16 payload_size, u8 notify_type, xpc_notify_func func,
+		    void *key)
+{
+	enum xp_retval ret = xpSuccess;
+	struct xpc_send_msg_slot_uv *msg_slot = NULL;
+	struct xpc_notify_mq_msg_uv *msg;
+	u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
+	size_t msg_size;
+
+	DBUG_ON(notify_type != XPC_N_CALL);
+
+	msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
+	if (msg_size > ch->entry_size)
+		return xpPayloadTooBig;
+
+	xpc_msgqueue_ref(ch);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		ret = ch->reason;
+		goto out_1;
+	}
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		ret = xpNotConnected;
+		goto out_1;
+	}
+
+	ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
+	if (ret != xpSuccess)
+		goto out_1;
+
+	if (func != NULL) {
+		atomic_inc(&ch->n_to_notify);
+
+		msg_slot->key = key;
+		smp_wmb(); /* a non-NULL func must hit memory after the key */
+		msg_slot->func = func;
+
+		if (ch->flags & XPC_C_DISCONNECTING) {
+			ret = ch->reason;
+			goto out_2;
+		}
+	}
+
+	msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
+	msg->hdr.partid = xp_partition_id;
+	msg->hdr.ch_number = ch->number;
+	msg->hdr.size = msg_size;
+	msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
+	memcpy(&msg->payload, payload, payload_size);
+
+	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
+			       msg_size);
+	if (ret == xpSuccess)
+		goto out_1;
+
+	XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
+out_2:
+	if (func != NULL) {
+		/*
+		 * Try to NULL the msg_slot's func field. If we fail, then
+		 * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
+		 * case we need to pretend we succeeded to send the message
+		 * since the user will get a callout for the disconnect error
+		 * by xpc_notify_senders_of_disconnect_uv(), and to also get an
+		 * error returned here will confuse them. Additionally, since
+		 * in this case the channel is being disconnected we don't need
+		 * to put the the msg_slot back on the free list.
+		 */
+		if (cmpxchg(&msg_slot->func, func, NULL) != func) {
+			ret = xpSuccess;
+			goto out_1;
+		}
+
+		msg_slot->key = NULL;
+		atomic_dec(&ch->n_to_notify);
+	}
+	xpc_free_msg_slot_uv(ch, msg_slot);
+out_1:
+	xpc_msgqueue_deref(ch);
+	return ret;
+}
+
+/*
+ * Tell the callers of xpc_send_notify() that the status of their payloads
+ * is unknown because the channel is now disconnecting.
+ *
+ * We don't worry about putting these msg_slots on the free list since the
+ * msg_slots themselves are about to be kfree'd.
+ */
+static void
+xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
+{
+	struct xpc_send_msg_slot_uv *msg_slot;
+	int entry;
+
+	DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
+
+	for (entry = 0; entry < ch->local_nentries; entry++) {
+
+		if (atomic_read(&ch->n_to_notify) == 0)
+			break;
+
+		msg_slot = &ch->sn.uv.send_msg_slots[entry];
+		if (msg_slot->func != NULL)
+			xpc_notify_sender_uv(ch, msg_slot, ch->reason);
+	}
+}
+
+/*
+ * Get the next deliverable message's payload.
+ */
+static void *
+xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
+{
+	struct xpc_fifo_entry_uv *entry;
+	struct xpc_notify_mq_msg_uv *msg;
+	void *payload = NULL;
+
+	if (!(ch->flags & XPC_C_DISCONNECTING)) {
+		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
+		if (entry != NULL) {
+			msg = container_of(entry, struct xpc_notify_mq_msg_uv,
+					   hdr.u.next);
+			payload = &msg->payload;
+		}
+	}
+	return payload;
+}
+
+static void
+xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
+{
+	struct xpc_notify_mq_msg_uv *msg;
+	enum xp_retval ret;
+
+	msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
+
+	/* return an ACK to the sender of this message */
+
+	msg->hdr.partid = xp_partition_id;
+	msg->hdr.size = 0;	/* size of zero indicates this is an ACK */
+
+	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
+			       sizeof(struct xpc_notify_mq_msghdr_uv));
+	if (ret != xpSuccess)
+		XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
+}
+
+static struct xpc_arch_operations xpc_arch_ops_uv = {
+	.setup_partitions = xpc_setup_partitions_uv,
+	.teardown_partitions = xpc_teardown_partitions_uv,
+	.process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv,
+	.get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv,
+	.setup_rsvd_page = xpc_setup_rsvd_page_uv,
+
+	.allow_hb = xpc_allow_hb_uv,
+	.disallow_hb = xpc_disallow_hb_uv,
+	.disallow_all_hbs = xpc_disallow_all_hbs_uv,
+	.increment_heartbeat = xpc_increment_heartbeat_uv,
+	.offline_heartbeat = xpc_offline_heartbeat_uv,
+	.online_heartbeat = xpc_online_heartbeat_uv,
+	.heartbeat_init = xpc_heartbeat_init_uv,
+	.heartbeat_exit = xpc_heartbeat_exit_uv,
+	.get_remote_heartbeat = xpc_get_remote_heartbeat_uv,
+
+	.request_partition_activation =
+		xpc_request_partition_activation_uv,
+	.request_partition_reactivation =
+		xpc_request_partition_reactivation_uv,
+	.request_partition_deactivation =
+		xpc_request_partition_deactivation_uv,
+	.cancel_partition_deactivation_request =
+		xpc_cancel_partition_deactivation_request_uv,
+
+	.setup_ch_structures = xpc_setup_ch_structures_uv,
+	.teardown_ch_structures = xpc_teardown_ch_structures_uv,
+
+	.make_first_contact = xpc_make_first_contact_uv,
+
+	.get_chctl_all_flags = xpc_get_chctl_all_flags_uv,
+	.send_chctl_closerequest = xpc_send_chctl_closerequest_uv,
+	.send_chctl_closereply = xpc_send_chctl_closereply_uv,
+	.send_chctl_openrequest = xpc_send_chctl_openrequest_uv,
+	.send_chctl_openreply = xpc_send_chctl_openreply_uv,
+	.send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv,
+	.process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv,
+
+	.save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv,
+
+	.setup_msg_structures = xpc_setup_msg_structures_uv,
+	.teardown_msg_structures = xpc_teardown_msg_structures_uv,
+
+	.indicate_partition_engaged = xpc_indicate_partition_engaged_uv,
+	.indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv,
+	.assume_partition_disengaged = xpc_assume_partition_disengaged_uv,
+	.partition_engaged = xpc_partition_engaged_uv,
+	.any_partition_engaged = xpc_any_partition_engaged_uv,
+
+	.n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv,
+	.send_payload = xpc_send_payload_uv,
+	.get_deliverable_payload = xpc_get_deliverable_payload_uv,
+	.received_payload = xpc_received_payload_uv,
+	.notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
+};
+
+int
+xpc_init_uv(void)
+{
+	xpc_arch_ops = xpc_arch_ops_uv;
+
+	if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
+		dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
+			XPC_MSG_HDR_MAX_SIZE);
+		return -E2BIG;
+	}
+
+	xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0,
+						  XPC_ACTIVATE_IRQ_NAME,
+						  xpc_handle_activate_IRQ_uv);
+	if (IS_ERR(xpc_activate_mq_uv))
+		return PTR_ERR(xpc_activate_mq_uv);
+
+	xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0,
+						XPC_NOTIFY_IRQ_NAME,
+						xpc_handle_notify_IRQ_uv);
+	if (IS_ERR(xpc_notify_mq_uv)) {
+		xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
+		return PTR_ERR(xpc_notify_mq_uv);
+	}
+
+	return 0;
+}
+
+void
+xpc_exit_uv(void)
+{
+	xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
+	xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
+}
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
new file mode 100644
index 00000000000..16f0abda142
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -0,0 +1,606 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999-2009 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * Cross Partition Network Interface (XPNET) support
+ *
+ *	XPNET provides a virtual network layered on top of the Cross
+ *	Partition communication layer.
+ *
+ *	XPNET provides direct point-to-point and broadcast-like support
+ *	for an ethernet-like device.  The ethernet broadcast medium is
+ *	replaced with a point-to-point message structure which passes
+ *	pointers to a DMA-capable block that a remote partition should
+ *	retrieve and pass to the upper level networking layer.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include "xp.h"
+
+/*
+ * The message payload transferred by XPC.
+ *
+ * buf_pa is the physical address where the DMA should pull from.
+ *
+ * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
+ * cacheline boundary.  To accomplish this, we record the number of
+ * bytes from the beginning of the first cacheline to the first useful
+ * byte of the skb (leadin_ignore) and the number of bytes from the
+ * last useful byte of the skb to the end of the last cacheline
+ * (tailout_ignore).
+ *
+ * size is the number of bytes to transfer which includes the skb->len
+ * (useful bytes of the senders skb) plus the leadin and tailout
+ */
+struct xpnet_message {
+	u16 version;		/* Version for this message */
+	u16 embedded_bytes;	/* #of bytes embedded in XPC message */
+	u32 magic;		/* Special number indicating this is xpnet */
+	unsigned long buf_pa;	/* phys address of buffer to retrieve */
+	u32 size;		/* #of bytes in buffer */
+	u8 leadin_ignore;	/* #of bytes to ignore at the beginning */
+	u8 tailout_ignore;	/* #of bytes to ignore at the end */
+	unsigned char data;	/* body of small packets */
+};
+
+/*
+ * Determine the size of our message, the cacheline aligned size,
+ * and then the number of message will request from XPC.
+ *
+ * XPC expects each message to exist in an individual cacheline.
+ */
+#define XPNET_MSG_SIZE		XPC_MSG_PAYLOAD_MAX_SIZE
+#define XPNET_MSG_DATA_MAX	\
+		(XPNET_MSG_SIZE - offsetof(struct xpnet_message, data))
+#define XPNET_MSG_NENTRIES	(PAGE_SIZE / XPC_MSG_MAX_SIZE)
+
+#define XPNET_MAX_KTHREADS	(XPNET_MSG_NENTRIES + 1)
+#define XPNET_MAX_IDLE_KTHREADS	(XPNET_MSG_NENTRIES + 1)
+
+/*
+ * Version number of XPNET implementation. XPNET can always talk to versions
+ * with same major #, and never talk to versions with a different version.
+ */
+#define _XPNET_VERSION(_major, _minor)	(((_major) << 4) | (_minor))
+#define XPNET_VERSION_MAJOR(_v)		((_v) >> 4)
+#define XPNET_VERSION_MINOR(_v)		((_v) & 0xf)
+
+#define	XPNET_VERSION _XPNET_VERSION(1, 0)	/* version 1.0 */
+#define	XPNET_VERSION_EMBED _XPNET_VERSION(1, 1)	/* version 1.1 */
+#define XPNET_MAGIC	0x88786984	/* "XNET" */
+
+#define XPNET_VALID_MSG(_m)						     \
+   ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \
+    && (msg->magic == XPNET_MAGIC))
+
+#define XPNET_DEVICE_NAME		"xp0"
+
+/*
+ * When messages are queued with xpc_send_notify, a kmalloc'd buffer
+ * of the following type is passed as a notification cookie.  When the
+ * notification function is called, we use the cookie to decide
+ * whether all outstanding message sends have completed.  The skb can
+ * then be released.
+ */
+struct xpnet_pending_msg {
+	struct sk_buff *skb;
+	atomic_t use_count;
+};
+
+struct net_device *xpnet_device;
+
+/*
+ * When we are notified of other partitions activating, we add them to
+ * our bitmask of partitions to which we broadcast.
+ */
+static unsigned long *xpnet_broadcast_partitions;
+/* protect above */
+static DEFINE_SPINLOCK(xpnet_broadcast_lock);
+
+/*
+ * Since the Block Transfer Engine (BTE) is being used for the transfer
+ * and it relies upon cache-line size transfers, we need to reserve at
+ * least one cache-line for head and tail alignment.  The BTE is
+ * limited to 8MB transfers.
+ *
+ * Testing has shown that changing MTU to greater than 64KB has no effect
+ * on TCP as the two sides negotiate a Max Segment Size that is limited
+ * to 64K.  Other protocols May use packets greater than this, but for
+ * now, the default is 64KB.
+ */
+#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
+/* 32KB has been determined to be the ideal */
+#define XPNET_DEF_MTU (0x8000UL)
+
+/*
+ * The partid is encapsulated in the MAC address beginning in the following
+ * octet and it consists of two octets.
+ */
+#define XPNET_PARTID_OCTET	2
+
+/* Define the XPNET debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xpnet_dbg_name = {
+	.name = "xpnet"
+};
+
+struct device xpnet_dbg_subname = {
+	.init_name = "",	/* set to "" */
+	.driver = &xpnet_dbg_name
+};
+
+struct device *xpnet = &xpnet_dbg_subname;
+
+/*
+ * Packet was recevied by XPC and forwarded to us.
+ */
+static void
+xpnet_receive(short partid, int channel, struct xpnet_message *msg)
+{
+	struct sk_buff *skb;
+	void *dst;
+	enum xp_retval ret;
+
+	if (!XPNET_VALID_MSG(msg)) {
+		/*
+		 * Packet with a different XPC version.  Ignore.
+		 */
+		xpc_received(partid, channel, (void *)msg);
+
+		xpnet_device->stats.rx_errors++;
+
+		return;
+	}
+	dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
+		msg->leadin_ignore, msg->tailout_ignore);
+
+	/* reserve an extra cache line */
+	skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
+	if (!skb) {
+		dev_err(xpnet, "failed on dev_alloc_skb(%d)\n",
+			msg->size + L1_CACHE_BYTES);
+
+		xpc_received(partid, channel, (void *)msg);
+
+		xpnet_device->stats.rx_errors++;
+
+		return;
+	}
+
+	/*
+	 * The allocated skb has some reserved space.
+	 * In order to use xp_remote_memcpy(), we need to get the
+	 * skb->data pointer moved forward.
+	 */
+	skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
+					    (L1_CACHE_BYTES - 1)) +
+			  msg->leadin_ignore));
+
+	/*
+	 * Update the tail pointer to indicate data actually
+	 * transferred.
+	 */
+	skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore));
+
+	/*
+	 * Move the data over from the other side.
+	 */
+	if ((XPNET_VERSION_MINOR(msg->version) == 1) &&
+	    (msg->embedded_bytes != 0)) {
+		dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, "
+			"%lu)\n", skb->data, &msg->data,
+			(size_t)msg->embedded_bytes);
+
+		skb_copy_to_linear_data(skb, &msg->data,
+					(size_t)msg->embedded_bytes);
+	} else {
+		dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1));
+		dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
+			"xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst,
+					  (void *)msg->buf_pa, msg->size);
+
+		ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size);
+		if (ret != xpSuccess) {
+			/*
+			 * !!! Need better way of cleaning skb.  Currently skb
+			 * !!! appears in_use and we can't just call
+			 * !!! dev_kfree_skb.
+			 */
+			dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
+				"returned error=0x%x\n", dst,
+				(void *)msg->buf_pa, msg->size, ret);
+
+			xpc_received(partid, channel, (void *)msg);
+
+			xpnet_device->stats.rx_errors++;
+
+			return;
+		}
+	}
+
+	dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"skb->end=0x%p skb->len=%d\n", (void *)skb->head,
+		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
+		skb->len);
+
+	skb->protocol = eth_type_trans(skb, xpnet_device);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	dev_dbg(xpnet, "passing skb to network layer\n"
+		"\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"skb->end=0x%p skb->len=%d\n",
+		(void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
+		skb_end_pointer(skb), skb->len);
+
+	xpnet_device->stats.rx_packets++;
+	xpnet_device->stats.rx_bytes += skb->len + ETH_HLEN;
+
+	netif_rx_ni(skb);
+	xpc_received(partid, channel, (void *)msg);
+}
+
+/*
+ * This is the handler which XPC calls during any sort of change in
+ * state or message reception on a connection.
+ */
+static void
+xpnet_connection_activity(enum xp_retval reason, short partid, int channel,
+			  void *data, void *key)
+{
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+	DBUG_ON(channel != XPC_NET_CHANNEL);
+
+	switch (reason) {
+	case xpMsgReceived:	/* message received */
+		DBUG_ON(data == NULL);
+
+		xpnet_receive(partid, channel, (struct xpnet_message *)data);
+		break;
+
+	case xpConnected:	/* connection completed to a partition */
+		spin_lock_bh(&xpnet_broadcast_lock);
+		__set_bit(partid, xpnet_broadcast_partitions);
+		spin_unlock_bh(&xpnet_broadcast_lock);
+
+		netif_carrier_on(xpnet_device);
+
+		dev_dbg(xpnet, "%s connected to partition %d\n",
+			xpnet_device->name, partid);
+		break;
+
+	default:
+		spin_lock_bh(&xpnet_broadcast_lock);
+		__clear_bit(partid, xpnet_broadcast_partitions);
+		spin_unlock_bh(&xpnet_broadcast_lock);
+
+		if (bitmap_empty((unsigned long *)xpnet_broadcast_partitions,
+				 xp_max_npartitions)) {
+			netif_carrier_off(xpnet_device);
+		}
+
+		dev_dbg(xpnet, "%s disconnected from partition %d\n",
+			xpnet_device->name, partid);
+		break;
+	}
+}
+
+static int
+xpnet_dev_open(struct net_device *dev)
+{
+	enum xp_retval ret;
+
+	dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, "
+		"%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
+		(unsigned long)XPNET_MSG_SIZE,
+		(unsigned long)XPNET_MSG_NENTRIES,
+		(unsigned long)XPNET_MAX_KTHREADS,
+		(unsigned long)XPNET_MAX_IDLE_KTHREADS);
+
+	ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
+			  XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
+			  XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS);
+	if (ret != xpSuccess) {
+		dev_err(xpnet, "ifconfig up of %s failed on XPC connect, "
+			"ret=%d\n", dev->name, ret);
+
+		return -ENOMEM;
+	}
+
+	dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name);
+
+	return 0;
+}
+
+static int
+xpnet_dev_stop(struct net_device *dev)
+{
+	xpc_disconnect(XPC_NET_CHANNEL);
+
+	dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name);
+
+	return 0;
+}
+
+static int
+xpnet_dev_change_mtu(struct net_device *dev, int new_mtu)
+{
+	/* 68 comes from min TCP+IP+MAC header */
+	if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) {
+		dev_err(xpnet, "ifconfig %s mtu %d failed; value must be "
+			"between 68 and %ld\n", dev->name, new_mtu,
+			XPNET_MAX_MTU);
+		return -EINVAL;
+	}
+
+	dev->mtu = new_mtu;
+	dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu);
+	return 0;
+}
+
+/*
+ * Notification that the other end has received the message and
+ * DMA'd the skb information.  At this point, they are done with
+ * our side.  When all recipients are done processing, we
+ * release the skb and then release our pending message structure.
+ */
+static void
+xpnet_send_completed(enum xp_retval reason, short partid, int channel,
+		     void *__qm)
+{
+	struct xpnet_pending_msg *queued_msg = (struct xpnet_pending_msg *)__qm;
+
+	DBUG_ON(queued_msg == NULL);
+
+	dev_dbg(xpnet, "message to %d notified with reason %d\n",
+		partid, reason);
+
+	if (atomic_dec_return(&queued_msg->use_count) == 0) {
+		dev_dbg(xpnet, "all acks for skb->head=-x%p\n",
+			(void *)queued_msg->skb->head);
+
+		dev_kfree_skb_any(queued_msg->skb);
+		kfree(queued_msg);
+	}
+}
+
+static void
+xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
+	   u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid)
+{
+	u8 msg_buffer[XPNET_MSG_SIZE];
+	struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer;
+	u16 msg_size = sizeof(struct xpnet_message);
+	enum xp_retval ret;
+
+	msg->embedded_bytes = embedded_bytes;
+	if (unlikely(embedded_bytes != 0)) {
+		msg->version = XPNET_VERSION_EMBED;
+		dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
+			&msg->data, skb->data, (size_t)embedded_bytes);
+		skb_copy_from_linear_data(skb, &msg->data,
+					  (size_t)embedded_bytes);
+		msg_size += embedded_bytes - 1;
+	} else {
+		msg->version = XPNET_VERSION;
+	}
+	msg->magic = XPNET_MAGIC;
+	msg->size = end_addr - start_addr;
+	msg->leadin_ignore = (u64)skb->data - start_addr;
+	msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
+	msg->buf_pa = xp_pa((void *)start_addr);
+
+	dev_dbg(xpnet, "sending XPC message to %d:%d\n"
+		"msg->buf_pa=0x%lx, msg->size=%u, "
+		"msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
+		dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
+		msg->leadin_ignore, msg->tailout_ignore);
+
+	atomic_inc(&queued_msg->use_count);
+
+	ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg,
+			      msg_size, xpnet_send_completed, queued_msg);
+	if (unlikely(ret != xpSuccess))
+		atomic_dec(&queued_msg->use_count);
+}
+
+/*
+ * Network layer has formatted a packet (skb) and is ready to place it
+ * "on the wire".  Prepare and send an xpnet_message to all partitions
+ * which have connected with us and are targets of this packet.
+ *
+ * MAC-NOTE:  For the XPNET driver, the MAC address contains the
+ * destination partid.  If the destination partid octets are 0xffff,
+ * this packet is to be broadcast to all connected partitions.
+ */
+static int
+xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct xpnet_pending_msg *queued_msg;
+	u64 start_addr, end_addr;
+	short dest_partid;
+	u16 embedded_bytes = 0;
+
+	dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"skb->end=0x%p skb->len=%d\n", (void *)skb->head,
+		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
+		skb->len);
+
+	if (skb->data[0] == 0x33) {
+		dev_kfree_skb(skb);
+		return NETDEV_TX_OK;	/* nothing needed to be done */
+	}
+
+	/*
+	 * The xpnet_pending_msg tracks how many outstanding
+	 * xpc_send_notifies are relying on this skb.  When none
+	 * remain, release the skb.
+	 */
+	queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC);
+	if (queued_msg == NULL) {
+		dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping "
+			 "packet\n", sizeof(struct xpnet_pending_msg));
+
+		dev->stats.tx_errors++;
+		dev_kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* get the beginning of the first cacheline and end of last */
+	start_addr = ((u64)skb->data & ~(L1_CACHE_BYTES - 1));
+	end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
+
+	/* calculate how many bytes to embed in the XPC message */
+	if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
+		/* skb->data does fit so embed */
+		embedded_bytes = skb->len;
+	}
+
+	/*
+	 * Since the send occurs asynchronously, we set the count to one
+	 * and begin sending.  Any sends that happen to complete before
+	 * we are done sending will not free the skb.  We will be left
+	 * with that task during exit.  This also handles the case of
+	 * a packet destined for a partition which is no longer up.
+	 */
+	atomic_set(&queued_msg->use_count, 1);
+	queued_msg->skb = skb;
+
+	if (skb->data[0] == 0xff) {
+		/* we are being asked to broadcast to all partitions */
+		for_each_bit(dest_partid, xpnet_broadcast_partitions,
+			     xp_max_npartitions) {
+
+			xpnet_send(skb, queued_msg, start_addr, end_addr,
+				   embedded_bytes, dest_partid);
+		}
+	} else {
+		dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1];
+		dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8;
+
+		if (dest_partid >= 0 &&
+		    dest_partid < xp_max_npartitions &&
+		    test_bit(dest_partid, xpnet_broadcast_partitions) != 0) {
+
+			xpnet_send(skb, queued_msg, start_addr, end_addr,
+				   embedded_bytes, dest_partid);
+		}
+	}
+
+	if (atomic_dec_return(&queued_msg->use_count) == 0) {
+		dev_kfree_skb(skb);
+		kfree(queued_msg);
+	}
+
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+
+	return NETDEV_TX_OK;
+}
+
+/*
+ * Deal with transmit timeouts coming from the network layer.
+ */
+static void
+xpnet_dev_tx_timeout(struct net_device *dev)
+{
+	dev->stats.tx_errors++;
+}
+
+static const struct net_device_ops xpnet_netdev_ops = {
+	.ndo_open		= xpnet_dev_open,
+	.ndo_stop		= xpnet_dev_stop,
+	.ndo_start_xmit		= xpnet_dev_hard_start_xmit,
+	.ndo_change_mtu		= xpnet_dev_change_mtu,
+	.ndo_tx_timeout		= xpnet_dev_tx_timeout,
+	.ndo_set_mac_address 	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+static int __init
+xpnet_init(void)
+{
+	int result;
+
+	if (!is_shub() && !is_uv())
+		return -ENODEV;
+
+	dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
+
+	xpnet_broadcast_partitions = kzalloc(BITS_TO_LONGS(xp_max_npartitions) *
+					     sizeof(long), GFP_KERNEL);
+	if (xpnet_broadcast_partitions == NULL)
+		return -ENOMEM;
+
+	/*
+	 * use ether_setup() to init the majority of our device
+	 * structure and then override the necessary pieces.
+	 */
+	xpnet_device = alloc_netdev(0, XPNET_DEVICE_NAME, ether_setup);
+	if (xpnet_device == NULL) {
+		kfree(xpnet_broadcast_partitions);
+		return -ENOMEM;
+	}
+
+	netif_carrier_off(xpnet_device);
+
+	xpnet_device->netdev_ops = &xpnet_netdev_ops;
+	xpnet_device->mtu = XPNET_DEF_MTU;
+
+	/*
+	 * Multicast assumes the LSB of the first octet is set for multicast
+	 * MAC addresses.  We chose the first octet of the MAC to be unlikely
+	 * to collide with any vendor's officially issued MAC.
+	 */
+	xpnet_device->dev_addr[0] = 0x02;     /* locally administered, no OUI */
+
+	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id;
+	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8);
+
+	/*
+	 * ether_setup() sets this to a multicast device.  We are
+	 * really not supporting multicast at this time.
+	 */
+	xpnet_device->flags &= ~IFF_MULTICAST;
+
+	/*
+	 * No need to checksum as it is a DMA transfer.  The BTE will
+	 * report an error if the data is not retrievable and the
+	 * packet will be dropped.
+	 */
+	xpnet_device->features = NETIF_F_NO_CSUM;
+
+	result = register_netdev(xpnet_device);
+	if (result != 0) {
+		free_netdev(xpnet_device);
+		kfree(xpnet_broadcast_partitions);
+	}
+
+	return result;
+}
+
+module_init(xpnet_init);
+
+static void __exit
+xpnet_exit(void)
+{
+	dev_info(xpnet, "unregistering network device %s\n",
+		 xpnet_device[0].name);
+
+	unregister_netdev(xpnet_device);
+	free_netdev(xpnet_device);
+	kfree(xpnet_broadcast_partitions);
+}
+
+module_exit(xpnet_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ti_dac7512.c b/drivers/misc/ti_dac7512.c
new file mode 100644
index 00000000000..d3f229a3a77
--- /dev/null
+++ b/drivers/misc/ti_dac7512.c
@@ -0,0 +1,101 @@
+/*
+ *  dac7512.c - Linux kernel module for
+ * 	Texas Instruments DAC7512
+ *
+ *  Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/spi/spi.h>
+
+#define DAC7512_DRV_NAME	"dac7512"
+#define DRIVER_VERSION		"1.0"
+
+static ssize_t dac7512_store_val(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	unsigned char tmp[2];
+	unsigned long val;
+
+	if (strict_strtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	tmp[0] = val >> 8;
+	tmp[1] = val & 0xff;
+	spi_write(spi, tmp, sizeof(tmp));
+	return count;
+}
+
+static DEVICE_ATTR(value, S_IWUSR, NULL, dac7512_store_val);
+
+static struct attribute *dac7512_attributes[] = {
+	&dev_attr_value.attr,
+	NULL
+};
+
+static const struct attribute_group dac7512_attr_group = {
+	.attrs = dac7512_attributes,
+};
+
+static int __devinit dac7512_probe(struct spi_device *spi)
+{
+	int ret;
+
+	spi->bits_per_word = 8;
+	spi->mode = SPI_MODE_0;
+	ret = spi_setup(spi);
+	if (ret < 0)
+		return ret;
+
+	return sysfs_create_group(&spi->dev.kobj, &dac7512_attr_group);
+}
+
+static int __devexit dac7512_remove(struct spi_device *spi)
+{
+	sysfs_remove_group(&spi->dev.kobj, &dac7512_attr_group);
+	return 0;
+}
+
+static struct spi_driver dac7512_driver = {
+	.driver = {
+		.name	= DAC7512_DRV_NAME,
+		.owner	= THIS_MODULE,
+	},
+	.probe	= dac7512_probe,
+	.remove	= __devexit_p(dac7512_remove),
+};
+
+static int __init dac7512_init(void)
+{
+	return spi_register_driver(&dac7512_driver);
+}
+
+static void __exit dac7512_exit(void)
+{
+	spi_unregister_driver(&dac7512_driver);
+}
+
+MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
+MODULE_DESCRIPTION("DAC7512 16-bit DAC");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRIVER_VERSION);
+
+module_init(dac7512_init);
+module_exit(dac7512_exit);
diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c
new file mode 100644
index 00000000000..a6ef18259da
--- /dev/null
+++ b/drivers/misc/tifm_7xx1.c
@@ -0,0 +1,453 @@
+/*
+ *  tifm_7xx1.c - TI FlashMedia driver
+ *
+ *  Copyright (C) 2006 Alex Dubov <oakad@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/tifm.h>
+#include <linux/dma-mapping.h>
+
+#define DRIVER_NAME "tifm_7xx1"
+#define DRIVER_VERSION "0.8"
+
+#define TIFM_IRQ_ENABLE           0x80000000
+#define TIFM_IRQ_SOCKMASK(x)      (x)
+#define TIFM_IRQ_CARDMASK(x)      ((x) << 8)
+#define TIFM_IRQ_FIFOMASK(x)      ((x) << 16)
+#define TIFM_IRQ_SETALL           0xffffffff
+
+static void tifm_7xx1_dummy_eject(struct tifm_adapter *fm,
+				  struct tifm_dev *sock)
+{
+}
+
+static void tifm_7xx1_eject(struct tifm_adapter *fm, struct tifm_dev *sock)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&fm->lock, flags);
+	fm->socket_change_set |= 1 << sock->socket_id;
+	tifm_queue_work(&fm->media_switcher);
+	spin_unlock_irqrestore(&fm->lock, flags);
+}
+
+static irqreturn_t tifm_7xx1_isr(int irq, void *dev_id)
+{
+	struct tifm_adapter *fm = dev_id;
+	struct tifm_dev *sock;
+	unsigned int irq_status, cnt;
+
+	spin_lock(&fm->lock);
+	irq_status = readl(fm->addr + FM_INTERRUPT_STATUS);
+	if (irq_status == 0 || irq_status == (~0)) {
+		spin_unlock(&fm->lock);
+		return IRQ_NONE;
+	}
+
+	if (irq_status & TIFM_IRQ_ENABLE) {
+		writel(TIFM_IRQ_ENABLE, fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+
+		for (cnt = 0; cnt < fm->num_sockets; cnt++) {
+			sock = fm->sockets[cnt];
+			if (sock) {
+				if ((irq_status >> cnt) & TIFM_IRQ_FIFOMASK(1))
+					sock->data_event(sock);
+				if ((irq_status >> cnt) & TIFM_IRQ_CARDMASK(1))
+					sock->card_event(sock);
+			}
+		}
+
+		fm->socket_change_set |= irq_status
+					 & ((1 << fm->num_sockets) - 1);
+	}
+	writel(irq_status, fm->addr + FM_INTERRUPT_STATUS);
+
+	if (fm->finish_me)
+		complete_all(fm->finish_me);
+	else if (!fm->socket_change_set)
+		writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE);
+	else
+		tifm_queue_work(&fm->media_switcher);
+
+	spin_unlock(&fm->lock);
+	return IRQ_HANDLED;
+}
+
+static unsigned char tifm_7xx1_toggle_sock_power(char __iomem *sock_addr)
+{
+	unsigned int s_state;
+	int cnt;
+
+	writel(0x0e00, sock_addr + SOCK_CONTROL);
+
+	for (cnt = 16; cnt <= 256; cnt <<= 1) {
+		if (!(TIFM_SOCK_STATE_POWERED
+		      & readl(sock_addr + SOCK_PRESENT_STATE)))
+			break;
+
+		msleep(cnt);
+	}
+
+	s_state = readl(sock_addr + SOCK_PRESENT_STATE);
+	if (!(TIFM_SOCK_STATE_OCCUPIED & s_state))
+		return 0;
+
+	writel(readl(sock_addr + SOCK_CONTROL) | TIFM_CTRL_LED,
+	       sock_addr + SOCK_CONTROL);
+
+	/* xd needs some extra time before power on */
+	if (((readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7)
+	    == TIFM_TYPE_XD)
+		msleep(40);
+
+	writel((s_state & TIFM_CTRL_POWER_MASK) | 0x0c00,
+	       sock_addr + SOCK_CONTROL);
+	/* wait for power to stabilize */
+	msleep(20);
+	for (cnt = 16; cnt <= 256; cnt <<= 1) {
+		if ((TIFM_SOCK_STATE_POWERED
+		     & readl(sock_addr + SOCK_PRESENT_STATE)))
+			break;
+
+		msleep(cnt);
+	}
+
+	writel(readl(sock_addr + SOCK_CONTROL) & (~TIFM_CTRL_LED),
+	       sock_addr + SOCK_CONTROL);
+
+	return (readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7;
+}
+
+inline static void tifm_7xx1_sock_power_off(char __iomem *sock_addr)
+{
+	writel((~TIFM_CTRL_POWER_MASK) & readl(sock_addr + SOCK_CONTROL),
+	       sock_addr + SOCK_CONTROL);
+}
+
+inline static char __iomem *
+tifm_7xx1_sock_addr(char __iomem *base_addr, unsigned int sock_num)
+{
+	return base_addr + ((sock_num + 1) << 10);
+}
+
+static void tifm_7xx1_switch_media(struct work_struct *work)
+{
+	struct tifm_adapter *fm = container_of(work, struct tifm_adapter,
+					       media_switcher);
+	struct tifm_dev *sock;
+	char __iomem *sock_addr;
+	unsigned long flags;
+	unsigned char media_id;
+	unsigned int socket_change_set, cnt;
+
+	spin_lock_irqsave(&fm->lock, flags);
+	socket_change_set = fm->socket_change_set;
+	fm->socket_change_set = 0;
+
+	dev_dbg(fm->dev.parent, "checking media set %x\n",
+		socket_change_set);
+
+	if (!socket_change_set) {
+		spin_unlock_irqrestore(&fm->lock, flags);
+		return;
+	}
+
+	for (cnt = 0; cnt < fm->num_sockets; cnt++) {
+		if (!(socket_change_set & (1 << cnt)))
+			continue;
+		sock = fm->sockets[cnt];
+		if (sock) {
+			printk(KERN_INFO
+			       "%s : demand removing card from socket %u:%u\n",
+			       dev_name(&fm->dev), fm->id, cnt);
+			fm->sockets[cnt] = NULL;
+			sock_addr = sock->addr;
+			spin_unlock_irqrestore(&fm->lock, flags);
+			device_unregister(&sock->dev);
+			spin_lock_irqsave(&fm->lock, flags);
+			tifm_7xx1_sock_power_off(sock_addr);
+			writel(0x0e00, sock_addr + SOCK_CONTROL);
+		}
+
+		spin_unlock_irqrestore(&fm->lock, flags);
+
+		media_id = tifm_7xx1_toggle_sock_power(
+				tifm_7xx1_sock_addr(fm->addr, cnt));
+
+		// tifm_alloc_device will check if media_id is valid
+		sock = tifm_alloc_device(fm, cnt, media_id);
+		if (sock) {
+			sock->addr = tifm_7xx1_sock_addr(fm->addr, cnt);
+
+			if (!device_register(&sock->dev)) {
+				spin_lock_irqsave(&fm->lock, flags);
+				if (!fm->sockets[cnt]) {
+					fm->sockets[cnt] = sock;
+					sock = NULL;
+				}
+				spin_unlock_irqrestore(&fm->lock, flags);
+			}
+			if (sock)
+				tifm_free_device(&sock->dev);
+		}
+		spin_lock_irqsave(&fm->lock, flags);
+	}
+
+	writel(TIFM_IRQ_FIFOMASK(socket_change_set)
+	       | TIFM_IRQ_CARDMASK(socket_change_set),
+	       fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+
+	writel(TIFM_IRQ_FIFOMASK(socket_change_set)
+	       | TIFM_IRQ_CARDMASK(socket_change_set),
+	       fm->addr + FM_SET_INTERRUPT_ENABLE);
+
+	writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE);
+	spin_unlock_irqrestore(&fm->lock, flags);
+}
+
+#ifdef CONFIG_PM
+
+static int tifm_7xx1_suspend(struct pci_dev *dev, pm_message_t state)
+{
+	struct tifm_adapter *fm = pci_get_drvdata(dev);
+	int cnt;
+
+	dev_dbg(&dev->dev, "suspending host\n");
+
+	for (cnt = 0; cnt < fm->num_sockets; cnt++) {
+		if (fm->sockets[cnt])
+			tifm_7xx1_sock_power_off(fm->sockets[cnt]->addr);
+	}
+
+	pci_save_state(dev);
+	pci_enable_wake(dev, pci_choose_state(dev, state), 0);
+	pci_disable_device(dev);
+	pci_set_power_state(dev, pci_choose_state(dev, state));
+	return 0;
+}
+
+static int tifm_7xx1_resume(struct pci_dev *dev)
+{
+	struct tifm_adapter *fm = pci_get_drvdata(dev);
+	int rc;
+	unsigned int good_sockets = 0, bad_sockets = 0;
+	unsigned long flags;
+	unsigned char new_ids[fm->num_sockets];
+	DECLARE_COMPLETION_ONSTACK(finish_resume);
+
+	pci_set_power_state(dev, PCI_D0);
+	pci_restore_state(dev);
+	rc = pci_enable_device(dev);
+	if (rc)
+		return rc;
+	pci_set_master(dev);
+
+	dev_dbg(&dev->dev, "resuming host\n");
+
+	for (rc = 0; rc < fm->num_sockets; rc++)
+		new_ids[rc] = tifm_7xx1_toggle_sock_power(
+					tifm_7xx1_sock_addr(fm->addr, rc));
+	spin_lock_irqsave(&fm->lock, flags);
+	for (rc = 0; rc < fm->num_sockets; rc++) {
+		if (fm->sockets[rc]) {
+			if (fm->sockets[rc]->type == new_ids[rc])
+				good_sockets |= 1 << rc;
+			else
+				bad_sockets |= 1 << rc;
+		}
+	}
+
+	writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1),
+	       fm->addr + FM_SET_INTERRUPT_ENABLE);
+	dev_dbg(&dev->dev, "change sets on resume: good %x, bad %x\n",
+		good_sockets, bad_sockets);
+
+	fm->socket_change_set = 0;
+	if (good_sockets) {
+		fm->finish_me = &finish_resume;
+		spin_unlock_irqrestore(&fm->lock, flags);
+		rc = wait_for_completion_timeout(&finish_resume, HZ);
+		dev_dbg(&dev->dev, "wait returned %d\n", rc);
+		writel(TIFM_IRQ_FIFOMASK(good_sockets)
+		       | TIFM_IRQ_CARDMASK(good_sockets),
+		       fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+		writel(TIFM_IRQ_FIFOMASK(good_sockets)
+		       | TIFM_IRQ_CARDMASK(good_sockets),
+		       fm->addr + FM_SET_INTERRUPT_ENABLE);
+		spin_lock_irqsave(&fm->lock, flags);
+		fm->finish_me = NULL;
+		fm->socket_change_set ^= good_sockets & fm->socket_change_set;
+	}
+
+	fm->socket_change_set |= bad_sockets;
+	if (fm->socket_change_set)
+		tifm_queue_work(&fm->media_switcher);
+
+	spin_unlock_irqrestore(&fm->lock, flags);
+	writel(TIFM_IRQ_ENABLE,
+	       fm->addr + FM_SET_INTERRUPT_ENABLE);
+
+	return 0;
+}
+
+#else
+
+#define tifm_7xx1_suspend NULL
+#define tifm_7xx1_resume NULL
+
+#endif /* CONFIG_PM */
+
+static int tifm_7xx1_dummy_has_ms_pif(struct tifm_adapter *fm,
+				      struct tifm_dev *sock)
+{
+	return 0;
+}
+
+static int tifm_7xx1_has_ms_pif(struct tifm_adapter *fm, struct tifm_dev *sock)
+{
+	if (((fm->num_sockets == 4) && (sock->socket_id == 2))
+	    || ((fm->num_sockets == 2) && (sock->socket_id == 0)))
+		return 1;
+
+	return 0;
+}
+
+static int tifm_7xx1_probe(struct pci_dev *dev,
+			   const struct pci_device_id *dev_id)
+{
+	struct tifm_adapter *fm;
+	int pci_dev_busy = 0;
+	int rc;
+
+	rc = pci_set_dma_mask(dev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	rc = pci_enable_device(dev);
+	if (rc)
+		return rc;
+
+	pci_set_master(dev);
+
+	rc = pci_request_regions(dev, DRIVER_NAME);
+	if (rc) {
+		pci_dev_busy = 1;
+		goto err_out;
+	}
+
+	pci_intx(dev, 1);
+
+	fm = tifm_alloc_adapter(dev->device == PCI_DEVICE_ID_TI_XX21_XX11_FM
+				? 4 : 2, &dev->dev);
+	if (!fm) {
+		rc = -ENOMEM;
+		goto err_out_int;
+	}
+
+	INIT_WORK(&fm->media_switcher, tifm_7xx1_switch_media);
+	fm->eject = tifm_7xx1_eject;
+	fm->has_ms_pif = tifm_7xx1_has_ms_pif;
+	pci_set_drvdata(dev, fm);
+
+	fm->addr = pci_ioremap_bar(dev, 0);
+	if (!fm->addr)
+		goto err_out_free;
+
+	rc = request_irq(dev->irq, tifm_7xx1_isr, IRQF_SHARED, DRIVER_NAME, fm);
+	if (rc)
+		goto err_out_unmap;
+
+	rc = tifm_add_adapter(fm);
+	if (rc)
+		goto err_out_irq;
+
+	writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1),
+	       fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+	writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1),
+	       fm->addr + FM_SET_INTERRUPT_ENABLE);
+	return 0;
+
+err_out_irq:
+	free_irq(dev->irq, fm);
+err_out_unmap:
+	iounmap(fm->addr);
+err_out_free:
+	pci_set_drvdata(dev, NULL);
+	tifm_free_adapter(fm);
+err_out_int:
+	pci_intx(dev, 0);
+	pci_release_regions(dev);
+err_out:
+	if (!pci_dev_busy)
+		pci_disable_device(dev);
+	return rc;
+}
+
+static void tifm_7xx1_remove(struct pci_dev *dev)
+{
+	struct tifm_adapter *fm = pci_get_drvdata(dev);
+	int cnt;
+
+	fm->eject = tifm_7xx1_dummy_eject;
+	fm->has_ms_pif = tifm_7xx1_dummy_has_ms_pif;
+	writel(TIFM_IRQ_SETALL, fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+	mmiowb();
+	free_irq(dev->irq, fm);
+
+	tifm_remove_adapter(fm);
+
+	for (cnt = 0; cnt < fm->num_sockets; cnt++)
+		tifm_7xx1_sock_power_off(tifm_7xx1_sock_addr(fm->addr, cnt));
+
+	pci_set_drvdata(dev, NULL);
+
+	iounmap(fm->addr);
+	pci_intx(dev, 0);
+	pci_release_regions(dev);
+
+	pci_disable_device(dev);
+	tifm_free_adapter(fm);
+}
+
+static struct pci_device_id tifm_7xx1_pci_tbl [] = {
+	{ PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX21_XX11_FM, PCI_ANY_ID,
+	  PCI_ANY_ID, 0, 0, 0 }, /* xx21 - the one I have */
+        { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX12_FM, PCI_ANY_ID,
+	  PCI_ANY_ID, 0, 0, 0 },
+	{ PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX20_FM, PCI_ANY_ID,
+	  PCI_ANY_ID, 0, 0, 0 },
+	{ }
+};
+
+static struct pci_driver tifm_7xx1_driver = {
+	.name = DRIVER_NAME,
+	.id_table = tifm_7xx1_pci_tbl,
+	.probe = tifm_7xx1_probe,
+	.remove = tifm_7xx1_remove,
+	.suspend = tifm_7xx1_suspend,
+	.resume = tifm_7xx1_resume,
+};
+
+static int __init tifm_7xx1_init(void)
+{
+	return pci_register_driver(&tifm_7xx1_driver);
+}
+
+static void __exit tifm_7xx1_exit(void)
+{
+	pci_unregister_driver(&tifm_7xx1_driver);
+}
+
+MODULE_AUTHOR("Alex Dubov");
+MODULE_DESCRIPTION("TI FlashMedia host driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, tifm_7xx1_pci_tbl);
+MODULE_VERSION(DRIVER_VERSION);
+
+module_init(tifm_7xx1_init);
+module_exit(tifm_7xx1_exit);
diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c
new file mode 100644
index 00000000000..98bcba521da
--- /dev/null
+++ b/drivers/misc/tifm_core.c
@@ -0,0 +1,366 @@
+/*
+ *  tifm_core.c - TI FlashMedia driver
+ *
+ *  Copyright (C) 2006 Alex Dubov <oakad@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/tifm.h>
+#include <linux/init.h>
+#include <linux/idr.h>
+
+#define DRIVER_NAME "tifm_core"
+#define DRIVER_VERSION "0.8"
+
+static struct workqueue_struct *workqueue;
+static DEFINE_IDR(tifm_adapter_idr);
+static DEFINE_SPINLOCK(tifm_adapter_lock);
+
+static const char *tifm_media_type_name(unsigned char type, unsigned char nt)
+{
+	const char *card_type_name[3][3] = {
+		{ "SmartMedia/xD", "MemoryStick", "MMC/SD" },
+		{ "XD", "MS", "SD"},
+		{ "xd", "ms", "sd"}
+	};
+
+	if (nt > 2 || type < 1 || type > 3)
+		return NULL;
+	return card_type_name[nt][type - 1];
+}
+
+static int tifm_dev_match(struct tifm_dev *sock, struct tifm_device_id *id)
+{
+	if (sock->type == id->type)
+		return 1;
+	return 0;
+}
+
+static int tifm_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	struct tifm_driver *fm_drv = container_of(drv, struct tifm_driver,
+						  driver);
+	struct tifm_device_id *ids = fm_drv->id_table;
+
+	if (ids) {
+		while (ids->type) {
+			if (tifm_dev_match(sock, ids))
+				return 1;
+			++ids;
+		}
+	}
+	return 0;
+}
+
+static int tifm_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+
+	if (add_uevent_var(env, "TIFM_CARD_TYPE=%s", tifm_media_type_name(sock->type, 1)))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int tifm_device_probe(struct device *dev)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+					       driver);
+	int rc = -ENODEV;
+
+	get_device(dev);
+	if (dev->driver && drv->probe) {
+		rc = drv->probe(sock);
+		if (!rc)
+			return 0;
+	}
+	put_device(dev);
+	return rc;
+}
+
+static void tifm_dummy_event(struct tifm_dev *sock)
+{
+	return;
+}
+
+static int tifm_device_remove(struct device *dev)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+					       driver);
+
+	if (dev->driver && drv->remove) {
+		sock->card_event = tifm_dummy_event;
+		sock->data_event = tifm_dummy_event;
+		drv->remove(sock);
+		sock->dev.driver = NULL;
+	}
+
+	put_device(dev);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int tifm_device_suspend(struct device *dev, pm_message_t state)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+					       driver);
+
+	if (dev->driver && drv->suspend)
+		return drv->suspend(sock, state);
+	return 0;
+}
+
+static int tifm_device_resume(struct device *dev)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+					       driver);
+
+	if (dev->driver && drv->resume)
+		return drv->resume(sock);
+	return 0;
+}
+
+#else
+
+#define tifm_device_suspend NULL
+#define tifm_device_resume NULL
+
+#endif /* CONFIG_PM */
+
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	return sprintf(buf, "%x", sock->type);
+}
+
+static struct device_attribute tifm_dev_attrs[] = {
+	__ATTR(type, S_IRUGO, type_show, NULL),
+	__ATTR_NULL
+};
+
+static struct bus_type tifm_bus_type = {
+	.name      = "tifm",
+	.dev_attrs = tifm_dev_attrs,
+	.match     = tifm_bus_match,
+	.uevent    = tifm_uevent,
+	.probe     = tifm_device_probe,
+	.remove    = tifm_device_remove,
+	.suspend   = tifm_device_suspend,
+	.resume    = tifm_device_resume
+};
+
+static void tifm_free(struct device *dev)
+{
+	struct tifm_adapter *fm = container_of(dev, struct tifm_adapter, dev);
+
+	kfree(fm);
+}
+
+static struct class tifm_adapter_class = {
+	.name    = "tifm_adapter",
+	.dev_release = tifm_free
+};
+
+struct tifm_adapter *tifm_alloc_adapter(unsigned int num_sockets,
+					struct device *dev)
+{
+	struct tifm_adapter *fm;
+
+	fm = kzalloc(sizeof(struct tifm_adapter)
+		     + sizeof(struct tifm_dev*) * num_sockets, GFP_KERNEL);
+	if (fm) {
+		fm->dev.class = &tifm_adapter_class;
+		fm->dev.parent = dev;
+		device_initialize(&fm->dev);
+		spin_lock_init(&fm->lock);
+		fm->num_sockets = num_sockets;
+	}
+	return fm;
+}
+EXPORT_SYMBOL(tifm_alloc_adapter);
+
+int tifm_add_adapter(struct tifm_adapter *fm)
+{
+	int rc;
+
+	if (!idr_pre_get(&tifm_adapter_idr, GFP_KERNEL))
+		return -ENOMEM;
+
+	spin_lock(&tifm_adapter_lock);
+	rc = idr_get_new(&tifm_adapter_idr, fm, &fm->id);
+	spin_unlock(&tifm_adapter_lock);
+	if (rc)
+		return rc;
+
+	dev_set_name(&fm->dev, "tifm%u", fm->id);
+	rc = device_add(&fm->dev);
+	if (rc) {
+		spin_lock(&tifm_adapter_lock);
+		idr_remove(&tifm_adapter_idr, fm->id);
+		spin_unlock(&tifm_adapter_lock);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(tifm_add_adapter);
+
+void tifm_remove_adapter(struct tifm_adapter *fm)
+{
+	unsigned int cnt;
+
+	flush_workqueue(workqueue);
+	for (cnt = 0; cnt < fm->num_sockets; ++cnt) {
+		if (fm->sockets[cnt])
+			device_unregister(&fm->sockets[cnt]->dev);
+	}
+
+	spin_lock(&tifm_adapter_lock);
+	idr_remove(&tifm_adapter_idr, fm->id);
+	spin_unlock(&tifm_adapter_lock);
+	device_del(&fm->dev);
+}
+EXPORT_SYMBOL(tifm_remove_adapter);
+
+void tifm_free_adapter(struct tifm_adapter *fm)
+{
+	put_device(&fm->dev);
+}
+EXPORT_SYMBOL(tifm_free_adapter);
+
+void tifm_free_device(struct device *dev)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	kfree(sock);
+}
+EXPORT_SYMBOL(tifm_free_device);
+
+struct tifm_dev *tifm_alloc_device(struct tifm_adapter *fm, unsigned int id,
+				   unsigned char type)
+{
+	struct tifm_dev *sock = NULL;
+
+	if (!tifm_media_type_name(type, 0))
+		return sock;
+
+	sock = kzalloc(sizeof(struct tifm_dev), GFP_KERNEL);
+	if (sock) {
+		spin_lock_init(&sock->lock);
+		sock->type = type;
+		sock->socket_id = id;
+		sock->card_event = tifm_dummy_event;
+		sock->data_event = tifm_dummy_event;
+
+		sock->dev.parent = fm->dev.parent;
+		sock->dev.bus = &tifm_bus_type;
+		sock->dev.dma_mask = fm->dev.parent->dma_mask;
+		sock->dev.release = tifm_free_device;
+
+		dev_set_name(&sock->dev, "tifm_%s%u:%u",
+			     tifm_media_type_name(type, 2), fm->id, id);
+		printk(KERN_INFO DRIVER_NAME
+		       ": %s card detected in socket %u:%u\n",
+		       tifm_media_type_name(type, 0), fm->id, id);
+	}
+	return sock;
+}
+EXPORT_SYMBOL(tifm_alloc_device);
+
+void tifm_eject(struct tifm_dev *sock)
+{
+	struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent);
+	fm->eject(fm, sock);
+}
+EXPORT_SYMBOL(tifm_eject);
+
+int tifm_has_ms_pif(struct tifm_dev *sock)
+{
+	struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent);
+	return fm->has_ms_pif(fm, sock);
+}
+EXPORT_SYMBOL(tifm_has_ms_pif);
+
+int tifm_map_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents,
+		int direction)
+{
+	return pci_map_sg(to_pci_dev(sock->dev.parent), sg, nents, direction);
+}
+EXPORT_SYMBOL(tifm_map_sg);
+
+void tifm_unmap_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents,
+		   int direction)
+{
+	pci_unmap_sg(to_pci_dev(sock->dev.parent), sg, nents, direction);
+}
+EXPORT_SYMBOL(tifm_unmap_sg);
+
+void tifm_queue_work(struct work_struct *work)
+{
+	queue_work(workqueue, work);
+}
+EXPORT_SYMBOL(tifm_queue_work);
+
+int tifm_register_driver(struct tifm_driver *drv)
+{
+	drv->driver.bus = &tifm_bus_type;
+
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(tifm_register_driver);
+
+void tifm_unregister_driver(struct tifm_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL(tifm_unregister_driver);
+
+static int __init tifm_init(void)
+{
+	int rc;
+
+	workqueue = create_freezeable_workqueue("tifm");
+	if (!workqueue)
+		return -ENOMEM;
+
+	rc = bus_register(&tifm_bus_type);
+
+	if (rc)
+		goto err_out_wq;
+
+	rc = class_register(&tifm_adapter_class);
+	if (!rc)
+		return 0;
+
+	bus_unregister(&tifm_bus_type);
+
+err_out_wq:
+	destroy_workqueue(workqueue);
+
+	return rc;
+}
+
+static void __exit tifm_exit(void)
+{
+	class_unregister(&tifm_adapter_class);
+	bus_unregister(&tifm_bus_type);
+	destroy_workqueue(workqueue);
+}
+
+subsys_initcall(tifm_init);
+module_exit(tifm_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alex Dubov");
+MODULE_DESCRIPTION("TI FlashMedia core driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);